From e97a46c3551160b5696b13aae672597496d374db Mon Sep 17 00:00:00 2001 From: gerard ketefian Date: Tue, 9 Jul 2024 13:57:48 -0600 Subject: [PATCH 001/131] Bug fix to support the %H format in METplus via printf. --- ush/bash_utils/eval_METplus_timestr_tmpl.sh | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) diff --git a/ush/bash_utils/eval_METplus_timestr_tmpl.sh b/ush/bash_utils/eval_METplus_timestr_tmpl.sh index 245369509b..0f2c4c0716 100644 --- a/ush/bash_utils/eval_METplus_timestr_tmpl.sh +++ b/ush/bash_utils/eval_METplus_timestr_tmpl.sh @@ -163,9 +163,12 @@ cannot be empty: #----------------------------------------------------------------------- # case "${METplus_time_fmt}" in - "%Y%m%d%H"|"%Y%m%d"|"%H%M%S"|"%H") + "%Y%m%d%H"|"%Y%m%d"|"%H%M%S") fmt="${METplus_time_fmt}" ;; + "%H") + fmt="%02.0f" + ;; "%HHH") # # Print format assumes that the argument to printf (i.e. the number to From 815c941f291a764c86e7a0e0c6d2996b2e94ec9a Mon Sep 17 00:00:00 2001 From: gerard ketefian Date: Tue, 9 Jul 2024 14:42:20 -0600 Subject: [PATCH 002/131] Bug fix to the bug fix! --- ush/bash_utils/eval_METplus_timestr_tmpl.sh | 13 ++++++++++++- 1 file changed, 12 insertions(+), 1 deletion(-) diff --git a/ush/bash_utils/eval_METplus_timestr_tmpl.sh b/ush/bash_utils/eval_METplus_timestr_tmpl.sh index 0f2c4c0716..572f7c68c4 100644 --- a/ush/bash_utils/eval_METplus_timestr_tmpl.sh +++ b/ush/bash_utils/eval_METplus_timestr_tmpl.sh @@ -167,7 +167,18 @@ cannot be empty: fmt="${METplus_time_fmt}" ;; "%H") - fmt="%02.0f" +# +# The "%H" format needs to be treated differently depending on if it's +# formatting a "lead" time type or another (e.g. "init" or "vald") because +# for "lead", the printf function is used below (which doesn't understand +# the "%H" format) whereas for the others, the date utility is used (which +# does understand "%H"). 
+# + if [ "${METplus_time_type}" = "lead" ]; then + fmt="%02.0f" + else + fmt="${METplus_time_fmt}" + fi ;; "%HHH") # From bc8548060558ecc9e3f8b2a8f64f9bc7910ac608 Mon Sep 17 00:00:00 2001 From: gerard ketefian Date: Thu, 11 Jul 2024 12:10:29 -0600 Subject: [PATCH 003/131] Bug fix from Michelle H. for prepbufr files: "On May 22, the name of the tar file where the prepbufr files live changed" --- parm/data_locations.yml | 2 ++ 1 file changed, 2 insertions(+) diff --git a/parm/data_locations.yml b/parm/data_locations.yml index 7901f4c085..dd3b5ddd17 100644 --- a/parm/data_locations.yml +++ b/parm/data_locations.yml @@ -351,11 +351,13 @@ NDAS_obs: - /NCEPPROD/hpssprod/runhistory/rh{yyyy}/{yyyy}{mm}/{yyyy}{mm}{dd} - /NCEPPROD/hpssprod/runhistory/rh{yyyy}/{yyyy}{mm}/{yyyy}{mm}{dd} - /NCEPPROD/hpssprod/runhistory/rh{yyyy}/{yyyy}{mm}/{yyyy}{mm}{dd} + - /NCEPPROD/hpssprod/runhistory/rh{yyyy}/{yyyy}{mm}/{yyyy}{mm}{dd} archive_file_names: - "com2_nam_prod_nam.{yyyy}{mm}{dd}{hh}.bufr.tar" - "gpfs_dell1_nco_ops_com_nam_prod_nam.{yyyy}{mm}{dd}{hh}.bufr.tar" - "com_nam_prod_nam.{yyyy}{mm}{dd}{hh}.bufr.tar" - "com_obsproc_v1.1_nam.{yyyy}{mm}{dd}{hh}.bufr.tar" + - "com_obsproc_v1.2_nam.{yyyy}{mm}{dd}{hh}.bufr.tar" file_names: obs: - "./nam.t{hh}z.prepbufr.tm*.nr" From 81d61b8ea2d233eece6abcbd08086dec393f1ba3 Mon Sep 17 00:00:00 2001 From: gerard ketefian Date: Thu, 11 Jul 2024 12:22:09 -0600 Subject: [PATCH 004/131] Bug fix for removing phantom 00-hour tasks from workflow. Bug found by Michelle Harrold, solution by Michael Kavulich. 
--- parm/wflow/verify_det.yaml | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/parm/wflow/verify_det.yaml b/parm/wflow/verify_det.yaml index e82d7c61e1..3acfa3e836 100644 --- a/parm/wflow/verify_det.yaml +++ b/parm/wflow/verify_det.yaml @@ -1,6 +1,6 @@ default_task_verify_det: &default_task_verify_det account: '&ACCOUNT;' - attrs: + attrs: &default_attrs cycledefs: forecast maxtries: '1' envars: &default_vars @@ -30,6 +30,7 @@ metatask_GridStat_CCPA_all_accums_all_mems: task_run_MET_GridStat_vx_APCP#ACCUM_HH#h_mem#mem#: <<: *default_task_verify_det attrs: + <<: *default_attrs maxtries: '2' command: '&LOAD_MODULES_RUN_TASK_FP; "run_vx" "&JOBSdir;/JREGIONAL_RUN_MET_GRIDSTAT_OR_POINTSTAT_VX"' envars: @@ -62,6 +63,7 @@ metatask_GridStat_NOHRSC_all_accums_all_mems: task_run_MET_GridStat_vx_ASNOW#ACCUM_HH#h_mem#mem#: <<: *default_task_verify_det attrs: + <<: *default_attrs maxtries: '2' command: '&LOAD_MODULES_RUN_TASK_FP; "run_vx" "&JOBSdir;/JREGIONAL_RUN_MET_GRIDSTAT_OR_POINTSTAT_VX"' envars: From 35530abd74948fc50d1e6ebc25a25a8a7cd2f8c2 Mon Sep 17 00:00:00 2001 From: gerard ketefian Date: Tue, 16 Jul 2024 11:01:43 -0600 Subject: [PATCH 005/131] Bug fix: Append cycle date to names of deterministic GridStat and PointStat tasks' METplus log files. --- scripts/exregional_run_met_gridstat_or_pointstat_vx.sh | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/scripts/exregional_run_met_gridstat_or_pointstat_vx.sh b/scripts/exregional_run_met_gridstat_or_pointstat_vx.sh index 4f871e6e1b..91c5a7896b 100755 --- a/scripts/exregional_run_met_gridstat_or_pointstat_vx.sh +++ b/scripts/exregional_run_met_gridstat_or_pointstat_vx.sh @@ -277,7 +277,7 @@ fi # metplus_config_tmpl_bn="GridStat_or_PointStat" metplus_config_bn="${MetplusToolName}_${FIELDNAME_IN_MET_FILEDIR_NAMES}_${ensmem_name}" -metplus_log_bn="${metplus_config_bn}" +metplus_log_bn="${metplus_config_bn}_$CDATE" # # Add prefixes and suffixes (extensions) to the base file names. 
# From 6c548ceeb17f60d7fa11417ae8ef7451a5269321 Mon Sep 17 00:00:00 2001 From: gerard ketefian Date: Tue, 16 Jul 2024 14:05:06 -0600 Subject: [PATCH 006/131] Version of ex-script for pulling obs that works for multiple overlapping cycles for CCPA and MRMS but not yet for NDAS or NOHRSC. --- scripts/exregional_get_verif_obs.sh | 295 +++++++++++++++++++--------- 1 file changed, 199 insertions(+), 96 deletions(-) diff --git a/scripts/exregional_get_verif_obs.sh b/scripts/exregional_get_verif_obs.sh index a74f11cd3a..564860899f 100755 --- a/scripts/exregional_get_verif_obs.sh +++ b/scripts/exregional_get_verif_obs.sh @@ -123,6 +123,11 @@ imm=$(echo ${PDY} | cut -c5-6) idd=$(echo ${PDY} | cut -c7-8) ihh=${cyc} +echo +echo "HELLO GGGGGGGG" +iyyyymmddhh=${PDY}${cyc} +echo "iyyyymmddhh = ${iyyyymmddhh}" + # Unix date utility needs dates in yyyy-mm-dd hh:mm:ss format unix_init_DATE="${iyyyy}-${imm}-${idd} ${ihh}:00:00" @@ -144,126 +149,184 @@ while [[ ${current_fcst} -le ${fcst_length} ]]; do vdate_p1=$($DATE_UTIL -d "${unix_init_DATE} ${current_fcst} hours 1 day" +%Y%m%d%H) vyyyymmdd_p1=$(echo ${vdate_p1} | cut -c1-8) +echo +echo "HELLO HHHHHHHH" +echo "vyyyymmdd = ${vyyyymmdd}" +echo "vyyyymmdd_p1 = ${vyyyymmdd_p1}" +echo "ihh = ${ihh}" +#exit + #remove leading zero again, this time keep original vhh_noZero=$((10#${vhh})) - - # Retrieve CCPA observations +# +#----------------------------------------------------------------------- +# +# Retrieve CCPA observations. +# +#----------------------------------------------------------------------- +# if [[ ${OBTYPE} == "CCPA" ]]; then - #CCPA is accumulation observations, so none to retrieve for hour zero + # CCPA is accumulation observations, so for hour 0 there are no files + # to retrieve. 
if [[ ${current_fcst} -eq 0 ]]; then current_fcst=$((${current_fcst} + 1)) continue fi - # Staging location for raw CCPA data from HPSS - ccpa_raw=${OBS_DIR}/raw + # Accumulation is for accumulation of CCPA data to pull (hardcoded to + # 01h, see note above). + accum=01 - # Reorganized CCPA location + # Directory in which the daily subdirectories containing the CCPA grib2 + # files will appear after this script is done. Make sure this exists. ccpa_proc=${OBS_DIR} + if [[ ! -d "${ccpa_proc}/${vyyyymmdd}" ]]; then + mkdir -p ${ccpa_proc}/${vyyyymmdd} + fi - # Accumulation is for accumulation of CCPA data to pull (hardcoded to 01h, see note above.) - accum=01 + # File name within the HPSS archive file. Note that this only includes + # the valid hour in its name; the year, month, and day are specified in + # the name of the directory in which it is located within the archive. + ccpa_fn="ccpa.t${vhh}z.${accum}h.hrap.conus.gb2" + + # Full path to final location of the CCPA grib2 file for the current valid + # time. Note that this path includes the valid date (year, month, and day) + # information in the name of a subdirectory and the valid hour-of-day in + # the name of the file. + ccpa_fp_proc="${ccpa_proc}/${vyyyymmdd}/${ccpa_fn}" + + # Temporary staging directory for raw CCPA files from HPSS. These "raw" + # directories are temporary directories in which archive files from HPSS + # are placed and files within those archives extracted. Note that the + # name of this subdirectory is cycle-specific to avoid other get_obs_ccpa + # workflow tasks (i.e. those corresponding to cycles other than the current + # one) writing into the same directory. + ccpa_raw="${ccpa_proc}/raw_${iyyyymmddhh}" # Check if file exists on disk; if not, pull it. 
- ccpa_file="$ccpa_proc/${vyyyymmdd}/ccpa.t${vhh}z.${accum}h.hrap.conus.gb2" - if [[ -f "${ccpa_file}" ]]; then + if [[ -f "${ccpa_fp_proc}" ]]; then + echo "${OBTYPE} file exists on disk:" - echo "${ccpa_file}" + echo " ccpa_fp_proc = \"${ccpa_fp_proc}\"" + echo "Will NOT attempt to retrieve from remote locations." + else - echo "${OBTYPE} file does not exist on disk:" - echo "${ccpa_file}" - echo "Will attempt to retrieve from remote locations" - # Create necessary raw and prop directories - if [[ ! -d "$ccpa_raw/${vyyyymmdd}" ]]; then - mkdir -p $ccpa_raw/${vyyyymmdd} - fi - if [[ ! -d "$ccpa_raw/${vyyyymmdd_p1}" ]]; then - mkdir -p $ccpa_raw/${vyyyymmdd_p1} + echo "${OBTYPE} file does not exist on disk:" + echo " ccpa_fp_proc = \"${ccpa_fp_proc}\"" + echo "Will attempt to retrieve from remote locations." + + # Create the necessary raw (sub)directories on disk. Note that we need + # to create a subdirectory for 1 day + the current valid date because + # that is needed to get around a metadata error in the CCPA files on HPSS + # (in particular, one hour CCPA files have incorrect metadata in the files + # under the "00" directory from 20180718 to 20210504). + if [[ ! -d "${ccpa_raw}/${vyyyymmdd}" ]]; then + mkdir -p ${ccpa_raw}/${vyyyymmdd} fi - if [[ ! -d "$ccpa_proc/${vyyyymmdd}" ]]; then - mkdir -p $ccpa_proc/${vyyyymmdd} + if [[ ! 
-d "${ccpa_raw}/${vyyyymmdd_p1}" ]]; then + mkdir -p ${ccpa_raw}/${vyyyymmdd_p1} fi - # Check if valid hour is 00 + + valid_time=${vyyyymmdd}${vhh} + output_path="${ccpa_raw}/${vyyyymmdd}" if [[ ${vhh_noZero} -ge 19 && ${vhh_noZero} -le 23 ]]; then - # Pull CCPA data from HPSS - cmd=" - python3 -u ${USHdir}/retrieve_data.py \ - --debug \ - --file_set obs \ - --config ${PARMdir}/data_locations.yml \ - --cycle_date ${vyyyymmdd_p1}${vhh} \ - --data_stores hpss \ - --data_type CCPA_obs \ - --output_path $ccpa_raw/${vyyyymmdd_p1} \ - --summary_file ${logfile}" + valid_time=${vyyyymmdd_p1}${vhh} + output_path="${ccpa_raw}/${vyyyymmdd_p1}" + fi - echo "CALLING: ${cmd}" - $cmd || print_err_msg_exit "\ - Could not retrieve CCPA data from HPSS + # The retrieve_data.py script below uses the current working directory as + # the location into which to extract the contents of the HPSS archive (tar) + # file. Thus, if there are multiple get_obs_ccpa tasks running (i.e. ones + # for different cycles), they will be extracting files into the same (current) + # directory. That causes errors in the workflow. To avoid this, change + # location to the raw directory. This will avoid such errors because the + # raw directory has a cycle-specific name. + cd ${ccpa_raw} + + # Pull CCPA data from HPSS. This will get a single grib2 (.gb2) file + # corresponding to the current valid time (valid_time). + cmd=" + python3 -u ${USHdir}/retrieve_data.py \ + --debug \ + --file_set obs \ + --config ${PARMdir}/data_locations.yml \ + --cycle_date ${valid_time} \ + --data_stores hpss \ + --data_type CCPA_obs \ + --output_path ${output_path} \ + --summary_file ${logfile}" - The following command exited with a non-zero exit status: - ${cmd} + echo "CALLING: ${cmd}" + $cmd || print_err_msg_exit "\ + Could not retrieve CCPA data from HPSS. 
+ + The following command exited with a non-zero exit status: + ${cmd} " - else - # Pull CCPA data from HPSS - cmd=" - python3 -u ${USHdir}/retrieve_data.py \ - --debug \ - --file_set obs \ - --config ${PARMdir}/data_locations.yml \ - --cycle_date ${vyyyymmdd}${vhh} \ - --data_stores hpss \ - --data_type CCPA_obs \ - --output_path $ccpa_raw/${vyyyymmdd} \ - --summary_file ${logfile}" + # Move CCPA file to its final location. + # + # Since this script is part of a workflow, other tasks (for other cycles) + # that call this script may have extracted and placed the current file + # in its final location between the time we checked for its existence + # above above (and didn't find it) and now. This can happen because + # there can be overlap between the verification times for the current + # cycle and those of other cycles. For this reason, check again for the + # existence of the file in its final location. If it's already been + # created by another task, don't bother to move it from its raw location + # to its final location. + if [[ -f "${ccpa_fp_proc}" ]]; then - echo "CALLING: ${cmd}" - $cmd || print_err_msg_exit "\ - Could not retrieve CCPA data from HPSS + echo "${OBTYPE} file exists on disk:" + echo " ccpa_fp_proc = \"{ccpa_fp_proc}\"" + echo "It was likely created by a get_obs_ccpa workflow task for another cycle." + echo "NOT moving file from its temporary (raw) location to its final location." - The following command exited with a non-zero exit status: - ${cmd} -" - fi + else - # One hour CCPA files have incorrect metadata in the files under the "00" directory from 20180718 to 20210504. - # After data is pulled, reorganize into correct valid yyyymmdd structure. 
- if [[ ${vhh_noZero} -ge 1 && ${vhh_noZero} -le 6 ]]; then - cp $ccpa_raw/${vyyyymmdd}/ccpa.t${vhh}z.${accum}h.hrap.conus.gb2 $ccpa_proc/${vyyyymmdd} - elif [[ ${vhh_noZero} -ge 7 && ${vhh_noZero} -le 12 ]]; then - cp $ccpa_raw/${vyyyymmdd}/ccpa.t${vhh}z.${accum}h.hrap.conus.gb2 $ccpa_proc/${vyyyymmdd} - elif [[ ${vhh_noZero} -ge 13 && ${vhh_noZero} -le 18 ]]; then - cp $ccpa_raw/${vyyyymmdd}/ccpa.t${vhh}z.${accum}h.hrap.conus.gb2 $ccpa_proc/${vyyyymmdd} - elif [[ ${vhh_noZero} -ge 19 && ${vhh_noZero} -le 23 ]]; then - if [[ ${vyyyymmdd} -ge 20180718 && ${vyyyymmdd} -le 20210504 ]]; then - wgrib2 $ccpa_raw/${vyyyymmdd_p1}/ccpa.t${vhh}z.${accum}h.hrap.conus.gb2 -set_date -24hr -grib $ccpa_proc/${vyyyymmdd}/ccpa.t${vhh}z.${accum}h.hrap.conus.gb2 -s - else - cp $ccpa_raw/${vyyyymmdd_p1}/ccpa.t${vhh}z.${accum}h.hrap.conus.gb2 $ccpa_proc/${vyyyymmdd} - fi - elif [[ ${vhh_noZero} -eq 0 ]]; then - # One hour CCPA files on HPSS have incorrect metadata in the files under the "00" directory from 20180718 to 20210504. - if [[ ${vyyyymmdd} -ge 20180718 && ${vyyyymmdd} -le 20210504 ]]; then - wgrib2 $ccpa_raw/${vyyyymmdd}/ccpa.t${vhh}z.${accum}h.hrap.conus.gb2 -set_date -24hr -grib $ccpa_proc/${vyyyymmdd}/ccpa.t${vhh}z.${accum}h.hrap.conus.gb2 -s - else - cp $ccpa_raw/${vyyyymmdd}/ccpa.t${vhh}z.${accum}h.hrap.conus.gb2 $ccpa_proc/${vyyyymmdd} + # Full path to the CCPA file that was pulled and extracted above and + # placed in the raw directory. + ccpa_fp_raw="${output_path}/${ccpa_fn}" + + # One hour CCPA files have incorrect metadata in the files under the "00" + # directory from 20180718 to 20210504. After data is pulled, reorganize + # into correct valid yyyymmdd structure. 
+ if [[ ${vhh_noZero} -ge 1 && ${vhh_noZero} -le 18 ]]; then + mv ${ccpa_fp_raw} ${ccpa_fp_proc} + elif [[ (${vhh_noZero} -eq 0) || (${vhh_noZero} -ge 19 && ${vhh_noZero} -le 23) ]]; then + if [[ ${vyyyymmdd} -ge 20180718 && ${vyyyymmdd} -le 20210504 ]]; then + wgrib2 ${ccpa_fp_raw} -set_date -24hr -grib ${ccpa_fp_proc} -s + else + mv ${ccpa_fp_raw} ${ccpa_fp_proc} + fi fi + fi fi - # Retrieve MRMS observations +# +#----------------------------------------------------------------------- +# +# Retrieve MRMS observations. +# +#----------------------------------------------------------------------- +# elif [[ ${OBTYPE} == "MRMS" ]]; then + # Top-level MRMS directory - # raw MRMS data from HPSS - mrms_raw=${OBS_DIR}/raw # Reorganized MRMS location mrms_proc=${OBS_DIR} + # raw MRMS data from HPSS + #mrms_raw=${OBS_DIR}/raw + mrms_raw="${mrms_proc}/raw_${iyyyymmddhh}" + # For each field (REFC and RETOP), check if file exists on disk; if not, pull it. for field in ${VAR[@]}; do + if [ "${field}" = "REFC" ]; then field_base_name="MergedReflectivityQCComposite" level="_00.50_" @@ -279,32 +342,53 @@ while [[ ${current_fcst} -le ${fcst_length} ]]; do " fi - mrms_file="$mrms_proc/${vyyyymmdd}/${field_base_name}${level}${vyyyymmdd}-${vhh}0000.grib2" + mrms_fn="${field_base_name}${level}${vyyyymmdd}-${vhh}0000.grib2" + mrms_day_dir="${mrms_proc}/${vyyyymmdd}" + mrms_fp="${mrms_proc}/${vyyyymmdd}/${mrms_fn}" + +# if [[ -f "${mrms_fp}" ]]; then +# +# echo "${OBTYPE} file for field \"${field}\" exists on disk:" +# echo " mrms_fp = \"${mrms_fp}\"" +# echo "Will NOT attempt to retrieve from remote locations." + + if [[ -d "${mrms_day_dir}" ]]; then + + echo "${OBTYPE} directory for field \"${field}\" and day ${vyyyymmdd} exists on disk:" + echo " mrms_day_dir = \"${mrms_day_dir}\"" + echo "This means observation files for this field and all hours of this day have been or are being retrieved." 
+ echo "Will NOT attempt to retrieve the current file" + echo " mrms_fp = \"${mrms_fp}\"" + echo "from remote locations." - if [[ -f "${mrms_file}" ]]; then - echo "${OBTYPE} file exists on disk for field ${field}:\n${mrms_file}" else - echo "${OBTYPE} file does not exist on disk for field ${field}:\n${mrms_file}" - echo "Will attempt to retrieve from remote locations" + + echo "${OBTYPE} file for field \"${field}\" does not exist on disk:" + echo " mrms_fp = \"${mrms_fp}\"" + echo "Will attempt to retrieve from remote locations." + # Create directories if necessary - if [[ ! -d "$mrms_raw/${vyyyymmdd}" ]]; then - mkdir -p $mrms_raw/${vyyyymmdd} + if [[ ! -d "${mrms_raw}/${vyyyymmdd}" ]]; then + mkdir -p ${mrms_raw}/${vyyyymmdd} fi if [[ ! -d "$mrms_proc/${vyyyymmdd}" ]]; then mkdir -p $mrms_proc/${vyyyymmdd} fi + valid_time=${vyyyymmdd}${vhh} + output_path="${mrms_raw}/${vyyyymmdd}" + cd ${mrms_raw} # Pull MRMS data from HPSS cmd=" python3 -u ${USHdir}/retrieve_data.py \ --debug \ --file_set obs \ --config ${PARMdir}/data_locations.yml \ - --cycle_date ${vyyyymmdd}${vhh} \ + --cycle_date ${valid_time} \ --data_stores hpss \ --data_type MRMS_obs \ - --output_path $mrms_raw/${vyyyymmdd} \ + --output_path ${output_path} \ --summary_file ${logfile}" echo "CALLING: ${cmd}" @@ -326,8 +410,13 @@ while [[ ${current_fcst} -le ${fcst_length} ]]; do fi done - - # Retrieve NDAS observations +# +#----------------------------------------------------------------------- +# +# Retrieve NDAS observations. +# +#----------------------------------------------------------------------- +# elif [[ ${OBTYPE} == "NDAS" ]]; then # raw NDAS data from HPSS ndas_raw=${OBS_DIR}/raw @@ -363,9 +452,17 @@ while [[ ${current_fcst} -le ${fcst_length} ]]; do continue fi +echo "" +echo "HELLO AAAAA" +echo "vhh_noZero = ${vhh_noZero}" + if [[ ${vhh_noZero} -eq 0 || ${vhh_noZero} -eq 6 || ${vhh_noZero} -eq 12 || ${vhh_noZero} -eq 18 ]]; then +echo "" +echo "HELLO BBBBB" if [[ ! 
-d "$ndas_raw/${vyyyymmdd}${vhh}" ]]; then +echo "" +echo "HELLO CCCCC" mkdir -p $ndas_raw/${vyyyymmdd}${vhh} fi @@ -459,8 +556,13 @@ while [[ ${current_fcst} -le ${fcst_length} ]]; do fi fi - - # Retrieve NOHRSC observations +# +#----------------------------------------------------------------------- +# +# Retrieve NOHRSC observations. +# +#----------------------------------------------------------------------- +# elif [[ ${OBTYPE} == "NOHRSC" ]]; then #NOHRSC is accumulation observations, so none to retrieve for hour zero @@ -534,7 +636,8 @@ while [[ ${current_fcst} -le ${fcst_length} ]]; do Invalid OBTYPE specified for script; valid options are CCPA, MRMS, NDAS, and NOHRSC " fi # Increment to next forecast hour - # Increment to next forecast hour + + # Increment to next forecast hour echo "Finished fcst hr=${current_fcst}" current_fcst=$((${current_fcst} + 1)) @@ -542,7 +645,7 @@ done # Clean up raw, unprocessed observation files -rm -rf ${OBS_DIR}/raw +#rm -rf ${OBS_DIR}/raw # #----------------------------------------------------------------------- From 307f92ee1f998f303d93859ebd0b26bc63db1385 Mon Sep 17 00:00:00 2001 From: gerard ketefian Date: Wed, 17 Jul 2024 09:53:55 -0600 Subject: [PATCH 007/131] Changes to make get_obs_mrms tasks to work for mulitple cycles and without performing unnecessary repeated pulls. --- scripts/exregional_get_verif_obs.sh | 87 ++++++++++++++--------------- 1 file changed, 42 insertions(+), 45 deletions(-) diff --git a/scripts/exregional_get_verif_obs.sh b/scripts/exregional_get_verif_obs.sh index 564860899f..c86ba6796a 100755 --- a/scripts/exregional_get_verif_obs.sh +++ b/scripts/exregional_get_verif_obs.sh @@ -89,7 +89,7 @@ set -x # hh (00 through 05). If using custom staged data, you will have to # rename the files accordingly. # -# If data is retrieved from HPSS, it will automatically staged by this +# If data is retrieved from HPSS, it will be automatically staged by this # this script. 
# # @@ -293,13 +293,15 @@ echo "ihh = ${ihh}" # One hour CCPA files have incorrect metadata in the files under the "00" # directory from 20180718 to 20210504. After data is pulled, reorganize # into correct valid yyyymmdd structure. + #mv_or_cp="mv" + mv_or_cp="cp" if [[ ${vhh_noZero} -ge 1 && ${vhh_noZero} -le 18 ]]; then - mv ${ccpa_fp_raw} ${ccpa_fp_proc} + ${mv_or_cp} ${ccpa_fp_raw} ${ccpa_fp_proc} elif [[ (${vhh_noZero} -eq 0) || (${vhh_noZero} -ge 19 && ${vhh_noZero} -le 23) ]]; then if [[ ${vyyyymmdd} -ge 20180718 && ${vyyyymmdd} -le 20210504 ]]; then wgrib2 ${ccpa_fp_raw} -set_date -24hr -grib ${ccpa_fp_proc} -s else - mv ${ccpa_fp_raw} ${ccpa_fp_proc} + ${mv_or_cp} ${ccpa_fp_raw} ${ccpa_fp_proc} fi fi @@ -320,59 +322,53 @@ echo "ihh = ${ihh}" # Reorganized MRMS location mrms_proc=${OBS_DIR} - # raw MRMS data from HPSS - #mrms_raw=${OBS_DIR}/raw - mrms_raw="${mrms_proc}/raw_${iyyyymmddhh}" - - # For each field (REFC and RETOP), check if file exists on disk; if not, pull it. - for field in ${VAR[@]}; do + mrms_day_dir="${mrms_proc}/${vyyyymmdd}" - if [ "${field}" = "REFC" ]; then - field_base_name="MergedReflectivityQCComposite" - level="_00.50_" - elif [ "${field}" = "RETOP" ]; then - field_base_name="EchoTop" - level="_18_00.50_" - else - echo "Invalid field: ${field}" - print_err_msg_exit "\ - Invalid field specified: ${field} + if [[ -d "${mrms_day_dir}" ]]; then - Valid options are 'REFC', 'RETOP'. -" - fi + echo "${OBTYPE} directory for day ${vyyyymmdd} exists on disk:" + echo " mrms_day_dir = \"${mrms_day_dir}\"" + echo "This means observation files for this field and all hours of this day have been or are being retrieved." 
+ echo "Thus, we will NOT attempt to retrieve the current data from remote locations" - mrms_fn="${field_base_name}${level}${vyyyymmdd}-${vhh}0000.grib2" - mrms_day_dir="${mrms_proc}/${vyyyymmdd}" - mrms_fp="${mrms_proc}/${vyyyymmdd}/${mrms_fn}" - -# if [[ -f "${mrms_fp}" ]]; then -# -# echo "${OBTYPE} file for field \"${field}\" exists on disk:" -# echo " mrms_fp = \"${mrms_fp}\"" -# echo "Will NOT attempt to retrieve from remote locations." - - if [[ -d "${mrms_day_dir}" ]]; then + else - echo "${OBTYPE} directory for field \"${field}\" and day ${vyyyymmdd} exists on disk:" - echo " mrms_day_dir = \"${mrms_day_dir}\"" - echo "This means observation files for this field and all hours of this day have been or are being retrieved." - echo "Will NOT attempt to retrieve the current file" - echo " mrms_fp = \"${mrms_fp}\"" - echo "from remote locations." + # For each field (REFC and RETOP), check if file exists on disk; if not, pull it. + for field in ${VAR[@]}; do + + # raw MRMS data from HPSS + #mrms_raw=${OBS_DIR}/raw + #mrms_raw="${mrms_proc}/raw_${field}_${iyyyymmddhh}" + mrms_raw="${mrms_proc}/raw_${iyyyymmddhh}" + + if [ "${field}" = "REFC" ]; then + field_base_name="MergedReflectivityQCComposite" + level="_00.50_" + elif [ "${field}" = "RETOP" ]; then + field_base_name="EchoTop" + level="_18_00.50_" + else + echo "Invalid field: ${field}" + print_err_msg_exit "\ + Invalid field specified: ${field} + + Valid options are 'REFC', 'RETOP'. +" + fi - else + mrms_fn="${field_base_name}${level}${vyyyymmdd}-${vhh}0000.grib2" + mrms_fp="${mrms_proc}/${vyyyymmdd}/${mrms_fn}" echo "${OBTYPE} file for field \"${field}\" does not exist on disk:" echo " mrms_fp = \"${mrms_fp}\"" echo "Will attempt to retrieve from remote locations." - # Create directories if necessary + # Create directories if necessary. if [[ ! -d "${mrms_raw}/${vyyyymmdd}" ]]; then mkdir -p ${mrms_raw}/${vyyyymmdd} fi - if [[ ! 
-d "$mrms_proc/${vyyyymmdd}" ]]; then - mkdir -p $mrms_proc/${vyyyymmdd} + if [[ ! -d "${mrms_proc}/${vyyyymmdd}" ]]; then + mkdir -p ${mrms_proc}/${vyyyymmdd} fi valid_time=${vyyyymmdd}${vhh} @@ -408,8 +404,9 @@ echo "ihh = ${ihh}" hour=$((${hour} + 1)) # hourly increment done - fi - done + done + + fi # #----------------------------------------------------------------------- # From be542168f738cb7f3b93594bd62413ca30d4428b Mon Sep 17 00:00:00 2001 From: gerard ketefian Date: Wed, 17 Jul 2024 23:20:49 -0600 Subject: [PATCH 008/131] Minor improvement for consistency. --- parm/wflow/verify_pre.yaml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/parm/wflow/verify_pre.yaml b/parm/wflow/verify_pre.yaml index b7511bf63f..da43336a0d 100644 --- a/parm/wflow/verify_pre.yaml +++ b/parm/wflow/verify_pre.yaml @@ -64,12 +64,12 @@ task_get_obs_mrms: task_get_obs_ndas: <<: *default_task_verify_pre + command: '&LOAD_MODULES_RUN_TASK_FP; "get_obs" "&JOBSdir;/JREGIONAL_GET_VERIF_OBS"' envars: <<: *default_vars OBS_DIR: '&NDAS_OBS_DIR;' OBTYPE: 'NDAS' FHR: '{% for h in range(0, workflow.FCST_LEN_HRS+1) %}{{ " %02d" % h }}{% endfor %}' - command: '&LOAD_MODULES_RUN_TASK_FP; "get_obs" "&JOBSdir;/JREGIONAL_GET_VERIF_OBS"' queue: "&QUEUE_HPSS;" native: '{% if platform.get("SCHED_NATIVE_CMD_HPSS") %}{{ platform.SCHED_NATIVE_CMD_HPSS }}{% else %}{{ platform.SCHED_NATIVE_CMD}}{% endif %}' partition: '{% if platform.get("PARTITION_HPSS") %}&PARTITION_HPSS;{% else %}None{% endif %}' From af2ab4c531aa5ca80a513f8fd164485862217b68 Mon Sep 17 00:00:00 2001 From: gerard ketefian Date: Fri, 19 Jul 2024 01:39:37 -0600 Subject: [PATCH 009/131] New version of CCPA obs fetching (rename variables, include lots more comments). 
--- scripts/exregional_get_verif_obs.sh | 264 ++++++++++++++++++++-------- 1 file changed, 193 insertions(+), 71 deletions(-) diff --git a/scripts/exregional_get_verif_obs.sh b/scripts/exregional_get_verif_obs.sh index c86ba6796a..957770e5f1 100755 --- a/scripts/exregional_get_verif_obs.sh +++ b/scripts/exregional_get_verif_obs.sh @@ -42,8 +42,8 @@ set -x # # {CCPA_OBS_DIR}/{YYYYMMDD}/ccpa.t{HH}z.01h.hrap.conus.gb2 # -# If data is retrieved from HPSS, it will automatically staged by this -# this script. +# If data is retrieved from HPSS, it will be automatically staged by this +# script. # # Notes about the data and how it's used for verification: # @@ -53,7 +53,7 @@ set -x # 2. There is a problem with the valid time in the metadata for files # valid from 19 - 00 UTC (or files under the '00' directory). This is # accounted for in this script for data retrieved from HPSS, but if you -# have manually staged data on disk you should be sure this is accouned +# have manually staged data on disk you should be sure this is accounted # for. See in-line comments below for details. # # @@ -167,44 +167,45 @@ echo "ihh = ${ihh}" # if [[ ${OBTYPE} == "CCPA" ]]; then - # CCPA is accumulation observations, so for hour 0 there are no files - # to retrieve. + # CCPA is accumulation observations. We do not need to retrieve any + # observed accumulations at forecast hour 0 because there aren't yet + # any accumulations in the forecast(s) to compare it to. if [[ ${current_fcst} -eq 0 ]]; then current_fcst=$((${current_fcst} + 1)) continue fi - # Accumulation is for accumulation of CCPA data to pull (hardcoded to - # 01h, see note above). + # CCPA accumulation period to consider. Here, we only retrieve data for + # 01h accumulations (see note above). Other accumulations (03h, 06h, 24h) + # are obtained elsewhere in the workflow by adding up these 01h accumulations. 
accum=01 - # Directory in which the daily subdirectories containing the CCPA grib2 - # files will appear after this script is done. Make sure this exists. - ccpa_proc=${OBS_DIR} - if [[ ! -d "${ccpa_proc}/${vyyyymmdd}" ]]; then - mkdir -p ${ccpa_proc}/${vyyyymmdd} - fi - - # File name within the HPSS archive file. Note that this only includes - # the valid hour in its name; the year, month, and day are specified in - # the name of the directory in which it is located within the archive. + # Base directory in which the daily subdirectories containing the CCPA + # grib2 files will appear after this script is done, and the daily such + # subdirectory for the current valid time (year, month, and day). We + # refer to these as the "processed" base and daily subdirectories because + # they contain the final files after all processing by this script is + # complete. + ccpa_basedir_proc=${OBS_DIR} + ccpa_day_dir_proc="${ccpa_basedir_proc}/${vyyyymmdd}" + # Make sure these directories exist. + mkdir -p ${ccpa_day_dir_proc} + + # Name of the grib2 file to extract from the archive (tar) file. Note + # that this only contains the valid hour; the valid year, month, and day + # are specified in the name of the directory within the archive in which + # the file is located. ccpa_fn="ccpa.t${vhh}z.${accum}h.hrap.conus.gb2" - # Full path to final location of the CCPA grib2 file for the current valid - # time. Note that this path includes the valid date (year, month, and day) - # information in the name of a subdirectory and the valid hour-of-day in - # the name of the file. - ccpa_fp_proc="${ccpa_proc}/${vyyyymmdd}/${ccpa_fn}" - - # Temporary staging directory for raw CCPA files from HPSS. These "raw" - # directories are temporary directories in which archive files from HPSS - # are placed and files within those archives extracted. Note that the - # name of this subdirectory is cycle-specific to avoid other get_obs_ccpa - # workflow tasks (i.e. 
those corresponding to cycles other than the current - # one) writing into the same directory. - ccpa_raw="${ccpa_proc}/raw_${iyyyymmddhh}" - - # Check if file exists on disk; if not, pull it. + # Full path to the location of the processed CCPA grib2 file for the + # current valid time. Note that this path includes the valid date (year, + # month, and day) information in the name of a subdirectory and the valid + # hour-of-day in the name of the file. + ccpa_fp_proc="${ccpa_day_dir_proc}/${ccpa_fn}" + + # Check if the CCPA grib2 file for the current valid time already exists + # at its procedded location on disk. If so, skip and go to the next valid + # time. If not, pull it. if [[ -f "${ccpa_fp_proc}" ]]; then echo "${OBTYPE} file exists on disk:" @@ -216,34 +217,152 @@ echo "ihh = ${ihh}" echo "${OBTYPE} file does not exist on disk:" echo " ccpa_fp_proc = \"${ccpa_fp_proc}\"" echo "Will attempt to retrieve from remote locations." + # + #----------------------------------------------------------------------- + # + # Below, we will use the retrieve_data.py script to retrieve the CCPA + # grib2 file from a data store (e.g. HPSS). Before doing so, note the + # following: + # + # * The daily archive (tar) file containing CCPA obs has a name of the + # form + # + # [PREFIX].YYYYMMDD.tar + # + # where YYYYMMDD is a given year, month, and day combination, and + # [PREFIX] is a string that is not relevant to the discussion here + # (the value it can take on depends on which of several time periods + # YYYYMMDD falls in, and the retrieve_data.py tries various values + # until it finds one for which a tar file exists). Unintuitively, this + # archive file contains accumulation data for valid times starting at + # hour 19 of the PREVIOUS day (YYYYMM[DD-1]) to hour 18 of the current + # day (YYYYMMDD). In other words, the valid times of the contents of + # this archive file are shifted back by 6 hours relative to the time + # string appearing in the name of the file. 
See section "DETAILS..."
+    #   for a detailed description of the directory structure in the CCPA
+    #   archive files.
+    #
+    # * We call retrieve_data.py in a temporary cycle-specific subdirectory
+    #   in order to prevent get_obs_ccpa tasks for different cycles from
+    #   clobbering each other's output.  We refer to this as the "raw" CCPA
+    #   base directory because it contains files as they are found in the
+    #   archives before any processing by this script.
+    #
+    # * In each (cycle-specific) raw base directory, the data is arranged in
+    #   daily subdirectories with the same timing as in the archive (tar)
+    #   files (which are described in the section "DETAILS..." below).  In
+    #   particular, each daily subdirectory has the form YYYYMMDD, and it may
+    #   contain CCPA grib2 files for accumulations valid at hour 19 of the
+    #   previous day (YYYYMM[DD-1]) to hour 18 of the current day (YYYYMMDD).
+    #   (Data valid at hours 19-23 of the current day (YYYYMMDD) go into the
+    #   daily subdirectory for the next day, i.e. YYYYMM[DD+1].)  We refer
+    #   to these as raw daily (sub)directories to distinguish them from the
+    #   processed daily subdirectories under the processed (final) CCPA base
+    #   directory (ccpa_basedir_proc).
+    #
+    # * For a given cycle, some of the valid times at which there is forecast
+    #   output may not have a corresponding file under the raw base directory
+    #   for that cycle.  This is because another cycle that overlaps this cycle
+    #   has already obtained the grib2 CCPA file for that valid time and placed
+    #   it in its processed location; as a result, the retrieval of that grib2
+    #   file for this cycle is skipped.  
+ # + # * To obtain a more intuitive temporal arrangement of the data in the + # processed CCPA directory structure than the temporal arrangement used + # in the archives and raw directories, we process the raw files such + # that the data in the processed directory structure is shifted forward + # in time 6 hours relative to the data in the archives and raw directories. + # This results in a processed base directory that, like the raw base + # directory, also contains daily subdirectories of the form YYYYMMDD, + # but each such subdirectory may only contain CCPA data at valid hours + # within that day, i.e. at valid times YYYYMMDD[00, 01, ..., 23] (but + # may not contain data that is valid on the previous, next, or any other + # day). + # + # * For data between 20180718 and 20210504, the 01h accumulation data + # (which is the only accumulation we are retrieving) have incorrect + # metadata under the "00" directory in the archive files (meaning for + # hour 00 and hours 19-23, which are the ones in the "00" directory). + # Below, we use wgrib2 to make a correction for this when transferring + # (moving or copying) grib2 files from the raw daily directories to + # the processed daily directories. + # + # + # DETAILS OF DIRECTORY STRUCTURE IN CCPA ARCHIVE (TAR) FILES + # ---------------------------------------------------------- + # + # The daily archive file containing CCPA obs is named + # + # [PREFIX].YYYYMMDD.tar + # + # This file contains accumulation data for valid times starting at hour + # 19 of the PREVIOUS day (YYYYMM[DD-1]) to hour 18 of the current day + # (YYYYMMDD). In particular, when untarred, the daily archive file + # expands into four subdirectories: 00, 06, 12, and 18. The 06, 12, and + # 18 subdirectories contain grib2 files for accumulations valid at or + # below the hour-of-day given by the subdirectory name (and on YYYYMMDD). 
+ # For example, the 06 directory contains data valid at: + # + # * YYYYMMDD[01, 02, 03, 04, 05, 06] for 01h accumulations; + # * YYYYMMDD[03, 06] for 03h accumulations; + # * YYYYMMDD[06] for 06h accumulations. + # + # The valid times for the data in the 12 and 18 subdirectories are + # analogous. However, the 00 subdirectory is different in that it + # contains accumulations at hour 00 on YYYYMMDD as well as ones BEFORE + # this time, i.e. the data for valid times other than YYYYMMDD00 are on + # the PREVIOUS day. Thus, the 00 subdirectory contains data valid at + # (note the DD-1, meaning one day prior): + # + # * YYYYMM[DD-1][19, 20, 21, 22, 23] and YYYYMMDD00 for 01h accumulations; + # * YYYYMM[DD-1][19] and YYYYMMDD00 for 03h accumulations; + # * YYYYMMDD00 for 06h accumulations. + # + #----------------------------------------------------------------------- + # - # Create the necessary raw (sub)directories on disk. Note that we need - # to create a subdirectory for 1 day + the current valid date because - # that is needed to get around a metadata error in the CCPA files on HPSS - # (in particular, one hour CCPA files have incorrect metadata in the files - # under the "00" directory from 20180718 to 20210504). - if [[ ! -d "${ccpa_raw}/${vyyyymmdd}" ]]; then - mkdir -p ${ccpa_raw}/${vyyyymmdd} - fi - if [[ ! -d "${ccpa_raw}/${vyyyymmdd_p1}" ]]; then - mkdir -p ${ccpa_raw}/${vyyyymmdd_p1} - fi - - valid_time=${vyyyymmdd}${vhh} - output_path="${ccpa_raw}/${vyyyymmdd}" - if [[ ${vhh_noZero} -ge 19 && ${vhh_noZero} -le 23 ]]; then + # Set parameters for retrieving CCPA data using retrieve_data.py. + # Definitions: + # + # valid_time: + # The valid time in the name of the archive (tar) file from which data + # will be pulled. Due to the way the data is arranged in the CCPA archive + # files (as described above), for valid hours 19 to 23 of the current day, + # this must be set to the corresponding valid time on the NEXT day. 
+ # + # ccpa_basedir_raw: + # Raw base directory that will contain the raw daily subdirectory in which + # the retrieved CCPA grib2 file will be placed. Note that this must be + # cycle-dependent (where the cycle is given by the variable iyyyymmddhh) + # to avoid get_obs_ccpa workflow tasks for other cycles writing to the + # same directories/files. Note also that this doesn't have to depend on + # the current valid hour (0-18 vs. 19-23), but for clarity and ease of + # debugging, here we do make it valid-hour-dependent. + # + # ccpa_day_dir_raw: + # Raw daily subdirectory under the raw base directory. This is dependent + # on the valid hour (i.e. different for hours 19-23 than for hours 0-18) + # in order to maintain the same data timing arrangement in the raw daily + # directories as in the archive files. + # + if [[ ${vhh_noZero} -ge 0 && ${vhh_noZero} -le 18 ]]; then + valid_time=${vyyyymmdd}${vhh} + ccpa_basedir_raw="${ccpa_basedir_proc}/raw_${iyyyymmddhh}" + ccpa_day_dir_raw="${ccpa_basedir_raw}/${vyyyymmdd}" + elif [[ ${vhh_noZero} -ge 19 && ${vhh_noZero} -le 23 ]]; then valid_time=${vyyyymmdd_p1}${vhh} - output_path="${ccpa_raw}/${vyyyymmdd_p1}" + ccpa_basedir_raw="${ccpa_basedir_proc}/raw_${iyyyymmddhh}_vhh19-23" + ccpa_day_dir_raw="${ccpa_basedir_raw}/${vyyyymmdd_p1}" fi + mkdir -p ${ccpa_day_dir_raw} - # The retrieve_data.py script below uses the current working directory as - # the location into which to extract the contents of the HPSS archive (tar) - # file. Thus, if there are multiple get_obs_ccpa tasks running (i.e. ones - # for different cycles), they will be extracting files into the same (current) - # directory. That causes errors in the workflow. To avoid this, change - # location to the raw directory. This will avoid such errors because the - # raw directory has a cycle-specific name. 
- cd ${ccpa_raw} + # Before calling retrieve_data.py, change location to the raw base + # directory to avoid get_obs_ccpa tasks for other cycles from clobbering + # the output from this call to retrieve_data.py. Note that retrieve_data.py + # extracts the CCPA tar files into the directory it was called from, + # which is the working directory of this script right before retrieve_data.py + # is called. + cd ${ccpa_basedir_raw} # Pull CCPA data from HPSS. This will get a single grib2 (.gb2) file # corresponding to the current valid time (valid_time). @@ -255,7 +374,7 @@ echo "ihh = ${ihh}" --cycle_date ${valid_time} \ --data_stores hpss \ --data_type CCPA_obs \ - --output_path ${output_path} \ + --output_path ${ccpa_day_dir_raw} \ --summary_file ${logfile}" echo "CALLING: ${cmd}" @@ -266,38 +385,41 @@ echo "ihh = ${ihh}" ${cmd} " - # Move CCPA file to its final location. + # Create the processed CCPA grib2 files. This usually consists of just + # moving or copying the raw file to its processed location, but for valid + # times between 20180718 and 20210504, it involves using wgrib2 to correct + # an error in the metadata of the raw file and writing the corrected data + # to a new grib2 file in the processed location. # # Since this script is part of a workflow, other tasks (for other cycles) # that call this script may have extracted and placed the current file - # in its final location between the time we checked for its existence - # above above (and didn't find it) and now. This can happen because - # there can be overlap between the verification times for the current - # cycle and those of other cycles. For this reason, check again for the - # existence of the file in its final location. If it's already been - # created by another task, don't bother to move it from its raw location - # to its final location. + # in its processed location between the time we checked for its existence + # above (and didn't find it) and now. 
This can happen because there can + # be overlap between the verification times for the current cycle and + # those of other cycles. For this reason, check again for the existence + # of the file in its processed location. If it has already been created + # by another task, don't bother to create it. if [[ -f "${ccpa_fp_proc}" ]]; then echo "${OBTYPE} file exists on disk:" echo " ccpa_fp_proc = \"{ccpa_fp_proc}\"" - echo "It was likely created by a get_obs_ccpa workflow task for another cycle." - echo "NOT moving file from its temporary (raw) location to its final location." + echo "It was likely created by a get_obs_ccpa workflow task for another cycle that overlaps the current one." + echo "NOT moving or copying file from its raw location to its processed location." else # Full path to the CCPA file that was pulled and extracted above and # placed in the raw directory. - ccpa_fp_raw="${output_path}/${ccpa_fn}" + ccpa_fp_raw="${ccpa_day_dir_raw}/${ccpa_fn}" - # One hour CCPA files have incorrect metadata in the files under the "00" - # directory from 20180718 to 20210504. After data is pulled, reorganize - # into correct valid yyyymmdd structure. #mv_or_cp="mv" mv_or_cp="cp" if [[ ${vhh_noZero} -ge 1 && ${vhh_noZero} -le 18 ]]; then ${mv_or_cp} ${ccpa_fp_raw} ${ccpa_fp_proc} elif [[ (${vhh_noZero} -eq 0) || (${vhh_noZero} -ge 19 && ${vhh_noZero} -le 23) ]]; then + # One hour CCPA files have incorrect metadata in the files under the "00" + # directory from 20180718 to 20210504. After data is pulled, reorganize + # into correct valid yyyymmdd structure. if [[ ${vyyyymmdd} -ge 20180718 && ${vyyyymmdd} -le 20210504 ]]; then wgrib2 ${ccpa_fp_raw} -set_date -24hr -grib ${ccpa_fp_proc} -s else From 85c3d58a0c855ea347a2350c62b0eae88ac38bee Mon Sep 17 00:00:00 2001 From: gerard ketefian Date: Fri, 19 Jul 2024 13:53:26 -0600 Subject: [PATCH 010/131] Minor changes to ccpa section. 
--- scripts/exregional_get_verif_obs.sh | 27 +++++++++++++++------------ 1 file changed, 15 insertions(+), 12 deletions(-) diff --git a/scripts/exregional_get_verif_obs.sh b/scripts/exregional_get_verif_obs.sh index 957770e5f1..1e49d1f45c 100755 --- a/scripts/exregional_get_verif_obs.sh +++ b/scripts/exregional_get_verif_obs.sh @@ -138,6 +138,11 @@ fcst_length=$((10#${fcst_length})) current_fcst=0 while [[ ${current_fcst} -le ${fcst_length} ]]; do + +echo +echo "HELLO GGGGGGGG" +echo "current_fcst = ${current_fcst}" + # Calculate valid date info using date utility vdate=$($DATE_UTIL -d "${unix_init_DATE} ${current_fcst} hours" +%Y%m%d%H) unix_vdate=$($DATE_UTIL -d "${unix_init_DATE} ${current_fcst} hours" "+%Y-%m-%d %H:00:00") @@ -191,10 +196,8 @@ echo "ihh = ${ihh}" # Make sure these directories exist. mkdir -p ${ccpa_day_dir_proc} - # Name of the grib2 file to extract from the archive (tar) file. Note - # that this only contains the valid hour; the valid year, month, and day - # are specified in the name of the directory within the archive in which - # the file is located. + # Name of the grib2 file to extract from the archive (tar) file as well + # as the name of the processed grib2 file. ccpa_fn="ccpa.t${vhh}z.${accum}h.hrap.conus.gb2" # Full path to the location of the processed CCPA grib2 file for the @@ -391,14 +394,14 @@ echo "ihh = ${ihh}" # an error in the metadata of the raw file and writing the corrected data # to a new grib2 file in the processed location. # - # Since this script is part of a workflow, other tasks (for other cycles) - # that call this script may have extracted and placed the current file - # in its processed location between the time we checked for its existence - # above (and didn't find it) and now. This can happen because there can - # be overlap between the verification times for the current cycle and - # those of other cycles. For this reason, check again for the existence - # of the file in its processed location. 
If it has already been created - # by another task, don't bother to create it. + # Since this script is part of a workflow, another get_obs_ccpa task (i.e. + # for another cycle) may have extracted and placed the current file in its + # processed location between the time we checked for its existence above + # (and didn't find it) and now. This can happen because there can be + # overlap between the verification times for the current cycle and those + # of other cycles. For this reason, check again for the existence of the + # processed file. If it has already been created by another get_obs_ccpa + # task, don't bother to recreate it. if [[ -f "${ccpa_fp_proc}" ]]; then echo "${OBTYPE} file exists on disk:" From b7c6f00d7b75f0534fd1e2789e90d09c787c4309 Mon Sep 17 00:00:00 2001 From: gerard ketefian Date: Fri, 19 Jul 2024 13:56:25 -0600 Subject: [PATCH 011/131] Changes for MRMS. --- scripts/exregional_get_verif_obs.sh | 213 +++++++++++++++++++--------- 1 file changed, 143 insertions(+), 70 deletions(-) diff --git a/scripts/exregional_get_verif_obs.sh b/scripts/exregional_get_verif_obs.sh index 1e49d1f45c..3ae8405e36 100755 --- a/scripts/exregional_get_verif_obs.sh +++ b/scripts/exregional_get_verif_obs.sh @@ -442,96 +442,169 @@ echo "ihh = ${ihh}" # elif [[ ${OBTYPE} == "MRMS" ]]; then - # Top-level MRMS directory + # Base directory in which the daily subdirectories containing the MRMS + # grib2 files for REFC (composite reflectivity) and REFC (echo top) will + # appear after this script is done, and the daily such subdirectory for + # the current valid time (year, month, and day). We refer to these as + # the "processed" base and daily subdirectories because they contain the + # final files after all processing by this script is complete. + mrms_basedir_proc=${OBS_DIR} + mrms_day_dir_proc="${mrms_basedir_proc}/${vyyyymmdd}" + + # For each field (REFC and RETOP), check if file exists on disk; if not, pull it. 
+ for field in ${VAR[@]}; do + + # Set parameters needed in setting the MRMS grib2 file name to create in the day directory. + if [ "${field}" = "REFC" ]; then + file_base_name="MergedReflectivityQCComposite" + level="_00.50_" + elif [ "${field}" = "RETOP" ]; then + file_base_name="EchoTop" + level="_18_00.50_" + else + echo "Invalid field: ${field}" + print_err_msg_exit "\ + Invalid field specified: ${field} + + Valid options are 'REFC', 'RETOP'. +" + fi - # Reorganized MRMS location - mrms_proc=${OBS_DIR} +# Name of the MRMS grib2 file for the current field and valid time that +# will appear in the processed daily subdirectory after this script finishes. + mrms_fn="${file_base_name}${level}${vyyyymmdd}-${vhh}0000.grib2" + +# Full path to the processed MRMS grib2 file for the current field and +# valid time. + mrms_fp_proc="${mrms_day_dir_proc}/${mrms_fn}" - mrms_day_dir="${mrms_proc}/${vyyyymmdd}" +# Check if the processed MRMS grib2 file for the current field and valid +# time already exists on disk. If so, skip and go to the next valid time. +# If not, pull it. + if [[ -f "${mrms_fp_proc}" ]]; then - if [[ -d "${mrms_day_dir}" ]]; then + echo "${OBTYPE} file exists on disk:" + echo " mrms_fp_proc = \"${mrms_fp_proc}\"" + echo "Will NOT attempt to retrieve from remote locations." - echo "${OBTYPE} directory for day ${vyyyymmdd} exists on disk:" - echo " mrms_day_dir = \"${mrms_day_dir}\"" - echo "This means observation files for this field and all hours of this day have been or are being retrieved." - echo "Thus, we will NOT attempt to retrieve the current data from remote locations" + else - else + echo "${OBTYPE} file does not exist on disk:" + echo " mrms_fp_proc = \"${mrms_fp_proc}\"" + echo "Will attempt to retrieve from remote locations." - # For each field (REFC and RETOP), check if file exists on disk; if not, pull it. 
- for field in ${VAR[@]}; do - - # raw MRMS data from HPSS - #mrms_raw=${OBS_DIR}/raw - #mrms_raw="${mrms_proc}/raw_${field}_${iyyyymmddhh}" - mrms_raw="${mrms_proc}/raw_${iyyyymmddhh}" + # Raw base directory that will contain the raw daily subdirectory in which + # the gzipped MRMS grib2 retrieved from archive file will be placed. Note + # that the name of this directory depends on (contains) the valid year, + # month, and day (but not on the cycle, i.e. not on iyyyymmddhh) in order + # to avoid having get_obs_mrms tasks from other cycles clobbering the + # output from this one. It is also possible to make this directory name + # depend instead on the cycle, but that turns out to cause an inefficiency + # in that get_obs_mrms tasks for different cycles will not be able to + # detect that another cycle has already retrieved the data for the current + # valid day from an archive and will unnecessarily repeat the retrieval. + #mrms_basedir_raw="${mrms_basedir_proc}/raw_${iyyyymmddhh}" + mrms_basedir_raw="${mrms_basedir_proc}/raw_${vyyyymmdd}" - if [ "${field}" = "REFC" ]; then - field_base_name="MergedReflectivityQCComposite" - level="_00.50_" - elif [ "${field}" = "RETOP" ]; then - field_base_name="EchoTop" - level="_18_00.50_" + # Raw daily subdirectory under the raw base directory. + mrms_day_dir_raw="${mrms_basedir_raw}/${vyyyymmdd}" + + +# Check if the raw daily directory already exists on disk. If so, it +# means all the gzipped MRMS grib2 files -- i.e. for both REFC and RETOP +# and for all times (hours, minutes, and seconds) in the current valid +# day -- have already been or are in the process of being retrieved from +# the archive (tar) files. If so, skip the retrieval process. If not, +# proceed to retrieve all the files and place them in the raw daily +# directory. + if [[ -d "${mrms_day_dir_raw}" ]]; then + +# Change the following comments. 
+ echo "${OBTYPE} directory for day ${vyyyymmdd} exists on disk:" + echo " mrms_day_dir_proc = \"${mrms_day_dir_proc}\"" + echo "This means observation files for this field and all hours of this day have been or are being retrieved." + echo "Thus, we will NOT attempt to retrieve the current data from remote locations" + else - echo "Invalid field: ${field}" - print_err_msg_exit "\ - Invalid field specified: ${field} - - Valid options are 'REFC', 'RETOP'. -" - fi - mrms_fn="${field_base_name}${level}${vyyyymmdd}-${vhh}0000.grib2" - mrms_fp="${mrms_proc}/${vyyyymmdd}/${mrms_fn}" + mkdir -p ${mrms_day_dir_raw} + valid_time=${vyyyymmdd}${vhh} + + cd ${mrms_basedir_raw} + +# Use the retrieve_data.py script to retrieve all the gzipped MRMS grib2 +# files -- i.e. for both REFC and RETOP and for all times (hours, minutes, +# and seconds) in the current valid day -- and place them in the raw daily +# directory. Note that this will pull both the REFC and RETOP files in +# one call. + cmd=" + python3 -u ${USHdir}/retrieve_data.py \ + --debug \ + --file_set obs \ + --config ${PARMdir}/data_locations.yml \ + --cycle_date ${valid_time} \ + --data_stores hpss \ + --data_type MRMS_obs \ + --output_path ${mrms_day_dir_raw} \ + --summary_file ${logfile}" + + echo "CALLING: ${cmd}" + + $cmd || print_err_msg_exit "\ + Could not retrieve MRMS data from HPSS + + The following command exited with a non-zero exit status: + ${cmd} +" - echo "${OBTYPE} file for field \"${field}\" does not exist on disk:" - echo " mrms_fp = \"${mrms_fp}\"" - echo "Will attempt to retrieve from remote locations." +# Create a flag file that can be used to confirm the completion of the +# retrieval of all files for the current valid day. + touch ${mrms_day_dir_raw}/pull_completed.txt - # Create directories if necessary. - if [[ ! -d "${mrms_raw}/${vyyyymmdd}" ]]; then - mkdir -p ${mrms_raw}/${vyyyymmdd} - fi - if [[ ! 
-d "${mrms_proc}/${vyyyymmdd}" ]]; then - mkdir -p ${mrms_proc}/${vyyyymmdd} fi - valid_time=${vyyyymmdd}${vhh} - output_path="${mrms_raw}/${vyyyymmdd}" +# Make sure the retrieval process for the current day (which may have +# been executed above for this cycle or by another cycle) has completed +# by checking for the existence of the flag file that marks complettion. +# If not, keep checking until the flag file shows up. + while [[ ! -f "${mrms_day_dir_raw}/pull_completed.txt" ]]; do + echo "Waiting for the retrieval process for valid day ${vyyyymmdd} to complete..." + sleep 5s + done - cd ${mrms_raw} - # Pull MRMS data from HPSS - cmd=" - python3 -u ${USHdir}/retrieve_data.py \ - --debug \ - --file_set obs \ - --config ${PARMdir}/data_locations.yml \ - --cycle_date ${valid_time} \ - --data_stores hpss \ - --data_type MRMS_obs \ - --output_path ${output_path} \ - --summary_file ${logfile}" +# Since this script is part of a workflow, another get_obs_mrms task (i.e. +# for another cycle) may have extracted and placed the current file in its +# processed location between the time we checked for its existence above +# (and didn't find it) and now. This can happen because there can be +# overlap between the verification times for the current cycle and those +# of other cycles. For this reason, check again for the existence of the +# processed file. If it has already been created by another get_obs_mrms +# task, don't bother to recreate it. + if [[ -f "${mrms_fp_proc}" ]]; then - echo "CALLING: ${cmd}" + echo "${OBTYPE} file exists on disk:" + echo " mrms_fp_proc = \"${mrms_fp_proc}\"" + echo "Will NOT attempt to retrieve from remote locations." 
- $cmd || print_err_msg_exit "\ - Could not retrieve MRMS data from HPSS + else - The following command exited with a non-zero exit status: - ${cmd} -" +# Search the raw daily directory for the current valid day to find the +# gizipped MRMS grib2 file whose time stamp (in the file name) is closest +# to the current valid day and hour. Then unzip that file and copy it +# to the processed daily directory, in the process renaming it to replace +# the minutes and hours in the file name with "0000". + valid_time=${vyyyymmdd}${vhh} + python ${USHdir}/mrms_pull_topofhour.py \ + --valid_time ${valid_time} \ + --outdir ${mrms_basedir_proc} \ + --source ${mrms_basedir_raw} \ + --product ${file_base_name} - hour=0 - while [[ ${hour} -le 23 ]]; do - HH=$(printf "%02d" $hour) - echo "hour=${hour}" - python ${USHdir}/mrms_pull_topofhour.py --valid_time ${vyyyymmdd}${HH} --outdir ${mrms_proc} --source ${mrms_raw} --product ${field_base_name} - hour=$((${hour} + 1)) # hourly increment - done + fi - done + fi - fi + done # #----------------------------------------------------------------------- # From 2bc8ed1c65bd61c52b490d838dd49afb4d11c95b Mon Sep 17 00:00:00 2001 From: gerard ketefian Date: Fri, 19 Jul 2024 16:04:45 -0600 Subject: [PATCH 012/131] Clean up comments in the MRMS section. --- scripts/exregional_get_verif_obs.sh | 130 +++++++++++++++------------- 1 file changed, 70 insertions(+), 60 deletions(-) diff --git a/scripts/exregional_get_verif_obs.sh b/scripts/exregional_get_verif_obs.sh index 3ae8405e36..254b5166a3 100755 --- a/scripts/exregional_get_verif_obs.sh +++ b/scripts/exregional_get_verif_obs.sh @@ -444,17 +444,17 @@ echo "ihh = ${ihh}" # Base directory in which the daily subdirectories containing the MRMS # grib2 files for REFC (composite reflectivity) and REFC (echo top) will - # appear after this script is done, and the daily such subdirectory for - # the current valid time (year, month, and day). 
We refer to these as - # the "processed" base and daily subdirectories because they contain the - # final files after all processing by this script is complete. + # be located after this script is done, and the daily such subdirectory + # for the current valid time (year, month, and day). We refer to these + # as the "processed" base and daily subdirectories because they contain + # the final files after all processing by this script is complete. mrms_basedir_proc=${OBS_DIR} mrms_day_dir_proc="${mrms_basedir_proc}/${vyyyymmdd}" - # For each field (REFC and RETOP), check if file exists on disk; if not, pull it. + # Loop over the fields (REFC and RETOP). for field in ${VAR[@]}; do - # Set parameters needed in setting the MRMS grib2 file name to create in the day directory. + # Set field-dependent parameters needed in forming grib2 file names. if [ "${field}" = "REFC" ]; then file_base_name="MergedReflectivityQCComposite" level="_00.50_" @@ -470,17 +470,20 @@ echo "ihh = ${ihh}" " fi -# Name of the MRMS grib2 file for the current field and valid time that -# will appear in the processed daily subdirectory after this script finishes. + # Name of the MRMS grib2 file for the current field and valid time that + # will appear in the processed daily subdirectory after this script finishes. + # This is the name of the processed file. Note that this is generally + # not the name of the gzipped grib2 files that may be retrieved below + # from archive files using the retrieve_data.py script. mrms_fn="${file_base_name}${level}${vyyyymmdd}-${vhh}0000.grib2" -# Full path to the processed MRMS grib2 file for the current field and -# valid time. + # Full path to the processed MRMS grib2 file for the current field and + # valid time. mrms_fp_proc="${mrms_day_dir_proc}/${mrms_fn}" -# Check if the processed MRMS grib2 file for the current field and valid -# time already exists on disk. If so, skip and go to the next valid time. -# If not, pull it. 
+ # Check if the processed MRMS grib2 file for the current field and valid + # time already exists on disk. If so, skip this valid time and go to the + # next one. If not, pull it. if [[ -f "${mrms_fp_proc}" ]]; then echo "${OBTYPE} file exists on disk:" @@ -493,50 +496,57 @@ echo "ihh = ${ihh}" echo " mrms_fp_proc = \"${mrms_fp_proc}\"" echo "Will attempt to retrieve from remote locations." - # Raw base directory that will contain the raw daily subdirectory in which - # the gzipped MRMS grib2 retrieved from archive file will be placed. Note - # that the name of this directory depends on (contains) the valid year, - # month, and day (but not on the cycle, i.e. not on iyyyymmddhh) in order - # to avoid having get_obs_mrms tasks from other cycles clobbering the - # output from this one. It is also possible to make this directory name - # depend instead on the cycle, but that turns out to cause an inefficiency - # in that get_obs_mrms tasks for different cycles will not be able to - # detect that another cycle has already retrieved the data for the current - # valid day from an archive and will unnecessarily repeat the retrieval. - #mrms_basedir_raw="${mrms_basedir_proc}/raw_${iyyyymmddhh}" + # Base directory that will contain the daily subdirectories in which the + # gzipped MRMS grib2 files retrieved from archive files will be placed, + # and the daily subdirectory for the current valid year, month, and day. + # We refer to these as the "raw" MRMS base and daily directories because + # they contain files as they are found in the archives before any processing + # by this script. + # + # Note that the name of the raw base directory depends on (contains) the + # valid year, month, and day (but not on the cycle, i.e. not on iyyyymmddhh) + # in order to avoid having get_obs_mrms tasks from other cycles clobbering + # the output from this one. 
It is also possible to make the name of this
+      # directory depend instead on the cycle, but that turns out to cause
+      # an inefficiency in that get_obs_mrms tasks for different cycles will
+      # not be able to detect that another cycle has already retrieved the data
+      # for the current valid day and will unnecessarily repeat the retrieval.
       mrms_basedir_raw="${mrms_basedir_proc}/raw_${vyyyymmdd}"
-
-      # Raw daily subdirectory under the raw base directory.
       mrms_day_dir_raw="${mrms_basedir_raw}/${vyyyymmdd}"
 
 
-# Check if the raw daily directory already exists on disk.  If so, it
-# means all the gzipped MRMS grib2 files -- i.e. for both REFC and RETOP
-# and for all times (hours, minutes, and seconds) in the current valid
-# day -- have already been or are in the process of being retrieved from
-# the archive (tar) files.  If so, skip the retrieval process.  If not,
-# proceed to retrieve all the files and place them in the raw daily
-# directory.
+      # Check if the raw daily directory already exists on disk.  If so, it
+      # means all the gzipped MRMS grib2 files -- i.e. for both REFC and RETOP
+      # and for all times (hours, minutes, and seconds) in the current valid
+      # day -- have already been or are in the process of being retrieved from
+      # the archive (tar) files.  If so, skip the retrieval process.  If not,
+      # proceed to retrieve all the files and place them in the raw daily
+      # directory.
       if [[ -d "${mrms_day_dir_raw}" ]]; then
 
-# Change the following comments.
         echo "${OBTYPE} directory for day ${vyyyymmdd} exists on disk:"
         echo "  mrms_day_dir_proc = \"${mrms_day_dir_proc}\""
-        echo "This means observation files for this field and all hours of this day have been or are being retrieved."
-        echo "Thus, we will NOT attempt to retrieve the current data from remote locations"
+        echo "This means MRMS files for all hours of the current valid day (${vyyyymmdd}) have been or are being retrieved."  
+ echo "Thus, we will NOT attempt to retrieve MRMS data for the current valid time from remote locations." else mkdir -p ${mrms_day_dir_raw} valid_time=${vyyyymmdd}${vhh} + # Before calling retrieve_data.py, change location to the raw base + # directory to avoid get_obs_mrms tasks for other cycles from clobbering + # the output from this call to retrieve_data.py. Note that retrieve_data.py + # extracts the MRMS tar files into the directory it was called from, + # which is the working directory of this script right before retrieve_data.py + # is called. cd ${mrms_basedir_raw} -# Use the retrieve_data.py script to retrieve all the gzipped MRMS grib2 -# files -- i.e. for both REFC and RETOP and for all times (hours, minutes, -# and seconds) in the current valid day -- and place them in the raw daily -# directory. Note that this will pull both the REFC and RETOP files in -# one call. + # Use the retrieve_data.py script to retrieve all the gzipped MRMS grib2 + # files -- i.e. for both REFC and RETOP and for all times (hours, minutes, + # and seconds) in the current valid day -- and place them in the raw daily + # directory. Note that this will pull both the REFC and RETOP files in + # one call. cmd=" python3 -u ${USHdir}/retrieve_data.py \ --debug \ @@ -557,29 +567,29 @@ echo "ihh = ${ihh}" ${cmd} " -# Create a flag file that can be used to confirm the completion of the -# retrieval of all files for the current valid day. + # Create a flag file that can be used to confirm the completion of the + # retrieval of all files for the current valid day. touch ${mrms_day_dir_raw}/pull_completed.txt fi -# Make sure the retrieval process for the current day (which may have -# been executed above for this cycle or by another cycle) has completed -# by checking for the existence of the flag file that marks complettion. -# If not, keep checking until the flag file shows up. 
+ # Make sure the retrieval process for the current day (which may have + # been executed above for this cycle or by another cycle) has completed + # by checking for the existence of the flag file that marks completion. + # If not, keep checking until the flag file shows up. while [[ ! -f "${mrms_day_dir_raw}/pull_completed.txt" ]]; do echo "Waiting for the retrieval process for valid day ${vyyyymmdd} to complete..." sleep 5s done -# Since this script is part of a workflow, another get_obs_mrms task (i.e. -# for another cycle) may have extracted and placed the current file in its -# processed location between the time we checked for its existence above -# (and didn't find it) and now. This can happen because there can be -# overlap between the verification times for the current cycle and those -# of other cycles. For this reason, check again for the existence of the -# processed file. If it has already been created by another get_obs_mrms -# task, don't bother to recreate it. + # Since this script is part of a workflow, another get_obs_mrms task (i.e. + # for another cycle) may have extracted and placed the current file in its + # processed location between the time we checked for its existence above + # (and didn't find it) and now. This can happen because there can be + # overlap between the verification times for the current cycle and those + # of other cycles. For this reason, check again for the existence of the + # processed file. If it has already been created by another get_obs_mrms + # task, don't bother to recreate it. if [[ -f "${mrms_fp_proc}" ]]; then echo "${OBTYPE} file exists on disk:" @@ -588,11 +598,11 @@ echo "ihh = ${ihh}" else -# Search the raw daily directory for the current valid day to find the -# gizipped MRMS grib2 file whose time stamp (in the file name) is closest -# to the current valid day and hour. 
Then unzip that file and copy it -# to the processed daily directory, in the process renaming it to replace -# the minutes and hours in the file name with "0000". + # Search the raw daily directory for the current valid day to find the + # gizipped MRMS grib2 file whose time stamp (in the file name) is closest + # to the current valid day and hour. Then unzip that file and copy it + # to the processed daily directory, in the process renaming it to replace + # the minutes and hours in the file name with "0000". valid_time=${vyyyymmdd}${vhh} python ${USHdir}/mrms_pull_topofhour.py \ --valid_time ${valid_time} \ From 184534283613620a0d4d88cee26d5cd03b45dc99 Mon Sep 17 00:00:00 2001 From: gerard ketefian Date: Fri, 19 Jul 2024 18:29:44 -0600 Subject: [PATCH 013/131] Minor fixes to NDAS section. --- scripts/exregional_get_verif_obs.sh | 36 ++++++++++++++++++----------- 1 file changed, 22 insertions(+), 14 deletions(-) diff --git a/scripts/exregional_get_verif_obs.sh b/scripts/exregional_get_verif_obs.sh index 254b5166a3..c31795441a 100755 --- a/scripts/exregional_get_verif_obs.sh +++ b/scripts/exregional_get_verif_obs.sh @@ -630,7 +630,7 @@ echo "ihh = ${ihh}" ndas_proc=${OBS_DIR} # Check if file exists on disk - ndas_file="$ndas_proc/prepbufr.ndas.${vyyyymmdd}${vhh}" + ndas_file="${ndas_proc}/prepbufr.ndas.${vyyyymmdd}${vhh}" if [[ -f "${ndas_file}" ]]; then echo "${OBTYPE} file exists on disk:" echo "${ndas_file}" @@ -643,7 +643,7 @@ echo "ihh = ${ihh}" # # The "tm" here means "time minus", so nam.t12z.prepbufr.tm00.nr is valid for 12z, # nam.t00z.prepbufr.tm03.nr is valid for 21z the previous day, etc. 
- # This means that every six hours we have to obs files valid for the same time: + # This means that every six hours we have two obs files valid for the same time: # nam.tHHz.prepbufr.tm00.nr and nam.t[HH+6]z.prepbufr.tm06.nr # We want to use the tm06 file because it contains more/better obs (confirmed with EMC: even # though the earlier files are larger, this is because the time window is larger) @@ -657,18 +657,24 @@ echo "ihh = ${ihh}" continue fi + # Whether to move or copy extracted files from the raw directories to their + # final locations. + #mv_or_cp="mv" + mv_or_cp="cp" + echo "" echo "HELLO AAAAA" echo "vhh_noZero = ${vhh_noZero}" - if [[ ${vhh_noZero} -eq 0 || ${vhh_noZero} -eq 6 || ${vhh_noZero} -eq 12 || ${vhh_noZero} -eq 18 ]]; then + if [[ ${vhh_noZero} -eq 0 || ${vhh_noZero} -eq 6 || \ + ${vhh_noZero} -eq 12 || ${vhh_noZero} -eq 18 ]]; then echo "" echo "HELLO BBBBB" if [[ ! -d "$ndas_raw/${vyyyymmdd}${vhh}" ]]; then echo "" echo "HELLO CCCCC" - mkdir -p $ndas_raw/${vyyyymmdd}${vhh} + mkdir -p ${ndas_raw}/${vyyyymmdd}${vhh} fi # Pull NDAS data from HPSS @@ -680,7 +686,7 @@ echo "HELLO CCCCC" --cycle_date ${vyyyymmdd}${vhh} \ --data_stores hpss \ --data_type NDAS_obs \ - --output_path $ndas_raw/${vyyyymmdd}${vhh} \ + --output_path ${ndas_raw}/${vyyyymmdd}${vhh} \ --summary_file ${logfile}" echo "CALLING: ${cmd}" @@ -692,8 +698,8 @@ echo "HELLO CCCCC" ${cmd} " - if [[ ! -d "$ndas_proc" ]]; then - mkdir -p $ndas_proc + if [[ ! 
-d "${ndas_proc}" ]]; then + mkdir -p ${ndas_proc} fi # copy files from the previous 6 hours ("tm" means "time minus") @@ -702,7 +708,8 @@ echo "HELLO CCCCC" vyyyymmddhh_tm=$($DATE_UTIL -d "${unix_vdate} ${tm} hours ago" +%Y%m%d%H) tm2=$(echo $tm | awk '{printf "%02d\n", $0;}') - cp $ndas_raw/${vyyyymmdd}${vhh}/nam.t${vhh}z.prepbufr.tm${tm2}.nr $ndas_proc/prepbufr.ndas.${vyyyymmddhh_tm} + ${mv_or_cp} ${ndas_raw}/${vyyyymmdd}${vhh}/nam.t${vhh}z.prepbufr.tm${tm2}.nr \ + ${ndas_proc}/prepbufr.ndas.${vyyyymmddhh_tm} done fi @@ -720,8 +727,8 @@ echo "HELLO CCCCC" vhh=${vhh_noZero} fi - if [[ ! -d "$ndas_raw/${vyyyymmdd}${vhh}" ]]; then - mkdir -p $ndas_raw/${vyyyymmdd}${vhh} + if [[ ! -d "${ndas_raw}/${vyyyymmdd}${vhh}" ]]; then + mkdir -p ${ndas_raw}/${vyyyymmdd}${vhh} fi # Pull NDAS data from HPSS @@ -733,7 +740,7 @@ echo "HELLO CCCCC" --cycle_date ${vyyyymmdd}${vhh} \ --data_stores hpss \ --data_type NDAS_obs \ - --output_path $ndas_raw/${vyyyymmdd}${vhh} \ + --output_path ${ndas_raw}/${vyyyymmdd}${vhh} \ --summary_file ${logfile}" echo "CALLING: ${cmd}" @@ -745,8 +752,8 @@ echo "HELLO CCCCC" ${cmd} " - if [[ ! -d "$ndas_proc" ]]; then - mkdir -p $ndas_proc + if [[ ! -d "${ndas_proc}" ]]; then + mkdir -p ${ndas_proc} fi for tm in $(seq 1 6); do @@ -755,7 +762,8 @@ echo "HELLO CCCCC" vyyyymmddhh_tm=$($DATE_UTIL -d "${unix_fdate} ${tm} hours ago" +%Y%m%d%H) tm2=$(echo $tm | awk '{printf "%02d\n", $0;}') - cp $ndas_raw/${vyyyymmdd}${vhh}/nam.t${vhh}z.prepbufr.tm${tm2}.nr $ndas_proc/prepbufr.ndas.${vyyyymmddhh_tm} + ${mv_or_cp} ${ndas_raw}/${vyyyymmdd}${vhh}/nam.t${vhh}z.prepbufr.tm${tm2}.nr \ + ${ndas_proc}/prepbufr.ndas.${vyyyymmddhh_tm} done fi From 8c38c19a54642506b4a6dca673a6aaabf667e066 Mon Sep 17 00:00:00 2001 From: gerard ketefian Date: Sat, 20 Jul 2024 10:21:58 -0600 Subject: [PATCH 014/131] Change names of raw directories for CCPA and MRMS to indicate whether they're per-cycle or per-day. 
--- scripts/exregional_get_verif_obs.sh | 77 ++++++++++++++--------------- 1 file changed, 38 insertions(+), 39 deletions(-) diff --git a/scripts/exregional_get_verif_obs.sh b/scripts/exregional_get_verif_obs.sh index c31795441a..4427434b1c 100755 --- a/scripts/exregional_get_verif_obs.sh +++ b/scripts/exregional_get_verif_obs.sh @@ -36,7 +36,7 @@ set -x # # CCPA (Climatology-Calibrated Precipitation Analysis) precipitation accumulation obs # ---------- -# If data is available on disk, it must be in the following +# If data is available on disk, it must be in the following # directory structure and file name conventions expected by verification # tasks: # @@ -46,8 +46,8 @@ set -x # script. # # Notes about the data and how it's used for verification: -# -# 1. Accumulation is currently hardcoded to 01h. The verification will +# +# 1. Accumulation is currently hardcoded to 01h. The verification will # use MET/pcp-combine to sum 01h files into desired accumulations. # # 2. There is a problem with the valid time in the metadata for files @@ -59,17 +59,17 @@ set -x # # MRMS (Multi-Radar Multi-Sensor) radar observations # ---------- -# If data is available on disk, it must be in the following +# If data is available on disk, it must be in the following # directory structure and file name conventions expected by verification # tasks: # # {MRMS_OBS_DIR}/{YYYYMMDD}/[PREFIX]{YYYYMMDD}-{HH}0000.grib2, -# +# # Where [PREFIX] is MergedReflectivityQCComposite_00.50_ for reflectivity # data and EchoTop_18_00.50_ for echo top data. If data is not available # at the top of the hour, you should rename the file closest in time to # your hour(s) of interest to the above naming format. A script -# "ush/mrms_pull_topofhour.py" is provided for this purpose. +# "ush/mrms_pull_topofhour.py" is provided for this purpose. # # If data is retrieved from HPSS, it will automatically staged by this # this script. 
@@ -77,30 +77,30 @@ set -x # # NDAS (NAM Data Assimilation System) conventional observations # ---------- -# If data is available on disk, it must be in the following +# If data is available on disk, it must be in the following # directory structure and file name conventions expected by verification # tasks: # # {NDAS_OBS_DIR}/{YYYYMMDD}/prepbufr.ndas.{YYYYMMDDHH} -# +# # Note that data retrieved from HPSS and other sources may be in a -# different format: nam.t{hh}z.prepbufr.tm{prevhour}.nr, where hh is +# different format: nam.t{hh}z.prepbufr.tm{prevhour}.nr, where hh is # either 00, 06, 12, or 18, and prevhour is the number of hours prior to # hh (00 through 05). If using custom staged data, you will have to # rename the files accordingly. -# +# # If data is retrieved from HPSS, it will be automatically staged by this # this script. # # # NOHRSC snow accumulation observations # ---------- -# If data is available on disk, it must be in the following +# If data is available on disk, it must be in the following # directory structure and file name conventions expected by verification # tasks: # # {NOHRSC_OBS_DIR}/{YYYYMMDD}/sfav2_CONUS_{AA}h_{YYYYMMDD}{HH}_grid184.grb2 -# +# # where AA is the 2-digit accumulation duration in hours: 06 or 24 # # METplus is configured to verify snowfall using 06- and 24-h accumulated @@ -143,7 +143,7 @@ echo echo "HELLO GGGGGGGG" echo "current_fcst = ${current_fcst}" - # Calculate valid date info using date utility + # Calculate valid date info using date utility vdate=$($DATE_UTIL -d "${unix_init_DATE} ${current_fcst} hours" +%Y%m%d%H) unix_vdate=$($DATE_UTIL -d "${unix_init_DATE} ${current_fcst} hours" "+%Y-%m-%d %H:00:00") vyyyymmdd=$(echo ${vdate} | cut -c1-8) @@ -176,7 +176,7 @@ echo "ihh = ${ihh}" # observed accumulations at forecast hour 0 because there aren't yet # any accumulations in the forecast(s) to compare it to. 
if [[ ${current_fcst} -eq 0 ]]; then - current_fcst=$((${current_fcst} + 1)) + current_fcst=$((current_fcst + 1)) continue fi @@ -235,7 +235,7 @@ echo "ihh = ${ihh}" # where YYYYMMDD is a given year, month, and day combination, and # [PREFIX] is a string that is not relevant to the discussion here # (the value it can take on depends on which of several time periods - # YYYYMMDD falls in, and the retrieve_data.py tries various values + # YYYYMMDD falls in, and the retrieve_data.py tries various values # until it finds one for which a tar file exists). Unintuitively, this # archive file contains accumulation data for valid times starting at # hour 19 of the PREVIOUS day (YYYYMM[DD-1]) to hour 18 of the current @@ -247,7 +247,7 @@ echo "ihh = ${ihh}" # # * We call retrieve_data.py in a temporary cycle-specific subdirectory # in order to prevent get_obs_ccpa tasks for different cycles from - # clobbering each other's output. We refer to this as the "raw" CCPA + # clobbering each other's output. We refer to this as the "raw" CCPA # base directory because it contains files as they are found in the # archives before any processing by this script. # @@ -260,9 +260,9 @@ echo "ihh = ${ihh}" # (Data valid at hours 19-23 of the current day (YYYYMMDD) go into the # daily subdirectory for the next day, i.e. YYYYMM[DD+1].) We refer # to these as raw daily (sub)directories to distinguish them from the - # processed daily subdirectories under the processed (final) CCPA base + # processed daily subdirectories under the processed (final) CCPA base # directory (ccpa_basedir_proc). - # + # # * For a given cycle, some of the valid times at which there is forecast # output may not have a corresponding file under the raw base directory # for that cycle. 
This is because another cycle that overlaps this cycle @@ -274,7 +274,7 @@ echo "ihh = ${ihh}" # processed CCPA directory structure than the temporal arrangement used # in the archives and raw directories, we process the raw files such # that the data in the processed directory structure is shifted forward - # in time 6 hours relative to the data in the archives and raw directories. + # in time 6 hours relative to the data in the archives and raw directories. # This results in a processed base directory that, like the raw base # directory, also contains daily subdirectories of the form YYYYMMDD, # but each such subdirectory may only contain CCPA data at valid hours @@ -293,7 +293,7 @@ echo "ihh = ${ihh}" # # DETAILS OF DIRECTORY STRUCTURE IN CCPA ARCHIVE (TAR) FILES # ---------------------------------------------------------- - # + # # The daily archive file containing CCPA obs is named # # [PREFIX].YYYYMMDD.tar @@ -345,16 +345,16 @@ echo "ihh = ${ihh}" # ccpa_day_dir_raw: # Raw daily subdirectory under the raw base directory. This is dependent # on the valid hour (i.e. different for hours 19-23 than for hours 0-18) - # in order to maintain the same data timing arrangement in the raw daily + # in order to maintain the same data timing arrangement in the raw daily # directories as in the archive files. 
# if [[ ${vhh_noZero} -ge 0 && ${vhh_noZero} -le 18 ]]; then valid_time=${vyyyymmdd}${vhh} - ccpa_basedir_raw="${ccpa_basedir_proc}/raw_${iyyyymmddhh}" + ccpa_basedir_raw="${ccpa_basedir_proc}/raw_cyc${iyyyymmddhh}" ccpa_day_dir_raw="${ccpa_basedir_raw}/${vyyyymmdd}" elif [[ ${vhh_noZero} -ge 19 && ${vhh_noZero} -le 23 ]]; then valid_time=${vyyyymmdd_p1}${vhh} - ccpa_basedir_raw="${ccpa_basedir_proc}/raw_${iyyyymmddhh}_vhh19-23" + ccpa_basedir_raw="${ccpa_basedir_proc}/raw_cyc${iyyyymmddhh}_vhh19-23" ccpa_day_dir_raw="${ccpa_basedir_raw}/${vyyyymmdd_p1}" fi mkdir -p ${ccpa_day_dir_raw} @@ -362,7 +362,7 @@ echo "ihh = ${ihh}" # Before calling retrieve_data.py, change location to the raw base # directory to avoid get_obs_ccpa tasks for other cycles from clobbering # the output from this call to retrieve_data.py. Note that retrieve_data.py - # extracts the CCPA tar files into the directory it was called from, + # extracts the CCPA tar files into the directory it was called from, # which is the working directory of this script right before retrieve_data.py # is called. cd ${ccpa_basedir_raw} @@ -402,7 +402,7 @@ echo "ihh = ${ihh}" # of other cycles. For this reason, check again for the existence of the # processed file. If it has already been created by another get_obs_ccpa # task, don't bother to recreate it. - if [[ -f "${ccpa_fp_proc}" ]]; then + if [[ -f "${ccpa_fp_proc}" ]]; then echo "${OBTYPE} file exists on disk:" echo " ccpa_fp_proc = \"{ccpa_fp_proc}\"" @@ -446,14 +446,14 @@ echo "ihh = ${ihh}" # grib2 files for REFC (composite reflectivity) and REFC (echo top) will # be located after this script is done, and the daily such subdirectory # for the current valid time (year, month, and day). We refer to these - # as the "processed" base and daily subdirectories because they contain + # as the "processed" base and daily subdirectories because they contain # the final files after all processing by this script is complete. 
mrms_basedir_proc=${OBS_DIR} mrms_day_dir_proc="${mrms_basedir_proc}/${vyyyymmdd}" # Loop over the fields (REFC and RETOP). for field in ${VAR[@]}; do - + # Set field-dependent parameters needed in forming grib2 file names. if [ "${field}" = "REFC" ]; then file_base_name="MergedReflectivityQCComposite" @@ -465,7 +465,7 @@ echo "ihh = ${ihh}" echo "Invalid field: ${field}" print_err_msg_exit "\ Invalid field specified: ${field} - + Valid options are 'REFC', 'RETOP'. " fi @@ -476,7 +476,7 @@ echo "ihh = ${ihh}" # not the name of the gzipped grib2 files that may be retrieved below # from archive files using the retrieve_data.py script. mrms_fn="${file_base_name}${level}${vyyyymmdd}-${vhh}0000.grib2" - + # Full path to the processed MRMS grib2 file for the current field and # valid time. mrms_fp_proc="${mrms_day_dir_proc}/${mrms_fn}" @@ -511,16 +511,15 @@ echo "ihh = ${ihh}" # an inefficiency in that get_obs_mrms tasks for different cycles will # not be able to detect that another cycle has already retrieved the data # for the current valid day will unnecessarily repeat the retrieval. - mrms_basedir_raw="${mrms_basedir_proc}/raw_${vyyyymmdd}" + mrms_basedir_raw="${mrms_basedir_proc}/raw_day${vyyyymmdd}" mrms_day_dir_raw="${mrms_basedir_raw}/${vyyyymmdd}" - - # Check if the raw daily directory already exists on disk. If so, it + # Check if the raw daily directory already exists on disk. If so, it # means all the gzipped MRMS grib2 files -- i.e. for both REFC and RETOP # and for all times (hours, minutes, and seconds) in the current valid # day -- have already been or are in the process of being retrieved from # the archive (tar) files. If so, skip the retrieval process. If not, - # proceed to retrieve all the files and place them in the raw daily + # proceed to retrieve all the files and place them in the raw daily # directory. 
if [[ -d "${mrms_day_dir_raw}" ]]; then @@ -537,7 +536,7 @@ echo "ihh = ${ihh}" # Before calling retrieve_data.py, change location to the raw base # directory to avoid get_obs_mrms tasks for other cycles from clobbering # the output from this call to retrieve_data.py. Note that retrieve_data.py - # extracts the MRMS tar files into the directory it was called from, + # extracts the MRMS tar files into the directory it was called from, # which is the working directory of this script right before retrieve_data.py # is called. cd ${mrms_basedir_raw} @@ -608,7 +607,7 @@ echo "ihh = ${ihh}" --valid_time ${valid_time} \ --outdir ${mrms_basedir_proc} \ --source ${mrms_basedir_raw} \ - --product ${file_base_name} + --product ${file_base_name} fi @@ -641,7 +640,7 @@ echo "ihh = ${ihh}" # NDAS data is available in 6-hourly combined tar files, each with 7 1-hour prepbufr files: # nam.tHHz.prepbufr.tm00.nr, nam.tHHz.prepbufr.tm01.nr, ... , nam.tHHz.prepbufr.tm06.nr # - # The "tm" here means "time minus", so nam.t12z.prepbufr.tm00.nr is valid for 12z, + # The "tm" here means "time minus", so nam.t12z.prepbufr.tm00.nr is valid for 12z, # nam.t00z.prepbufr.tm03.nr is valid for 21z the previous day, etc. # This means that every six hours we have two obs files valid for the same time: # nam.tHHz.prepbufr.tm00.nr and nam.t[HH+6]z.prepbufr.tm06.nr @@ -652,7 +651,7 @@ echo "ihh = ${ihh}" # pull more HPSS tarballs than necessary if [[ ${current_fcst} -eq 0 && ${current_fcst} -ne ${fcst_length} ]]; then - # If at forecast hour zero, skip to next hour. + # If at forecast hour zero, skip to next hour. current_fcst=$((${current_fcst} + 1)) continue fi @@ -793,7 +792,7 @@ echo "HELLO CCCCC" # If 24-hour files should be available (at 00z and 12z) then look for both files # Otherwise just look for 6hr file if (( ${current_fcst} % 12 == 0 )) && (( ${current_fcst} >= 24 )) ; then - if [[ ! -f "${nohrsc06h_file}" || ! -f "${nohrsc24h_file}" ]] ; then + if [[ ! -f "${nohrsc06h_file}" || ! 
-f "${nohrsc24h_file}" ]] ; then retrieve=1 echo "${OBTYPE} files do not exist on disk:" echo "${nohrsc06h_file}" @@ -848,7 +847,7 @@ echo "HELLO CCCCC" print_err_msg_exit "\ Invalid OBTYPE specified for script; valid options are CCPA, MRMS, NDAS, and NOHRSC " - fi # Increment to next forecast hour + fi # Increment to next forecast hour # Increment to next forecast hour echo "Finished fcst hr=${current_fcst}" From 7f531871c2909e9aaa4b300561e59f67d5e8bc55 Mon Sep 17 00:00:00 2001 From: gerard ketefian Date: Sat, 20 Jul 2024 11:14:42 -0600 Subject: [PATCH 015/131] Version with NDAS changes that seems to work. Still need lots of cleanup and comments. --- scripts/exregional_get_verif_obs.sh | 42 +++++++++++++++++++++++++---- 1 file changed, 37 insertions(+), 5 deletions(-) diff --git a/scripts/exregional_get_verif_obs.sh b/scripts/exregional_get_verif_obs.sh index 4427434b1c..673e4edc76 100755 --- a/scripts/exregional_get_verif_obs.sh +++ b/scripts/exregional_get_verif_obs.sh @@ -622,14 +622,34 @@ echo "ihh = ${ihh}" #----------------------------------------------------------------------- # elif [[ ${OBTYPE} == "NDAS" ]]; then +# Fix these comments. 
+ # Calculate valid date - 1 day; this is needed because some obs files + # are stored in the *previous* day's 00h directory + vdate_m1h=$($DATE_UTIL -d "${unix_init_DATE} ${current_fcst} hours 1 hour ago" +%Y%m%d%H) + #vyyyymmdd_m1h=$(echo ${vdate_m1h} | cut -c1-8) + +echo "" +echo "HELLO PPPPPPP" +echo "vyyyymmdd = ${vyyyymmdd}" +echo "vhh = ${vhh}" +echo "vhh_noZero = ${vhh_noZero}" +#echo "vyyyymmdd_m1h = ${vyyyymmdd_m1h}" +echo "vdate_m1h = ${vdate_m1h}" + # raw NDAS data from HPSS ndas_raw=${OBS_DIR}/raw # Reorganized NDAS location ndas_proc=${OBS_DIR} + # raw NDAS data from HPSS + #ndas_raw=${OBS_DIR}/raw + ndas_raw="${ndas_proc}/raw_cyc${iyyyymmddhh}" + # Check if file exists on disk - ndas_file="${ndas_proc}/prepbufr.ndas.${vyyyymmdd}${vhh}" + #ndas_file="${ndas_proc}/prepbufr.ndas.${vyyyymmdd}${vhh}" + #ndas_file_m1h="${ndas_proc}/prepbufr.ndas.${vyyyymmdd_m1h}${vhh}" + ndas_file="${ndas_proc}/prepbufr.ndas.${vdate_m1h}" if [[ -f "${ndas_file}" ]]; then echo "${OBTYPE} file exists on disk:" echo "${ndas_file}" @@ -650,9 +670,14 @@ echo "ihh = ${ihh}" # The current logic of this script will likely stage more files than you need, but will never # pull more HPSS tarballs than necessary - if [[ ${current_fcst} -eq 0 && ${current_fcst} -ne ${fcst_length} ]]; then - # If at forecast hour zero, skip to next hour. - current_fcst=$((${current_fcst} + 1)) +# + +# This seems like a strange statement since the only way it can be true +# is if the forecast length is zero. + # If at forecast hour zero, skip to next hour. + #if [[ ${current_fcst} -eq 0 && ${current_fcst} -ne ${fcst_length} ]]; then + if [[ ${current_fcst} -eq 0 ]]; then + current_fcst=$((current_fcst + 1)) continue fi @@ -670,12 +695,16 @@ echo "vhh_noZero = ${vhh_noZero}" echo "" echo "HELLO BBBBB" - if [[ ! -d "$ndas_raw/${vyyyymmdd}${vhh}" ]]; then + #valid_time=${vyyyymmdd}${vhh} + #output_path="${ndas_raw}/${vyyyymmdd}" + + if [[ ! 
-d "${ndas_raw}/${vyyyymmdd}${vhh}" ]]; then echo "" echo "HELLO CCCCC" mkdir -p ${ndas_raw}/${vyyyymmdd}${vhh} fi + cd ${ndas_raw} # Pull NDAS data from HPSS cmd=" python3 -u ${USHdir}/retrieve_data.py \ @@ -704,6 +733,7 @@ echo "HELLO CCCCC" # copy files from the previous 6 hours ("tm" means "time minus") # The tm06 files contain more/better observations than tm00 for the equivalent time for tm in $(seq 1 6); do +# for tm in $(seq --format="%02g" 6 -1 1); do vyyyymmddhh_tm=$($DATE_UTIL -d "${unix_vdate} ${tm} hours ago" +%Y%m%d%H) tm2=$(echo $tm | awk '{printf "%02d\n", $0;}') @@ -715,6 +745,7 @@ echo "HELLO CCCCC" # If at last forecast hour, make sure we're getting the last observations if [[ ${current_fcst} -eq ${fcst_length} ]]; then + echo "Retrieving NDAS obs for final forecast hour" vhh_noZero=$((vhh_noZero + 6 - (vhh_noZero % 6))) if [[ ${vhh_noZero} -eq 24 ]]; then @@ -730,6 +761,7 @@ echo "HELLO CCCCC" mkdir -p ${ndas_raw}/${vyyyymmdd}${vhh} fi + cd ${ndas_raw} # Pull NDAS data from HPSS cmd=" python3 -u ${USHdir}/retrieve_data.py \ From 7926705a1c7a92f01958ed939d899118d0004d0c Mon Sep 17 00:00:00 2001 From: gerard ketefian Date: Sat, 20 Jul 2024 22:02:17 -0600 Subject: [PATCH 016/131] Second set of NDAS changes so that there are no repeat pulls of NDAS files from HPSS (and works with multiple cycles). 
--- scripts/exregional_get_verif_obs.sh | 297 +++++++++++++++++----------- 1 file changed, 185 insertions(+), 112 deletions(-) diff --git a/scripts/exregional_get_verif_obs.sh b/scripts/exregional_get_verif_obs.sh index 673e4edc76..033dd3c0fb 100755 --- a/scripts/exregional_get_verif_obs.sh +++ b/scripts/exregional_get_verif_obs.sh @@ -133,6 +133,16 @@ unix_init_DATE="${iyyyy}-${imm}-${idd} ${ihh}:00:00" # This awk expression gets the last item of the list $FHR fcst_length=$(echo ${FHR} | awk '{ print $NF }') + +if [[ ${OBTYPE} == "NDAS" ]]; then + vdate_last=$($DATE_UTIL -d "${unix_init_DATE} ${fcst_length} hours" +%Y%m%d%H) + vhh_last=$(echo ${vdate_last} | cut -c9-10) + hours_to_add=$(( vhh_last + 6 - (vhh_last % 6) )) + fcst_length_rounded_up=$(( fcst_length + hours_to_add )) +# vdate_last_rounded_up=$($DATE_UTIL -d "${unix_init_DATE} ${fcst_length_rounded_up} hours" +%Y%m%d%H) + fcst_length=${fcst_length_rounded_up} +fi + # Make sure fcst_length isn't octal (leading zero) fcst_length=$((10#${fcst_length})) @@ -145,19 +155,19 @@ echo "current_fcst = ${current_fcst}" # Calculate valid date info using date utility vdate=$($DATE_UTIL -d "${unix_init_DATE} ${current_fcst} hours" +%Y%m%d%H) - unix_vdate=$($DATE_UTIL -d "${unix_init_DATE} ${current_fcst} hours" "+%Y-%m-%d %H:00:00") + #unix_vdate=$($DATE_UTIL -d "${unix_init_DATE} ${current_fcst} hours" "+%Y-%m-%d %H:00:00") vyyyymmdd=$(echo ${vdate} | cut -c1-8) vhh=$(echo ${vdate} | cut -c9-10) # Calculate valid date + 1 day; this is needed because some obs files # are stored in the *next* day's 00h directory - vdate_p1=$($DATE_UTIL -d "${unix_init_DATE} ${current_fcst} hours 1 day" +%Y%m%d%H) - vyyyymmdd_p1=$(echo ${vdate_p1} | cut -c1-8) + vdate_p1d=$($DATE_UTIL -d "${unix_init_DATE} ${current_fcst} hours 1 day" +%Y%m%d%H) + vyyyymmdd_p1d=$(echo ${vdate_p1d} | cut -c1-8) echo echo "HELLO HHHHHHHH" echo "vyyyymmdd = ${vyyyymmdd}" -echo "vyyyymmdd_p1 = ${vyyyymmdd_p1}" +echo "vyyyymmdd_p1d = ${vyyyymmdd_p1d}" echo 
"ihh = ${ihh}" #exit @@ -353,9 +363,9 @@ echo "ihh = ${ihh}" ccpa_basedir_raw="${ccpa_basedir_proc}/raw_cyc${iyyyymmddhh}" ccpa_day_dir_raw="${ccpa_basedir_raw}/${vyyyymmdd}" elif [[ ${vhh_noZero} -ge 19 && ${vhh_noZero} -le 23 ]]; then - valid_time=${vyyyymmdd_p1}${vhh} + valid_time=${vyyyymmdd_p1d}${vhh} ccpa_basedir_raw="${ccpa_basedir_proc}/raw_cyc${iyyyymmddhh}_vhh19-23" - ccpa_day_dir_raw="${ccpa_basedir_raw}/${vyyyymmdd_p1}" + ccpa_day_dir_raw="${ccpa_basedir_raw}/${vyyyymmdd_p1d}" fi mkdir -p ${ccpa_day_dir_raw} @@ -622,40 +632,48 @@ echo "ihh = ${ihh}" #----------------------------------------------------------------------- # elif [[ ${OBTYPE} == "NDAS" ]]; then + # Fix these comments. # Calculate valid date - 1 day; this is needed because some obs files # are stored in the *previous* day's 00h directory vdate_m1h=$($DATE_UTIL -d "${unix_init_DATE} ${current_fcst} hours 1 hour ago" +%Y%m%d%H) #vyyyymmdd_m1h=$(echo ${vdate_m1h} | cut -c1-8) + vdate_p1h=$($DATE_UTIL -d "${unix_init_DATE} ${current_fcst} hours 1 hour" +%Y%m%d%H) + vhh_p1h=$(echo ${vdate_p1h} | cut -c9-10) + vhh_p1h_noZero=$((10#${vhh_p1h})) + vyyyymmdd_p1h=$(echo ${vdate_p1h} | cut -c1-8) + echo "" echo "HELLO PPPPPPP" echo "vyyyymmdd = ${vyyyymmdd}" echo "vhh = ${vhh}" echo "vhh_noZero = ${vhh_noZero}" #echo "vyyyymmdd_m1h = ${vyyyymmdd_m1h}" +echo "vdate = ${vdate}" echo "vdate_m1h = ${vdate_m1h}" - - # raw NDAS data from HPSS - ndas_raw=${OBS_DIR}/raw +echo "vdate_p1h = ${vdate_m1h}" # Reorganized NDAS location - ndas_proc=${OBS_DIR} + ndas_basedir_proc=${OBS_DIR} + ndas_day_dir_proc="${ndas_basedir_proc}" # raw NDAS data from HPSS #ndas_raw=${OBS_DIR}/raw - ndas_raw="${ndas_proc}/raw_cyc${iyyyymmddhh}" + #ndas_raw="${ndas_basedir_proc}/raw_cyc${iyyyymmddhh}" # Check if file exists on disk - #ndas_file="${ndas_proc}/prepbufr.ndas.${vyyyymmdd}${vhh}" - #ndas_file_m1h="${ndas_proc}/prepbufr.ndas.${vyyyymmdd_m1h}${vhh}" - ndas_file="${ndas_proc}/prepbufr.ndas.${vdate_m1h}" - if [[ -f 
"${ndas_file}" ]]; then + #ndas_file="${ndas_basedir_proc}/prepbufr.ndas.${vyyyymmdd}${vhh}" + #ndas_file_m1h="${ndas_basedir_proc}/prepbufr.ndas.${vyyyymmdd_m1h}${vhh}" + #ndas_fn_check="prepbufr.ndas.${vdate_p1h}" + ndas_fn="prepbufr.ndas.${vyyyymmdd}${vhh}" + ndas_fp_proc="${ndas_basedir_proc}/${ndas_fn}" + if [[ -f "${ndas_fp_proc}" ]]; then echo "${OBTYPE} file exists on disk:" - echo "${ndas_file}" + echo "${ndas_fp_proc}" else echo "${OBTYPE} file does not exist on disk:" - echo "${ndas_file}" + echo "${ndas_fp_proc}" echo "Will attempt to retrieve from remote locations" # NDAS data is available in 6-hourly combined tar files, each with 7 1-hour prepbufr files: # nam.tHHz.prepbufr.tm00.nr, nam.tHHz.prepbufr.tm01.nr, ... , nam.tHHz.prepbufr.tm06.nr @@ -670,16 +688,14 @@ echo "vdate_m1h = ${vdate_m1h}" # The current logic of this script will likely stage more files than you need, but will never # pull more HPSS tarballs than necessary -# - # This seems like a strange statement since the only way it can be true # is if the forecast length is zero. # If at forecast hour zero, skip to next hour. #if [[ ${current_fcst} -eq 0 && ${current_fcst} -ne ${fcst_length} ]]; then - if [[ ${current_fcst} -eq 0 ]]; then - current_fcst=$((current_fcst + 1)) - continue - fi + # if [[ ${current_fcst} -eq 0 ]]; then + # current_fcst=$((current_fcst + 1)) + # continue + # fi # Whether to move or copy extracted files from the raw directories to their # final locations. 
@@ -689,116 +705,173 @@ echo "vdate_m1h = ${vdate_m1h}" echo "" echo "HELLO AAAAA" echo "vhh_noZero = ${vhh_noZero}" +echo "vhh_p1h_noZero = ${vhh_p1h_noZero}" - if [[ ${vhh_noZero} -eq 0 || ${vhh_noZero} -eq 6 || \ - ${vhh_noZero} -eq 12 || ${vhh_noZero} -eq 18 ]]; then + if [[ ${vhh_p1h_noZero} -eq 0 || ${vhh_p1h_noZero} -eq 6 || \ + ${vhh_p1h_noZero} -eq 12 || ${vhh_p1h_noZero} -eq 18 ]]; then echo "" echo "HELLO BBBBB" - #valid_time=${vyyyymmdd}${vhh} - #output_path="${ndas_raw}/${vyyyymmdd}" + #ndas_basedir_raw="${ndas_basedir_proc}/raw_cyc${iyyyymmddhh}" + #ndas_basedir_raw="${ndas_basedir_proc}/raw_qrtrday${vyyyymmdd_p1h}${vhh_p1h}" + ndas_basedir_raw="${ndas_basedir_proc}/raw_day${vyyyymmdd_p1h}" + ndas_day_dir_raw="${ndas_basedir_raw}/${vyyyymmdd_p1h}${vhh_p1h}" + #mkdir -p ${ndas_day_dir_raw} - if [[ ! -d "${ndas_raw}/${vyyyymmdd}${vhh}" ]]; then -echo "" -echo "HELLO CCCCC" - mkdir -p ${ndas_raw}/${vyyyymmdd}${vhh} - fi - cd ${ndas_raw} - # Pull NDAS data from HPSS - cmd=" - python3 -u ${USHdir}/retrieve_data.py \ - --debug \ - --file_set obs \ - --config ${PARMdir}/data_locations.yml \ - --cycle_date ${vyyyymmdd}${vhh} \ - --data_stores hpss \ - --data_type NDAS_obs \ - --output_path ${ndas_raw}/${vyyyymmdd}${vhh} \ - --summary_file ${logfile}" - - echo "CALLING: ${cmd}" - - $cmd || print_err_msg_exit "\ - Could not retrieve NDAS data from HPSS - - The following command exited with a non-zero exit status: - ${cmd} -" - if [[ ! -d "${ndas_proc}" ]]; then - mkdir -p ${ndas_proc} - fi + # Check if the raw daily directory already exists on disk. If so, it + # means +#all the gzipped NDAS grib2 files -- i.e. for both REFC and RETOP +# and for all times (hours, minutes, and seconds) in the current valid +# day -- have already been or are in the process of being retrieved from +# the archive (tar) files. +# If so, skip the retrieval process. If not, + # proceed to retrieve all the files and place them in the raw daily + # directory. 
+ if [[ -d "${ndas_day_dir_raw}" ]]; then - # copy files from the previous 6 hours ("tm" means "time minus") - # The tm06 files contain more/better observations than tm00 for the equivalent time - for tm in $(seq 1 6); do -# for tm in $(seq --format="%02g" 6 -1 1); do - vyyyymmddhh_tm=$($DATE_UTIL -d "${unix_vdate} ${tm} hours ago" +%Y%m%d%H) - tm2=$(echo $tm | awk '{printf "%02d\n", $0;}') +# Fix up these messages. + echo "${OBTYPE} directory for day ${vyyyymmdd} exists on disk:" + echo " ndas_day_dir_proc = \"${ndas_day_dir_proc}\"" + echo "This means NDAS files for all hours of the current valid day (${vyyyymmdd}) have been or are being retrieved." + echo "Thus, we will NOT attempt to retrieve NDAS data for the current valid time from remote locations." - ${mv_or_cp} ${ndas_raw}/${vyyyymmdd}${vhh}/nam.t${vhh}z.prepbufr.tm${tm2}.nr \ - ${ndas_proc}/prepbufr.ndas.${vyyyymmddhh_tm} - done + else - fi + mkdir -p ${ndas_day_dir_raw} + valid_time=${vyyyymmdd_p1h}${vhh_p1h} + +# Before calling retrieve_data.py, change location to the raw base +# directory to avoid get_obs_ndas tasks for other cycles from clobbering +# the output from this call to retrieve_data.py. Note that retrieve_data.py +# extracts the NDAS tar files into the directory it was called from, +# which is the working directory of this script right before retrieve_data.py +# is called. + cd ${ndas_basedir_raw} + +# Use the retrieve_data.py script to retrieve all the gzipped NDAS grib2 +# files -- i.e. for both REFC and RETOP and for all times (hours, minutes, +# and seconds) in the current valid day -- and place them in the raw daily +# directory. Note that this will pull both the REFC and RETOP files in +# one call. 
+ cmd=" + python3 -u ${USHdir}/retrieve_data.py \ + --debug \ + --file_set obs \ + --config ${PARMdir}/data_locations.yml \ + --cycle_date ${vyyyymmdd_p1h}${vhh_p1h} \ + --data_stores hpss \ + --data_type NDAS_obs \ + --output_path ${ndas_day_dir_raw} \ + --summary_file ${logfile}" - # If at last forecast hour, make sure we're getting the last observations - if [[ ${current_fcst} -eq ${fcst_length} ]]; then - - echo "Retrieving NDAS obs for final forecast hour" - vhh_noZero=$((vhh_noZero + 6 - (vhh_noZero % 6))) - if [[ ${vhh_noZero} -eq 24 ]]; then - vyyyymmdd=${vyyyymmdd_p1} - vhh=00 - elif [[ ${vhh_noZero} -eq 6 ]]; then - vhh=06 - else - vhh=${vhh_noZero} - fi + echo "CALLING: ${cmd}" - if [[ ! -d "${ndas_raw}/${vyyyymmdd}${vhh}" ]]; then - mkdir -p ${ndas_raw}/${vyyyymmdd}${vhh} - fi + $cmd || print_err_msg_exit "\ + Could not retrieve NDAS data from HPSS - cd ${ndas_raw} - # Pull NDAS data from HPSS - cmd=" - python3 -u ${USHdir}/retrieve_data.py \ - --debug \ - --file_set obs \ - --config ${PARMdir}/data_locations.yml \ - --cycle_date ${vyyyymmdd}${vhh} \ - --data_stores hpss \ - --data_type NDAS_obs \ - --output_path ${ndas_raw}/${vyyyymmdd}${vhh} \ - --summary_file ${logfile}" - - echo "CALLING: ${cmd}" - - $cmd || print_err_msg_exit "\ - Could not retrieve NDAS data from HPSS - - The following command exited with a non-zero exit status: - ${cmd} + The following command exited with a non-zero exit status: + ${cmd} " +# Create a flag file that can be used to confirm the completion of the +# retrieval of all files for the current valid day. + touch ${ndas_day_dir_raw}/pull_completed.txt - if [[ ! 
-d "${ndas_proc}" ]]; then - mkdir -p ${ndas_proc} fi - for tm in $(seq 1 6); do - last_fhr=$((fcst_length + 6 - (vhh_noZero % 6))) - unix_fdate=$($DATE_UTIL -d "${unix_init_DATE} ${last_fhr} hours" "+%Y-%m-%d %H:00:00") - vyyyymmddhh_tm=$($DATE_UTIL -d "${unix_fdate} ${tm} hours ago" +%Y%m%d%H) - tm2=$(echo $tm | awk '{printf "%02d\n", $0;}') - - ${mv_or_cp} ${ndas_raw}/${vyyyymmdd}${vhh}/nam.t${vhh}z.prepbufr.tm${tm2}.nr \ - ${ndas_proc}/prepbufr.ndas.${vyyyymmddhh_tm} +# Make sure the retrieval process for the current day (which may have +# been executed above for this cycle or by another cycle) has completed +# by checking for the existence of the flag file that marks completion. +# If not, keep checking until the flag file shows up. + while [[ ! -f "${ndas_day_dir_raw}/pull_completed.txt" ]]; do + echo "Waiting for the retrieval process for valid quarter-day ending on ${vyyyymmdd_p1h}${vhh_p1h} to complete..." + sleep 5s done + if [[ -f "${ndas_fp_proc}" ]]; then + + echo "${OBTYPE} file exists on disk:" + echo " ndas_fp_proc = \"${ndas_fp_proc}\"" + echo "Will NOT attempt to retrieve from remote locations." 
+ + else + + #mkdir -p ${ndas_basedir_proc} + + unix_vdate_p1h=$($DATE_UTIL -d "${unix_init_DATE} $((current_fcst+1)) hours" "+%Y-%m-%d %H:00:00") + # copy files from the previous 6 hours ("tm" means "time minus") + # The tm06 files contain more/better observations than tm00 for the equivalent time + for tm in $(seq 1 6); do +# for tm in $(seq --format="%02g" 6 -1 1); do + vyyyymmddhh_p1h_tm=$($DATE_UTIL -d "${unix_vdate_p1h} ${tm} hours ago" +%Y%m%d%H) + if [ ${vyyyymmddhh_p1h_tm} -le ${vdate_last} ]; then + tm2=$(echo $tm | awk '{printf "%02d\n", $0;}') + ${mv_or_cp} ${ndas_day_dir_raw}/nam.t${vhh_p1h}z.prepbufr.tm${tm2}.nr \ + ${ndas_basedir_proc}/prepbufr.ndas.${vyyyymmddhh_p1h_tm} + fi + done + + fi + fi + # If at last forecast hour, make sure we're getting the last observations +# if [[ ${current_fcst} -eq ${fcst_length} ]]; then +# +# echo "Retrieving NDAS obs for final forecast hour" +# vhh_noZero=$((vhh_noZero + 6 - (vhh_noZero % 6))) +# if [[ ${vhh_noZero} -eq 24 ]]; then +# vyyyymmdd=${vyyyymmdd_p1d} +# vhh=00 +# elif [[ ${vhh_noZero} -eq 6 ]]; then +# vhh=06 +# else +# vhh=${vhh_noZero} +# fi +# +# if [[ ! -d "${ndas_raw}/${vyyyymmdd}${vhh}" ]]; then +# mkdir -p ${ndas_raw}/${vyyyymmdd}${vhh} +# fi +# +# cd ${ndas_raw} +# # Pull NDAS data from HPSS +# cmd=" +# python3 -u ${USHdir}/retrieve_data.py \ +# --debug \ +# --file_set obs \ +# --config ${PARMdir}/data_locations.yml \ +# --cycle_date ${vyyyymmdd}${vhh} \ +# --data_stores hpss \ +# --data_type NDAS_obs \ +# --output_path ${ndas_raw}/${vyyyymmdd}${vhh} \ +# --summary_file ${logfile}" +# +# echo "CALLING: ${cmd}" +# +# $cmd || print_err_msg_exit "\ +# Could not retrieve NDAS data from HPSS +# +# The following command exited with a non-zero exit status: +# ${cmd} +#" +# +# if [[ ! 
-d "${ndas_basedir_proc}" ]]; then +# mkdir -p ${ndas_basedir_proc} +# fi +# +# for tm in $(seq 1 6); do +# last_fhr=$((fcst_length + 6 - (vhh_noZero % 6))) +# unix_fdate=$($DATE_UTIL -d "${unix_init_DATE} ${last_fhr} hours" "+%Y-%m-%d %H:00:00") +# vyyyymmddhh_tm=$($DATE_UTIL -d "${unix_fdate} ${tm} hours ago" +%Y%m%d%H) +# tm2=$(echo $tm | awk '{printf "%02d\n", $0;}') +# +# ${mv_or_cp} ${ndas_raw}/${vyyyymmdd}${vhh}/nam.t${vhh}z.prepbufr.tm${tm2}.nr \ +# ${ndas_basedir_proc}/prepbufr.ndas.${vyyyymmddhh_tm} +# done +# +# fi + fi # #----------------------------------------------------------------------- From f8c3ec67d41aca784b102498931f0eb1e6eda3d1 Mon Sep 17 00:00:00 2001 From: gerard ketefian Date: Mon, 22 Jul 2024 16:32:12 -0600 Subject: [PATCH 017/131] Clean up NDAS section in get_obs_... ex-script. --- scripts/exregional_get_verif_obs.sh | 240 +++++++++++----------------- 1 file changed, 94 insertions(+), 146 deletions(-) diff --git a/scripts/exregional_get_verif_obs.sh b/scripts/exregional_get_verif_obs.sh index 033dd3c0fb..93f17bfa60 100755 --- a/scripts/exregional_get_verif_obs.sh +++ b/scripts/exregional_get_verif_obs.sh @@ -583,7 +583,7 @@ echo "ihh = ${ihh}" fi # Make sure the retrieval process for the current day (which may have - # been executed above for this cycle or by another cycle) has completed + # been executed above for this cycle or for another cycle) has completed # by checking for the existence of the flag file that marks completion. # If not, keep checking until the flag file shows up. while [[ ! -f "${mrms_day_dir_raw}/pull_completed.txt" ]]; do @@ -633,48 +633,53 @@ echo "ihh = ${ihh}" # elif [[ ${OBTYPE} == "NDAS" ]]; then -# Fix these comments. 
- # Calculate valid date - 1 day; this is needed because some obs files - # are stored in the *previous* day's 00h directory - vdate_m1h=$($DATE_UTIL -d "${unix_init_DATE} ${current_fcst} hours 1 hour ago" +%Y%m%d%H) - #vyyyymmdd_m1h=$(echo ${vdate_m1h} | cut -c1-8) - - vdate_p1h=$($DATE_UTIL -d "${unix_init_DATE} ${current_fcst} hours 1 hour" +%Y%m%d%H) + # Calculate valid date plus 1 hour. This is needed because we need to + # check whether this date corresponds to one of the valid hours-of-day + # 00, 06, 12, and 18 on which the NDAS archives are provided. + unix_vdate_p1h=$($DATE_UTIL -d "${unix_init_DATE} $((current_fcst+1)) hours" "+%Y-%m-%d %H:00:00") + vdate_p1h=$($DATE_UTIL -d "${unix_vdate_p1h}" +%Y%m%d%H) + vyyyymmdd_p1h=$(echo ${vdate_p1h} | cut -c1-8) vhh_p1h=$(echo ${vdate_p1h} | cut -c9-10) vhh_p1h_noZero=$((10#${vhh_p1h})) - vyyyymmdd_p1h=$(echo ${vdate_p1h} | cut -c1-8) echo "" echo "HELLO PPPPPPP" echo "vyyyymmdd = ${vyyyymmdd}" echo "vhh = ${vhh}" echo "vhh_noZero = ${vhh_noZero}" -#echo "vyyyymmdd_m1h = ${vyyyymmdd_m1h}" echo "vdate = ${vdate}" -echo "vdate_m1h = ${vdate_m1h}" -echo "vdate_p1h = ${vdate_m1h}" +echo "vdate_p1h = ${vdate_p1h}" - # Reorganized NDAS location + # Base directory in which the hourly NDAS prepbufr files will be located. + # We refer to this as the "processed" base directory because it contains + # the final files after all processing by this script is complete. ndas_basedir_proc=${OBS_DIR} - ndas_day_dir_proc="${ndas_basedir_proc}" - - # raw NDAS data from HPSS - #ndas_raw=${OBS_DIR}/raw - #ndas_raw="${ndas_basedir_proc}/raw_cyc${iyyyymmddhh}" - # Check if file exists on disk - #ndas_file="${ndas_basedir_proc}/prepbufr.ndas.${vyyyymmdd}${vhh}" - #ndas_file_m1h="${ndas_basedir_proc}/prepbufr.ndas.${vyyyymmdd_m1h}${vhh}" - #ndas_fn_check="prepbufr.ndas.${vdate_p1h}" + # Name of the NDAS prepbufr file for the current valid time that will + # appear in the processed daily subdirectory after this script finishes. 
+ # This is the name of the processed file. Note that this is not the + # same as the name of the raw file, i.e. the file extracted from the + # archive (tar) file retrieved below by the retrieve_data.py script. ndas_fn="prepbufr.ndas.${vyyyymmdd}${vhh}" + + # Full path to the processed NDAS prepbufr file for the current field and + # valid time. ndas_fp_proc="${ndas_basedir_proc}/${ndas_fn}" + + # Check if the processed NDAS prepbufr file for the current valid time + # already exists on disk. If so, skip this valid time and go to the next + # one. if [[ -f "${ndas_fp_proc}" ]]; then + echo "${OBTYPE} file exists on disk:" - echo "${ndas_fp_proc}" + echo " ndas_fp_proc = \"${ndas_fp_proc}\"" + echo "Will NOT attempt to retrieve from remote locations." + else + echo "${OBTYPE} file does not exist on disk:" - echo "${ndas_fp_proc}" - echo "Will attempt to retrieve from remote locations" + echo " ndas_fp_proc = \"${ndas_fp_proc}\"" + echo "Will attempt to retrieve from remote locations." # NDAS data is available in 6-hourly combined tar files, each with 7 1-hour prepbufr files: # nam.tHHz.prepbufr.tm00.nr, nam.tHHz.prepbufr.tm01.nr, ... , nam.tHHz.prepbufr.tm06.nr # @@ -685,18 +690,6 @@ echo "vdate_p1h = ${vdate_m1h}" # We want to use the tm06 file because it contains more/better obs (confirmed with EMC: even # though the earlier files are larger, this is because the time window is larger) - # The current logic of this script will likely stage more files than you need, but will never - # pull more HPSS tarballs than necessary - -# This seems like a strange statement since the only way it can be true -# is if the forecast length is zero. - # If at forecast hour zero, skip to next hour. - #if [[ ${current_fcst} -eq 0 && ${current_fcst} -ne ${fcst_length} ]]; then - # if [[ ${current_fcst} -eq 0 ]]; then - # current_fcst=$((current_fcst + 1)) - # continue - # fi - # Whether to move or copy extracted files from the raw directories to their # final locations. 
#mv_or_cp="mv" @@ -707,60 +700,59 @@ echo "HELLO AAAAA" echo "vhh_noZero = ${vhh_noZero}" echo "vhh_p1h_noZero = ${vhh_p1h_noZero}" + # Due to the way NDAS archives are organized, we can only retrieve the + # archive (tar) file containing data for the current valid hour (and the + # 5 hours preceeding it) if the hour-of-day corresponding to the current + # valid time plus 1 hour corresponds to one of 0, 6, 12, and 18. if [[ ${vhh_p1h_noZero} -eq 0 || ${vhh_p1h_noZero} -eq 6 || \ ${vhh_p1h_noZero} -eq 12 || ${vhh_p1h_noZero} -eq 18 ]]; then -echo "" -echo "HELLO BBBBB" - #ndas_basedir_raw="${ndas_basedir_proc}/raw_cyc${iyyyymmddhh}" - #ndas_basedir_raw="${ndas_basedir_proc}/raw_qrtrday${vyyyymmdd_p1h}${vhh_p1h}" + # Base directory that will contain the 6-hourly subdirectories in which + # the NDAS prepbufr files retrieved from archive files will be placed, + # and the 6-hourly subdirectory for the current valid time plus 1 hour. + # We refer to these as the "raw" NDAS base and 6-hourly directories + # because they contain files as they are found in the archives before + # any processing by this script. ndas_basedir_raw="${ndas_basedir_proc}/raw_day${vyyyymmdd_p1h}" - ndas_day_dir_raw="${ndas_basedir_raw}/${vyyyymmdd_p1h}${vhh_p1h}" - #mkdir -p ${ndas_day_dir_raw} - - - - # Check if the raw daily directory already exists on disk. If so, it - # means -#all the gzipped NDAS grib2 files -- i.e. for both REFC and RETOP -# and for all times (hours, minutes, and seconds) in the current valid -# day -- have already been or are in the process of being retrieved from -# the archive (tar) files. -# If so, skip the retrieval process. If not, - # proceed to retrieve all the files and place them in the raw daily - # directory. + ndas_day_dir_raw="${ndas_basedir_raw}/${vdate_p1h}" + + # Check if the raw 6-hourly directory already exists on disk. 
If so, it + # means the NDAS prepbufr files for the current valid hour and the 5 hours + # preceeding it have already been or are in the process of being retrieved + # from the archive (tar) files. If so, skip the retrieval process. If + # not, proceed to retrieve the archive file, extract the prepbufr files + # from it, and place them in the raw daily directory. if [[ -d "${ndas_day_dir_raw}" ]]; then -# Fix up these messages. - echo "${OBTYPE} directory for day ${vyyyymmdd} exists on disk:" - echo " ndas_day_dir_proc = \"${ndas_day_dir_proc}\"" - echo "This means NDAS files for all hours of the current valid day (${vyyyymmdd}) have been or are being retrieved." - echo "Thus, we will NOT attempt to retrieve NDAS data for the current valid time from remote locations." + print_info_msg " +${OBTYPE} raw directory for day ${vdate_p1h} exists on disk: + ndas_day_dir_raw = \"${ndas_day_dir_raw}\" +This means NDAS files for the current valid time (${vyyyymmdd}) and the +5 hours preceeding it have been or are being retrieved by a get_obs_ndas +workflow task for another cycle. Thus, we will NOT attempt to retrieve +NDAS data for the current valid time from remote locations." else mkdir -p ${ndas_day_dir_raw} - valid_time=${vyyyymmdd_p1h}${vhh_p1h} - -# Before calling retrieve_data.py, change location to the raw base -# directory to avoid get_obs_ndas tasks for other cycles from clobbering -# the output from this call to retrieve_data.py. Note that retrieve_data.py -# extracts the NDAS tar files into the directory it was called from, -# which is the working directory of this script right before retrieve_data.py -# is called. + + # Before calling retrieve_data.py, change location to the raw base + # directory to avoid get_obs_ndas tasks for other cycles from clobbering + # the output from this call to retrieve_data.py. 
Note that retrieve_data.py + # extracts the NDAS prepbufr files the archive into the directory it was + # called from, which is the working directory of this script right before + # retrieve_data.py is called. cd ${ndas_basedir_raw} -# Use the retrieve_data.py script to retrieve all the gzipped NDAS grib2 -# files -- i.e. for both REFC and RETOP and for all times (hours, minutes, -# and seconds) in the current valid day -- and place them in the raw daily -# directory. Note that this will pull both the REFC and RETOP files in -# one call. + # Use the retrieve_data.py script to retrieve all the NDAS prepbufr files + # for the current valid hour and the 5 hours preceeding it and place them + # in the raw 6-hourly directory. cmd=" python3 -u ${USHdir}/retrieve_data.py \ --debug \ --file_set obs \ --config ${PARMdir}/data_locations.yml \ - --cycle_date ${vyyyymmdd_p1h}${vhh_p1h} \ + --cycle_date ${vdate_p1h} \ --data_stores hpss \ --data_type NDAS_obs \ --output_path ${ndas_day_dir_raw} \ @@ -774,21 +766,32 @@ echo "HELLO BBBBB" The following command exited with a non-zero exit status: ${cmd} " -# Create a flag file that can be used to confirm the completion of the -# retrieval of all files for the current valid day. + + # Create a flag file that can be used to confirm the completion of the + # retrieval of all files for the 6-hour interval ending in vdate_p1h. touch ${ndas_day_dir_raw}/pull_completed.txt fi -# Make sure the retrieval process for the current day (which may have -# been executed above for this cycle or by another cycle) has completed -# by checking for the existence of the flag file that marks completion. -# If not, keep checking until the flag file shows up. + # Make sure the retrieval process for the 6-hour interval ending in + # vdate_p1h (which may have been executed above for this cycle or for + # another cycle) has completed by checking for the existence of the flag + # file that marks completion. 
If not, keep checking until the flag file + # shows up. while [[ ! -f "${ndas_day_dir_raw}/pull_completed.txt" ]]; do - echo "Waiting for the retrieval process for valid quarter-day ending on ${vyyyymmdd_p1h}${vhh_p1h} to complete..." + echo "Waiting for completion of the NDAS obs retrieval process for the" + echo "6-hour interval ending on ${vdate_p1h} ..." sleep 5s done + # Since this script is part of a workflow, another get_obs_ndas task (i.e. + # for another cycle) may have extracted and placed the current file in its + # processed location between the time we checked for its existence above + # (and didn't find it) and now. This can happen because there can be + # overlap between the verification times for the current cycle and those + # of other cycles. For this reason, check again for the existence of the + # processed file. If it has already been created by another get_obs_ndas + # task, don't bother to recreate it. if [[ -f "${ndas_fp_proc}" ]]; then echo "${OBTYPE} file exists on disk:" @@ -797,18 +800,19 @@ echo "HELLO BBBBB" else - #mkdir -p ${ndas_basedir_proc} - - unix_vdate_p1h=$($DATE_UTIL -d "${unix_init_DATE} $((current_fcst+1)) hours" "+%Y-%m-%d %H:00:00") - # copy files from the previous 6 hours ("tm" means "time minus") - # The tm06 files contain more/better observations than tm00 for the equivalent time - for tm in $(seq 1 6); do + # Create the processed NDAS prepbufr files for the current valid hour as + # well as the preceeding 5 hours (or fewer if they're outside the time + # interval of the forecast) by copying or moving (and in the process + # renaming) them from the raw 6-hourly directory. In the following loop, + # "tm" means "time minus". Note that the tm06 files contain more/better + # observations than tm00 for the equivalent time. 
+ for tm in $(seq 6 -1 1); do # for tm in $(seq --format="%02g" 6 -1 1); do - vyyyymmddhh_p1h_tm=$($DATE_UTIL -d "${unix_vdate_p1h} ${tm} hours ago" +%Y%m%d%H) - if [ ${vyyyymmddhh_p1h_tm} -le ${vdate_last} ]; then + vdate_p1h_tm=$($DATE_UTIL -d "${unix_vdate_p1h} ${tm} hours ago" +%Y%m%d%H) + if [ ${vdate_p1h_tm} -le ${vdate_last} ]; then tm2=$(echo $tm | awk '{printf "%02d\n", $0;}') ${mv_or_cp} ${ndas_day_dir_raw}/nam.t${vhh_p1h}z.prepbufr.tm${tm2}.nr \ - ${ndas_basedir_proc}/prepbufr.ndas.${vyyyymmddhh_p1h_tm} + ${ndas_basedir_proc}/prepbufr.ndas.${vdate_p1h_tm} fi done @@ -816,62 +820,6 @@ echo "HELLO BBBBB" fi - # If at last forecast hour, make sure we're getting the last observations -# if [[ ${current_fcst} -eq ${fcst_length} ]]; then -# -# echo "Retrieving NDAS obs for final forecast hour" -# vhh_noZero=$((vhh_noZero + 6 - (vhh_noZero % 6))) -# if [[ ${vhh_noZero} -eq 24 ]]; then -# vyyyymmdd=${vyyyymmdd_p1d} -# vhh=00 -# elif [[ ${vhh_noZero} -eq 6 ]]; then -# vhh=06 -# else -# vhh=${vhh_noZero} -# fi -# -# if [[ ! -d "${ndas_raw}/${vyyyymmdd}${vhh}" ]]; then -# mkdir -p ${ndas_raw}/${vyyyymmdd}${vhh} -# fi -# -# cd ${ndas_raw} -# # Pull NDAS data from HPSS -# cmd=" -# python3 -u ${USHdir}/retrieve_data.py \ -# --debug \ -# --file_set obs \ -# --config ${PARMdir}/data_locations.yml \ -# --cycle_date ${vyyyymmdd}${vhh} \ -# --data_stores hpss \ -# --data_type NDAS_obs \ -# --output_path ${ndas_raw}/${vyyyymmdd}${vhh} \ -# --summary_file ${logfile}" -# -# echo "CALLING: ${cmd}" -# -# $cmd || print_err_msg_exit "\ -# Could not retrieve NDAS data from HPSS -# -# The following command exited with a non-zero exit status: -# ${cmd} -#" -# -# if [[ ! 
-d "${ndas_basedir_proc}" ]]; then -# mkdir -p ${ndas_basedir_proc} -# fi -# -# for tm in $(seq 1 6); do -# last_fhr=$((fcst_length + 6 - (vhh_noZero % 6))) -# unix_fdate=$($DATE_UTIL -d "${unix_init_DATE} ${last_fhr} hours" "+%Y-%m-%d %H:00:00") -# vyyyymmddhh_tm=$($DATE_UTIL -d "${unix_fdate} ${tm} hours ago" +%Y%m%d%H) -# tm2=$(echo $tm | awk '{printf "%02d\n", $0;}') -# -# ${mv_or_cp} ${ndas_raw}/${vyyyymmdd}${vhh}/nam.t${vhh}z.prepbufr.tm${tm2}.nr \ -# ${ndas_basedir_proc}/prepbufr.ndas.${vyyyymmddhh_tm} -# done -# -# fi - fi # #----------------------------------------------------------------------- From bc276fe188aa516d3f365f0b9fa5648da2cbc0ed Mon Sep 17 00:00:00 2001 From: gerard ketefian Date: Mon, 22 Jul 2024 19:26:53 -0600 Subject: [PATCH 018/131] Add debugging statement to clarify the current working directory where cleanup is happening. --- ush/retrieve_data.py | 1 + 1 file changed, 1 insertion(+) diff --git a/ush/retrieve_data.py b/ush/retrieve_data.py index 5acf9d5ce9..5b4320ccb6 100755 --- a/ush/retrieve_data.py +++ b/ush/retrieve_data.py @@ -51,6 +51,7 @@ def clean_up_output_dir(expected_subdir, local_archive, output_path, source_path unavailable = {} expand_source_paths = [] logging.debug(f"Cleaning up local paths: {source_paths}") + logging.debug(f"Looking for these local paths under directory: {os.getcwd()}") for p in source_paths: expand_source_paths.extend(glob.glob(p.lstrip("/"))) From fe50a5dca742ecb248bcd78a265c7a79a39ddf95 Mon Sep 17 00:00:00 2001 From: gerard ketefian Date: Thu, 25 Jul 2024 11:32:52 -0600 Subject: [PATCH 019/131] Add code to cause the script to wait until all the (processed) obs files, that are expected to be created once the task is finished actually get created. This is needed because it is possible that for some forecast hours for which there is overlap between cycles, the files are being retrieved and processed by the get_obs_... task for another cycle. 
--- scripts/exregional_get_verif_obs.sh | 50 ++++++++++++++++++++++++++--- 1 file changed, 45 insertions(+), 5 deletions(-) diff --git a/scripts/exregional_get_verif_obs.sh b/scripts/exregional_get_verif_obs.sh index 93f17bfa60..b615f05ffa 100755 --- a/scripts/exregional_get_verif_obs.sh +++ b/scripts/exregional_get_verif_obs.sh @@ -146,6 +146,7 @@ fi # Make sure fcst_length isn't octal (leading zero) fcst_length=$((10#${fcst_length})) +processed_fp_list=() current_fcst=0 while [[ ${current_fcst} -le ${fcst_length} ]]; do @@ -216,6 +217,9 @@ echo "ihh = ${ihh}" # hour-of-day in the name of the file. ccpa_fp_proc="${ccpa_day_dir_proc}/${ccpa_fn}" + # Store the full path to the processed file in a list for later use. + processed_fp_list+=(${ccpa_fp_proc}) + # Check if the CCPA grib2 file for the current valid time already exists # at its procedded location on disk. If so, skip and go to the next valid # time. If not, pull it. @@ -491,6 +495,9 @@ echo "ihh = ${ihh}" # valid time. mrms_fp_proc="${mrms_day_dir_proc}/${mrms_fn}" + # Store the full path to the processed file in a list for later use. + processed_fp_list+=(${mrms_fp_proc}) + # Check if the processed MRMS grib2 file for the current field and valid # time already exists on disk. If so, skip this valid time and go to the # next one. If not, pull it. @@ -666,6 +673,9 @@ echo "vdate_p1h = ${vdate_p1h}" # valid time. ndas_fp_proc="${ndas_basedir_proc}/${ndas_fn}" + # Store the full path to the processed file in a list for later use. + processed_fp_list+=(${ndas_fp_proc}) + # Check if the processed NDAS prepbufr file for the current valid time # already exists on disk. If so, skip this valid time and go to the next # one. @@ -907,11 +917,41 @@ NDAS data for the current valid time from remote locations." 
current_fcst=$((${current_fcst} + 1)) done - - -# Clean up raw, unprocessed observation files -#rm -rf ${OBS_DIR}/raw - +# +#----------------------------------------------------------------------- +# +# At this point, the processed data files for all output forecast hours +# for this cycle are either being created (by a get_obs_... task for +# another cycle) or have already been created (either by this get_obs_... +# task or one for another cycle). In case they are still being created, +# make sure they have in fact been created before exiting this script. +# If we don't do this, it is possible for this get_obs_... task to complete +# successfully but still have processed obs files for some forecast hours +# not yet created, which is undesirable. +# +#----------------------------------------------------------------------- +# +num_proc_files=${#processed_fp_list[@]} +for (( i=0; i<${num_proc_files}; i++ )); do + obs_fp="${processed_fp_list[$i]}" + while [[ ! -f "${obs_fp}" ]]; do + echo "Waiting for ${OBTYPE} file to be created on disk (by a get_obs_... workflow task for another cycle):" + echo " obs_fp = \"${obs_fp}\"" + sleep 5s + done +done +# +#----------------------------------------------------------------------- +# +# Clean up raw directories. +# +#----------------------------------------------------------------------- +# +remove_raw="TRUE" +#remove_raw="FALSE" +if [ "${remove_raw}" = "TRUE" ]; then + rm -rf ${OBS_DIR}/raw_* +fi # #----------------------------------------------------------------------- # From dc4971dedfed12f45e1dd13411300ac3fe1ae53a Mon Sep 17 00:00:00 2001 From: gerard ketefian Date: Thu, 25 Jul 2024 11:45:51 -0600 Subject: [PATCH 020/131] Fix the workflow task dependencies and ex-script for the GenEnsProd and EnsembleStat tasks such that GenEnsProd does not depend on the completion of get_obs_... tasks (because it doesn't need observations) but only forecast output while EnsembleStat does. 
--- parm/wflow/verify_ens.yaml | 47 ++-- ...onal_run_met_genensprod_or_ensemblestat.sh | 64 +++--- ush/set_vx_fhr_list.sh | 206 +++++++++++++++--- 3 files changed, 231 insertions(+), 86 deletions(-) diff --git a/parm/wflow/verify_ens.yaml b/parm/wflow/verify_ens.yaml index 18b23a1eb0..4be4c5b47e 100644 --- a/parm/wflow/verify_ens.yaml +++ b/parm/wflow/verify_ens.yaml @@ -37,16 +37,9 @@ metatask_GenEnsProd_EnsembleStat_CCPA: FCST_LEVEL: 'A#ACCUM_HH#' FCST_THRESH: 'all' dependency: - and: - # The PcpCombine task for obs must be complete because this GenEnsProd - # task checks to see the forecast hours for which obs are available before - # processing the forecast for those hours. - taskdep_pcpcombine_obs: - attrs: - task: run_MET_PcpCombine_obs_APCP#ACCUM_HH#h - metataskdep_pcpcombine_fcst: - attrs: - metatask: PcpCombine_fcst_APCP#ACCUM_HH#h_all_mems + metataskdep_pcpcombine_fcst: + attrs: + metatask: PcpCombine_fcst_APCP#ACCUM_HH#h_all_mems task_run_MET_EnsembleStat_vx_APCP#ACCUM_HH#h: <<: *task_GenEnsProd_CCPA envars: @@ -54,9 +47,13 @@ metatask_GenEnsProd_EnsembleStat_CCPA: METPLUSTOOLNAME: 'ENSEMBLESTAT' FCST_THRESH: 'none' dependency: - taskdep_genensprod: - attrs: - task: run_MET_GenEnsProd_vx_APCP#ACCUM_HH#h + and: + taskdep_pcpcombine_obs: + attrs: + task: run_MET_PcpCombine_obs_APCP#ACCUM_HH#h + taskdep_genensprod: + attrs: + task: run_MET_GenEnsProd_vx_APCP#ACCUM_HH#h metatask_GenEnsProd_EnsembleStat_NOHRSC: var: @@ -75,9 +72,6 @@ metatask_GenEnsProd_EnsembleStat_NOHRSC: FCST_THRESH: 'all' dependency: and: - # The PcpCombine task for obs must be complete because this GenEnsProd - # task checks to see the forecast hours for which obs are available before - # processing the forecast for those hours. 
metataskdep_pcpcombine_fcst: attrs: metatask: PcpCombine_fcst_ASNOW#ACCUM_HH#h_all_mems @@ -149,15 +143,8 @@ metatask_GenEnsProd_EnsembleStat_NDAS: FCST_THRESH: 'all' walltime: 02:30:00 dependency: - and: - # The Pb2nc task (which is run only for obs) must be complete because - # this GenEnsProd task checks to see the forecast hours for which obs - # are available before processing the forecast for those hours. - taskdep_pb2nc: - attrs: - task: run_MET_Pb2nc_obs - metataskdep_check_post_output: - <<: *check_post_output + metataskdep_check_post_output: + <<: *check_post_output task_run_MET_EnsembleStat_vx_#VAR#: <<: *task_GenEnsProd_NDAS envars: @@ -165,9 +152,13 @@ metatask_GenEnsProd_EnsembleStat_NDAS: METPLUSTOOLNAME: 'ENSEMBLESTAT' walltime: 01:00:00 dependency: - taskdep_genensprod: - attrs: - task: run_MET_GenEnsProd_vx_#VAR# + and: + taskdep_pb2nc: + attrs: + task: run_MET_Pb2nc_obs + taskdep_genensprod: + attrs: + task: run_MET_GenEnsProd_vx_#VAR# metatask_GridStat_CCPA_ensmeanprob_all_accums: var: diff --git a/scripts/exregional_run_met_genensprod_or_ensemblestat.sh b/scripts/exregional_run_met_genensprod_or_ensemblestat.sh index 93caeaa7f2..5003047f4f 100755 --- a/scripts/exregional_run_met_genensprod_or_ensemblestat.sh +++ b/scripts/exregional_run_met_genensprod_or_ensemblestat.sh @@ -200,30 +200,40 @@ STAGING_DIR="${OUTPUT_BASE}/stage/${FIELDNAME_IN_MET_FILEDIR_NAMES}" # #----------------------------------------------------------------------- # -# Set the array of forecast hours for which to run the MET/METplus tool. -# This is done by starting with the full list of forecast hours for which -# there is forecast output and then removing from that list any forecast -# hours for which there is no corresponding observation data. 
-# -# Note that strictly speaking, this does not need to be done if the MET/ -# METplus tool being called is GenEnsProd (because this tool only operates -# on forecasts), but we run the check anyway in this case in order to -# keep the code here simpler and because the output of GenEnsProd for -# forecast hours with missing observations will not be used anyway in -# downstream verification tasks. -# -#----------------------------------------------------------------------- -# -set_vx_fhr_list \ - cdate="${CDATE}" \ - fcst_len_hrs="${FCST_LEN_HRS}" \ - field="$VAR" \ - accum_hh="${ACCUM_HH}" \ - base_dir="${OBS_INPUT_DIR}" \ - fn_template="${OBS_INPUT_FN_TEMPLATE}" \ - check_accum_contrib_files="FALSE" \ - num_missing_files_max="${NUM_MISSING_OBS_FILES_MAX}" \ - outvarname_fhr_list="FHR_LIST" +# Generate the list of forecast hours for which to run the specified +# METplus tool. +# +# If running the GenEnsProd tool, we set this to the list of forecast +# output times without filtering for the existence of observation files +# corresponding to those times. This is because GenEnsProd operates +# only on forecasts; it does not need observations. +# +# On the other hand, if running the EnsembleStat tool, we set the list of +# forecast hours to a set of times that takes into consideration whether +# or not observations exist. We do this by starting with the full list +# of forecast times for which there is forecast output and then removing +# from that list any times for which there is no corresponding observations. 
+# +#----------------------------------------------------------------------- +# +if [ "${MetplusToolName}" = "GenEnsProd" ]; then + set_vx_fhr_list_no_missing \ + fcst_len_hrs="${FCST_LEN_HRS}" \ + field="$VAR" \ + accum_hh="${ACCUM_HH}" \ + outvarname_fhr_list_no_missing="FHR_LIST" +elif [ "${MetplusToolName}" = "EnsembleStat" ]; then + set_vx_fhr_list \ + cdate="${CDATE}" \ + fcst_len_hrs="${FCST_LEN_HRS}" \ + field="$VAR" \ + accum_hh="${ACCUM_HH}" \ + base_dir="${OBS_INPUT_DIR}" \ + fn_template="${OBS_INPUT_FN_TEMPLATE}" \ + check_accum_contrib_files="FALSE" \ + num_missing_files_max="${NUM_MISSING_OBS_FILES_MAX}" \ + outvarname_fhr_list="FHR_LIST" +fi # #----------------------------------------------------------------------- # @@ -290,7 +300,7 @@ metplus_log_fn="metplus.log.${metplus_log_bn}" # #----------------------------------------------------------------------- # -# Load the yaml-like file containing the configuration for ensemble +# Load the yaml-like file containing the configuration for ensemble # verification. # #----------------------------------------------------------------------- @@ -368,7 +378,7 @@ settings="\ # # Verification configuration dictionary. # -'vx_config_dict': +'vx_config_dict': ${vx_config_dict:-} " @@ -380,7 +390,7 @@ uw template render \ -o ${metplus_config_fp} \ --verbose \ --values-file "${tmpfile}" \ - --search-path "/" + --search-path "/" err=$? rm $tmpfile diff --git a/ush/set_vx_fhr_list.sh b/ush/set_vx_fhr_list.sh index 5cefc78365..8101e927e5 100644 --- a/ush/set_vx_fhr_list.sh +++ b/ush/set_vx_fhr_list.sh @@ -1,14 +1,24 @@ # #----------------------------------------------------------------------- # -# This file defines a function that generates a list of forecast hours -# such that for each hour there exist a corresponding obs file. It does -# this by first generating a generic sequence of forecast hours and then -# removing from that sequence any hour for which there is no obs file. 
+# This file defines functions used to generate sets of forecast hours for +# which verification will be performed. # #----------------------------------------------------------------------- # -function set_vx_fhr_list() { + +function set_vx_fhr_list_no_missing() { +# +#----------------------------------------------------------------------- +# +# This function sets the forecast hours for which verification will be +# performed under the assumption that that the data file (which may be +# a forecast output file or an observation file) for each hour is available +# (i.e. that there are no missing files). +# +#----------------------------------------------------------------------- +# + # #----------------------------------------------------------------------- # @@ -48,15 +58,10 @@ function set_vx_fhr_list() { #----------------------------------------------------------------------- # local valid_args=( \ - "cdate" \ "fcst_len_hrs" \ "field" \ "accum_hh" \ - "base_dir" \ - "fn_template" \ - "check_accum_contrib_files" \ - "num_missing_files_max" \ - "outvarname_fhr_list" \ + "outvarname_fhr_list_no_missing" \ ) process_args valid_args "$@" # @@ -76,27 +81,15 @@ function set_vx_fhr_list() { # #----------------------------------------------------------------------- # - local crnt_tmpl \ - crnt_tmpl_esc \ - fhr \ - fhr_array \ - fhr_int \ + local fhr_array \ fhr_list \ + fhr_int \ fhr_min \ - fhr_max \ - fn \ - fp \ - i \ - num_fcst_hrs \ - num_missing_files \ - regex_search_tmpl \ - remainder \ - skip_this_fhr + fhr_max # #----------------------------------------------------------------------- # -# Create array containing set of forecast hours for which we will check -# for the existence of corresponding observation or forecast file. +# Create the array of forecast hours. 
# #----------------------------------------------------------------------- # @@ -140,6 +133,157 @@ this field (field): fhr_max="${fcst_len_hrs}" fhr_array=($( seq ${fhr_min} ${fhr_int} ${fhr_max} )) + + # Express the forecast hour array as a (scalar) string containing a comma + # (and space) separated list of the elements of fhr_array. + fhr_list=$( printf "%s, " "${fhr_array[@]}" ) + fhr_list=$( echo "${fhr_list}" | $SED "s/, $//g" ) + + print_info_msg "$VERBOSE" "\ +Initial (i.e. before filtering for missing files) set of forecast hours +(saved in a scalar string variable) is: + fhr_list = \"${fhr_list}\" +" +# +#----------------------------------------------------------------------- +# +# Set output variables. +# +#----------------------------------------------------------------------- +# + if [ ! -z "${outvarname_fhr_list_no_missing}" ]; then + printf -v ${outvarname_fhr_list_no_missing} "%s" "${fhr_list}" + fi +# +#----------------------------------------------------------------------- +# +# Restore the shell options saved at the beginning of this script/function. +# +#----------------------------------------------------------------------- +# + { restore_shell_opts; } > /dev/null 2>&1 + +} + + + +# +#----------------------------------------------------------------------- +# +# This function generates a list of forecast hours such that for each +# such hour, there exists a corresponding data file with a name of the +# form specified by the template fn_template. Depending on fn_template, +# this file may contain forecast or observation data. This function +# generates this forecast hour list by first generating a set of hours +# under the assumption that there is a corresponding data file for each +# hour and then removing from that list any hour for which there is no +# data file. 
+# +#----------------------------------------------------------------------- +# +function set_vx_fhr_list() { +# +#----------------------------------------------------------------------- +# +# Save current shell options (in a global array). Then set new options +# for this script/function. +# +#----------------------------------------------------------------------- +# + { save_shell_opts; set -u +x; } > /dev/null 2>&1 +# +#----------------------------------------------------------------------- +# +# Get the full path to the file in which this script/function is located +# (scrfunc_fp), the name of that file (scrfunc_fn), and the directory in +# which the file is located (scrfunc_dir). +# +#----------------------------------------------------------------------- +# + local scrfunc_fp=$( $READLINK -f "${BASH_SOURCE[0]}" ) + local scrfunc_fn=$( basename "${scrfunc_fp}" ) + local scrfunc_dir=$( dirname "${scrfunc_fp}" ) +# +#----------------------------------------------------------------------- +# +# Get the name of this function. +# +#----------------------------------------------------------------------- +# + local func_name="${FUNCNAME[0]}" +# +#----------------------------------------------------------------------- +# +# Specify the set of valid argument names for this script/function. Then +# process the arguments provided to this script/function (which should +# consist of a set of name-value pairs of the form arg1="value1", etc). +# +#----------------------------------------------------------------------- +# + local valid_args=( \ + "cdate" \ + "fcst_len_hrs" \ + "field" \ + "accum_hh" \ + "base_dir" \ + "fn_template" \ + "check_accum_contrib_files" \ + "num_missing_files_max" \ + "outvarname_fhr_list" \ + ) + process_args valid_args "$@" +# +#----------------------------------------------------------------------- +# +# For debugging purposes, print out values of arguments passed to this +# script. 
Note that these will be printed out only if VERBOSE is set to +# TRUE. +# +#----------------------------------------------------------------------- +# + print_input_args valid_args +# +#----------------------------------------------------------------------- +# +# Declare local variables. +# +#----------------------------------------------------------------------- +# + local crnt_tmpl \ + crnt_tmpl_esc \ + fhr \ + fhr_array \ + fhr_list \ + fn \ + fp \ + i \ + num_fcst_hrs \ + num_missing_files \ + regex_search_tmpl \ + remainder \ + skip_this_fhr +# +#----------------------------------------------------------------------- +# +# For the specified field, generate the set of forecast hours at which +# verification will be performed under the assumption that for each such +# hour, the corresponding forecast and/or observation files exists. Thus, +# this set of forecast hours is an initial guess for the hours at which +# vx will be performed. +# +#----------------------------------------------------------------------- +# + set_vx_fhr_list_no_missing \ + fcst_len_hrs="${fcst_len_hrs}" \ + field="${field}" \ + accum_hh="${accum_hh}" \ + outvarname_fhr_list_no_missing="fhr_list_no_missing" + + # For convenience, save the scalar variable fhr_list_no_missing to a bash + # array. + fhr_array=$( printf "%s" "${fhr_list_no_missing}" | $SED "s/,//g" ) + fhr_array=( ${fhr_array} ) + print_info_msg "$VERBOSE" "\ Initial (i.e. before filtering for missing files) set of forecast hours is: @@ -174,7 +318,7 @@ is: skip_this_fhr="FALSE" for (( j=0; j<${num_back_hrs}; j++ )); do # -# Use the provided template to set the name of/relative path to the file +# Use the provided template to set the name of/relative path to the file # Note that the while-loop below is over all METplus time string templates # of the form {...} in the template fn_template; it continues until all # such templates have been evaluated to actual time strings. 
@@ -195,7 +339,7 @@ is: # # Replace METplus time templates in fn with actual times. Note that # when using sed, we need to escape various characters (question mark, -# closing and opening curly braces, etc) in the METplus template in +# closing and opening curly braces, etc) in the METplus template in # order for the sed command below to work properly. # crnt_tmpl_esc=$( echo "${crnt_tmpl}" | \ @@ -253,8 +397,8 @@ METplus configuration file. # fhr_list=$( echo "${fhr_list}" | $SED "s/^,//g" ) print_info_msg "$VERBOSE" "\ -Final (i.e. after filtering for missing files) set of forecast hours is -(written as a single string): +Final (i.e. after filtering for missing files) set of forecast hours +(saved in a scalar string variable) is: fhr_list = \"${fhr_list}\" " # From 13aba39e140f116b9a553229bc975247e2282ec0 Mon Sep 17 00:00:00 2001 From: gerard ketefian Date: Thu, 25 Jul 2024 15:54:28 -0600 Subject: [PATCH 021/131] Bug fixes after running WE2E vx suite. --- parm/wflow/verify_ens.yaml | 10 +++++++--- scripts/exregional_get_verif_obs.sh | 29 ++++++++++++++++++++++------- 2 files changed, 29 insertions(+), 10 deletions(-) diff --git a/parm/wflow/verify_ens.yaml b/parm/wflow/verify_ens.yaml index 4be4c5b47e..9f1079b505 100644 --- a/parm/wflow/verify_ens.yaml +++ b/parm/wflow/verify_ens.yaml @@ -248,6 +248,10 @@ metatask_PointStat_NDAS_ensmeanprob: FCST_LEVEL: 'all' FCST_THRESH: 'all' dependency: - taskdep: - attrs: - task: run_MET_GenEnsProd_vx_#VAR# + and: + taskdep_pb2nc: + attrs: + task: run_MET_Pb2nc_obs + taskdep_genensprod: + attrs: + task: run_MET_GenEnsProd_vx_#VAR# diff --git a/scripts/exregional_get_verif_obs.sh b/scripts/exregional_get_verif_obs.sh index b615f05ffa..72be333b82 100755 --- a/scripts/exregional_get_verif_obs.sh +++ b/scripts/exregional_get_verif_obs.sh @@ -134,10 +134,11 @@ unix_init_DATE="${iyyyy}-${imm}-${idd} ${ihh}:00:00" # This awk expression gets the last item of the list $FHR fcst_length=$(echo ${FHR} | awk '{ print $NF }') 
+vdate_last=$($DATE_UTIL -d "${unix_init_DATE} ${fcst_length} hours" +%Y%m%d%H) if [[ ${OBTYPE} == "NDAS" ]]; then - vdate_last=$($DATE_UTIL -d "${unix_init_DATE} ${fcst_length} hours" +%Y%m%d%H) vhh_last=$(echo ${vdate_last} | cut -c9-10) - hours_to_add=$(( vhh_last + 6 - (vhh_last % 6) )) + #hours_to_add=$(( vhh_last + 6 - (vhh_last % 6) )) + hours_to_add=$(( 6 - (vhh_last % 6) )) fcst_length_rounded_up=$(( fcst_length + hours_to_add )) # vdate_last_rounded_up=$($DATE_UTIL -d "${unix_init_DATE} ${fcst_length_rounded_up} hours" +%Y%m%d%H) fcst_length=${fcst_length_rounded_up} @@ -218,7 +219,9 @@ echo "ihh = ${ihh}" ccpa_fp_proc="${ccpa_day_dir_proc}/${ccpa_fn}" # Store the full path to the processed file in a list for later use. - processed_fp_list+=(${ccpa_fp_proc}) + if [ ${vyyyymmdd}${vhh} -le ${vdate_last} ]; then + processed_fp_list+=(${ccpa_fp_proc}) + fi # Check if the CCPA grib2 file for the current valid time already exists # at its procedded location on disk. If so, skip and go to the next valid @@ -496,7 +499,9 @@ echo "ihh = ${ihh}" mrms_fp_proc="${mrms_day_dir_proc}/${mrms_fn}" # Store the full path to the processed file in a list for later use. - processed_fp_list+=(${mrms_fp_proc}) + if [ ${vyyyymmdd}${vhh} -le ${vdate_last} ]; then + processed_fp_list+=(${mrms_fp_proc}) + fi # Check if the processed MRMS grib2 file for the current field and valid # time already exists on disk. If so, skip this valid time and go to the @@ -674,7 +679,13 @@ echo "vdate_p1h = ${vdate_p1h}" ndas_fp_proc="${ndas_basedir_proc}/${ndas_fn}" # Store the full path to the processed file in a list for later use. - processed_fp_list+=(${ndas_fp_proc}) +echo +echo "LLLLLLLLLLLLL" + if [ ${vyyyymmdd}${vhh} -le ${vdate_last} ]; then +echo "MMMMMMMMMMMMM" +echo "processed_fp_list = |${processed_fp_list[@]}" + processed_fp_list+=(${ndas_fp_proc}) + fi # Check if the processed NDAS prepbufr file for the current valid time # already exists on disk. 
If so, skip this valid time and go to the next @@ -931,7 +942,11 @@ done # #----------------------------------------------------------------------- # +echo +echo "KKKKKKKKKKKK" +echo "processed_fp_list = |${processed_fp_list[@]}" num_proc_files=${#processed_fp_list[@]} +echo "num_proc_files = ${num_proc_files}" for (( i=0; i<${num_proc_files}; i++ )); do obs_fp="${processed_fp_list[$i]}" while [[ ! -f "${obs_fp}" ]]; do @@ -947,8 +962,8 @@ done # #----------------------------------------------------------------------- # -remove_raw="TRUE" -#remove_raw="FALSE" +#remove_raw="TRUE" +remove_raw="FALSE" if [ "${remove_raw}" = "TRUE" ]; then rm -rf ${OBS_DIR}/raw_* fi From 860f62e3aee3dd71d3eb23765e9fd846b61b5444 Mon Sep 17 00:00:00 2001 From: gerard ketefian Date: Thu, 25 Jul 2024 15:55:20 -0600 Subject: [PATCH 022/131] Bugfix to dependencies of ensemble vx tasks that come after GenEnsProd due to changes to dependencies of GenEnsProd tasks in previous commit(s). --- parm/wflow/verify_ens.yaml | 30 +++++++++++++++++++++--------- 1 file changed, 21 insertions(+), 9 deletions(-) diff --git a/parm/wflow/verify_ens.yaml b/parm/wflow/verify_ens.yaml index 9f1079b505..4d01281b6d 100644 --- a/parm/wflow/verify_ens.yaml +++ b/parm/wflow/verify_ens.yaml @@ -180,9 +180,13 @@ metatask_GridStat_CCPA_ensmeanprob_all_accums: FCST_LEVEL: 'A#ACCUM_HH#' FCST_THRESH: 'all' dependency: - taskdep: - attrs: - task: run_MET_GenEnsProd_vx_APCP#ACCUM_HH#h + and: + taskdep_pcpcombine_obs: + attrs: + task: run_MET_PcpCombine_obs_APCP#ACCUM_HH#h + taskdep_genensprod: + attrs: + task: run_MET_GenEnsProd_vx_APCP#ACCUM_HH#h metatask_GridStat_NOHRSC_ensmeanprob_all_accums: var: @@ -204,9 +208,13 @@ metatask_GridStat_NOHRSC_ensmeanprob_all_accums: FCST_LEVEL: 'A#ACCUM_HH#' FCST_THRESH: 'all' dependency: - taskdep: - attrs: - task: run_MET_GenEnsProd_vx_ASNOW#ACCUM_HH#h + and: + taskdep: + attrs: + task: get_obs_nohrsc + taskdep_genensprod: + attrs: + task: run_MET_GenEnsProd_vx_ASNOW#ACCUM_HH#h 
metatask_GridStat_MRMS_ensprob: var: @@ -224,9 +232,13 @@ metatask_GridStat_MRMS_ensprob: FCST_LEVEL: 'L0' FCST_THRESH: 'all' dependency: - taskdep: - attrs: - task: run_MET_GenEnsProd_vx_#VAR# + and: + taskdep_get_obs_mrms: + attrs: + task: get_obs_mrms + taskdep_genensprod: + attrs: + task: run_MET_GenEnsProd_vx_#VAR# metatask_PointStat_NDAS_ensmeanprob: var: From e54ec16d6e7f12cc095e053d00388081dc7ffe60 Mon Sep 17 00:00:00 2001 From: gerard ketefian Date: Fri, 26 Jul 2024 00:11:17 -0600 Subject: [PATCH 023/131] Bug fixes to get all WE2E vx tests to succeed. --- scripts/exregional_get_verif_obs.sh | 73 +++++++++++++++++++---------- 1 file changed, 47 insertions(+), 26 deletions(-) diff --git a/scripts/exregional_get_verif_obs.sh b/scripts/exregional_get_verif_obs.sh index 72be333b82..f3a52710c5 100755 --- a/scripts/exregional_get_verif_obs.sh +++ b/scripts/exregional_get_verif_obs.sh @@ -124,7 +124,7 @@ idd=$(echo ${PDY} | cut -c7-8) ihh=${cyc} echo -echo "HELLO GGGGGGGG" +echo "HELLO AAAAAAAAAAA" iyyyymmddhh=${PDY}${cyc} echo "iyyyymmddhh = ${iyyyymmddhh}" @@ -134,14 +134,17 @@ unix_init_DATE="${iyyyy}-${imm}-${idd} ${ihh}:00:00" # This awk expression gets the last item of the list $FHR fcst_length=$(echo ${FHR} | awk '{ print $NF }') +echo +echo "BYE 00000000" vdate_last=$($DATE_UTIL -d "${unix_init_DATE} ${fcst_length} hours" +%Y%m%d%H) if [[ ${OBTYPE} == "NDAS" ]]; then +echo "BYE 111111111" vhh_last=$(echo ${vdate_last} | cut -c9-10) #hours_to_add=$(( vhh_last + 6 - (vhh_last % 6) )) hours_to_add=$(( 6 - (vhh_last % 6) )) fcst_length_rounded_up=$(( fcst_length + hours_to_add )) # vdate_last_rounded_up=$($DATE_UTIL -d "${unix_init_DATE} ${fcst_length_rounded_up} hours" +%Y%m%d%H) - fcst_length=${fcst_length_rounded_up} +# fcst_length=${fcst_length_rounded_up} fi # Make sure fcst_length isn't octal (leading zero) @@ -152,14 +155,17 @@ current_fcst=0 while [[ ${current_fcst} -le ${fcst_length} ]]; do echo -echo "HELLO GGGGGGGG" +echo "HELLO BBBBBBBBBBB" echo 
"current_fcst = ${current_fcst}" # Calculate valid date info using date utility vdate=$($DATE_UTIL -d "${unix_init_DATE} ${current_fcst} hours" +%Y%m%d%H) - #unix_vdate=$($DATE_UTIL -d "${unix_init_DATE} ${current_fcst} hours" "+%Y-%m-%d %H:00:00") + unix_vdate=$($DATE_UTIL -d "${unix_init_DATE} ${current_fcst} hours" "+%Y-%m-%d %H:00:00") vyyyymmdd=$(echo ${vdate} | cut -c1-8) vhh=$(echo ${vdate} | cut -c9-10) +echo +echo "BYE 222222222" +echo "vhh = ${vhh}" # Calculate valid date + 1 day; this is needed because some obs files # are stored in the *next* day's 00h directory @@ -167,11 +173,10 @@ echo "current_fcst = ${current_fcst}" vyyyymmdd_p1d=$(echo ${vdate_p1d} | cut -c1-8) echo -echo "HELLO HHHHHHHH" +echo "HELLO CCCCCCCCCC" echo "vyyyymmdd = ${vyyyymmdd}" echo "vyyyymmdd_p1d = ${vyyyymmdd_p1d}" echo "ihh = ${ihh}" -#exit #remove leading zero again, this time keep original vhh_noZero=$((10#${vhh})) @@ -655,7 +660,8 @@ echo "ihh = ${ihh}" vhh_p1h_noZero=$((10#${vhh_p1h})) echo "" -echo "HELLO PPPPPPP" +echo "HELLO DDDDDDDDDDD" +echo "vdate = ${vdate}" echo "vyyyymmdd = ${vyyyymmdd}" echo "vhh = ${vhh}" echo "vhh_noZero = ${vhh_noZero}" @@ -672,7 +678,7 @@ echo "vdate_p1h = ${vdate_p1h}" # This is the name of the processed file. Note that this is not the # same as the name of the raw file, i.e. the file extracted from the # archive (tar) file retrieved below by the retrieve_data.py script. - ndas_fn="prepbufr.ndas.${vyyyymmdd}${vhh}" + ndas_fn="prepbufr.ndas.${vdate}" # Full path to the processed NDAS prepbufr file for the current field and # valid time. @@ -680,9 +686,9 @@ echo "vdate_p1h = ${vdate_p1h}" # Store the full path to the processed file in a list for later use. 
echo -echo "LLLLLLLLLLLLL" - if [ ${vyyyymmdd}${vhh} -le ${vdate_last} ]; then -echo "MMMMMMMMMMMMM" +echo "EEEEEEEEEEEEEE" + if [ ${vdate} -le ${vdate_last} ]; then +echo "FFFFFFFFFFFFFF" echo "processed_fp_list = |${processed_fp_list[@]}" processed_fp_list+=(${ndas_fp_proc}) fi @@ -717,7 +723,7 @@ echo "processed_fp_list = |${processed_fp_list[@]}" mv_or_cp="cp" echo "" -echo "HELLO AAAAA" +echo "HELLO GGGGGGGGGGGGG" echo "vhh_noZero = ${vhh_noZero}" echo "vhh_p1h_noZero = ${vhh_p1h_noZero}" @@ -726,7 +732,22 @@ echo "vhh_p1h_noZero = ${vhh_p1h_noZero}" # 5 hours preceeding it) if the hour-of-day corresponding to the current # valid time plus 1 hour corresponds to one of 0, 6, 12, and 18. if [[ ${vhh_p1h_noZero} -eq 0 || ${vhh_p1h_noZero} -eq 6 || \ - ${vhh_p1h_noZero} -eq 12 || ${vhh_p1h_noZero} -eq 18 ]]; then + ${vhh_p1h_noZero} -eq 12 || ${vhh_p1h_noZero} -eq 18 || \ + ${current_fcst} -eq ${fcst_length} ]]; then + + if [[ ${vhh_p1h_noZero} -eq 0 || ${vhh_p1h_noZero} -eq 6 || \ + ${vhh_p1h_noZero} -eq 12 || ${vhh_p1h_noZero} -eq 18 ]]; then + unix_vdate_archive="${unix_vdate_p1h}" + vdate_archive="${vdate_p1h}" + vyyyymmdd_archive="${vyyyymmdd_p1h}" + vhh_archive=${vhh_p1h} + elif [[ ${current_fcst} -eq ${fcst_length} ]]; then + hours_to_archive=$(( 6 - (vhh % 6) )) + unix_vdate_archive=$($DATE_UTIL -d "${unix_vdate} ${hours_to_archive} hours" "+%Y-%m-%d %H:00:00") + vdate_archive=$($DATE_UTIL -d "${unix_vdate} ${hours_to_archive} hours" +%Y%m%d%H) + vyyyymmdd_archive=$(echo ${vdate_archive} | cut -c1-8) + vhh_archive=$(echo ${vdate_archive} | cut -c9-10) + fi # Base directory that will contain the 6-hourly subdirectories in which # the NDAS prepbufr files retrieved from archive files will be placed, @@ -734,8 +755,8 @@ echo "vhh_p1h_noZero = ${vhh_p1h_noZero}" # We refer to these as the "raw" NDAS base and 6-hourly directories # because they contain files as they are found in the archives before # any processing by this script. 
- ndas_basedir_raw="${ndas_basedir_proc}/raw_day${vyyyymmdd_p1h}" - ndas_day_dir_raw="${ndas_basedir_raw}/${vdate_p1h}" + ndas_basedir_raw="${ndas_basedir_proc}/raw_day${vyyyymmdd_archive}" + ndas_day_dir_raw="${ndas_basedir_raw}/${vdate_archive}" # Check if the raw 6-hourly directory already exists on disk. If so, it # means the NDAS prepbufr files for the current valid hour and the 5 hours @@ -746,9 +767,9 @@ echo "vhh_p1h_noZero = ${vhh_p1h_noZero}" if [[ -d "${ndas_day_dir_raw}" ]]; then print_info_msg " -${OBTYPE} raw directory for day ${vdate_p1h} exists on disk: +${OBTYPE} raw 6-hourly directory ${vdate_archive} exists on disk: ndas_day_dir_raw = \"${ndas_day_dir_raw}\" -This means NDAS files for the current valid time (${vyyyymmdd}) and the +This means NDAS files for the current valid time (${vdate}) and the 5 hours preceeding it have been or are being retrieved by a get_obs_ndas workflow task for another cycle. Thus, we will NOT attempt to retrieve NDAS data for the current valid time from remote locations." @@ -773,7 +794,7 @@ NDAS data for the current valid time from remote locations." --debug \ --file_set obs \ --config ${PARMdir}/data_locations.yml \ - --cycle_date ${vdate_p1h} \ + --cycle_date ${vdate_archive} \ --data_stores hpss \ --data_type NDAS_obs \ --output_path ${ndas_day_dir_raw} \ @@ -789,19 +810,19 @@ NDAS data for the current valid time from remote locations." " # Create a flag file that can be used to confirm the completion of the - # retrieval of all files for the 6-hour interval ending in vdate_p1h. + # retrieval of all files for the 6-hour interval ending in vdate_archive. touch ${ndas_day_dir_raw}/pull_completed.txt fi # Make sure the retrieval process for the 6-hour interval ending in - # vdate_p1h (which may have been executed above for this cycle or for + # vdate_archive (which may have been executed above for this cycle or for # another cycle) has completed by checking for the existence of the flag # file that marks completion. 
If not, keep checking until the flag file # shows up. while [[ ! -f "${ndas_day_dir_raw}/pull_completed.txt" ]]; do echo "Waiting for completion of the NDAS obs retrieval process for the" - echo "6-hour interval ending on ${vdate_p1h} ..." + echo "6-hour interval ending on ${vdate_archive} ..." sleep 5s done @@ -829,11 +850,11 @@ NDAS data for the current valid time from remote locations." # observations than tm00 for the equivalent time. for tm in $(seq 6 -1 1); do # for tm in $(seq --format="%02g" 6 -1 1); do - vdate_p1h_tm=$($DATE_UTIL -d "${unix_vdate_p1h} ${tm} hours ago" +%Y%m%d%H) - if [ ${vdate_p1h_tm} -le ${vdate_last} ]; then + vdate_archive_tm=$($DATE_UTIL -d "${unix_vdate_archive} ${tm} hours ago" +%Y%m%d%H) + if [[ ${vdate_archive_tm} -le ${vdate_last} ]]; then tm2=$(echo $tm | awk '{printf "%02d\n", $0;}') - ${mv_or_cp} ${ndas_day_dir_raw}/nam.t${vhh_p1h}z.prepbufr.tm${tm2}.nr \ - ${ndas_basedir_proc}/prepbufr.ndas.${vdate_p1h_tm} + ${mv_or_cp} ${ndas_day_dir_raw}/nam.t${vhh_archive}z.prepbufr.tm${tm2}.nr \ + ${ndas_basedir_proc}/prepbufr.ndas.${vdate_archive_tm} fi done @@ -943,7 +964,7 @@ done #----------------------------------------------------------------------- # echo -echo "KKKKKKKKKKKK" +echo "HHHHHHHHHHHHHHHH" echo "processed_fp_list = |${processed_fp_list[@]}" num_proc_files=${#processed_fp_list[@]} echo "num_proc_files = ${num_proc_files}" From 8e8a1c10defb814f153a5df88acff5ddfda098e9 Mon Sep 17 00:00:00 2001 From: gerard ketefian Date: Sun, 28 Jul 2024 09:36:19 -0600 Subject: [PATCH 024/131] Increase default wallclock time for get_obs_ccpa tasks since they're tending to time out for 48-hour forecasts. 
--- parm/wflow/verify_pre.yaml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/parm/wflow/verify_pre.yaml b/parm/wflow/verify_pre.yaml index da43336a0d..c99bd4b4e6 100644 --- a/parm/wflow/verify_pre.yaml +++ b/parm/wflow/verify_pre.yaml @@ -33,7 +33,7 @@ task_get_obs_ccpa: native: '{% if platform.get("SCHED_NATIVE_CMD_HPSS") %}{{ platform.SCHED_NATIVE_CMD_HPSS }}{% else %}{{ platform.SCHED_NATIVE_CMD}}{% endif %}' partition: '{% if platform.get("PARTITION_HPSS") %}&PARTITION_HPSS;{% else %}None{% endif %}' queue: "&QUEUE_HPSS;" - walltime: 00:45:00 + walltime: 02:00:00 task_get_obs_nohrsc: <<: *default_task_verify_pre From a34d99339e67cba2bd9abf474dd1543e77e1433f Mon Sep 17 00:00:00 2001 From: gerard ketefian Date: Sun, 28 Jul 2024 09:39:34 -0600 Subject: [PATCH 025/131] For each cycle except the last one, in the PcpCombine_obs tasks make sure PcpCombine operates only on those hours unique to the cycle, i.e. for those times starting from the initial time of the cycle to just before the initial time of the next cycle. For the PcpCombine_obs task for the last cycle, allow it to operate on all hours of that cycle's forecast. This ensures that the PcpCombine tasks for the various cycles do not clobber each other's output. Accordingly, change the dependencies of downstream tasks that depend on PcpCombine obs output to make sure they include all PcpCombine_obs tasks that cover the forecast period of the that downstream task's cycle. 
--- parm/wflow/verify_det.yaml | 39 +++++++++++++++++++++--- parm/wflow/verify_ens.yaml | 36 +++++++++++++++++++--- scripts/exregional_run_met_pcpcombine.sh | 21 ++++++++++++- 3 files changed, 86 insertions(+), 10 deletions(-) diff --git a/parm/wflow/verify_det.yaml b/parm/wflow/verify_det.yaml index 3acfa3e836..f69429bbe4 100644 --- a/parm/wflow/verify_det.yaml +++ b/parm/wflow/verify_det.yaml @@ -47,12 +47,41 @@ metatask_GridStat_CCPA_all_accums_all_mems: walltime: 02:00:00 dependency: and: - taskdep_pcpcombine_obs: + # The following will include dependencies on the PcpCombine_obs task for + # the current cycle as well as those from other cycles that process CCPA + # obs at valid times that are part of the current cycle's forecast. This + # dependence is necessary because each PcpCombine_obs task except the + # last one processes obs at valid times starting with the initial time + # of the current cycle's forecast and ending with the last output time + # of this forecast that is before the initial time of the next cycle's + # forecast. It will also include a dependency on the PcpCombine_fcst + # task for the current cycle. 
+ taskdep_pcpcombine_obs_fcst: attrs: - task: run_MET_PcpCombine_obs_APCP#ACCUM_HH#h - taskdep_pcpcombine_fcst: - attrs: - task: run_MET_PcpCombine_fcst_APCP#ACCUM_HH#h_mem#mem# + task: '{%- set num_cycl_dep = (workflow.FCST_LEN_HRS/workflow.INCR_CYCL_FREQ)|round(0,"ceil")|int %} + {%- set num_cycl_dep = num_cycl_dep %} + {%- for n in range(0, num_cycl_dep) %} + {%- set cycl_offset = n*workflow.INCR_CYCL_FREQ %} + {%- if n == 0 %} + {{- "run_MET_PcpCombine_obs_APCP#ACCUM_HH#h\" cycle_offset=\"%02d:00:00\"/>\n" % cycl_offset }} + {%- elif ((n > 0) and (n < num_cycl_dep-1)) %} + {{- " \n"}} + {{- " \n"}} + {{- " \n" % cycl_offset}} + {{- " \n"}} + {{- " \n" % cycl_offset}} + {{- " \n"}} + {%- elif (n == num_cycl_dep-1) %} + {{- " \n"}} + {{- " \n"}} + {{- " \n" % cycl_offset }} + {{- " \n"}} + {{- " \n" % cycl_offset }} + {{- " \n" }} + {%- endif %} + {%- endfor %} + {{- " \n" % cycl_offset }} + {%- elif ((n > 0) and (n < num_cycl_dep-1)) %} + {{- " \n"}} + {{- " \n"}} + {{- " \n" % cycl_offset}} + {{- " \n"}} + {{- " \n" % cycl_offset}} + {{- " \n"}} + {%- elif (n == num_cycl_dep-1) %} + {{- " \n"}} + {{- " \n"}} + {{- " \n" % cycl_offset }} + {{- " \n"}} + {{- " \n" % cycl_offset }} + {{- " \n" }} + {%- endif %} + {%- endfor %}' taskdep_genensprod: attrs: task: run_MET_GenEnsProd_vx_APCP#ACCUM_HH#h @@ -182,8 +211,7 @@ metatask_GridStat_CCPA_ensmeanprob_all_accums: dependency: and: taskdep_pcpcombine_obs: - attrs: - task: run_MET_PcpCombine_obs_APCP#ACCUM_HH#h + <<: &taskdep_pcpcombine_obs taskdep_genensprod: attrs: task: run_MET_GenEnsProd_vx_APCP#ACCUM_HH#h diff --git a/scripts/exregional_run_met_pcpcombine.sh b/scripts/exregional_run_met_pcpcombine.sh index 6e64d102e6..20ae1a9794 100755 --- a/scripts/exregional_run_met_pcpcombine.sh +++ b/scripts/exregional_run_met_pcpcombine.sh @@ -212,9 +212,28 @@ elif [ "${FCST_OR_OBS}" = "OBS" ]; then num_missing_files_max="${NUM_MISSING_OBS_FILES_MAX}" fi +# If processing obs, then for all cylces except the last one, 
calculate +# a "forecast length" that will hours up to but not including the initial +# (zeroth) hour of the next cycle. For the last cycle, take the "forecast +# length" of the obs to be the same as that of the forecast for the cycle. +# This ensures that the PcpCombine_obs tasks for different cycles do not +# overwrite or clobber output from another cycle (because with this +# approach, the valid times on which the current PcpCombine_obs task is +# operating is distinct from the ones for the PcpCombine_obs tasks for +# every other cycle). +fcst_len_hrs="${FCST_LEN_HRS}" +if [ "${FCST_OR_OBS}" = "OBS" ]; then + yyyymmddhhmn="${PDY}${cyc}00" + if [ ${yyyymmddhhmn} -lt ${DATE_LAST_CYCL} ] && \ + [ ${FCST_LEN_HRS} -ge ${INCR_CYCL_FREQ} ]; then + output_incr_hrs="1" + fcst_len_hrs=$((INCR_CYCL_FREQ - output_incr_hrs + 1)) + fi +fi + set_vx_fhr_list \ cdate="${CDATE}" \ - fcst_len_hrs="${FCST_LEN_HRS}" \ + fcst_len_hrs="${fcst_len_hrs}" \ field="$VAR" \ accum_hh="${ACCUM_HH}" \ base_dir="${base_dir}" \ From 5550a41a1b31e104db3c39f02f95aa3edb8361ae Mon Sep 17 00:00:00 2001 From: gerard ketefian Date: Sun, 28 Jul 2024 10:16:43 -0600 Subject: [PATCH 026/131] Bug fix in yaml. --- parm/wflow/verify_ens.yaml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/parm/wflow/verify_ens.yaml b/parm/wflow/verify_ens.yaml index 995f362926..2d62b803b8 100644 --- a/parm/wflow/verify_ens.yaml +++ b/parm/wflow/verify_ens.yaml @@ -211,7 +211,7 @@ metatask_GridStat_CCPA_ensmeanprob_all_accums: dependency: and: taskdep_pcpcombine_obs: - <<: &taskdep_pcpcombine_obs + <<: *taskdep_pcpcombine_obs taskdep_genensprod: attrs: task: run_MET_GenEnsProd_vx_APCP#ACCUM_HH#h From c76ed1afdc3b9e18c59c36cf8567588f3e25fa1f Mon Sep 17 00:00:00 2001 From: gerard ketefian Date: Mon, 29 Jul 2024 11:50:16 -0600 Subject: [PATCH 027/131] Fix still-existing problem of file clobbering with get_obs_mrms and possibly also get_obs_ndas by putting in sleep commands. 
--- scripts/exregional_get_verif_obs.sh | 36 +++++++++++++++++++++++------ 1 file changed, 29 insertions(+), 7 deletions(-) diff --git a/scripts/exregional_get_verif_obs.sh b/scripts/exregional_get_verif_obs.sh index f3a52710c5..a2759f7ef5 100755 --- a/scripts/exregional_get_verif_obs.sh +++ b/scripts/exregional_get_verif_obs.sh @@ -548,10 +548,21 @@ echo "ihh = ${ihh}" # the archive (tar) files. If so, skip the retrieval process. If not, # proceed to retrieve all the files and place them in the raw daily # directory. + # + # Note that despite the check on the existence of the raw daily directory + # below, it is possible for two get_obs_mrms tasks to try to retrieve + # obs for the same day. To minimize this possibility, sleep for a random + # number of seconds (with a maximum wait of maxwait seconds set below) + # before performing the directory existence check + maxwait=30 + sleep_duration_secs=$((RANDOM % maxwait)) + echo "Sleeping for $duration seconds..." + sleep "${sleep_duration_secs}s" + if [[ -d "${mrms_day_dir_raw}" ]]; then - echo "${OBTYPE} directory for day ${vyyyymmdd} exists on disk:" - echo " mrms_day_dir_proc = \"${mrms_day_dir_proc}\"" + echo "${OBTYPE} raw daily directory for day ${vyyyymmdd} exists on disk:" + echo " mrms_day_dir_raw = \"${mrms_day_dir_raw}\"" echo "This means MRMS files for all hours of the current valid day (${vyyyymmdd}) have been or are being retrieved." echo "Thus, we will NOT attempt to retrieve MRMS data for the current valid time from remote locations." @@ -650,7 +661,7 @@ echo "ihh = ${ihh}" # elif [[ ${OBTYPE} == "NDAS" ]]; then - # Calculate valid date plus 1 hour. This is needed because we need to + # Calculate valid date plus 1 hour. This is needed because we need to # check whether this date corresponds to one of the valid hours-of-day # 00, 06, 12, and 18 on which the NDAS archives are provided. 
unix_vdate_p1h=$($DATE_UTIL -d "${unix_init_DATE} $((current_fcst+1)) hours" "+%Y-%m-%d %H:00:00") @@ -675,7 +686,7 @@ echo "vdate_p1h = ${vdate_p1h}" # Name of the NDAS prepbufr file for the current valid time that will # appear in the processed daily subdirectory after this script finishes. - # This is the name of the processed file. Note that this is not the + # This is the name of the processed file. Note that this is not the # same as the name of the raw file, i.e. the file extracted from the # archive (tar) file retrieved below by the retrieve_data.py script. ndas_fn="prepbufr.ndas.${vdate}" @@ -695,7 +706,7 @@ echo "processed_fp_list = |${processed_fp_list[@]}" # Check if the processed NDAS prepbufr file for the current valid time # already exists on disk. If so, skip this valid time and go to the next - # one. + # one. if [[ -f "${ndas_fp_proc}" ]]; then echo "${OBTYPE} file exists on disk:" @@ -764,6 +775,17 @@ echo "vhh_p1h_noZero = ${vhh_p1h_noZero}" # from the archive (tar) files. If so, skip the retrieval process. If # not, proceed to retrieve the archive file, extract the prepbufr files # from it, and place them in the raw daily directory. + # + # Note that despite the check on the existence of the raw daily directory + # below, it is possible for two get_obs_mrms tasks to try to retrieve + # obs for the same day. To minimize this possibility, sleep for a random + # number of seconds (with a maximum wait of maxwait seconds set below) + # before performing the directory existence check + maxwait=30 + sleep_duration_secs=$((RANDOM % maxwait)) + echo "Sleeping for $duration seconds..." + sleep "${sleep_duration_secs}s" + if [[ -d "${ndas_day_dir_raw}" ]]; then print_info_msg " @@ -949,6 +971,7 @@ NDAS data for the current valid time from remote locations." 
current_fcst=$((${current_fcst} + 1)) done +echo "SSSSSSSSSSSSSSSS" # #----------------------------------------------------------------------- # @@ -956,14 +979,13 @@ done # for this cycle are either being created (by a get_obs_... task for # another cycle) or have already been created (either by this get_obs_... # task or one for another cycle). In case they are still being created, -# make sure they have in fact been created before exiting this script. +# make sure they have in fact been created before exiting this script. # If we don't do this, it is possible for this get_obs_... task to complete # successfully but still have processed obs files for some forecast hours # not yet created, which is undesirable. # #----------------------------------------------------------------------- # -echo echo "HHHHHHHHHHHHHHHH" echo "processed_fp_list = |${processed_fp_list[@]}" num_proc_files=${#processed_fp_list[@]} From 3f1dea1ebd2f4755bf268a78984c5125ec3476af Mon Sep 17 00:00:00 2001 From: gerard ketefian Date: Mon, 29 Jul 2024 11:51:57 -0600 Subject: [PATCH 028/131] Improvements to jinja2 code to put in dependencies from other cycles. --- parm/wflow/verify_det.yaml | 27 ++++++++++----------------- parm/wflow/verify_ens.yaml | 19 ++++++------------- 2 files changed, 16 insertions(+), 30 deletions(-) diff --git a/parm/wflow/verify_det.yaml b/parm/wflow/verify_det.yaml index f69429bbe4..35358c9b67 100644 --- a/parm/wflow/verify_det.yaml +++ b/parm/wflow/verify_det.yaml @@ -54,9 +54,8 @@ metatask_GridStat_CCPA_all_accums_all_mems: # last one processes obs at valid times starting with the initial time # of the current cycle's forecast and ending with the last output time # of this forecast that is before the initial time of the next cycle's - # forecast. It will also include a dependency on the PcpCombine_fcst - # task for the current cycle. - taskdep_pcpcombine_obs_fcst: + # forecast. 
+ taskdep_pcpcombine_obs: attrs: task: '{%- set num_cycl_dep = (workflow.FCST_LEN_HRS/workflow.INCR_CYCL_FREQ)|round(0,"ceil")|int %} {%- set num_cycl_dep = num_cycl_dep %} @@ -64,24 +63,18 @@ metatask_GridStat_CCPA_all_accums_all_mems: {%- set cycl_offset = n*workflow.INCR_CYCL_FREQ %} {%- if n == 0 %} {{- "run_MET_PcpCombine_obs_APCP#ACCUM_HH#h\" cycle_offset=\"%02d:00:00\"/>\n" % cycl_offset }} - {%- elif ((n > 0) and (n < num_cycl_dep-1)) %} - {{- " \n"}} - {{- " \n"}} - {{- " \n" % cycl_offset}} - {{- " \n"}} - {{- " \n" % cycl_offset}} - {{- " \n"}} - {%- elif (n == num_cycl_dep-1) %} - {{- " \n"}} - {{- " \n"}} - {{- " \n" % cycl_offset }} - {{- " \n"}} + {%- else %} + {{- " \n" }} + {{- " \n" % cycl_offset }} {{- " \n" % cycl_offset }} {{- " \n" }} {%- endif %} {%- endfor %} - {{- " \n" }} + {{- " \n" % cycl_offset }} - {%- elif ((n > 0) and (n < num_cycl_dep-1)) %} - {{- " \n"}} - {{- " \n"}} - {{- " \n" % cycl_offset}} - {{- " \n"}} - {{- " \n" % cycl_offset}} - {{- " \n"}} - {%- elif (n == num_cycl_dep-1) %} - {{- " \n"}} - {{- " \n"}} - {{- " \n" % cycl_offset }} - {{- " \n"}} + {%- else %} + {{- " \n" }} + {{- " \n" % cycl_offset }} {{- " \n" % cycl_offset }} {{- " \n" }} {%- endif %} - {%- endfor %}' + {%- endfor %} + {{- " \n" }} + {{- " Date: Tue, 30 Jul 2024 17:47:56 -0600 Subject: [PATCH 029/131] Bug fix. --- scripts/exregional_get_verif_obs.sh | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/scripts/exregional_get_verif_obs.sh b/scripts/exregional_get_verif_obs.sh index a2759f7ef5..314273ba93 100755 --- a/scripts/exregional_get_verif_obs.sh +++ b/scripts/exregional_get_verif_obs.sh @@ -556,7 +556,7 @@ echo "ihh = ${ihh}" # before performing the directory existence check maxwait=30 sleep_duration_secs=$((RANDOM % maxwait)) - echo "Sleeping for $duration seconds..." + echo "Sleeping for ${sleep_duration_secs} seconds..." 
sleep "${sleep_duration_secs}s" if [[ -d "${mrms_day_dir_raw}" ]]; then @@ -783,7 +783,7 @@ echo "vhh_p1h_noZero = ${vhh_p1h_noZero}" # before performing the directory existence check maxwait=30 sleep_duration_secs=$((RANDOM % maxwait)) - echo "Sleeping for $duration seconds..." + echo "Sleeping for ${sleep_duration_secs} seconds..." sleep "${sleep_duration_secs}s" if [[ -d "${ndas_day_dir_raw}" ]]; then From f9af954a305debedfd4305f62e8b10902dbb95e5 Mon Sep 17 00:00:00 2001 From: gerard ketefian Date: Thu, 15 Aug 2024 10:39:29 -0600 Subject: [PATCH 030/131] Minor typo fix. --- jobs/JREGIONAL_GET_VERIF_OBS | 1 - 1 file changed, 1 deletion(-) diff --git a/jobs/JREGIONAL_GET_VERIF_OBS b/jobs/JREGIONAL_GET_VERIF_OBS index 3820a739db..65377ddde2 100755 --- a/jobs/JREGIONAL_GET_VERIF_OBS +++ b/jobs/JREGIONAL_GET_VERIF_OBS @@ -55,7 +55,6 @@ This is the J-job script for the task that checks, pulls, and stages observation data for verification purposes. ========================================================================" -# # #----------------------------------------------------------------------- # From f81cd1cab8922930b2d908bb6c141af8cdf10318 Mon Sep 17 00:00:00 2001 From: gerard ketefian Date: Thu, 15 Aug 2024 10:49:21 -0600 Subject: [PATCH 031/131] Add workflow configuration options for whether or not to remove raw obs files. 
--- parm/wflow/verify_pre.yaml | 4 ++++ ush/config_defaults.yaml | 18 ++++++++++++++++++ 2 files changed, 22 insertions(+) diff --git a/parm/wflow/verify_pre.yaml b/parm/wflow/verify_pre.yaml index c99bd4b4e6..2357c6bc5e 100644 --- a/parm/wflow/verify_pre.yaml +++ b/parm/wflow/verify_pre.yaml @@ -29,6 +29,7 @@ task_get_obs_ccpa: ACCUM_HH: '01' OBS_DIR: '&CCPA_OBS_DIR;' OBTYPE: 'CCPA' + REMOVE_RAW_OBS_DIRS: '{{ platform.REMOVE_RAW_OBS_DIRS_CCPA }}' FHR: '{% for h in range(0, workflow.FCST_LEN_HRS+1) %}{{ " %02d" % h }}{% endfor %}' native: '{% if platform.get("SCHED_NATIVE_CMD_HPSS") %}{{ platform.SCHED_NATIVE_CMD_HPSS }}{% else %}{{ platform.SCHED_NATIVE_CMD}}{% endif %}' partition: '{% if platform.get("PARTITION_HPSS") %}&PARTITION_HPSS;{% else %}None{% endif %}' @@ -42,6 +43,7 @@ task_get_obs_nohrsc: <<: *default_vars OBS_DIR: '&NOHRSC_OBS_DIR;' OBTYPE: 'NOHRSC' + REMOVE_RAW_OBS_DIRS: '{{ platform.REMOVE_RAW_OBS_DIRS_NOHRSC }}' FHR: '{% for h in range(0, workflow.FCST_LEN_HRS+1) %}{{ " %02d" % h }}{% endfor %}' native: '{% if platform.get("SCHED_NATIVE_CMD_HPSS") %}{{ platform.SCHED_NATIVE_CMD_HPSS }}{% else %}{{ platform.SCHED_NATIVE_CMD}}{% endif %}' partition: '{% if platform.get("PARTITION_HPSS") %}&PARTITION_HPSS;{% else %}None{% endif %}' @@ -56,6 +58,7 @@ task_get_obs_mrms: OBS_DIR: '&MRMS_OBS_DIR;' OBTYPE: 'MRMS' VAR: 'REFC RETOP' + REMOVE_RAW_OBS_DIRS: '{{ platform.REMOVE_RAW_OBS_DIRS_MRMS }}' FHR: '{% for h in range(0, workflow.FCST_LEN_HRS+1) %}{{ " %02d" % h }}{% endfor %}' native: '{% if platform.get("SCHED_NATIVE_CMD_HPSS") %}{{ platform.SCHED_NATIVE_CMD_HPSS }}{% else %}{{ platform.SCHED_NATIVE_CMD}}{% endif %}' partition: '{% if platform.get("PARTITION_HPSS") %}&PARTITION_HPSS;{% else %}None{% endif %}' @@ -69,6 +72,7 @@ task_get_obs_ndas: <<: *default_vars OBS_DIR: '&NDAS_OBS_DIR;' OBTYPE: 'NDAS' + REMOVE_RAW_OBS_DIRS: '{{ platform.REMOVE_RAW_OBS_DIRS_NDAS }}' FHR: '{% for h in range(0, workflow.FCST_LEN_HRS+1) %}{{ " %02d" % h }}{% endfor %}' 
queue: "&QUEUE_HPSS;" native: '{% if platform.get("SCHED_NATIVE_CMD_HPSS") %}{{ platform.SCHED_NATIVE_CMD_HPSS }}{% else %}{{ platform.SCHED_NATIVE_CMD}}{% endif %}' diff --git a/ush/config_defaults.yaml b/ush/config_defaults.yaml index c9c0fc7cb8..ceccd71277 100644 --- a/ush/config_defaults.yaml +++ b/ush/config_defaults.yaml @@ -324,6 +324,24 @@ platform: # #----------------------------------------------------------------------- # + # REMOVE_RAW_OBS_DIRS_[CCPA|MRMS|NDAS|NOHRSC]: + # Boolean flag specifying whether to remove the "raw" observation + # directories after pulling the specified type of obs (CCPA, MRMS, + # NDAS, or NOHRSC). The raw directories are the ones in which the + # observation files are placed immediately after pulling them from + # a data store (e.g. NOAA's HPSS) but before performing any processing + # on them (e.g. renaming the files or reorganizing their directory + # structure). + # + #----------------------------------------------------------------------- + # + REMOVE_RAW_OBS_DIRS_CCPA: true + REMOVE_RAW_OBS_DIRS_MRMS: true + REMOVE_RAW_OBS_DIRS_NDAS: true + REMOVE_RAW_OBS_DIRS_NOHRSC: true + # + #----------------------------------------------------------------------- + # # DOMAIN_PREGEN_BASEDIR: # The base directory containing pregenerated grid, orography, and surface # climatology files. This is an alternative for setting GRID_DIR, From 01e87b8b2f22e21e55c1131c1d11040860a3c8ce Mon Sep 17 00:00:00 2001 From: gerard ketefian Date: Fri, 16 Aug 2024 11:41:50 -0600 Subject: [PATCH 032/131] Make the names of the deterministic and ensemble vx configuration files user-specifiable. 
--- scripts/exregional_run_met_genensprod_or_ensemblestat.sh | 4 +--- scripts/exregional_run_met_gridstat_or_pointstat_vx.sh | 4 +--- .../exregional_run_met_gridstat_or_pointstat_vx_ensmean.sh | 4 +--- .../exregional_run_met_gridstat_or_pointstat_vx_ensprob.sh | 4 +--- ush/config_defaults.yaml | 7 +++++++ 5 files changed, 11 insertions(+), 12 deletions(-) diff --git a/scripts/exregional_run_met_genensprod_or_ensemblestat.sh b/scripts/exregional_run_met_genensprod_or_ensemblestat.sh index 5003047f4f..5034369851 100755 --- a/scripts/exregional_run_met_genensprod_or_ensemblestat.sh +++ b/scripts/exregional_run_met_genensprod_or_ensemblestat.sh @@ -305,9 +305,7 @@ metplus_log_fn="metplus.log.${metplus_log_bn}" # #----------------------------------------------------------------------- # -det_or_ens="ens" -vx_config_fn="vx_config_${det_or_ens}.yaml" -vx_config_fp="${METPLUS_CONF}/${vx_config_fn}" +vx_config_fp="${METPLUS_CONF}/${VX_CONFIG_ENS_FN}" vx_config_dict=$(<"${vx_config_fp}") # Indent each line of vx_config_dict so that it is aligned properly when # included in the yaml-formatted variable "settings" below. diff --git a/scripts/exregional_run_met_gridstat_or_pointstat_vx.sh b/scripts/exregional_run_met_gridstat_or_pointstat_vx.sh index 91c5a7896b..e54dd7b553 100755 --- a/scripts/exregional_run_met_gridstat_or_pointstat_vx.sh +++ b/scripts/exregional_run_met_gridstat_or_pointstat_vx.sh @@ -292,9 +292,7 @@ metplus_log_fn="metplus.log.${metplus_log_bn}" # #----------------------------------------------------------------------- # -det_or_ens="det" -vx_config_fn="vx_config_${det_or_ens}.yaml" -vx_config_fp="${METPLUS_CONF}/${vx_config_fn}" +vx_config_fp="${METPLUS_CONF}/${VX_CONFIG_DET_FN}" vx_config_dict=$(<"${vx_config_fp}") # Indent each line of vx_config_dict so that it is aligned properly when # included in the yaml-formatted variable "settings" below. 
diff --git a/scripts/exregional_run_met_gridstat_or_pointstat_vx_ensmean.sh b/scripts/exregional_run_met_gridstat_or_pointstat_vx_ensmean.sh index 6e4a4ff33f..f08c002d5f 100755 --- a/scripts/exregional_run_met_gridstat_or_pointstat_vx_ensmean.sh +++ b/scripts/exregional_run_met_gridstat_or_pointstat_vx_ensmean.sh @@ -250,9 +250,7 @@ metplus_log_fn="metplus.log.${metplus_log_bn}" # #----------------------------------------------------------------------- # -det_or_ens="ens" -vx_config_fn="vx_config_${det_or_ens}.yaml" -vx_config_fp="${METPLUS_CONF}/${vx_config_fn}" +vx_config_fp="${METPLUS_CONF}/${VX_CONFIG_ENS_FN}" vx_config_dict=$(<"${vx_config_fp}") # Indent each line of vx_config_dict so that it is aligned properly when # included in the yaml-formatted variable "settings" below. diff --git a/scripts/exregional_run_met_gridstat_or_pointstat_vx_ensprob.sh b/scripts/exregional_run_met_gridstat_or_pointstat_vx_ensprob.sh index 924d321ec3..5952ed3785 100755 --- a/scripts/exregional_run_met_gridstat_or_pointstat_vx_ensprob.sh +++ b/scripts/exregional_run_met_gridstat_or_pointstat_vx_ensprob.sh @@ -249,9 +249,7 @@ metplus_log_fn="metplus.log.${metplus_log_bn}" # #----------------------------------------------------------------------- # -det_or_ens="ens" -vx_config_fn="vx_config_${det_or_ens}.yaml" -vx_config_fp="${METPLUS_CONF}/${vx_config_fn}" +vx_config_fp="${METPLUS_CONF}/${VX_CONFIG_ENS_FN}" vx_config_dict=$(<"${vx_config_fp}") # Indent each line of vx_config_dict so that it is aligned properly when # included in the yaml-formatted variable "settings" below. diff --git a/ush/config_defaults.yaml b/ush/config_defaults.yaml index ceccd71277..e564444b49 100644 --- a/ush/config_defaults.yaml +++ b/ush/config_defaults.yaml @@ -2555,6 +2555,13 @@ verification: # be run. 
# NUM_MISSING_FCST_FILES_MAX: 0 + # + # Names of configuration files for deterministic and ensemble vx that + # specify the field groups, field names, levels, and (if applicable) + # thresholds for which to run verification. + # + VX_CONFIG_DET_FN: 'vx_config_det.yaml' + VX_CONFIG_ENS_FN: 'vx_config_ens.yaml' #---------------------------- # CPL_AQM config parameters From 21f7b691af429623c51d001e6de91085bec9672f Mon Sep 17 00:00:00 2001 From: gerard ketefian Date: Mon, 26 Aug 2024 10:45:09 -0600 Subject: [PATCH 033/131] Add new files. --- ush/bash_utils/ceil.sh | 122 +++++++++++ ush/get_obs_ccpa.sh | 454 +++++++++++++++++++++++++++++++++++++++++ ush/get_obs_mrms.sh | 260 +++++++++++++++++++++++ ush/get_obs_ndas.sh | 305 +++++++++++++++++++++++++++ 4 files changed, 1141 insertions(+) create mode 100644 ush/bash_utils/ceil.sh create mode 100755 ush/get_obs_ccpa.sh create mode 100755 ush/get_obs_mrms.sh create mode 100755 ush/get_obs_ndas.sh diff --git a/ush/bash_utils/ceil.sh b/ush/bash_utils/ceil.sh new file mode 100644 index 0000000000..dc8a21c90d --- /dev/null +++ b/ush/bash_utils/ceil.sh @@ -0,0 +1,122 @@ +# +#----------------------------------------------------------------------- +# +# This function returns the ceiling of the quotient of two numbers. The +# ceiling of a number is the number rounded up to the nearest integer. +# +#----------------------------------------------------------------------- +# +function ceil() { +# +#----------------------------------------------------------------------- +# +# Save current shell options (in a global array). Then set new options +# for this script/function. +# +#----------------------------------------------------------------------- +# + { save_shell_opts; . 
${USHdir}/preamble.sh; } > /dev/null 2>&1 +# +#----------------------------------------------------------------------- +# +# Get the full path to the file in which this script/function is located +# (scrfunc_fp), the name of that file (scrfunc_fn), and the directory in +# which the file is located (scrfunc_dir). +# +#----------------------------------------------------------------------- +# + local scrfunc_fp=$( $READLINK -f "${BASH_SOURCE[0]}" ) + local scrfunc_fn=$( basename "${scrfunc_fp}" ) + local scrfunc_dir=$( dirname "${scrfunc_fp}" ) +# +#----------------------------------------------------------------------- +# +# Get the name of this function. +# +#----------------------------------------------------------------------- +# + local func_name="${FUNCNAME[0]}" +# +#----------------------------------------------------------------------- +# +# Check number of arguments. +# +#----------------------------------------------------------------------- +# + if [ "$#" -ne 2 ]; then + + print_err_msg_exit " +Incorrect number of arguments specified: + + Function name: \"${func_name}\" + Number of arguments specified: $# + +Usage: + + ${func_name} numer denom + +where denom is a nonnegative integer and denom is a positive integer. +" + + fi +# +#----------------------------------------------------------------------- +# +# Make sure arguments are of the right form. +# +#----------------------------------------------------------------------- +# + local numer="$1" + local denom="$2" + + if ! [[ "${numer}" =~ ^[0-9]+$ ]]; then + print_err_msg_exit " +The first argument to the \"${func_name}\" function (numer) must be a nonnegative +integer but isn't: + numer = ${numer} +" + fi + + if [[ "${denom}" -eq 0 ]]; then + print_err_msg_exit " +The second argument to the \"${func_name}\" function (denom) cannot be zero: + denom = ${denom} +" + fi + + if ! 
[[ "${denom}" =~ ^[0-9]+$ ]]; then + print_err_msg_exit " +The second argument to the \"${func_name}\" function (denom) must be a positive +integer but isn't: + denom = ${denom} +" + fi +# +#----------------------------------------------------------------------- +# +# Let ceil(a,b) denote the ceiling of the quotient of a and b. It can be +# shown that for two positive integers a and b, we have: +# +# ceil(a,b) = floor((a+b-1)/b) +# +# where floor(a,b) is the integer obtained by rounding the quotient of +# a and b (i.e. a/b) down to the nearest integer. Since in bash a +# division returns only the integer part of the result, it is effectively +# the floor function. Thus the following. +# +#----------------------------------------------------------------------- +# + result=$(( (numer+denom-1)/denom )) + print_info_msg "${result}" +# +#----------------------------------------------------------------------- +# +# Restore the shell options saved at the beginning of this script/func- +# tion. +# +#----------------------------------------------------------------------- +# + { restore_shell_opts; } > /dev/null 2>&1 + +} + diff --git a/ush/get_obs_ccpa.sh b/ush/get_obs_ccpa.sh new file mode 100755 index 0000000000..5cbf6638c6 --- /dev/null +++ b/ush/get_obs_ccpa.sh @@ -0,0 +1,454 @@ +#!/usr/bin/env bash + +# +#----------------------------------------------------------------------- +# +# Source the variable definitions file and the bash utility functions. +# +#----------------------------------------------------------------------- +# +. $USHdir/source_util_funcs.sh +source_config_for_task " " ${GLOBAL_VAR_DEFNS_FP} + +set -u +set -x +# +#----------------------------------------------------------------------- +# +# This script performs several important tasks for preparing data for +# verification tasks. Depending on the value of the environment variable +# OBTYPE=(CCPA|MRMS|NDAS|NOHRSC), the script will prepare that particular data +# set. 
+# +# If data is not available on disk (in the location specified by +# CCPA_OBS_DIR, MRMS_OBS_DIR, NDAS_OBS_DIR, or NOHRSC_OBS_DIR respectively), +# the script attempts to retrieve the data from HPSS using the retrieve_data.py +# script. Depending on the data set, there are a few strange quirks and/or +# bugs in the way data is organized; see in-line comments for details. +# +# +# CCPA (Climatology-Calibrated Precipitation Analysis) precipitation accumulation obs +# ---------- +# If data is available on disk, it must be in the following +# directory structure and file name conventions expected by verification +# tasks: +# +# {CCPA_OBS_DIR}/{YYYYMMDD}/ccpa.t{HH}z.01h.hrap.conus.gb2 +# +# If data is retrieved from HPSS, it will be automatically staged by this +# script. +# +# Notes about the data and how it's used for verification: +# +# 1. Accumulation is currently hardcoded to 01h. The verification will +# use MET/pcp-combine to sum 01h files into desired accumulations. +# +# 2. There is a problem with the valid time in the metadata for files +# valid from 19 - 00 UTC (or files under the '00' directory). This is +# accounted for in this script for data retrieved from HPSS, but if you +# have manually staged data on disk you should be sure this is accounted +# for. See in-line comments below for details. +# +#----------------------------------------------------------------------- +# + +# +#----------------------------------------------------------------------- +# +# Below, we will use the retrieve_data.py script to retrieve the CCPA +# grib2 file from a data store (e.g. HPSS). 
Before doing so, note the +# following: +# +# * The daily archive (tar) file containing CCPA obs has a name of the +# form +# +# [PREFIX].YYYYMMDD.tar +# +# where YYYYMMDD is a given year, month, and day combination, and +# [PREFIX] is a string that is not relevant to the discussion here +# (the value it can take on depends on which of several time periods +# YYYYMMDD falls in, and the retrieve_data.py tries various values +# until it finds one for which a tar file exists). Unintuitively, this +# archive file contains accumulation data for valid times starting at +# hour 19 of the PREVIOUS day (YYYYMM[DD-1]) to hour 18 of the current +# day (YYYYMMDD). In other words, the valid times of the contents of +# this archive file are shifted back by 6 hours relative to the time +# string appearing in the name of the file. See section "DETAILS..." +# for a detailed description of the directory structure in the CCPA +# archive files. +# +# * We call retrieve_data.py in a temporary cycle-specific subdirectory +# in order to prevent get_obs_ccpa tasks for different cycles from +# clobbering each other's output. We refer to this as the "raw" CCPA +# base directory because it contains files as they are found in the +# archives before any processing by this script. +# +# * In each (cycle-specific) raw base directory, the data is arranged in +# daily subdirectories with the same timing as in the archive (tar) +# files (which are described in the section "DETAILS..." below). In +# particular, each daily subdirectory has the form YYYYMDD, and it may +# contain CCPA grib2 files for accumulations valid at hour 19 of the +# previous day (YYYYMM[DD-1]) to hour 18 of the current day (YYYYMMDD). +# (Data valid at hours 19-23 of the current day (YYYYMMDD) go into the +# daily subdirectory for the next day, i.e. YYYYMM[DD+1].) 
We refer +# to these as raw daily (sub)directories to distinguish them from the +# processed daily subdirectories under the processed (final) CCPA base +# directory (basedir_proc). +# +# * For a given cycle, some of the valid times at which there is forecast +# output may not have a corresponding file under the raw base directory +# for that cycle. This is because another cycle that overlaps this cycle +# has already obtained the grib2 CCPA file for that valid time and placed +# it in its processed location; as a result, the retrieveal of that grib2 +# file for this cycle is skipped. +# +# * To obtain a more intuitive temporal arrangement of the data in the +# processed CCPA directory structure than the temporal arrangement used +# in the archives and raw directories, we process the raw files such +# that the data in the processed directory structure is shifted forward +# in time 6 hours relative to the data in the archives and raw directories. +# This results in a processed base directory that, like the raw base +# directory, also contains daily subdirectories of the form YYYYMMDD, +# but each such subdirectory may only contain CCPA data at valid hours +# within that day, i.e. at valid times YYYYMMDD[00, 01, ..., 23] (but +# may not contain data that is valid on the previous, next, or any other +# day). +# +# * For data between 20180718 and 20210504, the 01h accumulation data +# (which is the only accumulation we are retrieving) have incorrect +# metadata under the "00" directory in the archive files (meaning for +# hour 00 and hours 19-23, which are the ones in the "00" directory). +# Below, we use wgrib2 to make a correction for this when transferring +# (moving or copying) grib2 files from the raw daily directories to +# the processed daily directories. 
+# +# +# DETAILS OF DIRECTORY STRUCTURE IN CCPA ARCHIVE (TAR) FILES +# ---------------------------------------------------------- +# +# The daily archive file containing CCPA obs is named +# +# [PREFIX].YYYYMMDD.tar +# +# This file contains accumulation data for valid times starting at hour +# 19 of the PREVIOUS day (YYYYMM[DD-1]) to hour 18 of the current day +# (YYYYMMDD). In particular, when untarred, the daily archive file +# expands into four subdirectories: 00, 06, 12, and 18. The 06, 12, and +# 18 subdirectories contain grib2 files for accumulations valid at or +# below the hour-of-day given by the subdirectory name (and on YYYYMMDD). +# For example, the 06 directory contains data valid at: +# +# * YYYYMMDD[01, 02, 03, 04, 05, 06] for 01h accumulations; +# * YYYYMMDD[03, 06] for 03h accumulations; +# * YYYYMMDD[06] for 06h accumulations. +# +# The valid times for the data in the 12 and 18 subdirectories are +# analogous. However, the 00 subdirectory is different in that it +# contains accumulations at hour 00 on YYYYMMDD as well as ones BEFORE +# this time, i.e. the data for valid times other than YYYYMMDD00 are on +# the PREVIOUS day. Thus, the 00 subdirectory contains data valid at +# (note the DD-1, meaning one day prior): +# +# * YYYYMM[DD-1][19, 20, 21, 22, 23] and YYYYMMDD00 for 01h accumulations; +# * YYYYMM[DD-1][19] and YYYYMMDD00 for 03h accumulations; +# * YYYYMMDD00 for 06h accumulations. +# +#----------------------------------------------------------------------- +# + +# CCPA accumulation period to consider. Here, we only retrieve data for +# 1-hour accumulations. Other accumulations (03h, 06h, 24h) are obtained +# by other tasks in the workflow that add up these hourly values. +accum="01" + +# The day (in the form YYYMMDD) associated with the current task via the +# task's cycledefs attribute in the ROCOTO xml. 
+yyyymmdd_task=${PDY} + +# Base directory in which the daily subdirectories containing the CCPA +# grib2 files will appear after this script is done. We refer to this as +# the "processed" base directory because it contains the files after all +# processing by this script is complete. +basedir_proc=${OBS_DIR} + +# The environment variable OUTPUT_TIMES_ALL set in the ROCOTO XML is a +# scalar string containing all relevant forecast output times (each) in +# the form YYYYMMDDHH) separated by spaces. It isn't an array of strings +# because in ROCOTO, there doesn't seem to be a way to pass a bash array +# from the XML to task's script. To have an array-valued variable to +# work with, here, we create the new variable output_times_all that is +# the array-valued counterpart of OUTPUT_TIMES_ALL. +output_times_all=($(printf "%s" "${OUTPUT_TIMES_ALL}")) + +# List of times (each of the form YYYYMMDDHH) for which there is forecast +# APCP (accumulated precipitation) output for the current day. We start +# constructing this by extracting from the full list of all forecast APCP +# output times (i.e. from all cycles) all elements that contain the current +# task's day (in the form YYYYMMDD). +output_times_crnt_day=($(printf "%s\n" "${output_times_all[@]}" | grep "^${yyyymmdd_task}")) +# If the 0th hour of the current day is in this list (and if it is, it +# will be the first element), remove it because for APCP, that time is +# considered part of the previous day (because it represents precipitation +# that occurred during the last hour of the previous day). +if [[ ${output_times_crnt_day[0]} == "${yyyymmdd_task}00" ]]; then + output_times_crnt_day=(${output_times_crnt_day[@]:1}) +fi +# If the 0th hour of the next day (i.e. 
the day after yyyymmdd_task) is +# one of the output times in the list of all APCP output times, we include +# it in the list for the current day because for APCP, that time is +# considered part of the current day (because it represents precipitation +# that occured during the last hour of the current day). +yyyymmdd00_task_p1d=$(${DATE_UTIL} --date "${yyyymmdd_task} 1 day" +%Y%m%d%H) +if [[ ${output_times_all[@]} =~ ${yyyymmdd00_task_p1d} ]]; then + output_times_crnt_day+=(${yyyymmdd00_task_p1d}) +fi + +# If there are no forecast APCP output times on the day of the current +# task, exit the script. +num_output_times_crnt_day=${#output_times_crnt_day[@]} +if [[ ${num_output_times_crnt_day} -eq 0 ]]; then + print_info_msg " +None of the forecast APCP output times fall in the current day (including +the 0th hour of the next day). Thus, there is no need to retrieve any +obs files." + exit +fi + +# Obs files will be obtained by extracting them from the relevant 6-hourly +# archives. Thus, we need the sequence of archive hours over which to +# loop. In the simplest case, this sequence will be "6 12 18 24". This +# will be the case if the forecast output times include all hours of the +# task's day and if none of the obs files for this day already exist on +# disk. In other cases, the sequence we loop over will be a subset of +# "6 12 18 24". +# +# To generate this sequence, we first set its starting and ending values +# as well as the interval. + +# Sequence interval must be 6 hours because the archives are 6-hourly. +arcv_hr_incr=6 + +# Initial guess for starting archive hour. This is set to the hour +# corresponding to the first forecast output time of the day. +hh_first=$(echo ${output_times_crnt_day[0]} | cut -c9-10) +hr_first=$((10#${hh_first})) +arcv_hr_start=$(ceil ${hr_first} ${arcv_hr_incr}) +arcv_hr_start=$(( arcv_hr_start*arcv_hr_incr )) + +# Ending archive hour. This is set to the hour corresponding to the last +# forecast output time of the day. 
+hh_last=$(echo ${output_times_crnt_day[-1]} | cut -c9-10) +hr_last=$((10#${hh_last})) +if [[ ${hr_last} -eq 0 ]]; then + arcv_hr_end=24 +else + arcv_hr_end=$(ceil ${hr_last} ${arcv_hr_incr}) + arcv_hr_end=$(( arcv_hr_end*arcv_hr_incr )) +fi + +# Check whether any obs files already exist on disk. If so, adjust the +# starting archive hour. In the process, keep a count of the number of +# files that already exist on disk. +num_existing_files=0 +for yyyymmddhh in ${output_times_crnt_day[@]}; do + yyyymmdd=$(echo ${yyyymmddhh} | cut -c1-8) + hh=$(echo ${yyyymmddhh} | cut -c9-10) + day_dir_proc="${basedir_proc}/${yyyymmdd}" + fn_proc="ccpa.t${hh}z.${accum}h.hrap.conus.gb2" + fp_proc="${day_dir_proc}/${fn_proc}" + if [[ -f ${fp_proc} ]]; then + num_existing_files=$((num_existing_files+1)) + print_info_msg " +File already exists on disk: + fp_proc = \"${fp_proc}\"" + else + hr=$((10#${hh})) + arcv_hr_start=$(ceil ${hr} ${arcv_hr_incr}) + arcv_hr_start=$(( arcv_hr_start*arcv_hr_incr )) + print_info_msg " +File does not exists on disk: + fp_proc = \"${fp_proc}\" +Setting the hour (since 00) of the first archive to retrieve to: + arcv_hr_start = \"${arcv_hr_start}\"" + break + fi +done + +# If the number of obs files that already exist on disk is equal to the +# number of files needed, then there is no need to retrieve any files. +num_needed_files=$((num_output_times_crnt_day)) +if [[ ${num_existing_files} -eq ${num_needed_files} ]]; then + print_info_msg " +All obs files needed for the current day (yyyymmdd_task) already exist +on disk: + yyyymmdd_task = \"${yyyymmdd_task}\" +Thus, there is no need to retrieve any files." + exit +# Otherwise, will need to retrieve files. In this case, set the sequence +# of hours corresponding to the archives from which files will be retrieved. 
+else + arcv_hrs=($(seq ${arcv_hr_start} ${arcv_hr_incr} ${arcv_hr_end})) + arcv_hrs_str="( "$( printf "%s " "${arcv_hrs[@]}" )")" + print_info_msg " +At least some obs files needed needed for the current day (yyyymmdd_task) +do not exist on disk: + yyyymmdd_task = \"${yyyymmdd_task}\" +The number of obs files needed is: + num_needed_files = ${num_needed_files} +The number of obs files that already exist on disk is: + num_existing_files = ${num_existing_files} +Will retrieve remaining files by looping over archives corresponding to +the following hours (since 00 of this day): + arcv_hrs = ${arcv_hrs_str} +" +fi +# +#----------------------------------------------------------------------- +# +# At this point, at least some obs files for the current day need to be +# retrieved. Thus, loop over the relevant archives that contain obs for +# the day given by yyyymmdd_task and retrieve files as needed. +# +#----------------------------------------------------------------------- +# + +# Whether to move or copy files from raw to processed directories. +#mv_or_cp="mv" +mv_or_cp="cp" +# If the raw directories and files are to be removed at the end of this +# script, no need to copy the files since the raw directories are going +# to be removed anyway. +if [[ "${REMOVE_RAW_OBS}" == "TRUE" ]]; then + mv_or_cp="mv" +fi + +# Base directory that will contain the daily subdirectories in which the +# CCPA grib2 files retrieved from archive (tar) files will be placed. +# We refer to this as the "raw" base directory because it contains files +# as they are found in the archives before any processing by this script. +basedir_raw="${basedir_proc}/${yyyymmdd_task}/raw" + +for arcv_hr in ${arcv_hrs[@]}; do + + print_info_msg " +arcv_hr = ${arcv_hr}" + + # Calculate the time information for the current archive. 
+ yyyymmddhh_arcv=$(${DATE_UTIL} --date "${yyyymmdd_task} ${arcv_hr} hours" +%Y%m%d%H) + yyyymmdd_arcv=$(echo ${yyyymmddhh_arcv} | cut -c1-8) + hh_arcv=$(echo ${yyyymmddhh_arcv} | cut -c9-10) + + # Directory that will contain the CCPA grib2 files retrieved from the + # current 6-hourly archive file. We refer to this as the "raw" quarter- + # daily directory because it will contain the files as they are in the + # archive before any processing by this script. + qrtrday_dir_raw="${basedir_raw}/${yyyymmddhh_arcv}" + + # Check whether any of the forecast APCP output times for the day associated + # with this task fall in the time interval spanned by the current archive. + # If so, set the flag (do_retrieve) to retrieve the files in the current + # archive. + yyyymmddhh_qrtrday_start=$(${DATE_UTIL} --date "${yyyymmdd_arcv} ${hh_arcv} 5 hours ago" +%Y%m%d%H) + yyyymmddhh_qrtrday_end=${yyyymmddhh_arcv} + do_retrieve="FALSE" + nout=${#output_times_crnt_day[@]} + for (( i=0; i<${nout}; i++ )); do + output_time=${output_times_crnt_day[i]} + if [[ "${output_time}" -ge "${yyyymmddhh_qrtrday_start}" ]] && \ + [[ "${output_time}" -le "${yyyymmddhh_qrtrday_end}" ]]; then + do_retrieve="TRUE" + break + fi + done + + if [[ ${do_retrieve} == "TRUE" ]]; then + + # Make sure the raw quarter-daily directory exists because it is used + # below as the output directory of the retrieve_data.py script (so if + # this directory doesn't already exist, that script will fail). Creating + # this directory also ensures that the raw base directory (basedir_raw) + # exists before we change location to it below. + mkdir -p ${qrtrday_dir_raw} + + # The retrieve_data.py script first extracts the contents of the archive + # file into the directory it was called from and then moves them to the + # specified output location (via the --output_path option). In order to + # avoid other get_obs_ccpa tasks (i.e. 
those associated with other days) + # from interfering with (clobbering) these files (because extracted files + # from different get_obs_ccpa tasks to have the same names or relative + # paths), we change location to the base raw directory so that files with + # same names are extracted into different directories. + cd ${basedir_raw} + + # Pull CCPA data from HPSS. This will get all 6 obs files in the current + # archive and place them in the raw quarter-daily directory. + cmd=" + python3 -u ${USHdir}/retrieve_data.py \ + --debug \ + --file_set obs \ + --config ${PARMdir}/data_locations.yml \ + --cycle_date ${yyyymmddhh_arcv} \ + --data_stores hpss \ + --data_type CCPA_obs \ + --output_path ${qrtrday_dir_raw} \ + --summary_file retrieve_data.log" + + print_info_msg "CALLING: ${cmd}" + $cmd || print_err_msg_exit "Could not retrieve obs from HPSS." + + # Create the processed CCPA grib2 files. This usually consists of just + # moving or copying the raw files to their processed location, but for + # times between 20180718 and 20210504 and hours-of-day 19 through the + # end of the day (i.e. hour 0 of the next day), it involves using wgrib2 + # to correct an error in the metadata of the raw file and writing the + # corrected data to a new grib2 file in the processed location. + for hrs_ago in $(seq 5 -1 0); do + yyyymmddhh=$(${DATE_UTIL} --date "${yyyymmdd_arcv} ${hh_arcv} ${hrs_ago} hours ago" +%Y%m%d%H) + yyyymmdd=$(echo ${yyyymmddhh} | cut -c1-8) + hh=$(echo ${yyyymmddhh} | cut -c9-10) + if [[ ${output_times_crnt_day[@]} =~ ${yyyymmddhh} ]]; then + fn_raw="ccpa.t${hh}z.${accum}h.hrap.conus.gb2" + fp_raw="${qrtrday_dir_raw}/${fn_raw}" + day_dir_proc="${basedir_proc}/${yyyymmdd}" + mkdir -p ${day_dir_proc} + fn_proc="${fn_raw}" + fp_proc="${day_dir_proc}/${fn_proc}" + hh_noZero=$((10#${hh})) + # CCPA files for 1-hour accumulation have incorrect metadata in the files + # under the "00" directory from 20180718 to 20210504. 
After the data is + # pulled, reorganize into correct yyyymmdd structure. + if [[ ${yyyymmdd} -ge 20180718 && ${yyyymmdd} -le 20210504 ]] && \ + [[ (${hh_noZero} -ge 19 && ${hh_noZero} -le 23) || (${hh_noZero} -eq 0) ]]; then + wgrib2 ${fp_raw} -set_date -24hr -grib ${fp_proc} -s + else + ${mv_or_cp} ${fp_raw} ${fp_proc} + fi + fi + done + + else + + print_info_msg " +None of the current day's forecast APCP output times fall in the range +spanned by the current 6-hourly archive file. The bounds of the current +archive are: + yyyymmddhh_qrtrday_start = \"${yyyymmddhh_qrtrday_start}\" + yyyymmddhh_qrtrday_end = \"${yyyymmddhh_qrtrday_end}\" +The forecast output times for APCP are: + output_times_crnt_day = ($(printf "\"%s\" " ${output_times_crnt_day[@]}))" + + fi + +done +# +#----------------------------------------------------------------------- +# +# Clean up raw directories. +# +#----------------------------------------------------------------------- +# +if [[ "${REMOVE_RAW_OBS}" == "TRUE" ]]; then + print_info_msg "Removing raw directories and files..." + rm -rf ${basedir_raw} || print_err_msg_exit "\ +Failed to remove raw directories and files." +fi diff --git a/ush/get_obs_mrms.sh b/ush/get_obs_mrms.sh new file mode 100755 index 0000000000..23896bbf38 --- /dev/null +++ b/ush/get_obs_mrms.sh @@ -0,0 +1,260 @@ +#!/usr/bin/env bash + +# +#----------------------------------------------------------------------- +# +# Source the variable definitions file and the bash utility functions. +# +#----------------------------------------------------------------------- +# +. $USHdir/source_util_funcs.sh +source_config_for_task " " ${GLOBAL_VAR_DEFNS_FP} + +set -u +#set -x +# +#----------------------------------------------------------------------- +# +# This script performs several important tasks for preparing data for +# verification tasks. 
Depending on the value of the environment variable +# OBTYPE=(CCPA|MRMS|NDAS|NOHRSC), the script will prepare that particular data +# set. +# +# If data is not available on disk (in the location specified by +# CCPA_OBS_DIR, MRMS_OBS_DIR, NDAS_OBS_DIR, or NOHRSC_OBS_DIR respectively), +# the script attempts to retrieve the data from HPSS using the retrieve_data.py +# script. Depending on the data set, there are a few strange quirks and/or +# bugs in the way data is organized; see in-line comments for details. +# +# +# MRMS (Multi-Radar Multi-Sensor) radar observations +# ---------- +# If data is available on disk, it must be in the following +# directory structure and file name conventions expected by verification +# tasks: +# +# {MRMS_OBS_DIR}/{YYYYMMDD}/[PREFIX]{YYYYMMDD}-{HH}0000.grib2, +# +# Where [PREFIX] is MergedReflectivityQCComposite_00.50_ for reflectivity +# data and EchoTop_18_00.50_ for echo top data. If data is not available +# at the top of the hour, you should rename the file closest in time to +# your hour(s) of interest to the above naming format. A script +# "ush/mrms_pull_topofhour.py" is provided for this purpose. +# +# If data is retrieved from HPSS, it will automatically staged by this +# this script. +# +#----------------------------------------------------------------------- +# + +# Create an array-valued counterpart of MRMS_FIELDS. MRMS_FIELDS is an +# environment variable created in the ROCOTO XML. It is a scalar variable +# because there doesn't seem to be a way to pass a bash array from the +# XML to the task's script. +mrms_fields=($(printf "%s" "${MRMS_FIELDS}")) + +# Loop over the fields (REFC and RETOP) and set the file base name +# corresponding to each. +fields_in_filenames=() +levels_in_filenames=() +for field in ${mrms_fields[@]}; do + # Set field-dependent parameters needed in forming grib2 file names. 
+ if [ "${field}" = "REFC" ]; then + fields_in_filenames+=("MergedReflectivityQCComposite") + levels_in_filenames+=("00.50") + elif [ "${field}" = "RETOP" ]; then + fields_in_filenames+=("EchoTop") + levels_in_filenames+=("18_00.50") + else + print_err_msg_exit "\ +Invalid field specified: + field = \"${field}\" +Valid options are 'REFC', 'RETOP'." + fi +done + +# The day (in the form YYYMMDD) associated with the current task via the +# task's cycledefs attribute in the ROCOTO xml. +yyyymmdd_task=${PDY} + +# Base directory in which the daily subdirectories containing the MRMS +# grib2 files will appear after this script is done. We refer to this +# as the "processed" base directory because it contains the files after +# all processing by this script is complete. +basedir_proc=${OBS_DIR} + +# The environment variable OUTPUT_TIMES_ALL set in the ROCOTO XML is a +# scalar string containing all relevant forecast output times (each) in +# the form YYYYMMDDHH) separated by spaces. It isn't an array of strings +# because in ROCOTO, there doesn't seem to be a way to pass a bash array +# from the XML to task's script. To have an array-valued variable to +# work with, here, we create the new variable output_times_all that is +# the array-valued counterpart of OUTPUT_TIMES_ALL. +output_times_all=($(printf "%s" "${OUTPUT_TIMES_ALL}")) + +# List of times (each of the form YYYYMMDDHH) for which there is forecast +# output for the current day. We extract this list from the full list of +# all forecast output times (i.e. from all cycles). +output_times_crnt_day=($(printf "%s\n" "${output_times_all[@]}" | grep "^${yyyymmdd_task}")) + +# If there are no forecast output times on the day of the current task, +# exit the script. +num_output_times_crnt_day=${#output_times_crnt_day[@]} +if [[ ${num_output_times_crnt_day} -eq 0 ]]; then + print_info_msg " +None of the forecast output times fall in the current day. Thus, there +is no need to retrieve any obs files." 
+ exit +fi + +# Check whether any obs files already exist on disk. If so, adjust the +# starting archive hour. In the process, keep a count of the number of +# files that already exist on disk. +num_existing_files=0 +num_mrms_fields=${#mrms_fields[@]} +for (( i=0; i<${num_mrms_fields}; i++ )); do + for yyyymmddhh in ${output_times_crnt_day[@]}; do + yyyymmdd=$(echo ${yyyymmddhh} | cut -c1-8) + hh=$(echo ${yyyymmddhh} | cut -c9-10) + day_dir_proc="${basedir_proc}/${yyyymmdd}" + fn_proc="${fields_in_filenames[$i]}_${levels_in_filenames[$i]}_${yyyymmdd}-${hh}0000.grib2" + fp_proc="${day_dir_proc}/${fn_proc}" + if [[ -f ${fp_proc} ]]; then + num_existing_files=$((num_existing_files+1)) + print_info_msg " +File already exists on disk: + fp_proc = \"${fp_proc}\"" + else + break + fi + done +done + +# If the number of obs files that already exist on disk is equal to the +# number of files needed, then there is no need to retrieve any files. +num_needed_files=$((num_output_times_crnt_day*num_mrms_fields)) +if [[ ${num_existing_files} -eq $((num_needed_files)) ]]; then + print_info_msg " +All obs files needed for the current day (yyyymmdd_task) already exist +on disk: + yyyymmdd_task = \"${yyyymmdd_task}\" +Thus, there is no need to retrieve any files." + exit +# Otherwise, will need to retrieve files. +else + print_info_msg " +At least some obs files needed needed for the current day (yyyymmdd_task) +do not exist on disk: + yyyymmdd_task = \"${yyyymmdd_task}\" +The number of obs files needed is: + num_needed_files = ${num_needed_files} +The number of obs files that already exist on disk is: + num_existing_files = ${num_existing_files} +Will retrieve remaining files. +" +fi +# +#----------------------------------------------------------------------- +# +# At this point, at least some obs files for the current day need to be +# retrieved. 
+# +#----------------------------------------------------------------------- +# + +# Whether to move or copy files from raw to processed directories. +#mv_or_cp="mv" +mv_or_cp="cp" +# If the raw directories and files are to be removed at the end of this +# script, no need to copy the files since the raw directories are going +# to be removed anyway. +if [[ "${REMOVE_RAW_OBS}" == "TRUE" ]]; then + mv_or_cp="mv" +fi + +# Base directory that will contain the daily subdirectories in which the +# MRMS grib2 files retrieved from archive (tar) files will be placed. +# We refer to this as the "raw" base directory because it contains files +# as they are found in the archives before any processing by this script. +basedir_raw="${basedir_proc}/${yyyymmdd_task}/raw" + +# Time associated with the archive. MRMS data have daily archives that +# have the hour-of-day set to "00". +yyyymmddhh_arcv="${yyyymmdd_task}00" + +# Directory that will contain the MRMS grib2 files retrieved from the +# current 6-hourly archive file. We refer to this as the "raw" quarter- +# daily directory because it will contain the files as they are in the +# archive before any processing by this script. +day_dir_raw="${basedir_raw}/${yyyymmdd_task}" + +# Make sure the raw quarter-daily directory exists because it is used +# below as the output directory of the retrieve_data.py script (so if +# this directory doesn't already exist, that script will fail). Creating +# this directory also ensures that the raw base directory (basedir_raw) +# exists before we change location to it below. +mkdir -p ${day_dir_raw} + +# The retrieve_data.py script first extracts the contents of the archive +# file into the directory it was called from and then moves them to the +# specified output location (via the --output_path option). In order to +# avoid other get_obs_ndas tasks (i.e. 
those associated with other days) +# from interfering with (clobbering) these files (because extracted files +# from different get_obs_ndas tasks to have the same names or relative +# paths), we change location to the base raw directory so that files with +# same names are extracted into different directories. +cd ${basedir_raw} + +# Pull MRMS data from HPSS. This will get all 7 obs files in the current +# archive and place them in the raw quarter-daily directory, although we +# will make use of only 6 of these (we will not use the tm00 file). +cmd=" +python3 -u ${USHdir}/retrieve_data.py \ + --debug \ + --file_set obs \ + --config ${PARMdir}/data_locations.yml \ + --cycle_date ${yyyymmddhh_arcv} \ + --data_stores hpss \ + --data_type MRMS_obs \ + --output_path ${day_dir_raw} \ + --summary_file retrieve_data.log" + +print_info_msg "CALLING: ${cmd}" +$cmd || print_err_msg_exit "Could not retrieve obs from HPSS." +# +#----------------------------------------------------------------------- +# +# Loop over the 24 hour period starting with the zeroth hour of the day +# associated with this task and ending with the 23rd hour. +# +#----------------------------------------------------------------------- +# + +# Loop through all hours of the day associated with the task. For each +# hour, find the gzipped grib2 file in the raw daily directory that is +# closest in time to this hour. Then gunzip the file and copy it (in the +# process renaming it) to the processed location. 
+for hr in $(seq 0 1 23); do
+  yyyymmddhh=$(${DATE_UTIL} --date "${yyyymmdd_task} ${hr} hours" +%Y%m%d%H)
+  if [[ ${output_times_crnt_day[@]} =~ ${yyyymmddhh} ]]; then
+    for (( i=0; i<${num_mrms_fields}; i++ )); do
+      python ${USHdir}/mrms_pull_topofhour.py \
+        --valid_time ${yyyymmddhh} \
+        --outdir ${basedir_proc} \
+        --source ${basedir_raw} \
+        --product ${fields_in_filenames[$i]}
+    done
+  fi
+done
+#
+#-----------------------------------------------------------------------
+#
+# Clean up raw directories.
+#
+#-----------------------------------------------------------------------
+#
+if [[ "${REMOVE_RAW_OBS}" == "TRUE" ]]; then
+  print_info_msg "Removing raw directories and files..."
+  rm -rf ${basedir_raw} || print_err_msg_exit "\
+Failed to remove raw directories and files."
+fi
diff --git a/ush/get_obs_ndas.sh b/ush/get_obs_ndas.sh
new file mode 100755
index 0000000000..d98f390c8b
--- /dev/null
+++ b/ush/get_obs_ndas.sh
@@ -0,0 +1,305 @@
+#!/usr/bin/env bash

+
+#
+#-----------------------------------------------------------------------
+#
+# Source the variable definitions file and the bash utility functions.
+#
+#-----------------------------------------------------------------------
+#
+. $USHdir/source_util_funcs.sh
+source_config_for_task " " ${GLOBAL_VAR_DEFNS_FP}
+
+set -u
+#set -x
+#
+#-----------------------------------------------------------------------
+#
+# This script performs several important tasks for preparing data for
+# verification tasks. Depending on the value of the environment variable
+# OBTYPE=(CCPA|MRMS|NDAS|NOHRSC), the script will prepare that particular data
+# set.
+#
+# If data is not available on disk (in the location specified by
+# CCPA_OBS_DIR, MRMS_OBS_DIR, NDAS_OBS_DIR, or NOHRSC_OBS_DIR respectively),
+# the script attempts to retrieve the data from HPSS using the retrieve_data.py
+# script. 
Depending on the data set, there are a few strange quirks and/or
+# bugs in the way data is organized; see in-line comments for details.
+#
+#
+# NDAS (NAM Data Assimilation System) conventional observations
+# ----------
+# If data is available on disk, it must be in the following
+# directory structure and file name conventions expected by verification
+# tasks:
+#
+# {NDAS_OBS_DIR}/{YYYYMMDD}/prepbufr.ndas.{YYYYMMDDHH}
+#
+# Note that data retrieved from HPSS and other sources may be in a
+# different format: nam.t{hh}z.prepbufr.tm{prevhour}.nr, where hh is
+# either 00, 06, 12, or 18, and prevhour is the number of hours prior to
+# hh (00 through 05). If using custom staged data, you will have to
+# rename the files accordingly.
+#
+# If data is retrieved from HPSS, it will be automatically staged by
+# this script.
+#
+#-----------------------------------------------------------------------
+#
+
+# The day (in the form YYYYMMDD) associated with the current task via the
+# task's cycledefs attribute in the ROCOTO xml.
+yyyymmdd_task=${PDY}
+
+# Base directory in which the daily subdirectories containing the NDAS
+# prepbufr files will appear after this script is done. We refer to this
+# as the "processed" base directory because it contains the files after
+# all processing by this script is complete.
+basedir_proc=${OBS_DIR}
+
+# The environment variable OUTPUT_TIMES_ALL set in the ROCOTO XML is a
+# scalar string containing all relevant forecast output times (each in
+# the form YYYYMMDDHH) separated by spaces. It isn't an array of strings
+# because in ROCOTO, there doesn't seem to be a way to pass a bash array
+# from the XML to task's script. To have an array-valued variable to
+# work with, here, we create the new variable output_times_all that is
+# the array-valued counterpart of OUTPUT_TIMES_ALL. 
+output_times_all=($(printf "%s" "${OUTPUT_TIMES_ALL}")) + +# List of times (each of the form YYYYMMDDHH) for which there is forecast +# output for the current day. We extract this list from the full list of +# all forecast output times (i.e. from all cycles). +output_times_crnt_day=($(printf "%s\n" "${output_times_all[@]}" | grep "^${yyyymmdd_task}")) + +# If there are no forecast output times on the day of the current task, +# exit the script. +num_output_times_crnt_day=${#output_times_crnt_day[@]} +if [[ ${num_output_times_crnt_day} -eq 0 ]]; then + print_info_msg " +None of the forecast output times fall in the current day. Thus, there +is no need to retrieve any obs files." + exit +fi + +# Obs files will be obtained by extracting them from the relevant 6-hourly +# archives. Thus, we need the sequence of archive hours over which to +# loop. In the simplest case, this sequence will be "6 12 18 24". This +# will be the case if the forecast output times include all hours of the +# task's day and if none of the obs files for this day already exist on +# disk. In other cases, the sequence we loop over will be a subset of +# "6 12 18 24". +# +# To generate this sequence, we first set its starting and ending values +# as well as the interval. + +# Sequence interval must be 6 hours because the archives are 6-hourly. +arcv_hr_incr=6 + +# Initial guess for starting archive hour. This is set to the hour +# corresponding to the first forecast output time of the day. +hh_first=$(echo ${output_times_crnt_day[0]} | cut -c9-10) +hr_first=$((10#${hh_first})) +arcv_hr_start=$(( (hr_first/arcv_hr_incr + 1)*arcv_hr_incr )) + +# Ending archive hour. This is set to the hour corresponding to the last +# forecast output time of the day. +hh_last=$(echo ${output_times_crnt_day[-1]} | cut -c9-10) +hr_last=$((10#${hh_last})) +arcv_hr_end=$(( (hr_last/arcv_hr_incr + 1)*arcv_hr_incr )) + +# Check whether any obs files already exist on disk. If so, adjust the +# starting archive hour. 
In the process, keep a count of the number of +# files that already exist on disk. +num_existing_files=0 +for yyyymmddhh in ${output_times_crnt_day[@]}; do + yyyymmdd=$(echo ${yyyymmddhh} | cut -c1-8) + hh=$(echo ${yyyymmddhh} | cut -c9-10) + day_dir_proc="${basedir_proc}" + fn_proc="prepbufr.ndas.${yyyymmddhh}" + fp_proc="${day_dir_proc}/${fn_proc}" + if [[ -f ${fp_proc} ]]; then + num_existing_files=$((num_existing_files+1)) + print_info_msg " +File already exists on disk: + fp_proc = \"${fp_proc}\"" + else + hr=$((10#${hh})) + arcv_hr_start=$(( (hr/arcv_hr_incr + 1)*arcv_hr_incr )) + print_info_msg " +File does not exists on disk: + fp_proc = \"${fp_proc}\" +Setting the hour (since 00) of the first archive to retrieve to: + arcv_hr_start = \"${arcv_hr_start}\"" + break + fi +done + +# If the number of obs files that already exist on disk is equal to the +# number of files needed, then there is no need to retrieve any files. +num_needed_files=$((num_output_times_crnt_day)) +if [[ ${num_existing_files} -eq ${num_needed_files} ]]; then + print_info_msg " +All obs files needed for the current day (yyyymmdd_task) already exist +on disk: + yyyymmdd_task = \"${yyyymmdd_task}\" +Thus, there is no need to retrieve any files." + exit +# Otherwise, will need to retrieve files. In this case, set the sequence +# of hours corresponding to the archives from which files will be retrieved. 
+else + arcv_hrs=($(seq ${arcv_hr_start} ${arcv_hr_incr} ${arcv_hr_end})) + arcv_hrs_str="( "$( printf "%s " "${arcv_hrs[@]}" )")" + print_info_msg " +At least some obs files needed needed for the current day (yyyymmdd_task) +do not exist on disk: + yyyymmdd_task = \"${yyyymmdd_task}\" +The number of obs files needed is: + num_needed_files = ${num_needed_files} +The number of obs files that already exist on disk is: + num_existing_files = ${num_existing_files} +Will retrieve remaining files by looping over archives corresponding to +the following hours (since 00 of this day): + arcv_hrs = ${arcv_hrs_str} +" +fi +# +#----------------------------------------------------------------------- +# +# At this point, at least some obs files for the current day need to be +# retrieved. Thus, loop over the relevant archives that contain obs for +# the day given by yyyymmdd_task and retrieve files as needed. +# +#----------------------------------------------------------------------- +# + +# Whether to move or copy files from raw to processed directories. +#mv_or_cp="mv" +mv_or_cp="cp" +# If the raw directories and files are to be removed at the end of this +# script, no need to copy the files since the raw directories are going +# to be removed anyway. +if [[ "${REMOVE_RAW_OBS}" == "TRUE" ]]; then + mv_or_cp="mv" +fi + +# Base directory that will contain the daily subdirectories in which the +# NDAS prepbufr files retrieved from archive (tar) files will be placed. +# We refer to this as the "raw" base directory because it contains files +# as they are found in the archives before any processing by this script. +basedir_raw="${basedir_proc}/raw_${yyyymmdd_task}" + +for arcv_hr in ${arcv_hrs[@]}; do + + print_info_msg " +arcv_hr = ${arcv_hr}" + + # Calculate the time information for the current archive. 
+ yyyymmddhh_arcv=$(${DATE_UTIL} --date "${yyyymmdd_task} ${arcv_hr} hours" +%Y%m%d%H) + yyyymmdd_arcv=$(echo ${yyyymmddhh_arcv} | cut -c1-8) + hh_arcv=$(echo ${yyyymmddhh_arcv} | cut -c9-10) + + # Directory that will contain the NDAS prepbufr files retrieved from the + # current 6-hourly archive file. We refer to this as the "raw" quarter- + # daily directory because it will contain the files as they are in the + # archive before any processing by this script. + qrtrday_dir_raw="${basedir_raw}/${yyyymmddhh_arcv}" + + # Check whether any of the forecast output times for the day associated + # with this task fall in the time interval spanned by the current archive. + # If so, set the flag (do_retrieve) to retrieve the files in the current + # archive. + yyyymmddhh_qrtrday_start=$(${DATE_UTIL} --date "${yyyymmdd_arcv} ${hh_arcv} 6 hours ago" +%Y%m%d%H) + yyyymmddhh_qrtrday_end=$(${DATE_UTIL} --date "${yyyymmdd_arcv} ${hh_arcv} 1 hours ago" +%Y%m%d%H) + do_retrieve="FALSE" + nout=${#output_times_crnt_day[@]} + for (( i=0; i<${nout}; i++ )); do + output_time=${output_times_crnt_day[i]} + if [[ "${output_time}" -ge "${yyyymmddhh_qrtrday_start}" ]] && \ + [[ "${output_time}" -le "${yyyymmddhh_qrtrday_end}" ]]; then + do_retrieve="TRUE" + break + fi + done + + if [[ ${do_retrieve} == "TRUE" ]]; then + + # Make sure the raw quarter-daily directory exists because it is used + # below as the output directory of the retrieve_data.py script (so if + # this directory doesn't already exist, that script will fail). Creating + # this directory also ensures that the raw base directory (basedir_raw) + # exists before we change location to it below. + mkdir -p ${qrtrday_dir_raw} + + # The retrieve_data.py script first extracts the contents of the archive + # file into the directory it was called from and then moves them to the + # specified output location (via the --output_path option). In order to + # avoid other get_obs_ndas tasks (i.e. 
those associated with other days) + # from interfering with (clobbering) these files (because extracted files + # from different get_obs_ndas tasks to have the same names or relative + # paths), we change location to the base raw directory so that files with + # same names are extracted into different directories. + cd ${basedir_raw} + + # Pull NDAS data from HPSS. This will get all 7 obs files in the current + # archive and place them in the raw quarter-daily directory, although we + # will make use of only 6 of these (we will not use the tm00 file). + cmd=" + python3 -u ${USHdir}/retrieve_data.py \ + --debug \ + --file_set obs \ + --config ${PARMdir}/data_locations.yml \ + --cycle_date ${yyyymmddhh_arcv} \ + --data_stores hpss \ + --data_type NDAS_obs \ + --output_path ${qrtrday_dir_raw} \ + --summary_file retrieve_data.log" + + print_info_msg "CALLING: ${cmd}" + $cmd || print_err_msg_exit "Could not retrieve obs from HPSS." + + # Create the processed NDAS prepbufr files. This consists of simply + # copying or moving (and in the process renaming) them from the raw + # quarter-daily directory to the processed directory. Note that the + # tm06 files contain more/better observations than tm00 for the + # equivalent time, so we use those. + for hrs_ago in $(seq --format="%02g" 6 -1 1); do + yyyymmddhh=$(${DATE_UTIL} --date "${yyyymmdd_arcv} ${hh_arcv} ${hrs_ago} hours ago" +%Y%m%d%H) + yyyymmdd=$(echo ${yyyymmddhh} | cut -c1-8) + hh=$(echo ${yyyymmddhh} | cut -c9-10) + if [[ ${output_times_crnt_day[@]} =~ ${yyyymmddhh} ]]; then + fn_raw="nam.t${hh_arcv}z.prepbufr.tm${hrs_ago}.nr" + fp_raw="${qrtrday_dir_raw}/${fn_raw}" + day_dir_proc="${basedir_proc}" + mkdir -p ${day_dir_proc} + fn_proc="prepbufr.ndas.${yyyymmddhh}" + fp_proc="${day_dir_proc}/${fn_proc}" + ${mv_or_cp} ${fp_raw} ${fp_proc} + fi + done + + else + + print_info_msg " +None of the current day's forecast output times fall in the range spanned +by the current 6-hourly archive file. 
The bounds of the current archive +are: + yyyymmddhh_qrtrday_start = \"${yyyymmddhh_qrtrday_start}\" + yyyymmddhh_qrtrday_end = \"${yyyymmddhh_qrtrday_end}\" +The forecast output times are: + output_times_crnt_day = ($(printf "\"%s\" " ${output_times_crnt_day[@]}))" + + fi + +done +# +#----------------------------------------------------------------------- +# +# Clean up raw directories. +# +#----------------------------------------------------------------------- +# +if [[ "${REMOVE_RAW_OBS}" == "TRUE" ]]; then + print_info_msg "Removing raw directories and files..." + rm -rf ${basedir_raw} || print_err_msg_exit "\ +Failed to remove raw directories and files." +fi From e14b1b8e4485666594894fe9501da7f29e0a4df7 Mon Sep 17 00:00:00 2001 From: gerard ketefian Date: Tue, 3 Sep 2024 16:52:46 -0600 Subject: [PATCH 034/131] Bug fixes to get_obs_... tasks. --- ush/get_obs_ccpa.sh | 15 ++++++++++----- ush/get_obs_mrms.sh | 11 ++++++++--- ush/get_obs_ndas.sh | 11 ++++++++--- 3 files changed, 26 insertions(+), 11 deletions(-) diff --git a/ush/get_obs_ccpa.sh b/ush/get_obs_ccpa.sh index 5cbf6638c6..ef1d55eb05 100755 --- a/ush/get_obs_ccpa.sh +++ b/ush/get_obs_ccpa.sh @@ -185,12 +185,16 @@ output_times_all=($(printf "%s" "${OUTPUT_TIMES_ALL}")) # constructing this by extracting from the full list of all forecast APCP # output times (i.e. from all cycles) all elements that contain the current # task's day (in the form YYYYMMDD). -output_times_crnt_day=($(printf "%s\n" "${output_times_all[@]}" | grep "^${yyyymmdd_task}")) +output_times_crnt_day=() +if [[ ${output_times_all[@]} =~ ${yyyymmdd_task} ]]; then + output_times_crnt_day=( $(printf "%s\n" "${output_times_all[@]}" | grep "^${yyyymmdd_task}") ) +fi # If the 0th hour of the current day is in this list (and if it is, it # will be the first element), remove it because for APCP, that time is # considered part of the previous day (because it represents precipitation # that occurred during the last hour of the previous day). 
-if [[ ${output_times_crnt_day[0]} == "${yyyymmdd_task}00" ]]; then +if [[ ${#output_times_crnt_day[@]} -gt 0 ]] && \ + [[ ${output_times_crnt_day[0]} == "${yyyymmdd_task}00" ]]; then output_times_crnt_day=(${output_times_crnt_day[@]:1}) fi # If the 0th hour of the next day (i.e. the day after yyyymmdd_task) is @@ -208,9 +212,10 @@ fi num_output_times_crnt_day=${#output_times_crnt_day[@]} if [[ ${num_output_times_crnt_day} -eq 0 ]]; then print_info_msg " -None of the forecast APCP output times fall in the current day (including -the 0th hour of the next day). Thus, there is no need to retrieve any -obs files." +None of the forecast APCP output times fall within the day (including the +0th hour of the next day) associated with the current task (yyyymmdd_task): + yyyymmdd_task = \"${yyyymmdd_task}\" +Thus, there is no need to retrieve any obs files." exit fi diff --git a/ush/get_obs_mrms.sh b/ush/get_obs_mrms.sh index 23896bbf38..92fc24fa56 100755 --- a/ush/get_obs_mrms.sh +++ b/ush/get_obs_mrms.sh @@ -95,15 +95,20 @@ output_times_all=($(printf "%s" "${OUTPUT_TIMES_ALL}")) # List of times (each of the form YYYYMMDDHH) for which there is forecast # output for the current day. We extract this list from the full list of # all forecast output times (i.e. from all cycles). -output_times_crnt_day=($(printf "%s\n" "${output_times_all[@]}" | grep "^${yyyymmdd_task}")) +output_times_crnt_day=() +if [[ ${output_times_all[@]} =~ ${yyyymmdd_task} ]]; then + output_times_crnt_day=( $(printf "%s\n" "${output_times_all[@]}" | grep "^${yyyymmdd_task}") ) +fi # If there are no forecast output times on the day of the current task, # exit the script. num_output_times_crnt_day=${#output_times_crnt_day[@]} if [[ ${num_output_times_crnt_day} -eq 0 ]]; then print_info_msg " -None of the forecast output times fall in the current day. Thus, there -is no need to retrieve any obs files." 
+None of the forecast output times fall within the day associated with the +current task (yyyymmdd_task): + yyyymmdd_task = \"${yyyymmdd_task}\" +Thus, there is no need to retrieve any obs files." exit fi diff --git a/ush/get_obs_ndas.sh b/ush/get_obs_ndas.sh index d98f390c8b..441de7b31d 100755 --- a/ush/get_obs_ndas.sh +++ b/ush/get_obs_ndas.sh @@ -69,15 +69,20 @@ output_times_all=($(printf "%s" "${OUTPUT_TIMES_ALL}")) # List of times (each of the form YYYYMMDDHH) for which there is forecast # output for the current day. We extract this list from the full list of # all forecast output times (i.e. from all cycles). -output_times_crnt_day=($(printf "%s\n" "${output_times_all[@]}" | grep "^${yyyymmdd_task}")) +output_times_crnt_day=() +if [[ ${output_times_all[@]} =~ ${yyyymmdd_task} ]]; then + output_times_crnt_day=( $(printf "%s\n" "${output_times_all[@]}" | grep "^${yyyymmdd_task}") ) +fi # If there are no forecast output times on the day of the current task, # exit the script. num_output_times_crnt_day=${#output_times_crnt_day[@]} if [[ ${num_output_times_crnt_day} -eq 0 ]]; then print_info_msg " -None of the forecast output times fall in the current day. Thus, there -is no need to retrieve any obs files." +None of the forecast output times fall within the day associated with the +current task (yyyymmdd_task): + yyyymmdd_task = \"${yyyymmdd_task}\" +Thus, there is no need to retrieve any obs files." exit fi From 609b4e99efbc06463329cb3d8348219c7843fc5c Mon Sep 17 00:00:00 2001 From: gerard ketefian Date: Tue, 3 Sep 2024 17:02:09 -0600 Subject: [PATCH 035/131] Change paths to archive files to make retrieve_data.py work with new get_obs_...sh scripts. 
--- parm/data_locations.yml | 7 ++----- 1 file changed, 2 insertions(+), 5 deletions(-) diff --git a/parm/data_locations.yml b/parm/data_locations.yml index dd3b5ddd17..a3712a1972 100644 --- a/parm/data_locations.yml +++ b/parm/data_locations.yml @@ -305,10 +305,7 @@ CCPA_obs: - /NCEPPROD/hpssprod/runhistory/rh{yyyy}/{yyyy}{mm}/{yyyy}{mm}{dd} - /NCEPPROD/hpssprod/runhistory/rh{yyyy}/{yyyy}{mm}/{yyyy}{mm}{dd} archive_internal_dir: - - "./00" - - "./06" - - "./12" - - "./18" + - "./{hh}" archive_file_names: - "com2_ccpa_prod_ccpa.{yyyy}{mm}{dd}.tar" - "gpfs_dell1_nco_ops_com_ccpa_prod_ccpa.{yyyy}{mm}{dd}.tar" @@ -316,7 +313,7 @@ CCPA_obs: - "com_ccpa_v4.2_ccpa.{yyyy}{mm}{dd}.tar" file_names: obs: - - "ccpa.t{hh}z.01h.hrap.conus.gb2" + - "ccpa.t*z.01h.hrap.conus.gb2" MRMS_obs: hpss: From ed6b6771aa105cd9df5f1cc89acef02934e79dd7 Mon Sep 17 00:00:00 2001 From: gerard ketefian Date: Tue, 3 Sep 2024 17:21:19 -0600 Subject: [PATCH 036/131] Move most of the code for getting obs files out of the ex-script exregional_get_verif_obs.sh since those are now in the ush/get_obs_[ccpa|mrms|ndas|nohrsc].sh scripts. --- scripts/exregional_get_verif_obs.sh | 912 +--------------------------- 1 file changed, 19 insertions(+), 893 deletions(-) diff --git a/scripts/exregional_get_verif_obs.sh b/scripts/exregional_get_verif_obs.sh index 314273ba93..a055fc0bc4 100755 --- a/scripts/exregional_get_verif_obs.sh +++ b/scripts/exregional_get_verif_obs.sh @@ -18,7 +18,6 @@ source_config_for_task " " ${GLOBAL_VAR_DEFNS_FP} #----------------------------------------------------------------------- # { save_shell_opts; . $USHdir/preamble.sh; } > /dev/null 2>&1 -set -x # #----------------------------------------------------------------------- # @@ -108,908 +107,35 @@ set -x # # If data is retrieved from HPSS, it will automatically staged by this # this script. 
- -#----------------------------------------------------------------------- -# Create and enter top-level obs directory (so temporary data from HPSS won't collide with other tasks) -mkdir -p ${OBS_DIR} -cd ${OBS_DIR} - -# Set log file for retrieving obs -logfile=retrieve_data.log - -# PDY and cyc are defined in rocoto XML...they are the yyyymmdd and hh for initial forecast hour respectively -iyyyy=$(echo ${PDY} | cut -c1-4) -imm=$(echo ${PDY} | cut -c5-6) -idd=$(echo ${PDY} | cut -c7-8) -ihh=${cyc} - -echo -echo "HELLO AAAAAAAAAAA" -iyyyymmddhh=${PDY}${cyc} -echo "iyyyymmddhh = ${iyyyymmddhh}" - -# Unix date utility needs dates in yyyy-mm-dd hh:mm:ss format -unix_init_DATE="${iyyyy}-${imm}-${idd} ${ihh}:00:00" - -# This awk expression gets the last item of the list $FHR -fcst_length=$(echo ${FHR} | awk '{ print $NF }') - -echo -echo "BYE 00000000" -vdate_last=$($DATE_UTIL -d "${unix_init_DATE} ${fcst_length} hours" +%Y%m%d%H) -if [[ ${OBTYPE} == "NDAS" ]]; then -echo "BYE 111111111" - vhh_last=$(echo ${vdate_last} | cut -c9-10) - #hours_to_add=$(( vhh_last + 6 - (vhh_last % 6) )) - hours_to_add=$(( 6 - (vhh_last % 6) )) - fcst_length_rounded_up=$(( fcst_length + hours_to_add )) -# vdate_last_rounded_up=$($DATE_UTIL -d "${unix_init_DATE} ${fcst_length_rounded_up} hours" +%Y%m%d%H) -# fcst_length=${fcst_length_rounded_up} -fi - -# Make sure fcst_length isn't octal (leading zero) -fcst_length=$((10#${fcst_length})) - -processed_fp_list=() -current_fcst=0 -while [[ ${current_fcst} -le ${fcst_length} ]]; do - -echo -echo "HELLO BBBBBBBBBBB" -echo "current_fcst = ${current_fcst}" - - # Calculate valid date info using date utility - vdate=$($DATE_UTIL -d "${unix_init_DATE} ${current_fcst} hours" +%Y%m%d%H) - unix_vdate=$($DATE_UTIL -d "${unix_init_DATE} ${current_fcst} hours" "+%Y-%m-%d %H:00:00") - vyyyymmdd=$(echo ${vdate} | cut -c1-8) - vhh=$(echo ${vdate} | cut -c9-10) -echo -echo "BYE 222222222" -echo "vhh = ${vhh}" - - # Calculate valid date + 1 day; this is needed 
because some obs files - # are stored in the *next* day's 00h directory - vdate_p1d=$($DATE_UTIL -d "${unix_init_DATE} ${current_fcst} hours 1 day" +%Y%m%d%H) - vyyyymmdd_p1d=$(echo ${vdate_p1d} | cut -c1-8) - -echo -echo "HELLO CCCCCCCCCC" -echo "vyyyymmdd = ${vyyyymmdd}" -echo "vyyyymmdd_p1d = ${vyyyymmdd_p1d}" -echo "ihh = ${ihh}" - - #remove leading zero again, this time keep original - vhh_noZero=$((10#${vhh})) -# -#----------------------------------------------------------------------- -# -# Retrieve CCPA observations. -# -#----------------------------------------------------------------------- -# - if [[ ${OBTYPE} == "CCPA" ]]; then - - # CCPA is accumulation observations. We do not need to retrieve any - # observed accumulations at forecast hour 0 because there aren't yet - # any accumulations in the forecast(s) to compare it to. - if [[ ${current_fcst} -eq 0 ]]; then - current_fcst=$((current_fcst + 1)) - continue - fi - - # CCPA accumulation period to consider. Here, we only retrieve data for - # 01h accumulations (see note above). Other accumulations (03h, 06h, 24h) - # are obtained elsewhere in the workflow by adding up these 01h accumulations. - accum=01 - - # Base directory in which the daily subdirectories containing the CCPA - # grib2 files will appear after this script is done, and the daily such - # subdirectory for the current valid time (year, month, and day). We - # refer to these as the "processed" base and daily subdirectories because - # they contain the final files after all processing by this script is - # complete. - ccpa_basedir_proc=${OBS_DIR} - ccpa_day_dir_proc="${ccpa_basedir_proc}/${vyyyymmdd}" - # Make sure these directories exist. - mkdir -p ${ccpa_day_dir_proc} - - # Name of the grib2 file to extract from the archive (tar) file as well - # as the name of the processed grib2 file. - ccpa_fn="ccpa.t${vhh}z.${accum}h.hrap.conus.gb2" - - # Full path to the location of the processed CCPA grib2 file for the - # current valid time. 
Note that this path includes the valid date (year, - # month, and day) information in the name of a subdirectory and the valid - # hour-of-day in the name of the file. - ccpa_fp_proc="${ccpa_day_dir_proc}/${ccpa_fn}" - - # Store the full path to the processed file in a list for later use. - if [ ${vyyyymmdd}${vhh} -le ${vdate_last} ]; then - processed_fp_list+=(${ccpa_fp_proc}) - fi - - # Check if the CCPA grib2 file for the current valid time already exists - # at its procedded location on disk. If so, skip and go to the next valid - # time. If not, pull it. - if [[ -f "${ccpa_fp_proc}" ]]; then - - echo "${OBTYPE} file exists on disk:" - echo " ccpa_fp_proc = \"${ccpa_fp_proc}\"" - echo "Will NOT attempt to retrieve from remote locations." - - else - - echo "${OBTYPE} file does not exist on disk:" - echo " ccpa_fp_proc = \"${ccpa_fp_proc}\"" - echo "Will attempt to retrieve from remote locations." - # - #----------------------------------------------------------------------- - # - # Below, we will use the retrieve_data.py script to retrieve the CCPA - # grib2 file from a data store (e.g. HPSS). Before doing so, note the - # following: - # - # * The daily archive (tar) file containing CCPA obs has a name of the - # form - # - # [PREFIX].YYYYMMDD.tar - # - # where YYYYMMDD is a given year, month, and day combination, and - # [PREFIX] is a string that is not relevant to the discussion here - # (the value it can take on depends on which of several time periods - # YYYYMMDD falls in, and the retrieve_data.py tries various values - # until it finds one for which a tar file exists). Unintuitively, this - # archive file contains accumulation data for valid times starting at - # hour 19 of the PREVIOUS day (YYYYMM[DD-1]) to hour 18 of the current - # day (YYYYMMDD). In other words, the valid times of the contents of - # this archive file are shifted back by 6 hours relative to the time - # string appearing in the name of the file. See section "DETAILS..." 
- # for a detailed description of the directory structure in the CCPA - # archive files. - # - # * We call retrieve_data.py in a temporary cycle-specific subdirectory - # in order to prevent get_obs_ccpa tasks for different cycles from - # clobbering each other's output. We refer to this as the "raw" CCPA - # base directory because it contains files as they are found in the - # archives before any processing by this script. - # - # * In each (cycle-specific) raw base directory, the data is arranged in - # daily subdirectories with the same timing as in the archive (tar) - # files (which are described in the section "DETAILS..." below). In - # particular, each daily subdirectory has the form YYYYMDD, and it may - # contain CCPA grib2 files for accumulations valid at hour 19 of the - # previous day (YYYYMM[DD-1]) to hour 18 of the current day (YYYYMMDD). - # (Data valid at hours 19-23 of the current day (YYYYMMDD) go into the - # daily subdirectory for the next day, i.e. YYYYMM[DD+1].) We refer - # to these as raw daily (sub)directories to distinguish them from the - # processed daily subdirectories under the processed (final) CCPA base - # directory (ccpa_basedir_proc). - # - # * For a given cycle, some of the valid times at which there is forecast - # output may not have a corresponding file under the raw base directory - # for that cycle. This is because another cycle that overlaps this cycle - # has already obtained the grib2 CCPA file for that valid time and placed - # it in its processed location; as a result, the retrieveal of that grib2 - # file for this cycle is skipped. - # - # * To obtain a more intuitive temporal arrangement of the data in the - # processed CCPA directory structure than the temporal arrangement used - # in the archives and raw directories, we process the raw files such - # that the data in the processed directory structure is shifted forward - # in time 6 hours relative to the data in the archives and raw directories. 
- # This results in a processed base directory that, like the raw base - # directory, also contains daily subdirectories of the form YYYYMMDD, - # but each such subdirectory may only contain CCPA data at valid hours - # within that day, i.e. at valid times YYYYMMDD[00, 01, ..., 23] (but - # may not contain data that is valid on the previous, next, or any other - # day). - # - # * For data between 20180718 and 20210504, the 01h accumulation data - # (which is the only accumulation we are retrieving) have incorrect - # metadata under the "00" directory in the archive files (meaning for - # hour 00 and hours 19-23, which are the ones in the "00" directory). - # Below, we use wgrib2 to make a correction for this when transferring - # (moving or copying) grib2 files from the raw daily directories to - # the processed daily directories. - # - # - # DETAILS OF DIRECTORY STRUCTURE IN CCPA ARCHIVE (TAR) FILES - # ---------------------------------------------------------- - # - # The daily archive file containing CCPA obs is named - # - # [PREFIX].YYYYMMDD.tar - # - # This file contains accumulation data for valid times starting at hour - # 19 of the PREVIOUS day (YYYYMM[DD-1]) to hour 18 of the current day - # (YYYYMMDD). In particular, when untarred, the daily archive file - # expands into four subdirectories: 00, 06, 12, and 18. The 06, 12, and - # 18 subdirectories contain grib2 files for accumulations valid at or - # below the hour-of-day given by the subdirectory name (and on YYYYMMDD). - # For example, the 06 directory contains data valid at: - # - # * YYYYMMDD[01, 02, 03, 04, 05, 06] for 01h accumulations; - # * YYYYMMDD[03, 06] for 03h accumulations; - # * YYYYMMDD[06] for 06h accumulations. - # - # The valid times for the data in the 12 and 18 subdirectories are - # analogous. However, the 00 subdirectory is different in that it - # contains accumulations at hour 00 on YYYYMMDD as well as ones BEFORE - # this time, i.e. 
the data for valid times other than YYYYMMDD00 are on - # the PREVIOUS day. Thus, the 00 subdirectory contains data valid at - # (note the DD-1, meaning one day prior): - # - # * YYYYMM[DD-1][19, 20, 21, 22, 23] and YYYYMMDD00 for 01h accumulations; - # * YYYYMM[DD-1][19] and YYYYMMDD00 for 03h accumulations; - # * YYYYMMDD00 for 06h accumulations. - # - #----------------------------------------------------------------------- - # - - # Set parameters for retrieving CCPA data using retrieve_data.py. - # Definitions: - # - # valid_time: - # The valid time in the name of the archive (tar) file from which data - # will be pulled. Due to the way the data is arranged in the CCPA archive - # files (as described above), for valid hours 19 to 23 of the current day, - # this must be set to the corresponding valid time on the NEXT day. - # - # ccpa_basedir_raw: - # Raw base directory that will contain the raw daily subdirectory in which - # the retrieved CCPA grib2 file will be placed. Note that this must be - # cycle-dependent (where the cycle is given by the variable iyyyymmddhh) - # to avoid get_obs_ccpa workflow tasks for other cycles writing to the - # same directories/files. Note also that this doesn't have to depend on - # the current valid hour (0-18 vs. 19-23), but for clarity and ease of - # debugging, here we do make it valid-hour-dependent. - # - # ccpa_day_dir_raw: - # Raw daily subdirectory under the raw base directory. This is dependent - # on the valid hour (i.e. different for hours 19-23 than for hours 0-18) - # in order to maintain the same data timing arrangement in the raw daily - # directories as in the archive files. 
- # - if [[ ${vhh_noZero} -ge 0 && ${vhh_noZero} -le 18 ]]; then - valid_time=${vyyyymmdd}${vhh} - ccpa_basedir_raw="${ccpa_basedir_proc}/raw_cyc${iyyyymmddhh}" - ccpa_day_dir_raw="${ccpa_basedir_raw}/${vyyyymmdd}" - elif [[ ${vhh_noZero} -ge 19 && ${vhh_noZero} -le 23 ]]; then - valid_time=${vyyyymmdd_p1d}${vhh} - ccpa_basedir_raw="${ccpa_basedir_proc}/raw_cyc${iyyyymmddhh}_vhh19-23" - ccpa_day_dir_raw="${ccpa_basedir_raw}/${vyyyymmdd_p1d}" - fi - mkdir -p ${ccpa_day_dir_raw} - - # Before calling retrieve_data.py, change location to the raw base - # directory to avoid get_obs_ccpa tasks for other cycles from clobbering - # the output from this call to retrieve_data.py. Note that retrieve_data.py - # extracts the CCPA tar files into the directory it was called from, - # which is the working directory of this script right before retrieve_data.py - # is called. - cd ${ccpa_basedir_raw} - - # Pull CCPA data from HPSS. This will get a single grib2 (.gb2) file - # corresponding to the current valid time (valid_time). - cmd=" - python3 -u ${USHdir}/retrieve_data.py \ - --debug \ - --file_set obs \ - --config ${PARMdir}/data_locations.yml \ - --cycle_date ${valid_time} \ - --data_stores hpss \ - --data_type CCPA_obs \ - --output_path ${ccpa_day_dir_raw} \ - --summary_file ${logfile}" - - echo "CALLING: ${cmd}" - $cmd || print_err_msg_exit "\ - Could not retrieve CCPA data from HPSS. - - The following command exited with a non-zero exit status: - ${cmd} -" - - # Create the processed CCPA grib2 files. This usually consists of just - # moving or copying the raw file to its processed location, but for valid - # times between 20180718 and 20210504, it involves using wgrib2 to correct - # an error in the metadata of the raw file and writing the corrected data - # to a new grib2 file in the processed location. - # - # Since this script is part of a workflow, another get_obs_ccpa task (i.e. 
- # for another cycle) may have extracted and placed the current file in its - # processed location between the time we checked for its existence above - # (and didn't find it) and now. This can happen because there can be - # overlap between the verification times for the current cycle and those - # of other cycles. For this reason, check again for the existence of the - # processed file. If it has already been created by another get_obs_ccpa - # task, don't bother to recreate it. - if [[ -f "${ccpa_fp_proc}" ]]; then - - echo "${OBTYPE} file exists on disk:" - echo " ccpa_fp_proc = \"{ccpa_fp_proc}\"" - echo "It was likely created by a get_obs_ccpa workflow task for another cycle that overlaps the current one." - echo "NOT moving or copying file from its raw location to its processed location." - - else - - # Full path to the CCPA file that was pulled and extracted above and - # placed in the raw directory. - ccpa_fp_raw="${ccpa_day_dir_raw}/${ccpa_fn}" - - #mv_or_cp="mv" - mv_or_cp="cp" - if [[ ${vhh_noZero} -ge 1 && ${vhh_noZero} -le 18 ]]; then - ${mv_or_cp} ${ccpa_fp_raw} ${ccpa_fp_proc} - elif [[ (${vhh_noZero} -eq 0) || (${vhh_noZero} -ge 19 && ${vhh_noZero} -le 23) ]]; then - # One hour CCPA files have incorrect metadata in the files under the "00" - # directory from 20180718 to 20210504. After data is pulled, reorganize - # into correct valid yyyymmdd structure. - if [[ ${vyyyymmdd} -ge 20180718 && ${vyyyymmdd} -le 20210504 ]]; then - wgrib2 ${ccpa_fp_raw} -set_date -24hr -grib ${ccpa_fp_proc} -s - else - ${mv_or_cp} ${ccpa_fp_raw} ${ccpa_fp_proc} - fi - fi - - fi - - fi -# -#----------------------------------------------------------------------- -# -# Retrieve MRMS observations. 
-# -#----------------------------------------------------------------------- -# - elif [[ ${OBTYPE} == "MRMS" ]]; then - - # Base directory in which the daily subdirectories containing the MRMS - # grib2 files for REFC (composite reflectivity) and REFC (echo top) will - # be located after this script is done, and the daily such subdirectory - # for the current valid time (year, month, and day). We refer to these - # as the "processed" base and daily subdirectories because they contain - # the final files after all processing by this script is complete. - mrms_basedir_proc=${OBS_DIR} - mrms_day_dir_proc="${mrms_basedir_proc}/${vyyyymmdd}" - - # Loop over the fields (REFC and RETOP). - for field in ${VAR[@]}; do - - # Set field-dependent parameters needed in forming grib2 file names. - if [ "${field}" = "REFC" ]; then - file_base_name="MergedReflectivityQCComposite" - level="_00.50_" - elif [ "${field}" = "RETOP" ]; then - file_base_name="EchoTop" - level="_18_00.50_" - else - echo "Invalid field: ${field}" - print_err_msg_exit "\ - Invalid field specified: ${field} - - Valid options are 'REFC', 'RETOP'. -" - fi - - # Name of the MRMS grib2 file for the current field and valid time that - # will appear in the processed daily subdirectory after this script finishes. - # This is the name of the processed file. Note that this is generally - # not the name of the gzipped grib2 files that may be retrieved below - # from archive files using the retrieve_data.py script. - mrms_fn="${file_base_name}${level}${vyyyymmdd}-${vhh}0000.grib2" - - # Full path to the processed MRMS grib2 file for the current field and - # valid time. - mrms_fp_proc="${mrms_day_dir_proc}/${mrms_fn}" - - # Store the full path to the processed file in a list for later use. - if [ ${vyyyymmdd}${vhh} -le ${vdate_last} ]; then - processed_fp_list+=(${mrms_fp_proc}) - fi - - # Check if the processed MRMS grib2 file for the current field and valid - # time already exists on disk. 
If so, skip this valid time and go to the - # next one. If not, pull it. - if [[ -f "${mrms_fp_proc}" ]]; then - - echo "${OBTYPE} file exists on disk:" - echo " mrms_fp_proc = \"${mrms_fp_proc}\"" - echo "Will NOT attempt to retrieve from remote locations." - - else - - echo "${OBTYPE} file does not exist on disk:" - echo " mrms_fp_proc = \"${mrms_fp_proc}\"" - echo "Will attempt to retrieve from remote locations." - - # Base directory that will contain the daily subdirectories in which the - # gzipped MRMS grib2 files retrieved from archive files will be placed, - # and the daily subdirectory for the current valid year, month, and day. - # We refer to these as the "raw" MRMS base and daily directories because - # they contain files as they are found in the archives before any processing - # by this script. - # - # Note that the name of the raw base directory depends on (contains) the - # valid year, month, and day (but not on the cycle, i.e. not on iyyyymmddhh) - # in order to avoid having get_obs_mrms tasks from other cycles clobbering - # the output from this one. It is also possible to make the name of this - # directory name depend instead on the cycle, but that turns out to cause - # an inefficiency in that get_obs_mrms tasks for different cycles will - # not be able to detect that another cycle has already retrieved the data - # for the current valid day will unnecessarily repeat the retrieval. - mrms_basedir_raw="${mrms_basedir_proc}/raw_day${vyyyymmdd}" - mrms_day_dir_raw="${mrms_basedir_raw}/${vyyyymmdd}" - - # Check if the raw daily directory already exists on disk. If so, it - # means all the gzipped MRMS grib2 files -- i.e. for both REFC and RETOP - # and for all times (hours, minutes, and seconds) in the current valid - # day -- have already been or are in the process of being retrieved from - # the archive (tar) files. If so, skip the retrieval process. If not, - # proceed to retrieve all the files and place them in the raw daily - # directory. 
- # - # Note that despite the check on the existence of the raw daily directory - # below, it is possible for two get_obs_mrms tasks to try to retrieve - # obs for the same day. To minimize this possibility, sleep for a random - # number of seconds (with a maximum wait of maxwait seconds set below) - # before performing the directory existence check - maxwait=30 - sleep_duration_secs=$((RANDOM % maxwait)) - echo "Sleeping for ${sleep_duration_secs} seconds..." - sleep "${sleep_duration_secs}s" - - if [[ -d "${mrms_day_dir_raw}" ]]; then - - echo "${OBTYPE} raw daily directory for day ${vyyyymmdd} exists on disk:" - echo " mrms_day_dir_raw = \"${mrms_day_dir_raw}\"" - echo "This means MRMS files for all hours of the current valid day (${vyyyymmdd}) have been or are being retrieved." - echo "Thus, we will NOT attempt to retrieve MRMS data for the current valid time from remote locations." - - else - - mkdir -p ${mrms_day_dir_raw} - valid_time=${vyyyymmdd}${vhh} - - # Before calling retrieve_data.py, change location to the raw base - # directory to avoid get_obs_mrms tasks for other cycles from clobbering - # the output from this call to retrieve_data.py. Note that retrieve_data.py - # extracts the MRMS tar files into the directory it was called from, - # which is the working directory of this script right before retrieve_data.py - # is called. - cd ${mrms_basedir_raw} - - # Use the retrieve_data.py script to retrieve all the gzipped MRMS grib2 - # files -- i.e. for both REFC and RETOP and for all times (hours, minutes, - # and seconds) in the current valid day -- and place them in the raw daily - # directory. Note that this will pull both the REFC and RETOP files in - # one call. 
- cmd=" - python3 -u ${USHdir}/retrieve_data.py \ - --debug \ - --file_set obs \ - --config ${PARMdir}/data_locations.yml \ - --cycle_date ${valid_time} \ - --data_stores hpss \ - --data_type MRMS_obs \ - --output_path ${mrms_day_dir_raw} \ - --summary_file ${logfile}" - - echo "CALLING: ${cmd}" - - $cmd || print_err_msg_exit "\ - Could not retrieve MRMS data from HPSS - - The following command exited with a non-zero exit status: - ${cmd} -" - - # Create a flag file that can be used to confirm the completion of the - # retrieval of all files for the current valid day. - touch ${mrms_day_dir_raw}/pull_completed.txt - - fi - - # Make sure the retrieval process for the current day (which may have - # been executed above for this cycle or for another cycle) has completed - # by checking for the existence of the flag file that marks completion. - # If not, keep checking until the flag file shows up. - while [[ ! -f "${mrms_day_dir_raw}/pull_completed.txt" ]]; do - echo "Waiting for the retrieval process for valid day ${vyyyymmdd} to complete..." - sleep 5s - done - - # Since this script is part of a workflow, another get_obs_mrms task (i.e. - # for another cycle) may have extracted and placed the current file in its - # processed location between the time we checked for its existence above - # (and didn't find it) and now. This can happen because there can be - # overlap between the verification times for the current cycle and those - # of other cycles. For this reason, check again for the existence of the - # processed file. If it has already been created by another get_obs_mrms - # task, don't bother to recreate it. - if [[ -f "${mrms_fp_proc}" ]]; then - - echo "${OBTYPE} file exists on disk:" - echo " mrms_fp_proc = \"${mrms_fp_proc}\"" - echo "Will NOT attempt to retrieve from remote locations." 
- - else - - # Search the raw daily directory for the current valid day to find the - # gizipped MRMS grib2 file whose time stamp (in the file name) is closest - # to the current valid day and hour. Then unzip that file and copy it - # to the processed daily directory, in the process renaming it to replace - # the minutes and hours in the file name with "0000". - valid_time=${vyyyymmdd}${vhh} - python ${USHdir}/mrms_pull_topofhour.py \ - --valid_time ${valid_time} \ - --outdir ${mrms_basedir_proc} \ - --source ${mrms_basedir_raw} \ - --product ${file_base_name} - - fi - - fi - - done -# -#----------------------------------------------------------------------- -# -# Retrieve NDAS observations. -# -#----------------------------------------------------------------------- -# - elif [[ ${OBTYPE} == "NDAS" ]]; then - - # Calculate valid date plus 1 hour. This is needed because we need to - # check whether this date corresponds to one of the valid hours-of-day - # 00, 06, 12, and 18 on which the NDAS archives are provided. - unix_vdate_p1h=$($DATE_UTIL -d "${unix_init_DATE} $((current_fcst+1)) hours" "+%Y-%m-%d %H:00:00") - vdate_p1h=$($DATE_UTIL -d "${unix_vdate_p1h}" +%Y%m%d%H) - vyyyymmdd_p1h=$(echo ${vdate_p1h} | cut -c1-8) - vhh_p1h=$(echo ${vdate_p1h} | cut -c9-10) - vhh_p1h_noZero=$((10#${vhh_p1h})) - -echo "" -echo "HELLO DDDDDDDDDDD" -echo "vdate = ${vdate}" -echo "vyyyymmdd = ${vyyyymmdd}" -echo "vhh = ${vhh}" -echo "vhh_noZero = ${vhh_noZero}" -echo "vdate = ${vdate}" -echo "vdate_p1h = ${vdate_p1h}" - - # Base directory in which the hourly NDAS prepbufr files will be located. - # We refer to this as the "processed" base directory because it contains - # the final files after all processing by this script is complete. - ndas_basedir_proc=${OBS_DIR} - - # Name of the NDAS prepbufr file for the current valid time that will - # appear in the processed daily subdirectory after this script finishes. - # This is the name of the processed file. 
Note that this is not the - # same as the name of the raw file, i.e. the file extracted from the - # archive (tar) file retrieved below by the retrieve_data.py script. - ndas_fn="prepbufr.ndas.${vdate}" - - # Full path to the processed NDAS prepbufr file for the current field and - # valid time. - ndas_fp_proc="${ndas_basedir_proc}/${ndas_fn}" - - # Store the full path to the processed file in a list for later use. -echo -echo "EEEEEEEEEEEEEE" - if [ ${vdate} -le ${vdate_last} ]; then -echo "FFFFFFFFFFFFFF" -echo "processed_fp_list = |${processed_fp_list[@]}" - processed_fp_list+=(${ndas_fp_proc}) - fi - - # Check if the processed NDAS prepbufr file for the current valid time - # already exists on disk. If so, skip this valid time and go to the next - # one. - if [[ -f "${ndas_fp_proc}" ]]; then - - echo "${OBTYPE} file exists on disk:" - echo " ndas_fp_proc = \"${ndas_fp_proc}\"" - echo "Will NOT attempt to retrieve from remote locations." - - else - - echo "${OBTYPE} file does not exist on disk:" - echo " ndas_fp_proc = \"${ndas_fp_proc}\"" - echo "Will attempt to retrieve from remote locations." - # NDAS data is available in 6-hourly combined tar files, each with 7 1-hour prepbufr files: - # nam.tHHz.prepbufr.tm00.nr, nam.tHHz.prepbufr.tm01.nr, ... , nam.tHHz.prepbufr.tm06.nr - # - # The "tm" here means "time minus", so nam.t12z.prepbufr.tm00.nr is valid for 12z, - # nam.t00z.prepbufr.tm03.nr is valid for 21z the previous day, etc. - # This means that every six hours we have two obs files valid for the same time: - # nam.tHHz.prepbufr.tm00.nr and nam.t[HH+6]z.prepbufr.tm06.nr - # We want to use the tm06 file because it contains more/better obs (confirmed with EMC: even - # though the earlier files are larger, this is because the time window is larger) - - # Whether to move or copy extracted files from the raw directories to their - # final locations. 
- #mv_or_cp="mv" - mv_or_cp="cp" - -echo "" -echo "HELLO GGGGGGGGGGGGG" -echo "vhh_noZero = ${vhh_noZero}" -echo "vhh_p1h_noZero = ${vhh_p1h_noZero}" - - # Due to the way NDAS archives are organized, we can only retrieve the - # archive (tar) file containing data for the current valid hour (and the - # 5 hours preceeding it) if the hour-of-day corresponding to the current - # valid time plus 1 hour corresponds to one of 0, 6, 12, and 18. - if [[ ${vhh_p1h_noZero} -eq 0 || ${vhh_p1h_noZero} -eq 6 || \ - ${vhh_p1h_noZero} -eq 12 || ${vhh_p1h_noZero} -eq 18 || \ - ${current_fcst} -eq ${fcst_length} ]]; then - - if [[ ${vhh_p1h_noZero} -eq 0 || ${vhh_p1h_noZero} -eq 6 || \ - ${vhh_p1h_noZero} -eq 12 || ${vhh_p1h_noZero} -eq 18 ]]; then - unix_vdate_archive="${unix_vdate_p1h}" - vdate_archive="${vdate_p1h}" - vyyyymmdd_archive="${vyyyymmdd_p1h}" - vhh_archive=${vhh_p1h} - elif [[ ${current_fcst} -eq ${fcst_length} ]]; then - hours_to_archive=$(( 6 - (vhh % 6) )) - unix_vdate_archive=$($DATE_UTIL -d "${unix_vdate} ${hours_to_archive} hours" "+%Y-%m-%d %H:00:00") - vdate_archive=$($DATE_UTIL -d "${unix_vdate} ${hours_to_archive} hours" +%Y%m%d%H) - vyyyymmdd_archive=$(echo ${vdate_archive} | cut -c1-8) - vhh_archive=$(echo ${vdate_archive} | cut -c9-10) - fi - - # Base directory that will contain the 6-hourly subdirectories in which - # the NDAS prepbufr files retrieved from archive files will be placed, - # and the 6-hourly subdirectory for the current valid time plus 1 hour. - # We refer to these as the "raw" NDAS base and 6-hourly directories - # because they contain files as they are found in the archives before - # any processing by this script. - ndas_basedir_raw="${ndas_basedir_proc}/raw_day${vyyyymmdd_archive}" - ndas_day_dir_raw="${ndas_basedir_raw}/${vdate_archive}" - - # Check if the raw 6-hourly directory already exists on disk. 
If so, it - # means the NDAS prepbufr files for the current valid hour and the 5 hours - # preceeding it have already been or are in the process of being retrieved - # from the archive (tar) files. If so, skip the retrieval process. If - # not, proceed to retrieve the archive file, extract the prepbufr files - # from it, and place them in the raw daily directory. - # - # Note that despite the check on the existence of the raw daily directory - # below, it is possible for two get_obs_mrms tasks to try to retrieve - # obs for the same day. To minimize this possibility, sleep for a random - # number of seconds (with a maximum wait of maxwait seconds set below) - # before performing the directory existence check - maxwait=30 - sleep_duration_secs=$((RANDOM % maxwait)) - echo "Sleeping for ${sleep_duration_secs} seconds..." - sleep "${sleep_duration_secs}s" - - if [[ -d "${ndas_day_dir_raw}" ]]; then - - print_info_msg " -${OBTYPE} raw 6-hourly directory ${vdate_archive} exists on disk: - ndas_day_dir_raw = \"${ndas_day_dir_raw}\" -This means NDAS files for the current valid time (${vdate}) and the -5 hours preceeding it have been or are being retrieved by a get_obs_ndas -workflow task for another cycle. Thus, we will NOT attempt to retrieve -NDAS data for the current valid time from remote locations." - - else - - mkdir -p ${ndas_day_dir_raw} - - # Before calling retrieve_data.py, change location to the raw base - # directory to avoid get_obs_ndas tasks for other cycles from clobbering - # the output from this call to retrieve_data.py. Note that retrieve_data.py - # extracts the NDAS prepbufr files the archive into the directory it was - # called from, which is the working directory of this script right before - # retrieve_data.py is called. - cd ${ndas_basedir_raw} - - # Use the retrieve_data.py script to retrieve all the NDAS prepbufr files - # for the current valid hour and the 5 hours preceeding it and place them - # in the raw 6-hourly directory. 
- cmd=" - python3 -u ${USHdir}/retrieve_data.py \ - --debug \ - --file_set obs \ - --config ${PARMdir}/data_locations.yml \ - --cycle_date ${vdate_archive} \ - --data_stores hpss \ - --data_type NDAS_obs \ - --output_path ${ndas_day_dir_raw} \ - --summary_file ${logfile}" - - echo "CALLING: ${cmd}" - - $cmd || print_err_msg_exit "\ - Could not retrieve NDAS data from HPSS - - The following command exited with a non-zero exit status: - ${cmd} -" - - # Create a flag file that can be used to confirm the completion of the - # retrieval of all files for the 6-hour interval ending in vdate_archive. - touch ${ndas_day_dir_raw}/pull_completed.txt - - fi - - # Make sure the retrieval process for the 6-hour interval ending in - # vdate_archive (which may have been executed above for this cycle or for - # another cycle) has completed by checking for the existence of the flag - # file that marks completion. If not, keep checking until the flag file - # shows up. - while [[ ! -f "${ndas_day_dir_raw}/pull_completed.txt" ]]; do - echo "Waiting for completion of the NDAS obs retrieval process for the" - echo "6-hour interval ending on ${vdate_archive} ..." - sleep 5s - done - - # Since this script is part of a workflow, another get_obs_ndas task (i.e. - # for another cycle) may have extracted and placed the current file in its - # processed location between the time we checked for its existence above - # (and didn't find it) and now. This can happen because there can be - # overlap between the verification times for the current cycle and those - # of other cycles. For this reason, check again for the existence of the - # processed file. If it has already been created by another get_obs_ndas - # task, don't bother to recreate it. - if [[ -f "${ndas_fp_proc}" ]]; then - - echo "${OBTYPE} file exists on disk:" - echo " ndas_fp_proc = \"${ndas_fp_proc}\"" - echo "Will NOT attempt to retrieve from remote locations." 
- - else - - # Create the processed NDAS prepbufr files for the current valid hour as - # well as the preceeding 5 hours (or fewer if they're outside the time - # interval of the forecast) by copying or moving (and in the process - # renaming) them from the raw 6-hourly directory. In the following loop, - # "tm" means "time minus". Note that the tm06 files contain more/better - # observations than tm00 for the equivalent time. - for tm in $(seq 6 -1 1); do -# for tm in $(seq --format="%02g" 6 -1 1); do - vdate_archive_tm=$($DATE_UTIL -d "${unix_vdate_archive} ${tm} hours ago" +%Y%m%d%H) - if [[ ${vdate_archive_tm} -le ${vdate_last} ]]; then - tm2=$(echo $tm | awk '{printf "%02d\n", $0;}') - ${mv_or_cp} ${ndas_day_dir_raw}/nam.t${vhh_archive}z.prepbufr.tm${tm2}.nr \ - ${ndas_basedir_proc}/prepbufr.ndas.${vdate_archive_tm} - fi - done - - fi - - fi - - fi -# -#----------------------------------------------------------------------- -# -# Retrieve NOHRSC observations. # #----------------------------------------------------------------------- # - elif [[ ${OBTYPE} == "NOHRSC" ]]; then - - #NOHRSC is accumulation observations, so none to retrieve for hour zero - if [[ ${current_fcst} -eq 0 ]]; then - current_fcst=$((${current_fcst} + 1)) - continue - fi - - # Reorganized NOHRSC location (no need for raw data dir) - nohrsc_proc=${OBS_DIR} - - nohrsc06h_file="$nohrsc_proc/${vyyyymmdd}/sfav2_CONUS_06h_${vyyyymmdd}${vhh}_grid184.grb2" - nohrsc24h_file="$nohrsc_proc/${vyyyymmdd}/sfav2_CONUS_24h_${vyyyymmdd}${vhh}_grid184.grb2" - retrieve=0 - # If 24-hour files should be available (at 00z and 12z) then look for both files - # Otherwise just look for 6hr file - if (( ${current_fcst} % 12 == 0 )) && (( ${current_fcst} >= 24 )) ; then - if [[ ! -f "${nohrsc06h_file}" || ! 
-f "${nohrsc24h_file}" ]] ; then - retrieve=1 - echo "${OBTYPE} files do not exist on disk:" - echo "${nohrsc06h_file}" - echo "${nohrsc24h_file}" - echo "Will attempt to retrieve from remote locations" - else - echo "${OBTYPE} files exist on disk:" - echo "${nohrsc06h_file}" - echo "${nohrsc24h_file}" - fi - elif (( ${current_fcst} % 6 == 0 )) ; then - if [[ ! -f "${nohrsc06h_file}" ]]; then - retrieve=1 - echo "${OBTYPE} file does not exist on disk:" - echo "${nohrsc06h_file}" - echo "Will attempt to retrieve from remote locations" - else - echo "${OBTYPE} file exists on disk:" - echo "${nohrsc06h_file}" - fi - fi - if [ $retrieve == 1 ]; then - if [[ ! -d "$nohrsc_proc/${vyyyymmdd}" ]]; then - mkdir -p $nohrsc_proc/${vyyyymmdd} - fi - - # Pull NOHRSC data from HPSS; script will retrieve all files so only call once - cmd=" - python3 -u ${USHdir}/retrieve_data.py \ - --debug \ - --file_set obs \ - --config ${PARMdir}/data_locations.yml \ - --cycle_date ${vyyyymmdd}${vhh} \ - --data_stores hpss \ - --data_type NOHRSC_obs \ - --output_path $nohrsc_proc/${vyyyymmdd} \ - --summary_file ${logfile}" - - echo "CALLING: ${cmd}" - - $cmd || print_err_msg_exit "\ - Could not retrieve NOHRSC data from HPSS - - The following command exited with a non-zero exit status: - ${cmd} +if [[ ${OBTYPE} == "CCPA" ]]; then + $USHdir/get_obs_ccpa.sh +elif [[ ${OBTYPE} == "MRMS" ]]; then + $USHdir/get_obs_mrms.sh +elif [[ ${OBTYPE} == "NDAS" ]]; then + $USHdir/get_obs_ndas.sh +elif [[ ${OBTYPE} == "NOHRSC" ]]; then + $USHdir/get_obs_nohrsc.sh +else + print_err_msg_exit "\ +Invalid OBTYPE specified for script: + OBTYPE = \"${OBTYPE}\" +Valid options are CCPA, MRMS, NDAS, and NOHRSC. 
" - # 6-hour forecast needs to be renamed - mv $nohrsc_proc/${vyyyymmdd}/sfav2_CONUS_6h_${vyyyymmdd}${vhh}_grid184.grb2 ${nohrsc06h_file} - fi - - else - print_err_msg_exit "\ - Invalid OBTYPE specified for script; valid options are CCPA, MRMS, NDAS, and NOHRSC - " - fi # Increment to next forecast hour - - # Increment to next forecast hour - echo "Finished fcst hr=${current_fcst}" - current_fcst=$((${current_fcst} + 1)) - -done -echo "SSSSSSSSSSSSSSSS" -# -#----------------------------------------------------------------------- -# -# At this point, the processed data files for all output forecast hours -# for this cycle are either being created (by a get_obs_... task for -# another cycle) or have already been created (either by this get_obs_... -# task or one for another cycle). In case they are still being created, -# make sure they have in fact been created before exiting this script. -# If we don't do this, it is possible for this get_obs_... task to complete -# successfully but still have processed obs files for some forecast hours -# not yet created, which is undesirable. -# -#----------------------------------------------------------------------- -# -echo "HHHHHHHHHHHHHHHH" -echo "processed_fp_list = |${processed_fp_list[@]}" -num_proc_files=${#processed_fp_list[@]} -echo "num_proc_files = ${num_proc_files}" -for (( i=0; i<${num_proc_files}; i++ )); do - obs_fp="${processed_fp_list[$i]}" - while [[ ! -f "${obs_fp}" ]]; do - echo "Waiting for ${OBTYPE} file to be created on disk (by a get_obs_... workflow task for another cycle):" - echo " obs_fp = \"${obs_fp}\"" - sleep 5s - done -done +fi # #----------------------------------------------------------------------- # -# Clean up raw directories. +# Create flag file that indicates completion of task. This is needed by +# the workflow. 
# #----------------------------------------------------------------------- # -#remove_raw="TRUE" -remove_raw="FALSE" -if [ "${remove_raw}" = "TRUE" ]; then - rm -rf ${OBS_DIR}/raw_* -fi +obtype=$(echo_lowercase ${OBTYPE}) +mkdir -p ${WFLOW_FLAG_FILES_DIR} +touch "${WFLOW_FLAG_FILES_DIR}/get_obs_${obtype}_${PDY}_complete.txt" # #----------------------------------------------------------------------- # From 2ee3a46d525bb25eda4137f36df91d516fe790d5 Mon Sep 17 00:00:00 2001 From: gerard ketefian Date: Tue, 3 Sep 2024 17:49:09 -0600 Subject: [PATCH 037/131] Add ceil.sh to the set of scripts/functions that are sourced by source_util_funcs.sh. --- ush/source_util_funcs.sh | 9 +++++++++ 1 file changed, 9 insertions(+) diff --git a/ush/source_util_funcs.sh b/ush/source_util_funcs.sh index 7fe3025d6a..3884793fea 100644 --- a/ush/source_util_funcs.sh +++ b/ush/source_util_funcs.sh @@ -96,6 +96,15 @@ function source_util_funcs() { # #----------------------------------------------------------------------- # +# Source the file containing the function that returns the ceiling of +# the quotient of two positive integers. +# +#----------------------------------------------------------------------- +# + . ${bashutils_dir}/ceil.sh +# +#----------------------------------------------------------------------- +# # Source the file containing the functions that will echo given strings # as uppercase or lowercase # From d60f6511d4b221fc98edd8473374c4cb449cd58e Mon Sep 17 00:00:00 2001 From: gerard ketefian Date: Tue, 3 Sep 2024 17:55:54 -0600 Subject: [PATCH 038/131] Changes to reflect the fact that the output of PcpCombine_obs tasks now goes into a cycle-based subdirectory (under the experiment directory), as opposed to a obs day based subdirectory. 
--- ...onal_run_met_genensprod_or_ensemblestat.sh | 2 +- ...gional_run_met_gridstat_or_pointstat_vx.sh | 9 +++--- ...un_met_gridstat_or_pointstat_vx_ensprob.sh | 2 +- scripts/exregional_run_met_pcpcombine.sh | 28 ++++--------------- 4 files changed, 13 insertions(+), 28 deletions(-) diff --git a/scripts/exregional_run_met_genensprod_or_ensemblestat.sh b/scripts/exregional_run_met_genensprod_or_ensemblestat.sh index 5034369851..24f12be786 100755 --- a/scripts/exregional_run_met_genensprod_or_ensemblestat.sh +++ b/scripts/exregional_run_met_genensprod_or_ensemblestat.sh @@ -131,7 +131,7 @@ if [ "${grid_or_point}" = "grid" ]; then case "${FIELDNAME_IN_MET_FILEDIR_NAMES}" in "APCP"*) - OBS_INPUT_DIR="${vx_output_basedir}/metprd/PcpCombine_obs" + OBS_INPUT_DIR="${vx_output_basedir}${slash_cdate_or_null}/obs/metprd/PcpCombine_obs" OBS_INPUT_FN_TEMPLATE="${OBS_CCPA_APCP_FN_TEMPLATE_PCPCOMBINE_OUTPUT}" FCST_INPUT_DIR="${vx_output_basedir}" ;; diff --git a/scripts/exregional_run_met_gridstat_or_pointstat_vx.sh b/scripts/exregional_run_met_gridstat_or_pointstat_vx.sh index e54dd7b553..f833ee534c 100755 --- a/scripts/exregional_run_met_gridstat_or_pointstat_vx.sh +++ b/scripts/exregional_run_met_gridstat_or_pointstat_vx.sh @@ -132,6 +132,7 @@ time_lag=$( bc -l <<< "${ENS_TIME_LAG_HRS[$i]}*${SECS_PER_HOUR}" ) # vx_fcst_input_basedir=$( eval echo "${VX_FCST_INPUT_BASEDIR}" ) vx_output_basedir=$( eval echo "${VX_OUTPUT_BASEDIR}" ) + ensmem_indx=$(printf "%0${VX_NDIGITS_ENSMEM_NAMES}d" $(( 10#${ENSMEM_INDX}))) ensmem_name="mem${ensmem_indx}" if [ "${RUN_ENVIR}" = "nco" ]; then @@ -162,15 +163,15 @@ if [ "${grid_or_point}" = "grid" ]; then case "${FIELDNAME_IN_MET_FILEDIR_NAMES}" in "APCP"*) - OBS_INPUT_DIR="${vx_output_basedir}/metprd/PcpCombine_obs" + OBS_INPUT_DIR="${vx_output_basedir}${slash_cdate_or_null}/obs/metprd/PcpCombine_obs" OBS_INPUT_FN_TEMPLATE="${OBS_CCPA_APCP_FN_TEMPLATE_PCPCOMBINE_OUTPUT}" - 
FCST_INPUT_DIR="${vx_output_basedir}${slash_cdate_or_null}/${slash_ensmem_subdir_or_null}/metprd/PcpCombine_fcst" + FCST_INPUT_DIR="${vx_output_basedir}${slash_cdate_or_null}${slash_ensmem_subdir_or_null}/metprd/PcpCombine_fcst" FCST_INPUT_FN_TEMPLATE="${FCST_FN_TEMPLATE_PCPCOMBINE_OUTPUT}" ;; "ASNOW"*) OBS_INPUT_DIR="${OBS_DIR}" OBS_INPUT_FN_TEMPLATE="${OBS_NOHRSC_ASNOW_FN_TEMPLATE}" - FCST_INPUT_DIR="${vx_output_basedir}${slash_cdate_or_null}/${slash_ensmem_subdir_or_null}/metprd/PcpCombine_fcst" + FCST_INPUT_DIR="${vx_output_basedir}${slash_cdate_or_null}${slash_ensmem_subdir_or_null}/metprd/PcpCombine_fcst" FCST_INPUT_FN_TEMPLATE="${FCST_FN_TEMPLATE_PCPCOMBINE_OUTPUT}" ;; "REFC") @@ -198,7 +199,7 @@ fi OBS_INPUT_FN_TEMPLATE=$( eval echo ${OBS_INPUT_FN_TEMPLATE} ) FCST_INPUT_FN_TEMPLATE=$( eval echo ${FCST_INPUT_FN_TEMPLATE} ) -OUTPUT_BASE="${vx_output_basedir}${slash_cdate_or_null}/${slash_ensmem_subdir_or_null}" +OUTPUT_BASE="${vx_output_basedir}${slash_cdate_or_null}${slash_ensmem_subdir_or_null}" OUTPUT_DIR="${OUTPUT_BASE}/metprd/${MetplusToolName}" STAGING_DIR="${OUTPUT_BASE}/stage/${FIELDNAME_IN_MET_FILEDIR_NAMES}" # diff --git a/scripts/exregional_run_met_gridstat_or_pointstat_vx_ensprob.sh b/scripts/exregional_run_met_gridstat_or_pointstat_vx_ensprob.sh index 5952ed3785..23bafe91fd 100755 --- a/scripts/exregional_run_met_gridstat_or_pointstat_vx_ensprob.sh +++ b/scripts/exregional_run_met_gridstat_or_pointstat_vx_ensprob.sh @@ -119,7 +119,7 @@ if [ "${grid_or_point}" = "grid" ]; then case "${FIELDNAME_IN_MET_FILEDIR_NAMES}" in "APCP"*) - OBS_INPUT_DIR="${vx_output_basedir}/metprd/PcpCombine_obs" + OBS_INPUT_DIR="${vx_output_basedir}${slash_cdate_or_null}/obs/metprd/PcpCombine_obs" OBS_INPUT_FN_TEMPLATE="${OBS_CCPA_APCP_FN_TEMPLATE_PCPCOMBINE_OUTPUT}" ;; "ASNOW"*) diff --git a/scripts/exregional_run_met_pcpcombine.sh b/scripts/exregional_run_met_pcpcombine.sh index 20ae1a9794..ee999d1de3 100755 --- a/scripts/exregional_run_met_pcpcombine.sh +++ 
b/scripts/exregional_run_met_pcpcombine.sh @@ -163,6 +163,9 @@ if [ "${FCST_OR_OBS}" = "FCST" ]; then slash_ensmem_subdir_or_null="" fi fi +elif [ "${FCST_OR_OBS}" = "OBS" ]; then + slash_cdate_or_null="/${CDATE}" + slash_ensmem_subdir_or_null="/obs" fi OBS_INPUT_DIR="" @@ -175,7 +178,7 @@ if [ "${FCST_OR_OBS}" = "FCST" ]; then FCST_INPUT_DIR="${vx_fcst_input_basedir}" FCST_INPUT_FN_TEMPLATE=$( eval echo ${FCST_SUBDIR_TEMPLATE:+${FCST_SUBDIR_TEMPLATE}/}${FCST_FN_TEMPLATE} ) - OUTPUT_BASE="${vx_output_basedir}${slash_cdate_or_null}/${slash_ensmem_subdir_or_null}" + OUTPUT_BASE="${vx_output_basedir}${slash_cdate_or_null}${slash_ensmem_subdir_or_null}" OUTPUT_DIR="${OUTPUT_BASE}/metprd/${MetplusToolName}_fcst" OUTPUT_FN_TEMPLATE=$( eval echo ${FCST_FN_TEMPLATE_PCPCOMBINE_OUTPUT} ) STAGING_DIR="${OUTPUT_BASE}/stage/${FIELDNAME_IN_MET_FILEDIR_NAMES}" @@ -185,7 +188,7 @@ elif [ "${FCST_OR_OBS}" = "OBS" ]; then OBS_INPUT_DIR="${OBS_DIR}" OBS_INPUT_FN_TEMPLATE=$( eval echo ${OBS_CCPA_APCP_FN_TEMPLATE} ) - OUTPUT_BASE="${vx_output_basedir}" + OUTPUT_BASE="${vx_output_basedir}${slash_cdate_or_null}${slash_ensmem_subdir_or_null}" OUTPUT_DIR="${OUTPUT_BASE}/metprd/${MetplusToolName}_obs" OUTPUT_FN_TEMPLATE=$( eval echo ${OBS_CCPA_APCP_FN_TEMPLATE_PCPCOMBINE_OUTPUT} ) STAGING_DIR="${OUTPUT_BASE}/stage/${FIELDNAME_IN_MET_FILEDIR_NAMES}" @@ -212,28 +215,9 @@ elif [ "${FCST_OR_OBS}" = "OBS" ]; then num_missing_files_max="${NUM_MISSING_OBS_FILES_MAX}" fi -# If processing obs, then for all cylces except the last one, calculate -# a "forecast length" that will hours up to but not including the initial -# (zeroth) hour of the next cycle. For the last cycle, take the "forecast -# length" of the obs to be the same as that of the forecast for the cycle. 
-# This ensures that the PcpCombine_obs tasks for different cycles do not -# overwrite or clobber output from another cycle (because with this -# approach, the valid times on which the current PcpCombine_obs task is -# operating is distinct from the ones for the PcpCombine_obs tasks for -# every other cycle). -fcst_len_hrs="${FCST_LEN_HRS}" -if [ "${FCST_OR_OBS}" = "OBS" ]; then - yyyymmddhhmn="${PDY}${cyc}00" - if [ ${yyyymmddhhmn} -lt ${DATE_LAST_CYCL} ] && \ - [ ${FCST_LEN_HRS} -ge ${INCR_CYCL_FREQ} ]; then - output_incr_hrs="1" - fcst_len_hrs=$((INCR_CYCL_FREQ - output_incr_hrs + 1)) - fi -fi - set_vx_fhr_list \ cdate="${CDATE}" \ - fcst_len_hrs="${fcst_len_hrs}" \ + fcst_len_hrs="${FCST_LEN_HRS}" \ field="$VAR" \ accum_hh="${ACCUM_HH}" \ base_dir="${base_dir}" \ From 009666f994ddb61c81f95988dd02574850248a53 Mon Sep 17 00:00:00 2001 From: gerard ketefian Date: Tue, 3 Sep 2024 17:59:08 -0600 Subject: [PATCH 039/131] Simplify names of some variables; add variable for where workflow task flag files should be placed; fix template for output files from pcpcombine_obs tasks. 
--- ush/config_defaults.yaml | 17 +++++++++++------ 1 file changed, 11 insertions(+), 6 deletions(-) diff --git a/ush/config_defaults.yaml b/ush/config_defaults.yaml index e564444b49..9ac5bc39f8 100644 --- a/ush/config_defaults.yaml +++ b/ush/config_defaults.yaml @@ -335,10 +335,10 @@ platform: # #----------------------------------------------------------------------- # - REMOVE_RAW_OBS_DIRS_CCPA: true - REMOVE_RAW_OBS_DIRS_MRMS: true - REMOVE_RAW_OBS_DIRS_NDAS: true - REMOVE_RAW_OBS_DIRS_NOHRSC: true + REMOVE_RAW_OBS_CCPA: true + REMOVE_RAW_OBS_MRMS: true + REMOVE_RAW_OBS_NDAS: true + REMOVE_RAW_OBS_NOHRSC: true # #----------------------------------------------------------------------- # @@ -547,12 +547,17 @@ workflow: # default will point to: # # EXPTDIR: "${EXPT_BASEDIR}/${EXPT_SUBDIR}" + # + # WFLOW_FLAG_FILES_DIR: + # Directory in which flag files marking completion of various workflow + # tasks can be placed. #----------------------------------------------------------------------- # EXPT_BASEDIR: '' # This will be set in setup.py prior to extend_yaml() being called EXPT_SUBDIR: 'experiment' EXEC_SUBDIR: "exec" EXPTDIR: '{{ [workflow.EXPT_BASEDIR, workflow.EXPT_SUBDIR]|path_join }}' + WFLOW_FLAG_FILES_DIR: '{{ [workflow.EXPTDIR, "wflow_flag_files"]|path_join }}' # #----------------------------------------------------------------------- # @@ -1726,7 +1731,7 @@ task_run_fcst: #----------------------------------------------------------------------- # # KMP_AFFINITY_*: - # From Intel: "The Intel® runtime library has the ability to bind OpenMP + # From Intel: "The Intel runtime library has the ability to bind OpenMP # threads to physical processing units. The interface is controlled using # the KMP_AFFINITY environment variable. Depending on the system (machine) # topology, application, and operating system, thread affinity can have a @@ -2464,7 +2469,7 @@ verification: # METplus Pb2nc tool on NDAS observations. 
(These files will contain # obs ADPSFC or ADPUPA fields in NetCDF format.) # - OBS_CCPA_APCP_FN_TEMPLATE_PCPCOMBINE_OUTPUT: '${OBS_CCPA_APCP_FN_TEMPLATE}_a${ACCUM_HH}h.nc' + OBS_CCPA_APCP_FN_TEMPLATE_PCPCOMBINE_OUTPUT: 'ccpa.t{valid?fmt=%H}z.01h.hrap.conus.gb2_a${ACCUM_HH}h.nc' OBS_NDAS_ADPSFCorADPUPA_FN_TEMPLATE_PB2NC_OUTPUT: '${OBS_NDAS_ADPSFCorADPUPA_FN_TEMPLATE}.nc' # # VX_FCST_MODEL_NAME: From 1622c0e45493ad5403501c32c1109ce2d259bc04 Mon Sep 17 00:00:00 2001 From: gerard ketefian Date: Tue, 3 Sep 2024 18:01:59 -0600 Subject: [PATCH 040/131] Add file accidentally left out of commit two hashes ago. --- scripts/exregional_run_met_gridstat_or_pointstat_vx_ensmean.sh | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/scripts/exregional_run_met_gridstat_or_pointstat_vx_ensmean.sh b/scripts/exregional_run_met_gridstat_or_pointstat_vx_ensmean.sh index f08c002d5f..e769a194fc 100755 --- a/scripts/exregional_run_met_gridstat_or_pointstat_vx_ensmean.sh +++ b/scripts/exregional_run_met_gridstat_or_pointstat_vx_ensmean.sh @@ -119,7 +119,7 @@ if [ "${grid_or_point}" = "grid" ]; then case "${FIELDNAME_IN_MET_FILEDIR_NAMES}" in "APCP"*) - OBS_INPUT_DIR="${vx_output_basedir}/metprd/PcpCombine_obs" + OBS_INPUT_DIR="${vx_output_basedir}${slash_cdate_or_null}/obs/metprd/PcpCombine_obs" OBS_INPUT_FN_TEMPLATE="${OBS_CCPA_APCP_FN_TEMPLATE_PCPCOMBINE_OUTPUT}" ;; "ASNOW"*) From 9a26289c08d854cecd64b45f4e97207d1b4ca7b0 Mon Sep 17 00:00:00 2001 From: gerard ketefian Date: Tue, 3 Sep 2024 18:05:21 -0600 Subject: [PATCH 041/131] Modifications to exregional_run_met_pb2nc_obs.sh so that the corresponding task is day-based (i.e. it is run for each day for which obs are needed). The script now also creates a flag file indicating it completed successfully (needed for the workflow to work properly). 
--- scripts/exregional_run_met_pb2nc_obs.sh | 105 +++++++++++++++++++++--- 1 file changed, 94 insertions(+), 11 deletions(-) diff --git a/scripts/exregional_run_met_pb2nc_obs.sh b/scripts/exregional_run_met_pb2nc_obs.sh index 985cd33c7f..c5f007f7c0 100755 --- a/scripts/exregional_run_met_pb2nc_obs.sh +++ b/scripts/exregional_run_met_pb2nc_obs.sh @@ -71,6 +71,43 @@ to convert NDAS prep buffer observation files to NetCDF format. # #----------------------------------------------------------------------- # +# +# +#----------------------------------------------------------------------- +# +# The day (in the form YYYMMDD) associated with the current task via the +# task's cycledefs attribute in the ROCOTO xml. +yyyymmdd_task=${PDY} + +# The environment variable OUTPUT_TIMES_ALL set in the ROCOTO XML is a +# scalar string containing all relevant forecast output times (each) in +# the form YYYYMMDDHH) separated by spaces. It isn't an array of strings +# because in ROCOTO, there doesn't seem to be a way to pass a bash array +# from the XML to task's script. To have an array-valued variable to +# work with, here, we create the new variable output_times_all that is +# the array-valued counterpart of OUTPUT_TIMES_ALL. +output_times_all=($(printf "%s" "${OUTPUT_TIMES_ALL}")) + +# List of times (each of the form YYYYMMDDHH) for which there is forecast +# output for the current day. We extract this list from the full list of +# all forecast output times (i.e. from all cycles). 
+output_times_crnt_day=() +if [[ ${output_times_all[@]} =~ ${yyyymmdd_task} ]]; then + output_times_crnt_day=( $(printf "%s\n" "${output_times_all[@]}" | grep "^${yyyymmdd_task}") ) +fi + +num_output_times_crnt_day=${#output_times_crnt_day[@]} +if [[ ${num_output_times_crnt_day} -eq 0 ]]; then + print_info_msg " +None of the forecast output times fall within the day associated with the +current task (yyyymmdd_task): + yyyymmdd_task = \"${yyyymmdd_task}\" +Thus, there is no need to run ${METPLUSTOOLNAME} on any prepbufr files." + exit +fi +# +#----------------------------------------------------------------------- +# # Get the cycle date and time in YYYYMMDDHH format. # #----------------------------------------------------------------------- @@ -123,16 +160,52 @@ STAGING_DIR="${OUTPUT_BASE}/stage/${MetplusToolName}_obs" # #----------------------------------------------------------------------- # -set_vx_fhr_list \ - cdate="${CDATE}" \ - fcst_len_hrs="${FCST_LEN_HRS}" \ - field="$VAR" \ - accum_hh="${ACCUM_HH}" \ - base_dir="${OBS_INPUT_DIR}" \ - fn_template="${OBS_INPUT_FN_TEMPLATE}" \ - check_accum_contrib_files="FALSE" \ - num_missing_files_max="${NUM_MISSING_OBS_FILES_MAX}" \ - outvarname_fhr_list="FHR_LIST" +FHR_LIST="" +num_missing_files=0 +for yyyymmddhh in ${output_times_crnt_day[@]}; do + yyyymmdd=$(echo ${yyyymmddhh} | cut -c1-8) + hh=$(echo ${yyyymmddhh} | cut -c9-10) + fn="prepbufr.ndas.${yyyymmddhh}" + fp="${OBS_INPUT_DIR}/${fn}" + if [[ -f "${fp}" ]]; then + print_info_msg " +Found ${OBTYPE} obs file corresponding to forecast output time (yyyymmddhh): + yyyymmddhh = \"${yyyymmddhh}\" + fp = \"${fp}\" +" + hh_noZero=$((10#${hh})) + #FHR_LIST+=("${yyyymmddhh}") + FHR_LIST="${FHR_LIST},${hh_noZero}" + else + num_missing_files=$((num_missing_files+1)) + print_info_msg " +${OBTYPE} obs file corresponding to forecast output time (yyyymmddhh) does +not exist on disk: + yyyymmddhh = \"${yyyymmddhh}\" + fp = \"${fp}\" +Removing this time from the list of times 
to be processed by ${METPLUSTOOLNAME}. +" + fi +done + +# If the number of missing files is greater than the maximum allowed +# (specified by num_missing_files_max), print out an error message and +# exit. +if [ "${num_missing_files}" -gt "${NUM_MISSING_OBS_FILES_MAX}" ]; then + print_err_msg_exit "\ +The number of missing ${OBTYPE} obs files (num_missing_files) is greater +than the maximum allowed number (num_missing_files_max): + num_missing_files = ${num_missing_files} + num_missing_files_max = ${num_missing_files_max}" +fi + +# Remove leading comma from FHR_LIST. +FHR_LIST=$( echo "${FHR_LIST}" | $SED "s/^,//g" ) +print_info_msg "$VERBOSE" "\ +Final (i.e. after filtering for missing files) set of forecast hours +(saved in a scalar string variable) is: + FHR_LIST = \"${FHR_LIST}\" +" # #----------------------------------------------------------------------- # @@ -282,7 +355,7 @@ uw template render \ -o ${metplus_config_fp} \ --verbose \ --values-file "${tmpfile}" \ - --search-path "/" + --search-path "/" err=$? rm $tmpfile @@ -315,6 +388,16 @@ METplus configuration file used is: # #----------------------------------------------------------------------- # +# Create flag file that indicates completion of task. This is needed by +# the workflow. +# +#----------------------------------------------------------------------- +# +mkdir -p ${WFLOW_FLAG_FILES_DIR} +touch "${WFLOW_FLAG_FILES_DIR}/run_met_pb2nc_obs_${PDY}_complete.txt" +# +#----------------------------------------------------------------------- +# # Print message indicating successful completion of script. # #----------------------------------------------------------------------- From e8a6f7dcfb4f81105acdb766c46272dac7073aec Mon Sep 17 00:00:00 2001 From: gerard ketefian Date: Tue, 3 Sep 2024 18:15:36 -0600 Subject: [PATCH 042/131] Move sections of WE2E yaml files around to be consistent with the order in config_defaults.yaml. 
--- .../config.MET_ensemble_verification_only_vx.yaml | 6 +++--- ...config.MET_ensemble_verification_only_vx_time_lag.yaml | 8 +++++--- 2 files changed, 8 insertions(+), 6 deletions(-) diff --git a/tests/WE2E/test_configs/verification/config.MET_ensemble_verification_only_vx.yaml b/tests/WE2E/test_configs/verification/config.MET_ensemble_verification_only_vx.yaml index 812e805645..80b2e3099f 100644 --- a/tests/WE2E/test_configs/verification/config.MET_ensemble_verification_only_vx.yaml +++ b/tests/WE2E/test_configs/verification/config.MET_ensemble_verification_only_vx.yaml @@ -8,9 +8,6 @@ metadata: user: RUN_ENVIR: community -nco: - NET_default: rrfs - workflow: PREDEF_GRID_NAME: RRFS_CONUS_25km DATE_FIRST_CYCL: '2019061500' @@ -18,6 +15,9 @@ workflow: FCST_LEN_HRS: 6 PREEXISTING_DIR_METHOD: rename +nco: + NET_default: rrfs + rocoto: tasks: taskgroups: '{{ ["parm/wflow/verify_pre.yaml", "parm/wflow/verify_det.yaml", "parm/wflow/verify_ens.yaml"]|include }}' diff --git a/tests/WE2E/test_configs/verification/config.MET_ensemble_verification_only_vx_time_lag.yaml b/tests/WE2E/test_configs/verification/config.MET_ensemble_verification_only_vx_time_lag.yaml index f7d82cb8cd..d0edccca01 100644 --- a/tests/WE2E/test_configs/verification/config.MET_ensemble_verification_only_vx_time_lag.yaml +++ b/tests/WE2E/test_configs/verification/config.MET_ensemble_verification_only_vx_time_lag.yaml @@ -16,18 +16,20 @@ workflow: FCST_LEN_HRS: 6 PREEXISTING_DIR_METHOD: rename +nco: + NET_default: 'RRFSE_CONUS' + rocoto: tasks: taskgroups: '{{ ["parm/wflow/verify_pre.yaml", "parm/wflow/verify_det.yaml", "parm/wflow/verify_ens.yaml"]|include }}' -nco: - NET_default: 'RRFSE_CONUS' - global: DO_ENSEMBLE: true NUM_ENS_MEMBERS: 2 ENS_TIME_LAG_HRS: '[ 0, 12 ]' +# If the following is commented out, then the obs files staged on each +# platform will be (found and) used. 
platform: CCPA_OBS_DIR: '{{ workflow.EXPTDIR }}/obs_data/ccpa/proc' MRMS_OBS_DIR: '{{ workflow.EXPTDIR }}/obs_data/mrms/proc' From a3c8c4d34676e8410a0bedc27c2a8b201f4faf5a Mon Sep 17 00:00:00 2001 From: gerard ketefian Date: Tue, 3 Sep 2024 18:17:00 -0600 Subject: [PATCH 043/131] Changes to workflow yaml files for vx portion of the workflow in order to have cycledefs for obs-day-based tasks that include only the obs days for which obs are actually needed but not on any days (inbetween the first and last obs days over all cycles) for which obs are not needed. --- parm/wflow/verify_det.yaml | 64 +++++++++++++++++++-------------- parm/wflow/verify_ens.yaml | 74 +++++++++++++++++++++----------------- parm/wflow/verify_pre.yaml | 60 ++++++++++++++++++++----------- 3 files changed, 118 insertions(+), 80 deletions(-) diff --git a/parm/wflow/verify_det.yaml b/parm/wflow/verify_det.yaml index 35358c9b67..89f614b4e8 100644 --- a/parm/wflow/verify_det.yaml +++ b/parm/wflow/verify_det.yaml @@ -47,31 +47,9 @@ metatask_GridStat_CCPA_all_accums_all_mems: walltime: 02:00:00 dependency: and: - # The following will include dependencies on the PcpCombine_obs task for - # the current cycle as well as those from other cycles that process CCPA - # obs at valid times that are part of the current cycle's forecast. This - # dependence is necessary because each PcpCombine_obs task except the - # last one processes obs at valid times starting with the initial time - # of the current cycle's forecast and ending with the last output time - # of this forecast that is before the initial time of the next cycle's - # forecast. 
taskdep_pcpcombine_obs: attrs: - task: '{%- set num_cycl_dep = (workflow.FCST_LEN_HRS/workflow.INCR_CYCL_FREQ)|round(0,"ceil")|int %} - {%- set num_cycl_dep = num_cycl_dep %} - {%- for n in range(0, num_cycl_dep) %} - {%- set cycl_offset = n*workflow.INCR_CYCL_FREQ %} - {%- if n == 0 %} - {{- "run_MET_PcpCombine_obs_APCP#ACCUM_HH#h\" cycle_offset=\"%02d:00:00\"/>\n" % cycl_offset }} - {%- else %} - {{- " \n" }} - {{- " \n" % cycl_offset }} - {{- " \n" % cycl_offset }} - {{- " \n" }} - {%- endif %} - {%- endfor %} - {{- " \n" }} - {{- " \n" }} + {{- indent ~ "\n" }} + {{- indent_p2 ~ workflow.WFLOW_FLAG_FILES_DIR ~ "/get_obs_mrms_" ~ yyyymmdd ~ "_complete.txt" }} + {%- endif %} + {%- if n != num_obs_days-1 %} {{- "\n" }} {%- endif %} + {%- endfor %}' datadep_post_files_exist: attrs: age: 00:00:00:30 @@ -163,9 +157,25 @@ metatask_PointStat_NDAS_all_mems: walltime: 01:00:00 dependency: and: - taskdep_pb2nc: + datadep_all_pb2nc_obs_complete: attrs: - task: run_MET_Pb2nc_obs + age: 00:00:00:30 + # Check that the flag files that indicate that the Pb2NC tasks are + # complete are all present before launching any PointStat task. 
+ text: '{%- set num_obs_days = workflow.OBS_DAYS_ALL_CYCLES_INST|length %} + {%- set indent = " " %} + {%- set indent_p2 = indent + " " %} + {%- for n in range(0, num_obs_days) %} + {%- set yyyymmdd = workflow.OBS_DAYS_ALL_CYCLES_INST[n] %} + {%- if n == 0 %} + {{- workflow.WFLOW_FLAG_FILES_DIR ~ "/run_met_pb2nc_obs_" ~ yyyymmdd ~ "_complete.txt" }} + {%- else %} + {{- indent ~ "\n" }} + {{- indent ~ "\n" }} + {{- indent_p2 ~ workflow.WFLOW_FLAG_FILES_DIR ~ "/run_met_pb2nc_obs_" ~ yyyymmdd ~ "_complete.txt" }} + {%- endif %} + {%- if n != num_obs_days-1 %} {{- "\n" }} {%- endif %} + {%- endfor %}' datadep_post_files_exist: attrs: age: 00:00:00:30 diff --git a/parm/wflow/verify_ens.yaml b/parm/wflow/verify_ens.yaml index 17086c6bc6..0893248863 100644 --- a/parm/wflow/verify_ens.yaml +++ b/parm/wflow/verify_ens.yaml @@ -48,31 +48,9 @@ metatask_GenEnsProd_EnsembleStat_CCPA: FCST_THRESH: 'none' dependency: and: - # The following will include dependencies on the PcpCombine_obs task for - # the current cycle as well as those from other cycles that process CCPA - # obs at valid times that are part of the current cycle's forecast. This - # dependence is necessary because each PcpCombine_obs task except the - # last one processes obs at valid times starting with the initial time - # of the current cycle's forecast and ending with the last output time - # of this forecast that is before the initial time of the next cycle's - # forecast. 
taskdep_pcpcombine_obs: &taskdep_pcpcombine_obs attrs: - task: '{%- set num_cycl_dep = (workflow.FCST_LEN_HRS/workflow.INCR_CYCL_FREQ)|round(0,"ceil")|int %} - {%- set num_cycl_dep = num_cycl_dep %} - {%- for n in range(0, num_cycl_dep) %} - {%- set cycl_offset = n*workflow.INCR_CYCL_FREQ %} - {%- if n == 0 %} - {{- "run_MET_PcpCombine_obs_APCP#ACCUM_HH#h\" cycle_offset=\"%02d:00:00\"/>\n" % cycl_offset }} - {%- else %} - {{- " \n" }} - {{- " \n" % cycl_offset }} - {{- " \n" % cycl_offset }} - {{- " \n" }} - {%- endif %} - {%- endfor %} - {{- " \n" }} - {{- " \n" }} + {{- indent ~ "\n" }} + {{- indent_p2 ~ workflow.WFLOW_FLAG_FILES_DIR ~ "/get_obs_mrms_" ~ yyyymmdd ~ "_complete.txt" }} + {%- endif %} + {%- if n != num_obs_days-1 %} {{- "\n" }} {%- endif %} + {%- endfor %}' taskdep_genensprod: attrs: task: run_MET_GenEnsProd_vx_#VAR# @@ -175,9 +169,25 @@ metatask_GenEnsProd_EnsembleStat_NDAS: walltime: 01:00:00 dependency: and: - taskdep_pb2nc: + datadep_all_pb2nc_obs_complete: &all_pb2nc_obs_complete attrs: - task: run_MET_Pb2nc_obs + age: 00:00:00:30 + # Check that the flag files that indicate that the Pb2NC tasks are + # complete are all present before launching any EnsembleStat task. 
+ text: '{%- set num_obs_days = workflow.OBS_DAYS_ALL_CYCLES_INST|length %} + {%- set indent = " " %} + {%- set indent_p2 = indent + " " %} + {%- for n in range(0, num_obs_days) %} + {%- set yyyymmdd = workflow.OBS_DAYS_ALL_CYCLES_INST[n] %} + {%- if n == 0 %} + {{- workflow.WFLOW_FLAG_FILES_DIR ~ "/run_met_pb2nc_obs_" ~ yyyymmdd ~ "_complete.txt" }} + {%- else %} + {{- indent ~ "\n" }} + {{- indent ~ "\n" }} + {{- indent_p2 ~ workflow.WFLOW_FLAG_FILES_DIR ~ "/run_met_pb2nc_obs_" ~ yyyymmdd ~ "_complete.txt" }} + {%- endif %} + {%- if n != num_obs_days-1 %} {{- "\n" }} {%- endif %} + {%- endfor %}' taskdep_genensprod: attrs: task: run_MET_GenEnsProd_vx_#VAR# @@ -254,9 +264,8 @@ metatask_GridStat_MRMS_ensprob: FCST_THRESH: 'all' dependency: and: - taskdep_get_obs_mrms: - attrs: - task: get_obs_mrms + datadep_all_get_obs_mrms_complete: + <<: *all_get_obs_mrms_complete taskdep_genensprod: attrs: task: run_MET_GenEnsProd_vx_#VAR# @@ -282,9 +291,8 @@ metatask_PointStat_NDAS_ensmeanprob: FCST_THRESH: 'all' dependency: and: - taskdep_pb2nc: - attrs: - task: run_MET_Pb2nc_obs + datadep_all_pb2nc_obs_complete: + <<: *all_pb2nc_obs_complete taskdep_genensprod: attrs: task: run_MET_GenEnsProd_vx_#VAR# diff --git a/parm/wflow/verify_pre.yaml b/parm/wflow/verify_pre.yaml index 2357c6bc5e..13815a9752 100644 --- a/parm/wflow/verify_pre.yaml +++ b/parm/wflow/verify_pre.yaml @@ -1,7 +1,7 @@ default_task_verify_pre: &default_task_verify_pre account: '&ACCOUNT;' attrs: - cycledefs: forecast + cycledefs: cycledef_obs_days_inst maxtries: '1' envars: &default_vars GLOBAL_VAR_DEFNS_FP: '&GLOBAL_VAR_DEFNS_FP;' @@ -23,14 +23,19 @@ default_task_verify_pre: &default_task_verify_pre task_get_obs_ccpa: <<: *default_task_verify_pre + attrs: + cycledefs: cycledef_obs_days_cumul + maxtries: '1' command: '&LOAD_MODULES_RUN_TASK_FP; "get_obs" "&JOBSdir;/JREGIONAL_GET_VERIF_OBS"' envars: <<: *default_vars - ACCUM_HH: '01' OBS_DIR: '&CCPA_OBS_DIR;' OBTYPE: 'CCPA' - REMOVE_RAW_OBS_DIRS: '{{ 
platform.REMOVE_RAW_OBS_DIRS_CCPA }}' - FHR: '{% for h in range(0, workflow.FCST_LEN_HRS+1) %}{{ " %02d" % h }}{% endfor %}' + OUTPUT_TIMES_ALL: &output_times_all_cumul + '{%- for i in range(0, (workflow.OUTPUT_TIMES_ALL_CYCLES_CUMUL|length)) %} + {{- " " ~ workflow.OUTPUT_TIMES_ALL_CYCLES_CUMUL[i] }} + {%- endfor %}' + REMOVE_RAW_OBS: 'platform.REMOVE_RAW_OBS_CCPA' native: '{% if platform.get("SCHED_NATIVE_CMD_HPSS") %}{{ platform.SCHED_NATIVE_CMD_HPSS }}{% else %}{{ platform.SCHED_NATIVE_CMD}}{% endif %}' partition: '{% if platform.get("PARTITION_HPSS") %}&PARTITION_HPSS;{% else %}None{% endif %}' queue: "&QUEUE_HPSS;" @@ -43,8 +48,8 @@ task_get_obs_nohrsc: <<: *default_vars OBS_DIR: '&NOHRSC_OBS_DIR;' OBTYPE: 'NOHRSC' - REMOVE_RAW_OBS_DIRS: '{{ platform.REMOVE_RAW_OBS_DIRS_NOHRSC }}' - FHR: '{% for h in range(0, workflow.FCST_LEN_HRS+1) %}{{ " %02d" % h }}{% endfor %}' + OUTPUT_TIMES_ALL: *output_times_all_cumul + REMOVE_RAW_OBS: 'platform.REMOVE_RAW_OBS_NOHRSC' native: '{% if platform.get("SCHED_NATIVE_CMD_HPSS") %}{{ platform.SCHED_NATIVE_CMD_HPSS }}{% else %}{{ platform.SCHED_NATIVE_CMD}}{% endif %}' partition: '{% if platform.get("PARTITION_HPSS") %}&PARTITION_HPSS;{% else %}None{% endif %}' queue: "&QUEUE_HPSS;" @@ -57,9 +62,12 @@ task_get_obs_mrms: <<: *default_vars OBS_DIR: '&MRMS_OBS_DIR;' OBTYPE: 'MRMS' - VAR: 'REFC RETOP' - REMOVE_RAW_OBS_DIRS: '{{ platform.REMOVE_RAW_OBS_DIRS_MRMS }}' - FHR: '{% for h in range(0, workflow.FCST_LEN_HRS+1) %}{{ " %02d" % h }}{% endfor %}' + MRMS_FIELDS: 'REFC RETOP' + OUTPUT_TIMES_ALL: &output_times_all_inst + '{%- for i in range(0, (workflow.OUTPUT_TIMES_ALL_CYCLES_INST|length)) %} + {{- " " ~ workflow.OUTPUT_TIMES_ALL_CYCLES_INST[i] }} + {%- endfor %}' + REMOVE_RAW_OBS: 'platform.REMOVE_RAW_OBS_MRMS' native: '{% if platform.get("SCHED_NATIVE_CMD_HPSS") %}{{ platform.SCHED_NATIVE_CMD_HPSS }}{% else %}{{ platform.SCHED_NATIVE_CMD}}{% endif %}' partition: '{% if platform.get("PARTITION_HPSS") %}&PARTITION_HPSS;{% else 
%}None{% endif %}' queue: "&QUEUE_HPSS;" @@ -72,8 +80,8 @@ task_get_obs_ndas: <<: *default_vars OBS_DIR: '&NDAS_OBS_DIR;' OBTYPE: 'NDAS' - REMOVE_RAW_OBS_DIRS: '{{ platform.REMOVE_RAW_OBS_DIRS_NDAS }}' - FHR: '{% for h in range(0, workflow.FCST_LEN_HRS+1) %}{{ " %02d" % h }}{% endfor %}' + OUTPUT_TIMES_ALL: *output_times_all_inst + REMOVE_RAW_OBS: 'platform.REMOVE_RAW_OBS_NDAS' queue: "&QUEUE_HPSS;" native: '{% if platform.get("SCHED_NATIVE_CMD_HPSS") %}{{ platform.SCHED_NATIVE_CMD_HPSS }}{% else %}{{ platform.SCHED_NATIVE_CMD}}{% endif %}' partition: '{% if platform.get("PARTITION_HPSS") %}&PARTITION_HPSS;{% else %}None{% endif %}' @@ -82,7 +90,7 @@ task_get_obs_ndas: task_run_MET_Pb2nc_obs: <<: *default_task_verify_pre attrs: - cycledefs: forecast + cycledefs: cycledef_obs_days_inst maxtries: '2' command: '&LOAD_MODULES_RUN_TASK_FP; "run_vx" "&JOBSdir;/JREGIONAL_RUN_MET_PB2NC_OBS"' envars: @@ -92,6 +100,7 @@ task_run_MET_Pb2nc_obs: FCST_OR_OBS: OBS OBTYPE: NDAS OBS_DIR: '&NDAS_OBS_DIR;' + OUTPUT_TIMES_ALL: *output_times_all_inst METPLUSTOOLNAME: 'PB2NC' dependency: and: @@ -127,14 +136,25 @@ metatask_PcpCombine_obs: and: datadep: text: "&CCPA_OBS_DIR;" - or: - not: - taskvalid: - attrs: - task: get_obs_ccpa - taskdep: - attrs: - task: get_obs_ccpa + datadep_all_get_obs_ccpa_complete: + attrs: + age: 00:00:00:30 + # Check that the flag files that indicate that the get_obs_ccpa tasks + # are complete are all present before launching any PcpCombine task. 
+ text: '{%- set num_obs_days = workflow.OBS_DAYS_ALL_CYCLES_CUMUL|length %} + {%- set indent = " " %} + {%- set indent_p2 = indent + " " %} + {%- for n in range(0, num_obs_days) %} + {%- set yyyymmdd = workflow.OBS_DAYS_ALL_CYCLES_CUMUL[n] %} + {%- if n == 0 %} + {{- workflow.WFLOW_FLAG_FILES_DIR ~ "/get_obs_ccpa_" ~ yyyymmdd ~ "_complete.txt" }} + {%- else %} + {{- indent ~ "\n" }} + {{- indent ~ "\n" }} + {{- indent_p2 ~ workflow.WFLOW_FLAG_FILES_DIR ~ "/get_obs_ccpa_" ~ yyyymmdd ~ "_complete.txt" }} + {%- endif %} + {%- if n != num_obs_days-1 %} {{- "\n" }} {%- endif %} + {%- endfor %}' metatask_check_post_output_all_mems: var: From c774e40cc4d9f3f48fee2ef49a01a6bae0b47e21 Mon Sep 17 00:00:00 2001 From: gerard ketefian Date: Thu, 5 Sep 2024 12:29:06 -0600 Subject: [PATCH 044/131] Add code to be able to generate forecast output times and obs days, both for instantaneous and cumulative fields, and pass this info to the ROCOTO xml generation system. --- ush/set_cycle_dates.py | 268 +++++++++++++++++++++++++++++++++++++++-- ush/setup.py | 44 ++++++- 2 files changed, 298 insertions(+), 14 deletions(-) diff --git a/ush/set_cycle_dates.py b/ush/set_cycle_dates.py index 0c63a87e49..c3969d8ef3 100644 --- a/ush/set_cycle_dates.py +++ b/ush/set_cycle_dates.py @@ -1,11 +1,10 @@ #!/usr/bin/env python3 from datetime import datetime, timedelta, date - +from pprint import pprint from python_utils import print_input_args, print_err_msg_exit - -def set_cycle_dates(date_start, date_end, incr_cycl_freq): +def set_cycle_dates(start_time_first_cycl, start_time_last_cycl, cycl_intvl): """This file defines a function that, given the start and end dates as date time objects, and a cycling frequency, returns an array of cycle date-hours whose elements have the form YYYYMMDDHH. Here, @@ -13,22 +12,267 @@ def set_cycle_dates(date_start, date_end, incr_cycl_freq): two-digit day of the month, and HH is a two-digit hour of the day. 
Args: - date_start: start date, datetime object - date_end: end date, datetime object - incr_cycl_freq: cycle frequency increment in hours, an int + start_time_first_cycl: + Starting time of first cycle; a datetime object. + + start_time_last_cycl: + Starting time of last cycle; a datetime object. + + cycl_intvl: + Time interval between cycle starting times; a timedelta object. + Returns: - A list of dates in a format YYYYMMDDHH + A list of strings containing cycle starting times in the format + 'YYYYMMDDHH' """ print_input_args(locals()) - freq_delta = timedelta(hours=incr_cycl_freq) - # iterate over cycles all_cdates = [] - cdate = date_start - while cdate <= date_end: + cdate = start_time_first_cycl + while cdate <= start_time_last_cycl: cyc = datetime.strftime(cdate, "%Y%m%d%H") all_cdates.append(cyc) - cdate += freq_delta + cdate += cycl_intvl return all_cdates + + +def set_fcst_output_times_and_obs_days_all_cycles( + start_time_first_cycl, start_time_last_cycl, cycl_intvl, fcst_len, fcst_output_intvl): + """Given the starting time of the first cycle of an SRW App experiment, the + starting time of the last cycle, the interval between cycle start times, + the forecast length, and the forecast output interval, this function + returns two pairs of lists: the first of each pair is a list of strings + of forecast output times over all cycles (each element of the form + 'YYYYMMDDHH'), and the second is a list of days over all cycles on which + observations are needed to perform verification (each element of the form + 'YYYYMMDD'). The first pair of lists is for instantaneous output fields + (e.g. REFC, RETOP, T2m), and the second pair is for cumulative ones (e.g. + APCP or accumulated precipitation). + + Args: + start_time_first_cycl: + Starting time of first cycle; a datetime object. + + start_time_last_cycl: + Starting time of last cycle; a datetime object. + + cycl_intvl: + Time interval between cycle starting times; a timedelta object. 
+ + fcst_len: + The length of each forecast; a timedelta object. + + fcst_output_intvl: + Time interval between forecast output times; a timedelta object. + + Returns: + output_times_all_cycles_inst: + List of forecast output times over all cycles of instantaneous fields. + Each element is a string of the form 'YYYYMMDDHH'. + + obs_days_all_cycles_inst: + List of observation days (i.e. days on which observations are needed to + perform verification) over all cycles of instantaneous fields. Each + element is a string of the form 'YYYYMMDD'. + + output_times_all_cycles_cumul: + List of forecast output times over all cycles of cumulative fields. Each + element is a string of the form 'YYYYMMDDHH'. + + obs_days_all_cycles_cumul: + List of observation days (i.e. days on which observations are needed to + perform verification) over all cycles of cumulative fields. Each element + is a string of the form 'YYYYMMDD'. + + """ + + # Get the list containing the starting times of the cycles. Each element + # of the list is a string of the form 'YYYYMMDDHH'. + cycle_start_times_str \ + = set_cycle_dates(start_time_first_cycl, start_time_last_cycl, cycl_intvl) + + # Convert cycle_start_times_str to a list of datetime objects. + cycle_start_times = [datetime.strptime(yyyymmddhh, "%Y%m%d%H") for yyyymmddhh in cycle_start_times_str] + + # Get the number of forecast output times per cycle/forecast. + num_output_times_per_cycle = int(fcst_len/fcst_output_intvl + 1) + + # Initialize sets that will contain the various forecast output and obs + # day information. + output_times_all_cycles_inst = set() + obs_days_all_cycles_inst = set() + output_times_all_cycles_cumul = set() + obs_days_all_cycles_cumul = set() + + for i, start_time_crnt_cycle in enumerate(cycle_start_times): + # Create a list of forecast output times of instantaneous fields for the + # current cycle. 
+ output_times_crnt_cycle_inst \ + = [start_time_crnt_cycle + i*fcst_output_intvl + for i in range(0,num_output_times_per_cycle)] + # Include the output times of instantaneous fields for the current cycle + # in the set of all such output times over all cycles. + output_times_all_cycles_inst \ + = output_times_all_cycles_inst | set(output_times_crnt_cycle_inst) + + # Create a list of instantaneous field obs days (i.e. days on which + # observations of instantaneous fields are needed for verification) for + # the current cycle. We do this by dropping the hour-of-day from each + # element of the list of forecast output times and keeping only unique + # elements. + tmp = [datetime_obj.date() for datetime_obj in output_times_crnt_cycle_inst] + obs_days_crnt_cycl_inst = sorted(set(tmp)) + # Include the obs days for instantaneous fields for the current cycle + # in the set of all such obs days over all cycles. + obs_days_all_cycles_inst = obs_days_all_cycles_inst | set(obs_days_crnt_cycl_inst) + + # Create a list of forecast output times of cumulative fields for the + # current cycle. This is simply the list of forecast output times for + # instantaneous fields but with the first time dropped (because nothing + # has yet accumulated at the starting time of the cycle). + output_times_crnt_cycle_cumul = output_times_crnt_cycle_inst + output_times_crnt_cycle_cumul.pop(0) + # Include the obs days for cumulative fields for the current cycle in the + # set of all such obs days over all cycles. + output_times_all_cycles_cumul \ + = output_times_all_cycles_cumul | set(output_times_crnt_cycle_cumul) + + # Create a list of cumulative field obs days (i.e. days on which + # observations of cumulative fields are needed for verification) for + # the current cycle. We do this by dropping the hour-of-day from each + # element of the list of forecast output times and keeping only unique + # elements. 
Note, however, that before dropping the hour-of-day from + # the list of forecast output times, we remove the last forecast output + # time if it happens to be the 0th hour of a day. This is because in + # the scripts/tasks that get observations of cumulative fields, the + # zeroth hour of a day is considered part of the previous day (because + # it represents accumulation that occurred on the previous day). + tmp = output_times_crnt_cycle_cumul + last_output_time_cumul = output_times_crnt_cycle_cumul[-1] + if last_output_time_cumul.hour == 0: + tmp.pop() + tmp = [datetime_obj.date() for datetime_obj in tmp] + obs_days_crnt_cycl_cumul = sorted(set(tmp)) + # Include the obs days for cumulative fields for the current cycle in the + # set of all such obs days over all cycles. + obs_days_all_cycles_cumul = obs_days_all_cycles_cumul | set(obs_days_crnt_cycl_cumul) + + # Convert the set of output times of instantaneous fields over all cycles + # to a sorted list of strings of the form 'YYYYMMDDHH'. + output_times_all_cycles_inst = sorted(output_times_all_cycles_inst) + output_times_all_cycles_inst = [datetime.strftime(output_times_all_cycles_inst[i], "%Y%m%d%H") + for i in range(len(output_times_all_cycles_inst))] + + # Convert the set of obs days for instantaneous fields over all cycles + # to a sorted list of strings of the form 'YYYYMMDD'. + obs_days_all_cycles_inst = sorted(obs_days_all_cycles_inst) + obs_days_all_cycles_inst = [datetime.strftime(obs_days_all_cycles_inst[i], "%Y%m%d") + for i in range(len(obs_days_all_cycles_inst))] + + # Convert the set of output times of cumulative fields over all cycles to + # a sorted list of strings of the form 'YYYYMMDDHH'. 
+ output_times_all_cycles_cumul = sorted(output_times_all_cycles_cumul) + output_times_all_cycles_cumul = [datetime.strftime(output_times_all_cycles_cumul[i], "%Y%m%d%H") + for i in range(len(output_times_all_cycles_cumul))] + + # Convert the set of obs days for cumulative fields over all cycles to a + # sorted list of strings of the form 'YYYYMMDD'. + obs_days_all_cycles_cumul = sorted(obs_days_all_cycles_cumul) + obs_days_all_cycles_cumul = [datetime.strftime(obs_days_all_cycles_cumul[i], "%Y%m%d") + for i in range(len(obs_days_all_cycles_cumul))] + + return output_times_all_cycles_inst, obs_days_all_cycles_inst, \ + output_times_all_cycles_cumul, obs_days_all_cycles_cumul + + +def set_cycledefs_for_obs_days(obs_days_all_cycles): + """Given a list of days on which obs are needed, this function generates a + list of ROCOTO-style cycledef strings that together span the days (over + all cycles of an SRW App experiment) on which obs are needed. The input + list of days must be increasing in time, but the days do not have to be + consecutive, i.e. there may be gaps between days that are greater than + one day. + + Each cycledef string in the output list represents a set of consecutive + days in the input string (when used inside a tag in a ROCOTO + XML). Thus, when the cycledef strings in the output string are all + assigned to the same cycledef group in a ROCOTO XML, that group will + represent all the days on which observations are needed. + + Args: + obs_days_all_cycles: + A list of strings of the form 'YYYYMMDD', with each string representing + a day on which observations are needed. Note that the list must be + sorted, i.e. the days must be increasing in time, but there may be + gaps between days. 
+
+    Returns:
+        cycledef_all_obs_days:
+        A list of strings, with each string being a ROCOTO-style cycledef of
+        the form
+
+            '{yyyymmdd_start}0000 {yyyymmdd_end}0000 24:00:00'
+
+        where {yyyymmdd_start} is the starting day of the first cycle in the
+        cycledef, and {yyyymmdd_end} is the starting day of the last cycle (note
+        that the minutes and hours in these cycledef strings are always set to
+        '00').  Thus, one of the elements of the output list may be as follows:
+
+            '202404290000 202405010000 24:00:00'
+    """
+
+    # To enable arithmetic with dates, convert input string list of observation
+    # days (i.e. days on which observations are needed) over all cycles to a
+    # list of datetime objects.
+    tmp = [datetime.strptime(yyyymmdd, "%Y%m%d") for yyyymmdd in obs_days_all_cycles]
+
+    # Initialize the variable that in the loop below contains the date of
+    # the previous day.  This is just the first element of the list of
+    # datetime objects constructed above.  Then use it to initialize the
+    # list (contin_obs_day_lists) that will contain lists of consecutive
+    # observation days.  Thus, after its construction is complete, each
+    # element of contin_obs_day_lists will itself be a list containing
+    # datetime objects that are 24 hours apart.
+    day_prev = tmp[0]
+    contin_obs_day_lists = list()
+    contin_obs_day_lists.append([day_prev])
+
+    # Remove the first element of the list of obs days since it has already
+    # been used to initialize contin_obs_day_lists.
+    tmp.pop(0)
+
+    # Loop over the remaining list of obs days and construct the list of
+    # lists of consecutive obs days.
+    one_day = timedelta(days=1)
+    for day_crnt in tmp:
+        # If the current obs day comes 24 hours after the previous obs day, i.e.
+        # if it is the next day of the previous obs day, append it to the last
+        # existing list in contin_obs_day_lists.
+ if day_crnt == day_prev + one_day: + contin_obs_day_lists[-1].append(day_crnt) + # If the current obs day is NOT the next day of the previous obs day, + # append a new element to contin_obs_day_lists and initialize it as a + # list containing a single element -- the current obs day. + else: + contin_obs_day_lists.append([day_crnt]) + # Update the value of the previous day in preparation for the next + # iteration of the loop. + day_prev = day_crnt + + # Use the list of lists of consecutive obs days to construct a list of + # ROCOTO-style cycledef strings that each represent a set of consecutive + # obs days when included in a tag in a ROCOTO XML. Each + # string in this new list corresponds to a series of consecutive days on + # which observations are needed (where by "consecutive" we mean no days + # are skipped), and there is at least a one day gap between each such + # series. These cycledefs together represent all the days (i.e. over all + # cycles of the experiment) on which observations are needed. 
+ cycledef_all_obs_days = list() + for contin_obs_day_list in contin_obs_day_lists: + cycledef_start = contin_obs_day_list[0].strftime('%Y%m%d%H%M') + cycledef_end = contin_obs_day_list[-1].strftime('%Y%m%d%H%M') + cycledef_all_obs_days.append(' '.join([cycledef_start, cycledef_end, '24:00:00'])) + + return cycledef_all_obs_days diff --git a/ush/setup.py b/ush/setup.py index 0511653fa2..b6f4256fc4 100644 --- a/ush/setup.py +++ b/ush/setup.py @@ -10,6 +10,7 @@ from textwrap import dedent import yaml +from pprint import pprint from python_utils import ( log_info, @@ -37,7 +38,9 @@ load_xml_file, ) -from set_cycle_dates import set_cycle_dates +from set_cycle_dates import \ + set_cycle_dates, set_fcst_output_times_and_obs_days_all_cycles, \ + set_cycledefs_for_obs_days from set_predef_grid_params import set_predef_grid_params from set_gridparams_ESGgrid import set_gridparams_ESGgrid from set_gridparams_GFDLgrid import set_gridparams_GFDLgrid @@ -754,6 +757,43 @@ def get_location(xcs, fmt, expt_cfg): date_last_cycl = workflow_config.get("DATE_LAST_CYCL") incr_cycl_freq = int(workflow_config.get("INCR_CYCL_FREQ")) + # Set the forecast output interval. Ideally, this should be obtained + # from the SRW App's configuration file, but such a variable doesn't + # yet exist in that file. + fcst_output_intvl_hrs = 1 + + # To enable arithmetic with dates and times, convert various time + # intervals from integer to datetime.timedelta objects. + cycl_intvl = datetime.timedelta(days=0, hours=incr_cycl_freq, minutes=0, seconds=0) + fcst_len = datetime.timedelta(days=0, hours=fcst_len_hrs, minutes=0, seconds=0) + fcst_output_intvl = datetime.timedelta(days=0, hours=fcst_output_intvl_hrs, minutes=0, seconds=0) + + # Generate a list of forecast output times and a list of obs days (i.e. + # days on which observations are needed to perform verification) over all + # cycles, both for instantaneous fields (e.g. T2m, REFC, RETOP) and for + # cumulative ones (e.g. APCP). 
+    output_times_all_cycles_inst, obs_days_all_cycles_inst, \
+    output_times_all_cycles_cumul, obs_days_all_cycles_cumul \
+    = set_fcst_output_times_and_obs_days_all_cycles( \
+          date_first_cycl, date_last_cycl, cycl_intvl, fcst_len, fcst_output_intvl)
+
+    # Add the list generated above to the dictionary containing workflow
+    # configuration variables.  These will be needed in generating the ROCOTO
+    # XML.
+    workflow_config['OUTPUT_TIMES_ALL_CYCLES_INST'] = output_times_all_cycles_inst
+    workflow_config['OBS_DAYS_ALL_CYCLES_INST'] = obs_days_all_cycles_inst
+    workflow_config['OUTPUT_TIMES_ALL_CYCLES_CUMUL'] = output_times_all_cycles_cumul
+    workflow_config['OBS_DAYS_ALL_CYCLES_CUMUL'] = obs_days_all_cycles_cumul
+
+    # Generate lists of ROCOTO cycledef strings corresponding to the obs days
+    # for instantaneous fields and those for cumulative ones.
+    cycledef_obs_days_inst = set_cycledefs_for_obs_days(obs_days_all_cycles_inst)
+    cycledef_obs_days_cumul = set_cycledefs_for_obs_days(obs_days_all_cycles_cumul)
+    # Save the lists of cycledefs in the dictionary containing values needed
+    # to construct the ROCOTO XML.
+    rocoto_config['cycledefs']['cycledef_obs_days_inst'] = cycledef_obs_days_inst
+    rocoto_config['cycledefs']['cycledef_obs_days_cumul'] = cycledef_obs_days_cumul
+
     # set varying forecast lengths only when fcst_len_hrs=-1
     if fcst_len_hrs == -1:
         fcst_len_cycl = workflow_config.get("FCST_LEN_CYCL")
@@ -765,7 +805,7 @@ def get_location(xcs, fmt, expt_cfg):
             num_cycles = len(set_cycle_dates(
               date_first_cycl,
               date_last_cycl,
-              incr_cycl_freq))
+              cycl_intvl))
 
             if num_cycles != len(fcst_len_cycl):
                 logger.error(f"""
                             The number of entries in FCST_LEN_CYCL does

From 7bb8b1fefdc0b96c20e93c1e8c3551e89bc8c05d Mon Sep 17 00:00:00 2001
From: gerard ketefian
Date: Thu, 5 Sep 2024 14:06:02 -0600
Subject: [PATCH 045/131] Fix bug in the way the start time of the second cycle
 is calculated.
--- parm/wflow/default_workflow.yaml | 8 +++++++- ush/setup.py | 13 +++++++++++++ 2 files changed, 20 insertions(+), 1 deletion(-) diff --git a/parm/wflow/default_workflow.yaml b/parm/wflow/default_workflow.yaml index c79415b3be..d7198cf524 100644 --- a/parm/wflow/default_workflow.yaml +++ b/parm/wflow/default_workflow.yaml @@ -51,7 +51,13 @@ rocoto: forecast: - !startstopfreq ['{{workflow.DATE_FIRST_CYCL}}', '{{workflow.DATE_LAST_CYCL}}', '{{workflow.INCR_CYCL_FREQ}}'] cycled_from_second: - - !startstopfreq ['{%- if workflow.DATE_FIRST_CYCL != workflow.DATE_LAST_CYCL %}{{ [workflow.DATE_FIRST_CYCL[0:8], "{:02d}".format(workflow.INCR_CYCL_FREQ)]|join }}{%- else %}{{workflow.DATE_FIRST_CYCL}}{%- endif %}', '{{workflow.DATE_LAST_CYCL}}', '{{workflow.INCR_CYCL_FREQ}}'] + - !startstopfreq ['{%- if workflow.DATE_FIRST_CYCL != workflow.DATE_LAST_CYCL %} + {{- workflow.DATE_SECOND_CYCL }} + {%- else %} + {{- workflow.DATE_FIRST_CYCL }} + {%- endif %}', + '{{ workflow.DATE_LAST_CYCL }}', + '{{ workflow.INCR_CYCL_FREQ }}'] log: !cycstr '&LOGDIR;/FV3LAM_wflow.{% if user.RUN_ENVIR == "nco" %}{{ workflow.WORKFLOW_ID + "." }}{% endif %}log' tasks: taskgroups: '{{ ["parm/wflow/prep.yaml", "parm/wflow/coldstart.yaml", "parm/wflow/post.yaml", "parm/wflow/test.yaml"]|include }}' diff --git a/ush/setup.py b/ush/setup.py index b6f4256fc4..4b56294aaa 100644 --- a/ush/setup.py +++ b/ush/setup.py @@ -794,6 +794,19 @@ def get_location(xcs, fmt, expt_cfg): rocoto_config['cycledefs']['cycledef_obs_days_inst'] = cycledef_obs_days_inst rocoto_config['cycledefs']['cycledef_obs_days_cumul'] = cycledef_obs_days_cumul + # The "cycled_from_second" cycledef in the default workflow configuration + # file (default_workflow.yaml) requires the starting date of the second + # cycle. That is difficult to calculate in the yaml file itself because + # currently, there are no utilities to perform arithmetic with dates. 
+ # Thus, we calculate it here and save it as a variable in the workflow + # configuration dictionary. Note that correct functioning of the default + # workflow yaml file also requires that DATE_[FIRST|SECOND|LAST]_CYCL all + # be strings, not datetime objects. We perform those conversions here. + date_second_cycl = date_first_cycl + cycl_intvl + workflow_config['DATE_FIRST_CYCL'] = datetime.datetime.strftime(date_first_cycl, "%Y%m%d%H") + workflow_config['DATE_SECOND_CYCL'] = datetime.datetime.strftime(date_second_cycl, "%Y%m%d%H") + workflow_config['DATE_LAST_CYCL'] = datetime.datetime.strftime(date_last_cycl, "%Y%m%d%H") + # set varying forecast lengths only when fcst_len_hrs=-1 if fcst_len_hrs == -1: fcst_len_cycl = workflow_config.get("FCST_LEN_CYCL") From ef6aafe6c1e2f02407bf417b4faa9a4f559d1962 Mon Sep 17 00:00:00 2001 From: gerard ketefian Date: Thu, 5 Sep 2024 14:07:24 -0600 Subject: [PATCH 046/131] Rename script for clarity. --- ush/{set_cycle_dates.py => set_cycle_and_obs_timeinfo.py} | 0 ush/setup.py | 2 +- 2 files changed, 1 insertion(+), 1 deletion(-) rename ush/{set_cycle_dates.py => set_cycle_and_obs_timeinfo.py} (100%) diff --git a/ush/set_cycle_dates.py b/ush/set_cycle_and_obs_timeinfo.py similarity index 100% rename from ush/set_cycle_dates.py rename to ush/set_cycle_and_obs_timeinfo.py diff --git a/ush/setup.py b/ush/setup.py index 4b56294aaa..9ecc5e402b 100644 --- a/ush/setup.py +++ b/ush/setup.py @@ -38,7 +38,7 @@ load_xml_file, ) -from set_cycle_dates import \ +from set_cycle_and_obs_timeinfo import \ set_cycle_dates, set_fcst_output_times_and_obs_days_all_cycles, \ set_cycledefs_for_obs_days from set_predef_grid_params import set_predef_grid_params From 99246f2e5d08d6e837f0043eeb8d2f418072ff2c Mon Sep 17 00:00:00 2001 From: gerard ketefian Date: Sun, 8 Sep 2024 05:54:05 -0600 Subject: [PATCH 047/131] Allow use of dots in WE2E test names. 
--- tests/WE2E/run_WE2E_tests.py | 9 +++++++-- 1 file changed, 7 insertions(+), 2 deletions(-) diff --git a/tests/WE2E/run_WE2E_tests.py b/tests/WE2E/run_WE2E_tests.py index 5d4bd81105..d3c2cb98ab 100755 --- a/tests/WE2E/run_WE2E_tests.py +++ b/tests/WE2E/run_WE2E_tests.py @@ -141,7 +141,6 @@ def run_we2e_tests(homedir, args) -> None: pretty_list = "\n".join(str(x) for x in tests_to_run) logging.info(f'Will run {len(tests_to_run)} tests:\n{pretty_list}') - config_default_file = os.path.join(ushdir,'config_defaults.yaml') logging.debug(f"Loading config defaults file {config_default_file}") config_defaults = load_config_file(config_default_file) @@ -159,7 +158,13 @@ def run_we2e_tests(homedir, args) -> None: # test-specific options, then write resulting complete config.yaml starttime = datetime.now() starttime_string = starttime.strftime("%Y%m%d%H%M%S") - test_name = os.path.basename(test).split('.')[1] + test_fn = os.path.basename(test) + # Set the test name to all characters between the initial "config." and + # the final ".yaml" in the file name. This will allow any characters to + # be used as part of the test name, in particular a ".". + prefix = 'config.' + suffix = '.yaml' + test_name = test_fn[test_fn.find(prefix)+len(prefix):test_fn.rfind(suffix)] logging.debug(f"For test {test_name}, constructing config.yaml") test_cfg = load_config_file(test) From f1d2c29261a8a9978757ae9a46edcdeda7e04dee Mon Sep 17 00:00:00 2001 From: gerard ketefian Date: Sun, 8 Sep 2024 05:54:57 -0600 Subject: [PATCH 048/131] Remove extraneous "proc" subdirectory in default obs directories. 
--- ush/config_defaults.yaml | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/ush/config_defaults.yaml b/ush/config_defaults.yaml index 9ac5bc39f8..6486388cad 100644 --- a/ush/config_defaults.yaml +++ b/ush/config_defaults.yaml @@ -317,10 +317,10 @@ platform: # #----------------------------------------------------------------------- # - CCPA_OBS_DIR: "{{ workflow.EXPTDIR }}/obs_data/ccpa/proc" - NOHRSC_OBS_DIR: "{{ workflow.EXPTDIR }}/obs_data/nohrsc/proc" - MRMS_OBS_DIR: "{{ workflow.EXPTDIR }}/obs_data/mrms/proc" - NDAS_OBS_DIR: "{{ workflow.EXPTDIR }}/obs_data/ndas/proc" + CCPA_OBS_DIR: "{{ workflow.EXPTDIR }}/obs_data/ccpa" + NOHRSC_OBS_DIR: "{{ workflow.EXPTDIR }}/obs_data/nohrsc" + MRMS_OBS_DIR: "{{ workflow.EXPTDIR }}/obs_data/mrms" + NDAS_OBS_DIR: "{{ workflow.EXPTDIR }}/obs_data/ndas" # #----------------------------------------------------------------------- # From 5945b027f9bf65c30945d56a2b2237be036b72d2 Mon Sep 17 00:00:00 2001 From: gerard ketefian Date: Sun, 8 Sep 2024 05:56:08 -0600 Subject: [PATCH 049/131] Comment out "set -x". --- ush/get_obs_ccpa.sh | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/ush/get_obs_ccpa.sh b/ush/get_obs_ccpa.sh index ef1d55eb05..0f3dd248ea 100755 --- a/ush/get_obs_ccpa.sh +++ b/ush/get_obs_ccpa.sh @@ -11,7 +11,7 @@ source_config_for_task " " ${GLOBAL_VAR_DEFNS_FP} set -u -set -x +#set -x # #----------------------------------------------------------------------- # From b20f2c9c617c0ff26567ec45b97df194e3ef9a29 Mon Sep 17 00:00:00 2001 From: gerard ketefian Date: Sun, 8 Sep 2024 17:54:26 -0600 Subject: [PATCH 050/131] Add WE2E test configuration files for getting obs, doing vx pre-processing, and deterministic vx. 
--- ...7hr_inits_vary_fcstlen_09hr.ncep-hrrr.yaml | 63 ++++++++++++++++++ ...1hr_inits_vary_fcstlen_03hr.ncep-hrrr.yaml | 65 +++++++++++++++++++ ...24hr_inits_00z_fcstlen_03hr.ncep-hrrr.yaml | 63 ++++++++++++++++++ ...24hr_inits_12z_fcstlen_03hr.nssl-mpas.yaml | 65 +++++++++++++++++++ ...24hr_inits_12z_fcstlen_48hr.nssl-mpas.yaml | 64 ++++++++++++++++++ ...24hr_inits_21z_fcstlen_03hr.ncep-hrrr.yaml | 63 ++++++++++++++++++ ...96hr_inits_12z_fcstlen_48hr.nssl-mpas.yaml | 65 +++++++++++++++++++ 7 files changed, 448 insertions(+) create mode 100644 tests/WE2E/test_configs/verification/config.get_obs_hpss.do_vx.multicyc.cycintvl_07hr_inits_vary_fcstlen_09hr.ncep-hrrr.yaml create mode 100644 tests/WE2E/test_configs/verification/config.get_obs_hpss.do_vx.multicyc.cycintvl_11hr_inits_vary_fcstlen_03hr.ncep-hrrr.yaml create mode 100644 tests/WE2E/test_configs/verification/config.get_obs_hpss.do_vx.multicyc.cycintvl_24hr_inits_00z_fcstlen_03hr.ncep-hrrr.yaml create mode 100644 tests/WE2E/test_configs/verification/config.get_obs_hpss.do_vx.multicyc.cycintvl_24hr_inits_12z_fcstlen_03hr.nssl-mpas.yaml create mode 100644 tests/WE2E/test_configs/verification/config.get_obs_hpss.do_vx.multicyc.cycintvl_24hr_inits_12z_fcstlen_48hr.nssl-mpas.yaml create mode 100644 tests/WE2E/test_configs/verification/config.get_obs_hpss.do_vx.multicyc.cycintvl_24hr_inits_21z_fcstlen_03hr.ncep-hrrr.yaml create mode 100644 tests/WE2E/test_configs/verification/config.get_obs_hpss.do_vx.multicyc.cycintvl_96hr_inits_12z_fcstlen_48hr.nssl-mpas.yaml diff --git a/tests/WE2E/test_configs/verification/config.get_obs_hpss.do_vx.multicyc.cycintvl_07hr_inits_vary_fcstlen_09hr.ncep-hrrr.yaml b/tests/WE2E/test_configs/verification/config.get_obs_hpss.do_vx.multicyc.cycintvl_07hr_inits_vary_fcstlen_09hr.ncep-hrrr.yaml new file mode 100644 index 0000000000..8b840a8ea8 --- /dev/null +++ b/tests/WE2E/test_configs/verification/config.get_obs_hpss.do_vx.multicyc.cycintvl_07hr_inits_vary_fcstlen_09hr.ncep-hrrr.yaml @@ 
-0,0 +1,63 @@ +metadata: + description: |- + SRW App configuration file to first pull CCPA, MRMS, and NDAS observations + from HPSS for multiple cycles and then perform deterministic verification + for all cycles, including first performing vx preprocessing with METplus + tools such as PcpCombine and Pb2Nc. + + The staged forecast data are from NCEP's operational version of the HRRR. + + This test is for the scenario in which there are multiple, short (i.e. + shorter than 24hr), overlapping forecasts in a day. + +user: + RUN_ENVIR: community + +platform: + EXTRN_MDL_DATA_STORES: hpss + +workflow: + PREEXISTING_DIR_METHOD: rename + PREDEF_GRID_NAME: RRFS_CONUS_25km + INCR_CYCL_FREQ: 7 + DATE_FIRST_CYCL: '2024042902' + DATE_LAST_CYCL: '2024043006' + FCST_LEN_HRS: 9 + +platform: + # Base directories in which to look for obs. If these do not exist and/or + # do not contain the required obs, create and populate them. + # Note that when performing WE2E tests, the default behavior is not to + # get obs files from HPSS but to use staged obs files. This is done by + # setting these variables to the (platform-specific) locations of these + # staged files. To force the WE2E testing system to get the obs from + # HPSS, here we reset these variables to their default values in the SRW + # workflow configuration file config_defaults.yaml. + CCPA_OBS_DIR: "{{ workflow.EXPTDIR }}/obs_data/ccpa" + NOHRSC_OBS_DIR: "{{ workflow.EXPTDIR }}/obs_data/nohrsc" + MRMS_OBS_DIR: "{{ workflow.EXPTDIR }}/obs_data/mrms" + NDAS_OBS_DIR: "{{ workflow.EXPTDIR }}/obs_data/ndas" + # Do not remove raw obs files to be able to verify that only the necessary + # raw files are fetched from HPSS. 
+ REMOVE_RAW_OBS_CCPA: false + REMOVE_RAW_OBS_MRMS: false + REMOVE_RAW_OBS_NDAS: false + REMOVE_RAW_OBS_NOHRSC: false + +rocoto: + tasks: + taskgroups: '{{ ["parm/wflow/verify_pre.yaml", "parm/wflow/verify_det.yaml"]|include }}' + +task_get_extrn_lbcs: + LBC_SPEC_INTVL_HRS: 3 + +task_run_post: + POST_OUTPUT_DOMAIN_NAME: 'hrrr_ncep' + +verification: + METPLUS_VERBOSITY_LEVEL: 5 + VX_FCST_MODEL_NAME: 'hrrr_ncep' + VX_FCST_INPUT_BASEDIR: '/scratch2/BMC/det/Gerard.Ketefian/UFS_CAM/forecasts_for_SRW_WE2E_tests' + FCST_SUBDIR_TEMPLATE: '{% if user.RUN_ENVIR == "nco" %}${NET_default}.{init?fmt=%Y%m%d?shift=-${time_lag}}/{init?fmt=%H?shift=-${time_lag}}{% else %}{{VX_FCST_MODEL_NAME}}_{init?fmt=%Y%m%d%H?shift=-${time_lag}}{% if global.DO_ENSEMBLE %}/${ensmem_name}{% endif %}{% endif %}' + FCST_FN_TEMPLATE: '${POST_OUTPUT_DOMAIN_NAME}_{init?fmt=%Y%m%d%H?shift=-${time_lag}}f{lead?fmt=%HHH?shift=${time_lag}}.grib2' + FCST_FN_TEMPLATE_PCPCOMBINE_OUTPUT: '${POST_OUTPUT_DOMAIN_NAME}_{init?fmt=%Y%m%d%HHH?shift=-${time_lag}}f{lead?fmt=%HHH?shift=${time_lag}}_${VAR}_a${ACCUM_HH}h.nc' diff --git a/tests/WE2E/test_configs/verification/config.get_obs_hpss.do_vx.multicyc.cycintvl_11hr_inits_vary_fcstlen_03hr.ncep-hrrr.yaml b/tests/WE2E/test_configs/verification/config.get_obs_hpss.do_vx.multicyc.cycintvl_11hr_inits_vary_fcstlen_03hr.ncep-hrrr.yaml new file mode 100644 index 0000000000..18558e0d95 --- /dev/null +++ b/tests/WE2E/test_configs/verification/config.get_obs_hpss.do_vx.multicyc.cycintvl_11hr_inits_vary_fcstlen_03hr.ncep-hrrr.yaml @@ -0,0 +1,65 @@ +metadata: + description: |- + SRW App configuration file to first pull CCPA, MRMS, and NDAS observations + from HPSS for multiple cycles and then perform deterministic verification + for all cycles, including first performing vx preprocessing with METplus + tools such as PcpCombine and Pb2Nc. + + The staged forecast data are from NCEP's operational version of the HRRR. 
+ + This test is for the scenario in which there are multiple, short (i.e. + shorter than 24hr), NON-overlapping forecasts in a day with multi-hour + (but < 24hr) gaps between the end of one forecast and the start of the + next. + +user: + RUN_ENVIR: community + +platform: + EXTRN_MDL_DATA_STORES: hpss + +workflow: + PREEXISTING_DIR_METHOD: rename + PREDEF_GRID_NAME: RRFS_CONUS_25km + INCR_CYCL_FREQ: 11 + DATE_FIRST_CYCL: '2024042902' + DATE_LAST_CYCL: '2024043022' + FCST_LEN_HRS: 3 + +platform: + # Base directories in which to look for obs. If these do not exist and/or + # do not contain the required obs, create and populate them. + # Note that when performing WE2E tests, the default behavior is not to + # get obs files from HPSS but to use staged obs files. This is done by + # setting these variables to the (platform-specific) locations of these + # staged files. To force the WE2E testing system to get the obs from + # HPSS, here we reset these variables to their default values in the SRW + # workflow configuration file config_defaults.yaml. + CCPA_OBS_DIR: "{{ workflow.EXPTDIR }}/obs_data/ccpa" + NOHRSC_OBS_DIR: "{{ workflow.EXPTDIR }}/obs_data/nohrsc" + MRMS_OBS_DIR: "{{ workflow.EXPTDIR }}/obs_data/mrms" + NDAS_OBS_DIR: "{{ workflow.EXPTDIR }}/obs_data/ndas" + # Do not remove raw obs files to be able to verify that only the necessary + # raw files are fetched from HPSS. 
+ REMOVE_RAW_OBS_CCPA: false + REMOVE_RAW_OBS_MRMS: false + REMOVE_RAW_OBS_NDAS: false + REMOVE_RAW_OBS_NOHRSC: false + +rocoto: + tasks: + taskgroups: '{{ ["parm/wflow/verify_pre.yaml", "parm/wflow/verify_det.yaml"]|include }}' + +task_get_extrn_lbcs: + LBC_SPEC_INTVL_HRS: 3 + +task_run_post: + POST_OUTPUT_DOMAIN_NAME: 'hrrr_ncep' + +verification: + METPLUS_VERBOSITY_LEVEL: 5 + VX_FCST_MODEL_NAME: 'hrrr_ncep' + VX_FCST_INPUT_BASEDIR: '/scratch2/BMC/det/Gerard.Ketefian/UFS_CAM/forecasts_for_SRW_WE2E_tests' + FCST_SUBDIR_TEMPLATE: '{% if user.RUN_ENVIR == "nco" %}${NET_default}.{init?fmt=%Y%m%d?shift=-${time_lag}}/{init?fmt=%H?shift=-${time_lag}}{% else %}{{VX_FCST_MODEL_NAME}}_{init?fmt=%Y%m%d%H?shift=-${time_lag}}{% if global.DO_ENSEMBLE %}/${ensmem_name}{% endif %}{% endif %}' + FCST_FN_TEMPLATE: '${POST_OUTPUT_DOMAIN_NAME}_{init?fmt=%Y%m%d%H?shift=-${time_lag}}f{lead?fmt=%HHH?shift=${time_lag}}.grib2' + FCST_FN_TEMPLATE_PCPCOMBINE_OUTPUT: '${POST_OUTPUT_DOMAIN_NAME}_{init?fmt=%Y%m%d%HHH?shift=-${time_lag}}f{lead?fmt=%HHH?shift=${time_lag}}_${VAR}_a${ACCUM_HH}h.nc' diff --git a/tests/WE2E/test_configs/verification/config.get_obs_hpss.do_vx.multicyc.cycintvl_24hr_inits_00z_fcstlen_03hr.ncep-hrrr.yaml b/tests/WE2E/test_configs/verification/config.get_obs_hpss.do_vx.multicyc.cycintvl_24hr_inits_00z_fcstlen_03hr.ncep-hrrr.yaml new file mode 100644 index 0000000000..5d6929cd4a --- /dev/null +++ b/tests/WE2E/test_configs/verification/config.get_obs_hpss.do_vx.multicyc.cycintvl_24hr_inits_00z_fcstlen_03hr.ncep-hrrr.yaml @@ -0,0 +1,63 @@ +metadata: + description: |- + SRW App configuration file to first pull CCPA, MRMS, and NDAS observations + from HPSS for multiple cycles and then perform deterministic verification + for all cycles, including first performing vx preprocessing with METplus + tools such as PcpCombine and Pb2Nc. + + The staged forecast data are from NCEP's operational version of the HRRR. 
+ + This test is for the scenario in which the first obs needed is at 00z. It + tests the special treatment needed for obtaining CCPA and NDAS obs at 00z. + +user: + RUN_ENVIR: community + +platform: + EXTRN_MDL_DATA_STORES: hpss + +workflow: + PREEXISTING_DIR_METHOD: rename + PREDEF_GRID_NAME: RRFS_CONUS_25km + INCR_CYCL_FREQ: 24 + DATE_FIRST_CYCL: '2024042900' + DATE_LAST_CYCL: '2024043000' + FCST_LEN_HRS: 3 + +platform: + # Base directories in which to look for obs. If these do not exist and/or + # do not contain the required obs, create and populate them. + # Note that when performing WE2E tests, the default behavior is not to + # get obs files from HPSS but to use staged obs files. This is done by + # setting these variables to the (platform-specific) locations of these + # staged files. To force the WE2E testing system to get the obs from + # HPSS, here we reset these variables to their default values in the SRW + # workflow configuration file config_defaults.yaml. + CCPA_OBS_DIR: "{{ workflow.EXPTDIR }}/obs_data/ccpa" + NOHRSC_OBS_DIR: "{{ workflow.EXPTDIR }}/obs_data/nohrsc" + MRMS_OBS_DIR: "{{ workflow.EXPTDIR }}/obs_data/mrms" + NDAS_OBS_DIR: "{{ workflow.EXPTDIR }}/obs_data/ndas" + # Do not remove raw obs files to be able to verify that only the necessary + # raw files are fetched from HPSS. 
+ REMOVE_RAW_OBS_CCPA: false + REMOVE_RAW_OBS_MRMS: false + REMOVE_RAW_OBS_NDAS: false + REMOVE_RAW_OBS_NOHRSC: false + +rocoto: + tasks: + taskgroups: '{{ ["parm/wflow/verify_pre.yaml", "parm/wflow/verify_det.yaml"]|include }}' + +task_get_extrn_lbcs: + LBC_SPEC_INTVL_HRS: 3 + +task_run_post: + POST_OUTPUT_DOMAIN_NAME: 'hrrr_ncep' + +verification: + METPLUS_VERBOSITY_LEVEL: 5 + VX_FCST_MODEL_NAME: 'hrrr_ncep' + VX_FCST_INPUT_BASEDIR: '/scratch2/BMC/det/Gerard.Ketefian/UFS_CAM/forecasts_for_SRW_WE2E_tests' + FCST_SUBDIR_TEMPLATE: '{% if user.RUN_ENVIR == "nco" %}${NET_default}.{init?fmt=%Y%m%d?shift=-${time_lag}}/{init?fmt=%H?shift=-${time_lag}}{% else %}{{VX_FCST_MODEL_NAME}}_{init?fmt=%Y%m%d%H?shift=-${time_lag}}{% if global.DO_ENSEMBLE %}/${ensmem_name}{% endif %}{% endif %}' + FCST_FN_TEMPLATE: '${POST_OUTPUT_DOMAIN_NAME}_{init?fmt=%Y%m%d%H?shift=-${time_lag}}f{lead?fmt=%HHH?shift=${time_lag}}.grib2' + FCST_FN_TEMPLATE_PCPCOMBINE_OUTPUT: '${POST_OUTPUT_DOMAIN_NAME}_{init?fmt=%Y%m%d%HHH?shift=-${time_lag}}f{lead?fmt=%HHH?shift=${time_lag}}_${VAR}_a${ACCUM_HH}h.nc' diff --git a/tests/WE2E/test_configs/verification/config.get_obs_hpss.do_vx.multicyc.cycintvl_24hr_inits_12z_fcstlen_03hr.nssl-mpas.yaml b/tests/WE2E/test_configs/verification/config.get_obs_hpss.do_vx.multicyc.cycintvl_24hr_inits_12z_fcstlen_03hr.nssl-mpas.yaml new file mode 100644 index 0000000000..7ec2264509 --- /dev/null +++ b/tests/WE2E/test_configs/verification/config.get_obs_hpss.do_vx.multicyc.cycintvl_24hr_inits_12z_fcstlen_03hr.nssl-mpas.yaml @@ -0,0 +1,65 @@ +metadata: + description: |- + SRW App configuration file to first pull CCPA, MRMS, and NDAS observations + from HPSS for multiple cycles and then perform deterministic verification + for all cycles, including first performing vx preprocessing with METplus + tools such as PcpCombine and Pb2Nc. + + The staged forecast data are from one of NSSL's MPAS prototypes submitted + to the 2024 HWT Spring Forecast Experiment. 
+ + This test is for the scenario in which forecasts do not include 00z. It + is the simplest case of obtaining CCPA and NDAS obs because it avoids + testing the special treatment needed at 00z for these obs types. + +user: + RUN_ENVIR: community + +platform: + EXTRN_MDL_DATA_STORES: hpss + +workflow: + PREEXISTING_DIR_METHOD: rename + PREDEF_GRID_NAME: RRFS_CONUS_25km + INCR_CYCL_FREQ: 24 + DATE_FIRST_CYCL: '2024042912' + DATE_LAST_CYCL: '2024043012' + FCST_LEN_HRS: 3 + +platform: + # Base directories in which to look for obs. If these do not exist and/or + # do not contain the required obs, create and populate them. + # Note that when performing WE2E tests, the default behavior is not to + # get obs files from HPSS but to use staged obs files. This is done by + # setting these variables to the (platform-specific) locations of these + # staged files. To force the WE2E testing system to get the obs from + # HPSS, here we reset these variables to their default values in the SRW + # workflow configuration file config_defaults.yaml. + CCPA_OBS_DIR: "{{ workflow.EXPTDIR }}/obs_data/ccpa" + NOHRSC_OBS_DIR: "{{ workflow.EXPTDIR }}/obs_data/nohrsc" + MRMS_OBS_DIR: "{{ workflow.EXPTDIR }}/obs_data/mrms" + NDAS_OBS_DIR: "{{ workflow.EXPTDIR }}/obs_data/ndas" + # Do not remove raw obs files to be able to verify that only the necessary + # raw files are fetched from HPSS. 
+ REMOVE_RAW_OBS_CCPA: false + REMOVE_RAW_OBS_MRMS: false + REMOVE_RAW_OBS_NDAS: false + REMOVE_RAW_OBS_NOHRSC: false + +rocoto: + tasks: + taskgroups: '{{ ["parm/wflow/verify_pre.yaml", "parm/wflow/verify_det.yaml"]|include }}' + +task_get_extrn_lbcs: + LBC_SPEC_INTVL_HRS: 3 + +task_run_post: + POST_OUTPUT_DOMAIN_NAME: 'mpashn4nssl' + +verification: + METPLUS_VERBOSITY_LEVEL: 5 + VX_FCST_MODEL_NAME: 'NSSL-MPAS-HN' + VX_FCST_INPUT_BASEDIR: '/scratch2/BMC/det/Gerard.Ketefian/UFS_CAM/forecasts_for_SRW_WE2E_tests' + FCST_SUBDIR_TEMPLATE: '{% if user.RUN_ENVIR == "nco" %}${NET_default}.{init?fmt=%Y%m%d?shift=-${time_lag}}/{init?fmt=%H?shift=-${time_lag}}{% else %}{{VX_FCST_MODEL_NAME}}_{init?fmt=%Y%m%d%H?shift=-${time_lag}}{% if global.DO_ENSEMBLE %}/${ensmem_name}{% endif %}{% endif %}' + FCST_FN_TEMPLATE: '${POST_OUTPUT_DOMAIN_NAME}_{init?fmt=%Y%m%d%H?shift=-${time_lag}}f{lead?fmt=%H?shift=${time_lag}}.grib2' + FCST_FN_TEMPLATE_PCPCOMBINE_OUTPUT: '${POST_OUTPUT_DOMAIN_NAME}_{init?fmt=%Y%m%d%H?shift=-${time_lag}}f{lead?fmt=%HHH?shift=${time_lag}}_${VAR}_a${ACCUM_HH}h.nc' diff --git a/tests/WE2E/test_configs/verification/config.get_obs_hpss.do_vx.multicyc.cycintvl_24hr_inits_12z_fcstlen_48hr.nssl-mpas.yaml b/tests/WE2E/test_configs/verification/config.get_obs_hpss.do_vx.multicyc.cycintvl_24hr_inits_12z_fcstlen_48hr.nssl-mpas.yaml new file mode 100644 index 0000000000..29427201e7 --- /dev/null +++ b/tests/WE2E/test_configs/verification/config.get_obs_hpss.do_vx.multicyc.cycintvl_24hr_inits_12z_fcstlen_48hr.nssl-mpas.yaml @@ -0,0 +1,64 @@ +metadata: + description: |- + SRW App configuration file to first pull CCPA, MRMS, and NDAS observations + from HPSS for multiple cycles and then perform deterministic verification + for all cycles, including first performing vx preprocessing with METplus + tools such as PcpCombine and Pb2Nc. + + The staged forecast data are from one of NSSL's MPAS prototypes submitted + to the 2024 HWT Spring Forecast Experiment. 
+ + This test is for the scenario in which there are multiple, long (i.e. + longer than 24hr) overlapping forecasts. + +user: + RUN_ENVIR: community + +platform: + EXTRN_MDL_DATA_STORES: hpss + +workflow: + PREEXISTING_DIR_METHOD: rename + PREDEF_GRID_NAME: RRFS_CONUS_25km + INCR_CYCL_FREQ: 24 + DATE_FIRST_CYCL: '2024042912' + DATE_LAST_CYCL: '2024050212' + FCST_LEN_HRS: 48 + +platform: + # Base directories in which to look for obs. If these do not exist and/or + # do not contain the required obs, create and populate them. + # Note that when performing WE2E tests, the default behavior is not to + # get obs files from HPSS but to use staged obs files. This is done by + # setting these variables to the (platform-specific) locations of these + # staged files. To force the WE2E testing system to get the obs from + # HPSS, here we reset these variables to their default values in the SRW + # workflow configuration file config_defaults.yaml. + CCPA_OBS_DIR: "{{ workflow.EXPTDIR }}/obs_data/ccpa" + NOHRSC_OBS_DIR: "{{ workflow.EXPTDIR }}/obs_data/nohrsc" + MRMS_OBS_DIR: "{{ workflow.EXPTDIR }}/obs_data/mrms" + NDAS_OBS_DIR: "{{ workflow.EXPTDIR }}/obs_data/ndas" + # Do not remove raw obs files to be able to verify that only the necessary + # raw files are fetched from HPSS. 
+ REMOVE_RAW_OBS_CCPA: false + REMOVE_RAW_OBS_MRMS: false + REMOVE_RAW_OBS_NDAS: false + REMOVE_RAW_OBS_NOHRSC: false + +rocoto: + tasks: + taskgroups: '{{ ["parm/wflow/verify_pre.yaml", "parm/wflow/verify_det.yaml"]|include }}' + +task_get_extrn_lbcs: + LBC_SPEC_INTVL_HRS: 3 + +task_run_post: + POST_OUTPUT_DOMAIN_NAME: 'mpashn4nssl' + +verification: + METPLUS_VERBOSITY_LEVEL: 5 + VX_FCST_MODEL_NAME: 'NSSL-MPAS-HN' + VX_FCST_INPUT_BASEDIR: '/scratch2/BMC/det/Gerard.Ketefian/UFS_CAM/forecasts_for_SRW_WE2E_tests' + FCST_SUBDIR_TEMPLATE: '{% if user.RUN_ENVIR == "nco" %}${NET_default}.{init?fmt=%Y%m%d?shift=-${time_lag}}/{init?fmt=%H?shift=-${time_lag}}{% else %}{{VX_FCST_MODEL_NAME}}_{init?fmt=%Y%m%d%H?shift=-${time_lag}}{% if global.DO_ENSEMBLE %}/${ensmem_name}{% endif %}{% endif %}' + FCST_FN_TEMPLATE: '${POST_OUTPUT_DOMAIN_NAME}_{init?fmt=%Y%m%d%H?shift=-${time_lag}}f{lead?fmt=%H?shift=${time_lag}}.grib2' + FCST_FN_TEMPLATE_PCPCOMBINE_OUTPUT: '${POST_OUTPUT_DOMAIN_NAME}_{init?fmt=%Y%m%d%H?shift=-${time_lag}}f{lead?fmt=%HHH?shift=${time_lag}}_${VAR}_a${ACCUM_HH}h.nc' diff --git a/tests/WE2E/test_configs/verification/config.get_obs_hpss.do_vx.multicyc.cycintvl_24hr_inits_21z_fcstlen_03hr.ncep-hrrr.yaml b/tests/WE2E/test_configs/verification/config.get_obs_hpss.do_vx.multicyc.cycintvl_24hr_inits_21z_fcstlen_03hr.ncep-hrrr.yaml new file mode 100644 index 0000000000..508d14c7fa --- /dev/null +++ b/tests/WE2E/test_configs/verification/config.get_obs_hpss.do_vx.multicyc.cycintvl_24hr_inits_21z_fcstlen_03hr.ncep-hrrr.yaml @@ -0,0 +1,63 @@ +metadata: + description: |- + SRW App configuration file to first pull CCPA, MRMS, and NDAS observations + from HPSS for multiple cycles and then perform deterministic verification + for all cycles, including first performing vx preprocessing with METplus + tools such as PcpCombine and Pb2Nc. + + The staged forecast data are from NCEP's operational version of the HRRR. 
+ + This test is for the scenario in which the last obs needed is at 00z. It + tests the special treatment needed for obtaining CCPA and NDAS obs at 00z. + +user: + RUN_ENVIR: community + +platform: + EXTRN_MDL_DATA_STORES: hpss + +workflow: + PREEXISTING_DIR_METHOD: rename + PREDEF_GRID_NAME: RRFS_CONUS_25km + INCR_CYCL_FREQ: 24 + DATE_FIRST_CYCL: '2024042921' + DATE_LAST_CYCL: '2024043021' + FCST_LEN_HRS: 3 + +platform: + # Base directories in which to look for obs. If these do not exist and/or + # do not contain the required obs, create and populate them. + # Note that when performing WE2E tests, the default behavior is not to + # get obs files from HPSS but to use staged obs files. This is done by + # setting these variables to the (platform-specific) locations of these + # staged files. To force the WE2E testing system to get the obs from + # HPSS, here we reset these variables to their default values in the SRW + # workflow configuration file config_defaults.yaml. + CCPA_OBS_DIR: "{{ workflow.EXPTDIR }}/obs_data/ccpa" + NOHRSC_OBS_DIR: "{{ workflow.EXPTDIR }}/obs_data/nohrsc" + MRMS_OBS_DIR: "{{ workflow.EXPTDIR }}/obs_data/mrms" + NDAS_OBS_DIR: "{{ workflow.EXPTDIR }}/obs_data/ndas" + # Do not remove raw obs files to be able to verify that only the necessary + # raw files are fetched from HPSS. 
+ REMOVE_RAW_OBS_CCPA: false + REMOVE_RAW_OBS_MRMS: false + REMOVE_RAW_OBS_NDAS: false + REMOVE_RAW_OBS_NOHRSC: false + +rocoto: + tasks: + taskgroups: '{{ ["parm/wflow/verify_pre.yaml", "parm/wflow/verify_det.yaml"]|include }}' + +task_get_extrn_lbcs: + LBC_SPEC_INTVL_HRS: 3 + +task_run_post: + POST_OUTPUT_DOMAIN_NAME: 'hrrr_ncep' + +verification: + METPLUS_VERBOSITY_LEVEL: 5 + VX_FCST_MODEL_NAME: 'hrrr_ncep' + VX_FCST_INPUT_BASEDIR: '/scratch2/BMC/det/Gerard.Ketefian/UFS_CAM/forecasts_for_SRW_WE2E_tests' + FCST_SUBDIR_TEMPLATE: '{% if user.RUN_ENVIR == "nco" %}${NET_default}.{init?fmt=%Y%m%d?shift=-${time_lag}}/{init?fmt=%H?shift=-${time_lag}}{% else %}{{VX_FCST_MODEL_NAME}}_{init?fmt=%Y%m%d%H?shift=-${time_lag}}{% if global.DO_ENSEMBLE %}/${ensmem_name}{% endif %}{% endif %}' + FCST_FN_TEMPLATE: '${POST_OUTPUT_DOMAIN_NAME}_{init?fmt=%Y%m%d%H?shift=-${time_lag}}f{lead?fmt=%HHH?shift=${time_lag}}.grib2' + FCST_FN_TEMPLATE_PCPCOMBINE_OUTPUT: '${POST_OUTPUT_DOMAIN_NAME}_{init?fmt=%Y%m%d%HHH?shift=-${time_lag}}f{lead?fmt=%HHH?shift=${time_lag}}_${VAR}_a${ACCUM_HH}h.nc' diff --git a/tests/WE2E/test_configs/verification/config.get_obs_hpss.do_vx.multicyc.cycintvl_96hr_inits_12z_fcstlen_48hr.nssl-mpas.yaml b/tests/WE2E/test_configs/verification/config.get_obs_hpss.do_vx.multicyc.cycintvl_96hr_inits_12z_fcstlen_48hr.nssl-mpas.yaml new file mode 100644 index 0000000000..c838e8581d --- /dev/null +++ b/tests/WE2E/test_configs/verification/config.get_obs_hpss.do_vx.multicyc.cycintvl_96hr_inits_12z_fcstlen_48hr.nssl-mpas.yaml @@ -0,0 +1,65 @@ +metadata: + description: |- + SRW App configuration file to first pull CCPA, MRMS, and NDAS observations + from HPSS for multiple cycles and then perform deterministic verification + for all cycles, including first performing vx preprocessing with METplus + tools such as PcpCombine and Pb2Nc. + + The staged forecast data are from one of NSSL's MPAS prototypes submitted + to the 2024 HWT Spring Forecast Experiment. 
+ + This test is for the scenario in which there are multiple, long (i.e. + longer than 24hr) NON-overlapping forecasts with multi-day gaps between + the end of one forecast and the start of the next. + +user: + RUN_ENVIR: community + +platform: + EXTRN_MDL_DATA_STORES: hpss + +workflow: + PREEXISTING_DIR_METHOD: rename + PREDEF_GRID_NAME: RRFS_CONUS_25km + INCR_CYCL_FREQ: 96 + DATE_FIRST_CYCL: '2024042912' + DATE_LAST_CYCL: '2024051112' + FCST_LEN_HRS: 48 + +platform: + # Base directories in which to look for obs. If these do not exist and/or + # do not contain the required obs, create and populate them. + # Note that when performing WE2E tests, the default behavior is not to + # get obs files from HPSS but to use staged obs files. This is done by + # setting these variables to the (platform-specific) locations of these + # staged files. To force the WE2E testing system to get the obs from + # HPSS, here we reset these variables to their default values in the SRW + # workflow configuration file config_defaults.yaml. + CCPA_OBS_DIR: "{{ workflow.EXPTDIR }}/obs_data/ccpa" + NOHRSC_OBS_DIR: "{{ workflow.EXPTDIR }}/obs_data/nohrsc" + MRMS_OBS_DIR: "{{ workflow.EXPTDIR }}/obs_data/mrms" + NDAS_OBS_DIR: "{{ workflow.EXPTDIR }}/obs_data/ndas" + # Do not remove raw obs files to be able to verify that only the necessary + # raw files are fetched from HPSS. 
+ REMOVE_RAW_OBS_CCPA: false + REMOVE_RAW_OBS_MRMS: false + REMOVE_RAW_OBS_NDAS: false + REMOVE_RAW_OBS_NOHRSC: false + +rocoto: + tasks: + taskgroups: '{{ ["parm/wflow/verify_pre.yaml", "parm/wflow/verify_det.yaml"]|include }}' + +task_get_extrn_lbcs: + LBC_SPEC_INTVL_HRS: 3 + +task_run_post: + POST_OUTPUT_DOMAIN_NAME: 'mpashn4nssl' + +verification: + METPLUS_VERBOSITY_LEVEL: 5 + VX_FCST_MODEL_NAME: 'NSSL-MPAS-HN' + VX_FCST_INPUT_BASEDIR: '/scratch2/BMC/det/Gerard.Ketefian/UFS_CAM/forecasts_for_SRW_WE2E_tests' + FCST_SUBDIR_TEMPLATE: '{% if user.RUN_ENVIR == "nco" %}${NET_default}.{init?fmt=%Y%m%d?shift=-${time_lag}}/{init?fmt=%H?shift=-${time_lag}}{% else %}{{VX_FCST_MODEL_NAME}}_{init?fmt=%Y%m%d%H?shift=-${time_lag}}{% if global.DO_ENSEMBLE %}/${ensmem_name}{% endif %}{% endif %}' + FCST_FN_TEMPLATE: '${POST_OUTPUT_DOMAIN_NAME}_{init?fmt=%Y%m%d%H?shift=-${time_lag}}f{lead?fmt=%H?shift=${time_lag}}.grib2' + FCST_FN_TEMPLATE_PCPCOMBINE_OUTPUT: '${POST_OUTPUT_DOMAIN_NAME}_{init?fmt=%Y%m%d%H?shift=-${time_lag}}f{lead?fmt=%HHH?shift=${time_lag}}_${VAR}_a${ACCUM_HH}h.nc' From e3b1f6fdf41831aa628c3dcf046f90c4b6e9db67 Mon Sep 17 00:00:00 2001 From: gerard ketefian Date: Sun, 8 Sep 2024 18:02:11 -0600 Subject: [PATCH 051/131] Rename files. 
--- ...multicyc.cycintvl_07hr_inits_vary_fcstlen_09hr.ncep-hrrr.yaml} | 0 ...multicyc.cycintvl_11hr_inits_vary_fcstlen_03hr.ncep-hrrr.yaml} | 0 ....multicyc.cycintvl_24hr_inits_00z_fcstlen_03hr.ncep-hrrr.yaml} | 0 ....multicyc.cycintvl_24hr_inits_12z_fcstlen_03hr.nssl-mpas.yaml} | 0 ....multicyc.cycintvl_24hr_inits_12z_fcstlen_48hr.nssl-mpas.yaml} | 0 ....multicyc.cycintvl_24hr_inits_21z_fcstlen_03hr.ncep-hrrr.yaml} | 0 ....multicyc.cycintvl_96hr_inits_12z_fcstlen_48hr.nssl-mpas.yaml} | 0 7 files changed, 0 insertions(+), 0 deletions(-) rename tests/WE2E/test_configs/verification/{config.get_obs_hpss.do_vx.multicyc.cycintvl_07hr_inits_vary_fcstlen_09hr.ncep-hrrr.yaml => config.get_obs_hpss.do_vx_det.multicyc.cycintvl_07hr_inits_vary_fcstlen_09hr.ncep-hrrr.yaml} (100%) rename tests/WE2E/test_configs/verification/{config.get_obs_hpss.do_vx.multicyc.cycintvl_11hr_inits_vary_fcstlen_03hr.ncep-hrrr.yaml => config.get_obs_hpss.do_vx_det.multicyc.cycintvl_11hr_inits_vary_fcstlen_03hr.ncep-hrrr.yaml} (100%) rename tests/WE2E/test_configs/verification/{config.get_obs_hpss.do_vx.multicyc.cycintvl_24hr_inits_00z_fcstlen_03hr.ncep-hrrr.yaml => config.get_obs_hpss.do_vx_det.multicyc.cycintvl_24hr_inits_00z_fcstlen_03hr.ncep-hrrr.yaml} (100%) rename tests/WE2E/test_configs/verification/{config.get_obs_hpss.do_vx.multicyc.cycintvl_24hr_inits_12z_fcstlen_03hr.nssl-mpas.yaml => config.get_obs_hpss.do_vx_det.multicyc.cycintvl_24hr_inits_12z_fcstlen_03hr.nssl-mpas.yaml} (100%) rename tests/WE2E/test_configs/verification/{config.get_obs_hpss.do_vx.multicyc.cycintvl_24hr_inits_12z_fcstlen_48hr.nssl-mpas.yaml => config.get_obs_hpss.do_vx_det.multicyc.cycintvl_24hr_inits_12z_fcstlen_48hr.nssl-mpas.yaml} (100%) rename tests/WE2E/test_configs/verification/{config.get_obs_hpss.do_vx.multicyc.cycintvl_24hr_inits_21z_fcstlen_03hr.ncep-hrrr.yaml => config.get_obs_hpss.do_vx_det.multicyc.cycintvl_24hr_inits_21z_fcstlen_03hr.ncep-hrrr.yaml} (100%) rename 
tests/WE2E/test_configs/verification/{config.get_obs_hpss.do_vx.multicyc.cycintvl_96hr_inits_12z_fcstlen_48hr.nssl-mpas.yaml => config.get_obs_hpss.do_vx_det.multicyc.cycintvl_96hr_inits_12z_fcstlen_48hr.nssl-mpas.yaml} (100%) diff --git a/tests/WE2E/test_configs/verification/config.get_obs_hpss.do_vx.multicyc.cycintvl_07hr_inits_vary_fcstlen_09hr.ncep-hrrr.yaml b/tests/WE2E/test_configs/verification/config.get_obs_hpss.do_vx_det.multicyc.cycintvl_07hr_inits_vary_fcstlen_09hr.ncep-hrrr.yaml similarity index 100% rename from tests/WE2E/test_configs/verification/config.get_obs_hpss.do_vx.multicyc.cycintvl_07hr_inits_vary_fcstlen_09hr.ncep-hrrr.yaml rename to tests/WE2E/test_configs/verification/config.get_obs_hpss.do_vx_det.multicyc.cycintvl_07hr_inits_vary_fcstlen_09hr.ncep-hrrr.yaml diff --git a/tests/WE2E/test_configs/verification/config.get_obs_hpss.do_vx.multicyc.cycintvl_11hr_inits_vary_fcstlen_03hr.ncep-hrrr.yaml b/tests/WE2E/test_configs/verification/config.get_obs_hpss.do_vx_det.multicyc.cycintvl_11hr_inits_vary_fcstlen_03hr.ncep-hrrr.yaml similarity index 100% rename from tests/WE2E/test_configs/verification/config.get_obs_hpss.do_vx.multicyc.cycintvl_11hr_inits_vary_fcstlen_03hr.ncep-hrrr.yaml rename to tests/WE2E/test_configs/verification/config.get_obs_hpss.do_vx_det.multicyc.cycintvl_11hr_inits_vary_fcstlen_03hr.ncep-hrrr.yaml diff --git a/tests/WE2E/test_configs/verification/config.get_obs_hpss.do_vx.multicyc.cycintvl_24hr_inits_00z_fcstlen_03hr.ncep-hrrr.yaml b/tests/WE2E/test_configs/verification/config.get_obs_hpss.do_vx_det.multicyc.cycintvl_24hr_inits_00z_fcstlen_03hr.ncep-hrrr.yaml similarity index 100% rename from tests/WE2E/test_configs/verification/config.get_obs_hpss.do_vx.multicyc.cycintvl_24hr_inits_00z_fcstlen_03hr.ncep-hrrr.yaml rename to tests/WE2E/test_configs/verification/config.get_obs_hpss.do_vx_det.multicyc.cycintvl_24hr_inits_00z_fcstlen_03hr.ncep-hrrr.yaml diff --git 
a/tests/WE2E/test_configs/verification/config.get_obs_hpss.do_vx.multicyc.cycintvl_24hr_inits_12z_fcstlen_03hr.nssl-mpas.yaml b/tests/WE2E/test_configs/verification/config.get_obs_hpss.do_vx_det.multicyc.cycintvl_24hr_inits_12z_fcstlen_03hr.nssl-mpas.yaml similarity index 100% rename from tests/WE2E/test_configs/verification/config.get_obs_hpss.do_vx.multicyc.cycintvl_24hr_inits_12z_fcstlen_03hr.nssl-mpas.yaml rename to tests/WE2E/test_configs/verification/config.get_obs_hpss.do_vx_det.multicyc.cycintvl_24hr_inits_12z_fcstlen_03hr.nssl-mpas.yaml diff --git a/tests/WE2E/test_configs/verification/config.get_obs_hpss.do_vx.multicyc.cycintvl_24hr_inits_12z_fcstlen_48hr.nssl-mpas.yaml b/tests/WE2E/test_configs/verification/config.get_obs_hpss.do_vx_det.multicyc.cycintvl_24hr_inits_12z_fcstlen_48hr.nssl-mpas.yaml similarity index 100% rename from tests/WE2E/test_configs/verification/config.get_obs_hpss.do_vx.multicyc.cycintvl_24hr_inits_12z_fcstlen_48hr.nssl-mpas.yaml rename to tests/WE2E/test_configs/verification/config.get_obs_hpss.do_vx_det.multicyc.cycintvl_24hr_inits_12z_fcstlen_48hr.nssl-mpas.yaml diff --git a/tests/WE2E/test_configs/verification/config.get_obs_hpss.do_vx.multicyc.cycintvl_24hr_inits_21z_fcstlen_03hr.ncep-hrrr.yaml b/tests/WE2E/test_configs/verification/config.get_obs_hpss.do_vx_det.multicyc.cycintvl_24hr_inits_21z_fcstlen_03hr.ncep-hrrr.yaml similarity index 100% rename from tests/WE2E/test_configs/verification/config.get_obs_hpss.do_vx.multicyc.cycintvl_24hr_inits_21z_fcstlen_03hr.ncep-hrrr.yaml rename to tests/WE2E/test_configs/verification/config.get_obs_hpss.do_vx_det.multicyc.cycintvl_24hr_inits_21z_fcstlen_03hr.ncep-hrrr.yaml diff --git a/tests/WE2E/test_configs/verification/config.get_obs_hpss.do_vx.multicyc.cycintvl_96hr_inits_12z_fcstlen_48hr.nssl-mpas.yaml b/tests/WE2E/test_configs/verification/config.get_obs_hpss.do_vx_det.multicyc.cycintvl_96hr_inits_12z_fcstlen_48hr.nssl-mpas.yaml similarity index 100% rename from 
tests/WE2E/test_configs/verification/config.get_obs_hpss.do_vx.multicyc.cycintvl_96hr_inits_12z_fcstlen_48hr.nssl-mpas.yaml rename to tests/WE2E/test_configs/verification/config.get_obs_hpss.do_vx_det.multicyc.cycintvl_96hr_inits_12z_fcstlen_48hr.nssl-mpas.yaml From 2725832d80b1c6f72642c5b594022a1dd8d378cd Mon Sep 17 00:00:00 2001 From: gerard ketefian Date: Mon, 9 Sep 2024 13:34:21 -0600 Subject: [PATCH 052/131] Bug fixes after merging in develop. --- parm/wflow/verify_pre.yaml | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/parm/wflow/verify_pre.yaml b/parm/wflow/verify_pre.yaml index a237246eb7..6c1b8ff2b5 100644 --- a/parm/wflow/verify_pre.yaml +++ b/parm/wflow/verify_pre.yaml @@ -26,7 +26,7 @@ task_get_obs_ccpa: attrs: cycledefs: cycledef_obs_days_cumul maxtries: '1' - command: '&LOAD_MODULES_RUN_TASK_FP; "get_obs" "&JOBSdir;/JREGIONAL_GET_VERIF_OBS"' + command: '&LOAD_MODULES_RUN_TASK; "get_obs" "&JOBSdir;/JREGIONAL_GET_VERIF_OBS"' envars: <<: *default_vars OBS_DIR: '&CCPA_OBS_DIR;' @@ -75,7 +75,7 @@ task_get_obs_mrms: task_get_obs_ndas: <<: *default_task_verify_pre - command: '&LOAD_MODULES_RUN_TASK_FP; "get_obs" "&JOBSdir;/JREGIONAL_GET_VERIF_OBS"' + command: '&LOAD_MODULES_RUN_TASK; "get_obs" "&JOBSdir;/JREGIONAL_GET_VERIF_OBS"' envars: <<: *default_vars OBS_DIR: '&NDAS_OBS_DIR;' From c38e9816ac1a7890f481dd6cc8904a52ff1bb962 Mon Sep 17 00:00:00 2001 From: gerard ketefian Date: Mon, 9 Sep 2024 15:47:06 -0600 Subject: [PATCH 053/131] Bug fixes to use new yaml version of var_defns.sh. 
--- scripts/exregional_get_verif_obs.sh | 2 +- ush/get_obs_ccpa.sh | 4 +++- ush/get_obs_mrms.sh | 4 +++- ush/get_obs_ndas.sh | 4 +++- 4 files changed, 10 insertions(+), 4 deletions(-) diff --git a/scripts/exregional_get_verif_obs.sh b/scripts/exregional_get_verif_obs.sh index b971ab3615..fbd751718f 100755 --- a/scripts/exregional_get_verif_obs.sh +++ b/scripts/exregional_get_verif_obs.sh @@ -31,7 +31,7 @@ #----------------------------------------------------------------------- # . $USHdir/source_util_funcs.sh -for sect in user nco ; do +for sect in user workflow nco ; do source_yaml ${GLOBAL_VAR_DEFNS_FP} ${sect} done # diff --git a/ush/get_obs_ccpa.sh b/ush/get_obs_ccpa.sh index 0f3dd248ea..b17dda97e0 100755 --- a/ush/get_obs_ccpa.sh +++ b/ush/get_obs_ccpa.sh @@ -8,7 +8,9 @@ #----------------------------------------------------------------------- # . $USHdir/source_util_funcs.sh -source_config_for_task " " ${GLOBAL_VAR_DEFNS_FP} +for sect in user ; do + source_yaml ${GLOBAL_VAR_DEFNS_FP} ${sect} +done set -u #set -x diff --git a/ush/get_obs_mrms.sh b/ush/get_obs_mrms.sh index 92fc24fa56..a7a47d3f98 100755 --- a/ush/get_obs_mrms.sh +++ b/ush/get_obs_mrms.sh @@ -8,7 +8,9 @@ #----------------------------------------------------------------------- # . $USHdir/source_util_funcs.sh -source_config_for_task " " ${GLOBAL_VAR_DEFNS_FP} +for sect in user ; do + source_yaml ${GLOBAL_VAR_DEFNS_FP} ${sect} +done set -u #set -x diff --git a/ush/get_obs_ndas.sh b/ush/get_obs_ndas.sh index 441de7b31d..f4cddfe19a 100755 --- a/ush/get_obs_ndas.sh +++ b/ush/get_obs_ndas.sh @@ -8,7 +8,9 @@ #----------------------------------------------------------------------- # . 
$USHdir/source_util_funcs.sh -source_config_for_task " " ${GLOBAL_VAR_DEFNS_FP} +for sect in user ; do + source_yaml ${GLOBAL_VAR_DEFNS_FP} ${sect} +done set -u #set -x From d8cb3a06e17afe2a42fae78703b2197edc9b168e Mon Sep 17 00:00:00 2001 From: gerard ketefian Date: Mon, 9 Sep 2024 18:12:11 -0600 Subject: [PATCH 054/131] Bug fix to remove crontab line once experiment is complete (regardless of whether it succeeded or failed). --- ush/get_crontab_contents.py | 2 +- ush/launch_FV3LAM_wflow.sh | 4 ++-- 2 files changed, 3 insertions(+), 3 deletions(-) diff --git a/ush/get_crontab_contents.py b/ush/get_crontab_contents.py index 6b0548141c..fbdf80dae9 100644 --- a/ush/get_crontab_contents.py +++ b/ush/get_crontab_contents.py @@ -162,7 +162,7 @@ def delete_crontab_line(called_from_cron, machine, crontab_line, debug): crontab_contents = crontab_contents.replace(crontab_line + "\n", "") crontab_contents = crontab_contents.replace(crontab_line, "") else: - print(f"\nWARNING: line not found in crontab, nothing to remove:\n {crontab_line}\n") + print(f"\nWARNING: line not found in crontab, nothing to remove:\n{crontab_line}\n") run_command(f"""echo '{crontab_contents}' | {crontab_cmd}""") diff --git a/ush/launch_FV3LAM_wflow.sh b/ush/launch_FV3LAM_wflow.sh index 7c26511f4f..7a4a16e4b5 100644 --- a/ush/launch_FV3LAM_wflow.sh +++ b/ush/launch_FV3LAM_wflow.sh @@ -353,9 +353,9 @@ script for this experiment: # Remove CRONTAB_LINE from cron table # if [ "${called_from_cron}" = "TRUE" ]; then - python3 $USHdir/get_crontab_contents.py --remove -m=${machine} -l="${CRONTAB_LINE}" -c -d + python3 $USHdir/get_crontab_contents.py --remove -m=${machine} -l='${CRONTAB_LINE}' -c -d else - python3 $USHdir/get_crontab_contents.py --remove -m=${machine} -l="${CRONTAB_LINE}" -d + python3 $USHdir/get_crontab_contents.py --remove -m=${machine} -l='${CRONTAB_LINE}' -d fi fi # From f7b77fd8a383b49dd6895a603af2b829e67b404b Mon Sep 17 00:00:00 2001 From: gerard ketefian Date: Mon, 9 Sep 2024 23:27:05 
-0600 Subject: [PATCH 055/131] Use SRW's standard way of doing boolean comparisons in bash. --- ush/get_obs_ccpa.sh | 6 +++--- ush/get_obs_mrms.sh | 4 ++-- ush/get_obs_ndas.sh | 6 +++--- 3 files changed, 8 insertions(+), 8 deletions(-) diff --git a/ush/get_obs_ccpa.sh b/ush/get_obs_ccpa.sh index b17dda97e0..21f85e2ff4 100755 --- a/ush/get_obs_ccpa.sh +++ b/ush/get_obs_ccpa.sh @@ -325,7 +325,7 @@ mv_or_cp="cp" # If the raw directories and files are to be removed at the end of this # script, no need to copy the files since the raw directories are going # to be removed anyway. -if [[ "${REMOVE_RAW_OBS}" == "TRUE" ]]; then +if [[ $(boolify "${REMOVE_RAW_OBS}") == "TRUE" ]]; then mv_or_cp="mv" fi @@ -368,7 +368,7 @@ arcv_hr = ${arcv_hr}" fi done - if [[ ${do_retrieve} == "TRUE" ]]; then + if [[ $(boolify "${do_retrieve}") == "TRUE" ]]; then # Make sure the raw quarter-daily directory exists because it is used # below as the output directory of the retrieve_data.py script (so if @@ -454,7 +454,7 @@ done # #----------------------------------------------------------------------- # -if [[ "${REMOVE_RAW_OBS}" == "TRUE" ]]; then +if [[ $(boolify "${REMOVE_RAW_OBS}") == "TRUE" ]]; then print_info_msg "Removing raw directories and files..." rm -rf ${basedir_raw} || print_err_msg_exit "\ Failed to remove raw directories and files." diff --git a/ush/get_obs_mrms.sh b/ush/get_obs_mrms.sh index a7a47d3f98..d82ba842c0 100755 --- a/ush/get_obs_mrms.sh +++ b/ush/get_obs_mrms.sh @@ -175,7 +175,7 @@ mv_or_cp="cp" # If the raw directories and files are to be removed at the end of this # script, no need to copy the files since the raw directories are going # to be removed anyway. 
-if [[ "${REMOVE_RAW_OBS}" == "TRUE" ]]; then +if [[ $(boolify "${REMOVE_RAW_OBS}") == "TRUE" ]]; then mv_or_cp="mv" fi @@ -260,7 +260,7 @@ done # #----------------------------------------------------------------------- # -if [[ "${REMOVE_RAW_OBS}" == "TRUE" ]]; then +if [[ $(boolify "${REMOVE_RAW_OBS}") == "TRUE" ]]; then print_info_msg "Removing raw directories and files..." rm -rf ${mrms_basedir_raw} || print_err_msg_exit "\ Failed to remove raw directories and files." diff --git a/ush/get_obs_ndas.sh b/ush/get_obs_ndas.sh index f4cddfe19a..b65e3173f1 100755 --- a/ush/get_obs_ndas.sh +++ b/ush/get_obs_ndas.sh @@ -185,7 +185,7 @@ mv_or_cp="cp" # If the raw directories and files are to be removed at the end of this # script, no need to copy the files since the raw directories are going # to be removed anyway. -if [[ "${REMOVE_RAW_OBS}" == "TRUE" ]]; then +if [[ $(boolify "${REMOVE_RAW_OBS}") == "TRUE" ]]; then mv_or_cp="mv" fi @@ -228,7 +228,7 @@ arcv_hr = ${arcv_hr}" fi done - if [[ ${do_retrieve} == "TRUE" ]]; then + if [[ $(boolify "${do_retrieve}") == "TRUE" ]]; then # Make sure the raw quarter-daily directory exists because it is used # below as the output directory of the retrieve_data.py script (so if @@ -305,7 +305,7 @@ done # #----------------------------------------------------------------------- # -if [[ "${REMOVE_RAW_OBS}" == "TRUE" ]]; then +if [[ $(boolify "${REMOVE_RAW_OBS}") == "TRUE" ]]; then print_info_msg "Removing raw directories and files..." rm -rf ${basedir_raw} || print_err_msg_exit "\ Failed to remove raw directories and files." From 6425b822ea8b26642d48c07ab6750c8b9d1c8b78 Mon Sep 17 00:00:00 2001 From: gerard ketefian Date: Tue, 10 Sep 2024 01:21:13 -0600 Subject: [PATCH 056/131] Make script more compact. 
--- scripts/exregional_get_verif_obs.sh | 21 ++++++++------------- 1 file changed, 8 insertions(+), 13 deletions(-) diff --git a/scripts/exregional_get_verif_obs.sh b/scripts/exregional_get_verif_obs.sh index fbd751718f..4e981b3958 100755 --- a/scripts/exregional_get_verif_obs.sh +++ b/scripts/exregional_get_verif_obs.sh @@ -135,21 +135,17 @@ done # #----------------------------------------------------------------------- # -if [[ ${OBTYPE} == "CCPA" ]]; then - $USHdir/get_obs_ccpa.sh -elif [[ ${OBTYPE} == "MRMS" ]]; then - $USHdir/get_obs_mrms.sh -elif [[ ${OBTYPE} == "NDAS" ]]; then - $USHdir/get_obs_ndas.sh -elif [[ ${OBTYPE} == "NOHRSC" ]]; then - $USHdir/get_obs_nohrsc.sh -else +valid_obtypes=("CCPA" "MRMS" "NDAS" "NOHRSC") +if [[ ! ${valid_obtypes[@]} =~ ${OBTYPE} ]]; then print_err_msg_exit "\ -Invalid OBTYPE specified for script: +Invalid observation type (OBTYPE) specified for script: OBTYPE = \"${OBTYPE}\" -Valid options are CCPA, MRMS, NDAS, and NOHRSC. +Valid observation types are: + $(printf "\"%s\" " ${valid_obtypes[@]}) " fi +script_bn="get_obs_$(echo_lowercase ${OBTYPE})" +$USHdir/${script_bn}.sh # #----------------------------------------------------------------------- # @@ -158,9 +154,8 @@ fi # #----------------------------------------------------------------------- # -obtype=$(echo_lowercase ${OBTYPE}) mkdir -p ${WFLOW_FLAG_FILES_DIR} -touch "${WFLOW_FLAG_FILES_DIR}/get_obs_${obtype}_${PDY}_complete.txt" +touch "${WFLOW_FLAG_FILES_DIR}/${script_bn}_${PDY}_complete.txt" # #----------------------------------------------------------------------- # From 24c8c5912e3b6416bfff3623c55445fd87a30fa8 Mon Sep 17 00:00:00 2001 From: gerard ketefian Date: Tue, 10 Sep 2024 01:25:32 -0600 Subject: [PATCH 057/131] Bug fixes. 
--- parm/wflow/verify_pre.yaml | 8 ++++---- ush/get_obs_mrms.sh | 2 +- 2 files changed, 5 insertions(+), 5 deletions(-) diff --git a/parm/wflow/verify_pre.yaml b/parm/wflow/verify_pre.yaml index 6c1b8ff2b5..c6e33e4565 100644 --- a/parm/wflow/verify_pre.yaml +++ b/parm/wflow/verify_pre.yaml @@ -35,7 +35,7 @@ task_get_obs_ccpa: '{%- for i in range(0, (workflow.OUTPUT_TIMES_ALL_CYCLES_CUMUL|length)) %} {{- " " ~ workflow.OUTPUT_TIMES_ALL_CYCLES_CUMUL[i] }} {%- endfor %}' - REMOVE_RAW_OBS: 'platform.REMOVE_RAW_OBS_CCPA' + REMOVE_RAW_OBS: '{{ platform.REMOVE_RAW_OBS_CCPA }}' native: '{% if platform.get("SCHED_NATIVE_CMD_HPSS") %}{{ platform.SCHED_NATIVE_CMD_HPSS }}{% else %}{{ platform.SCHED_NATIVE_CMD}}{% endif %}' partition: '{% if platform.get("PARTITION_HPSS") %}&PARTITION_HPSS;{% else %}None{% endif %}' queue: "&QUEUE_HPSS;" @@ -49,7 +49,7 @@ task_get_obs_nohrsc: OBS_DIR: '&NOHRSC_OBS_DIR;' OBTYPE: 'NOHRSC' OUTPUT_TIMES_ALL: *output_times_all_cumul - REMOVE_RAW_OBS: 'platform.REMOVE_RAW_OBS_NOHRSC' + REMOVE_RAW_OBS: '{{ platform.REMOVE_RAW_OBS_NOHRSC }}' native: '{% if platform.get("SCHED_NATIVE_CMD_HPSS") %}{{ platform.SCHED_NATIVE_CMD_HPSS }}{% else %}{{ platform.SCHED_NATIVE_CMD}}{% endif %}' partition: '{% if platform.get("PARTITION_HPSS") %}&PARTITION_HPSS;{% else %}None{% endif %}' queue: "&QUEUE_HPSS;" @@ -67,7 +67,7 @@ task_get_obs_mrms: '{%- for i in range(0, (workflow.OUTPUT_TIMES_ALL_CYCLES_INST|length)) %} {{- " " ~ workflow.OUTPUT_TIMES_ALL_CYCLES_INST[i] }} {%- endfor %}' - REMOVE_RAW_OBS: 'platform.REMOVE_RAW_OBS_MRMS' + REMOVE_RAW_OBS: '{{ platform.REMOVE_RAW_OBS_MRMS }}' native: '{% if platform.get("SCHED_NATIVE_CMD_HPSS") %}{{ platform.SCHED_NATIVE_CMD_HPSS }}{% else %}{{ platform.SCHED_NATIVE_CMD}}{% endif %}' partition: '{% if platform.get("PARTITION_HPSS") %}&PARTITION_HPSS;{% else %}None{% endif %}' queue: "&QUEUE_HPSS;" @@ -81,7 +81,7 @@ task_get_obs_ndas: OBS_DIR: '&NDAS_OBS_DIR;' OBTYPE: 'NDAS' OUTPUT_TIMES_ALL: *output_times_all_inst - 
REMOVE_RAW_OBS: 'platform.REMOVE_RAW_OBS_NDAS' + REMOVE_RAW_OBS: '{{ platform.REMOVE_RAW_OBS_NDAS }}' queue: "&QUEUE_HPSS;" native: '{% if platform.get("SCHED_NATIVE_CMD_HPSS") %}{{ platform.SCHED_NATIVE_CMD_HPSS }}{% else %}{{ platform.SCHED_NATIVE_CMD}}{% endif %}' partition: '{% if platform.get("PARTITION_HPSS") %}&PARTITION_HPSS;{% else %}None{% endif %}' diff --git a/ush/get_obs_mrms.sh b/ush/get_obs_mrms.sh index d82ba842c0..377ffb25c0 100755 --- a/ush/get_obs_mrms.sh +++ b/ush/get_obs_mrms.sh @@ -262,6 +262,6 @@ done # if [[ $(boolify "${REMOVE_RAW_OBS}") == "TRUE" ]]; then print_info_msg "Removing raw directories and files..." - rm -rf ${mrms_basedir_raw} || print_err_msg_exit "\ + rm -rf ${basedir_raw} || print_err_msg_exit "\ Failed to remove raw directories and files." fi From 2622f8b47809449ff797a80f5cdb883b406bd2ee Mon Sep 17 00:00:00 2001 From: gerard ketefian Date: Tue, 10 Sep 2024 10:27:39 -0600 Subject: [PATCH 058/131] Get REMOVE_RAW_OBS... variables directly from var_defns.yaml file instead of setting it as an enviroment variable in the rocoto xml. This makes it possible to change their settings by chaning the values in var_defns.yaml and rerunning the get_obs_... tasks. 
--- parm/wflow/verify_pre.yaml | 4 ---- ush/get_obs_ccpa.sh | 15 +++++++++------ ush/get_obs_mrms.sh | 15 +++++++++------ ush/get_obs_ndas.sh | 15 +++++++++------ 4 files changed, 27 insertions(+), 22 deletions(-) diff --git a/parm/wflow/verify_pre.yaml b/parm/wflow/verify_pre.yaml index c6e33e4565..4a9e750c56 100644 --- a/parm/wflow/verify_pre.yaml +++ b/parm/wflow/verify_pre.yaml @@ -35,7 +35,6 @@ task_get_obs_ccpa: '{%- for i in range(0, (workflow.OUTPUT_TIMES_ALL_CYCLES_CUMUL|length)) %} {{- " " ~ workflow.OUTPUT_TIMES_ALL_CYCLES_CUMUL[i] }} {%- endfor %}' - REMOVE_RAW_OBS: '{{ platform.REMOVE_RAW_OBS_CCPA }}' native: '{% if platform.get("SCHED_NATIVE_CMD_HPSS") %}{{ platform.SCHED_NATIVE_CMD_HPSS }}{% else %}{{ platform.SCHED_NATIVE_CMD}}{% endif %}' partition: '{% if platform.get("PARTITION_HPSS") %}&PARTITION_HPSS;{% else %}None{% endif %}' queue: "&QUEUE_HPSS;" @@ -49,7 +48,6 @@ task_get_obs_nohrsc: OBS_DIR: '&NOHRSC_OBS_DIR;' OBTYPE: 'NOHRSC' OUTPUT_TIMES_ALL: *output_times_all_cumul - REMOVE_RAW_OBS: '{{ platform.REMOVE_RAW_OBS_NOHRSC }}' native: '{% if platform.get("SCHED_NATIVE_CMD_HPSS") %}{{ platform.SCHED_NATIVE_CMD_HPSS }}{% else %}{{ platform.SCHED_NATIVE_CMD}}{% endif %}' partition: '{% if platform.get("PARTITION_HPSS") %}&PARTITION_HPSS;{% else %}None{% endif %}' queue: "&QUEUE_HPSS;" @@ -67,7 +65,6 @@ task_get_obs_mrms: '{%- for i in range(0, (workflow.OUTPUT_TIMES_ALL_CYCLES_INST|length)) %} {{- " " ~ workflow.OUTPUT_TIMES_ALL_CYCLES_INST[i] }} {%- endfor %}' - REMOVE_RAW_OBS: '{{ platform.REMOVE_RAW_OBS_MRMS }}' native: '{% if platform.get("SCHED_NATIVE_CMD_HPSS") %}{{ platform.SCHED_NATIVE_CMD_HPSS }}{% else %}{{ platform.SCHED_NATIVE_CMD}}{% endif %}' partition: '{% if platform.get("PARTITION_HPSS") %}&PARTITION_HPSS;{% else %}None{% endif %}' queue: "&QUEUE_HPSS;" @@ -81,7 +78,6 @@ task_get_obs_ndas: OBS_DIR: '&NDAS_OBS_DIR;' OBTYPE: 'NDAS' OUTPUT_TIMES_ALL: *output_times_all_inst - REMOVE_RAW_OBS: '{{ platform.REMOVE_RAW_OBS_NDAS }}' 
queue: "&QUEUE_HPSS;" native: '{% if platform.get("SCHED_NATIVE_CMD_HPSS") %}{{ platform.SCHED_NATIVE_CMD_HPSS }}{% else %}{{ platform.SCHED_NATIVE_CMD}}{% endif %}' partition: '{% if platform.get("PARTITION_HPSS") %}&PARTITION_HPSS;{% else %}None{% endif %}' diff --git a/ush/get_obs_ccpa.sh b/ush/get_obs_ccpa.sh index 21f85e2ff4..6d217c43c2 100755 --- a/ush/get_obs_ccpa.sh +++ b/ush/get_obs_ccpa.sh @@ -8,7 +8,7 @@ #----------------------------------------------------------------------- # . $USHdir/source_util_funcs.sh -for sect in user ; do +for sect in user platform ; do source_yaml ${GLOBAL_VAR_DEFNS_FP} ${sect} done @@ -322,10 +322,13 @@ fi # Whether to move or copy files from raw to processed directories. #mv_or_cp="mv" mv_or_cp="cp" +# Whether to remove raw observations after processed directories have +# been created from them. +remove_raw_obs="${REMOVE_RAW_OBS_CCPA}" # If the raw directories and files are to be removed at the end of this # script, no need to copy the files since the raw directories are going # to be removed anyway. -if [[ $(boolify "${REMOVE_RAW_OBS}") == "TRUE" ]]; then +if [[ $(boolify "${remove_raw_obs}") == "TRUE" ]]; then mv_or_cp="mv" fi @@ -450,12 +453,12 @@ done # #----------------------------------------------------------------------- # -# Clean up raw directories. +# Clean up raw obs directories. # #----------------------------------------------------------------------- # -if [[ $(boolify "${REMOVE_RAW_OBS}") == "TRUE" ]]; then - print_info_msg "Removing raw directories and files..." +if [[ $(boolify "${remove_raw_obs}") == "TRUE" ]]; then + print_info_msg "Removing raw obs directories..." rm -rf ${basedir_raw} || print_err_msg_exit "\ -Failed to remove raw directories and files." +Failed to remove raw obs directories." 
fi diff --git a/ush/get_obs_mrms.sh b/ush/get_obs_mrms.sh index 377ffb25c0..4fbe503b25 100755 --- a/ush/get_obs_mrms.sh +++ b/ush/get_obs_mrms.sh @@ -8,7 +8,7 @@ #----------------------------------------------------------------------- # . $USHdir/source_util_funcs.sh -for sect in user ; do +for sect in user platform ; do source_yaml ${GLOBAL_VAR_DEFNS_FP} ${sect} done @@ -172,10 +172,13 @@ fi # Whether to move or copy files from raw to processed directories. #mv_or_cp="mv" mv_or_cp="cp" +# Whether to remove raw observations after processed directories have +# been created from them. +remove_raw_obs="${REMOVE_RAW_OBS_MRMS}" # If the raw directories and files are to be removed at the end of this # script, no need to copy the files since the raw directories are going # to be removed anyway. -if [[ $(boolify "${REMOVE_RAW_OBS}") == "TRUE" ]]; then +if [[ $(boolify "${remove_raw_obs}") == "TRUE" ]]; then mv_or_cp="mv" fi @@ -256,12 +259,12 @@ done # #----------------------------------------------------------------------- # -# Clean up raw directories. +# Clean up raw obs directories. # #----------------------------------------------------------------------- # -if [[ $(boolify "${REMOVE_RAW_OBS}") == "TRUE" ]]; then - print_info_msg "Removing raw directories and files..." +if [[ $(boolify "${remove_raw_obs}") == "TRUE" ]]; then + print_info_msg "Removing raw obs directories..." rm -rf ${basedir_raw} || print_err_msg_exit "\ -Failed to remove raw directories and files." +Failed to remove raw obs directories." fi diff --git a/ush/get_obs_ndas.sh b/ush/get_obs_ndas.sh index b65e3173f1..23e0496e35 100755 --- a/ush/get_obs_ndas.sh +++ b/ush/get_obs_ndas.sh @@ -8,7 +8,7 @@ #----------------------------------------------------------------------- # . $USHdir/source_util_funcs.sh -for sect in user ; do +for sect in user platform ; do source_yaml ${GLOBAL_VAR_DEFNS_FP} ${sect} done @@ -182,10 +182,13 @@ fi # Whether to move or copy files from raw to processed directories. 
#mv_or_cp="mv" mv_or_cp="cp" +# Whether to remove raw observations after processed directories have +# been created from them. +remove_raw_obs="${REMOVE_RAW_OBS_NDAS}" # If the raw directories and files are to be removed at the end of this # script, no need to copy the files since the raw directories are going # to be removed anyway. -if [[ $(boolify "${REMOVE_RAW_OBS}") == "TRUE" ]]; then +if [[ $(boolify "${remove_raw_obs}") == "TRUE" ]]; then mv_or_cp="mv" fi @@ -301,12 +304,12 @@ done # #----------------------------------------------------------------------- # -# Clean up raw directories. +# Clean up raw obs directories. # #----------------------------------------------------------------------- # -if [[ $(boolify "${REMOVE_RAW_OBS}") == "TRUE" ]]; then - print_info_msg "Removing raw directories and files..." +if [[ $(boolify "${remove_raw_obs}") == "TRUE" ]]; then + print_info_msg "Removing raw obs directories..." rm -rf ${basedir_raw} || print_err_msg_exit "\ -Failed to remove raw directories and files." +Failed to remove raw obs directories." fi From 2a379983419e636149656ac8d84f1fe10250a55e Mon Sep 17 00:00:00 2001 From: gerard ketefian Date: Wed, 11 Sep 2024 16:48:05 -0600 Subject: [PATCH 059/131] Minor fixes to comments and output messages. 
--- ush/bash_utils/eval_METplus_timestr_tmpl.sh | 15 ++++++++------- ush/set_cycle_and_obs_timeinfo.py | 3 ++- 2 files changed, 10 insertions(+), 8 deletions(-) diff --git a/ush/bash_utils/eval_METplus_timestr_tmpl.sh b/ush/bash_utils/eval_METplus_timestr_tmpl.sh index 572f7c68c4..c3e19c8935 100644 --- a/ush/bash_utils/eval_METplus_timestr_tmpl.sh +++ b/ush/bash_utils/eval_METplus_timestr_tmpl.sh @@ -92,7 +92,8 @@ function eval_METplus_timestr_tmpl() { # if [ -z "${METplus_timestr_tmpl}" ]; then print_err_msg_exit "\ -The specified METplus time string template (METplus_timestr_tmpl) cannot be empty: +The specified METplus time string template (METplus_timestr_tmpl) cannot +be empty: METplus_timestr_tmpl = \"${METplus_timestr_tmpl}\"" fi @@ -100,21 +101,21 @@ The specified METplus time string template (METplus_timestr_tmpl) cannot be empt if [[ ${init_time} =~ ^[0-9]+$ ]]; then if [ "$len" -ne 10 ] && [ "$len" -ne 12 ] && [ "$len" -ne 14 ]; then print_err_msg_exit "\ -The specified initial time string (init_time) must contain exactly 10, -12, or 14 integers (but contains $len): +The specified initial time string (init_time) must contain 10, 12, or 14 +digits (but contains $len): init_time = \"${init_time}\"" fi else print_err_msg_exit "\ -The specified initial time string (init_time) must consist of only -integers and cannot be empty: +The specified initial time string (init_time) must consist of digits only +and cannot be empty: init_time = \"${init_time}\"" fi if ! 
[[ $fhr =~ ^[0-9]+$ ]]; then print_err_msg_exit "\ -The specified forecast hour (fhr) must consist of only integers and -cannot be empty: +The specified forecast hour (fhr) must consist of digits only and cannot +be empty: fhr = \"${fhr}\"" fi # diff --git a/ush/set_cycle_and_obs_timeinfo.py b/ush/set_cycle_and_obs_timeinfo.py index c3969d8ef3..36635b643e 100644 --- a/ush/set_cycle_and_obs_timeinfo.py +++ b/ush/set_cycle_and_obs_timeinfo.py @@ -49,7 +49,8 @@ def set_fcst_output_times_and_obs_days_all_cycles( observations are needed to perform verification (each element of the form 'YYYYMMDD'). The first pair of lists is for instantaneous output fields (e.g. REFC, RETOP, T2m), and the second pair is for cumulative ones (e.g. - APCP or accumulated precipitation). + APCP or accumulated precipitation). The accumulation period for the latter + is the forecast output interval. Args: start_time_first_cycl: From 5160d3ccedf78f36c849846f01d3961500ae4a4e Mon Sep 17 00:00:00 2001 From: gerard ketefian Date: Thu, 12 Sep 2024 14:54:06 -0600 Subject: [PATCH 060/131] Fix typos. --- ush/get_obs_ccpa.sh | 2 +- ush/get_obs_mrms.sh | 2 +- ush/get_obs_ndas.sh | 2 +- 3 files changed, 3 insertions(+), 3 deletions(-) diff --git a/ush/get_obs_ccpa.sh b/ush/get_obs_ccpa.sh index 6d217c43c2..54537b9e97 100755 --- a/ush/get_obs_ccpa.sh +++ b/ush/get_obs_ccpa.sh @@ -174,7 +174,7 @@ yyyymmdd_task=${PDY} basedir_proc=${OBS_DIR} # The environment variable OUTPUT_TIMES_ALL set in the ROCOTO XML is a -# scalar string containing all relevant forecast output times (each) in +# scalar string containing all relevant forecast output times (each in # the form YYYYMMDDHH) separated by spaces. It isn't an array of strings # because in ROCOTO, there doesn't seem to be a way to pass a bash array # from the XML to task's script. 
To have an array-valued variable to diff --git a/ush/get_obs_mrms.sh b/ush/get_obs_mrms.sh index 4fbe503b25..d4e32a4c54 100755 --- a/ush/get_obs_mrms.sh +++ b/ush/get_obs_mrms.sh @@ -86,7 +86,7 @@ yyyymmdd_task=${PDY} basedir_proc=${OBS_DIR} # The environment variable OUTPUT_TIMES_ALL set in the ROCOTO XML is a -# scalar string containing all relevant forecast output times (each) in +# scalar string containing all relevant forecast output times (each in # the form YYYYMMDDHH) separated by spaces. It isn't an array of strings # because in ROCOTO, there doesn't seem to be a way to pass a bash array # from the XML to task's script. To have an array-valued variable to diff --git a/ush/get_obs_ndas.sh b/ush/get_obs_ndas.sh index 23e0496e35..b69d4cb1bc 100755 --- a/ush/get_obs_ndas.sh +++ b/ush/get_obs_ndas.sh @@ -60,7 +60,7 @@ yyyymmdd_task=${PDY} basedir_proc=${OBS_DIR} # The environment variable OUTPUT_TIMES_ALL set in the ROCOTO XML is a -# scalar string containing all relevant forecast output times (each) in +# scalar string containing all relevant forecast output times (each in # the form YYYYMMDDHH) separated by spaces. It isn't an array of strings # because in ROCOTO, there doesn't seem to be a way to pass a bash array # from the XML to task's script. To have an array-valued variable to From f910f6fcb98ec8e0fe09d7eaeda036dfbf581c3b Mon Sep 17 00:00:00 2001 From: gerard ketefian Date: Thu, 12 Sep 2024 14:56:54 -0600 Subject: [PATCH 061/131] Fix more typos. --- ush/get_obs_ccpa.sh | 2 +- ush/get_obs_mrms.sh | 2 +- ush/get_obs_ndas.sh | 2 +- 3 files changed, 3 insertions(+), 3 deletions(-) diff --git a/ush/get_obs_ccpa.sh b/ush/get_obs_ccpa.sh index 54537b9e97..1eee37339c 100755 --- a/ush/get_obs_ccpa.sh +++ b/ush/get_obs_ccpa.sh @@ -177,7 +177,7 @@ basedir_proc=${OBS_DIR} # scalar string containing all relevant forecast output times (each in # the form YYYYMMDDHH) separated by spaces. 
It isn't an array of strings # because in ROCOTO, there doesn't seem to be a way to pass a bash array -# from the XML to task's script. To have an array-valued variable to +# from the XML to the task's script. To have an array-valued variable to # work with, here, we create the new variable output_times_all that is # the array-valued counterpart of OUTPUT_TIMES_ALL. output_times_all=($(printf "%s" "${OUTPUT_TIMES_ALL}")) diff --git a/ush/get_obs_mrms.sh b/ush/get_obs_mrms.sh index d4e32a4c54..6d84141eab 100755 --- a/ush/get_obs_mrms.sh +++ b/ush/get_obs_mrms.sh @@ -89,7 +89,7 @@ basedir_proc=${OBS_DIR} # scalar string containing all relevant forecast output times (each in # the form YYYYMMDDHH) separated by spaces. It isn't an array of strings # because in ROCOTO, there doesn't seem to be a way to pass a bash array -# from the XML to task's script. To have an array-valued variable to +# from the XML to the task's script. To have an array-valued variable to # work with, here, we create the new variable output_times_all that is # the array-valued counterpart of OUTPUT_TIMES_ALL. output_times_all=($(printf "%s" "${OUTPUT_TIMES_ALL}")) diff --git a/ush/get_obs_ndas.sh b/ush/get_obs_ndas.sh index b69d4cb1bc..595ecc2688 100755 --- a/ush/get_obs_ndas.sh +++ b/ush/get_obs_ndas.sh @@ -63,7 +63,7 @@ basedir_proc=${OBS_DIR} # scalar string containing all relevant forecast output times (each in # the form YYYYMMDDHH) separated by spaces. It isn't an array of strings # because in ROCOTO, there doesn't seem to be a way to pass a bash array -# from the XML to task's script. To have an array-valued variable to +# from the XML to the task's script. To have an array-valued variable to # work with, here, we create the new variable output_times_all that is # the array-valued counterpart of OUTPUT_TIMES_ALL. 
output_times_all=($(printf "%s" "${OUTPUT_TIMES_ALL}")) From aa7188b7897c8497be2817bb0e056358e932c9bc Mon Sep 17 00:00:00 2001 From: gerard ketefian Date: Fri, 13 Sep 2024 16:53:30 -0600 Subject: [PATCH 062/131] Fix up comments. --- ush/get_obs_ccpa.sh | 10 +++++----- ush/get_obs_mrms.sh | 2 +- ush/get_obs_ndas.sh | 10 +++++----- 3 files changed, 11 insertions(+), 11 deletions(-) diff --git a/ush/get_obs_ccpa.sh b/ush/get_obs_ccpa.sh index 1eee37339c..79ce882da3 100755 --- a/ush/get_obs_ccpa.sh +++ b/ush/get_obs_ccpa.sh @@ -235,15 +235,15 @@ fi # Sequence interval must be 6 hours because the archives are 6-hourly. arcv_hr_incr=6 -# Initial guess for starting archive hour. This is set to the hour -# corresponding to the first forecast output time of the day. +# Initial guess for starting archive hour. This is set to the archive +# hour containing obs at the first forecast output time of the day. hh_first=$(echo ${output_times_crnt_day[0]} | cut -c9-10) hr_first=$((10#${hh_first})) arcv_hr_start=$(ceil ${hr_first} ${arcv_hr_incr}) arcv_hr_start=$(( arcv_hr_start*arcv_hr_incr )) -# Ending archive hour. This is set to the hour corresponding to the last -# forecast output time of the day. +# Ending archive hour. This is set to the archive hour containing obs at +# the last forecast output time of the day. hh_last=$(echo ${output_times_crnt_day[-1]} | cut -c9-10) hr_last=$((10#${hh_last})) if [[ ${hr_last} -eq 0 ]]; then @@ -255,7 +255,7 @@ fi # Check whether any obs files already exist on disk. If so, adjust the # starting archive hour. In the process, keep a count of the number of -# files that already exist on disk. +# obs files that already exist on disk. 
num_existing_files=0 for yyyymmddhh in ${output_times_crnt_day[@]}; do yyyymmdd=$(echo ${yyyymmddhh} | cut -c1-8) diff --git a/ush/get_obs_mrms.sh b/ush/get_obs_mrms.sh index 6d84141eab..6aac0159b3 100755 --- a/ush/get_obs_mrms.sh +++ b/ush/get_obs_mrms.sh @@ -116,7 +116,7 @@ fi # Check whether any obs files already exist on disk. If so, adjust the # starting archive hour. In the process, keep a count of the number of -# files that already exist on disk. +# obs files that already exist on disk. num_existing_files=0 num_mrms_fields=${#mrms_fields[@]} for (( i=0; i<${num_mrms_fields}; i++ )); do diff --git a/ush/get_obs_ndas.sh b/ush/get_obs_ndas.sh index 595ecc2688..2954552412 100755 --- a/ush/get_obs_ndas.sh +++ b/ush/get_obs_ndas.sh @@ -102,21 +102,21 @@ fi # Sequence interval must be 6 hours because the archives are 6-hourly. arcv_hr_incr=6 -# Initial guess for starting archive hour. This is set to the hour -# corresponding to the first forecast output time of the day. +# Initial guess for starting archive hour. This is set to the archive +# hour containing obs at the first forecast output time of the day. hh_first=$(echo ${output_times_crnt_day[0]} | cut -c9-10) hr_first=$((10#${hh_first})) arcv_hr_start=$(( (hr_first/arcv_hr_incr + 1)*arcv_hr_incr )) -# Ending archive hour. This is set to the hour corresponding to the last -# forecast output time of the day. +# Ending archive hour. This is set to the archive hour containing obs at +# the last forecast output time of the day. hh_last=$(echo ${output_times_crnt_day[-1]} | cut -c9-10) hr_last=$((10#${hh_last})) arcv_hr_end=$(( (hr_last/arcv_hr_incr + 1)*arcv_hr_incr )) # Check whether any obs files already exist on disk. If so, adjust the # starting archive hour. In the process, keep a count of the number of -# files that already exist on disk. +# obs files that already exist on disk. 
num_existing_files=0 for yyyymmddhh in ${output_times_crnt_day[@]}; do yyyymmdd=$(echo ${yyyymmddhh} | cut -c1-8) From 3c794f258b1e827ec0b484ab030f14220b20d6a8 Mon Sep 17 00:00:00 2001 From: gerard ketefian Date: Mon, 16 Sep 2024 13:41:42 -0600 Subject: [PATCH 063/131] Add new function and clean up existing. --- ush/bash_utils/eval_METplus_timestr_tmpl.sh | 232 ++++++++++++++++---- 1 file changed, 194 insertions(+), 38 deletions(-) diff --git a/ush/bash_utils/eval_METplus_timestr_tmpl.sh b/ush/bash_utils/eval_METplus_timestr_tmpl.sh index c3e19c8935..ae4a1c7ebf 100644 --- a/ush/bash_utils/eval_METplus_timestr_tmpl.sh +++ b/ush/bash_utils/eval_METplus_timestr_tmpl.sh @@ -1,8 +1,9 @@ # #----------------------------------------------------------------------- # -# This file defines a function that evaluates a METplus time-string -# template. +# This function evaluates a METplus time-string template, i.e. a string +# (e.g. a file name template) containing one or more METplus time- +# formatting strings. # #----------------------------------------------------------------------- # @@ -49,7 +50,7 @@ function eval_METplus_timestr_tmpl() { "init_time" \ "fhr" \ "METplus_timestr_tmpl" \ - "outvarname_formatted_time" \ + "outvarname_evaluated_timestr" \ ) process_args valid_args "$@" # @@ -69,16 +70,171 @@ function eval_METplus_timestr_tmpl() { # #----------------------------------------------------------------------- # - local fmt \ - formatted_time \ + local crnt_timefmt \ + crnt_timefmt_esc \ + evaluated_timestr \ + regex_search_tmpl \ + the_time \ + tmpl_remainder +# +#----------------------------------------------------------------------- +# +# Loop over all METplus time-formatting strings in the given METplus +# template and evaluate each using the given initial time (init_time) and +# forecast hour (fhr). 
+# +# Note that the while-loop below is over all METplus time-formatting +# strings of the form {...} in the template METplus_timestr_tmpl; it +# continues until all such time-formatting strings have been evaluated +# to actual times. +# +#----------------------------------------------------------------------- +# +# Regular expression used by the sed utility below to pick out the next +# METplus time-formatting string in the given METplus time-string template. +# + regex_search_tmpl="(.*)(\{.*\})(.*)" +# +# Initialize while-loop variables. +# + evaluated_timestr="${METplus_timestr_tmpl}" + + crnt_timefmt=$( printf "%s" "${METplus_timestr_tmpl}" | \ + $SED -n -r -e "s|${regex_search_tmpl}|\2|p" ) + tmpl_remainder=$( printf "%s" "${METplus_timestr_tmpl}" | \ + $SED -n -r -e "s|${regex_search_tmpl}|\1\3|p" ) + + while [ ! -z "${crnt_timefmt}" ]; do + + eval_single_METplus_timefmt \ + init_time="${init_time}" \ + fhr="${fhr}" \ + METplus_timefmt="${crnt_timefmt}" \ + outvarname_evaluated_timefmt="the_time" +# +# Replace the next METplus time string in evaluated_timestr with an actual +# time. +# +# Note that when using sed, we need to escape various characters (question +# mark, closing and opening curly braces, etc) in the METplus template in +# order for the sed command below to work properly. +# + crnt_timefmt_esc=$( echo "${crnt_timefmt}" | \ + $SED -r -e "s/\?/\\\?/g" -e "s/\{/\\\{/g" -e "s/\}/\\\}/g" ) + evaluated_timestr=$( echo "${evaluated_timestr}" | \ + $SED -n -r "s|(.*)(${crnt_timefmt_esc})(.*)|\1${the_time}\3|p" ) +# +# Set up values for the next iteration of the while-loop. +# + crnt_timefmt=$( printf "%s" "${tmpl_remainder}" | \ + $SED -n -r -e "s|${regex_search_tmpl}|\2|p" ) + tmpl_remainder=$( printf "%s" "${tmpl_remainder}" | \ + $SED -n -r -e "s|${regex_search_tmpl}|\1\3|p" ) + + done +# +#----------------------------------------------------------------------- +# +# Set output variables. 
+# +#----------------------------------------------------------------------- +# + if [ ! -z "${outvarname_evaluated_timestr}" ]; then + printf -v ${outvarname_evaluated_timestr} "%s" "${evaluated_timestr}" + fi +# +#----------------------------------------------------------------------- +# +# Restore the shell options saved at the beginning of this script/function. +# +#----------------------------------------------------------------------- +# + { restore_shell_opts; } > /dev/null 2>&1 + +} + +# +#----------------------------------------------------------------------- +# +# This function uses the specified initial forecast time and forecast +# hour to evaluate a single METplus time-formatting string and return +# the corresponding time. +# +#----------------------------------------------------------------------- +# +function eval_single_METplus_timefmt() { +# +#----------------------------------------------------------------------- +# +# Save current shell options (in a global array). Then set new options +# for this script/function. +# +#----------------------------------------------------------------------- +# + { save_shell_opts; . ${USHdir}/preamble.sh; } > /dev/null 2>&1 +# +#----------------------------------------------------------------------- +# +# Get the full path to the file in which this script/function is located +# (scrfunc_fp), the name of that file (scrfunc_fn), and the directory in +# which the file is located (scrfunc_dir). +# +#----------------------------------------------------------------------- +# + local scrfunc_fp=$( $READLINK -f "${BASH_SOURCE[0]}" ) + local scrfunc_fn=$( basename "${scrfunc_fp}" ) + local scrfunc_dir=$( dirname "${scrfunc_fp}" ) +# +#----------------------------------------------------------------------- +# +# Get the name of this function. 
+# +#----------------------------------------------------------------------- +# + local func_name="${FUNCNAME[0]}" +# +#----------------------------------------------------------------------- +# +# Specify the set of valid argument names for this script/function. Then +# process the arguments provided to this script/function (which should +# consist of a set of name-value pairs of the form arg1="value1", etc). +# +#----------------------------------------------------------------------- +# + local valid_args=( \ + "init_time" \ + "fhr" \ + "METplus_timefmt" \ + "outvarname_evaluated_timefmt" \ + ) + process_args valid_args "$@" +# +#----------------------------------------------------------------------- +# +# For debugging purposes, print out values of arguments passed to this +# script. Note that these will be printed out only if VERBOSE is set to +# TRUE. +# +#----------------------------------------------------------------------- +# + print_input_args "valid_args" +# +#----------------------------------------------------------------------- +# +# Declare local variables. 
+# +#----------------------------------------------------------------------- +# + local evaluated_timefmt \ + fmt \ hh_init \ init_time_str \ lead_hrs \ len \ - mn_init \ - METplus_time_fmt \ + METplus_time_codes \ METplus_time_shift \ METplus_time_type \ + mn_init \ regex_search \ ss_init \ valid_time_str \ @@ -90,25 +246,25 @@ function eval_METplus_timestr_tmpl() { # #----------------------------------------------------------------------- # - if [ -z "${METplus_timestr_tmpl}" ]; then + if [ -z "${METplus_timefmt}" ]; then print_err_msg_exit "\ -The specified METplus time string template (METplus_timestr_tmpl) cannot -be empty: - METplus_timestr_tmpl = \"${METplus_timestr_tmpl}\"" +The specified METplus time-formatting string (METplus_timefmt) cannot be +empty: + METplus_timefmt = \"${METplus_timefmt}\"" fi len=${#init_time} if [[ ${init_time} =~ ^[0-9]+$ ]]; then if [ "$len" -ne 10 ] && [ "$len" -ne 12 ] && [ "$len" -ne 14 ]; then print_err_msg_exit "\ -The specified initial time string (init_time) must contain 10, 12, or 14 -digits (but contains $len): +The specified initial time (init_time) must contain 10, 12, or 14 digits +but instead contains $len: init_time = \"${init_time}\"" fi else print_err_msg_exit "\ -The specified initial time string (init_time) must consist of digits only -and cannot be empty: +The specified initial time (init_time) must consist of digits only and +cannot be empty: init_time = \"${init_time}\"" fi @@ -150,11 +306,11 @@ be empty: # regex_search="^\{(init|valid|lead)(\?)(fmt=)([^\?]*)(\?)?(shift=)?([^\?]*)?\}" METplus_time_type=$( \ - printf "%s" "${METplus_timestr_tmpl}" | $SED -n -r -e "s/${regex_search}/\1/p" ) - METplus_time_fmt=$( \ - printf "%s" "${METplus_timestr_tmpl}" | $SED -n -r -e "s/${regex_search}/\4/p" ) + printf "%s" "${METplus_timefmt}" | $SED -n -r -e "s/${regex_search}/\1/p" ) + METplus_time_codes=$( \ + printf "%s" "${METplus_timefmt}" | $SED -n -r -e "s/${regex_search}/\4/p" ) METplus_time_shift=$( \ - printf "%s" 
"${METplus_timestr_tmpl}" | $SED -n -r -e "s/${regex_search}/\7/p" ) + printf "%s" "${METplus_timefmt}" | $SED -n -r -e "s/${regex_search}/\7/p" ) # #----------------------------------------------------------------------- # @@ -163,9 +319,9 @@ be empty: # #----------------------------------------------------------------------- # - case "${METplus_time_fmt}" in + case "${METplus_time_codes}" in "%Y%m%d%H"|"%Y%m%d"|"%H%M%S") - fmt="${METplus_time_fmt}" + fmt="${METplus_time_codes}" ;; "%H") # @@ -178,7 +334,7 @@ be empty: if [ "${METplus_time_type}" = "lead" ]; then fmt="%02.0f" else - fmt="${METplus_time_fmt}" + fmt="${METplus_time_codes}" fi ;; "%HHH") @@ -194,10 +350,10 @@ be empty: ;; *) print_err_msg_exit "\ -Unsupported METplus time format: - METplus_time_fmt = \"${METplus_time_fmt}\" -METplus time string template passed to this function is: - METplus_timestr_tmpl = \"${METplus_timestr_tmpl}\"" +Unsupported METplus time codes: + METplus_time_codes = \"${METplus_time_codes}\" +METplus time-formatting string passed to this function is: + METplus_timefmt = \"${METplus_timefmt}\"" ;; esac # @@ -213,10 +369,10 @@ METplus time string template passed to this function is: # case "${METplus_time_type}" in "init") - formatted_time=$( ${DATE_UTIL} --date="${init_time_str} + ${time_shift_str}" +"${fmt}" ) + evaluated_timefmt=$( ${DATE_UTIL} --date="${init_time_str} + ${time_shift_str}" +"${fmt}" ) ;; "valid") - formatted_time=$( ${DATE_UTIL} --date="${valid_time_str} + ${time_shift_str}" +"${fmt}" ) + evaluated_timefmt=$( ${DATE_UTIL} --date="${valid_time_str} + ${time_shift_str}" +"${fmt}" ) ;; "lead") lead_secs=$(( $( ${DATE_UTIL} --date="${valid_time_str} + ${time_shift_str}" +"%s" ) \ @@ -240,23 +396,23 @@ The remainder (lead_hrs_rem) after dividing the lead_secs by SECS_PER_HOUR # # Get the lead in the proper format. 
# - formatted_time=$( printf "${fmt}" "${lead_hrs}" ) + evaluated_timefmt=$( printf "${fmt}" "${lead_hrs}" ) ;; *) print_err_msg_exit "\ Unsupported METplus time type: METplus_time_type = \"${METplus_time_type}\" -METplus time string template passed to this function is: - METplus_timestr_tmpl = \"${METplus_timestr_tmpl}\"" +METplus time-formatting string passed to this function is: + METplus_timefmt = \"${METplus_timefmt}\"" ;; esac - if [ -z "${formatted_time}" ]; then + if [ -z "${evaluated_timefmt}" ]; then print_err_msg_exit "\ -The specified METplus time string template (METplus_timestr_tmpl) could -not be evaluated for the given initial time (init_time) and forecast -hour (fhr): - METplus_timestr_tmpl = \"${METplus_timestr_tmpl}\" +The specified METplus time-formatting string (METplus_timefmt) could not +be evaluated for the given initial time (init_time) and forecast hour +(fhr): + METplus_timefmt = \"${METplus_timefmt}\" init_time = \"${init_time}\" fhr = \"${fhr}\"" fi @@ -267,8 +423,8 @@ hour (fhr): # #----------------------------------------------------------------------- # - if [ ! -z "${outvarname_formatted_time}" ]; then - printf -v ${outvarname_formatted_time} "%s" "${formatted_time}" + if [ ! -z "${outvarname_evaluated_timefmt}" ]; then + printf -v ${outvarname_evaluated_timefmt} "%s" "${evaluated_timefmt}" fi # #----------------------------------------------------------------------- From ef1cbbb6f97de5ea03709ff89a895f1908aa8325 Mon Sep 17 00:00:00 2001 From: gerard ketefian Date: Mon, 16 Sep 2024 14:12:31 -0600 Subject: [PATCH 064/131] Change variable names and remove unneeded variable. 
--- ush/get_obs_ccpa.sh | 47 ++++++++++++++++++++++----------------------- ush/get_obs_mrms.sh | 26 ++++++++++++------------- ush/get_obs_ndas.sh | 37 +++++++++++++++++------------------ 3 files changed, 54 insertions(+), 56 deletions(-) diff --git a/ush/get_obs_ccpa.sh b/ush/get_obs_ccpa.sh index 79ce882da3..d3ad4c49ce 100755 --- a/ush/get_obs_ccpa.sh +++ b/ush/get_obs_ccpa.sh @@ -173,31 +173,31 @@ yyyymmdd_task=${PDY} # processing by this script is complete. basedir_proc=${OBS_DIR} -# The environment variable OUTPUT_TIMES_ALL set in the ROCOTO XML is a -# scalar string containing all relevant forecast output times (each in +# The environment variable FCST_OUTPUT_TIMES_ALL set in the ROCOTO XML is +# a scalar string containing all relevant forecast output times (each in # the form YYYYMMDDHH) separated by spaces. It isn't an array of strings # because in ROCOTO, there doesn't seem to be a way to pass a bash array # from the XML to the task's script. To have an array-valued variable to -# work with, here, we create the new variable output_times_all that is -# the array-valued counterpart of OUTPUT_TIMES_ALL. -output_times_all=($(printf "%s" "${OUTPUT_TIMES_ALL}")) +# work with, here, we create the new variable fcst_output_times_all that +# is the array-valued counterpart of FCST_OUTPUT_TIMES_ALL. +fcst_output_times_all=($(printf "%s" "${FCST_OUTPUT_TIMES_ALL}")) # List of times (each of the form YYYYMMDDHH) for which there is forecast # APCP (accumulated precipitation) output for the current day. We start # constructing this by extracting from the full list of all forecast APCP # output times (i.e. from all cycles) all elements that contain the current # task's day (in the form YYYYMMDD). 
-output_times_crnt_day=() -if [[ ${output_times_all[@]} =~ ${yyyymmdd_task} ]]; then - output_times_crnt_day=( $(printf "%s\n" "${output_times_all[@]}" | grep "^${yyyymmdd_task}") ) +fcst_output_times_crnt_day=() +if [[ ${fcst_output_times_all[@]} =~ ${yyyymmdd_task} ]]; then + fcst_output_times_crnt_day=( $(printf "%s\n" "${fcst_output_times_all[@]}" | grep "^${yyyymmdd_task}") ) fi # If the 0th hour of the current day is in this list (and if it is, it # will be the first element), remove it because for APCP, that time is # considered part of the previous day (because it represents precipitation # that occurred during the last hour of the previous day). -if [[ ${#output_times_crnt_day[@]} -gt 0 ]] && \ - [[ ${output_times_crnt_day[0]} == "${yyyymmdd_task}00" ]]; then - output_times_crnt_day=(${output_times_crnt_day[@]:1}) +if [[ ${#fcst_output_times_crnt_day[@]} -gt 0 ]] && \ + [[ ${fcst_output_times_crnt_day[0]} == "${yyyymmdd_task}00" ]]; then + fcst_output_times_crnt_day=(${fcst_output_times_crnt_day[@]:1}) fi # If the 0th hour of the next day (i.e. the day after yyyymmdd_task) is # one of the output times in the list of all APCP output times, we include @@ -205,14 +205,14 @@ fi # considered part of the current day (because it represents precipitation # that occured during the last hour of the current day). yyyymmdd00_task_p1d=$(${DATE_UTIL} --date "${yyyymmdd_task} 1 day" +%Y%m%d%H) -if [[ ${output_times_all[@]} =~ ${yyyymmdd00_task_p1d} ]]; then - output_times_crnt_day+=(${yyyymmdd00_task_p1d}) +if [[ ${fcst_output_times_all[@]} =~ ${yyyymmdd00_task_p1d} ]]; then + fcst_output_times_crnt_day+=(${yyyymmdd00_task_p1d}) fi # If there are no forecast APCP output times on the day of the current # task, exit the script. 
-num_output_times_crnt_day=${#output_times_crnt_day[@]} -if [[ ${num_output_times_crnt_day} -eq 0 ]]; then +num_fcst_output_times_crnt_day=${#fcst_output_times_crnt_day[@]} +if [[ ${num_fcst_output_times_crnt_day} -eq 0 ]]; then print_info_msg " None of the forecast APCP output times fall within the day (including the 0th hour of the next day) associated with the current task (yyyymmdd_task): @@ -237,14 +237,14 @@ arcv_hr_incr=6 # Initial guess for starting archive hour. This is set to the archive # hour containing obs at the first forecast output time of the day. -hh_first=$(echo ${output_times_crnt_day[0]} | cut -c9-10) +hh_first=$(echo ${fcst_output_times_crnt_day[0]} | cut -c9-10) hr_first=$((10#${hh_first})) arcv_hr_start=$(ceil ${hr_first} ${arcv_hr_incr}) arcv_hr_start=$(( arcv_hr_start*arcv_hr_incr )) # Ending archive hour. This is set to the archive hour containing obs at # the last forecast output time of the day. -hh_last=$(echo ${output_times_crnt_day[-1]} | cut -c9-10) +hh_last=$(echo ${fcst_output_times_crnt_day[-1]} | cut -c9-10) hr_last=$((10#${hh_last})) if [[ ${hr_last} -eq 0 ]]; then arcv_hr_end=24 @@ -257,7 +257,7 @@ fi # starting archive hour. In the process, keep a count of the number of # obs files that already exist on disk. num_existing_files=0 -for yyyymmddhh in ${output_times_crnt_day[@]}; do +for yyyymmddhh in ${fcst_output_times_crnt_day[@]}; do yyyymmdd=$(echo ${yyyymmddhh} | cut -c1-8) hh=$(echo ${yyyymmddhh} | cut -c9-10) day_dir_proc="${basedir_proc}/${yyyymmdd}" @@ -283,7 +283,7 @@ done # If the number of obs files that already exist on disk is equal to the # number of files needed, then there is no need to retrieve any files. 
-num_needed_files=$((num_output_times_crnt_day)) +num_needed_files=$((num_fcst_output_times_crnt_day)) if [[ ${num_existing_files} -eq ${num_needed_files} ]]; then print_info_msg " All obs files needed for the current day (yyyymmdd_task) already exist @@ -361,9 +361,8 @@ arcv_hr = ${arcv_hr}" yyyymmddhh_qrtrday_start=$(${DATE_UTIL} --date "${yyyymmdd_arcv} ${hh_arcv} 5 hours ago" +%Y%m%d%H) yyyymmddhh_qrtrday_end=${yyyymmddhh_arcv} do_retrieve="FALSE" - nout=${#output_times_crnt_day[@]} - for (( i=0; i<${nout}; i++ )); do - output_time=${output_times_crnt_day[i]} + for (( i=0; i<${num_fcst_output_times_crnt_day}; i++ )); do + output_time=${fcst_output_times_crnt_day[i]} if [[ "${output_time}" -ge "${yyyymmddhh_qrtrday_start}" ]] && \ [[ "${output_time}" -le "${yyyymmddhh_qrtrday_end}" ]]; then do_retrieve="TRUE" @@ -416,7 +415,7 @@ arcv_hr = ${arcv_hr}" yyyymmddhh=$(${DATE_UTIL} --date "${yyyymmdd_arcv} ${hh_arcv} ${hrs_ago} hours ago" +%Y%m%d%H) yyyymmdd=$(echo ${yyyymmddhh} | cut -c1-8) hh=$(echo ${yyyymmddhh} | cut -c9-10) - if [[ ${output_times_crnt_day[@]} =~ ${yyyymmddhh} ]]; then + if [[ ${fcst_output_times_crnt_day[@]} =~ ${yyyymmddhh} ]]; then fn_raw="ccpa.t${hh}z.${accum}h.hrap.conus.gb2" fp_raw="${qrtrday_dir_raw}/${fn_raw}" day_dir_proc="${basedir_proc}/${yyyymmdd}" @@ -445,7 +444,7 @@ archive are: yyyymmddhh_qrtrday_start = \"${yyyymmddhh_qrtrday_start}\" yyyymmddhh_qrtrday_end = \"${yyyymmddhh_qrtrday_end}\" The forecast output times for APCP are: - output_times_crnt_day = ($(printf "\"%s\" " ${output_times_crnt_day[@]}))" + fcst_output_times_crnt_day = ($(printf "\"%s\" " ${fcst_output_times_crnt_day[@]}))" fi diff --git a/ush/get_obs_mrms.sh b/ush/get_obs_mrms.sh index 6aac0159b3..af3b8ca942 100755 --- a/ush/get_obs_mrms.sh +++ b/ush/get_obs_mrms.sh @@ -85,27 +85,27 @@ yyyymmdd_task=${PDY} # all processing by this script is complete. 
basedir_proc=${OBS_DIR} -# The environment variable OUTPUT_TIMES_ALL set in the ROCOTO XML is a -# scalar string containing all relevant forecast output times (each in +# The environment variable FCST_OUTPUT_TIMES_ALL set in the ROCOTO XML is +# a scalar string containing all relevant forecast output times (each in # the form YYYYMMDDHH) separated by spaces. It isn't an array of strings # because in ROCOTO, there doesn't seem to be a way to pass a bash array # from the XML to the task's script. To have an array-valued variable to -# work with, here, we create the new variable output_times_all that is -# the array-valued counterpart of OUTPUT_TIMES_ALL. -output_times_all=($(printf "%s" "${OUTPUT_TIMES_ALL}")) +# work with, here, we create the new variable fcst_output_times_all that +# is the array-valued counterpart of FCST_OUTPUT_TIMES_ALL. +fcst_output_times_all=($(printf "%s" "${FCST_OUTPUT_TIMES_ALL}")) # List of times (each of the form YYYYMMDDHH) for which there is forecast # output for the current day. We extract this list from the full list of # all forecast output times (i.e. from all cycles). -output_times_crnt_day=() -if [[ ${output_times_all[@]} =~ ${yyyymmdd_task} ]]; then - output_times_crnt_day=( $(printf "%s\n" "${output_times_all[@]}" | grep "^${yyyymmdd_task}") ) +fcst_output_times_crnt_day=() +if [[ ${fcst_output_times_all[@]} =~ ${yyyymmdd_task} ]]; then + fcst_output_times_crnt_day=( $(printf "%s\n" "${fcst_output_times_all[@]}" | grep "^${yyyymmdd_task}") ) fi # If there are no forecast output times on the day of the current task, # exit the script. 
-num_output_times_crnt_day=${#output_times_crnt_day[@]} -if [[ ${num_output_times_crnt_day} -eq 0 ]]; then +num_fcst_output_times_crnt_day=${#fcst_output_times_crnt_day[@]} +if [[ ${num_fcst_output_times_crnt_day} -eq 0 ]]; then print_info_msg " None of the forecast output times fall within the day associated with the current task (yyyymmdd_task): @@ -120,7 +120,7 @@ fi num_existing_files=0 num_mrms_fields=${#mrms_fields[@]} for (( i=0; i<${num_mrms_fields}; i++ )); do - for yyyymmddhh in ${output_times_crnt_day[@]}; do + for yyyymmddhh in ${fcst_output_times_crnt_day[@]}; do yyyymmdd=$(echo ${yyyymmddhh} | cut -c1-8) hh=$(echo ${yyyymmddhh} | cut -c9-10) day_dir_proc="${basedir_proc}/${yyyymmdd}" @@ -139,7 +139,7 @@ done # If the number of obs files that already exist on disk is equal to the # number of files needed, then there is no need to retrieve any files. -num_needed_files=$((num_output_times_crnt_day*num_mrms_fields)) +num_needed_files=$((num_fcst_output_times_crnt_day*num_mrms_fields)) if [[ ${num_existing_files} -eq $((num_needed_files)) ]]; then print_info_msg " All obs files needed for the current day (yyyymmdd_task) already exist @@ -246,7 +246,7 @@ $cmd || print_err_msg_exit "Could not retrieve obs from HPSS." # process renaming it) to the processed location. for hr in $(seq 0 1 23); do yyyymmddhh=$(${DATE_UTIL} --date "${yyyymmdd_task} ${hr} hours" +%Y%m%d%H) - if [[ ${output_times_crnt_day[@]} =~ ${yyyymmddhh} ]]; then + if [[ ${fcst_output_times_crnt_day[@]} =~ ${yyyymmddhh} ]]; then for (( i=0; i<${num_mrms_fields}; i++ )); do python ${USHdir}/mrms_pull_topofhour.py \ --valid_time ${yyyymmddhh} \ diff --git a/ush/get_obs_ndas.sh b/ush/get_obs_ndas.sh index 2954552412..5562647340 100755 --- a/ush/get_obs_ndas.sh +++ b/ush/get_obs_ndas.sh @@ -59,27 +59,27 @@ yyyymmdd_task=${PDY} # all processing by this script is complete. 
basedir_proc=${OBS_DIR} -# The environment variable OUTPUT_TIMES_ALL set in the ROCOTO XML is a -# scalar string containing all relevant forecast output times (each in +# The environment variable FCST_OUTPUT_TIMES_ALL set in the ROCOTO XML is +# a scalar string containing all relevant forecast output times (each in # the form YYYYMMDDHH) separated by spaces. It isn't an array of strings # because in ROCOTO, there doesn't seem to be a way to pass a bash array # from the XML to the task's script. To have an array-valued variable to -# work with, here, we create the new variable output_times_all that is -# the array-valued counterpart of OUTPUT_TIMES_ALL. -output_times_all=($(printf "%s" "${OUTPUT_TIMES_ALL}")) +# work with, here, we create the new variable fcst_output_times_all that +# is the array-valued counterpart of FCST_OUTPUT_TIMES_ALL. +fcst_output_times_all=($(printf "%s" "${FCST_OUTPUT_TIMES_ALL}")) # List of times (each of the form YYYYMMDDHH) for which there is forecast # output for the current day. We extract this list from the full list of # all forecast output times (i.e. from all cycles). -output_times_crnt_day=() -if [[ ${output_times_all[@]} =~ ${yyyymmdd_task} ]]; then - output_times_crnt_day=( $(printf "%s\n" "${output_times_all[@]}" | grep "^${yyyymmdd_task}") ) +fcst_output_times_crnt_day=() +if [[ ${fcst_output_times_all[@]} =~ ${yyyymmdd_task} ]]; then + fcst_output_times_crnt_day=( $(printf "%s\n" "${fcst_output_times_all[@]}" | grep "^${yyyymmdd_task}") ) fi # If there are no forecast output times on the day of the current task, # exit the script. 
-num_output_times_crnt_day=${#output_times_crnt_day[@]} -if [[ ${num_output_times_crnt_day} -eq 0 ]]; then +num_fcst_output_times_crnt_day=${#fcst_output_times_crnt_day[@]} +if [[ ${num_fcst_output_times_crnt_day} -eq 0 ]]; then print_info_msg " None of the forecast output times fall within the day associated with the current task (yyyymmdd_task): @@ -104,13 +104,13 @@ arcv_hr_incr=6 # Initial guess for starting archive hour. This is set to the archive # hour containing obs at the first forecast output time of the day. -hh_first=$(echo ${output_times_crnt_day[0]} | cut -c9-10) +hh_first=$(echo ${fcst_output_times_crnt_day[0]} | cut -c9-10) hr_first=$((10#${hh_first})) arcv_hr_start=$(( (hr_first/arcv_hr_incr + 1)*arcv_hr_incr )) # Ending archive hour. This is set to the archive hour containing obs at # the last forecast output time of the day. -hh_last=$(echo ${output_times_crnt_day[-1]} | cut -c9-10) +hh_last=$(echo ${fcst_output_times_crnt_day[-1]} | cut -c9-10) hr_last=$((10#${hh_last})) arcv_hr_end=$(( (hr_last/arcv_hr_incr + 1)*arcv_hr_incr )) @@ -118,7 +118,7 @@ arcv_hr_end=$(( (hr_last/arcv_hr_incr + 1)*arcv_hr_incr )) # starting archive hour. In the process, keep a count of the number of # obs files that already exist on disk. num_existing_files=0 -for yyyymmddhh in ${output_times_crnt_day[@]}; do +for yyyymmddhh in ${fcst_output_times_crnt_day[@]}; do yyyymmdd=$(echo ${yyyymmddhh} | cut -c1-8) hh=$(echo ${yyyymmddhh} | cut -c9-10) day_dir_proc="${basedir_proc}" @@ -143,7 +143,7 @@ done # If the number of obs files that already exist on disk is equal to the # number of files needed, then there is no need to retrieve any files. 
-num_needed_files=$((num_output_times_crnt_day)) +num_needed_files=$((num_fcst_output_times_crnt_day)) if [[ ${num_existing_files} -eq ${num_needed_files} ]]; then print_info_msg " All obs files needed for the current day (yyyymmdd_task) already exist @@ -221,9 +221,8 @@ arcv_hr = ${arcv_hr}" yyyymmddhh_qrtrday_start=$(${DATE_UTIL} --date "${yyyymmdd_arcv} ${hh_arcv} 6 hours ago" +%Y%m%d%H) yyyymmddhh_qrtrday_end=$(${DATE_UTIL} --date "${yyyymmdd_arcv} ${hh_arcv} 1 hours ago" +%Y%m%d%H) do_retrieve="FALSE" - nout=${#output_times_crnt_day[@]} - for (( i=0; i<${nout}; i++ )); do - output_time=${output_times_crnt_day[i]} + for (( i=0; i<${num_fcst_output_times_crnt_day}; i++ )); do + output_time=${fcst_output_times_crnt_day[i]} if [[ "${output_time}" -ge "${yyyymmddhh_qrtrday_start}" ]] && \ [[ "${output_time}" -le "${yyyymmddhh_qrtrday_end}" ]]; then do_retrieve="TRUE" @@ -276,7 +275,7 @@ arcv_hr = ${arcv_hr}" yyyymmddhh=$(${DATE_UTIL} --date "${yyyymmdd_arcv} ${hh_arcv} ${hrs_ago} hours ago" +%Y%m%d%H) yyyymmdd=$(echo ${yyyymmddhh} | cut -c1-8) hh=$(echo ${yyyymmddhh} | cut -c9-10) - if [[ ${output_times_crnt_day[@]} =~ ${yyyymmddhh} ]]; then + if [[ ${fcst_output_times_crnt_day[@]} =~ ${yyyymmddhh} ]]; then fn_raw="nam.t${hh_arcv}z.prepbufr.tm${hrs_ago}.nr" fp_raw="${qrtrday_dir_raw}/${fn_raw}" day_dir_proc="${basedir_proc}" @@ -296,7 +295,7 @@ are: yyyymmddhh_qrtrday_start = \"${yyyymmddhh_qrtrday_start}\" yyyymmddhh_qrtrday_end = \"${yyyymmddhh_qrtrday_end}\" The forecast output times are: - output_times_crnt_day = ($(printf "\"%s\" " ${output_times_crnt_day[@]}))" + fcst_output_times_crnt_day = ($(printf "\"%s\" " ${fcst_output_times_crnt_day[@]}))" fi From 2d751130084dce588aef9def18f2f487d1dadbc9 Mon Sep 17 00:00:00 2001 From: gerard ketefian Date: Mon, 16 Sep 2024 14:13:24 -0600 Subject: [PATCH 065/131] Fix location of staged forecast input files. 
--- ...ulticyc.cycintvl_07hr_inits_vary_fcstlen_09hr.ncep-hrrr.yaml | 2 +- ...ulticyc.cycintvl_11hr_inits_vary_fcstlen_03hr.ncep-hrrr.yaml | 2 +- ...multicyc.cycintvl_24hr_inits_00z_fcstlen_03hr.ncep-hrrr.yaml | 2 +- ...multicyc.cycintvl_24hr_inits_12z_fcstlen_03hr.nssl-mpas.yaml | 2 +- ...multicyc.cycintvl_24hr_inits_12z_fcstlen_48hr.nssl-mpas.yaml | 2 +- ...multicyc.cycintvl_24hr_inits_21z_fcstlen_03hr.ncep-hrrr.yaml | 2 +- ...multicyc.cycintvl_96hr_inits_12z_fcstlen_48hr.nssl-mpas.yaml | 2 +- 7 files changed, 7 insertions(+), 7 deletions(-) diff --git a/tests/WE2E/test_configs/verification/config.get_obs_hpss.do_vx_det.multicyc.cycintvl_07hr_inits_vary_fcstlen_09hr.ncep-hrrr.yaml b/tests/WE2E/test_configs/verification/config.get_obs_hpss.do_vx_det.multicyc.cycintvl_07hr_inits_vary_fcstlen_09hr.ncep-hrrr.yaml index 8b840a8ea8..e386d84e3d 100644 --- a/tests/WE2E/test_configs/verification/config.get_obs_hpss.do_vx_det.multicyc.cycintvl_07hr_inits_vary_fcstlen_09hr.ncep-hrrr.yaml +++ b/tests/WE2E/test_configs/verification/config.get_obs_hpss.do_vx_det.multicyc.cycintvl_07hr_inits_vary_fcstlen_09hr.ncep-hrrr.yaml @@ -57,7 +57,7 @@ task_run_post: verification: METPLUS_VERBOSITY_LEVEL: 5 VX_FCST_MODEL_NAME: 'hrrr_ncep' - VX_FCST_INPUT_BASEDIR: '/scratch2/BMC/det/Gerard.Ketefian/UFS_CAM/forecasts_for_SRW_WE2E_tests' + VX_FCST_INPUT_BASEDIR: '/scratch2/BMC/fv3lam/Gerard.Ketefian/forecasts_for_SRW_WE2E_tests' FCST_SUBDIR_TEMPLATE: '{% if user.RUN_ENVIR == "nco" %}${NET_default}.{init?fmt=%Y%m%d?shift=-${time_lag}}/{init?fmt=%H?shift=-${time_lag}}{% else %}{{VX_FCST_MODEL_NAME}}_{init?fmt=%Y%m%d%H?shift=-${time_lag}}{% if global.DO_ENSEMBLE %}/${ensmem_name}{% endif %}{% endif %}' FCST_FN_TEMPLATE: '${POST_OUTPUT_DOMAIN_NAME}_{init?fmt=%Y%m%d%H?shift=-${time_lag}}f{lead?fmt=%HHH?shift=${time_lag}}.grib2' FCST_FN_TEMPLATE_PCPCOMBINE_OUTPUT: '${POST_OUTPUT_DOMAIN_NAME}_{init?fmt=%Y%m%d%HHH?shift=-${time_lag}}f{lead?fmt=%HHH?shift=${time_lag}}_${VAR}_a${ACCUM_HH}h.nc' diff 
--git a/tests/WE2E/test_configs/verification/config.get_obs_hpss.do_vx_det.multicyc.cycintvl_11hr_inits_vary_fcstlen_03hr.ncep-hrrr.yaml b/tests/WE2E/test_configs/verification/config.get_obs_hpss.do_vx_det.multicyc.cycintvl_11hr_inits_vary_fcstlen_03hr.ncep-hrrr.yaml index 18558e0d95..9694f9845f 100644 --- a/tests/WE2E/test_configs/verification/config.get_obs_hpss.do_vx_det.multicyc.cycintvl_11hr_inits_vary_fcstlen_03hr.ncep-hrrr.yaml +++ b/tests/WE2E/test_configs/verification/config.get_obs_hpss.do_vx_det.multicyc.cycintvl_11hr_inits_vary_fcstlen_03hr.ncep-hrrr.yaml @@ -59,7 +59,7 @@ task_run_post: verification: METPLUS_VERBOSITY_LEVEL: 5 VX_FCST_MODEL_NAME: 'hrrr_ncep' - VX_FCST_INPUT_BASEDIR: '/scratch2/BMC/det/Gerard.Ketefian/UFS_CAM/forecasts_for_SRW_WE2E_tests' + VX_FCST_INPUT_BASEDIR: '/scratch2/BMC/fv3lam/Gerard.Ketefian/forecasts_for_SRW_WE2E_tests' FCST_SUBDIR_TEMPLATE: '{% if user.RUN_ENVIR == "nco" %}${NET_default}.{init?fmt=%Y%m%d?shift=-${time_lag}}/{init?fmt=%H?shift=-${time_lag}}{% else %}{{VX_FCST_MODEL_NAME}}_{init?fmt=%Y%m%d%H?shift=-${time_lag}}{% if global.DO_ENSEMBLE %}/${ensmem_name}{% endif %}{% endif %}' FCST_FN_TEMPLATE: '${POST_OUTPUT_DOMAIN_NAME}_{init?fmt=%Y%m%d%H?shift=-${time_lag}}f{lead?fmt=%HHH?shift=${time_lag}}.grib2' FCST_FN_TEMPLATE_PCPCOMBINE_OUTPUT: '${POST_OUTPUT_DOMAIN_NAME}_{init?fmt=%Y%m%d%HHH?shift=-${time_lag}}f{lead?fmt=%HHH?shift=${time_lag}}_${VAR}_a${ACCUM_HH}h.nc' diff --git a/tests/WE2E/test_configs/verification/config.get_obs_hpss.do_vx_det.multicyc.cycintvl_24hr_inits_00z_fcstlen_03hr.ncep-hrrr.yaml b/tests/WE2E/test_configs/verification/config.get_obs_hpss.do_vx_det.multicyc.cycintvl_24hr_inits_00z_fcstlen_03hr.ncep-hrrr.yaml index 5d6929cd4a..067187a216 100644 --- a/tests/WE2E/test_configs/verification/config.get_obs_hpss.do_vx_det.multicyc.cycintvl_24hr_inits_00z_fcstlen_03hr.ncep-hrrr.yaml +++ 
b/tests/WE2E/test_configs/verification/config.get_obs_hpss.do_vx_det.multicyc.cycintvl_24hr_inits_00z_fcstlen_03hr.ncep-hrrr.yaml @@ -57,7 +57,7 @@ task_run_post: verification: METPLUS_VERBOSITY_LEVEL: 5 VX_FCST_MODEL_NAME: 'hrrr_ncep' - VX_FCST_INPUT_BASEDIR: '/scratch2/BMC/det/Gerard.Ketefian/UFS_CAM/forecasts_for_SRW_WE2E_tests' + VX_FCST_INPUT_BASEDIR: '/scratch2/BMC/fv3lam/Gerard.Ketefian/forecasts_for_SRW_WE2E_tests' FCST_SUBDIR_TEMPLATE: '{% if user.RUN_ENVIR == "nco" %}${NET_default}.{init?fmt=%Y%m%d?shift=-${time_lag}}/{init?fmt=%H?shift=-${time_lag}}{% else %}{{VX_FCST_MODEL_NAME}}_{init?fmt=%Y%m%d%H?shift=-${time_lag}}{% if global.DO_ENSEMBLE %}/${ensmem_name}{% endif %}{% endif %}' FCST_FN_TEMPLATE: '${POST_OUTPUT_DOMAIN_NAME}_{init?fmt=%Y%m%d%H?shift=-${time_lag}}f{lead?fmt=%HHH?shift=${time_lag}}.grib2' FCST_FN_TEMPLATE_PCPCOMBINE_OUTPUT: '${POST_OUTPUT_DOMAIN_NAME}_{init?fmt=%Y%m%d%HHH?shift=-${time_lag}}f{lead?fmt=%HHH?shift=${time_lag}}_${VAR}_a${ACCUM_HH}h.nc' diff --git a/tests/WE2E/test_configs/verification/config.get_obs_hpss.do_vx_det.multicyc.cycintvl_24hr_inits_12z_fcstlen_03hr.nssl-mpas.yaml b/tests/WE2E/test_configs/verification/config.get_obs_hpss.do_vx_det.multicyc.cycintvl_24hr_inits_12z_fcstlen_03hr.nssl-mpas.yaml index 7ec2264509..75d706ba1f 100644 --- a/tests/WE2E/test_configs/verification/config.get_obs_hpss.do_vx_det.multicyc.cycintvl_24hr_inits_12z_fcstlen_03hr.nssl-mpas.yaml +++ b/tests/WE2E/test_configs/verification/config.get_obs_hpss.do_vx_det.multicyc.cycintvl_24hr_inits_12z_fcstlen_03hr.nssl-mpas.yaml @@ -59,7 +59,7 @@ task_run_post: verification: METPLUS_VERBOSITY_LEVEL: 5 VX_FCST_MODEL_NAME: 'NSSL-MPAS-HN' - VX_FCST_INPUT_BASEDIR: '/scratch2/BMC/det/Gerard.Ketefian/UFS_CAM/forecasts_for_SRW_WE2E_tests' + VX_FCST_INPUT_BASEDIR: '/scratch2/BMC/fv3lam/Gerard.Ketefian/forecasts_for_SRW_WE2E_tests' FCST_SUBDIR_TEMPLATE: '{% if user.RUN_ENVIR == "nco" 
%}${NET_default}.{init?fmt=%Y%m%d?shift=-${time_lag}}/{init?fmt=%H?shift=-${time_lag}}{% else %}{{VX_FCST_MODEL_NAME}}_{init?fmt=%Y%m%d%H?shift=-${time_lag}}{% if global.DO_ENSEMBLE %}/${ensmem_name}{% endif %}{% endif %}' FCST_FN_TEMPLATE: '${POST_OUTPUT_DOMAIN_NAME}_{init?fmt=%Y%m%d%H?shift=-${time_lag}}f{lead?fmt=%H?shift=${time_lag}}.grib2' FCST_FN_TEMPLATE_PCPCOMBINE_OUTPUT: '${POST_OUTPUT_DOMAIN_NAME}_{init?fmt=%Y%m%d%H?shift=-${time_lag}}f{lead?fmt=%HHH?shift=${time_lag}}_${VAR}_a${ACCUM_HH}h.nc' diff --git a/tests/WE2E/test_configs/verification/config.get_obs_hpss.do_vx_det.multicyc.cycintvl_24hr_inits_12z_fcstlen_48hr.nssl-mpas.yaml b/tests/WE2E/test_configs/verification/config.get_obs_hpss.do_vx_det.multicyc.cycintvl_24hr_inits_12z_fcstlen_48hr.nssl-mpas.yaml index 29427201e7..a55c7c1e0b 100644 --- a/tests/WE2E/test_configs/verification/config.get_obs_hpss.do_vx_det.multicyc.cycintvl_24hr_inits_12z_fcstlen_48hr.nssl-mpas.yaml +++ b/tests/WE2E/test_configs/verification/config.get_obs_hpss.do_vx_det.multicyc.cycintvl_24hr_inits_12z_fcstlen_48hr.nssl-mpas.yaml @@ -58,7 +58,7 @@ task_run_post: verification: METPLUS_VERBOSITY_LEVEL: 5 VX_FCST_MODEL_NAME: 'NSSL-MPAS-HN' - VX_FCST_INPUT_BASEDIR: '/scratch2/BMC/det/Gerard.Ketefian/UFS_CAM/forecasts_for_SRW_WE2E_tests' + VX_FCST_INPUT_BASEDIR: '/scratch2/BMC/fv3lam/Gerard.Ketefian/forecasts_for_SRW_WE2E_tests' FCST_SUBDIR_TEMPLATE: '{% if user.RUN_ENVIR == "nco" %}${NET_default}.{init?fmt=%Y%m%d?shift=-${time_lag}}/{init?fmt=%H?shift=-${time_lag}}{% else %}{{VX_FCST_MODEL_NAME}}_{init?fmt=%Y%m%d%H?shift=-${time_lag}}{% if global.DO_ENSEMBLE %}/${ensmem_name}{% endif %}{% endif %}' FCST_FN_TEMPLATE: '${POST_OUTPUT_DOMAIN_NAME}_{init?fmt=%Y%m%d%H?shift=-${time_lag}}f{lead?fmt=%H?shift=${time_lag}}.grib2' FCST_FN_TEMPLATE_PCPCOMBINE_OUTPUT: '${POST_OUTPUT_DOMAIN_NAME}_{init?fmt=%Y%m%d%H?shift=-${time_lag}}f{lead?fmt=%HHH?shift=${time_lag}}_${VAR}_a${ACCUM_HH}h.nc' diff --git 
a/tests/WE2E/test_configs/verification/config.get_obs_hpss.do_vx_det.multicyc.cycintvl_24hr_inits_21z_fcstlen_03hr.ncep-hrrr.yaml b/tests/WE2E/test_configs/verification/config.get_obs_hpss.do_vx_det.multicyc.cycintvl_24hr_inits_21z_fcstlen_03hr.ncep-hrrr.yaml index 508d14c7fa..e5f8fc8d1f 100644 --- a/tests/WE2E/test_configs/verification/config.get_obs_hpss.do_vx_det.multicyc.cycintvl_24hr_inits_21z_fcstlen_03hr.ncep-hrrr.yaml +++ b/tests/WE2E/test_configs/verification/config.get_obs_hpss.do_vx_det.multicyc.cycintvl_24hr_inits_21z_fcstlen_03hr.ncep-hrrr.yaml @@ -57,7 +57,7 @@ task_run_post: verification: METPLUS_VERBOSITY_LEVEL: 5 VX_FCST_MODEL_NAME: 'hrrr_ncep' - VX_FCST_INPUT_BASEDIR: '/scratch2/BMC/det/Gerard.Ketefian/UFS_CAM/forecasts_for_SRW_WE2E_tests' + VX_FCST_INPUT_BASEDIR: '/scratch2/BMC/fv3lam/Gerard.Ketefian/forecasts_for_SRW_WE2E_tests' FCST_SUBDIR_TEMPLATE: '{% if user.RUN_ENVIR == "nco" %}${NET_default}.{init?fmt=%Y%m%d?shift=-${time_lag}}/{init?fmt=%H?shift=-${time_lag}}{% else %}{{VX_FCST_MODEL_NAME}}_{init?fmt=%Y%m%d%H?shift=-${time_lag}}{% if global.DO_ENSEMBLE %}/${ensmem_name}{% endif %}{% endif %}' FCST_FN_TEMPLATE: '${POST_OUTPUT_DOMAIN_NAME}_{init?fmt=%Y%m%d%H?shift=-${time_lag}}f{lead?fmt=%HHH?shift=${time_lag}}.grib2' FCST_FN_TEMPLATE_PCPCOMBINE_OUTPUT: '${POST_OUTPUT_DOMAIN_NAME}_{init?fmt=%Y%m%d%HHH?shift=-${time_lag}}f{lead?fmt=%HHH?shift=${time_lag}}_${VAR}_a${ACCUM_HH}h.nc' diff --git a/tests/WE2E/test_configs/verification/config.get_obs_hpss.do_vx_det.multicyc.cycintvl_96hr_inits_12z_fcstlen_48hr.nssl-mpas.yaml b/tests/WE2E/test_configs/verification/config.get_obs_hpss.do_vx_det.multicyc.cycintvl_96hr_inits_12z_fcstlen_48hr.nssl-mpas.yaml index c838e8581d..6445ebca53 100644 --- a/tests/WE2E/test_configs/verification/config.get_obs_hpss.do_vx_det.multicyc.cycintvl_96hr_inits_12z_fcstlen_48hr.nssl-mpas.yaml +++ 
b/tests/WE2E/test_configs/verification/config.get_obs_hpss.do_vx_det.multicyc.cycintvl_96hr_inits_12z_fcstlen_48hr.nssl-mpas.yaml @@ -59,7 +59,7 @@ task_run_post: verification: METPLUS_VERBOSITY_LEVEL: 5 VX_FCST_MODEL_NAME: 'NSSL-MPAS-HN' - VX_FCST_INPUT_BASEDIR: '/scratch2/BMC/det/Gerard.Ketefian/UFS_CAM/forecasts_for_SRW_WE2E_tests' + VX_FCST_INPUT_BASEDIR: '/scratch2/BMC/fv3lam/Gerard.Ketefian/forecasts_for_SRW_WE2E_tests' FCST_SUBDIR_TEMPLATE: '{% if user.RUN_ENVIR == "nco" %}${NET_default}.{init?fmt=%Y%m%d?shift=-${time_lag}}/{init?fmt=%H?shift=-${time_lag}}{% else %}{{VX_FCST_MODEL_NAME}}_{init?fmt=%Y%m%d%H?shift=-${time_lag}}{% if global.DO_ENSEMBLE %}/${ensmem_name}{% endif %}{% endif %}' FCST_FN_TEMPLATE: '${POST_OUTPUT_DOMAIN_NAME}_{init?fmt=%Y%m%d%H?shift=-${time_lag}}f{lead?fmt=%H?shift=${time_lag}}.grib2' FCST_FN_TEMPLATE_PCPCOMBINE_OUTPUT: '${POST_OUTPUT_DOMAIN_NAME}_{init?fmt=%Y%m%d%H?shift=-${time_lag}}f{lead?fmt=%HHH?shift=${time_lag}}_${VAR}_a${ACCUM_HH}h.nc' From 830a635c2a0c9fcadf605b7d8d52f4b447c58652 Mon Sep 17 00:00:00 2001 From: gerard ketefian Date: Tue, 17 Sep 2024 12:58:42 -0600 Subject: [PATCH 066/131] Add check in setup.py to make sure that the accumulation periods of accumlated vx fields are less than or equal to the forecast length. This check is also made in the workflow yaml files, but if all the accumulation periods happen to be greater than the forecast length, then the field must be completely removed from verification (i.e. its vx tasks must be removed from the workflow), and that can only be done via this check in setup.py; it can't be done in the workflow task yaml files. 
--- ush/setup.py | 37 +++++++++++++++++++++++++++++-------- 1 file changed, 29 insertions(+), 8 deletions(-) diff --git a/ush/setup.py b/ush/setup.py index d60f8a9154..d6e9e5c2d0 100644 --- a/ush/setup.py +++ b/ush/setup.py @@ -546,6 +546,31 @@ def remove_tag(tasks, tag): # # ----------------------------------------------------------------------- # + # For vx fields that are accumulated, remove those accumulation hours + # that are longer than the forecast length. If that leaves the array + # of accumulation hours for that field empty, then remove the field + # from the list of fields to be verified. + # + # ----------------------------------------------------------------------- + # + # Get the vx fields specified in the experiment configuration. + vx_fields_config = expt_config["verification"]["VX_FIELDS"] + + fcst_len_hrs = workflow_config.get("FCST_LEN_HRS") + vx_fields_accum = ["APCP", "ASNOW"] + for field in vx_fields_accum: + if field in vx_fields_config: + accum_periods_array_name = "".join(["VX_", field, "_ACCUMS_HRS"]) + accum_periods = expt_config["verification"][accum_periods_array_name] + accum_periods = [accum for accum in accum_periods if (accum <= fcst_len_hrs)] + expt_config["verification"][accum_periods_array_name] = accum_periods + if not accum_periods: + vx_fields_config.remove(field) + + expt_config["verification"]["VX_FIELDS"] = vx_fields_config + # + # ----------------------------------------------------------------------- + # # Remove all verification [meta]tasks for which no fields are specified. 
# # ----------------------------------------------------------------------- @@ -562,10 +587,10 @@ def remove_tag(tasks, tag): vx_fields_all["NOHRSC"] = ["ASNOW"] vx_metatasks_all["NOHRSC"] = ["task_get_obs_nohrsc", - "metatask_PcpCombine_fcst_ASNOW_all_accums_all_mems", - "metatask_GridStat_NOHRSC_all_accums_all_mems", - "metatask_GenEnsProd_EnsembleStat_NOHRSC", - "metatask_GridStat_NOHRSC_ensmeanprob_all_accums"] + "metatask_PcpCombine_fcst_ASNOW_all_accums_all_mems", + "metatask_GridStat_NOHRSC_all_accums_all_mems", + "metatask_GenEnsProd_EnsembleStat_NOHRSC", + "metatask_GridStat_NOHRSC_ensmeanprob_all_accums"] vx_fields_all["MRMS"] = ["REFC", "RETOP"] vx_metatasks_all["MRMS"] = ["metatask_GridStat_MRMS_all_mems", @@ -578,9 +603,6 @@ def remove_tag(tasks, tag): "metatask_GenEnsProd_EnsembleStat_NDAS", "metatask_PointStat_NDAS_ensmeanprob"] - # Get the vx fields specified in the experiment configuration. - vx_fields_config = expt_config["verification"]["VX_FIELDS"] - # If there are no vx fields specified, remove those tasks that are necessary # for all observation types. if not vx_fields_config: @@ -602,7 +624,6 @@ def remove_tag(tasks, tag): are specified for verification.""" )) rocoto_config['tasks'].pop(metatask) - # # ----------------------------------------------------------------------- # From 407c51b9a183983001f5d33ed25a05372361fe3e Mon Sep 17 00:00:00 2001 From: gerard ketefian Date: Fri, 20 Sep 2024 08:25:06 -0600 Subject: [PATCH 067/131] Fix typo. 
--- parm/metplus/PcpCombine.conf | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/parm/metplus/PcpCombine.conf b/parm/metplus/PcpCombine.conf index 3cee69df1d..c2807e6380 100644 --- a/parm/metplus/PcpCombine.conf +++ b/parm/metplus/PcpCombine.conf @@ -98,7 +98,7 @@ FCST_PCP_COMBINE_INPUT_LEVELS = A01 # For accumulation variables (which is the only type of variable that we # run PcpCombine on), we add the accumulation period to the variable name # because this is how METplus normally sets names. This is because, -# epending on the settings in the METplus configuration file, it is +# depending on the settings in the METplus configuration file, it is # possible for a single NetCDF output file to contain output for multiple # accumulations, so even though the "level" attribute of each accumulation # variable in the output file will contain the level (e.g. "A1" or "A3"), From e3de6e1386955d36df0117e42cd376b1e829bcc6 Mon Sep 17 00:00:00 2001 From: gerard ketefian Date: Fri, 20 Sep 2024 08:32:02 -0600 Subject: [PATCH 068/131] Fix typo. --- ush/get_obs_ndas.sh | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/ush/get_obs_ndas.sh b/ush/get_obs_ndas.sh index 5562647340..8b0c87b3eb 100755 --- a/ush/get_obs_ndas.sh +++ b/ush/get_obs_ndas.sh @@ -133,7 +133,7 @@ File already exists on disk: hr=$((10#${hh})) arcv_hr_start=$(( (hr/arcv_hr_incr + 1)*arcv_hr_incr )) print_info_msg " -File does not exists on disk: +File does not exist on disk: fp_proc = \"${fp_proc}\" Setting the hour (since 00) of the first archive to retrieve to: arcv_hr_start = \"${arcv_hr_start}\"" From fc7e257ebc88750bc0d0a1d20c979506f49fc179 Mon Sep 17 00:00:00 2001 From: gerard ketefian Date: Wed, 25 Sep 2024 07:22:44 -0600 Subject: [PATCH 069/131] Changes to allow NOHRSC obs and ASNOW to be accumulated by PcpCombine and verified in GridStat. 
--- parm/data_locations.yml | 2 +- parm/metplus/GridStat_or_PointStat.conf | 11 ++------ parm/metplus/PcpCombine.conf | 34 ++++++++++++++++--------- parm/metplus/vx_config_det.yaml | 1 + parm/metplus/vx_config_ens.yaml | 1 + 5 files changed, 27 insertions(+), 22 deletions(-) diff --git a/parm/data_locations.yml b/parm/data_locations.yml index a3712a1972..5a30e48774 100644 --- a/parm/data_locations.yml +++ b/parm/data_locations.yml @@ -371,6 +371,6 @@ NOHRSC_obs: - "dcom_{yyyy}{mm}{dd}.tar" file_names: obs: - - "sfav2_CONUS_*h_{yyyy}{mm}{dd}{hh}_grid184.grb2" + - "sfav2_CONUS_6h_{yyyy}{mm}{dd}*_grid184.grb2" archive_internal_dir: - ./wgrbbul/nohrsc_snowfall/ diff --git a/parm/metplus/GridStat_or_PointStat.conf b/parm/metplus/GridStat_or_PointStat.conf index 39d34eb24f..7bd0039ab5 100644 --- a/parm/metplus/GridStat_or_PointStat.conf +++ b/parm/metplus/GridStat_or_PointStat.conf @@ -93,7 +93,7 @@ METPLUS_CONF = {{ '{' ~ METPLUS_TOOL_NAME ~ '_OUTPUT_DIR}' }}/metplus_final.{{me {%- if (METPLUS_TOOL_NAME == 'GRID_STAT') %} - {%- if (input_field_group == 'APCP') %} + {%- if input_field_group in ['APCP', 'ASNOW'] %} #{{METPLUS_TOOL_NAME}}_INTERP_FIELD = BOTH #{{METPLUS_TOOL_NAME}}_INTERP_VLD_THRESH = 1.0 @@ -539,15 +539,8 @@ PcpCombine tool. In that file, the field name consists of the observation field name here (field_obs) with the accumulation period appended to it (separated by an underscore), so we must do the same here to get an exact match. - -Note: -Turns out for ASNOW, PcpCombine is not run for obs, so we exclude that -from the "if" clause here (so it goes into the "else"). For workflow -behavior uniformity between APCP and ASNOW, consider running PcpCombine -for ASNOW observations as well (just as it's run for APCP observations). 
- {%- if (input_field_group in ['APCP', 'ASNOW']) %} #} - {%- if (input_field_group in ['APCP']) %} + {%- if (input_field_group in ['APCP', 'ASNOW']) %} OBS_VAR{{ns.var_count}}_NAME = {{field_obs}}_{{accum_hh}} {%- else %} OBS_VAR{{ns.var_count}}_NAME = {{field_obs}} diff --git a/parm/metplus/PcpCombine.conf b/parm/metplus/PcpCombine.conf index c2807e6380..5bdd09c761 100644 --- a/parm/metplus/PcpCombine.conf +++ b/parm/metplus/PcpCombine.conf @@ -79,17 +79,26 @@ FCST_PCP_COMBINE_RUN = False # {{FCST_OR_OBS}}_PCP_COMBINE_METHOD = ADD -{%- if (FCST_OR_OBS == 'FCST') and (input_field_group == 'ASNOW') %} +{%- if (FCST_OR_OBS == 'FCST') %} + {%- if (input_field_group == 'ASNOW') %} # # Specify name of variable for Snowfall Accumulation. -# NOTE: Currently TSNOWP is used which is a constant-density estimate of snowfall accumulation. -# In future RRFS development, a GSL product with variable-density snowfall accumulation -# is planned for UPP. When that is included and turned on in post, this variable may be changed -# to ASNOW. # -FCST_PCP_COMBINE_INPUT_NAMES = TSNOWP - -FCST_PCP_COMBINE_INPUT_LEVELS = A01 +# NOTE: +# For forecasts, currently TSNOWP is used which is a constant-density +# estimate of snowfall accumulation. In future RRFS development, a GSL +# product with variable-density snowfall accumulation is planned for UPP. +# When that is included and turned on in post, this variable may be +# changed to ASNOW. +# +{{FCST_OR_OBS}}_PCP_COMBINE_INPUT_NAMES = TSNOWP +{{FCST_OR_OBS}}_PCP_COMBINE_INPUT_LEVELS = A{{input_accum_hh}} + {%- endif %} +{%- elif (FCST_OR_OBS == 'OBS') %} + {%- if (input_field_group == 'ASNOW') %} +{{FCST_OR_OBS}}_PCP_COMBINE_INPUT_NAMES = ASNOW +{{FCST_OR_OBS}}_PCP_COMBINE_INPUT_LEVELS = A{{input_accum_hh}} + {%- endif %} {%- endif %} # # Specify how to name the array in the NetCDF file that PcpCombine @@ -110,18 +119,19 @@ FCST_PCP_COMBINE_INPUT_LEVELS = A01 # the output NetCDF file). 
# {%- if (input_field_group in ['APCP', 'ASNOW']) %} -{{FCST_OR_OBS}}_PCP_COMBINE_OUTPUT_NAME = {{fieldname_in_met_output}}_{{accum_hh}} +{{FCST_OR_OBS}}_PCP_COMBINE_OUTPUT_NAME = {{fieldname_in_met_output}}_{{output_accum_hh}} {%- else %} {{FCST_OR_OBS}}_PCP_COMBINE_OUTPUT_NAME = {{fieldname_in_met_output}} {%- endif %} # # Accumulation interval available in the input data. # -{{FCST_OR_OBS}}_PCP_COMBINE_INPUT_ACCUMS = 01 +#{{FCST_OR_OBS}}_PCP_COMBINE_INPUT_ACCUMS = 01 +{{FCST_OR_OBS}}_PCP_COMBINE_INPUT_ACCUMS = {{input_accum_hh}} # # Accumulation interval to generate in the output file. # -{{FCST_OR_OBS}}_PCP_COMBINE_OUTPUT_ACCUM = {{accum_hh}} +{{FCST_OR_OBS}}_PCP_COMBINE_OUTPUT_ACCUM = {{output_accum_hh}} # # If the output NetCDF file already exists, specify whether or not to # skip the call to PcpCombine. @@ -177,7 +187,7 @@ FCST_PCP_COMBINE_CONSTANT_INIT = True # # Name to identify observation data in output. # -OBTYPE = CCPA +OBTYPE = {{obtype}} {%- endif %} # # Specify file type of input data. 
diff --git a/parm/metplus/vx_config_det.yaml b/parm/metplus/vx_config_det.yaml index 8ea3fd5e13..c20e192dcb 100644 --- a/parm/metplus/vx_config_det.yaml +++ b/parm/metplus/vx_config_det.yaml @@ -50,6 +50,7 @@ APCP: ASNOW: ASNOW: A6: ['gt0.0', 'ge2.54', 'ge5.08', 'ge10.16', 'ge20.32'] + A24: ['gt0.0', 'ge2.54', 'ge10.16', 'ge20.32', 'ge30.48'] REFC: REFC%%MergedReflectivityQCComposite: L0%%Z500: ['ge20', 'ge30', 'ge40', 'ge50'] diff --git a/parm/metplus/vx_config_ens.yaml b/parm/metplus/vx_config_ens.yaml index 5f55254a4c..2608490565 100644 --- a/parm/metplus/vx_config_ens.yaml +++ b/parm/metplus/vx_config_ens.yaml @@ -14,6 +14,7 @@ APCP: ASNOW: ASNOW: A6: ['gt0.0', 'ge2.54', 'ge5.08', 'ge10.16', 'ge20.32'] + A24: ['gt0.0', 'ge2.54', 'ge10.16', 'ge20.32', 'ge30.48'] REFC: REFC%%MergedReflectivityQCComposite: L0%%Z500: ['ge20', 'ge30', 'ge40', 'ge50'] From 75325447c87aa12ccb95f8bd35ccab220e78add6 Mon Sep 17 00:00:00 2001 From: gerard ketefian Date: Wed, 25 Sep 2024 07:28:34 -0600 Subject: [PATCH 070/131] Changes to get_obs/vx WE2E tests to get them working with staged forecast output at new location. 
--- ...7hr_inits_vary_fcstlen_09hr.ncep-hrrr.yaml | 21 ++++++++----------- ...1hr_inits_vary_fcstlen_03hr.ncep-hrrr.yaml | 21 ++++++++----------- ...24hr_inits_00z_fcstlen_03hr.ncep-hrrr.yaml | 21 ++++++++----------- ...24hr_inits_12z_fcstlen_03hr.nssl-mpas.yaml | 21 ++++++++----------- ...24hr_inits_12z_fcstlen_48hr.nssl-mpas.yaml | 21 ++++++++----------- ...24hr_inits_21z_fcstlen_03hr.ncep-hrrr.yaml | 21 ++++++++----------- ...96hr_inits_12z_fcstlen_48hr.nssl-mpas.yaml | 21 ++++++++----------- 7 files changed, 63 insertions(+), 84 deletions(-) diff --git a/tests/WE2E/test_configs/verification/config.get_obs_hpss.do_vx_det.multicyc.cycintvl_07hr_inits_vary_fcstlen_09hr.ncep-hrrr.yaml b/tests/WE2E/test_configs/verification/config.get_obs_hpss.do_vx_det.multicyc.cycintvl_07hr_inits_vary_fcstlen_09hr.ncep-hrrr.yaml index e386d84e3d..41428a7939 100644 --- a/tests/WE2E/test_configs/verification/config.get_obs_hpss.do_vx_det.multicyc.cycintvl_07hr_inits_vary_fcstlen_09hr.ncep-hrrr.yaml +++ b/tests/WE2E/test_configs/verification/config.get_obs_hpss.do_vx_det.multicyc.cycintvl_07hr_inits_vary_fcstlen_09hr.ncep-hrrr.yaml @@ -15,16 +15,6 @@ user: platform: EXTRN_MDL_DATA_STORES: hpss - -workflow: - PREEXISTING_DIR_METHOD: rename - PREDEF_GRID_NAME: RRFS_CONUS_25km - INCR_CYCL_FREQ: 7 - DATE_FIRST_CYCL: '2024042902' - DATE_LAST_CYCL: '2024043006' - FCST_LEN_HRS: 9 - -platform: # Base directories in which to look for obs. If these do not exist and/or # do not contain the requied obs, create and populate them. 
# Note that when performing WE2E tests, the default behavior is not to @@ -44,6 +34,14 @@ platform: REMOVE_RAW_OBS_NDAS: false REMOVE_RAW_OBS_NOHRSC: false +workflow: + PREEXISTING_DIR_METHOD: rename + PREDEF_GRID_NAME: RRFS_CONUS_25km + INCR_CYCL_FREQ: 7 + DATE_FIRST_CYCL: '2024042902' + DATE_LAST_CYCL: '2024043006' + FCST_LEN_HRS: 9 + rocoto: tasks: taskgroups: '{{ ["parm/wflow/verify_pre.yaml", "parm/wflow/verify_det.yaml"]|include }}' @@ -57,7 +55,6 @@ task_run_post: verification: METPLUS_VERBOSITY_LEVEL: 5 VX_FCST_MODEL_NAME: 'hrrr_ncep' - VX_FCST_INPUT_BASEDIR: '/scratch2/BMC/fv3lam/Gerard.Ketefian/forecasts_for_SRW_WE2E_tests' - FCST_SUBDIR_TEMPLATE: '{% if user.RUN_ENVIR == "nco" %}${NET_default}.{init?fmt=%Y%m%d?shift=-${time_lag}}/{init?fmt=%H?shift=-${time_lag}}{% else %}{{VX_FCST_MODEL_NAME}}_{init?fmt=%Y%m%d%H?shift=-${time_lag}}{% if global.DO_ENSEMBLE %}/${ensmem_name}{% endif %}{% endif %}' + VX_FCST_INPUT_BASEDIR: '/scratch2/BMC/fv3lam/Gerard.Ketefian/forecasts_for_SRW_WE2E_tests.linked/{{- verification.VX_FCST_MODEL_NAME }}' FCST_FN_TEMPLATE: '${POST_OUTPUT_DOMAIN_NAME}_{init?fmt=%Y%m%d%H?shift=-${time_lag}}f{lead?fmt=%HHH?shift=${time_lag}}.grib2' FCST_FN_TEMPLATE_PCPCOMBINE_OUTPUT: '${POST_OUTPUT_DOMAIN_NAME}_{init?fmt=%Y%m%d%HHH?shift=-${time_lag}}f{lead?fmt=%HHH?shift=${time_lag}}_${VAR}_a${ACCUM_HH}h.nc' diff --git a/tests/WE2E/test_configs/verification/config.get_obs_hpss.do_vx_det.multicyc.cycintvl_11hr_inits_vary_fcstlen_03hr.ncep-hrrr.yaml b/tests/WE2E/test_configs/verification/config.get_obs_hpss.do_vx_det.multicyc.cycintvl_11hr_inits_vary_fcstlen_03hr.ncep-hrrr.yaml index 9694f9845f..2fae0d6388 100644 --- a/tests/WE2E/test_configs/verification/config.get_obs_hpss.do_vx_det.multicyc.cycintvl_11hr_inits_vary_fcstlen_03hr.ncep-hrrr.yaml +++ b/tests/WE2E/test_configs/verification/config.get_obs_hpss.do_vx_det.multicyc.cycintvl_11hr_inits_vary_fcstlen_03hr.ncep-hrrr.yaml @@ -17,16 +17,6 @@ user: platform: EXTRN_MDL_DATA_STORES: hpss - 
-workflow: - PREEXISTING_DIR_METHOD: rename - PREDEF_GRID_NAME: RRFS_CONUS_25km - INCR_CYCL_FREQ: 11 - DATE_FIRST_CYCL: '2024042902' - DATE_LAST_CYCL: '2024043022' - FCST_LEN_HRS: 3 - -platform: # Base directories in which to look for obs. If these do not exist and/or # do not contain the requied obs, create and populate them. # Note that when performing WE2E tests, the default behavior is not to @@ -46,6 +36,14 @@ platform: REMOVE_RAW_OBS_NDAS: false REMOVE_RAW_OBS_NOHRSC: false +workflow: + PREEXISTING_DIR_METHOD: rename + PREDEF_GRID_NAME: RRFS_CONUS_25km + INCR_CYCL_FREQ: 11 + DATE_FIRST_CYCL: '2024042902' + DATE_LAST_CYCL: '2024043022' + FCST_LEN_HRS: 3 + rocoto: tasks: taskgroups: '{{ ["parm/wflow/verify_pre.yaml", "parm/wflow/verify_det.yaml"]|include }}' @@ -59,7 +57,6 @@ task_run_post: verification: METPLUS_VERBOSITY_LEVEL: 5 VX_FCST_MODEL_NAME: 'hrrr_ncep' - VX_FCST_INPUT_BASEDIR: '/scratch2/BMC/fv3lam/Gerard.Ketefian/forecasts_for_SRW_WE2E_tests' - FCST_SUBDIR_TEMPLATE: '{% if user.RUN_ENVIR == "nco" %}${NET_default}.{init?fmt=%Y%m%d?shift=-${time_lag}}/{init?fmt=%H?shift=-${time_lag}}{% else %}{{VX_FCST_MODEL_NAME}}_{init?fmt=%Y%m%d%H?shift=-${time_lag}}{% if global.DO_ENSEMBLE %}/${ensmem_name}{% endif %}{% endif %}' + VX_FCST_INPUT_BASEDIR: '/scratch2/BMC/fv3lam/Gerard.Ketefian/forecasts_for_SRW_WE2E_tests.linked/{{- verification.VX_FCST_MODEL_NAME }}' FCST_FN_TEMPLATE: '${POST_OUTPUT_DOMAIN_NAME}_{init?fmt=%Y%m%d%H?shift=-${time_lag}}f{lead?fmt=%HHH?shift=${time_lag}}.grib2' FCST_FN_TEMPLATE_PCPCOMBINE_OUTPUT: '${POST_OUTPUT_DOMAIN_NAME}_{init?fmt=%Y%m%d%HHH?shift=-${time_lag}}f{lead?fmt=%HHH?shift=${time_lag}}_${VAR}_a${ACCUM_HH}h.nc' diff --git a/tests/WE2E/test_configs/verification/config.get_obs_hpss.do_vx_det.multicyc.cycintvl_24hr_inits_00z_fcstlen_03hr.ncep-hrrr.yaml b/tests/WE2E/test_configs/verification/config.get_obs_hpss.do_vx_det.multicyc.cycintvl_24hr_inits_00z_fcstlen_03hr.ncep-hrrr.yaml index 067187a216..e150234a47 100644 --- 
a/tests/WE2E/test_configs/verification/config.get_obs_hpss.do_vx_det.multicyc.cycintvl_24hr_inits_00z_fcstlen_03hr.ncep-hrrr.yaml +++ b/tests/WE2E/test_configs/verification/config.get_obs_hpss.do_vx_det.multicyc.cycintvl_24hr_inits_00z_fcstlen_03hr.ncep-hrrr.yaml @@ -15,16 +15,6 @@ user: platform: EXTRN_MDL_DATA_STORES: hpss - -workflow: - PREEXISTING_DIR_METHOD: rename - PREDEF_GRID_NAME: RRFS_CONUS_25km - INCR_CYCL_FREQ: 24 - DATE_FIRST_CYCL: '2024042900' - DATE_LAST_CYCL: '2024043000' - FCST_LEN_HRS: 3 - -platform: # Base directories in which to look for obs. If these do not exist and/or # do not contain the requied obs, create and populate them. # Note that when performing WE2E tests, the default behavior is not to @@ -44,6 +34,14 @@ platform: REMOVE_RAW_OBS_NDAS: false REMOVE_RAW_OBS_NOHRSC: false +workflow: + PREEXISTING_DIR_METHOD: rename + PREDEF_GRID_NAME: RRFS_CONUS_25km + INCR_CYCL_FREQ: 24 + DATE_FIRST_CYCL: '2024042900' + DATE_LAST_CYCL: '2024043000' + FCST_LEN_HRS: 3 + rocoto: tasks: taskgroups: '{{ ["parm/wflow/verify_pre.yaml", "parm/wflow/verify_det.yaml"]|include }}' @@ -57,7 +55,6 @@ task_run_post: verification: METPLUS_VERBOSITY_LEVEL: 5 VX_FCST_MODEL_NAME: 'hrrr_ncep' - VX_FCST_INPUT_BASEDIR: '/scratch2/BMC/fv3lam/Gerard.Ketefian/forecasts_for_SRW_WE2E_tests' - FCST_SUBDIR_TEMPLATE: '{% if user.RUN_ENVIR == "nco" %}${NET_default}.{init?fmt=%Y%m%d?shift=-${time_lag}}/{init?fmt=%H?shift=-${time_lag}}{% else %}{{VX_FCST_MODEL_NAME}}_{init?fmt=%Y%m%d%H?shift=-${time_lag}}{% if global.DO_ENSEMBLE %}/${ensmem_name}{% endif %}{% endif %}' + VX_FCST_INPUT_BASEDIR: '/scratch2/BMC/fv3lam/Gerard.Ketefian/forecasts_for_SRW_WE2E_tests.linked/{{- verification.VX_FCST_MODEL_NAME }}' FCST_FN_TEMPLATE: '${POST_OUTPUT_DOMAIN_NAME}_{init?fmt=%Y%m%d%H?shift=-${time_lag}}f{lead?fmt=%HHH?shift=${time_lag}}.grib2' FCST_FN_TEMPLATE_PCPCOMBINE_OUTPUT: 
'${POST_OUTPUT_DOMAIN_NAME}_{init?fmt=%Y%m%d%HHH?shift=-${time_lag}}f{lead?fmt=%HHH?shift=${time_lag}}_${VAR}_a${ACCUM_HH}h.nc' diff --git a/tests/WE2E/test_configs/verification/config.get_obs_hpss.do_vx_det.multicyc.cycintvl_24hr_inits_12z_fcstlen_03hr.nssl-mpas.yaml b/tests/WE2E/test_configs/verification/config.get_obs_hpss.do_vx_det.multicyc.cycintvl_24hr_inits_12z_fcstlen_03hr.nssl-mpas.yaml index 75d706ba1f..2e180e2714 100644 --- a/tests/WE2E/test_configs/verification/config.get_obs_hpss.do_vx_det.multicyc.cycintvl_24hr_inits_12z_fcstlen_03hr.nssl-mpas.yaml +++ b/tests/WE2E/test_configs/verification/config.get_obs_hpss.do_vx_det.multicyc.cycintvl_24hr_inits_12z_fcstlen_03hr.nssl-mpas.yaml @@ -17,16 +17,6 @@ user: platform: EXTRN_MDL_DATA_STORES: hpss - -workflow: - PREEXISTING_DIR_METHOD: rename - PREDEF_GRID_NAME: RRFS_CONUS_25km - INCR_CYCL_FREQ: 24 - DATE_FIRST_CYCL: '2024042912' - DATE_LAST_CYCL: '2024043012' - FCST_LEN_HRS: 3 - -platform: # Base directories in which to look for obs. If these do not exist and/or # do not contain the requied obs, create and populate them. 
# Note that when performing WE2E tests, the default behavior is not to @@ -46,6 +36,14 @@ platform: REMOVE_RAW_OBS_NDAS: false REMOVE_RAW_OBS_NOHRSC: false +workflow: + PREEXISTING_DIR_METHOD: rename + PREDEF_GRID_NAME: RRFS_CONUS_25km + INCR_CYCL_FREQ: 24 + DATE_FIRST_CYCL: '2024042912' + DATE_LAST_CYCL: '2024043012' + FCST_LEN_HRS: 3 + rocoto: tasks: taskgroups: '{{ ["parm/wflow/verify_pre.yaml", "parm/wflow/verify_det.yaml"]|include }}' @@ -59,7 +57,6 @@ task_run_post: verification: METPLUS_VERBOSITY_LEVEL: 5 VX_FCST_MODEL_NAME: 'NSSL-MPAS-HN' - VX_FCST_INPUT_BASEDIR: '/scratch2/BMC/fv3lam/Gerard.Ketefian/forecasts_for_SRW_WE2E_tests' - FCST_SUBDIR_TEMPLATE: '{% if user.RUN_ENVIR == "nco" %}${NET_default}.{init?fmt=%Y%m%d?shift=-${time_lag}}/{init?fmt=%H?shift=-${time_lag}}{% else %}{{VX_FCST_MODEL_NAME}}_{init?fmt=%Y%m%d%H?shift=-${time_lag}}{% if global.DO_ENSEMBLE %}/${ensmem_name}{% endif %}{% endif %}' + VX_FCST_INPUT_BASEDIR: '/scratch2/BMC/fv3lam/Gerard.Ketefian/forecasts_for_SRW_WE2E_tests.linked/{{- verification.VX_FCST_MODEL_NAME }}' FCST_FN_TEMPLATE: '${POST_OUTPUT_DOMAIN_NAME}_{init?fmt=%Y%m%d%H?shift=-${time_lag}}f{lead?fmt=%H?shift=${time_lag}}.grib2' FCST_FN_TEMPLATE_PCPCOMBINE_OUTPUT: '${POST_OUTPUT_DOMAIN_NAME}_{init?fmt=%Y%m%d%H?shift=-${time_lag}}f{lead?fmt=%HHH?shift=${time_lag}}_${VAR}_a${ACCUM_HH}h.nc' diff --git a/tests/WE2E/test_configs/verification/config.get_obs_hpss.do_vx_det.multicyc.cycintvl_24hr_inits_12z_fcstlen_48hr.nssl-mpas.yaml b/tests/WE2E/test_configs/verification/config.get_obs_hpss.do_vx_det.multicyc.cycintvl_24hr_inits_12z_fcstlen_48hr.nssl-mpas.yaml index a55c7c1e0b..37c3eceb24 100644 --- a/tests/WE2E/test_configs/verification/config.get_obs_hpss.do_vx_det.multicyc.cycintvl_24hr_inits_12z_fcstlen_48hr.nssl-mpas.yaml +++ b/tests/WE2E/test_configs/verification/config.get_obs_hpss.do_vx_det.multicyc.cycintvl_24hr_inits_12z_fcstlen_48hr.nssl-mpas.yaml @@ -16,16 +16,6 @@ user: platform: EXTRN_MDL_DATA_STORES: hpss - -workflow: 
- PREEXISTING_DIR_METHOD: rename - PREDEF_GRID_NAME: RRFS_CONUS_25km - INCR_CYCL_FREQ: 24 - DATE_FIRST_CYCL: '2024042912' - DATE_LAST_CYCL: '2024050212' - FCST_LEN_HRS: 48 - -platform: # Base directories in which to look for obs. If these do not exist and/or # do not contain the requied obs, create and populate them. # Note that when performing WE2E tests, the default behavior is not to @@ -45,6 +35,14 @@ platform: REMOVE_RAW_OBS_NDAS: false REMOVE_RAW_OBS_NOHRSC: false +workflow: + PREEXISTING_DIR_METHOD: rename + PREDEF_GRID_NAME: RRFS_CONUS_25km + INCR_CYCL_FREQ: 24 + DATE_FIRST_CYCL: '2024042912' + DATE_LAST_CYCL: '2024050212' + FCST_LEN_HRS: 48 + rocoto: tasks: taskgroups: '{{ ["parm/wflow/verify_pre.yaml", "parm/wflow/verify_det.yaml"]|include }}' @@ -58,7 +56,6 @@ task_run_post: verification: METPLUS_VERBOSITY_LEVEL: 5 VX_FCST_MODEL_NAME: 'NSSL-MPAS-HN' - VX_FCST_INPUT_BASEDIR: '/scratch2/BMC/fv3lam/Gerard.Ketefian/forecasts_for_SRW_WE2E_tests' - FCST_SUBDIR_TEMPLATE: '{% if user.RUN_ENVIR == "nco" %}${NET_default}.{init?fmt=%Y%m%d?shift=-${time_lag}}/{init?fmt=%H?shift=-${time_lag}}{% else %}{{VX_FCST_MODEL_NAME}}_{init?fmt=%Y%m%d%H?shift=-${time_lag}}{% if global.DO_ENSEMBLE %}/${ensmem_name}{% endif %}{% endif %}' + VX_FCST_INPUT_BASEDIR: '/scratch2/BMC/fv3lam/Gerard.Ketefian/forecasts_for_SRW_WE2E_tests.linked/{{- verification.VX_FCST_MODEL_NAME }}' FCST_FN_TEMPLATE: '${POST_OUTPUT_DOMAIN_NAME}_{init?fmt=%Y%m%d%H?shift=-${time_lag}}f{lead?fmt=%H?shift=${time_lag}}.grib2' FCST_FN_TEMPLATE_PCPCOMBINE_OUTPUT: '${POST_OUTPUT_DOMAIN_NAME}_{init?fmt=%Y%m%d%H?shift=-${time_lag}}f{lead?fmt=%HHH?shift=${time_lag}}_${VAR}_a${ACCUM_HH}h.nc' diff --git a/tests/WE2E/test_configs/verification/config.get_obs_hpss.do_vx_det.multicyc.cycintvl_24hr_inits_21z_fcstlen_03hr.ncep-hrrr.yaml b/tests/WE2E/test_configs/verification/config.get_obs_hpss.do_vx_det.multicyc.cycintvl_24hr_inits_21z_fcstlen_03hr.ncep-hrrr.yaml index e5f8fc8d1f..d8eb349433 100644 --- 
a/tests/WE2E/test_configs/verification/config.get_obs_hpss.do_vx_det.multicyc.cycintvl_24hr_inits_21z_fcstlen_03hr.ncep-hrrr.yaml +++ b/tests/WE2E/test_configs/verification/config.get_obs_hpss.do_vx_det.multicyc.cycintvl_24hr_inits_21z_fcstlen_03hr.ncep-hrrr.yaml @@ -15,16 +15,6 @@ user: platform: EXTRN_MDL_DATA_STORES: hpss - -workflow: - PREEXISTING_DIR_METHOD: rename - PREDEF_GRID_NAME: RRFS_CONUS_25km - INCR_CYCL_FREQ: 24 - DATE_FIRST_CYCL: '2024042921' - DATE_LAST_CYCL: '2024043021' - FCST_LEN_HRS: 3 - -platform: # Base directories in which to look for obs. If these do not exist and/or # do not contain the requied obs, create and populate them. # Note that when performing WE2E tests, the default behavior is not to @@ -44,6 +34,14 @@ platform: REMOVE_RAW_OBS_NDAS: false REMOVE_RAW_OBS_NOHRSC: false +workflow: + PREEXISTING_DIR_METHOD: rename + PREDEF_GRID_NAME: RRFS_CONUS_25km + INCR_CYCL_FREQ: 24 + DATE_FIRST_CYCL: '2024042921' + DATE_LAST_CYCL: '2024043021' + FCST_LEN_HRS: 3 + rocoto: tasks: taskgroups: '{{ ["parm/wflow/verify_pre.yaml", "parm/wflow/verify_det.yaml"]|include }}' @@ -57,7 +55,6 @@ task_run_post: verification: METPLUS_VERBOSITY_LEVEL: 5 VX_FCST_MODEL_NAME: 'hrrr_ncep' - VX_FCST_INPUT_BASEDIR: '/scratch2/BMC/fv3lam/Gerard.Ketefian/forecasts_for_SRW_WE2E_tests' - FCST_SUBDIR_TEMPLATE: '{% if user.RUN_ENVIR == "nco" %}${NET_default}.{init?fmt=%Y%m%d?shift=-${time_lag}}/{init?fmt=%H?shift=-${time_lag}}{% else %}{{VX_FCST_MODEL_NAME}}_{init?fmt=%Y%m%d%H?shift=-${time_lag}}{% if global.DO_ENSEMBLE %}/${ensmem_name}{% endif %}{% endif %}' + VX_FCST_INPUT_BASEDIR: '/scratch2/BMC/fv3lam/Gerard.Ketefian/forecasts_for_SRW_WE2E_tests.linked/{{- verification.VX_FCST_MODEL_NAME }}' FCST_FN_TEMPLATE: '${POST_OUTPUT_DOMAIN_NAME}_{init?fmt=%Y%m%d%H?shift=-${time_lag}}f{lead?fmt=%HHH?shift=${time_lag}}.grib2' FCST_FN_TEMPLATE_PCPCOMBINE_OUTPUT: 
'${POST_OUTPUT_DOMAIN_NAME}_{init?fmt=%Y%m%d%HHH?shift=-${time_lag}}f{lead?fmt=%HHH?shift=${time_lag}}_${VAR}_a${ACCUM_HH}h.nc' diff --git a/tests/WE2E/test_configs/verification/config.get_obs_hpss.do_vx_det.multicyc.cycintvl_96hr_inits_12z_fcstlen_48hr.nssl-mpas.yaml b/tests/WE2E/test_configs/verification/config.get_obs_hpss.do_vx_det.multicyc.cycintvl_96hr_inits_12z_fcstlen_48hr.nssl-mpas.yaml index 6445ebca53..514dbed8d3 100644 --- a/tests/WE2E/test_configs/verification/config.get_obs_hpss.do_vx_det.multicyc.cycintvl_96hr_inits_12z_fcstlen_48hr.nssl-mpas.yaml +++ b/tests/WE2E/test_configs/verification/config.get_obs_hpss.do_vx_det.multicyc.cycintvl_96hr_inits_12z_fcstlen_48hr.nssl-mpas.yaml @@ -17,16 +17,6 @@ user: platform: EXTRN_MDL_DATA_STORES: hpss - -workflow: - PREEXISTING_DIR_METHOD: rename - PREDEF_GRID_NAME: RRFS_CONUS_25km - INCR_CYCL_FREQ: 96 - DATE_FIRST_CYCL: '2024042912' - DATE_LAST_CYCL: '2024051112' - FCST_LEN_HRS: 48 - -platform: # Base directories in which to look for obs. If these do not exist and/or # do not contain the requied obs, create and populate them. 
# Note that when performing WE2E tests, the default behavior is not to @@ -46,6 +36,14 @@ platform: REMOVE_RAW_OBS_NDAS: false REMOVE_RAW_OBS_NOHRSC: false +workflow: + PREEXISTING_DIR_METHOD: rename + PREDEF_GRID_NAME: RRFS_CONUS_25km + INCR_CYCL_FREQ: 96 + DATE_FIRST_CYCL: '2024042912' + DATE_LAST_CYCL: '2024051112' + FCST_LEN_HRS: 48 + rocoto: tasks: taskgroups: '{{ ["parm/wflow/verify_pre.yaml", "parm/wflow/verify_det.yaml"]|include }}' @@ -59,7 +57,6 @@ task_run_post: verification: METPLUS_VERBOSITY_LEVEL: 5 VX_FCST_MODEL_NAME: 'NSSL-MPAS-HN' - VX_FCST_INPUT_BASEDIR: '/scratch2/BMC/fv3lam/Gerard.Ketefian/forecasts_for_SRW_WE2E_tests' - FCST_SUBDIR_TEMPLATE: '{% if user.RUN_ENVIR == "nco" %}${NET_default}.{init?fmt=%Y%m%d?shift=-${time_lag}}/{init?fmt=%H?shift=-${time_lag}}{% else %}{{VX_FCST_MODEL_NAME}}_{init?fmt=%Y%m%d%H?shift=-${time_lag}}{% if global.DO_ENSEMBLE %}/${ensmem_name}{% endif %}{% endif %}' + VX_FCST_INPUT_BASEDIR: '/scratch2/BMC/fv3lam/Gerard.Ketefian/forecasts_for_SRW_WE2E_tests.linked/{{- verification.VX_FCST_MODEL_NAME }}' FCST_FN_TEMPLATE: '${POST_OUTPUT_DOMAIN_NAME}_{init?fmt=%Y%m%d%H?shift=-${time_lag}}f{lead?fmt=%H?shift=${time_lag}}.grib2' FCST_FN_TEMPLATE_PCPCOMBINE_OUTPUT: '${POST_OUTPUT_DOMAIN_NAME}_{init?fmt=%Y%m%d%H?shift=-${time_lag}}f{lead?fmt=%HHH?shift=${time_lag}}_${VAR}_a${ACCUM_HH}h.nc' From 877bf05efad49fb5c15f1c568d72d024c0408e1b Mon Sep 17 00:00:00 2001 From: gerard ketefian Date: Wed, 25 Sep 2024 07:39:42 -0600 Subject: [PATCH 071/131] Add WE2E test to get NOHRSC obs and do vx on 6-hour and 24-hour snowfall accumulation. 
--- ...c.init_00z_fcstlen_36hr.winter_wx.SRW.yaml | 61 +++++++++++++++++++ 1 file changed, 61 insertions(+) create mode 100644 tests/WE2E/test_configs/verification/config.get_obs_hpss.do_vx_det.singlecyc.init_00z_fcstlen_36hr.winter_wx.SRW.yaml diff --git a/tests/WE2E/test_configs/verification/config.get_obs_hpss.do_vx_det.singlecyc.init_00z_fcstlen_36hr.winter_wx.SRW.yaml b/tests/WE2E/test_configs/verification/config.get_obs_hpss.do_vx_det.singlecyc.init_00z_fcstlen_36hr.winter_wx.SRW.yaml new file mode 100644 index 0000000000..6069ce8212 --- /dev/null +++ b/tests/WE2E/test_configs/verification/config.get_obs_hpss.do_vx_det.singlecyc.init_00z_fcstlen_36hr.winter_wx.SRW.yaml @@ -0,0 +1,61 @@ +metadata: + description: |- + SRW App configuration file to first pull CCPA, NOHRSC, MRMS, and NDAS + observations from HPSS for a single cycle with a relatively long forecast + (36 hours) cycle and then perform deterministic verification, including + first performing vx preprocessing with METplus tools such as PcpCombine + and Pb2Nc. + + The staged forecast data are from the SRW itself. + + This test uses a winter case to ensure that ASNOW is verified correctly + for both 6-hour and 24-hour accumulations. + +user: + RUN_ENVIR: community + +platform: + EXTRN_MDL_DATA_STORES: hpss + # Base directories in which to look for obs. If these do not exist and/or + # do not contain the requied obs, create and populate them. + # Note that when performing WE2E tests, the default behavior is not to + # get obs files from HPSS but to use staged obs files. This is done by + # setting these variables to the (platform-specific) locations of these + # staged files. To force the WE2E testing system to get the obs from + # HPSS, here we reset these variables to their default values in the SRW + # workflow configuration file config_defaults.yaml. 
+ CCPA_OBS_DIR: "{{ workflow.EXPTDIR }}/obs_data/ccpa" + NOHRSC_OBS_DIR: "{{ workflow.EXPTDIR }}/obs_data/nohrsc" + MRMS_OBS_DIR: "{{ workflow.EXPTDIR }}/obs_data/mrms" + NDAS_OBS_DIR: "{{ workflow.EXPTDIR }}/obs_data/ndas" + # Do not remove raw obs files to be able to verify that only the necessary + # raw files are fetched from HPSS. + REMOVE_RAW_OBS_CCPA: false + REMOVE_RAW_OBS_MRMS: false + REMOVE_RAW_OBS_NDAS: false + REMOVE_RAW_OBS_NOHRSC: false + +workflow: + PREEXISTING_DIR_METHOD: rename + # This is required in the experiment generation step, although it shouldn't + # since a forecast is not being run. + PREDEF_GRID_NAME: RRFS_CONUS_25km + INCR_CYCL_FREQ: 24 + DATE_FIRST_CYCL: '2023021700' + DATE_LAST_CYCL: '2023021700' + FCST_LEN_HRS: 36 + +rocoto: + tasks: + taskgroups: '{{ ["parm/wflow/verify_pre.yaml", "parm/wflow/verify_det.yaml"]|include }}' + +task_get_extrn_lbcs: + LBC_SPEC_INTVL_HRS: 3 + +task_run_post: + POST_OUTPUT_DOMAIN_NAME: 'custom_ESGgrid_Michigan_Ontario' + +verification: + VX_FCST_MODEL_NAME: 'Michigan_Ontario_snow_8km' + VX_FIELDS: [ "APCP", "ASNOW", "REFC", "RETOP", "ADPSFC", "ADPUPA" ] + VX_FCST_INPUT_BASEDIR: '/scratch2/BMC/fv3lam/Gerard.Ketefian/forecasts_for_SRW_WE2E_tests.linked/SRW' From 4495813d591ea7e536e073f8fe23f68a2ed2562d Mon Sep 17 00:00:00 2001 From: gerard ketefian Date: Wed, 25 Sep 2024 07:41:36 -0600 Subject: [PATCH 072/131] Add script to get NOHRSC obs from HPSS. --- ush/get_obs_nohrsc.sh | 475 ++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 475 insertions(+) create mode 100755 ush/get_obs_nohrsc.sh diff --git a/ush/get_obs_nohrsc.sh b/ush/get_obs_nohrsc.sh new file mode 100755 index 0000000000..910cf3c35a --- /dev/null +++ b/ush/get_obs_nohrsc.sh @@ -0,0 +1,475 @@ +#!/usr/bin/env bash + +# +#----------------------------------------------------------------------- +# +# Source the variable definitions file and the bash utility functions. 
+#
+#-----------------------------------------------------------------------
+#
+. $USHdir/source_util_funcs.sh
+for sect in user platform verification ; do
+  source_yaml ${GLOBAL_VAR_DEFNS_FP} ${sect}
+done
+
+set -u
+#set -x
+#
+#-----------------------------------------------------------------------
+#
+# This script performs several important tasks for preparing data for
+# verification tasks. Depending on the value of the environment variable
+# OBTYPE=(CCPA|MRMS|NDAS|NOHRSC), the script will prepare that particular data
+# set.
+#
+# If data is not available on disk (in the location specified by
+# CCPA_OBS_DIR, MRMS_OBS_DIR, NDAS_OBS_DIR, or NOHRSC_OBS_DIR respectively),
+# the script attempts to retrieve the data from HPSS using the retrieve_data.py
+# script. Depending on the data set, there are a few strange quirks and/or
+# bugs in the way data is organized; see in-line comments for details.
+#
+# NOHRSC snow accumulation observations
+# ----------
+# If data is available on disk, it must be in the following
+# directory structure and file name conventions expected by verification
+# tasks:
+#
+# {NOHRSC_OBS_DIR}/{YYYYMMDD}/sfav2_CONUS_{AA}h_{YYYYMMDD}{HH}_grid184.grb2
+#
+# where AA is the 2-digit accumulation duration in hours: 06 or 24
+#
+# METplus is configured to verify snowfall using 06- and 24-h accumulated
+# snowfall from 6- and 12-hourly NOHRSC files, respectively.
+#
+# If data is retrieved from HPSS, it will automatically be staged by
+# this script.
+#-----------------------------------------------------------------------
+#
+
+# The day (in the form YYYYMMDD) associated with the current task via the
+# task's cycledefs attribute in the ROCOTO xml.
+yyyymmdd_task=${PDY}
+
+# Base directory in which the daily subdirectories containing the grib2
+# obs files will appear after this script is done. We refer to this as
+# the "processed" base directory because it contains the files after all
+# processing by this script is complete.
+basedir_proc=${OBS_DIR} +# +#----------------------------------------------------------------------- +# +# Generate a list of forecast output times for the current day. Note +# that if the 0th hour of the next day (i.e. the day after the one +# associated with this task) is one of the forecast output times, we +# include it in the list for the current day because the accumulation +# associated with that hour occurred during the current day. +# +#----------------------------------------------------------------------- +# + +# The environment variable FCST_OUTPUT_TIMES_ALL set in the ROCOTO XML is +# a scalar string containing all relevant forecast output times (each in +# the form YYYYMMDDHH) separated by spaces. It isn't an array of strings +# because in ROCOTO, there doesn't seem to be a way to pass a bash array +# from the XML to the task's script. To have an array-valued variable to +# work with, here, we create the new variable fcst_output_times_all that +# is the array-valued counterpart of FCST_OUTPUT_TIMES_ALL. +fcst_output_times_all=($(printf "%s" "${FCST_OUTPUT_TIMES_ALL}")) + +# List of times (each of the form YYYYMMDDHH) for which there is forecast +# ASNOW (accumulated snow) output for the current day. We start constructing +# this by extracting from the full list of all forecast ASNOW output times +# (i.e. from all cycles) all elements that contain the current task's day +# (in the form YYYYMMDD). +fcst_output_times_crnt_day=() +if [[ ${fcst_output_times_all[@]} =~ ${yyyymmdd_task} ]]; then + fcst_output_times_crnt_day=( $(printf "%s\n" "${fcst_output_times_all[@]}" | grep "^${yyyymmdd_task}") ) +fi +# If the 0th hour of the current day is in this list (and if it is, it +# will be the first element), remove it because for ASNOW, that time is +# considered part of the previous day (because it represents snowfall +# that occurred during the last hour of the previous day). 
+if [[ ${#fcst_output_times_crnt_day[@]} -gt 0 ]] && \
+   [[ ${fcst_output_times_crnt_day[0]} == "${yyyymmdd_task}00" ]]; then
+  fcst_output_times_crnt_day=(${fcst_output_times_crnt_day[@]:1})
+fi
+# If the 0th hour of the next day (i.e. the day after yyyymmdd_task) is
+# one of the output times in the list of all ASNOW output times, we
+# include it in the list for the current day because for ASNOW, that time
+# is considered part of the current day (because it represents snowfall
+# that occurred during the last hour of the current day).
+yyyymmdd00_task_p1d=$(${DATE_UTIL} --date "${yyyymmdd_task} 1 day" +%Y%m%d%H)
+if [[ ${fcst_output_times_all[@]} =~ ${yyyymmdd00_task_p1d} ]]; then
+  fcst_output_times_crnt_day+=(${yyyymmdd00_task_p1d})
+fi
+
+# If there are no forecast ASNOW output times on the day of the current
+# task, exit the script.
+num_fcst_output_times_crnt_day=${#fcst_output_times_crnt_day[@]}
+if [[ ${num_fcst_output_times_crnt_day} -eq 0 ]]; then
+  print_info_msg "
+None of the forecast ASNOW output times fall within the day (including the
+0th hour of the next day) associated with the current task (yyyymmdd_task):
+  yyyymmdd_task = \"${yyyymmdd_task}\"
+Thus, there is no need to retrieve any obs files."
+  exit
+fi
+#
+#-----------------------------------------------------------------------
+#
+# Generate a list of all the times at which obs are available for the
+# current day, possibly including hour 00 of the next day.
+#
+#-----------------------------------------------------------------------
+#
+
+# The time interval (in hours) at which the obs are available on HPSS
+# must divide evenly into 24. Otherwise, different days would
+# have obs available at different hours. Make sure this is the case.
+remainder=$(( 24 % NOHRSC_OBS_AVAIL_INTVL_HRS )) +if [ ${remainder} -ne 0 ]; then + print_err_msg_exit "\ +The obs availability interval NOHRSC_OBS_AVAIL_INTVL_HRS must divide evenly +into 24 but doesn't: + NOHRSC_OBS_AVAIL_INTVL_HRS = ${NOHRSC_OBS_AVAIL_INTVL_HRS} + mod(24, NOHRSC_OBS_AVAIL_INTVL_HRS) = ${remainder}" +fi + +# Construct the array of times during the current day (and possibly +# during hour 00 of the next day) at which obs are available on HPSS. +# Each element of this array is of the form "YYYYMMDDHH". +num_obs_avail_times=$((24/NOHRSC_OBS_AVAIL_INTVL_HRS)) +obs_avail_times_crnt_day=() +# Note: Start at i=1 because the output for hour 00 of the current day is +# considered part of the previous day (because it represents accumulation +# that occurred during the previous day). +for (( i=1; i<$((num_obs_avail_times+1)); i++ )); do + hrs=$((i*NOHRSC_OBS_AVAIL_INTVL_HRS)) + obs_avail_times_crnt_day+=( $(${DATE_UTIL} --date "${yyyymmdd_task} ${hrs} hours" +%Y%m%d%H) ) +done +# +#----------------------------------------------------------------------- +# +# Generate a list of all the times at which to retrieve obs. This is +# obtained from the intersection of the list of times at which there is +# forecast output and the list of times at which there are obs available. +# Note that if the forecast output is more frequent than the data is +# available, then the forecast values must be accumulated together to +# get values at the times at which the obs are available. This is done +# in another workflow task using the METplus tool PcpCombine. 
+# +#----------------------------------------------------------------------- +# +obs_retrieve_times_crnt_day=() +for yyyymmddhh in ${fcst_output_times_crnt_day[@]}; do + if [[ ${obs_avail_times_crnt_day[@]} =~ ${yyyymmddhh} ]] ; then + obs_retrieve_times_crnt_day+=(${yyyymmddhh}) + fi +done +# +#----------------------------------------------------------------------- +# +# +# +#----------------------------------------------------------------------- +# +array_name="OBS_RETRIEVE_TIMES_${OBTYPE}_${yyyymmdd_task}" +eval obs_retrieve_times=\( \${${array_name}[@]} \) +echo +echo "QQQQQQQQQQQQQQQQQQQ" +#echo "obs_retrieve_times = |${obs_retrieve_times[@]}|" +echo "obs_retrieve_times =" +echo "|${obs_retrieve_times[@]}|" + +# For testing. +#obs_retrieve_times+=('abcd') +#obs_retrieve_times[4]='abcd' + +err_msg=" +The two methods of obtaining the array of obs retrieve times don't match: + obs_retrieve_times_crnt_day = + (${obs_retrieve_times_crnt_day[@]}) + obs_retrieve_times = + (${obs_retrieve_times[@]})" + +n1=${#obs_retrieve_times_crnt_day[@]} +n2=${#obs_retrieve_times[@]} +if [ ${n1} -ne ${n2} ]; then + print_err_msg_exit "${err_msg}" +fi + +for (( i=0; i<${n1}; i++ )); do + elem1=${obs_retrieve_times_crnt_day[$i]} + elem2=${obs_retrieve_times[$i]} + if [ ${elem1} != ${elem2} ]; then + print_err_msg_exit "${err_msg}" + fi +done + +obs_retrieve_times_crnt_day=($( printf "%s " "${obs_retrieve_times[@]}" )) + +echo +echo "RRRRRRRRRRRRRRRRR" +#echo "obs_retrieve_times_crnt_day = |${obs_retrieve_times_crnt_day[@]}|" +echo "obs_retrieve_times_crnt_day =" +echo "|${obs_retrieve_times_crnt_day[@]}|" + +#exit 1 +# +#----------------------------------------------------------------------- +# +# Obs files will be obtained by extracting them from the relevant 24-hourly +# archives. Thus, we need the sequence of archive hours over which to +# loop. In the simplest case, this sequence will be "0 24". 
This will +# be the case if the forecast output times include all hours of the +# task's day and if none of the obs files for this day already exist on +# disk. In other cases, the sequence we loop over will be a subset of +# "0 24", e.g. just "0" or just "24". +# +# To generate this sequence, we first set its starting and ending values +# as well as the interval. +# +#----------------------------------------------------------------------- +# + +# Sequence interval must be 24 hours because the archives are 24-hourly. +arcv_hr_incr=24 + +# Initial guess for starting archive hour. This is set to the archive +# hour containing obs at the first obs retrieval time of the day. +hh_first=$(echo ${obs_retrieve_times_crnt_day[0]} | cut -c9-10) +hr_first=$((10#${hh_first})) +arcv_hr_start=$(( hr_first/arcv_hr_incr )) +arcv_hr_start=$(( arcv_hr_start*arcv_hr_incr )) + +# Ending archive hour. This is set to the archive hour containing obs at +# the last obs retrieval time of the day. +hh_last=$(echo ${obs_retrieve_times_crnt_day[-1]} | cut -c9-10) +hr_last=$((10#${hh_last})) +if [[ ${hr_last} -eq 0 ]]; then + arcv_hr_end=24 +else + arcv_hr_end=$(( hr_last/arcv_hr_incr )) + arcv_hr_end=$(( arcv_hr_end*arcv_hr_incr )) +fi + +# Check whether any obs files already exist on disk. If so, adjust the +# starting archive hour. In the process, keep a count of the number of +# obs files that already exist on disk. 
+num_existing_files=0 +for yyyymmddhh in ${obs_retrieve_times_crnt_day[@]}; do + yyyymmdd=$(echo ${yyyymmddhh} | cut -c1-8) + hh=$(echo ${yyyymmddhh} | cut -c9-10) + day_dir_proc="${basedir_proc}" + fn_proc="sfav2_CONUS_6h_${yyyymmddhh}_grid184.grb2" + fp_proc="${day_dir_proc}/${fn_proc}" + if [[ -f ${fp_proc} ]]; then + num_existing_files=$((num_existing_files+1)) + print_info_msg " +File already exists on disk: + fp_proc = \"${fp_proc}\"" + else + hr=$((10#${hh})) + arcv_hr_start=$(( hr/arcv_hr_incr )) + arcv_hr_start=$(( arcv_hr_start*arcv_hr_incr )) + print_info_msg " +File does not exist on disk: + fp_proc = \"${fp_proc}\" +Setting the hour (since 00) of the first archive to retrieve to: + arcv_hr_start = \"${arcv_hr_start}\"" + break + fi +done + +# If the number of obs files that already exist on disk is equal to the +# number of obs files needed, then there is no need to retrieve any files. +num_obs_retrieve_times_crnt_day=${#obs_retrieve_times_crnt_day[@]} +if [[ ${num_existing_files} -eq ${num_obs_retrieve_times_crnt_day} ]]; then + + print_info_msg " +All obs files needed for the current day (yyyymmdd_task) already exist +on disk: + yyyymmdd_task = \"${yyyymmdd_task}\" +Thus, there is no need to retrieve any files." + exit + +# If the number of obs files that already exist on disk is not equal to +# the number of obs files needed, then we will need to retrieve files. +# In this case, set the sequence of hours corresponding to the archives +# from which files will be retrieved. 
+else
+
+  arcv_hrs=($(seq ${arcv_hr_start} ${arcv_hr_incr} ${arcv_hr_end}))
+  arcv_hrs_str="( "$( printf "%s " "${arcv_hrs[@]}" )")"
+  print_info_msg "
+At least some obs files needed for the current day (yyyymmdd_task)
+do not exist on disk:
+  yyyymmdd_task = \"${yyyymmdd_task}\"
+The number of obs files needed for the current day (which is equal to the
+number of observation retrieval times for the current day) is:
+  num_obs_retrieve_times_crnt_day = ${num_obs_retrieve_times_crnt_day}
+The number of obs files that already exist on disk is:
+  num_existing_files = ${num_existing_files}
+Will retrieve remaining files by looping over archives corresponding to
+the following hours (since 00 of this day):
+  arcv_hrs = ${arcv_hrs_str}
+"
+
+fi
+#
+#-----------------------------------------------------------------------
+#
+# At this point, at least some obs files for the current day need to be
+# retrieved. The NOHRSC data on HPSS are archived by day, with the
+# archive for a given day containing 6-hour as well as 24-hour grib2
+# files. The four 6-hour files are for accumulated snowfall at 00z
+# (which represents accumulation over the last 6 hours of the previous
+# day), 06z, 12z, and 18z, while the two 24-hour files are at 00z (which
+# represents accumulation over all 24 hours of the previous day) and 12z
+# (which represents accumulation over the last 12 hours of the previous
+# day plus the first 12 hours of the current day).
+#
+# Here, we will only obtain the 6-hour files. In other workflow tasks,
+# the values in these 6-hour files will be added as necessary to obtain
+# accumulations over longer periods (e.g. 24 hours). Since the four
+# 6-hour files are in one archive and are relatively small (on the order
+# of kilobytes), we get them all with a single call to the retrieve_data.py
+# script.
+#
+#-----------------------------------------------------------------------
+#
+
+# Whether to move or copy files from raw to processed directories.
+#mv_or_cp="mv" +mv_or_cp="cp" +# Whether to remove raw observations after processed directories have +# been created from them. +remove_raw_obs="${REMOVE_RAW_OBS_NOHRSC}" +# If the raw directories and files are to be removed at the end of this +# script, no need to copy the files since the raw directories are going +# to be removed anyway. +if [[ $(boolify "${remove_raw_obs}") == "TRUE" ]]; then + mv_or_cp="mv" +fi + +# Base directory that will contain the daily subdirectories in which the +# NOHRSC grib2 files retrieved from archive (tar) files will be placed. +# We refer to this as the "raw" base directory because it contains files +# as they are found in the archives before any processing by this script. +basedir_raw="${basedir_proc}/raw_${yyyymmdd_task}" + +for arcv_hr in ${arcv_hrs[@]}; do + + print_info_msg " +arcv_hr = ${arcv_hr}" + + # Calculate the time information for the current archive. + yyyymmddhh_arcv=$(${DATE_UTIL} --date "${yyyymmdd_task} ${arcv_hr} hours" +%Y%m%d%H) + yyyymmdd_arcv=$(echo ${yyyymmddhh_arcv} | cut -c1-8) + hh_arcv=$(echo ${yyyymmddhh_arcv} | cut -c9-10) + + # Directory that will contain the grib2 files retrieved from the current + # archive file. We refer to this as the "raw" archive directory because + # it will contain the files as they are in the archive before any processing + # by this script. + arcv_dir_raw="${basedir_raw}/${yyyymmdd_arcv}" + + # Check whether any of the obs retrieval times for the day associated with + # this task fall in the time interval spanned by the current archive. If + # so, set the flag (do_retrieve) to retrieve the files in the current + # archive. 
+ arcv_contents_yyyymmddhh_start=$(${DATE_UTIL} --date "${yyyymmdd_arcv} ${hh_arcv}" +%Y%m%d%H) + hrs=$((arcv_hr_incr - 1)) + arcv_contents_yyyymmddhh_end=$(${DATE_UTIL} --date "${yyyymmdd_arcv} ${hh_arcv} ${hrs} hours" +%Y%m%d%H) + do_retrieve="FALSE" + for (( i=0; i<${num_obs_retrieve_times_crnt_day}; i++ )); do + obs_retrieve_time=${obs_retrieve_times_crnt_day[i]} + if [[ "${obs_retrieve_time}" -ge "${arcv_contents_yyyymmddhh_start}" ]] && \ + [[ "${obs_retrieve_time}" -le "${arcv_contents_yyyymmddhh_end}" ]]; then + do_retrieve="TRUE" + break + fi + done + + if [[ $(boolify "${do_retrieve}") != "TRUE" ]]; then + + print_info_msg " +None of the times in the current day (or hour 00 of the next day) at which +obs need to be retrieved fall in the range spanned by the current ${arcv_hr_incr}-hourly +archive file. The bounds of the data in the current archive file are: + arcv_contents_yyyymmddhh_start = \"${arcv_contents_yyyymmddhh_start}\" + arcv_contents_yyyymmddhh_end = \"${arcv_contents_yyyymmddhh_end}\" +The times at which obs need to be retrieved are: + obs_retrieve_times_crnt_day = ($(printf "\"%s\" " ${obs_retrieve_times_crnt_day[@]}))" + + else + + # Make sure the raw archive directory exists because it is used below as + # the output directory of the retrieve_data.py script (so if this directory + # doesn't already exist, that script will fail). Creating this directory + # also ensures that the raw base directory (basedir_raw) exists before we + # change location to it below. + mkdir -p ${arcv_dir_raw} + + # The retrieve_data.py script first extracts the contents of the archive + # file into the directory it was called from and then moves them to the + # specified output location (via the --output_path option). In order to + # avoid other get_obs_ccpa tasks (i.e. 
those associated with other days) + # from interfering with (clobbering) these files (because extracted files + # from different get_obs_ccpa tasks to have the same names or relative + # paths), we change location to the base raw directory so that files with + # same names are extracted into different directories. + cd ${basedir_raw} + + # Pull obs from HPSS. This will get all the obs files in the current + # archive and place them in the raw archive directory. + cmd=" + python3 -u ${USHdir}/retrieve_data.py \ + --debug \ + --file_set obs \ + --config ${PARMdir}/data_locations.yml \ + --cycle_date ${yyyymmddhh_arcv} \ + --data_stores hpss \ + --data_type NOHRSC_obs \ + --output_path ${arcv_dir_raw} \ + --summary_file retrieve_data.log" + + print_info_msg "CALLING: ${cmd}" + $cmd || print_err_msg_exit "Could not retrieve obs from HPSS." + + # Create the processed NOHRSC grib2 files. This consists of simply copying + # or moving them from the raw daily directory to the processed directory. + for hrs in $(seq 0 6 18); do + yyyymmddhh=$(${DATE_UTIL} --date "${yyyymmdd_arcv} ${hh_arcv} ${hrs} hours" +%Y%m%d%H) + yyyymmdd=$(echo ${yyyymmddhh} | cut -c1-8) + hh=$(echo ${yyyymmddhh} | cut -c9-10) + # Create the processed grib2 obs file from the raw one (by moving, copying, + # or otherwise) only if the time of the current file in the current archive + # also exists in the list of obs retrieval times for the current day. + if [[ ${obs_retrieve_times_crnt_day[@]} =~ ${yyyymmddhh} ]]; then + fn_raw="sfav2_CONUS_6h_${yyyymmddhh}_grid184.grb2" + fp_raw="${arcv_dir_raw}/${fn_raw}" + day_dir_proc="${basedir_proc}" + mkdir -p ${day_dir_proc} + fn_proc="${fn_raw}" + #fn_proc="sfav2_CONUS_6h_${yyyymmddhh}_grid184.grb2" + fp_proc="${day_dir_proc}/${fn_proc}" + ${mv_or_cp} ${fp_raw} ${fp_proc} + fi + done + + fi + +done +# +#----------------------------------------------------------------------- +# +# Clean up raw obs directories. 
+# +#----------------------------------------------------------------------- +# +if [[ $(boolify "${remove_raw_obs}") == "TRUE" ]]; then + print_info_msg "Removing raw obs directories..." + rm -rf ${basedir_raw} || print_err_msg_exit "\ +Failed to remove raw obs directories." +fi From 80f2a1258c4e8053e9806af41f6d6548aea718c1 Mon Sep 17 00:00:00 2001 From: gerard ketefian Date: Wed, 25 Sep 2024 10:44:50 -0600 Subject: [PATCH 073/131] Add new variables specifying the time intervals at which different types of obs are available on NOAA HPSS. Use these new variables in file name templates. --- ush/config_defaults.yaml | 18 +++++++++++++++--- 1 file changed, 15 insertions(+), 3 deletions(-) diff --git a/ush/config_defaults.yaml b/ush/config_defaults.yaml index bbddf30874..687463de5f 100644 --- a/ush/config_defaults.yaml +++ b/ush/config_defaults.yaml @@ -2423,6 +2423,13 @@ verification: # METPLUS_VERBOSITY_LEVEL: 2 # + # Time interval (in hours) at which various types of obs are available on + # NOAA's HPSS. + CCPA_OBS_AVAIL_INTVL_HRS: 1 + NOHRSC_OBS_AVAIL_INTVL_HRS: 6 + MRMS_OBS_AVAIL_INTVL_HRS: 1 + NDAS_OBS_AVAIL_INTVL_HRS: 1 + # # Templates for CCPA, MRMS, and NDAS observation files. # # OBS_CCPA_APCP_FN_TEMPLATE: @@ -2448,8 +2455,10 @@ verification: # surface (ADPSFC) or ADP upper air (ADPUPA) fields and then generate # NetCDF versions of these files. # - OBS_CCPA_APCP_FN_TEMPLATE: '{valid?fmt=%Y%m%d}/ccpa.t{valid?fmt=%H}z.01h.hrap.conus.gb2' - OBS_NOHRSC_ASNOW_FN_TEMPLATE: '{valid?fmt=%Y%m%d}/sfav2_CONUS_${ACCUM_HH}h_{valid?fmt=%Y%m%d%H}_grid184.grb2' + OBS_CCPA_APCP_FN_TEMPLATE: '{%- set data_intvl_hrs = "%02d" % CCPA_OBS_AVAIL_INTVL_HRS %} + {{- "{valid?fmt=%Y%m%d}/ccpa.t{valid?fmt=%H}z." 
~ data_intvl_hrs ~ "h.hrap.conus.gb2" }}' + OBS_NOHRSC_ASNOW_FN_TEMPLATE: '{%- set data_intvl_hrs = "%d" % NOHRSC_OBS_AVAIL_INTVL_HRS %} + {{- "sfav2_CONUS_" ~ data_intvl_hrs ~ "h_{valid?fmt=%Y%m%d%H}_grid184.grb2" }}' OBS_MRMS_REFC_FN_TEMPLATE: '{valid?fmt=%Y%m%d}/MergedReflectivityQCComposite_00.50_{valid?fmt=%Y%m%d}-{valid?fmt=%H%M%S}.grib2' OBS_MRMS_RETOP_FN_TEMPLATE: '{valid?fmt=%Y%m%d}/EchoTop_18_00.50_{valid?fmt=%Y%m%d}-{valid?fmt=%H%M%S}.grib2' OBS_NDAS_ADPSFCorADPUPA_FN_TEMPLATE: 'prepbufr.ndas.{valid?fmt=%Y%m%d%H}' @@ -2467,7 +2476,10 @@ verification: # METplus Pb2nc tool on NDAS observations. (These files will contain # obs ADPSFC or ADPUPA fields in NetCDF format.) # - OBS_CCPA_APCP_FN_TEMPLATE_PCPCOMBINE_OUTPUT: 'ccpa.t{valid?fmt=%H}z.01h.hrap.conus.gb2_a${ACCUM_HH}h.nc' + OBS_CCPA_APCP_FN_TEMPLATE_PCPCOMBINE_OUTPUT: '{%- set data_intvl_hrs = "%02d" % CCPA_OBS_AVAIL_INTVL_HRS %} + {{- "ccpa.t{valid?fmt=%H}z." ~ data_intvl_hrs ~ "h.hrap.conus.gb2_a${ACCUM_HH}h.nc" }}' + OBS_NOHRSC_ASNOW_FN_TEMPLATE_PCPCOMBINE_OUTPUT: '{%- set data_intvl_hrs = "%d" % NOHRSC_OBS_AVAIL_INTVL_HRS %} + {{- "sfav2_CONUS_" ~ data_intvl_hrs ~ "h_{valid?fmt=%Y%m%d%H}_grid184.grb2_a${ACCUM_HH}h.nc" }}' OBS_NDAS_ADPSFCorADPUPA_FN_TEMPLATE_PB2NC_OUTPUT: '${OBS_NDAS_ADPSFCorADPUPA_FN_TEMPLATE}.nc' # # VX_FCST_MODEL_NAME: From a0a938c0667ddd2c3cdf76ae1768470e43532020 Mon Sep 17 00:00:00 2001 From: gerard ketefian Date: Wed, 25 Sep 2024 10:48:52 -0600 Subject: [PATCH 074/131] Rearrange the settings for the forecast file templates in config_default.sh so that they're more readable. --- ush/config_defaults.yaml | 22 +++++++++++++++++++--- 1 file changed, 19 insertions(+), 3 deletions(-) diff --git a/ush/config_defaults.yaml b/ush/config_defaults.yaml index 687463de5f..56bd15b814 100644 --- a/ush/config_defaults.yaml +++ b/ush/config_defaults.yaml @@ -2547,9 +2547,25 @@ verification: # both for 1 hour and for > 1 hour accumulation periods, in NetCDF # format.) 
# - FCST_SUBDIR_TEMPLATE: '{% if user.RUN_ENVIR == "nco" %}${NET_default}.{init?fmt=%Y%m%d?shift=-${time_lag}}/{init?fmt=%H?shift=-${time_lag}}{% else %}{init?fmt=%Y%m%d%H?shift=-${time_lag}}{% if global.DO_ENSEMBLE %}/${ensmem_name}{% endif %}/postprd{% endif %}' - FCST_FN_TEMPLATE: '${NET_default}.t{init?fmt=%H?shift=-${time_lag}}z{% if user.RUN_ENVIR == "nco" and global.DO_ENSEMBLE %}.${ensmem_name}{% endif %}.prslev.f{lead?fmt=%HHH?shift=${time_lag}}.${POST_OUTPUT_DOMAIN_NAME}.grib2' - FCST_FN_TEMPLATE_PCPCOMBINE_OUTPUT: '${NET_default}.t{init?fmt=%H}z{% if user.RUN_ENVIR == "nco" and global.DO_ENSEMBLE %}.${ensmem_name}{% endif %}.prslev.f{lead?fmt=%HHH}.${POST_OUTPUT_DOMAIN_NAME}_${VAR}_a${ACCUM_HH}h.nc' + FCST_SUBDIR_TEMPLATE: '{%- if user.RUN_ENVIR == "nco" %} + {{- "${NET_default}.{init?fmt=%Y%m%d?shift=-${time_lag}}/{init?fmt=%H?shift=-${time_lag}}" }} + {%- else %} + {{- "{init?fmt=%Y%m%d%H?shift=-${time_lag}}" }} + {%- if global.DO_ENSEMBLE %} + {{- "/${ensmem_name}" }} + {%- endif %} + {{- "/postprd" }} + {%- endif %}' + FCST_FN_TEMPLATE: '{{- "${NET_default}.t{init?fmt=%H?shift=-${time_lag}}z" }} + {%- if user.RUN_ENVIR == "nco" and global.DO_ENSEMBLE %} + {{- ".${ensmem_name}" }} + {%- endif %} + {{- ".prslev.f{lead?fmt=%HHH?shift=${time_lag}}.${POST_OUTPUT_DOMAIN_NAME}.grib2" }}' + FCST_FN_TEMPLATE_PCPCOMBINE_OUTPUT: '{{- "${NET_default}.t{init?fmt=%H}z" }} + {%- if user.RUN_ENVIR == "nco" and global.DO_ENSEMBLE %} + {{- ".${ensmem_name}" }} + {%- endif %} + {{- ".prslev.f{lead?fmt=%HHH}.${POST_OUTPUT_DOMAIN_NAME}_${VAR}_a${ACCUM_HH}h.nc" }}' # # For verification tasks that need observational data, this specifies # the maximum number of observation files that may be missing. 
If more From 692255598ff94ef260720c9ffbf21684ce5677d9 Mon Sep 17 00:00:00 2001 From: gerard ketefian Date: Wed, 25 Sep 2024 10:51:31 -0600 Subject: [PATCH 075/131] Allow verification of 12 and 18-hourly accumulated snowfall since these are now obtained by adding 6-hourly in the obs (as opposed to before, where we just got the 6 or 24 hourly obs without the option to add the 6-hourly to get 12 and 18). --- ush/valid_param_vals.yaml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/ush/valid_param_vals.yaml b/ush/valid_param_vals.yaml index 3530b51ae9..1ff5405ffb 100644 --- a/ush/valid_param_vals.yaml +++ b/ush/valid_param_vals.yaml @@ -78,4 +78,4 @@ valid_vals_DO_AQM_SAVE_AIRNOW_HIST: [True, False] valid_vals_COLDSTART: [True, False] valid_vals_VX_FIELDS: [ "APCP", "ASNOW", "REFC", "RETOP", "ADPSFC", "ADPUPA" ] valid_vals_VX_APCP_ACCUMS_HRS: [ 1, 3, 6, 24 ] -valid_vals_VX_ASNOW_ACCUMS_HRS: [ 6, 24 ] +valid_vals_VX_ASNOW_ACCUMS_HRS: [ 6, 12, 18, 24 ] From cae50b5d8a3ef9d477f6a771eee29cf04dcd0b50 Mon Sep 17 00:00:00 2001 From: gerard ketefian Date: Wed, 25 Sep 2024 12:31:19 -0600 Subject: [PATCH 076/131] In Pb2NC tasks, use the arrays in var_defns.py that specify the times on a given day at which a given types of obs should be retrieved. 
--- parm/metplus/Pb2nc_obs.conf | 2 +- scripts/exregional_run_met_pcpcombine.sh | 26 ++++++++++++++++-------- 2 files changed, 18 insertions(+), 10 deletions(-) diff --git a/parm/metplus/Pb2nc_obs.conf b/parm/metplus/Pb2nc_obs.conf index 729bf2ba06..24d469602f 100644 --- a/parm/metplus/Pb2nc_obs.conf +++ b/parm/metplus/Pb2nc_obs.conf @@ -31,7 +31,7 @@ INIT_INCREMENT = 3600 # List of forecast leads to process for each run time (init or valid) # In hours if units are not specified # If unset, defaults to 0 (don't loop through forecast leads) -LEAD_SEQ = {{fhr_list}} +LEAD_SEQ = {{leadhr_list}} # Order of loops to process data - Options are times, processes # Not relevant if only one item is in the PROCESS_LIST diff --git a/scripts/exregional_run_met_pcpcombine.sh b/scripts/exregional_run_met_pcpcombine.sh index 3d4d0cb9fb..9495031722 100755 --- a/scripts/exregional_run_met_pcpcombine.sh +++ b/scripts/exregional_run_met_pcpcombine.sh @@ -190,11 +190,13 @@ if [ "${FCST_OR_OBS}" = "FCST" ]; then elif [ "${FCST_OR_OBS}" = "OBS" ]; then OBS_INPUT_DIR="${OBS_DIR}" - OBS_INPUT_FN_TEMPLATE=$( eval echo ${OBS_CCPA_APCP_FN_TEMPLATE} ) + fn_template=$(eval echo \${OBS_${OBTYPE}_${VAR}_FN_TEMPLATE}) + OBS_INPUT_FN_TEMPLATE=$( eval echo ${fn_template} ) OUTPUT_BASE="${vx_output_basedir}${slash_cdate_or_null}${slash_ensmem_subdir_or_null}" OUTPUT_DIR="${OUTPUT_BASE}/metprd/${MetplusToolName}_obs" - OUTPUT_FN_TEMPLATE=$( eval echo ${OBS_CCPA_APCP_FN_TEMPLATE_PCPCOMBINE_OUTPUT} ) + fn_template=$(eval echo \${OBS_${OBTYPE}_${VAR}_FN_TEMPLATE_PCPCOMBINE_OUTPUT}) + OUTPUT_FN_TEMPLATE=$( eval echo ${fn_template} ) STAGING_DIR="${OUTPUT_BASE}/stage/${FIELDNAME_IN_MET_FILEDIR_NAMES}" fi @@ -213,22 +215,27 @@ if [ "${FCST_OR_OBS}" = "FCST" ]; then base_dir="${FCST_INPUT_DIR}" fn_template="${FCST_INPUT_FN_TEMPLATE}" num_missing_files_max="${NUM_MISSING_FCST_FILES_MAX}" + subintvl_accum_hrs="${FCST_OUTPUT_INTVL_HRS}" elif [ "${FCST_OR_OBS}" = "OBS" ]; then base_dir="${OBS_INPUT_DIR}" 
fn_template="${OBS_INPUT_FN_TEMPLATE}" num_missing_files_max="${NUM_MISSING_OBS_FILES_MAX}" + subintvl_accum_hrs="${OBS_AVAIL_INTVL_HRS}" fi +input_accum_hh=$(printf "%02d" ${subintvl_accum_hrs}) +vx_output_intvl_hrs="$((10#${ACCUM_HH}))" -set_vx_fhr_list \ - cdate="${CDATE}" \ +set_vx_hrs_list \ + yyyymmddhh_init="${CDATE}" \ fcst_len_hrs="${FCST_LEN_HRS}" \ - field="$VAR" \ - accum_hh="${ACCUM_HH}" \ + vx_output_intvl_hrs="${vx_output_intvl_hrs}" \ + field_is_cumul="TRUE" \ + check_subintvl_files="TRUE" \ + subintvl_accum_hrs="${subintvl_accum_hrs}" \ base_dir="${base_dir}" \ fn_template="${fn_template}" \ - check_accum_contrib_files="TRUE" \ num_missing_files_max="${num_missing_files_max}" \ - outvarname_fhr_list="FHR_LIST" + outvarname_hrs_list="FHR_LIST" # #----------------------------------------------------------------------- # @@ -358,7 +365,8 @@ settings="\ 'fieldname_in_met_filedir_names': '${FIELDNAME_IN_MET_FILEDIR_NAMES}' 'obtype': '${OBTYPE}' 'FCST_OR_OBS': '${FCST_OR_OBS}' - 'accum_hh': '${ACCUM_HH:-}' + 'input_accum_hh': '${input_accum_hh}' + 'output_accum_hh': '${ACCUM_HH:-}' 'accum_no_pad': '${ACCUM_NO_PAD:-}' 'metplus_templates_dir': '${METPLUS_CONF:-}' 'input_field_group': '${VAR:-}' From fffdbd3739e8c5ac542e6fea63e5d8b53f5d907e Mon Sep 17 00:00:00 2001 From: gerard ketefian Date: Wed, 25 Sep 2024 12:39:24 -0600 Subject: [PATCH 077/131] In the get_obs tasks, use the arrays in var_defns.py that specify the times on a given day at which a given types of obs should be retrieved. Remove the arrays that specify the combined forecast output times for all cycles. 
--- parm/wflow/verify_det.yaml | 4 + parm/wflow/verify_pre.yaml | 8 +- ush/get_obs_ccpa.sh | 215 ++++++++++++++++++------------------- ush/get_obs_mrms.sh | 47 ++++---- ush/get_obs_ndas.sh | 102 ++++++++++-------- ush/get_obs_nohrsc.sh | 177 ++++-------------------------- 6 files changed, 204 insertions(+), 349 deletions(-) diff --git a/parm/wflow/verify_det.yaml b/parm/wflow/verify_det.yaml index 4f4d4672ce..c4f420f10c 100644 --- a/parm/wflow/verify_det.yaml +++ b/parm/wflow/verify_det.yaml @@ -42,6 +42,7 @@ metatask_GridStat_CCPA_all_accums_all_mems: OBTYPE: 'CCPA' ENSMEM_INDX: "#mem#" SLASH_ENSMEM_SUBDIR_OR_NULL: '{% if global.DO_ENSEMBLE %}{{ "/mem#mem#" }}{% endif %}' + OBS_AVAIL_INTVL_HRS: '{{- verification.CCPA_OBS_AVAIL_INTVL_HRS }}' FCST_LEVEL: 'A#ACCUM_HH#' FCST_THRESH: 'all' walltime: 02:00:00 @@ -75,6 +76,7 @@ metatask_GridStat_NOHRSC_all_accums_all_mems: OBTYPE: 'NOHRSC' ENSMEM_INDX: "#mem#" SLASH_ENSMEM_SUBDIR_OR_NULL: '{% if global.DO_ENSEMBLE %}{{ "/mem#mem#" }}{% endif %}' + OBS_AVAIL_INTVL_HRS: '{{- verification.NOHRSC_OBS_AVAIL_INTVL_HRS }}' FCST_LEVEL: 'A#ACCUM_HH#' FCST_THRESH: 'all' walltime: 02:00:00 @@ -105,6 +107,7 @@ metatask_GridStat_MRMS_all_mems: OBTYPE: 'MRMS' ENSMEM_INDX: "#mem#" SLASH_ENSMEM_SUBDIR_OR_NULL: '{% if global.DO_ENSEMBLE %}{{ "/mem#mem#" }}{% endif %}' + OBS_AVAIL_INTVL_HRS: '{{- verification.MRMS_OBS_AVAIL_INTVL_HRS }}' FCST_LEVEL: 'L0' FCST_THRESH: 'all' walltime: 02:00:00 @@ -152,6 +155,7 @@ metatask_PointStat_NDAS_all_mems: ACCUM_HH: '01' ENSMEM_INDX: "#mem#" SLASH_ENSMEM_SUBDIR_OR_NULL: '{% if global.DO_ENSEMBLE %}{{ "/mem#mem#" }}{% endif %}' + OBS_AVAIL_INTVL_HRS: '{{- verification.NDAS_OBS_AVAIL_INTVL_HRS }}' FCST_LEVEL: 'all' FCST_THRESH: 'all' walltime: 01:00:00 diff --git a/parm/wflow/verify_pre.yaml b/parm/wflow/verify_pre.yaml index 4a9e750c56..80b70f68d3 100644 --- a/parm/wflow/verify_pre.yaml +++ b/parm/wflow/verify_pre.yaml @@ -47,7 +47,6 @@ task_get_obs_nohrsc: <<: *default_vars OBS_DIR: 
'&NOHRSC_OBS_DIR;' OBTYPE: 'NOHRSC' - OUTPUT_TIMES_ALL: *output_times_all_cumul native: '{% if platform.get("SCHED_NATIVE_CMD_HPSS") %}{{ platform.SCHED_NATIVE_CMD_HPSS }}{% else %}{{ platform.SCHED_NATIVE_CMD}}{% endif %}' partition: '{% if platform.get("PARTITION_HPSS") %}&PARTITION_HPSS;{% else %}None{% endif %}' queue: "&QUEUE_HPSS;" @@ -61,10 +60,6 @@ task_get_obs_mrms: OBS_DIR: '&MRMS_OBS_DIR;' OBTYPE: 'MRMS' MRMS_FIELDS: 'REFC RETOP' - OUTPUT_TIMES_ALL: &output_times_all_inst - '{%- for i in range(0, (workflow.OUTPUT_TIMES_ALL_CYCLES_INST|length)) %} - {{- " " ~ workflow.OUTPUT_TIMES_ALL_CYCLES_INST[i] }} - {%- endfor %}' native: '{% if platform.get("SCHED_NATIVE_CMD_HPSS") %}{{ platform.SCHED_NATIVE_CMD_HPSS }}{% else %}{{ platform.SCHED_NATIVE_CMD}}{% endif %}' partition: '{% if platform.get("PARTITION_HPSS") %}&PARTITION_HPSS;{% else %}None{% endif %}' queue: "&QUEUE_HPSS;" @@ -77,7 +72,6 @@ task_get_obs_ndas: <<: *default_vars OBS_DIR: '&NDAS_OBS_DIR;' OBTYPE: 'NDAS' - OUTPUT_TIMES_ALL: *output_times_all_inst queue: "&QUEUE_HPSS;" native: '{% if platform.get("SCHED_NATIVE_CMD_HPSS") %}{{ platform.SCHED_NATIVE_CMD_HPSS }}{% else %}{{ platform.SCHED_NATIVE_CMD}}{% endif %}' partition: '{% if platform.get("PARTITION_HPSS") %}&PARTITION_HPSS;{% else %}None{% endif %}' @@ -96,7 +90,6 @@ task_run_MET_Pb2nc_obs: FCST_OR_OBS: OBS OBTYPE: NDAS OBS_DIR: '&NDAS_OBS_DIR;' - OUTPUT_TIMES_ALL: *output_times_all_inst METPLUSTOOLNAME: 'PB2NC' dependency: and: @@ -127,6 +120,7 @@ metatask_PcpCombine_obs: FCST_OR_OBS: OBS OBTYPE: CCPA OBS_DIR: '&CCPA_OBS_DIR;' + OBS_AVAIL_INTVL_HRS: '{{- verification.CCPA_OBS_AVAIL_INTVL_HRS }}' METPLUSTOOLNAME: 'PCPCOMBINE' dependency: and: diff --git a/ush/get_obs_ccpa.sh b/ush/get_obs_ccpa.sh index d3ad4c49ce..aabb55e5a4 100755 --- a/ush/get_obs_ccpa.sh +++ b/ush/get_obs_ccpa.sh @@ -8,7 +8,7 @@ #----------------------------------------------------------------------- # . 
$USHdir/source_util_funcs.sh -for sect in user platform ; do +for sect in user platform verification ; do source_yaml ${GLOBAL_VAR_DEFNS_FP} ${sect} done @@ -158,93 +158,71 @@ set -u #----------------------------------------------------------------------- # -# CCPA accumulation period to consider. Here, we only retrieve data for -# 1-hour accumulations. Other accumulations (03h, 06h, 24h) are obtained -# by other tasks in the workflow that add up these hourly values. -accum="01" +# The time interval (in hours) at which the obs are available on HPSS +# must divide evenly into 24. Otherwise, different days would have obs +# available at different hours-of-day. Make sure this is the case. +remainder=$(( 24 % CCPA_OBS_AVAIL_INTVL_HRS )) +if [ ${remainder} -ne 0 ]; then + print_err_msg_exit "\ +The obs availability interval CCPA_OBS_AVAIL_INTVL_HRS must divide evenly +into 24 but doesn't: + CCPA_OBS_AVAIL_INTVL_HRS = ${CCPA_OBS_AVAIL_INTVL_HRS} + mod(24, CCPA_OBS_AVAIL_INTVL_HRS) = ${remainder}" +fi + +# Accumulation period to use when getting obs files. This is simply (a +# properly formatted version of) the obs availability interval. +accum_obs_fmt=$( printf "%02d" "${CCPA_OBS_AVAIL_INTVL_HRS}" ) # The day (in the form YYYMMDD) associated with the current task via the # task's cycledefs attribute in the ROCOTO xml. yyyymmdd_task=${PDY} -# Base directory in which the daily subdirectories containing the CCPA -# grib2 files will appear after this script is done. We refer to this as +# Base directory in which the daily subdirectories containing the grib2 +# obs files will appear after this script is done. We refer to this as # the "processed" base directory because it contains the files after all # processing by this script is complete. basedir_proc=${OBS_DIR} - -# The environment variable FCST_OUTPUT_TIMES_ALL set in the ROCOTO XML is -# a scalar string containing all relevant forecast output times (each in -# the form YYYYMMDDHH) separated by spaces. 
It isn't an array of strings -# because in ROCOTO, there doesn't seem to be a way to pass a bash array -# from the XML to the task's script. To have an array-valued variable to -# work with, here, we create the new variable fcst_output_times_all that -# is the array-valued counterpart of FCST_OUTPUT_TIMES_ALL. -fcst_output_times_all=($(printf "%s" "${FCST_OUTPUT_TIMES_ALL}")) - -# List of times (each of the form YYYYMMDDHH) for which there is forecast -# APCP (accumulated precipitation) output for the current day. We start -# constructing this by extracting from the full list of all forecast APCP -# output times (i.e. from all cycles) all elements that contain the current -# task's day (in the form YYYYMMDD). -fcst_output_times_crnt_day=() -if [[ ${fcst_output_times_all[@]} =~ ${yyyymmdd_task} ]]; then - fcst_output_times_crnt_day=( $(printf "%s\n" "${fcst_output_times_all[@]}" | grep "^${yyyymmdd_task}") ) -fi -# If the 0th hour of the current day is in this list (and if it is, it -# will be the first element), remove it because for APCP, that time is -# considered part of the previous day (because it represents precipitation -# that occurred during the last hour of the previous day). -if [[ ${#fcst_output_times_crnt_day[@]} -gt 0 ]] && \ - [[ ${fcst_output_times_crnt_day[0]} == "${yyyymmdd_task}00" ]]; then - fcst_output_times_crnt_day=(${fcst_output_times_crnt_day[@]:1}) -fi -# If the 0th hour of the next day (i.e. the day after yyyymmdd_task) is -# one of the output times in the list of all APCP output times, we include -# it in the list for the current day because for APCP, that time is -# considered part of the current day (because it represents precipitation -# that occured during the last hour of the current day). 
-yyyymmdd00_task_p1d=$(${DATE_UTIL} --date "${yyyymmdd_task} 1 day" +%Y%m%d%H) -if [[ ${fcst_output_times_all[@]} =~ ${yyyymmdd00_task_p1d} ]]; then - fcst_output_times_crnt_day+=(${yyyymmdd00_task_p1d}) -fi - -# If there are no forecast APCP output times on the day of the current -# task, exit the script. -num_fcst_output_times_crnt_day=${#fcst_output_times_crnt_day[@]} -if [[ ${num_fcst_output_times_crnt_day} -eq 0 ]]; then - print_info_msg " -None of the forecast APCP output times fall within the day (including the -0th hour of the next day) associated with the current task (yyyymmdd_task): - yyyymmdd_task = \"${yyyymmdd_task}\" -Thus, there is no need to retrieve any obs files." - exit -fi - +# +#----------------------------------------------------------------------- +# +# Get the list of all the times in the current day at which to retrieve +# obs. This is an array with elements having format "YYYYMMDDHH". +# +#----------------------------------------------------------------------- +# +array_name="OBS_RETRIEVE_TIMES_${OBTYPE}_${yyyymmdd_task}" +eval obs_retrieve_times_crnt_day=\( \${${array_name}[@]} \) +# +#----------------------------------------------------------------------- +# # Obs files will be obtained by extracting them from the relevant 6-hourly # archives. Thus, we need the sequence of archive hours over which to # loop. In the simplest case, this sequence will be "6 12 18 24". This -# will be the case if the forecast output times include all hours of the -# task's day and if none of the obs files for this day already exist on -# disk. In other cases, the sequence we loop over will be a subset of -# "6 12 18 24". +# will be the case if the observation retrieval times include all hours +# of the task's day and if none of the obs files for this day already +# exist on disk. In other cases, the sequence we loop over will be a +# subset of "6 12 18 24". # # To generate this sequence, we first set its starting and ending values # as well as the interval. 
+# +#----------------------------------------------------------------------- +# # Sequence interval must be 6 hours because the archives are 6-hourly. arcv_hr_incr=6 -# Initial guess for starting archive hour. This is set to the archive -# hour containing obs at the first forecast output time of the day. -hh_first=$(echo ${fcst_output_times_crnt_day[0]} | cut -c9-10) +# Initial guess for starting archive hour. This is set to the archive +# hour containing obs at the first obs retrieval time of the day. +hh_first=$(echo ${obs_retrieve_times_crnt_day[0]} | cut -c9-10) hr_first=$((10#${hh_first})) arcv_hr_start=$(ceil ${hr_first} ${arcv_hr_incr}) arcv_hr_start=$(( arcv_hr_start*arcv_hr_incr )) # Ending archive hour. This is set to the archive hour containing obs at -# the last forecast output time of the day. -hh_last=$(echo ${fcst_output_times_crnt_day[-1]} | cut -c9-10) +# the last obs retrieval time of the day. +hh_last=$(echo ${obs_retrieve_times_crnt_day[-1]} | cut -c9-10) hr_last=$((10#${hh_last})) if [[ ${hr_last} -eq 0 ]]; then arcv_hr_end=24 @@ -257,11 +235,11 @@ fi # starting archive hour. In the process, keep a count of the number of # obs files that already exist on disk. 
num_existing_files=0 -for yyyymmddhh in ${fcst_output_times_crnt_day[@]}; do +for yyyymmddhh in ${obs_retrieve_times_crnt_day[@]}; do yyyymmdd=$(echo ${yyyymmddhh} | cut -c1-8) hh=$(echo ${yyyymmddhh} | cut -c9-10) day_dir_proc="${basedir_proc}/${yyyymmdd}" - fn_proc="ccpa.t${hh}z.${accum}h.hrap.conus.gb2" + fn_proc="ccpa.t${hh}z.${accum_obs_fmt}h.hrap.conus.gb2" fp_proc="${day_dir_proc}/${fn_proc}" if [[ -f ${fp_proc} ]]; then num_existing_files=$((num_existing_files+1)) @@ -273,7 +251,7 @@ File already exists on disk: arcv_hr_start=$(ceil ${hr} ${arcv_hr_incr}) arcv_hr_start=$(( arcv_hr_start*arcv_hr_incr )) print_info_msg " -File does not exists on disk: +File does not exist on disk: fp_proc = \"${fp_proc}\" Setting the hour (since 00) of the first archive to retrieve to: arcv_hr_start = \"${arcv_hr_start}\"" @@ -282,32 +260,39 @@ Setting the hour (since 00) of the first archive to retrieve to: done # If the number of obs files that already exist on disk is equal to the -# number of files needed, then there is no need to retrieve any files. -num_needed_files=$((num_fcst_output_times_crnt_day)) -if [[ ${num_existing_files} -eq ${num_needed_files} ]]; then +# number of obs files needed, then there is no need to retrieve any files. +num_obs_retrieve_times_crnt_day=${#obs_retrieve_times_crnt_day[@]} +if [[ ${num_existing_files} -eq ${num_obs_retrieve_times_crnt_day} ]]; then + print_info_msg " All obs files needed for the current day (yyyymmdd_task) already exist on disk: yyyymmdd_task = \"${yyyymmdd_task}\" Thus, there is no need to retrieve any files." exit -# Otherwise, will need to retrieve files. In this case, set the sequence -# of hours corresponding to the archives from which files will be retrieved. + +# If the number of obs files that already exist on disk is not equal to +# the number of obs files needed, then we will need to retrieve files. +# In this case, set the sequence of hours corresponding to the archives +# from which files will be retrieved. 
else + arcv_hrs=($(seq ${arcv_hr_start} ${arcv_hr_incr} ${arcv_hr_end})) arcv_hrs_str="( "$( printf "%s " "${arcv_hrs[@]}" )")" print_info_msg " At least some obs files needed needed for the current day (yyyymmdd_task) do not exist on disk: yyyymmdd_task = \"${yyyymmdd_task}\" -The number of obs files needed is: - num_needed_files = ${num_needed_files} +The number of obs files needed for the current day (which is equal to the +number of observation retrieval times for the current day) is: + num_obs_retrieve_times_crnt_day = ${num_obs_retrieve_times_crnt_day} The number of obs files that already exist on disk is: num_existing_files = ${num_existing_files} Will retrieve remaining files by looping over archives corresponding to the following hours (since 00 of this day): arcv_hrs = ${arcv_hrs_str} " + fi # #----------------------------------------------------------------------- @@ -348,36 +333,48 @@ arcv_hr = ${arcv_hr}" yyyymmdd_arcv=$(echo ${yyyymmddhh_arcv} | cut -c1-8) hh_arcv=$(echo ${yyyymmddhh_arcv} | cut -c9-10) - # Directory that will contain the CCPA grib2 files retrieved from the - # current 6-hourly archive file. We refer to this as the "raw" quarter- - # daily directory because it will contain the files as they are in the - # archive before any processing by this script. - qrtrday_dir_raw="${basedir_raw}/${yyyymmddhh_arcv}" + # Directory that will contain the grib2 files retrieved from the current + # archive file. We refer to this as the "raw" archive directory because + # it will contain the files as they are in the archive before any processing + # by this script. + arcv_dir_raw="${basedir_raw}/${yyyymmddhh_arcv}" - # Check whether any of the forecast APCP output times for the day associated - # with this task fall in the time interval spanned by the current archive. 
- # If so, set the flag (do_retrieve) to retrieve the files in the current + # Check whether any of the obs retrieval times for the day associated with + # this task fall in the time interval spanned by the current archive. If + # so, set the flag (do_retrieve) to retrieve the files in the current # archive. - yyyymmddhh_qrtrday_start=$(${DATE_UTIL} --date "${yyyymmdd_arcv} ${hh_arcv} 5 hours ago" +%Y%m%d%H) - yyyymmddhh_qrtrday_end=${yyyymmddhh_arcv} + hrs_ago=$((arcv_hr_incr - 1)) + arcv_contents_yyyymmddhh_start=$(${DATE_UTIL} --date "${yyyymmdd_arcv} ${hh_arcv} ${hrs_ago} hours ago" +%Y%m%d%H) + arcv_contents_yyyymmddhh_end=${yyyymmddhh_arcv} do_retrieve="FALSE" - for (( i=0; i<${num_fcst_output_times_crnt_day}; i++ )); do - output_time=${fcst_output_times_crnt_day[i]} - if [[ "${output_time}" -ge "${yyyymmddhh_qrtrday_start}" ]] && \ - [[ "${output_time}" -le "${yyyymmddhh_qrtrday_end}" ]]; then + for (( i=0; i<${num_obs_retrieve_times_crnt_day}; i++ )); do + obs_retrieve_time=${obs_retrieve_times_crnt_day[i]} + if [[ "${obs_retrieve_time}" -ge "${arcv_contents_yyyymmddhh_start}" ]] && \ + [[ "${obs_retrieve_time}" -le "${arcv_contents_yyyymmddhh_end}" ]]; then do_retrieve="TRUE" break fi done - if [[ $(boolify "${do_retrieve}") == "TRUE" ]]; then + if [[ $(boolify "${do_retrieve}") != "TRUE" ]]; then + + print_info_msg " +None of the times in the current day (or hour 00 of the next day) at which +obs need to be retrieved fall in the range spanned by the current ${arcv_hr_incr}-hourly +archive file. 
The bounds of the data in the current archive file are: + arcv_contents_yyyymmddhh_start = \"${arcv_contents_yyyymmddhh_start}\" + arcv_contents_yyyymmddhh_end = \"${arcv_contents_yyyymmddhh_end}\" +The times at which obs need to be retrieved are: + obs_retrieve_times_crnt_day = ($(printf "\"%s\" " ${obs_retrieve_times_crnt_day[@]}))" + + else - # Make sure the raw quarter-daily directory exists because it is used - # below as the output directory of the retrieve_data.py script (so if - # this directory doesn't already exist, that script will fail). Creating - # this directory also ensures that the raw base directory (basedir_raw) - # exists before we change location to it below. - mkdir -p ${qrtrday_dir_raw} + # Make sure the raw archive directory exists because it is used below as + # the output directory of the retrieve_data.py script (so if this directory + # doesn't already exist, that script will fail). Creating this directory + # also ensures that the raw base directory (basedir_raw) exists before we + # change location to it below. + mkdir -p ${arcv_dir_raw} # The retrieve_data.py script first extracts the contents of the archive # file into the directory it was called from and then moves them to the @@ -389,8 +386,8 @@ arcv_hr = ${arcv_hr}" # same names are extracted into different directories. cd ${basedir_raw} - # Pull CCPA data from HPSS. This will get all 6 obs files in the current - # archive and place them in the raw quarter-daily directory. + # Pull obs from HPSS. This will get all the obs files in the current + # archive and place them in the raw archive directory. 
cmd=" python3 -u ${USHdir}/retrieve_data.py \ --debug \ @@ -399,7 +396,7 @@ arcv_hr = ${arcv_hr}" --cycle_date ${yyyymmddhh_arcv} \ --data_stores hpss \ --data_type CCPA_obs \ - --output_path ${qrtrday_dir_raw} \ + --output_path ${arcv_dir_raw} \ --summary_file retrieve_data.log" print_info_msg "CALLING: ${cmd}" @@ -415,9 +412,12 @@ arcv_hr = ${arcv_hr}" yyyymmddhh=$(${DATE_UTIL} --date "${yyyymmdd_arcv} ${hh_arcv} ${hrs_ago} hours ago" +%Y%m%d%H) yyyymmdd=$(echo ${yyyymmddhh} | cut -c1-8) hh=$(echo ${yyyymmddhh} | cut -c9-10) - if [[ ${fcst_output_times_crnt_day[@]} =~ ${yyyymmddhh} ]]; then - fn_raw="ccpa.t${hh}z.${accum}h.hrap.conus.gb2" - fp_raw="${qrtrday_dir_raw}/${fn_raw}" + # Create the processed grib2 obs file from the raw one (by moving, copying, + # or otherwise) only if the time of the current file in the current archive + # also exists in the list of obs retrieval times for the current day. + if [[ ${obs_retrieve_times_crnt_day[@]} =~ ${yyyymmddhh} ]]; then + fn_raw="ccpa.t${hh}z.${accum_obs_fmt}h.hrap.conus.gb2" + fp_raw="${arcv_dir_raw}/${fn_raw}" day_dir_proc="${basedir_proc}/${yyyymmdd}" mkdir -p ${day_dir_proc} fn_proc="${fn_raw}" @@ -435,17 +435,6 @@ arcv_hr = ${arcv_hr}" fi done - else - - print_info_msg " -None of the current day's forecast APCP output times fall in the range -spanned by the current 6-hourly archive file. The bounds of the current -archive are: - yyyymmddhh_qrtrday_start = \"${yyyymmddhh_qrtrday_start}\" - yyyymmddhh_qrtrday_end = \"${yyyymmddhh_qrtrday_end}\" -The forecast output times for APCP are: - fcst_output_times_crnt_day = ($(printf "\"%s\" " ${fcst_output_times_crnt_day[@]}))" - fi done diff --git a/ush/get_obs_mrms.sh b/ush/get_obs_mrms.sh index af3b8ca942..a0d0590667 100755 --- a/ush/get_obs_mrms.sh +++ b/ush/get_obs_mrms.sh @@ -8,7 +8,7 @@ #----------------------------------------------------------------------- # . 
$USHdir/source_util_funcs.sh -for sect in user platform ; do +for sect in user platform verification ; do source_yaml ${GLOBAL_VAR_DEFNS_FP} ${sect} done @@ -84,31 +84,24 @@ yyyymmdd_task=${PDY} # as the "processed" base directory because it contains the files after # all processing by this script is complete. basedir_proc=${OBS_DIR} +# +#----------------------------------------------------------------------- +# +# Get the list of all the times in the current day at which to retrieve +# obs. This is an array with elements having format "YYYYMMDDHH". +# +#----------------------------------------------------------------------- +# +array_name="OBS_RETRIEVE_TIMES_${OBTYPE}_${yyyymmdd_task}" +eval obs_retrieve_times_crnt_day=\( \${${array_name}[@]} \) -# The environment variable FCST_OUTPUT_TIMES_ALL set in the ROCOTO XML is -# a scalar string containing all relevant forecast output times (each in -# the form YYYYMMDDHH) separated by spaces. It isn't an array of strings -# because in ROCOTO, there doesn't seem to be a way to pass a bash array -# from the XML to the task's script. To have an array-valued variable to -# work with, here, we create the new variable fcst_output_times_all that -# is the array-valued counterpart of FCST_OUTPUT_TIMES_ALL. -fcst_output_times_all=($(printf "%s" "${FCST_OUTPUT_TIMES_ALL}")) - -# List of times (each of the form YYYYMMDDHH) for which there is forecast -# output for the current day. We extract this list from the full list of -# all forecast output times (i.e. from all cycles). -fcst_output_times_crnt_day=() -if [[ ${fcst_output_times_all[@]} =~ ${yyyymmdd_task} ]]; then - fcst_output_times_crnt_day=( $(printf "%s\n" "${fcst_output_times_all[@]}" | grep "^${yyyymmdd_task}") ) -fi - -# If there are no forecast output times on the day of the current task, -# exit the script. 
-num_fcst_output_times_crnt_day=${#fcst_output_times_crnt_day[@]} -if [[ ${num_fcst_output_times_crnt_day} -eq 0 ]]; then +# If there are no observation retrieval times on the day of the current +# task, exit the script. +num_obs_retrieve_times_crnt_day=${#obs_retrieve_times_crnt_day[@]} +if [[ ${num_obs_retrieve_times_crnt_day} -eq 0 ]]; then print_info_msg " -None of the forecast output times fall within the day associated with the -current task (yyyymmdd_task): +None of the observation retrieval times fall within the day associated +with the current task (yyyymmdd_task): yyyymmdd_task = \"${yyyymmdd_task}\" Thus, there is no need to retrieve any obs files." exit @@ -120,7 +113,7 @@ fi num_existing_files=0 num_mrms_fields=${#mrms_fields[@]} for (( i=0; i<${num_mrms_fields}; i++ )); do - for yyyymmddhh in ${fcst_output_times_crnt_day[@]}; do + for yyyymmddhh in ${obs_retrieve_times_crnt_day[@]}; do yyyymmdd=$(echo ${yyyymmddhh} | cut -c1-8) hh=$(echo ${yyyymmddhh} | cut -c9-10) day_dir_proc="${basedir_proc}/${yyyymmdd}" @@ -139,7 +132,7 @@ done # If the number of obs files that already exist on disk is equal to the # number of files needed, then there is no need to retrieve any files. -num_needed_files=$((num_fcst_output_times_crnt_day*num_mrms_fields)) +num_needed_files=$((num_obs_retrieve_times_crnt_day*num_mrms_fields)) if [[ ${num_existing_files} -eq $((num_needed_files)) ]]; then print_info_msg " All obs files needed for the current day (yyyymmdd_task) already exist @@ -246,7 +239,7 @@ $cmd || print_err_msg_exit "Could not retrieve obs from HPSS." # process renaming it) to the processed location. 
for hr in $(seq 0 1 23); do yyyymmddhh=$(${DATE_UTIL} --date "${yyyymmdd_task} ${hr} hours" +%Y%m%d%H) - if [[ ${fcst_output_times_crnt_day[@]} =~ ${yyyymmddhh} ]]; then + if [[ ${obs_retrieve_times_crnt_day[@]} =~ ${yyyymmddhh} ]]; then for (( i=0; i<${num_mrms_fields}; i++ )); do python ${USHdir}/mrms_pull_topofhour.py \ --valid_time ${yyyymmddhh} \ diff --git a/ush/get_obs_ndas.sh b/ush/get_obs_ndas.sh index 8b0c87b3eb..7ab6fc652b 100755 --- a/ush/get_obs_ndas.sh +++ b/ush/get_obs_ndas.sh @@ -8,7 +8,7 @@ #----------------------------------------------------------------------- # . $USHdir/source_util_funcs.sh -for sect in user platform ; do +for sect in user platform verification ; do source_yaml ${GLOBAL_VAR_DEFNS_FP} ${sect} done @@ -49,6 +49,18 @@ set -u #----------------------------------------------------------------------- # +# The time interval (in hours) at which the obs are available on HPSS +# must divide evenly into 24. Otherwise, different days would have obs +# available at different hours-of-day. Make sure this is the case. +remainder=$(( 24 % NDAS_OBS_AVAIL_INTVL_HRS )) +if [ ${remainder} -ne 0 ]; then + print_err_msg_exit "\ +The obs availability interval NDAS_OBS_AVAIL_INTVL_HRS must divide evenly +into 24 but doesn't: + NDAS_OBS_AVAIL_INTVL_HRS = ${NDAS_OBS_AVAIL_INTVL_HRS} + mod(24, NDAS_OBS_AVAIL_INTVL_HRS) = ${remainder}" +fi + # The day (in the form YYYMMDD) associated with the current task via the # task's cycledefs attribute in the ROCOTO xml. yyyymmdd_task=${PDY} @@ -58,31 +70,28 @@ yyyymmdd_task=${PDY} # as the "processed" base directory because it contains the files after # all processing by this script is complete. basedir_proc=${OBS_DIR} +# +#----------------------------------------------------------------------- +# +# Get the list of all the times in the current day at which to retrieve +# obs. This is an array with elements having format "YYYYMMDDHH". 
+# +#----------------------------------------------------------------------- +# +array_name="OBS_RETRIEVE_TIMES_${OBTYPE}_${yyyymmdd_task}" +eval obs_retrieve_times_crnt_day=\( \${${array_name}[@]} \) + + + -# The environment variable FCST_OUTPUT_TIMES_ALL set in the ROCOTO XML is -# a scalar string containing all relevant forecast output times (each in -# the form YYYYMMDDHH) separated by spaces. It isn't an array of strings -# because in ROCOTO, there doesn't seem to be a way to pass a bash array -# from the XML to the task's script. To have an array-valued variable to -# work with, here, we create the new variable fcst_output_times_all that -# is the array-valued counterpart of FCST_OUTPUT_TIMES_ALL. -fcst_output_times_all=($(printf "%s" "${FCST_OUTPUT_TIMES_ALL}")) - -# List of times (each of the form YYYYMMDDHH) for which there is forecast -# output for the current day. We extract this list from the full list of -# all forecast output times (i.e. from all cycles). -fcst_output_times_crnt_day=() -if [[ ${fcst_output_times_all[@]} =~ ${yyyymmdd_task} ]]; then - fcst_output_times_crnt_day=( $(printf "%s\n" "${fcst_output_times_all[@]}" | grep "^${yyyymmdd_task}") ) -fi -# If there are no forecast output times on the day of the current task, -# exit the script. -num_fcst_output_times_crnt_day=${#fcst_output_times_crnt_day[@]} -if [[ ${num_fcst_output_times_crnt_day} -eq 0 ]]; then +# If there are no observation retrieval times on the day of the current +# task, exit the script. +num_obs_retrieve_times_crnt_day=${#obs_retrieve_times_crnt_day[@]} +if [[ ${num_obs_retrieve_times_crnt_day} -eq 0 ]]; then print_info_msg " -None of the forecast output times fall within the day associated with the -current task (yyyymmdd_task): +None of the observation retrieval times fall within the day associated +with the current task (yyyymmdd_task): yyyymmdd_task = \"${yyyymmdd_task}\" Thus, there is no need to retrieve any obs files." 
exit @@ -91,10 +100,10 @@ fi # Obs files will be obtained by extracting them from the relevant 6-hourly # archives. Thus, we need the sequence of archive hours over which to # loop. In the simplest case, this sequence will be "6 12 18 24". This -# will be the case if the forecast output times include all hours of the -# task's day and if none of the obs files for this day already exist on -# disk. In other cases, the sequence we loop over will be a subset of -# "6 12 18 24". +# will be the case if the observation retrieval times include all hours +# of the task's day and if none of the obs files for this day already +# exist on disk. In other cases, the sequence we loop over will be a +# subset of "6 12 18 24". # # To generate this sequence, we first set its starting and ending values # as well as the interval. @@ -103,14 +112,14 @@ fi arcv_hr_incr=6 # Initial guess for starting archive hour. This is set to the archive -# hour containing obs at the first forecast output time of the day. -hh_first=$(echo ${fcst_output_times_crnt_day[0]} | cut -c9-10) +# hour containing obs at the first observation retrieval time of the day. +hh_first=$(echo ${obs_retrieve_times_crnt_day[0]} | cut -c9-10) hr_first=$((10#${hh_first})) arcv_hr_start=$(( (hr_first/arcv_hr_incr + 1)*arcv_hr_incr )) # Ending archive hour. This is set to the archive hour containing obs at -# the last forecast output time of the day. -hh_last=$(echo ${fcst_output_times_crnt_day[-1]} | cut -c9-10) +# the last observation retrieval time of the day. +hh_last=$(echo ${obs_retrieve_times_crnt_day[-1]} | cut -c9-10) hr_last=$((10#${hh_last})) arcv_hr_end=$(( (hr_last/arcv_hr_incr + 1)*arcv_hr_incr )) @@ -118,7 +127,7 @@ arcv_hr_end=$(( (hr_last/arcv_hr_incr + 1)*arcv_hr_incr )) # starting archive hour. In the process, keep a count of the number of # obs files that already exist on disk. 
num_existing_files=0 -for yyyymmddhh in ${fcst_output_times_crnt_day[@]}; do +for yyyymmddhh in ${obs_retrieve_times_crnt_day[@]}; do yyyymmdd=$(echo ${yyyymmddhh} | cut -c1-8) hh=$(echo ${yyyymmddhh} | cut -c9-10) day_dir_proc="${basedir_proc}" @@ -143,7 +152,7 @@ done # If the number of obs files that already exist on disk is equal to the # number of files needed, then there is no need to retrieve any files. -num_needed_files=$((num_fcst_output_times_crnt_day)) +num_needed_files=$((num_obs_retrieve_times_crnt_day)) if [[ ${num_existing_files} -eq ${num_needed_files} ]]; then print_info_msg " All obs files needed for the current day (yyyymmdd_task) already exist @@ -214,17 +223,18 @@ arcv_hr = ${arcv_hr}" # archive before any processing by this script. qrtrday_dir_raw="${basedir_raw}/${yyyymmddhh_arcv}" - # Check whether any of the forecast output times for the day associated - # with this task fall in the time interval spanned by the current archive. - # If so, set the flag (do_retrieve) to retrieve the files in the current + # Check whether any of the observation retrieval times for the day + # associated with this task fall in the time interval spanned by the + # current archive. If so, set the flag (do_retrieve) to retrieve the + # files in the current # archive. 
yyyymmddhh_qrtrday_start=$(${DATE_UTIL} --date "${yyyymmdd_arcv} ${hh_arcv} 6 hours ago" +%Y%m%d%H) yyyymmddhh_qrtrday_end=$(${DATE_UTIL} --date "${yyyymmdd_arcv} ${hh_arcv} 1 hours ago" +%Y%m%d%H) do_retrieve="FALSE" - for (( i=0; i<${num_fcst_output_times_crnt_day}; i++ )); do - output_time=${fcst_output_times_crnt_day[i]} - if [[ "${output_time}" -ge "${yyyymmddhh_qrtrday_start}" ]] && \ - [[ "${output_time}" -le "${yyyymmddhh_qrtrday_end}" ]]; then + for (( i=0; i<${num_obs_retrieve_times_crnt_day}; i++ )); do + retrieve_time=${obs_retrieve_times_crnt_day[i]} + if [[ "${retrieve_time}" -ge "${yyyymmddhh_qrtrday_start}" ]] && \ + [[ "${retrieve_time}" -le "${yyyymmddhh_qrtrday_end}" ]]; then do_retrieve="TRUE" break fi @@ -275,7 +285,7 @@ arcv_hr = ${arcv_hr}" yyyymmddhh=$(${DATE_UTIL} --date "${yyyymmdd_arcv} ${hh_arcv} ${hrs_ago} hours ago" +%Y%m%d%H) yyyymmdd=$(echo ${yyyymmddhh} | cut -c1-8) hh=$(echo ${yyyymmddhh} | cut -c9-10) - if [[ ${fcst_output_times_crnt_day[@]} =~ ${yyyymmddhh} ]]; then + if [[ ${obs_retrieve_times_crnt_day[@]} =~ ${yyyymmddhh} ]]; then fn_raw="nam.t${hh_arcv}z.prepbufr.tm${hrs_ago}.nr" fp_raw="${qrtrday_dir_raw}/${fn_raw}" day_dir_proc="${basedir_proc}" @@ -289,13 +299,13 @@ arcv_hr = ${arcv_hr}" else print_info_msg " -None of the current day's forecast output times fall in the range spanned -by the current 6-hourly archive file. The bounds of the current archive -are: +None of the current day's observation retrieval times fall in the range +spanned by the current 6-hourly archive file. 
The bounds of the current +archive are: yyyymmddhh_qrtrday_start = \"${yyyymmddhh_qrtrday_start}\" yyyymmddhh_qrtrday_end = \"${yyyymmddhh_qrtrday_end}\" -The forecast output times are: - fcst_output_times_crnt_day = ($(printf "\"%s\" " ${fcst_output_times_crnt_day[@]}))" +The observation retrieval times are: + obs_retrieve_times_crnt_day = ($(printf "\"%s\" " ${obs_retrieve_times_crnt_day[@]}))" fi diff --git a/ush/get_obs_nohrsc.sh b/ush/get_obs_nohrsc.sh index 910cf3c35a..c71266ed07 100755 --- a/ush/get_obs_nohrsc.sh +++ b/ush/get_obs_nohrsc.sh @@ -46,86 +46,9 @@ set -u #----------------------------------------------------------------------- # -# The day (in the form YYYMMDD) associated with the current task via the -# task's cycledefs attribute in the ROCOTO xml. -yyyymmdd_task=${PDY} - -# Base directory in which the daily subdirectories containing the grib2 -# obs files will appear after this script is done. We refer to this as -# the "processed" base directory because it contains the files after all -# processing by this script is complete. -basedir_proc=${OBS_DIR} -# -#----------------------------------------------------------------------- -# -# Generate a list of forecast output times for the current day. Note -# that if the 0th hour of the next day (i.e. the day after the one -# associated with this task) is one of the forecast output times, we -# include it in the list for the current day because the accumulation -# associated with that hour occurred during the current day. -# -#----------------------------------------------------------------------- -# - -# The environment variable FCST_OUTPUT_TIMES_ALL set in the ROCOTO XML is -# a scalar string containing all relevant forecast output times (each in -# the form YYYYMMDDHH) separated by spaces. It isn't an array of strings -# because in ROCOTO, there doesn't seem to be a way to pass a bash array -# from the XML to the task's script. 
To have an array-valued variable to -# work with, here, we create the new variable fcst_output_times_all that -# is the array-valued counterpart of FCST_OUTPUT_TIMES_ALL. -fcst_output_times_all=($(printf "%s" "${FCST_OUTPUT_TIMES_ALL}")) - -# List of times (each of the form YYYYMMDDHH) for which there is forecast -# ASNOW (accumulated snow) output for the current day. We start constructing -# this by extracting from the full list of all forecast ASNOW output times -# (i.e. from all cycles) all elements that contain the current task's day -# (in the form YYYYMMDD). -fcst_output_times_crnt_day=() -if [[ ${fcst_output_times_all[@]} =~ ${yyyymmdd_task} ]]; then - fcst_output_times_crnt_day=( $(printf "%s\n" "${fcst_output_times_all[@]}" | grep "^${yyyymmdd_task}") ) -fi -# If the 0th hour of the current day is in this list (and if it is, it -# will be the first element), remove it because for ASNOW, that time is -# considered part of the previous day (because it represents snowfall -# that occurred during the last hour of the previous day). -if [[ ${#fcst_output_times_crnt_day[@]} -gt 0 ]] && \ - [[ ${fcst_output_times_crnt_day[0]} == "${yyyymmdd_task}00" ]]; then - fcst_output_times_crnt_day=(${fcst_output_times_crnt_day[@]:1}) -fi -# If the 0th hour of the next day (i.e. the day after yyyymmdd_task) is -# one of the output times in the list of all ASNOW output times, we -# include it in the list for the current day because for ASNOW, that time -# is considered part of the current day (because it represents snowfall -# that occured during the last hour of the current day). -yyyymmdd00_task_p1d=$(${DATE_UTIL} --date "${yyyymmdd_task} 1 day" +%Y%m%d%H) -if [[ ${fcst_output_times_all[@]} =~ ${yyyymmdd00_task_p1d} ]]; then - fcst_output_times_crnt_day+=(${yyyymmdd00_task_p1d}) -fi - -# If there are no forecast ASNOW output times on the day of the current -# task, exit the script. 
-num_fcst_output_times_crnt_day=${#fcst_output_times_crnt_day[@]} -if [[ ${num_fcst_output_times_crnt_day} -eq 0 ]]; then - print_info_msg " -None of the forecast ASNOW output times fall within the day (including the -0th hour of the next day) associated with the current task (yyyymmdd_task): - yyyymmdd_task = \"${yyyymmdd_task}\" -Thus, there is no need to retrieve any obs files." - exit -fi -# -#----------------------------------------------------------------------- -# -# Generate a list of all the times at which obs are available for the -# current day, possibly including hour 00 of the next day. -# -#----------------------------------------------------------------------- -# - # The time interval (in hours) at which the obs are available on HPSS -# must be evenly divisible into 24. Otherwise, different days would -# have obs available at different hours. Make sure this is the case. +# must divide evenly into 24. Otherwise, different days would have obs +# available at different hours-of-day. Make sure this is the case. remainder=$(( 24 % NOHRSC_OBS_AVAIL_INTVL_HRS )) if [ ${remainder} -ne 0 ]; then print_err_msg_exit "\ @@ -135,93 +58,36 @@ into 24 but doesn't: mod(24, NOHRSC_OBS_AVAIL_INTVL_HRS) = ${remainder}" fi -# Construct the array of times during the current day (and possibly -# during hour 00 of the next day) at which obs are available on HPSS. -# Each element of this array is of the form "YYYYMMDDHH". -num_obs_avail_times=$((24/NOHRSC_OBS_AVAIL_INTVL_HRS)) -obs_avail_times_crnt_day=() -# Note: Start at i=1 because the output for hour 00 of the current day is -# considered part of the previous day (because it represents accumulation -# that occurred during the previous day). 
-for (( i=1; i<$((num_obs_avail_times+1)); i++ )); do - hrs=$((i*NOHRSC_OBS_AVAIL_INTVL_HRS)) - obs_avail_times_crnt_day+=( $(${DATE_UTIL} --date "${yyyymmdd_task} ${hrs} hours" +%Y%m%d%H) ) -done -# -#----------------------------------------------------------------------- -# -# Generate a list of all the times at which to retrieve obs. This is -# obtained from the intersection of the list of times at which there is -# forecast output and the list of times at which there are obs available. -# Note that if the forecast output is more frequent than the data is -# available, then the forecast values must be accumulated together to -# get values at the times at which the obs are available. This is done -# in another workflow task using the METplus tool PcpCombine. -# -#----------------------------------------------------------------------- -# -obs_retrieve_times_crnt_day=() -for yyyymmddhh in ${fcst_output_times_crnt_day[@]}; do - if [[ ${obs_avail_times_crnt_day[@]} =~ ${yyyymmddhh} ]] ; then - obs_retrieve_times_crnt_day+=(${yyyymmddhh}) - fi -done +# Accumulation period to use when getting obs files. This is simply (a +# properly formatted version of) the obs availability interval. +accum_obs_fmt=$( printf "%d" "${NOHRSC_OBS_AVAIL_INTVL_HRS}" ) + +# The day (in the form YYYMMDD) associated with the current task via the +# task's cycledefs attribute in the ROCOTO xml. +yyyymmdd_task=${PDY} + +# Base directory in which the daily subdirectories containing the grib2 +# obs files will appear after this script is done. We refer to this as +# the "processed" base directory because it contains the files after all +# processing by this script is complete. +basedir_proc=${OBS_DIR} # #----------------------------------------------------------------------- # -# +# Get the list of all the times in the current day at which to retrieve +# obs. This is an array with elements having format "YYYYMMDDHH". 
# #----------------------------------------------------------------------- # array_name="OBS_RETRIEVE_TIMES_${OBTYPE}_${yyyymmdd_task}" -eval obs_retrieve_times=\( \${${array_name}[@]} \) -echo -echo "QQQQQQQQQQQQQQQQQQQ" -#echo "obs_retrieve_times = |${obs_retrieve_times[@]}|" -echo "obs_retrieve_times =" -echo "|${obs_retrieve_times[@]}|" - -# For testing. -#obs_retrieve_times+=('abcd') -#obs_retrieve_times[4]='abcd' - -err_msg=" -The two methods of obtaining the array of obs retrieve times don't match: - obs_retrieve_times_crnt_day = - (${obs_retrieve_times_crnt_day[@]}) - obs_retrieve_times = - (${obs_retrieve_times[@]})" - -n1=${#obs_retrieve_times_crnt_day[@]} -n2=${#obs_retrieve_times[@]} -if [ ${n1} -ne ${n2} ]; then - print_err_msg_exit "${err_msg}" -fi - -for (( i=0; i<${n1}; i++ )); do - elem1=${obs_retrieve_times_crnt_day[$i]} - elem2=${obs_retrieve_times[$i]} - if [ ${elem1} != ${elem2} ]; then - print_err_msg_exit "${err_msg}" - fi -done - -obs_retrieve_times_crnt_day=($( printf "%s " "${obs_retrieve_times[@]}" )) - -echo -echo "RRRRRRRRRRRRRRRRR" -#echo "obs_retrieve_times_crnt_day = |${obs_retrieve_times_crnt_day[@]}|" -echo "obs_retrieve_times_crnt_day =" -echo "|${obs_retrieve_times_crnt_day[@]}|" - -#exit 1 +eval obs_retrieve_times_crnt_day=\( \${${array_name}[@]} \) # #----------------------------------------------------------------------- # # Obs files will be obtained by extracting them from the relevant 24-hourly # archives. Thus, we need the sequence of archive hours over which to # loop. In the simplest case, this sequence will be "0 24". This will -# be the case if the forecast output times include all hours of the +# be the case if the observation retrieval times include all hours of the # task's day and if none of the obs files for this day already exist on # disk. In other cases, the sequence we loop over will be a subset of # "0 24", e.g. just "0" or just "24". 
@@ -261,7 +127,7 @@ for yyyymmddhh in ${obs_retrieve_times_crnt_day[@]}; do yyyymmdd=$(echo ${yyyymmddhh} | cut -c1-8) hh=$(echo ${yyyymmddhh} | cut -c9-10) day_dir_proc="${basedir_proc}" - fn_proc="sfav2_CONUS_6h_${yyyymmddhh}_grid184.grb2" + fn_proc="sfav2_CONUS_${accum_obs_fmt}h_${yyyymmddhh}_grid184.grb2" fp_proc="${day_dir_proc}/${fn_proc}" if [[ -f ${fp_proc} ]]; then num_existing_files=$((num_existing_files+1)) @@ -447,12 +313,11 @@ The times at which obs need to be retrieved are: # or otherwise) only if the time of the current file in the current archive # also exists in the list of obs retrieval times for the current day. if [[ ${obs_retrieve_times_crnt_day[@]} =~ ${yyyymmddhh} ]]; then - fn_raw="sfav2_CONUS_6h_${yyyymmddhh}_grid184.grb2" + fn_raw="sfav2_CONUS_${accum_obs_fmt}h_${yyyymmddhh}_grid184.grb2" fp_raw="${arcv_dir_raw}/${fn_raw}" day_dir_proc="${basedir_proc}" mkdir -p ${day_dir_proc} fn_proc="${fn_raw}" - #fn_proc="sfav2_CONUS_6h_${yyyymmddhh}_grid184.grb2" fp_proc="${day_dir_proc}/${fn_proc}" ${mv_or_cp} ${fp_raw} ${fp_proc} fi From 31a529c569825da84278afe240c06407e98f4278 Mon Sep 17 00:00:00 2001 From: gerard ketefian Date: Wed, 25 Sep 2024 12:50:44 -0600 Subject: [PATCH 078/131] Add file (exregional_run_met_pb2nc_obs.sh) that should have been part of commit hash cae50b5d8, and remove file (exregional_run_met_pcpcombine.sh) that should NOT have been. --- scripts/exregional_run_met_pb2nc_obs.sh | 81 ++++++++++-------------- scripts/exregional_run_met_pcpcombine.sh | 26 +++----- 2 files changed, 41 insertions(+), 66 deletions(-) diff --git a/scripts/exregional_run_met_pb2nc_obs.sh b/scripts/exregional_run_met_pb2nc_obs.sh index 63d530f370..494ce74a3d 100755 --- a/scripts/exregional_run_met_pb2nc_obs.sh +++ b/scripts/exregional_run_met_pb2nc_obs.sh @@ -74,40 +74,22 @@ to convert NDAS prep buffer observation files to NetCDF format. 
# #----------------------------------------------------------------------- # -# +# The day (in the form YYYMMDD) associated with the current task via the +# task's cycledefs attribute in the ROCOTO xml. # #----------------------------------------------------------------------- # -# The day (in the form YYYMMDD) associated with the current task via the -# task's cycledefs attribute in the ROCOTO xml. yyyymmdd_task=${PDY} - -# The environment variable OUTPUT_TIMES_ALL set in the ROCOTO XML is a -# scalar string containing all relevant forecast output times (each) in -# the form YYYYMMDDHH) separated by spaces. It isn't an array of strings -# because in ROCOTO, there doesn't seem to be a way to pass a bash array -# from the XML to task's script. To have an array-valued variable to -# work with, here, we create the new variable output_times_all that is -# the array-valued counterpart of OUTPUT_TIMES_ALL. -output_times_all=($(printf "%s" "${OUTPUT_TIMES_ALL}")) - -# List of times (each of the form YYYYMMDDHH) for which there is forecast -# output for the current day. We extract this list from the full list of -# all forecast output times (i.e. from all cycles). -output_times_crnt_day=() -if [[ ${output_times_all[@]} =~ ${yyyymmdd_task} ]]; then - output_times_crnt_day=( $(printf "%s\n" "${output_times_all[@]}" | grep "^${yyyymmdd_task}") ) -fi - -num_output_times_crnt_day=${#output_times_crnt_day[@]} -if [[ ${num_output_times_crnt_day} -eq 0 ]]; then - print_info_msg " -None of the forecast output times fall within the day associated with the -current task (yyyymmdd_task): - yyyymmdd_task = \"${yyyymmdd_task}\" -Thus, there is no need to run ${METPLUSTOOLNAME} on any prepbufr files." - exit -fi +# +#----------------------------------------------------------------------- +# +# Get the list of all the times in the current day at which to retrieve +# obs. This is an array with elements having format "YYYYMMDDHH". 
+# +#----------------------------------------------------------------------- +# +array_name="OBS_RETRIEVE_TIMES_${OBTYPE}_${yyyymmdd_task}" +eval obs_retrieve_times_crnt_day=\( \${${array_name}[@]} \) # #----------------------------------------------------------------------- # @@ -159,31 +141,32 @@ STAGING_DIR="${OUTPUT_BASE}/stage/${MetplusToolName}_obs" # #----------------------------------------------------------------------- # -# Set the array of forecast hours for which to run the MET/METplus tool. +# Set the array of lead hours (relative to the date associated with this +# task) for which to run the MET/METplus tool. # #----------------------------------------------------------------------- # -FHR_LIST="" +LEADHR_LIST="" num_missing_files=0 -for yyyymmddhh in ${output_times_crnt_day[@]}; do +for yyyymmddhh in ${obs_retrieve_times_crnt_day[@]}; do yyyymmdd=$(echo ${yyyymmddhh} | cut -c1-8) hh=$(echo ${yyyymmddhh} | cut -c9-10) fn="prepbufr.ndas.${yyyymmddhh}" fp="${OBS_INPUT_DIR}/${fn}" if [[ -f "${fp}" ]]; then print_info_msg " -Found ${OBTYPE} obs file corresponding to forecast output time (yyyymmddhh): +Found ${OBTYPE} obs file corresponding to observation retrieval time +(yyyymmddhh): yyyymmddhh = \"${yyyymmddhh}\" fp = \"${fp}\" " hh_noZero=$((10#${hh})) - #FHR_LIST+=("${yyyymmddhh}") - FHR_LIST="${FHR_LIST},${hh_noZero}" + LEADHR_LIST="${LEADHR_LIST},${hh_noZero}" else num_missing_files=$((num_missing_files+1)) print_info_msg " -${OBTYPE} obs file corresponding to forecast output time (yyyymmddhh) does -not exist on disk: +${OBTYPE} obs file corresponding to observation retrieval time (yyyymmddhh) +does not exist on disk: yyyymmddhh = \"${yyyymmddhh}\" fp = \"${fp}\" Removing this time from the list of times to be processed by ${METPLUSTOOLNAME}. @@ -202,12 +185,12 @@ than the maximum allowed number (num_missing_files_max): num_missing_files_max = ${num_missing_files_max}" fi -# Remove leading comma from FHR_LIST. 
-FHR_LIST=$( echo "${FHR_LIST}" | $SED "s/^,//g" ) +# Remove leading comma from LEADHR_LIST. +LEADHR_LIST=$( echo "${LEADHR_LIST}" | $SED "s/^,//g" ) print_info_msg "$VERBOSE" "\ -Final (i.e. after filtering for missing files) set of forecast hours +Final (i.e. after filtering for missing obs files) set of lead hours (saved in a scalar string variable) is: - FHR_LIST = \"${FHR_LIST}\" + LEADHR_LIST = \"${LEADHR_LIST}\" " # #----------------------------------------------------------------------- @@ -242,15 +225,15 @@ export LOGDIR # #----------------------------------------------------------------------- # -# Do not run METplus if there isn't at least one valid forecast hour for -# which to run it. +# Do not run METplus if there isn't at least one lead hour for which to +# run it. # #----------------------------------------------------------------------- # -if [ -z "${FHR_LIST}" ]; then +if [ -z "${LEADHR_LIST}" ]; then print_err_msg_exit "\ -The list of forecast hours for which to run METplus is empty: - FHR_LIST = [${FHR_LIST}]" +The list of lead hours for which to run METplus is empty: + LEADHR_LIST = [${LEADHR_LIST}]" fi # #----------------------------------------------------------------------- @@ -314,10 +297,10 @@ settings="\ 'METPLUS_TOOL_NAME': '${METPLUS_TOOL_NAME}' 'metplus_verbosity_level': '${METPLUS_VERBOSITY_LEVEL}' # -# Date and forecast hour information. +# Date and lead hour information. # 'cdate': '$CDATE' - 'fhr_list': '${FHR_LIST}' + 'leadhr_list': '${LEADHR_LIST}' # # Input and output directory/file information. 
# diff --git a/scripts/exregional_run_met_pcpcombine.sh b/scripts/exregional_run_met_pcpcombine.sh index 9495031722..3d4d0cb9fb 100755 --- a/scripts/exregional_run_met_pcpcombine.sh +++ b/scripts/exregional_run_met_pcpcombine.sh @@ -190,13 +190,11 @@ if [ "${FCST_OR_OBS}" = "FCST" ]; then elif [ "${FCST_OR_OBS}" = "OBS" ]; then OBS_INPUT_DIR="${OBS_DIR}" - fn_template=$(eval echo \${OBS_${OBTYPE}_${VAR}_FN_TEMPLATE}) - OBS_INPUT_FN_TEMPLATE=$( eval echo ${fn_template} ) + OBS_INPUT_FN_TEMPLATE=$( eval echo ${OBS_CCPA_APCP_FN_TEMPLATE} ) OUTPUT_BASE="${vx_output_basedir}${slash_cdate_or_null}${slash_ensmem_subdir_or_null}" OUTPUT_DIR="${OUTPUT_BASE}/metprd/${MetplusToolName}_obs" - fn_template=$(eval echo \${OBS_${OBTYPE}_${VAR}_FN_TEMPLATE_PCPCOMBINE_OUTPUT}) - OUTPUT_FN_TEMPLATE=$( eval echo ${fn_template} ) + OUTPUT_FN_TEMPLATE=$( eval echo ${OBS_CCPA_APCP_FN_TEMPLATE_PCPCOMBINE_OUTPUT} ) STAGING_DIR="${OUTPUT_BASE}/stage/${FIELDNAME_IN_MET_FILEDIR_NAMES}" fi @@ -215,27 +213,22 @@ if [ "${FCST_OR_OBS}" = "FCST" ]; then base_dir="${FCST_INPUT_DIR}" fn_template="${FCST_INPUT_FN_TEMPLATE}" num_missing_files_max="${NUM_MISSING_FCST_FILES_MAX}" - subintvl_accum_hrs="${FCST_OUTPUT_INTVL_HRS}" elif [ "${FCST_OR_OBS}" = "OBS" ]; then base_dir="${OBS_INPUT_DIR}" fn_template="${OBS_INPUT_FN_TEMPLATE}" num_missing_files_max="${NUM_MISSING_OBS_FILES_MAX}" - subintvl_accum_hrs="${OBS_AVAIL_INTVL_HRS}" fi -input_accum_hh=$(printf "%02d" ${subintvl_accum_hrs}) -vx_output_intvl_hrs="$((10#${ACCUM_HH}))" -set_vx_hrs_list \ - yyyymmddhh_init="${CDATE}" \ +set_vx_fhr_list \ + cdate="${CDATE}" \ fcst_len_hrs="${FCST_LEN_HRS}" \ - vx_output_intvl_hrs="${vx_output_intvl_hrs}" \ - field_is_cumul="TRUE" \ - check_subintvl_files="TRUE" \ - subintvl_accum_hrs="${subintvl_accum_hrs}" \ + field="$VAR" \ + accum_hh="${ACCUM_HH}" \ base_dir="${base_dir}" \ fn_template="${fn_template}" \ + check_accum_contrib_files="TRUE" \ num_missing_files_max="${num_missing_files_max}" \ - 
outvarname_hrs_list="FHR_LIST" + outvarname_fhr_list="FHR_LIST" # #----------------------------------------------------------------------- # @@ -365,8 +358,7 @@ settings="\ 'fieldname_in_met_filedir_names': '${FIELDNAME_IN_MET_FILEDIR_NAMES}' 'obtype': '${OBTYPE}' 'FCST_OR_OBS': '${FCST_OR_OBS}' - 'input_accum_hh': '${input_accum_hh}' - 'output_accum_hh': '${ACCUM_HH:-}' + 'accum_hh': '${ACCUM_HH:-}' 'accum_no_pad': '${ACCUM_NO_PAD:-}' 'metplus_templates_dir': '${METPLUS_CONF:-}' 'input_field_group': '${VAR:-}' From f77d31484dd7eab232250ec8dd7f4b172bb67e60 Mon Sep 17 00:00:00 2001 From: gerard ketefian Date: Wed, 25 Sep 2024 12:56:40 -0600 Subject: [PATCH 079/131] Commit change that should have been part of commit hash fffdbd3. --- parm/wflow/verify_pre.yaml | 4 ---- 1 file changed, 4 deletions(-) diff --git a/parm/wflow/verify_pre.yaml b/parm/wflow/verify_pre.yaml index 80b70f68d3..80831f6f29 100644 --- a/parm/wflow/verify_pre.yaml +++ b/parm/wflow/verify_pre.yaml @@ -31,10 +31,6 @@ task_get_obs_ccpa: <<: *default_vars OBS_DIR: '&CCPA_OBS_DIR;' OBTYPE: 'CCPA' - OUTPUT_TIMES_ALL: &output_times_all_cumul - '{%- for i in range(0, (workflow.OUTPUT_TIMES_ALL_CYCLES_CUMUL|length)) %} - {{- " " ~ workflow.OUTPUT_TIMES_ALL_CYCLES_CUMUL[i] }} - {%- endfor %}' native: '{% if platform.get("SCHED_NATIVE_CMD_HPSS") %}{{ platform.SCHED_NATIVE_CMD_HPSS }}{% else %}{{ platform.SCHED_NATIVE_CMD}}{% endif %}' partition: '{% if platform.get("PARTITION_HPSS") %}&PARTITION_HPSS;{% else %}None{% endif %}' queue: "&QUEUE_HPSS;" From 99a87f9442cd3a57a8f92a1d70beb54b8d93b37b Mon Sep 17 00:00:00 2001 From: gerard ketefian Date: Thu, 26 Sep 2024 12:51:30 -0600 Subject: [PATCH 080/131] Refactor the way the vx lead hours (i.e. the hours for which vx will be run) are calculated (and corresponding files checked for), including renaming of the file set_vx_fhr_list.sh to set_leadhrs.sh. 
--- parm/metplus/EnsembleStat.conf | 2 +- parm/metplus/GenEnsProd.conf | 2 +- parm/metplus/GridStat_ensmean.conf | 2 +- parm/metplus/GridStat_ensprob.conf | 2 +- parm/metplus/GridStat_or_PointStat.conf | 2 +- parm/metplus/PcpCombine.conf | 2 +- parm/metplus/PointStat_ensmean.conf | 2 +- parm/metplus/PointStat_ensprob.conf | 2 +- scripts/exregional_check_post_output.sh | 31 +- ...onal_run_met_genensprod_or_ensemblestat.sh | 49 +-- ...gional_run_met_gridstat_or_pointstat_vx.sh | 53 +-- ...un_met_gridstat_or_pointstat_vx_ensmean.sh | 51 +-- ...un_met_gridstat_or_pointstat_vx_ensprob.sh | 47 +-- scripts/exregional_run_met_pb2nc_obs.sh | 1 - scripts/exregional_run_met_pcpcombine.sh | 86 +++-- ush/{set_vx_fhr_list.sh => set_leadhrs.sh} | 301 ++++++------------ 16 files changed, 301 insertions(+), 334 deletions(-) rename ush/{set_vx_fhr_list.sh => set_leadhrs.sh} (50%) diff --git a/parm/metplus/EnsembleStat.conf b/parm/metplus/EnsembleStat.conf index 2caeda1521..3759d5d8a1 100644 --- a/parm/metplus/EnsembleStat.conf +++ b/parm/metplus/EnsembleStat.conf @@ -31,7 +31,7 @@ INIT_INCREMENT = 3600 # List of forecast leads to process for each run time (init or valid) # In hours if units are not specified # If unset, defaults to 0 (don't loop through forecast leads) -LEAD_SEQ = {{fhr_list}} +LEAD_SEQ = {{vx_leadhr_list}} # # Order of loops to process data - Options are times, processes # Not relevant if only one item is in the PROCESS_LIST diff --git a/parm/metplus/GenEnsProd.conf b/parm/metplus/GenEnsProd.conf index 6c47cedb0d..17005ecd1a 100644 --- a/parm/metplus/GenEnsProd.conf +++ b/parm/metplus/GenEnsProd.conf @@ -31,7 +31,7 @@ INIT_INCREMENT = 3600 # List of forecast leads to process for each run time (init or valid) # In hours if units are not specified # If unset, defaults to 0 (don't loop through forecast leads) -LEAD_SEQ = {{fhr_list}} +LEAD_SEQ = {{vx_leadhr_list}} # # Order of loops to process data - Options are times, processes # Not relevant if only one item is in 
the PROCESS_LIST diff --git a/parm/metplus/GridStat_ensmean.conf b/parm/metplus/GridStat_ensmean.conf index 6bbc20e3f8..0cfaa707bf 100644 --- a/parm/metplus/GridStat_ensmean.conf +++ b/parm/metplus/GridStat_ensmean.conf @@ -31,7 +31,7 @@ INIT_INCREMENT = 3600 # List of forecast leads to process for each run time (init or valid) # In hours if units are not specified # If unset, defaults to 0 (don't loop through forecast leads) -LEAD_SEQ = {{fhr_list}} +LEAD_SEQ = {{vx_leadhr_list}} # # Order of loops to process data - Options are times, processes # Not relevant if only one item is in the PROCESS_LIST diff --git a/parm/metplus/GridStat_ensprob.conf b/parm/metplus/GridStat_ensprob.conf index a43b8ed340..6c34eb6ba0 100644 --- a/parm/metplus/GridStat_ensprob.conf +++ b/parm/metplus/GridStat_ensprob.conf @@ -31,7 +31,7 @@ INIT_INCREMENT = 3600 # List of forecast leads to process for each run time (init or valid) # In hours if units are not specified # If unset, defaults to 0 (don't loop through forecast leads) -LEAD_SEQ = {{fhr_list}} +LEAD_SEQ = {{vx_leadhr_list}} # # Order of loops to process data - Options are times, processes # Not relevant if only one item is in the PROCESS_LIST diff --git a/parm/metplus/GridStat_or_PointStat.conf b/parm/metplus/GridStat_or_PointStat.conf index 7bd0039ab5..865f1c8d14 100644 --- a/parm/metplus/GridStat_or_PointStat.conf +++ b/parm/metplus/GridStat_or_PointStat.conf @@ -31,7 +31,7 @@ INIT_INCREMENT = 3600 # List of forecast leads to process for each run time (init or valid) # In hours if units are not specified # If unset, defaults to 0 (don't loop through forecast leads) -LEAD_SEQ = {{fhr_list}} +LEAD_SEQ = {{vx_leadhr_list}} # # Order of loops to process data - Options are times, processes # Not relevant if only one item is in the PROCESS_LIST diff --git a/parm/metplus/PcpCombine.conf b/parm/metplus/PcpCombine.conf index 5bdd09c761..de99871bed 100644 --- a/parm/metplus/PcpCombine.conf +++ b/parm/metplus/PcpCombine.conf @@ -35,7 
+35,7 @@ INIT_INCREMENT = 3600 # List of forecast leads to process for each run time (init or valid) # In hours if units are not specified # If unset, defaults to 0 (don't loop through forecast leads) -LEAD_SEQ = {{fhr_list}} +LEAD_SEQ = {{vx_leadhr_list}} # Order of loops to process data - Options are times, processes # Not relevant if only one item is in the PROCESS_LIST diff --git a/parm/metplus/PointStat_ensmean.conf b/parm/metplus/PointStat_ensmean.conf index b16a481dbd..8637a7501d 100644 --- a/parm/metplus/PointStat_ensmean.conf +++ b/parm/metplus/PointStat_ensmean.conf @@ -31,7 +31,7 @@ INIT_INCREMENT = 3600 # List of forecast leads to process for each run time (init or valid) # In hours if units are not specified # If unset, defaults to 0 (don't loop through forecast leads) -LEAD_SEQ = {{fhr_list}} +LEAD_SEQ = {{vx_leadhr_list}} # # Order of loops to process data - Options are times, processes # Not relevant if only one item is in the PROCESS_LIST diff --git a/parm/metplus/PointStat_ensprob.conf b/parm/metplus/PointStat_ensprob.conf index 84b9f3954d..885ba121be 100644 --- a/parm/metplus/PointStat_ensprob.conf +++ b/parm/metplus/PointStat_ensprob.conf @@ -31,7 +31,7 @@ INIT_INCREMENT = 3600 # List of forecast leads to process for each run time (init or valid) # In hours if units are not specified # If unset, defaults to 0 (don't loop through forecast leads) -LEAD_SEQ = {{fhr_list}} +LEAD_SEQ = {{vx_leadhr_list}} # # Order of loops to process data - Options are times, processes # Not relevant if only one item is in the PROCESS_LIST diff --git a/scripts/exregional_check_post_output.sh b/scripts/exregional_check_post_output.sh index 320311cc94..433aba1e4e 100755 --- a/scripts/exregional_check_post_output.sh +++ b/scripts/exregional_check_post_output.sh @@ -56,7 +56,7 @@ done # #----------------------------------------------------------------------- # -. $USHdir/set_vx_fhr_list.sh +. 
$USHdir/set_leadhrs.sh # #----------------------------------------------------------------------- # @@ -104,38 +104,33 @@ user-staged. #----------------------------------------------------------------------- # i="0" -if [ $(boolify "${DO_ENSEMBLE}") = "TRUE" ]; then +if [[ $(boolify "${DO_ENSEMBLE}") == "TRUE" ]]; then i=$( bc -l <<< "${ENSMEM_INDX}-1" ) fi time_lag=$( bc -l <<< "${ENS_TIME_LAG_HRS[$i]}*${SECS_PER_HOUR}" ) # #----------------------------------------------------------------------- # -# Get the list of forecast hours for which there is a post-processed -# output file. Note that: -# -# 1) CDATE (in YYYYMMDDHH format) is already available via the call to -# the job_preamble.sh script in the j-job of this ex-script. -# 2) VAR is set to "APCP" and ACCUM_HH is set to "01" because we assume -# the output files are hourly, so these settings will result in the -# function set_vx_fhr_list checking for existence of hourly post output -# files. +# Check to ensure that all the expected post-processed forecast output +# files are present on disk. This is done by the set_leadhrs function +# below. Note that CDATE (in YYYYMMDDHH format) is already available via +# the call to the job_preamble.sh script in the j-job of this ex-script. 
# #----------------------------------------------------------------------- # ensmem_indx=$(printf "%0${VX_NDIGITS_ENSMEM_NAMES}d" $(( 10#${ENSMEM_INDX}))) ensmem_name="mem${ensmem_indx}" FCST_INPUT_FN_TEMPLATE=$( eval echo ${FCST_SUBDIR_TEMPLATE:+${FCST_SUBDIR_TEMPLATE}/}${FCST_FN_TEMPLATE} ) -set_vx_fhr_list \ - cdate="${CDATE}" \ - fcst_len_hrs="${FCST_LEN_HRS}" \ - field="$VAR" \ - accum_hh="${ACCUM_HH}" \ + +set_leadhrs \ + yyyymmddhh_init="${CDATE}" \ + lhr_min="0" \ + lhr_max="${FCST_LEN_HRS}" \ + lhr_intvl="${FCST_OUTPUT_INTVL_HRS}" \ base_dir="${VX_FCST_INPUT_BASEDIR}" \ fn_template="${FCST_INPUT_FN_TEMPLATE}" \ - check_accum_contrib_files="FALSE" \ num_missing_files_max="${NUM_MISSING_FCST_FILES_MAX}" \ - outvarname_fhr_list="FHR_LIST" + outvarname_lhrs_list="FHR_LIST" # #----------------------------------------------------------------------- # diff --git a/scripts/exregional_run_met_genensprod_or_ensemblestat.sh b/scripts/exregional_run_met_genensprod_or_ensemblestat.sh index 583178d3ad..73d98754b4 100755 --- a/scripts/exregional_run_met_genensprod_or_ensemblestat.sh +++ b/scripts/exregional_run_met_genensprod_or_ensemblestat.sh @@ -22,7 +22,7 @@ done # . $USHdir/get_metplus_tool_name.sh . $USHdir/set_vx_params.sh -. $USHdir/set_vx_fhr_list.sh +. 
$USHdir/set_leadhrs.sh # #----------------------------------------------------------------------- # @@ -220,23 +220,34 @@ STAGING_DIR="${OUTPUT_BASE}/stage/${FIELDNAME_IN_MET_FILEDIR_NAMES}" # #----------------------------------------------------------------------- # +case "$OBTYPE" in + "CCPA"|"NOHRSC") + vx_intvl="$((10#${ACCUM_HH}))" + vx_hr_start="${vx_intvl}" + ;; + *) + vx_intvl="$((${FCST_OUTPUT_INTVL_HRS}))" + vx_hr_start="0" + ;; +esac +vx_hr_end="${FCST_LEN_HRS}" + if [ "${MetplusToolName}" = "GenEnsProd" ]; then - set_vx_fhr_list_no_missing \ - fcst_len_hrs="${FCST_LEN_HRS}" \ - field="$VAR" \ - accum_hh="${ACCUM_HH}" \ - outvarname_fhr_list_no_missing="FHR_LIST" + set_leadhrs_no_missing \ + lhr_min="${vx_hr_start}" \ + lhr_max="${vx_hr_end}" \ + lhr_intvl="${vx_intvl}" \ + outvarname_lhrs_list_no_missing="VX_LEADHR_LIST" elif [ "${MetplusToolName}" = "EnsembleStat" ]; then - set_vx_fhr_list \ - cdate="${CDATE}" \ - fcst_len_hrs="${FCST_LEN_HRS}" \ - field="$VAR" \ - accum_hh="${ACCUM_HH}" \ + set_leadhrs \ + yyyymmddhh_init="${CDATE}" \ + lhr_min="${vx_hr_start}" \ + lhr_max="${vx_hr_end}" \ + lhr_intvl="${vx_intvl}" \ base_dir="${OBS_INPUT_DIR}" \ fn_template="${OBS_INPUT_FN_TEMPLATE}" \ - check_accum_contrib_files="FALSE" \ num_missing_files_max="${NUM_MISSING_OBS_FILES_MAX}" \ - outvarname_fhr_list="FHR_LIST" + outvarname_lhrs_list="VX_LEADHR_LIST" fi # #----------------------------------------------------------------------- @@ -271,15 +282,15 @@ export LOGDIR # #----------------------------------------------------------------------- # -# Do not run METplus if there isn't at least one valid forecast hour for -# which to run it. +# Do not run METplus if there isn't at least one lead hour for which to +# run it. 
# #----------------------------------------------------------------------- # -if [ -z "${FHR_LIST}" ]; then +if [ -z "${VX_LEADHR_LIST}" ]; then print_err_msg_exit "\ -The list of forecast hours for which to run METplus is empty: - FHR_LIST = [${FHR_LIST}]" +The list of lead hours for which to run METplus is empty: + VX_LEADHR_LIST = [${VX_LEADHR_LIST}]" fi # #----------------------------------------------------------------------- @@ -342,7 +353,7 @@ settings="\ # Date and forecast hour information. # 'cdate': '$CDATE' -'fhr_list': '${FHR_LIST}' +'vx_leadhr_list': '${VX_LEADHR_LIST}' # # Input and output directory/file information. # diff --git a/scripts/exregional_run_met_gridstat_or_pointstat_vx.sh b/scripts/exregional_run_met_gridstat_or_pointstat_vx.sh index dd3fcd495b..263d22053f 100755 --- a/scripts/exregional_run_met_gridstat_or_pointstat_vx.sh +++ b/scripts/exregional_run_met_gridstat_or_pointstat_vx.sh @@ -22,7 +22,7 @@ done # . $USHdir/get_metplus_tool_name.sh . $USHdir/set_vx_params.sh -. $USHdir/set_vx_fhr_list.sh +. $USHdir/set_leadhrs.sh # #----------------------------------------------------------------------- # @@ -95,10 +95,12 @@ FIELDNAME_IN_FCST_INPUT="" FIELDNAME_IN_MET_OUTPUT="" FIELDNAME_IN_MET_FILEDIR_NAMES="" +# Note that ACCUM_HH will not be defined for the REFC, RETOP, ADPSFC, and +# ADPUPA field groups. 
set_vx_params \ obtype="${OBTYPE}" \ field="$VAR" \ - accum_hh="${ACCUM_HH}" \ + accum_hh="${ACCUM_HH:-}" \ outvarname_grid_or_point="grid_or_point" \ outvarname_fieldname_in_obs_input="FIELDNAME_IN_OBS_INPUT" \ outvarname_fieldname_in_fcst_input="FIELDNAME_IN_FCST_INPUT" \ @@ -173,8 +175,8 @@ if [ "${grid_or_point}" = "grid" ]; then FCST_INPUT_FN_TEMPLATE="${FCST_FN_TEMPLATE_PCPCOMBINE_OUTPUT}" ;; "ASNOW"*) - OBS_INPUT_DIR="${OBS_DIR}" - OBS_INPUT_FN_TEMPLATE="${OBS_NOHRSC_ASNOW_FN_TEMPLATE}" + OBS_INPUT_DIR="${vx_output_basedir}${slash_cdate_or_null}/obs/metprd/PcpCombine_obs" + OBS_INPUT_FN_TEMPLATE="${OBS_NOHRSC_ASNOW_FN_TEMPLATE_PCPCOMBINE_OUTPUT}" FCST_INPUT_DIR="${vx_output_basedir}${slash_cdate_or_null}${slash_ensmem_subdir_or_null}/metprd/PcpCombine_fcst" FCST_INPUT_FN_TEMPLATE="${FCST_FN_TEMPLATE_PCPCOMBINE_OUTPUT}" ;; @@ -209,23 +211,34 @@ STAGING_DIR="${OUTPUT_BASE}/stage/${FIELDNAME_IN_MET_FILEDIR_NAMES}" # #----------------------------------------------------------------------- # -# Set the array of forecast hours for which to run the MET/METplus tool. -# This is done by starting with the full list of forecast hours for which -# there is forecast output and then removing from that list any forecast -# hours for which there is no corresponding observation data. +# Set the lead hours for which to run the MET/METplus tool. This is done +# by starting with the full list of lead hours for which we expect to +# find forecast output and then removing from that list any hours for +# which there is no corresponding observation data. 
# #----------------------------------------------------------------------- # -set_vx_fhr_list \ - cdate="${CDATE}" \ - fcst_len_hrs="${FCST_LEN_HRS}" \ - field="$VAR" \ - accum_hh="${ACCUM_HH}" \ +case "$OBTYPE" in + "CCPA"|"NOHRSC") + vx_intvl="$((10#${ACCUM_HH}))" + vx_hr_start="${vx_intvl}" + ;; + *) + vx_intvl="$((${FCST_OUTPUT_INTVL_HRS}))" + vx_hr_start="0" + ;; +esac +vx_hr_end="${FCST_LEN_HRS}" + +set_leadhrs \ + yyyymmddhh_init="${CDATE}" \ + lhr_min="${vx_hr_start}" \ + lhr_max="${vx_hr_end}" \ + lhr_intvl="${vx_intvl}" \ base_dir="${OBS_INPUT_DIR}" \ fn_template="${OBS_INPUT_FN_TEMPLATE}" \ - check_accum_contrib_files="FALSE" \ num_missing_files_max="${NUM_MISSING_OBS_FILES_MAX}" \ - outvarname_fhr_list="FHR_LIST" + outvarname_lhrs_list="VX_LEADHR_LIST" # #----------------------------------------------------------------------- # @@ -259,15 +272,15 @@ export LOGDIR # #----------------------------------------------------------------------- # -# Do not run METplus if there isn't at least one valid forecast hour for -# which to run it. +# Do not run METplus if there isn't at least one lead hour for which to +# run it. # #----------------------------------------------------------------------- # -if [ -z "${FHR_LIST}" ]; then +if [ -z "${VX_LEADHR_LIST}" ]; then print_err_msg_exit "\ The list of forecast hours for which to run METplus is empty: - FHR_LIST = [${FHR_LIST}]" + VX_LEADHR_LIST = [${VX_LEADHR_LIST}]" fi # #----------------------------------------------------------------------- @@ -330,7 +343,7 @@ settings="\ # Date and forecast hour information. # 'cdate': '$CDATE' -'fhr_list': '${FHR_LIST}' +'vx_leadhr_list': '${VX_LEADHR_LIST}' # # Input and output directory/file information. 
# diff --git a/scripts/exregional_run_met_gridstat_or_pointstat_vx_ensmean.sh b/scripts/exregional_run_met_gridstat_or_pointstat_vx_ensmean.sh index 5fdafb20d1..5ad0560f28 100755 --- a/scripts/exregional_run_met_gridstat_or_pointstat_vx_ensmean.sh +++ b/scripts/exregional_run_met_gridstat_or_pointstat_vx_ensmean.sh @@ -22,7 +22,7 @@ done # . $USHdir/get_metplus_tool_name.sh . $USHdir/set_vx_params.sh -. $USHdir/set_vx_fhr_list.sh +. $USHdir/set_leadhrs.sh # #----------------------------------------------------------------------- # @@ -127,8 +127,8 @@ if [ "${grid_or_point}" = "grid" ]; then OBS_INPUT_FN_TEMPLATE="${OBS_CCPA_APCP_FN_TEMPLATE_PCPCOMBINE_OUTPUT}" ;; "ASNOW"*) - OBS_INPUT_DIR="${OBS_DIR}" - OBS_INPUT_FN_TEMPLATE="${OBS_NOHRSC_ASNOW_FN_TEMPLATE}" + OBS_INPUT_DIR="${vx_output_basedir}${slash_cdate_or_null}/obs/metprd/PcpCombine_obs" + OBS_INPUT_FN_TEMPLATE="${OBS_NOHRSC_ASNOW_FN_TEMPLATE_PCPCOMBINE_OUTPUT}" ;; "REFC") OBS_INPUT_DIR="${OBS_DIR}" @@ -157,23 +157,34 @@ STAGING_DIR="${OUTPUT_BASE}/stage/${FIELDNAME_IN_MET_FILEDIR_NAMES}_ensmean" # #----------------------------------------------------------------------- # -# Set the array of forecast hours for which to run the MET/METplus tool. -# This is done by starting with the full list of forecast hours for which -# there is forecast output and then removing from that list any forecast -# hours for which there is no corresponding observation data. +# Set the lead hours for which to run the MET/METplus tool. This is done +# by starting with the full list of lead hours for which we expect to +# find forecast output and then removing from that list any hours for +# which there is no corresponding observation data. 
# #----------------------------------------------------------------------- # -set_vx_fhr_list \ - cdate="${CDATE}" \ - fcst_len_hrs="${FCST_LEN_HRS}" \ - field="$VAR" \ - accum_hh="${ACCUM_HH}" \ +case "$OBTYPE" in + "CCPA"|"NOHRSC") + vx_intvl="$((10#${ACCUM_HH}))" + vx_hr_start="${vx_intvl}" + ;; + *) + vx_intvl="$((${FCST_OUTPUT_INTVL_HRS}))" + vx_hr_start="0" + ;; +esac +vx_hr_end="${FCST_LEN_HRS}" + +set_leadhrs \ + yyyymmddhh_init="${CDATE}" \ + lhr_min="${vx_hr_start}" \ + lhr_max="${vx_hr_end}" \ + lhr_intvl="${vx_intvl}" \ base_dir="${OBS_INPUT_DIR}" \ fn_template="${OBS_INPUT_FN_TEMPLATE}" \ - check_accum_contrib_files="FALSE" \ num_missing_files_max="${NUM_MISSING_OBS_FILES_MAX}" \ - outvarname_fhr_list="FHR_LIST" + outvarname_lhrs_list="VX_LEADHR_LIST" # #----------------------------------------------------------------------- # @@ -216,15 +227,15 @@ export LOGDIR # #----------------------------------------------------------------------- # -# Do not run METplus if there isn't at least one valid forecast hour for -# which to run it. +# Do not run METplus if there isn't at least one lead hour for which to +# run it. # #----------------------------------------------------------------------- # -if [ -z "${FHR_LIST}" ]; then +if [ -z "${VX_LEADHR_LIST}" ]; then print_err_msg_exit "\ -The list of forecast hours for which to run METplus is empty: - FHR_LIST = [${FHR_LIST}]" +The list of lead hours for which to run METplus is empty: + VX_LEADHR_LIST = [${VX_LEADHR_LIST}]" fi # #----------------------------------------------------------------------- @@ -287,7 +298,7 @@ settings="\ # Date and forecast hour information. # 'cdate': '$CDATE' -'fhr_list': '${FHR_LIST}' +'vx_leadhr_list': '${VX_LEADHR_LIST}' # # Input and output directory/file information. 
# diff --git a/scripts/exregional_run_met_gridstat_or_pointstat_vx_ensprob.sh b/scripts/exregional_run_met_gridstat_or_pointstat_vx_ensprob.sh index 201e67ccf1..9a8c35d1cb 100755 --- a/scripts/exregional_run_met_gridstat_or_pointstat_vx_ensprob.sh +++ b/scripts/exregional_run_met_gridstat_or_pointstat_vx_ensprob.sh @@ -22,7 +22,7 @@ done # . $USHdir/get_metplus_tool_name.sh . $USHdir/set_vx_params.sh -. $USHdir/set_vx_fhr_list.sh +. $USHdir/set_leadhrs.sh # #----------------------------------------------------------------------- # @@ -156,23 +156,34 @@ STAGING_DIR="${OUTPUT_BASE}/stage/${FIELDNAME_IN_MET_FILEDIR_NAMES}_ensprob" # #----------------------------------------------------------------------- # -# Set the array of forecast hours for which to run the MET/METplus tool. -# This is done by starting with the full list of forecast hours for which -# there is forecast output and then removing from that list any forecast -# hours for which there is no corresponding observation data. +# Set the lead hours for which to run the MET/METplus tool. This is done +# by starting with the full list of lead hours for which we expect to +# find forecast output and then removing from that list any hours for +# which there is no corresponding observation data. 
# #----------------------------------------------------------------------- # -set_vx_fhr_list \ - cdate="${CDATE}" \ - fcst_len_hrs="${FCST_LEN_HRS}" \ - field="$VAR" \ - accum_hh="${ACCUM_HH}" \ +case "$OBTYPE" in + "CCPA"|"NOHRSC") + vx_intvl="$((10#${ACCUM_HH}))" + vx_hr_start="${vx_intvl}" + ;; + *) + vx_intvl="$((${FCST_OUTPUT_INTVL_HRS}))" + vx_hr_start="0" + ;; +esac +vx_hr_end="${FCST_LEN_HRS}" + +set_leadhrs \ + yyyymmddhh_init="${CDATE}" \ + lhr_min="${vx_hr_start}" \ + lhr_max="${vx_hr_end}" \ + lhr_intvl="${vx_intvl}" \ base_dir="${OBS_INPUT_DIR}" \ fn_template="${OBS_INPUT_FN_TEMPLATE}" \ - check_accum_contrib_files="FALSE" \ num_missing_files_max="${NUM_MISSING_OBS_FILES_MAX}" \ - outvarname_fhr_list="FHR_LIST" + outvarname_lhrs_list="VX_LEADHR_LIST" # #----------------------------------------------------------------------- # @@ -215,15 +226,15 @@ export LOGDIR # #----------------------------------------------------------------------- # -# Do not run METplus if there isn't at least one valid forecast hour for -# which to run it. +# Do not run METplus if there isn't at least one lead hour for which to +# run it. # #----------------------------------------------------------------------- # -if [ -z "${FHR_LIST}" ]; then +if [ -z "${VX_LEADHR_LIST}" ]; then print_err_msg_exit "\ -The list of forecast hours for which to run METplus is empty: - FHR_LIST = [${FHR_LIST}]" +The list of lead hours for which to run METplus is empty: + VX_LEADHR_LIST = [${VX_LEADHR_LIST}]" fi # #----------------------------------------------------------------------- @@ -286,7 +297,7 @@ settings="\ # Date and forecast hour information. # 'cdate': '$CDATE' -'fhr_list': '${FHR_LIST}' +'vx_leadhr_list': '${VX_LEADHR_LIST}' # # Input and output directory/file information. 
# diff --git a/scripts/exregional_run_met_pb2nc_obs.sh b/scripts/exregional_run_met_pb2nc_obs.sh index 494ce74a3d..fbf3ec1689 100755 --- a/scripts/exregional_run_met_pb2nc_obs.sh +++ b/scripts/exregional_run_met_pb2nc_obs.sh @@ -21,7 +21,6 @@ done # . $USHdir/get_metplus_tool_name.sh . $USHdir/set_vx_params.sh -. $USHdir/set_vx_fhr_list.sh # #----------------------------------------------------------------------- # diff --git a/scripts/exregional_run_met_pcpcombine.sh b/scripts/exregional_run_met_pcpcombine.sh index 3d4d0cb9fb..97d156aa62 100755 --- a/scripts/exregional_run_met_pcpcombine.sh +++ b/scripts/exregional_run_met_pcpcombine.sh @@ -22,7 +22,7 @@ done # . $USHdir/get_metplus_tool_name.sh . $USHdir/set_vx_params.sh -. $USHdir/set_vx_fhr_list.sh +. $USHdir/set_leadhrs.sh # #----------------------------------------------------------------------- # @@ -190,45 +190,76 @@ if [ "${FCST_OR_OBS}" = "FCST" ]; then elif [ "${FCST_OR_OBS}" = "OBS" ]; then OBS_INPUT_DIR="${OBS_DIR}" - OBS_INPUT_FN_TEMPLATE=$( eval echo ${OBS_CCPA_APCP_FN_TEMPLATE} ) + fn_template=$(eval echo \${OBS_${OBTYPE}_${VAR}_FN_TEMPLATE}) + OBS_INPUT_FN_TEMPLATE=$( eval echo ${fn_template} ) OUTPUT_BASE="${vx_output_basedir}${slash_cdate_or_null}${slash_ensmem_subdir_or_null}" OUTPUT_DIR="${OUTPUT_BASE}/metprd/${MetplusToolName}_obs" - OUTPUT_FN_TEMPLATE=$( eval echo ${OBS_CCPA_APCP_FN_TEMPLATE_PCPCOMBINE_OUTPUT} ) + fn_template=$(eval echo \${OBS_${OBTYPE}_${VAR}_FN_TEMPLATE_PCPCOMBINE_OUTPUT}) + OUTPUT_FN_TEMPLATE=$( eval echo ${fn_template} ) STAGING_DIR="${OUTPUT_BASE}/stage/${FIELDNAME_IN_MET_FILEDIR_NAMES}" fi # #----------------------------------------------------------------------- # -# Set the array of forecast hours for which to run the MET/METplus tool. 
-# This is done by starting with the full list of forecast hours for which -# there is forecast output and then removing from that list any forecast -# hours for which there is no corresponding observation data (if combining -# observed APCP) or forecast data (if combining forecast APCP). +# Set the array of lead hours for which to run the MET/METplus tool. +# +#----------------------------------------------------------------------- +# +vx_intvl="$((10#${ACCUM_HH}))" +set_leadhrs_no_missing \ + lhr_min="${vx_intvl}" \ + lhr_max="${FCST_LEN_HRS}" \ + lhr_intvl="${vx_intvl}" \ + outvarname_lhrs_list_no_missing="VX_LEADHR_LIST" +# +#----------------------------------------------------------------------- +# +# Check for the presence of files (either from observations or forecasts) +# needed to create required accumulation given by ACCUM_HH. # #----------------------------------------------------------------------- # if [ "${FCST_OR_OBS}" = "FCST" ]; then base_dir="${FCST_INPUT_DIR}" fn_template="${FCST_INPUT_FN_TEMPLATE}" - num_missing_files_max="${NUM_MISSING_FCST_FILES_MAX}" + subintvl="${FCST_OUTPUT_INTVL_HRS}" elif [ "${FCST_OR_OBS}" = "OBS" ]; then base_dir="${OBS_INPUT_DIR}" fn_template="${OBS_INPUT_FN_TEMPLATE}" - num_missing_files_max="${NUM_MISSING_OBS_FILES_MAX}" + subintvl="${OBS_AVAIL_INTVL_HRS}" fi +num_missing_files_max="0" +input_accum_hh=$(printf "%02d" ${subintvl}) +# +# Convert the list of hours at which the PcpCombine tool will be run to +# an array. This represents the hours at which each accumulation period +# ends. Then use it to check the presence of all files requied to build +# the required accumulations from the sub-accumulations. 
+# +subintvl_end_hrs=($( echo ${VX_LEADHR_LIST} | $SED "s/,//g" )) +for hr_end in ${subintvl_end_hrs[@]}; do + hr_start=$((hr_end - vx_intvl + subintvl)) + print_info_msg " +Checking for the presence of files that will contribute to the ${vx_intvl}-hour +accumulation ending at lead hour ${hr_end} (relative to ${CDATE})... +" + set_leadhrs \ + yyyymmddhh_init="${CDATE}" \ + lhr_min="${hr_start}" \ + lhr_max="${hr_end}" \ + lhr_intvl="${subintvl}" \ + base_dir="${base_dir}" \ + fn_template="${fn_template}" \ + num_missing_files_max="${num_missing_files_max}" \ + outvarname_lhrs_list="tmp" +done -set_vx_fhr_list \ - cdate="${CDATE}" \ - fcst_len_hrs="${FCST_LEN_HRS}" \ - field="$VAR" \ - accum_hh="${ACCUM_HH}" \ - base_dir="${base_dir}" \ - fn_template="${fn_template}" \ - check_accum_contrib_files="TRUE" \ - num_missing_files_max="${num_missing_files_max}" \ - outvarname_fhr_list="FHR_LIST" +print_info_msg " +${MetplusToolName} will be run for the following lead hours (relative to ${CDATE}): + VX_LEADHR_LIST = ${VX_LEADHR_LIST} +" # #----------------------------------------------------------------------- # @@ -262,15 +293,15 @@ export LOGDIR # #----------------------------------------------------------------------- # -# Do not run METplus if there isn't at least one valid forecast hour for -# which to run it. +# Do not run METplus if there isn't at least one lead hour for which to +# run it. # #----------------------------------------------------------------------- # -if [ -z "${FHR_LIST}" ]; then +if [ -z "${VX_LEADHR_LIST}" ]; then print_err_msg_exit "\ -The list of forecast hours for which to run METplus is empty: - FHR_LIST = [${FHR_LIST}]" +The list of lead hours for which to run METplus is empty: + VX_LEADHR_LIST = [${VX_LEADHR_LIST}]" fi # #----------------------------------------------------------------------- @@ -330,7 +361,7 @@ settings="\ # Date and forecast hour information. 
# 'cdate': '$CDATE' - 'fhr_list': '${FHR_LIST}' + 'vx_leadhr_list': '${VX_LEADHR_LIST}' # # Input and output directory/file information. # @@ -358,7 +389,8 @@ settings="\ 'fieldname_in_met_filedir_names': '${FIELDNAME_IN_MET_FILEDIR_NAMES}' 'obtype': '${OBTYPE}' 'FCST_OR_OBS': '${FCST_OR_OBS}' - 'accum_hh': '${ACCUM_HH:-}' + 'input_accum_hh': '${input_accum_hh}' + 'output_accum_hh': '${ACCUM_HH:-}' 'accum_no_pad': '${ACCUM_NO_PAD:-}' 'metplus_templates_dir': '${METPLUS_CONF:-}' 'input_field_group': '${VAR:-}' diff --git a/ush/set_vx_fhr_list.sh b/ush/set_leadhrs.sh similarity index 50% rename from ush/set_vx_fhr_list.sh rename to ush/set_leadhrs.sh index 8101e927e5..aa3b4b338f 100644 --- a/ush/set_vx_fhr_list.sh +++ b/ush/set_leadhrs.sh @@ -1,20 +1,21 @@ # #----------------------------------------------------------------------- # -# This file defines functions used to generate sets of forecast hours for +# This file defines functions used to generate sets of lead hours for # which verification will be performed. # #----------------------------------------------------------------------- # -function set_vx_fhr_list_no_missing() { +function set_leadhrs_no_missing() { # #----------------------------------------------------------------------- # -# This function sets the forecast hours for which verification will be -# performed under the assumption that that the data file (which may be -# a forecast output file or an observation file) for each hour is available -# (i.e. that there are no missing files). +# This function sets the lead hours (relative to some unspecified initial/ +# reference time) for which verification will be performed under the +# assumption that the data file (which may be a forecast output file or +# an observation file) for each hour is available (i.e. it assumes that +# there are no missing files). 
# #----------------------------------------------------------------------- # @@ -58,10 +59,10 @@ function set_vx_fhr_list_no_missing() { #----------------------------------------------------------------------- # local valid_args=( \ - "fcst_len_hrs" \ - "field" \ - "accum_hh" \ - "outvarname_fhr_list_no_missing" \ + "lhr_min" \ + "lhr_max" \ + "lhr_intvl" \ + "outvarname_lhrs_list_no_missing" \ ) process_args valid_args "$@" # @@ -81,69 +82,21 @@ function set_vx_fhr_list_no_missing() { # #----------------------------------------------------------------------- # - local fhr_array \ - fhr_list \ - fhr_int \ - fhr_min \ - fhr_max -# -#----------------------------------------------------------------------- -# -# Create the array of forecast hours. -# -#----------------------------------------------------------------------- -# - case "${field}" in - "APCP") - fhr_min="${accum_hh}" - fhr_int="${accum_hh}" - ;; - "ASNOW") - if [ "${accum_hh}" = "24" ]; then - fhr_min="24" - fhr_int="12" - else - fhr_min="${accum_hh}" - fhr_int="${accum_hh}" - fi - ;; - "REFC") - fhr_min="00" - fhr_int="01" - ;; - "RETOP") - fhr_min="00" - fhr_int="01" - ;; - "ADPSFC") - fhr_min="00" - fhr_int="01" - ;; - "ADPUPA") - fhr_min="00" - fhr_int="06" - ;; - *) - print_err_msg_exit "\ -A method for setting verification parameters has not been specified for -this field (field): - field = \"${field}\"" - ;; - esac - fhr_max="${fcst_len_hrs}" - - fhr_array=($( seq ${fhr_min} ${fhr_int} ${fhr_max} )) - - # Express the forecast hour array as a (scalar) string containing a comma - # (and space) separated list of the elements of fhr_array. - fhr_list=$( printf "%s, " "${fhr_array[@]}" ) - fhr_list=$( echo "${fhr_list}" | $SED "s/, $//g" ) + local lhrs_array \ + lhrs_list +# +#----------------------------------------------------------------------- +# +# Create the array of lead hours. 
+# +#----------------------------------------------------------------------- +# + lhrs_array=($( seq ${lhr_min} ${lhr_intvl} ${lhr_max} )) - print_info_msg "$VERBOSE" "\ -Initial (i.e. before filtering for missing files) set of forecast hours -(saved in a scalar string variable) is: - fhr_list = \"${fhr_list}\" -" + # Express the array of lead hours as a (scalar) string containing a comma + # (and space) separated list of the elements of lhrs_array. + lhrs_list=$( printf "%s, " "${lhrs_array[@]}" ) + lhrs_list=$( echo "${lhrs_list}" | $SED "s/, $//g" ) # #----------------------------------------------------------------------- # @@ -151,8 +104,8 @@ Initial (i.e. before filtering for missing files) set of forecast hours # #----------------------------------------------------------------------- # - if [ ! -z "${outvarname_fhr_list_no_missing}" ]; then - printf -v ${outvarname_fhr_list_no_missing} "%s" "${fhr_list}" + if [ ! -z "${outvarname_lhrs_list_no_missing}" ]; then + printf -v ${outvarname_lhrs_list_no_missing} "%s" "${lhrs_list}" fi # #----------------------------------------------------------------------- @@ -166,22 +119,18 @@ Initial (i.e. before filtering for missing files) set of forecast hours } - # #----------------------------------------------------------------------- # -# This function generates a list of forecast hours such that for each -# such hour, there exists a corresponding data file with a name of the -# form specified by the template fn_template. Depending on fn_template, -# this file may contain forecast or observation data. This function -# generates this forecast hour list by first generating a set of hours -# under the assumption that there is a corresponding data file for each -# hour and then removing from that list any hour for which there is no -# data file. 
+# This function generates a list of lead hours (relative to an initial or +# reference time yyyymmddhh_init) such that for each such hour, there +# exists a corresponding data file with a name of the form specified by +# the template fn_template. Depending on fn_template, this file may +# contain forecast or observation data. # #----------------------------------------------------------------------- # -function set_vx_fhr_list() { +function set_leadhrs() { # #----------------------------------------------------------------------- # @@ -221,15 +170,14 @@ function set_vx_fhr_list() { #----------------------------------------------------------------------- # local valid_args=( \ - "cdate" \ - "fcst_len_hrs" \ - "field" \ - "accum_hh" \ + "yyyymmddhh_init" \ + "lhr_min" \ + "lhr_max" \ + "lhr_intvl" \ "base_dir" \ "fn_template" \ - "check_accum_contrib_files" \ "num_missing_files_max" \ - "outvarname_fhr_list" \ + "outvarname_lhrs_list" \ ) process_args valid_args "$@" # @@ -251,155 +199,102 @@ function set_vx_fhr_list() { # local crnt_tmpl \ crnt_tmpl_esc \ - fhr \ - fhr_array \ - fhr_list \ fn \ fp \ i \ - num_fcst_hrs \ + lhr \ + lhrs_array \ + lhrs_list \ + num_hrs \ num_missing_files \ - regex_search_tmpl \ remainder \ - skip_this_fhr + skip_this_hour # #----------------------------------------------------------------------- # -# For the specified field, generate the set of forecast hours at which +# For the specified field, generate the set of lead hours at which # verification will be performed under the assumption that for each such -# hour, the corresponding forecast and/or observation files exists. Thus, -# this set of forecast hours is an initial guess for the hours at which -# vx will be performed. +# hour, the corresponding or observation file exists. Thus, this set is +# an initial guess for the lead hours at which vx will be performed. 
# #----------------------------------------------------------------------- # - set_vx_fhr_list_no_missing \ - fcst_len_hrs="${fcst_len_hrs}" \ - field="${field}" \ - accum_hh="${accum_hh}" \ - outvarname_fhr_list_no_missing="fhr_list_no_missing" + set_leadhrs_no_missing \ + lhr_min="${lhr_min}" \ + lhr_max="${lhr_max}" \ + lhr_intvl="${lhr_intvl}" \ + outvarname_lhrs_list_no_missing="lhrs_list_no_missing" - # For convenience, save the scalar variable fhr_list_no_missing to a bash - # array. - fhr_array=$( printf "%s" "${fhr_list_no_missing}" | $SED "s/,//g" ) - fhr_array=( ${fhr_array} ) + # For convenience, save the scalar variable lhrs_list_no_missing to a + # bash array. + lhrs_array=($( printf "%s" "${lhrs_list_no_missing}" | $SED "s/,//g" )) print_info_msg "$VERBOSE" "\ -Initial (i.e. before filtering for missing files) set of forecast hours -is: - fhr_array = ( $( printf "\"%s\" " "${fhr_array[@]}" )) +Initial (i.e. before filtering for missing files) set of lead hours +(relative to ${yyyymmddhh_init}) is: + lhrs_array = ( $( printf "\"%s\" " "${lhrs_array[@]}" )) " # #----------------------------------------------------------------------- # -# Loop through all forecast hours. For each one for which a corresponding -# file exists, add the forecast hour to fhr_list. fhr_list will be a -# scalar containing a comma-separated list of forecast hours for which -# corresponding files exist. Also, use the variable num_missing_files -# to keep track of the number of files that are missing. +# Loop through the array of lead hours generated above and construct the +# variable lhrs_list that will be scalar (string) containing a comma- +# separated list of hours for which corresponding forecast or observation +# files have been confirmed to exist. Also, use the variable +# num_missing_files to keep track of the number of files that are missing. 
# #----------------------------------------------------------------------- # - fhr_list="" + lhrs_list="" num_missing_files="0" - num_fcst_hrs=${#fhr_array[@]} - for (( i=0; i<${num_fcst_hrs}; i++ )); do - - fhr_orig="${fhr_array[$i]}" + num_hrs=${#lhrs_array[@]} + for (( i=0; i<${num_hrs}; i++ )); do - if [ "${check_accum_contrib_files}" = "TRUE" ]; then - fhr=$(( ${fhr_orig} - ${accum_hh} + 1 )) - num_back_hrs=${accum_hh} - else - fhr=${fhr_orig} - num_back_hrs=1 - fi - - skip_this_fhr="FALSE" - for (( j=0; j<${num_back_hrs}; j++ )); do -# -# Use the provided template to set the name of/relative path to the file -# Note that the while-loop below is over all METplus time string templates -# of the form {...} in the template fn_template; it continues until all -# such templates have been evaluated to actual time strings. -# - fn="${fn_template}" - regex_search_tmpl="(.*)(\{.*\})(.*)" - crnt_tmpl=$( printf "%s" "${fn_template}" | \ - $SED -n -r -e "s|${regex_search_tmpl}|\2|p" ) - remainder=$( printf "%s" "${fn_template}" | \ - $SED -n -r -e "s|${regex_search_tmpl}|\1\3|p" ) - while [ ! -z "${crnt_tmpl}" ]; do - - eval_METplus_timestr_tmpl \ - init_time="$cdate" \ - fhr="$fhr" \ - METplus_timestr_tmpl="${crnt_tmpl}" \ - outvarname_formatted_time="actual_value" -# -# Replace METplus time templates in fn with actual times. Note that -# when using sed, we need to escape various characters (question mark, -# closing and opening curly braces, etc) in the METplus template in -# order for the sed command below to work properly. -# - crnt_tmpl_esc=$( echo "${crnt_tmpl}" | \ - $SED -r -e "s/\?/\\\?/g" -e "s/\{/\\\{/g" -e "s/\}/\\\}/g" ) - fn=$( echo "${fn}" | \ - $SED -n -r "s|(.*)(${crnt_tmpl_esc})(.*)|\1${actual_value}\3|p" ) -# -# Set up values for the next iteration of the while-loop. 
-# - crnt_tmpl=$( printf "%s" "${remainder}" | \ - $SED -n -r -e "s|${regex_search_tmpl}|\2|p" ) - remainder=$( printf "%s" "${remainder}" | \ - $SED -n -r -e "s|${regex_search_tmpl}|\1\3|p" ) - - done + lhr="${lhrs_array[$i]}" + skip_this_hour="FALSE" +# +# Evaluate the METplus file name template containing METplus timestrings +# for the specified yyyymmddhh_init and current hour (lhr) to obtain the +# name of the current file (including possibly a relative directory). +# + eval_METplus_timestr_tmpl \ + init_time="${yyyymmddhh_init}" \ + fhr="${lhr}" \ + METplus_timestr_tmpl="${fn_template}" \ + outvarname_evaluated_timestr="fn" # # Get the full path to the file and check if it exists. # - fp="${base_dir}/${fn}" - - if [ -f "${fp}" ]; then - print_info_msg "\ -Found file (fp) for the current forecast hour (fhr; relative to the cycle -date cdate): - fhr = \"$fhr\" - cdate = \"$cdate\" + fp="${base_dir}/${fn}" + if [ -f "${fp}" ]; then + print_info_msg "\ +Found file (fp) for lead hour ${lhr} (relative to ${yyyymmddhh_init}): fp = \"${fp}\" " - else - skip_this_fhr="TRUE" - num_missing_files=$(( ${num_missing_files} + 1 )) - print_info_msg "\ -The file (fp) for the current forecast hour (fhr; relative to the cycle -date cdate) is missing: - fhr = \"$fhr\" - cdate = \"$cdate\" + else + skip_this_hour="TRUE" + num_missing_files=$(( ${num_missing_files} + 1 )) + print_info_msg "\ +The file (fp) for lead hour ${lhr} (relative to ${yyyymmddhh_init}) is MISSING: fp = \"${fp}\" -Excluding the current forecast hour from the list of hours passed to the -METplus configuration file. +Excluding this hour from the list of lead hours to return. " - break - fi - - fhr=$(( $fhr + 1 )) - - done + break + fi - if [ "${skip_this_fhr}" != "TRUE" ]; then - fhr_list="${fhr_list},${fhr_orig}" + if [[ ! $(boolify "${skip_this_hour}") == "TRUE" ]]; then + lhrs_list="${lhrs_list},${lhr}" fi done # -# Remove leading comma from fhr_list. +# Remove leading comma from lhrs_list. 
# - fhr_list=$( echo "${fhr_list}" | $SED "s/^,//g" ) + lhrs_list=$( echo "${lhrs_list}" | $SED "s/^,//g" ) print_info_msg "$VERBOSE" "\ -Final (i.e. after filtering for missing files) set of forecast hours -(saved in a scalar string variable) is: - fhr_list = \"${fhr_list}\" +Final (i.e. after filtering for missing files) set of lead hours relative +to ${yyyymmddhh_init} (saved in a scalar string variable) is: + lhrs_list = \"${lhrs_list}\" " # #----------------------------------------------------------------------- @@ -424,8 +319,8 @@ maximum allowed number (num_missing_files_max): # #----------------------------------------------------------------------- # - if [ ! -z "${outvarname_fhr_list}" ]; then - printf -v ${outvarname_fhr_list} "%s" "${fhr_list}" + if [ ! -z "${outvarname_lhrs_list}" ]; then + printf -v ${outvarname_lhrs_list} "%s" "${lhrs_list}" fi # #----------------------------------------------------------------------- From da81dbb0680c7973a662a8dc51c520fbd182dfb0 Mon Sep 17 00:00:00 2001 From: gerard ketefian Date: Thu, 26 Sep 2024 14:46:19 -0600 Subject: [PATCH 081/131] Modify variable names for clarity. --- ush/set_cycle_and_obs_timeinfo.py | 64 +++++++++++++++---------------- 1 file changed, 32 insertions(+), 32 deletions(-) diff --git a/ush/set_cycle_and_obs_timeinfo.py b/ush/set_cycle_and_obs_timeinfo.py index 36635b643e..9f9fbe9820 100644 --- a/ush/set_cycle_and_obs_timeinfo.py +++ b/ush/set_cycle_and_obs_timeinfo.py @@ -34,7 +34,7 @@ def set_cycle_dates(start_time_first_cycl, start_time_last_cycl, cycl_intvl): while cdate <= start_time_last_cycl: cyc = datetime.strftime(cdate, "%Y%m%d%H") all_cdates.append(cyc) - cdate += cycl_intvl + cdate += cycl_intvl return all_cdates @@ -69,7 +69,7 @@ def set_fcst_output_times_and_obs_days_all_cycles( Time interval between forecast output times; a timedelta object. 
Returns: - output_times_all_cycles_inst: + fcst_output_times_all_cycles_inst: List of forecast output times over all cycles of instantaneous fields. Each element is a string of the form 'YYYYMMDDHH'. @@ -78,7 +78,7 @@ def set_fcst_output_times_and_obs_days_all_cycles( perform verification) over all cycles of instantaneous fields. Each element is a string of the form 'YYYYMMDD'. - output_times_all_cycles_cumul: + fcst_output_times_all_cycles_cumul: List of forecast output times over all cycles of cumulative fields. Each element is a string of the form 'YYYYMMDDHH'. @@ -98,34 +98,34 @@ def set_fcst_output_times_and_obs_days_all_cycles( cycle_start_times = [datetime.strptime(yyyymmddhh, "%Y%m%d%H") for yyyymmddhh in cycle_start_times_str] # Get the number of forecast output times per cycle/forecast. - num_output_times_per_cycle = int(fcst_len/fcst_output_intvl + 1) + num_fcst_output_times_per_cycle = int(fcst_len/fcst_output_intvl + 1) # Initialize sets that will contain the various forecast output and obs # day information. - output_times_all_cycles_inst = set() + fcst_output_times_all_cycles_inst = set() obs_days_all_cycles_inst = set() - output_times_all_cycles_cumul = set() + fcst_output_times_all_cycles_cumul = set() obs_days_all_cycles_cumul = set() for i, start_time_crnt_cycle in enumerate(cycle_start_times): # Create a list of forecast output times of instantaneous fields for the # current cycle. - output_times_crnt_cycle_inst \ + fcst_output_times_crnt_cycle_inst \ = [start_time_crnt_cycle + i*fcst_output_intvl - for i in range(0,num_output_times_per_cycle)] - # Include the output times of instantaneous fields for the current cycle + for i in range(0,num_fcst_output_times_per_cycle)] + # Include the output times of instantaneous fields for the current cycle # in the set of all such output times over all cycles. 
- output_times_all_cycles_inst \ - = output_times_all_cycles_inst | set(output_times_crnt_cycle_inst) + fcst_output_times_all_cycles_inst \ + = fcst_output_times_all_cycles_inst | set(fcst_output_times_crnt_cycle_inst) # Create a list of instantaneous field obs days (i.e. days on which # observations of instantaneous fields are needed for verification) for # the current cycle. We do this by dropping the hour-of-day from each # element of the list of forecast output times and keeping only unique # elements. - tmp = [datetime_obj.date() for datetime_obj in output_times_crnt_cycle_inst] + tmp = [datetime_obj.date() for datetime_obj in fcst_output_times_crnt_cycle_inst] obs_days_crnt_cycl_inst = sorted(set(tmp)) - # Include the obs days for instantaneous fields for the current cycle + # Include the obs days for instantaneous fields for the current cycle # in the set of all such obs days over all cycles. obs_days_all_cycles_inst = obs_days_all_cycles_inst | set(obs_days_crnt_cycl_inst) @@ -133,12 +133,12 @@ def set_fcst_output_times_and_obs_days_all_cycles( # current cycle. This is simply the list of forecast output times for # instantaneous fields but with the first time dropped (because nothing # has yet accumulated at the starting time of the cycle). - output_times_crnt_cycle_cumul = output_times_crnt_cycle_inst - output_times_crnt_cycle_cumul.pop(0) + fcst_output_times_crnt_cycle_cumul = fcst_output_times_crnt_cycle_inst + fcst_output_times_crnt_cycle_cumul.pop(0) # Include the obs days for cumulative fields for the current cycle in the # set of all such obs days over all cycles. - output_times_all_cycles_cumul \ - = output_times_all_cycles_cumul | set(output_times_crnt_cycle_cumul) + fcst_output_times_all_cycles_cumul \ + = fcst_output_times_all_cycles_cumul | set(fcst_output_times_crnt_cycle_cumul) # Create a list of cumulative field obs days (i.e. 
days on which # observations of cumulative fields are needed for verification) for @@ -150,8 +150,8 @@ def set_fcst_output_times_and_obs_days_all_cycles( # the scripts/tasks that get observations of cumulative fields, the # zeroth hour of a day is considered part of the previous day (because # it represents accumulation that occurred on the previous day). - tmp = output_times_crnt_cycle_cumul - last_output_time_cumul = output_times_crnt_cycle_cumul[-1] + tmp = fcst_output_times_crnt_cycle_cumul + last_output_time_cumul = fcst_output_times_crnt_cycle_cumul[-1] if last_output_time_cumul.hour == 0: tmp.pop() tmp = [datetime_obj.date() for datetime_obj in tmp] @@ -162,9 +162,9 @@ def set_fcst_output_times_and_obs_days_all_cycles( # Convert the set of output times of instantaneous fields over all cycles # to a sorted list of strings of the form 'YYYYMMDDHH'. - output_times_all_cycles_inst = sorted(output_times_all_cycles_inst) - output_times_all_cycles_inst = [datetime.strftime(output_times_all_cycles_inst[i], "%Y%m%d%H") - for i in range(len(output_times_all_cycles_inst))] + fcst_output_times_all_cycles_inst = sorted(fcst_output_times_all_cycles_inst) + fcst_output_times_all_cycles_inst = [datetime.strftime(fcst_output_times_all_cycles_inst[i], "%Y%m%d%H") + for i in range(len(fcst_output_times_all_cycles_inst))] # Convert the set of obs days for instantaneous fields over all cycles # to a sorted list of strings of the form 'YYYYMMDD'. @@ -174,9 +174,9 @@ def set_fcst_output_times_and_obs_days_all_cycles( # Convert the set of output times of cumulative fields over all cycles to # a sorted list of strings of the form 'YYYYMMDDHH'. 
- output_times_all_cycles_cumul = sorted(output_times_all_cycles_cumul) - output_times_all_cycles_cumul = [datetime.strftime(output_times_all_cycles_cumul[i], "%Y%m%d%H") - for i in range(len(output_times_all_cycles_cumul))] + fcst_output_times_all_cycles_cumul = sorted(fcst_output_times_all_cycles_cumul) + fcst_output_times_all_cycles_cumul = [datetime.strftime(fcst_output_times_all_cycles_cumul[i], "%Y%m%d%H") + for i in range(len(fcst_output_times_all_cycles_cumul))] # Convert the set of obs days for cumulative fields over all cycles to a # sorted list of strings of the form 'YYYYMMDD'. @@ -184,8 +184,8 @@ def set_fcst_output_times_and_obs_days_all_cycles( obs_days_all_cycles_cumul = [datetime.strftime(obs_days_all_cycles_cumul[i], "%Y%m%d") for i in range(len(obs_days_all_cycles_cumul))] - return output_times_all_cycles_inst, obs_days_all_cycles_inst, \ - output_times_all_cycles_cumul, obs_days_all_cycles_cumul + return fcst_output_times_all_cycles_inst, obs_days_all_cycles_inst, \ + fcst_output_times_all_cycles_cumul, obs_days_all_cycles_cumul def set_cycledefs_for_obs_days(obs_days_all_cycles): @@ -195,17 +195,17 @@ def set_cycledefs_for_obs_days(obs_days_all_cycles): list of days must be increasing in time, but the days do not have to be consecutive, i.e. there may be gaps between days that are greater than one day. - + Each cycledef string in the output list represents a set of consecutive days in the input string (when used inside a tag in a ROCOTO - XML). Thus, when the cycledef strings in the output string are all + XML). Thus, when the cycledef strings in the output string are all assigned to the same cycledef group in a ROCOTO XML, that group will represent all the days on which observations are needed. Args: obs_days_all_cycles: A list of strings of the form 'YYYYMMDD', with each string representing - a day on which observations are needed. Note that the list must be + a day on which observations are needed. Note that the list must be sorted, i.e. 
the days must be increasing in time, but there may be gaps between days. @@ -218,7 +218,7 @@ def set_cycledefs_for_obs_days(obs_days_all_cycles): where {yyyymmdd_start} is the starting day of the first cycle in the cycledef, and {yyyymmdd_end} is the starting day of the last cycle (note - that the minutes and hours in these cycledef stirngs are always set to + that the minutes and hours in these cycledef stirngs are always set to '00'). Thus, one of the elements of the output list may be as follows: '202404290000 202405010000 24:00:00' @@ -229,7 +229,7 @@ def set_cycledefs_for_obs_days(obs_days_all_cycles): # list of datetime objects. tmp = [datetime.strptime(yyyymmdd, "%Y%m%d") for yyyymmdd in obs_days_all_cycles] - # Initialize the variable that in the loop below contains the date of + # Initialize the variable that in the loop below contains the date of # the previous day. This is just the first element of the list of # datetime objects constructed above. Then use it to initialize the # list (contin_obs_day_lists) that will contain lists of consecutive From fb3e7f42fa992c47618bf6fc9865d90b1827762d Mon Sep 17 00:00:00 2001 From: gerard ketefian Date: Fri, 27 Sep 2024 16:36:15 -0600 Subject: [PATCH 082/131] Check for the form of accum_hh (accumulation) only if it's going to be used. --- ush/set_vx_params.sh | 15 ++++++++++----- 1 file changed, 10 insertions(+), 5 deletions(-) diff --git a/ush/set_vx_params.sh b/ush/set_vx_params.sh index 9b67e36d22..267cd6902f 100644 --- a/ush/set_vx_params.sh +++ b/ush/set_vx_params.sh @@ -3,8 +3,9 @@ # # This file defines a function that sets various parameters needed when # performing verification. The way these parameters are set depends on -# the field being verified and, if the field is accumulated precipitation, -# the accumulation period (both of which are inputs to this function). +# the field being verified and, if the field is cumulative (e.g. 
+# accumulated precipitation or snowfall), the accumulation period +# (both of which are inputs to this function). # # As of 20220928, the verification tasks in the SRW App workflow use the # MET/METplus software (MET = Model Evaluation Tools) developed at the @@ -91,10 +92,14 @@ function set_vx_params() { # #----------------------------------------------------------------------- # - if [[ ! "${accum_hh}" =~ ^[0-9]{2}$ ]]; then - print_err_msg_exit "\ -The accumulation (accum_hh) must be a 2-digit integer: + if [ "${obtype}" = "CCPA" ] || [ "${obtype}" = "NOHRSC" ]; then + if [[ ! "${accum_hh}" =~ ^[0-9]{2}$ ]]; then + print_err_msg_exit "\ +For the given observation type (obtype), the accumulation (accum_hh) must +be a 2-digit integer: + obtype = \"${obtype}\" accum_hh = \"${accum_hh}\"" + fi fi # #----------------------------------------------------------------------- From 8f043fc749421191f029f95a284c9d32a156e9c2 Mon Sep 17 00:00:00 2001 From: gerard ketefian Date: Fri, 27 Sep 2024 16:37:46 -0600 Subject: [PATCH 083/131] Changes that allow checks on the verification parameters and generation of new vx config parameters to allow the workflow to have obs-day-based as well as cycle-based tasks. 
--- ush/set_cycle_and_obs_timeinfo.py | 644 +++++++++++++++++++++++++----- ush/setup.py | 207 ++++++---- 2 files changed, 673 insertions(+), 178 deletions(-) diff --git a/ush/set_cycle_and_obs_timeinfo.py b/ush/set_cycle_and_obs_timeinfo.py index 9f9fbe9820..8f45e60f2a 100644 --- a/ush/set_cycle_and_obs_timeinfo.py +++ b/ush/set_cycle_and_obs_timeinfo.py @@ -2,14 +2,20 @@ from datetime import datetime, timedelta, date from pprint import pprint +from textwrap import dedent from python_utils import print_input_args, print_err_msg_exit +import logging -def set_cycle_dates(start_time_first_cycl, start_time_last_cycl, cycl_intvl): - """This file defines a function that, given the start and end dates - as date time objects, and a cycling frequency, returns an array of - cycle date-hours whose elements have the form YYYYMMDDHH. Here, - YYYY is a four-digit year, MM is a two- digit month, DD is a - two-digit day of the month, and HH is a two-digit hour of the day. + +def set_cycle_dates(start_time_first_cycl, start_time_last_cycl, cycl_intvl, + return_type='string'): + """ + This file defines a function that returns a list containing the starting + times of all the cycles in the experiment. + + If return_type is set to "string" (the default value), the returned list + contains strings in the format 'YYYYMMDDHH'. If it is set to "datetime", + the returned list contains a set of datetime objects. Args: start_time_first_cycl: @@ -21,38 +27,85 @@ def set_cycle_dates(start_time_first_cycl, start_time_last_cycl, cycl_intvl): cycl_intvl: Time interval between cycle starting times; a timedelta object. + return_type: + String that specifies the type of the returned list. + Returns: - A list of strings containing cycle starting times in the format - 'YYYYMMDDHH' + all_cdates: + Either a list of strings in the format 'YYYYMMDDHH' or a list of datetime + objects containing the cycle starting times. 
""" print_input_args(locals()) + + valid_values = ['string', 'datetime'] + if return_type not in valid_values: + raise ValueError("Invalid value for 'a'. Expected 1, 2, or 3.") + msg = dedent(f""" + Invalid value for optional argument "return_type": + return_type = {return_type} + Valid values are: + valid_values = {valid_values} + """) + raise Exception(msg) # iterate over cycles all_cdates = [] cdate = start_time_first_cycl while cdate <= start_time_last_cycl: - cyc = datetime.strftime(cdate, "%Y%m%d%H") - all_cdates.append(cyc) + all_cdates.append(cdate) cdate += cycl_intvl + + if return_type == "string": + all_cdates = [datetime.strftime(cdate, "%Y%m%d%H") for cdate in all_cdates] + return all_cdates -def set_fcst_output_times_and_obs_days_all_cycles( - start_time_first_cycl, start_time_last_cycl, cycl_intvl, fcst_len, fcst_output_intvl): - """Given the starting time of the first cycle of an SRW App experiment, the - starting time of the last cycle, the interval between cycle start times, - the forecast length, and the forecast output interval, this function - returns two pairs of lists: the first of each pair is a list of strings - of forecast output times over all cycles (each element of the form - 'YYYYMMDDHH'), and the second is a list of days over all cycles on which - observations are needed to perform verification (each element of the form - 'YYYYMMDD'). The first pair of lists is for instantaneous output fields - (e.g. REFC, RETOP, T2m), and the second pair is for cumulative ones (e.g. - APCP or accumulated precipitation). The accumulation period for the latter - is the forecast output interval. +def check_temporal_consistency_cumul_fields( + vx_config, + start_time_first_cycl, start_time_last_cycl, cycl_intvl, + fcst_len, fcst_output_intvl): + """ + This function reads in a subset of the parameters in the verification + configuration dictionary and ensures that certain temporal constraints on + these parameters are satisfied. 
It then returns an updated version of + the verification configuration dictionary that satisfies these constranints. + + The constraints are on the accumulation intervals associated with the + cumulative forecast fields and corresponding observation type pairs that + are to be verified. The constraints on each such accumulation interval + are as follows: + + 1) The accumulation interval is less than or equal to the forecast length + (since otherwise, the forecast field cannot be accumulated over that + interval). + + 2) The obs availability interval evenly divides the accumulation interval. + This ensures that the obs can be added together to obtain accumulated + values of the obs field, e.g. the 6-hourly NOHRSC obs can be added + to obtain 24-hour observed snowfall accumulations. + + 3) The forecast output interval evenly divides the accumulation interval. + This ensures that the forecast output can be added together to obtain + accumulated values of the forecast field, e.g. if the forecast output + interval is 3 hours, the resulting 3-hourly APCP outputs from the + forecast can be added to obtain 6-hourly forecast APCP. + + 4) The hour-of-day at which the accumulated forecast values will be + available are a subset of the ones at which the accumulated obs + values are available. This ensures that the accumulated fields + from the obs and forecast are valid at the same times and thus can + be compared in the verification. + + If for a given field-accumulation combination any of these constraints + is violated, that accumulation is removed from the list of accumulations + to verify for that field. Args: + vx_config: + The verification configuration dictionary. + start_time_first_cycl: Starting time of first cycle; a datetime object. @@ -69,43 +122,295 @@ def set_fcst_output_times_and_obs_days_all_cycles( Time interval between forecast output times; a timedelta object. 
Returns: - fcst_output_times_all_cycles_inst: - List of forecast output times over all cycles of instantaneous fields. - Each element is a string of the form 'YYYYMMDDHH'. + vx_config: + An updated version of the verification configuration dictionary. + + fcst_obs_matched_times_all_cycles_cumul: + Dictionary containing the times (in YYYYMMDDHH string format) at + which various field/accumlation combinations are output and at + which the corresponding obs type is also available. + """ + # Set dictionary containing all cumulative fields (i.e. whether or not + # they are to be verified). The keys are the observation types and the + # values are the field names in the forecasts. + vx_cumul_fields_all = {"CCPA": "APCP", "NOHRSC": "ASNOW"} + + # Convert from datetime.timedelta objects to integers. + one_hour = timedelta(hours=1) + fcst_len_hrs = int(fcst_len/one_hour) + fcst_output_intvl_hrs = int(fcst_output_intvl/one_hour) + + # Generate a list containing the starting times of the cycles. This will + # be needed in checking that the hours-of-day of the forecast output match + # those of the observations. + cycle_start_times \ + = set_cycle_dates(start_time_first_cycl, start_time_last_cycl, cycl_intvl, + return_type='datetime') + + # Initialize one of the variables that will be returned to an empty + # dictionary. + fcst_obs_matched_times_all_cycles_cumul = dict() + + for obtype, field_fcst in vx_cumul_fields_all.items(): + + # If the current cumulative field is not in the list of fields to be + # verified, just skip to the next field. + if field_fcst not in vx_config["VX_FIELDS"]: + continue + + # Initialize a sub-dictionary in one of the dictionaries to be returned. + fcst_obs_matched_times_all_cycles_cumul.update({field_fcst: {}}) + + # + # Get the availability interval of the current observation type from the + # verification configuration dictionary and use it to calculate the hours- + # of-day at which the obs will be available. 
+ # + # Get the obs availability interval. + config_var_name = "".join([obtype, "_OBS_AVAIL_INTVL_HRS"]) + obs_avail_intvl_hrs = vx_config[config_var_name] + # Ensure that the obs availability interval evenly divides into 24. + remainder = 24 % obs_avail_intvl_hrs + if remainder != 0: + msg = dedent(f""" + The obs availability interval for obs of type {obtype} must divide evenly + into 24 but doesn't: + obs_avail_intvl_hrs = {obs_avail_intvl_hrs} + 24 % obs_avail_intvl_hrs = {remainder}" + """) + raise Exception(msg) + # Assume that the obs are available at hour 0 of the day regardless + # of obs type. + obs_avail_hr_start = 0 + obs_avail_hr_end = obs_avail_hr_start + 24 + # Construct list of obs availability hours-of-day. + obs_avail_hrs_of_day = [hr for hr in range(obs_avail_hr_start, obs_avail_hr_end, obs_avail_intvl_hrs)] + obs_avail_hrs_of_day_str = ['%02d' % int(hr) for hr in obs_avail_hrs_of_day] + # + # Get the array of accumulation intervals for the current cumulative field. + # Then loop over them to ensure that the constraints listed above are + # satisfied. If for a given accumulation one or more of the constraints + # is not satisfied, remove that accumulation from the list of accumulations + # for the current field. + # + accum_intvls_array_name = "".join(["VX_", field_fcst, "_ACCUMS_HRS"]) + accum_intvls_hrs = vx_config[accum_intvls_array_name] + # + # Loop through the accumulation intervals and check the temporal constraints + # listed above. + # + for accum_hrs in accum_intvls_hrs.copy(): + + accum_hh = f"{accum_hrs:02d}" + # Initialize a sub-sub-dictionary in one of the dictionaries to be returned. + fcst_obs_matched_times_all_cycles_cumul[field_fcst][accum_hh] = [] + # + # Check that accumulation inervals are shorter than the forecast length. 
+ # + if accum_hrs > fcst_len_hrs: + msg = dedent(f""" + The accumulation interval (accum_hrs) for the current cumulative forecast + field (field_fcst) and corresponding observation type (obtype) is greater + than the forecast length (fcst_len_hrs): + field_fcst = {field_fcst} + obtype = {obtype} + accum_hrs = {accum_hrs} + fcst_len_hrs = {fcst_len_hrs} + Thus, this forecast field cannot be accumulated over this interval. + Will remove this accumulation interval from the list of accumulation + intervals to verify for this field/obtype. + """) + logging.info(msg) + accum_intvls_hrs.remove(accum_hrs) + # + # Check that accumulation inervals are evenly divisible by the observation + # availability interval. + # + if accum_hrs in accum_intvls_hrs: + rem_obs = accum_hrs % obs_avail_intvl_hrs + if rem_obs != 0: + msg = dedent(f""" + The accumulation interval (accum_hrs) for the current cumulative forecast + field (field_fcst) and corresponding observation type (obtype) is not + evenly divisible by the observation type's availability interval + (obs_avail_intvl_hrs): + field_fcst = {field_fcst} + obtype = {obtype} + accum_hrs = {accum_hrs} + obs_avail_intvl_hrs = {obs_avail_intvl_hrs} + accum_hrs % obs_avail_intvl_hrs = {rem_obs} + Thus, this observation type cannot be accumulated over this interval. + Will remove this accumulation interval from the list of accumulation + intervals to verify for this field/obtype. + """) + logging.info(msg) + accum_intvls_hrs.remove(accum_hrs) + # + # Check that accumulation inervals are evenly divisible by the forecast + # output interval. 
+            #
+            if accum_hrs in accum_intvls_hrs:
+                rem_fcst = accum_hrs % fcst_output_intvl_hrs
+                if rem_fcst != 0:
+                    msg = dedent(f"""
+                        The accumulation interval (accum_hrs) for the current cumulative forecast
+                        field (field_fcst) and corresponding observation type (obtype) is not
+                        evenly divisible by the forecast output interval (fcst_output_intvl):
+                          field_fcst = {field_fcst}
+                          obtype = {obtype}
+                          accum_hrs = {accum_hrs} hr
+                          fcst_output_intvl_hrs = {fcst_output_intvl_hrs} hr
+                          accum_hrs % fcst_output_intvl_hrs = {rem_fcst}
+                        Thus, this forecast field cannot be accumulated over this interval.
+                        Will remove this accumulation interval from the list of accumulation
+                        intervals to verify for this field/obtype.
+                        """)
+                    logging.info(msg)
+                    accum_intvls_hrs.remove(accum_hrs)
+            #
+            # Check that the hours-of-day at which the current cumulative field will
+            # be output are a subset of the hours-of-day at which the corresponding
+            # obs type is output.
+            #
+            if accum_hrs in accum_intvls_hrs:
+
+                # Initialize sets that will contain the forecast output times of the
+                # current cumulative field over all cycles.
+                fcst_output_times_all_cycles = set()
+
+                # Calculate the forecast output times of the current cumulative field
+                # for the current cycle and include them in the set of such times
+                # over all cycles.
+                accum = timedelta(hours=accum_hrs)
+                num_fcst_output_times_per_cycle = int(fcst_len/accum)
+                for i, start_time_crnt_cycle in enumerate(cycle_start_times):
+                    fcst_output_times_crnt_cycle \
+                    = [start_time_crnt_cycle + (i+1)*accum
+                       for i in range(0, num_fcst_output_times_per_cycle)]
+                    fcst_output_times_all_cycles \
+                    = fcst_output_times_all_cycles | set(fcst_output_times_crnt_cycle)
+
+                # Get all the hours-of-day at which the current cumulative field will be
+                # output by the forecast.
+ fcst_output_times_all_cycles = sorted(fcst_output_times_all_cycles) + fcst_output_times_all_cycles_str \ + = [datetime.strftime(dt_object, "%Y%m%d%H") + for dt_object in fcst_output_times_all_cycles] + fcst_output_hrs_of_day_str = [yyyymmddhh[8:10] for yyyymmddhh in fcst_output_times_all_cycles_str] + fcst_output_hrs_of_day_str.sort() + + # Check that all the forecast output hours-of-day are a subset of the obs + # availability hours-of-day. If not, remove the current accumulation + # interval from the list of intervals to verify. + if not set(fcst_output_hrs_of_day_str) <= set(obs_avail_hrs_of_day_str): + msg = dedent(f""" + The accumulation interval (accum_hrs) for the current cumulative forecast + field (field_fcst) is such that the forecast will output the field on at + least one of hour-of-day on which the corresponding observation type is + not available: + field_fcst = {field_fcst} + obtype = {obtype} + accum_hrs = {accum_hrs} hr + The forecast output hours-of-day for this field/accumulation interval + combination are: + fcst_output_hrs_of_day_str = {fcst_output_hrs_of_day_str} + The hours-of-day at which the obs are available are: + obs_avail_hrs_of_day_str = {obs_avail_hrs_of_day_str} + Thus, at least some of the forecast output cannot be verified. + Will remove this accumulation interval from the list of accumulation + intervals to verify for this field/obtype. + """) + logging.info(msg) + accum_intvls_hrs.remove(accum_hrs) + else: + fcst_obs_matched_times_all_cycles_cumul[field_fcst][accum_hh] = fcst_output_times_all_cycles_str + # + # Update the value in the experiment configuration dictionary of the list + # of accumulation intervals to verify for this cumulative field (since + # some accumulation intervals may have been removed after the checks above). 
+ # + vx_config[accum_intvls_array_name] = accum_intvls_hrs + # + # If the updated list of accumulations for the current cumulative field + # is empty, remove the field from the list of fields to verify in the + # verification configuration dictionary. + # + if not accum_intvls_hrs: + vx_config["VX_FIELDS"].remove(field_fcst) + msg = dedent(f""" + The list of accumulation intervals (accum_intvls_hrs) for the current + cumulative field to verify (field_fcst) is empty: + field_fcst = {field_fcst} + accum_intvls_hrs = {accum_intvls_hrs} + Removing this field from the list of fields to verify. The updated list + is: + {vx_config["VX_FIELDS"]} + """) + logging.info(msg) + + return vx_config, fcst_obs_matched_times_all_cycles_cumul + + +def set_fcst_output_times_and_obs_days_all_cycles( + start_time_first_cycl, start_time_last_cycl, cycl_intvl, + fcst_len, fcst_output_intvl): + """ + This function returns forecast output times and observation days (i.e. + days on which obs are needed because there is forecast output on those + days) for both instantaneous (e.g. REFC, RETOP, T2m) and cumulative (e.g. + APCP) fields that need to be verified. Note that for cumulative fields, + the only accumulation interval considered is the forecast output interval. + Accumulation intervals larger than this are considered elsewhere (and + accumulation interval smaller than this are obviously not allowed). + + Args: + start_time_first_cycl: + Starting time of first cycle; a datetime object. + + start_time_last_cycl: + Starting time of last cycle; a datetime object. + + cycl_intvl: + Time interval between cycle starting times; a timedelta object. - obs_days_all_cycles_inst: - List of observation days (i.e. days on which observations are needed to - perform verification) over all cycles of instantaneous fields. Each - element is a string of the form 'YYYYMMDD'. + fcst_len: + The length of each forecast; a timedelta object. 
- fcst_output_times_all_cycles_cumul: - List of forecast output times over all cycles of cumulative fields. Each - element is a string of the form 'YYYYMMDDHH'. + fcst_output_intvl: + Time interval between forecast output times; a timedelta object. - obs_days_all_cycles_cumul: - List of observation days (i.e. days on which observations are needed to - perform verification) over all cycles of cumulative fields. Each element - is a string of the form 'YYYYMMDD'. + Returns: + fcst_output_times_all_cycles: + Dictionary containing a list of forecast output times over all cycles for + instantaneous fields and a second analogous list for cumulative fields. + Each element of these lists is a string of the form 'YYYYMMDDHH'. + obs_days_all_cycles: + Dictionary containing a list of observation days (i.e. days on which + observations are needed to perform verification) over all cycles for + instantaneous fields and a second analogous list for cumulative fields. + Each element of these lists is a string of the form 'YYYYMMDD'. """ # Get the list containing the starting times of the cycles. Each element - # of the list is a string of the form 'YYYYMMDDHH'. - cycle_start_times_str \ - = set_cycle_dates(start_time_first_cycl, start_time_last_cycl, cycl_intvl) - - # Convert cycle_start_times_str to a list of datetime objects. - cycle_start_times = [datetime.strptime(yyyymmddhh, "%Y%m%d%H") for yyyymmddhh in cycle_start_times_str] + # of the list will be a datetime object. + cycle_start_times \ + = set_cycle_dates(start_time_first_cycl, start_time_last_cycl, cycl_intvl, + return_type='datetime') # Get the number of forecast output times per cycle/forecast. num_fcst_output_times_per_cycle = int(fcst_len/fcst_output_intvl + 1) - # Initialize sets that will contain the various forecast output and obs - # day information. 
- fcst_output_times_all_cycles_inst = set() - obs_days_all_cycles_inst = set() - fcst_output_times_all_cycles_cumul = set() - obs_days_all_cycles_cumul = set() + # Initialize dictionaries that will contain the various forecast output + # time and obs day information. Note that we initialize the contents of + # these dictionaries as sets because that better suites the data manipulation + # we will need to do, but these sets will later be converted to lists. + fcst_output_times_all_cycles = dict() + fcst_output_times_all_cycles['inst'] = set() + fcst_output_times_all_cycles['cumul'] = set() + obs_days_all_cycles = dict() + obs_days_all_cycles['inst'] = set() + obs_days_all_cycles['cumul'] = set() for i, start_time_crnt_cycle in enumerate(cycle_start_times): # Create a list of forecast output times of instantaneous fields for the @@ -115,8 +420,8 @@ def set_fcst_output_times_and_obs_days_all_cycles( for i in range(0,num_fcst_output_times_per_cycle)] # Include the output times of instantaneous fields for the current cycle # in the set of all such output times over all cycles. - fcst_output_times_all_cycles_inst \ - = fcst_output_times_all_cycles_inst | set(fcst_output_times_crnt_cycle_inst) + fcst_output_times_all_cycles['inst'] \ + = fcst_output_times_all_cycles['inst'] | set(fcst_output_times_crnt_cycle_inst) # Create a list of instantaneous field obs days (i.e. days on which # observations of instantaneous fields are needed for verification) for @@ -127,7 +432,7 @@ def set_fcst_output_times_and_obs_days_all_cycles( obs_days_crnt_cycl_inst = sorted(set(tmp)) # Include the obs days for instantaneous fields for the current cycle # in the set of all such obs days over all cycles. - obs_days_all_cycles_inst = obs_days_all_cycles_inst | set(obs_days_crnt_cycl_inst) + obs_days_all_cycles['inst'] = obs_days_all_cycles['inst'] | set(obs_days_crnt_cycl_inst) # Create a list of forecast output times of cumulative fields for the # current cycle. 
This is simply the list of forecast output times for @@ -137,8 +442,8 @@ def set_fcst_output_times_and_obs_days_all_cycles( fcst_output_times_crnt_cycle_cumul.pop(0) # Include the obs days for cumulative fields for the current cycle in the # set of all such obs days over all cycles. - fcst_output_times_all_cycles_cumul \ - = fcst_output_times_all_cycles_cumul | set(fcst_output_times_crnt_cycle_cumul) + fcst_output_times_all_cycles['cumul'] \ + = fcst_output_times_all_cycles['cumul'] | set(fcst_output_times_crnt_cycle_cumul) # Create a list of cumulative field obs days (i.e. days on which # observations of cumulative fields are needed for verification) for @@ -158,49 +463,57 @@ def set_fcst_output_times_and_obs_days_all_cycles( obs_days_crnt_cycl_cumul = sorted(set(tmp)) # Include the obs days for cumulative fields for the current cycle in the # set of all such obs days over all cycles. - obs_days_all_cycles_cumul = obs_days_all_cycles_cumul | set(obs_days_crnt_cycl_cumul) + obs_days_all_cycles['cumul'] = obs_days_all_cycles['cumul'] | set(obs_days_crnt_cycl_cumul) # Convert the set of output times of instantaneous fields over all cycles # to a sorted list of strings of the form 'YYYYMMDDHH'. - fcst_output_times_all_cycles_inst = sorted(fcst_output_times_all_cycles_inst) - fcst_output_times_all_cycles_inst = [datetime.strftime(fcst_output_times_all_cycles_inst[i], "%Y%m%d%H") - for i in range(len(fcst_output_times_all_cycles_inst))] + fcst_output_times_all_cycles['inst'] = sorted(fcst_output_times_all_cycles['inst']) + fcst_output_times_all_cycles['inst'] \ + = [datetime.strftime(fcst_output_times_all_cycles['inst'][i], "%Y%m%d%H") + for i in range(len(fcst_output_times_all_cycles['inst']))] # Convert the set of obs days for instantaneous fields over all cycles # to a sorted list of strings of the form 'YYYYMMDD'. 
- obs_days_all_cycles_inst = sorted(obs_days_all_cycles_inst) - obs_days_all_cycles_inst = [datetime.strftime(obs_days_all_cycles_inst[i], "%Y%m%d") - for i in range(len(obs_days_all_cycles_inst))] + obs_days_all_cycles['inst'] = sorted(obs_days_all_cycles['inst']) + obs_days_all_cycles['inst'] \ + = [datetime.strftime(obs_days_all_cycles['inst'][i], "%Y%m%d") + for i in range(len(obs_days_all_cycles['inst']))] # Convert the set of output times of cumulative fields over all cycles to # a sorted list of strings of the form 'YYYYMMDDHH'. - fcst_output_times_all_cycles_cumul = sorted(fcst_output_times_all_cycles_cumul) - fcst_output_times_all_cycles_cumul = [datetime.strftime(fcst_output_times_all_cycles_cumul[i], "%Y%m%d%H") - for i in range(len(fcst_output_times_all_cycles_cumul))] + fcst_output_times_all_cycles['cumul'] = sorted(fcst_output_times_all_cycles['cumul']) + fcst_output_times_all_cycles['cumul'] \ + = [datetime.strftime(fcst_output_times_all_cycles['cumul'][i], "%Y%m%d%H") + for i in range(len(fcst_output_times_all_cycles['cumul']))] # Convert the set of obs days for cumulative fields over all cycles to a # sorted list of strings of the form 'YYYYMMDD'. 
- obs_days_all_cycles_cumul = sorted(obs_days_all_cycles_cumul) - obs_days_all_cycles_cumul = [datetime.strftime(obs_days_all_cycles_cumul[i], "%Y%m%d") - for i in range(len(obs_days_all_cycles_cumul))] + obs_days_all_cycles['cumul'] = sorted(obs_days_all_cycles['cumul']) + obs_days_all_cycles['cumul'] \ + = [datetime.strftime(obs_days_all_cycles['cumul'][i], "%Y%m%d") + for i in range(len(obs_days_all_cycles['cumul']))] - return fcst_output_times_all_cycles_inst, obs_days_all_cycles_inst, \ - fcst_output_times_all_cycles_cumul, obs_days_all_cycles_cumul + return fcst_output_times_all_cycles, obs_days_all_cycles -def set_cycledefs_for_obs_days(obs_days_all_cycles): - """Given a list of days on which obs are needed, this function generates a - list of ROCOTO-style cycledef strings that together span the days (over - all cycles of an SRW App experiment) on which obs are needed. The input - list of days must be increasing in time, but the days do not have to be - consecutive, i.e. there may be gaps between days that are greater than - one day. +def set_rocoto_cycledefs_for_obs_days(obs_days_all_cycles): + """ + Given a list of days on which observations are needed (because there is + forecast output on those days), this function generates a list of ROCOTO- + style cycledef strings that together span the days (over all cycles of an + SRW App experiment) on which obs are needed. The input list of days must + be increasing in time, but the days do not have to be consecutive, i.e. + there may be gaps between days that are greater than one day. Each cycledef string in the output list represents a set of consecutive days in the input string (when used inside a tag in a ROCOTO XML). Thus, when the cycledef strings in the output string are all assigned to the same cycledef group in a ROCOTO XML, that group will - represent all the days on which observations are needed. + represent all the days on which observations are needed. 
This allows + the ROCOTO workflow to define a single set of non-consecutive days on + which obs are needed and define tasks (e.g. get_obs) only for those + days, thereby avoiding the redundant creation of these tasks for any + in-between days on which obs are not needed. Args: obs_days_all_cycles: @@ -210,16 +523,16 @@ def set_cycledefs_for_obs_days(obs_days_all_cycles): gaps between days. Returns: - cycledef_all_obs_days: + cycledefs_all_obs_days: A list of strings, with each string being a ROCOTO-style cycledef of the form '{yyyymmdd_start}0000 {yyyymmdd_end}0000 24:00:00' where {yyyymmdd_start} is the starting day of the first cycle in the - cycledef, and {yyyymmdd_end} is the starting day of the last cycle (note + cycledef and {yyyymmdd_end} is the starting day of the last cycle (note that the minutes and hours in these cycledef stirngs are always set to - '00'). Thus, one of the elements of the output list may be as follows: + '00'). For example, an element of the output list may be: '202404290000 202405010000 24:00:00' """ @@ -232,16 +545,17 @@ def set_cycledefs_for_obs_days(obs_days_all_cycles): # Initialize the variable that in the loop below contains the date of # the previous day. This is just the first element of the list of # datetime objects constructed above. Then use it to initialize the - # list (contin_obs_day_lists) that will contain lists of consecutive + # list (consec_obs_days_lists) that will contain lists of consecutive # observation days. Thus, after its construction is complete, each - # element of contin_obs_day_lists will itself be a list containing - # datetime objects that are 24 hours apart. + # element of consec_obs_days_lists will itself be a list containing + # datetime objects that represent consecutive days (i.e. are guaranteed + # to be 24 hours apart). 
day_prev = tmp[0] - contin_obs_day_lists = list() - contin_obs_day_lists.append([day_prev]) + consec_obs_days_lists = list() + consec_obs_days_lists.append([day_prev]) # Remove the first element of the list of obs days since it has already - # been used initiliaze contin_obs_day_lists. + # been used initiliaze consec_obs_days_lists. tmp.pop(0) # Loop over the remaining list of obs days and construct the list of @@ -250,14 +564,14 @@ def set_cycledefs_for_obs_days(obs_days_all_cycles): for day_crnt in tmp: # If the current obs day comes 24 hours after the previous obs day, i.e. # if it is the next day of the previous obs day, append it to the last - # existing list in contin_obs_day_lists. + # existing list in consec_obs_days_lists. if day_crnt == day_prev + one_day: - contin_obs_day_lists[-1].append(day_crnt) + consec_obs_days_lists[-1].append(day_crnt) # If the current obs day is NOT the next day of the previous obs day, - # append a new element to contin_obs_day_lists and initialize it as a + # append a new element to consec_obs_days_lists and initialize it as a # list containing a single element -- the current obs day. else: - contin_obs_day_lists.append([day_crnt]) + consec_obs_days_lists.append([day_crnt]) # Update the value of the previous day in preparation for the next # iteration of the loop. day_prev = day_crnt @@ -267,13 +581,149 @@ def set_cycledefs_for_obs_days(obs_days_all_cycles): # obs days when included in a tag in a ROCOTO XML. Each # string in this new list corresponds to a series of consecutive days on # which observations are needed (where by "consecutive" we mean no days - # are skipped), and there is at least a one day gap between each such + # are skipped), and there is at least a one-day gap between each such # series. These cycledefs together represent all the days (i.e. over all # cycles of the experiment) on which observations are needed. 
- cycledef_all_obs_days = list() - for contin_obs_day_list in contin_obs_day_lists: - cycledef_start = contin_obs_day_list[0].strftime('%Y%m%d%H%M') - cycledef_end = contin_obs_day_list[-1].strftime('%Y%m%d%H%M') - cycledef_all_obs_days.append(' '.join([cycledef_start, cycledef_end, '24:00:00'])) + cycledefs_all_obs_days = list() + for consec_obs_days_list in consec_obs_days_lists: + cycledef_start = consec_obs_days_list[0].strftime('%Y%m%d%H%M') + cycledef_end = consec_obs_days_list[-1].strftime('%Y%m%d%H%M') + cycledefs_all_obs_days.append(' '.join([cycledef_start, cycledef_end, '24:00:00'])) + + return cycledefs_all_obs_days + + +def get_obs_retrieve_times_by_day( + vx_config, fcst_output_times_all_cycles, obs_days_all_cycles): + """ + This function generates dictionary of dictionaries that, for each + combination of obs type needed and each obs day, contains a string list + of the times at which that type of observation is needed on that day. + The elements of each list are formatted as 'YYYYMMDDHH'. + + Args: + vx_config: + The verification configuration dictionary. + + fcst_output_times_all_cycles: + Dictionary containing a list of forecast output times over all cycles for + instantaneous fields and a second analogous list for cumulative fields. + Each element of these lists is a string of the form 'YYYYMMDDHH'. + + obs_days_all_cycles: + Dictionary containing a list of observation days (i.e. days on which + observations are needed to perform verification) over all cycles for + instantaneous fields and a second analogous list for cumulative fields. + Each element of these lists is a string of the form 'YYYYMMDD'. + + Returns: + obs_retrieve_times_by_day: + Dictionary of dictionaries containing times at which each type of obs is + needed on each obs day. + """ + # Convert string contents of input dictionaries to datetime objects. 
+ for time_type in ['cumul', 'inst']: + fcst_output_times_all_cycles[time_type] \ + = [datetime.strptime(fcst_output_times_all_cycles[time_type][i], "%Y%m%d%H") + for i in range(len(fcst_output_times_all_cycles[time_type]))] + obs_days_all_cycles[time_type] \ + = [datetime.strptime(obs_days_all_cycles[time_type][i], "%Y%m%d") + for i in range(len(obs_days_all_cycles[time_type]))] + + # Get list of forecast fields to be verified. + vx_fields = vx_config['VX_FIELDS'] + + # Define dictionary containing information about all fields that may + # possibly be verified. This information includes their temporal + # characteristics (cumulative vs. instantaneous) and the mapping between + # the observation type and the forecast field. + vx_field_info = {'cumul': [{'obtype': 'CCPA', 'fcst_fields': ['APCP']}, + {'obtype': 'NOHRSC', 'fcst_fields': ['ASNOW']}], + 'inst': [{'obtype': 'MRMS', 'fcst_fields': ['REFC', 'RETOP']}, + {'obtype': 'NDAS', 'fcst_fields': ['ADPSFC', 'ADPUPA']}] + } + + # Keep only those items in the dictionary above that have forecast fields + # that appear in the list of forecast fields to be verified. + for obs_time_type, obtypes_to_fcst_fields_dict_list in vx_field_info.copy().items(): + for obtypes_to_fcst_fields_dict in obtypes_to_fcst_fields_dict_list.copy(): + obtype = obtypes_to_fcst_fields_dict['obtype'] + fcst_fields = obtypes_to_fcst_fields_dict['fcst_fields'] + fcst_fields = [field for field in fcst_fields if field in vx_fields] + obtypes_to_fcst_fields_dict['fcst_fields'] = fcst_fields + if not fcst_fields: obtypes_to_fcst_fields_dict_list.remove(obtypes_to_fcst_fields_dict) + if not obtypes_to_fcst_fields_dict_list: vx_field_info.pop(obs_time_type) + + # Create dictionary containing the temporal characteristics as keys and + # a string list of obs types to verify as the values. 
+ obs_time_type_to_obtypes_dict = dict() + for obs_time_type, obtypes_to_fcst_fields_dict_list in vx_field_info.items(): + obtype_list = [the_dict['obtype'] for the_dict in obtypes_to_fcst_fields_dict_list] + obs_time_type_to_obtypes_dict[obs_time_type] = obtype_list + + # Initialize the return variable. + obs_retrieve_times_by_day = dict() + + # Define timedelta object representing a single day. + one_day = timedelta(days=1) - return cycledef_all_obs_days + # Loop over all obs types to be verified (by looping over the temporal + # type and the specific obs under that type). For each obs type, loop + # over each obs day and find the times within that that at which the obs + # need to be retrieved. + for obs_time_type, obtypes in obs_time_type_to_obtypes_dict.items(): + + fcst_output_times_all_cycles_crnt_ttype = fcst_output_times_all_cycles[obs_time_type] + obs_days_all_cycles_crnt_ttype = obs_days_all_cycles[obs_time_type] + + for obtype in obtypes: + + obs_retrieve_times_by_day[obtype] = dict() + + # Get the availability interval for the current observation type from the + # verification configuration dictionary. Then make sure it divides evenly + # into 24. + config_var_name = "".join([obtype, "_OBS_AVAIL_INTVL_HRS"]) + obs_avail_intvl_hrs = vx_config[config_var_name] + remainder = 24 % obs_avail_intvl_hrs + if remainder != 0: + msg = dedent(f""" + The obs availability interval for obs of type {obtype} must divide evenly + into 24 but doesn't: + obs_avail_intvl_hrs = {obs_avail_intvl_hrs} + 24 % obs_avail_intvl_hrs = {remainder}" + """) + raise Exception(msg) + obs_avail_intvl = timedelta(hours=obs_avail_intvl_hrs) + num_obs_avail_times_per_day = int(24/obs_avail_intvl_hrs) + + # Loop over all obs days over all cycles (for the current obs type). For + # each such day, get the list forecast output times and the list of obs + # availability times. Finally, set the times (on that day) that obs need + # to be retrieved to the intersection of these two lists. 
+ for obs_day in obs_days_all_cycles_crnt_ttype: + + next_day = obs_day + one_day + if obs_time_type == "cumul": + fcst_output_times_crnt_day \ + = [time for time in fcst_output_times_all_cycles_crnt_ttype if obs_day < time <= next_day] + elif obs_time_type == "inst": + fcst_output_times_crnt_day \ + = [time for time in fcst_output_times_all_cycles_crnt_ttype if obs_day <= time < next_day] + fcst_output_times_crnt_day = [datetime.strftime(time, "%Y%m%d%H") for time in fcst_output_times_crnt_day] + + if obs_time_type == "cumul": + obs_avail_times_crnt_day \ + = [obs_day + (i+1)*obs_avail_intvl for i in range(0,num_obs_avail_times_per_day)] + elif obs_time_type == "inst": + obs_avail_times_crnt_day \ + = [obs_day + i*obs_avail_intvl for i in range(0,num_obs_avail_times_per_day)] + obs_avail_times_crnt_day = [datetime.strftime(time, "%Y%m%d%H") for time in obs_avail_times_crnt_day] + + obs_retrieve_times_crnt_day = list(set(fcst_output_times_crnt_day) & set(obs_avail_times_crnt_day)) + obs_retrieve_times_crnt_day.sort() + + obs_day_str = datetime.strftime(obs_day, "%Y%m%d") + obs_retrieve_times_by_day[obtype][obs_day_str] = obs_retrieve_times_crnt_day + + return obs_retrieve_times_by_day diff --git a/ush/setup.py b/ush/setup.py index d6e9e5c2d0..8aaec0ef90 100644 --- a/ush/setup.py +++ b/ush/setup.py @@ -42,7 +42,9 @@ from set_cycle_and_obs_timeinfo import \ set_cycle_dates, set_fcst_output_times_and_obs_days_all_cycles, \ - set_cycledefs_for_obs_days + set_rocoto_cycledefs_for_obs_days, \ + check_temporal_consistency_cumul_fields, \ + get_obs_retrieve_times_by_day from set_predef_grid_params import set_predef_grid_params from set_gridparams_ESGgrid import set_gridparams_ESGgrid from set_gridparams_GFDLgrid import set_gridparams_GFDLgrid @@ -546,32 +548,106 @@ def remove_tag(tasks, tag): # # ----------------------------------------------------------------------- # - # For vx fields that are accumulated, remove those accumulation hours - # that are longer than the 
forecast length. If that leaves the array - # of accumulation hours for that field empty, then remove the field - # from the list of fields to be verified. + # Set some variables needed for running checks on and creating new + # (derived) configuration variables for the verification. # # ----------------------------------------------------------------------- # - # Get the vx fields specified in the experiment configuration. - vx_fields_config = expt_config["verification"]["VX_FIELDS"] - + date_first_cycl = workflow_config.get("DATE_FIRST_CYCL") + date_last_cycl = workflow_config.get("DATE_LAST_CYCL") + incr_cycl_freq = int(workflow_config.get("INCR_CYCL_FREQ")) fcst_len_hrs = workflow_config.get("FCST_LEN_HRS") - vx_fields_accum = ["APCP", "ASNOW"] - for field in vx_fields_accum: - if field in vx_fields_config: - accum_periods_array_name = "".join(["VX_", field, "_ACCUMS_HRS"]) - accum_periods = expt_config["verification"][accum_periods_array_name] - accum_periods = [accum for accum in accum_periods if (accum <= fcst_len_hrs)] - expt_config["verification"][accum_periods_array_name] = accum_periods - if not accum_periods: - vx_fields_config.remove(field) - - expt_config["verification"]["VX_FIELDS"] = vx_fields_config + + # Set the forecast output interval. Ideally, this should be obtained + # from the SRW App's configuration file, but such a variable doesn't + # yet exist in that file. + fcst_output_intvl_hrs = 1 + workflow_config['FCST_OUTPUT_INTVL_HRS'] = fcst_output_intvl_hrs + + # To enable arithmetic with dates and times, convert various time + # intervals from integer to datetime.timedelta objects. + cycl_intvl_dt = datetime.timedelta(hours=incr_cycl_freq) + fcst_len_dt = datetime.timedelta(hours=fcst_len_hrs) + fcst_output_intvl_dt = datetime.timedelta(hours=fcst_output_intvl_hrs) + # + # ----------------------------------------------------------------------- + # + # Ensure that the configuration parameters associated with cumulative + # fields (e.g. 
APCP) in the verification section of the experiment + # dicitonary are temporally consistent, e.g. that accumulation intervals + # are less than or equal to the forecast length. Update the verification + # section of the dictionary to remove inconsistencies. + # + # ----------------------------------------------------------------------- + # + vx_config = expt_config["verification"] + vx_config, fcst_obs_matched_times_all_cycles_cumul \ + = check_temporal_consistency_cumul_fields( + vx_config, + date_first_cycl, date_last_cycl, cycl_intvl_dt, + fcst_len_dt, fcst_output_intvl_dt) + expt_config["verification"] = vx_config + # + # ----------------------------------------------------------------------- + # + # Generate a list of forecast output times and a list of obs days (i.e. + # days on which observations are needed to perform verification because + # there is forecast output on those days) over all cycles, both for + # instantaneous fields (e.g. T2m, REFC, RETOP) and for cumulative ones + # (e.g. APCP). Then add these lists to the dictionary containing workflow + # configuration variables. These will be needed in generating the ROCOTO + # XML. + # + # ----------------------------------------------------------------------- + # + fcst_output_times_all_cycles, obs_days_all_cycles, \ + = set_fcst_output_times_and_obs_days_all_cycles( + date_first_cycl, date_last_cycl, cycl_intvl_dt, + fcst_len_dt, fcst_output_intvl_dt) + + workflow_config['OBS_DAYS_ALL_CYCLES_INST'] = obs_days_all_cycles['inst'] + workflow_config['OBS_DAYS_ALL_CYCLES_CUMUL'] = obs_days_all_cycles['cumul'] + # + # ----------------------------------------------------------------------- + # + # Generate lists of ROCOTO cycledef strings corresonding to the obs days + # for instantaneous fields and those for cumulative ones. Then save the + # lists of cycledefs in the dictionary containing values needed to + # construct the ROCOTO XML. 
# # ----------------------------------------------------------------------- # - # Remove all verification [meta]tasks for which no fields are specified. + cycledefs_obs_days_inst = set_rocoto_cycledefs_for_obs_days(obs_days_all_cycles['inst']) + cycledefs_obs_days_cumul = set_rocoto_cycledefs_for_obs_days(obs_days_all_cycles['cumul']) + + rocoto_config['cycledefs']['cycledefs_obs_days_inst'] = cycledefs_obs_days_inst + rocoto_config['cycledefs']['cycledefs_obs_days_cumul'] = cycledefs_obs_days_cumul + # + # ----------------------------------------------------------------------- + # + # Generate dictionary of dictionaries that, for each combination of obs + # type needed and obs day, contains a string list of the times at which + # that type of observation is needed on that day. The elements of each + # list are formatted as 'YYYYMMDDHH'. This information is used by the + # day-based get_obs tasks in the workflow to get obs only at those times + # at which they are needed (as opposed to for the whole day). + # + # ----------------------------------------------------------------------- + # + vx_config = expt_config["verification"] + obs_retrieve_times_by_day \ + = get_obs_retrieve_times_by_day( + vx_config, fcst_output_times_all_cycles, obs_days_all_cycles) + + for obtype, obs_days_dict in obs_retrieve_times_by_day.items(): + for obs_day, obs_retrieve_times in obs_days_dict.items(): + array_name = '_'.join(["OBS_RETRIEVE_TIMES", obtype, obs_day]) + vx_config[array_name] = obs_retrieve_times + expt_config["verification"] = vx_config + # + # ----------------------------------------------------------------------- + # + # Remove all verification (meta)tasks for which no fields are specified. 
# # ----------------------------------------------------------------------- # @@ -579,7 +655,8 @@ def remove_tag(tasks, tag): vx_metatasks_all = {} vx_fields_all["CCPA"] = ["APCP"] - vx_metatasks_all["CCPA"] = ["metatask_PcpCombine_obs", + vx_metatasks_all["CCPA"] = ["task_get_obs_ccpa", + "metatask_PcpCombine_obs_CCPA_all_accums", "metatask_PcpCombine_fcst_APCP_all_accums_all_mems", "metatask_GridStat_CCPA_all_accums_all_mems", "metatask_GenEnsProd_EnsembleStat_CCPA", @@ -587,33 +664,38 @@ def remove_tag(tasks, tag): vx_fields_all["NOHRSC"] = ["ASNOW"] vx_metatasks_all["NOHRSC"] = ["task_get_obs_nohrsc", + "metatask_PcpCombine_obs_NOHRSC_all_accums", "metatask_PcpCombine_fcst_ASNOW_all_accums_all_mems", "metatask_GridStat_NOHRSC_all_accums_all_mems", "metatask_GenEnsProd_EnsembleStat_NOHRSC", "metatask_GridStat_NOHRSC_ensmeanprob_all_accums"] vx_fields_all["MRMS"] = ["REFC", "RETOP"] - vx_metatasks_all["MRMS"] = ["metatask_GridStat_MRMS_all_mems", + vx_metatasks_all["MRMS"] = ["task_get_obs_mrms", + "metatask_GridStat_MRMS_all_mems", "metatask_GenEnsProd_EnsembleStat_MRMS", "metatask_GridStat_MRMS_ensprob"] vx_fields_all["NDAS"] = ["ADPSFC", "ADPUPA"] - vx_metatasks_all["NDAS"] = ["task_run_MET_Pb2nc_obs", + vx_metatasks_all["NDAS"] = ["task_get_obs_ndas", + "task_run_MET_Pb2nc_obs_NDAS", "metatask_PointStat_NDAS_all_mems", "metatask_GenEnsProd_EnsembleStat_NDAS", "metatask_PointStat_NDAS_ensmeanprob"] # If there are no vx fields specified, remove those tasks that are necessary # for all observation types. - if not vx_fields_config: + vx_config = expt_config["verification"] + vx_fields = vx_config["VX_FIELDS"] + if not vx_fields: metatask = "metatask_check_post_output_all_mems" rocoto_config['tasks'].pop(metatask) # If for a given obstype no fields are specified, remove all vx metatasks # for that obstype. 
for obstype in vx_fields_all: - vx_fields_obstype = [field for field in vx_fields_config if field in vx_fields_all[obstype]] - if not vx_fields_obstype: + vx_fields_by_obstype = [field for field in vx_fields if field in vx_fields_all[obstype]] + if not vx_fields_by_obstype: for metatask in vx_metatasks_all[obstype]: if metatask in rocoto_config['tasks']: logging.info(dedent( @@ -627,6 +709,24 @@ def remove_tag(tasks, tag): # # ----------------------------------------------------------------------- # + # The "cycled_from_second" cycledef in the default workflow configuration + # file (default_workflow.yaml) requires the starting date of the second + # cycle. That is difficult to calculate in the yaml file itself because + # currently, there are no utilities to perform arithmetic with dates. + # Thus, we calculate it here and save it as a variable in the workflow + # configuration dictionary. Note that correct functioning of the default + # workflow yaml file also requires that DATE_[FIRST|SECOND|LAST]_CYCL all + # be strings, not datetime objects. We perform those conversions here. 
+ # + # ----------------------------------------------------------------------- + # + date_second_cycl = date_first_cycl + cycl_intvl_dt + workflow_config['DATE_FIRST_CYCL'] = datetime.datetime.strftime(date_first_cycl, "%Y%m%d%H") + workflow_config['DATE_SECOND_CYCL'] = datetime.datetime.strftime(date_second_cycl, "%Y%m%d%H") + workflow_config['DATE_LAST_CYCL'] = datetime.datetime.strftime(date_last_cycl, "%Y%m%d%H") + # + # ----------------------------------------------------------------------- + # # ICS and LBCS settings and validation # # ----------------------------------------------------------------------- @@ -775,61 +875,6 @@ def get_location(xcs, fmt, expt_cfg): run_envir = expt_config["user"].get("RUN_ENVIR", "") - fcst_len_hrs = workflow_config.get("FCST_LEN_HRS") - date_first_cycl = workflow_config.get("DATE_FIRST_CYCL") - date_last_cycl = workflow_config.get("DATE_LAST_CYCL") - incr_cycl_freq = int(workflow_config.get("INCR_CYCL_FREQ")) - - # Set the forecast output interval. Ideally, this should be obtained - # from the SRW App's configuration file, but such a variable doesn't - # yet exist in that file. - fcst_output_intvl_hrs = 1 - - # To enable arithmetic with dates and times, convert various time - # intervals from integer to datetime.timedelta objects. - cycl_intvl = datetime.timedelta(days=0, hours=incr_cycl_freq, minutes=0, seconds=0) - fcst_len = datetime.timedelta(days=0, hours=fcst_len_hrs, minutes=0, seconds=0) - fcst_output_intvl = datetime.timedelta(days=0, hours=fcst_output_intvl_hrs, minutes=0, seconds=0) - - # Generate a list of forecast output times and a list of obs days (i.e. - # days on which observations are needed to perform verification) over all - # cycles, both for instantaneous fields (e.g. T2m, REFC, RETOP) and for - # cumulative ones (e.g. APCP). 
- output_times_all_cycles_inst, obs_days_all_cycles_inst, \ - output_times_all_cycles_cumul, obs_days_all_cycles_cumul \ - = set_fcst_output_times_and_obs_days_all_cycles( \ - date_first_cycl, date_last_cycl, cycl_intvl, fcst_len, fcst_output_intvl) - - # Add the list generated above to the dictionary containing workflow - # configuration variables. These will be needed in generating the ROCOTO - # XML. - workflow_config['OUTPUT_TIMES_ALL_CYCLES_INST'] = output_times_all_cycles_inst - workflow_config['OBS_DAYS_ALL_CYCLES_INST'] = obs_days_all_cycles_inst - workflow_config['OUTPUT_TIMES_ALL_CYCLES_CUMUL'] = output_times_all_cycles_cumul - workflow_config['OBS_DAYS_ALL_CYCLES_CUMUL'] = obs_days_all_cycles_cumul - - # Generate lists of ROCOTO cycledef strings corresonding to the obs days - # for instantaneous fields and those for cumulative ones. - cycledef_obs_days_inst = set_cycledefs_for_obs_days(obs_days_all_cycles_inst) - cycledef_obs_days_cumul = set_cycledefs_for_obs_days(obs_days_all_cycles_cumul) - # Save the lists of cycledefs in the dictionary containing values needed - # to construct the ROCOTO XML. - rocoto_config['cycledefs']['cycledef_obs_days_inst'] = cycledef_obs_days_inst - rocoto_config['cycledefs']['cycledef_obs_days_cumul'] = cycledef_obs_days_cumul - - # The "cycled_from_second" cycledef in the default workflow configuration - # file (default_workflow.yaml) requires the starting date of the second - # cycle. That is difficult to calculate in the yaml file itself because - # currently, there are no utilities to perform arithmetic with dates. - # Thus, we calculate it here and save it as a variable in the workflow - # configuration dictionary. Note that correct functioning of the default - # workflow yaml file also requires that DATE_[FIRST|SECOND|LAST]_CYCL all - # be strings, not datetime objects. We perform those conversions here. 
- date_second_cycl = date_first_cycl + cycl_intvl - workflow_config['DATE_FIRST_CYCL'] = datetime.datetime.strftime(date_first_cycl, "%Y%m%d%H") - workflow_config['DATE_SECOND_CYCL'] = datetime.datetime.strftime(date_second_cycl, "%Y%m%d%H") - workflow_config['DATE_LAST_CYCL'] = datetime.datetime.strftime(date_last_cycl, "%Y%m%d%H") - # set varying forecast lengths only when fcst_len_hrs=-1 if fcst_len_hrs == -1: fcst_len_cycl = workflow_config.get("FCST_LEN_CYCL") From ab1332d6b6a4d310de55ea8592727d5a94e672ff Mon Sep 17 00:00:00 2001 From: gerard ketefian Date: Fri, 27 Sep 2024 19:40:19 -0600 Subject: [PATCH 084/131] Bug fixes. --- ...ulticyc.cycintvl_07hr_inits_vary_fcstlen_09hr.ncep-hrrr.yaml | 2 +- ...ulticyc.cycintvl_11hr_inits_vary_fcstlen_03hr.ncep-hrrr.yaml | 2 +- ...multicyc.cycintvl_24hr_inits_00z_fcstlen_03hr.ncep-hrrr.yaml | 2 +- ...multicyc.cycintvl_24hr_inits_21z_fcstlen_03hr.ncep-hrrr.yaml | 2 +- 4 files changed, 4 insertions(+), 4 deletions(-) diff --git a/tests/WE2E/test_configs/verification/config.get_obs_hpss.do_vx_det.multicyc.cycintvl_07hr_inits_vary_fcstlen_09hr.ncep-hrrr.yaml b/tests/WE2E/test_configs/verification/config.get_obs_hpss.do_vx_det.multicyc.cycintvl_07hr_inits_vary_fcstlen_09hr.ncep-hrrr.yaml index 41428a7939..418e47e95e 100644 --- a/tests/WE2E/test_configs/verification/config.get_obs_hpss.do_vx_det.multicyc.cycintvl_07hr_inits_vary_fcstlen_09hr.ncep-hrrr.yaml +++ b/tests/WE2E/test_configs/verification/config.get_obs_hpss.do_vx_det.multicyc.cycintvl_07hr_inits_vary_fcstlen_09hr.ncep-hrrr.yaml @@ -57,4 +57,4 @@ verification: VX_FCST_MODEL_NAME: 'hrrr_ncep' VX_FCST_INPUT_BASEDIR: '/scratch2/BMC/fv3lam/Gerard.Ketefian/forecasts_for_SRW_WE2E_tests.linked/{{- verification.VX_FCST_MODEL_NAME }}' FCST_FN_TEMPLATE: '${POST_OUTPUT_DOMAIN_NAME}_{init?fmt=%Y%m%d%H?shift=-${time_lag}}f{lead?fmt=%HHH?shift=${time_lag}}.grib2' - FCST_FN_TEMPLATE_PCPCOMBINE_OUTPUT: 
'${POST_OUTPUT_DOMAIN_NAME}_{init?fmt=%Y%m%d%HHH?shift=-${time_lag}}f{lead?fmt=%HHH?shift=${time_lag}}_${VAR}_a${ACCUM_HH}h.nc' + FCST_FN_TEMPLATE_PCPCOMBINE_OUTPUT: '${POST_OUTPUT_DOMAIN_NAME}_{init?fmt=%Y%m%d%H?shift=-${time_lag}}f{lead?fmt=%HHH?shift=${time_lag}}_${VAR}_a${ACCUM_HH}h.nc' diff --git a/tests/WE2E/test_configs/verification/config.get_obs_hpss.do_vx_det.multicyc.cycintvl_11hr_inits_vary_fcstlen_03hr.ncep-hrrr.yaml b/tests/WE2E/test_configs/verification/config.get_obs_hpss.do_vx_det.multicyc.cycintvl_11hr_inits_vary_fcstlen_03hr.ncep-hrrr.yaml index 2fae0d6388..913d5093bb 100644 --- a/tests/WE2E/test_configs/verification/config.get_obs_hpss.do_vx_det.multicyc.cycintvl_11hr_inits_vary_fcstlen_03hr.ncep-hrrr.yaml +++ b/tests/WE2E/test_configs/verification/config.get_obs_hpss.do_vx_det.multicyc.cycintvl_11hr_inits_vary_fcstlen_03hr.ncep-hrrr.yaml @@ -59,4 +59,4 @@ verification: VX_FCST_MODEL_NAME: 'hrrr_ncep' VX_FCST_INPUT_BASEDIR: '/scratch2/BMC/fv3lam/Gerard.Ketefian/forecasts_for_SRW_WE2E_tests.linked/{{- verification.VX_FCST_MODEL_NAME }}' FCST_FN_TEMPLATE: '${POST_OUTPUT_DOMAIN_NAME}_{init?fmt=%Y%m%d%H?shift=-${time_lag}}f{lead?fmt=%HHH?shift=${time_lag}}.grib2' - FCST_FN_TEMPLATE_PCPCOMBINE_OUTPUT: '${POST_OUTPUT_DOMAIN_NAME}_{init?fmt=%Y%m%d%HHH?shift=-${time_lag}}f{lead?fmt=%HHH?shift=${time_lag}}_${VAR}_a${ACCUM_HH}h.nc' + FCST_FN_TEMPLATE_PCPCOMBINE_OUTPUT: '${POST_OUTPUT_DOMAIN_NAME}_{init?fmt=%Y%m%d%H?shift=-${time_lag}}f{lead?fmt=%HHH?shift=${time_lag}}_${VAR}_a${ACCUM_HH}h.nc' diff --git a/tests/WE2E/test_configs/verification/config.get_obs_hpss.do_vx_det.multicyc.cycintvl_24hr_inits_00z_fcstlen_03hr.ncep-hrrr.yaml b/tests/WE2E/test_configs/verification/config.get_obs_hpss.do_vx_det.multicyc.cycintvl_24hr_inits_00z_fcstlen_03hr.ncep-hrrr.yaml index e150234a47..a859a03ac8 100644 --- a/tests/WE2E/test_configs/verification/config.get_obs_hpss.do_vx_det.multicyc.cycintvl_24hr_inits_00z_fcstlen_03hr.ncep-hrrr.yaml +++ 
b/tests/WE2E/test_configs/verification/config.get_obs_hpss.do_vx_det.multicyc.cycintvl_24hr_inits_00z_fcstlen_03hr.ncep-hrrr.yaml @@ -57,4 +57,4 @@ verification: VX_FCST_MODEL_NAME: 'hrrr_ncep' VX_FCST_INPUT_BASEDIR: '/scratch2/BMC/fv3lam/Gerard.Ketefian/forecasts_for_SRW_WE2E_tests.linked/{{- verification.VX_FCST_MODEL_NAME }}' FCST_FN_TEMPLATE: '${POST_OUTPUT_DOMAIN_NAME}_{init?fmt=%Y%m%d%H?shift=-${time_lag}}f{lead?fmt=%HHH?shift=${time_lag}}.grib2' - FCST_FN_TEMPLATE_PCPCOMBINE_OUTPUT: '${POST_OUTPUT_DOMAIN_NAME}_{init?fmt=%Y%m%d%HHH?shift=-${time_lag}}f{lead?fmt=%HHH?shift=${time_lag}}_${VAR}_a${ACCUM_HH}h.nc' + FCST_FN_TEMPLATE_PCPCOMBINE_OUTPUT: '${POST_OUTPUT_DOMAIN_NAME}_{init?fmt=%Y%m%d%H?shift=-${time_lag}}f{lead?fmt=%HHH?shift=${time_lag}}_${VAR}_a${ACCUM_HH}h.nc' diff --git a/tests/WE2E/test_configs/verification/config.get_obs_hpss.do_vx_det.multicyc.cycintvl_24hr_inits_21z_fcstlen_03hr.ncep-hrrr.yaml b/tests/WE2E/test_configs/verification/config.get_obs_hpss.do_vx_det.multicyc.cycintvl_24hr_inits_21z_fcstlen_03hr.ncep-hrrr.yaml index d8eb349433..563b8852a8 100644 --- a/tests/WE2E/test_configs/verification/config.get_obs_hpss.do_vx_det.multicyc.cycintvl_24hr_inits_21z_fcstlen_03hr.ncep-hrrr.yaml +++ b/tests/WE2E/test_configs/verification/config.get_obs_hpss.do_vx_det.multicyc.cycintvl_24hr_inits_21z_fcstlen_03hr.ncep-hrrr.yaml @@ -57,4 +57,4 @@ verification: VX_FCST_MODEL_NAME: 'hrrr_ncep' VX_FCST_INPUT_BASEDIR: '/scratch2/BMC/fv3lam/Gerard.Ketefian/forecasts_for_SRW_WE2E_tests.linked/{{- verification.VX_FCST_MODEL_NAME }}' FCST_FN_TEMPLATE: '${POST_OUTPUT_DOMAIN_NAME}_{init?fmt=%Y%m%d%H?shift=-${time_lag}}f{lead?fmt=%HHH?shift=${time_lag}}.grib2' - FCST_FN_TEMPLATE_PCPCOMBINE_OUTPUT: '${POST_OUTPUT_DOMAIN_NAME}_{init?fmt=%Y%m%d%HHH?shift=-${time_lag}}f{lead?fmt=%HHH?shift=${time_lag}}_${VAR}_a${ACCUM_HH}h.nc' + FCST_FN_TEMPLATE_PCPCOMBINE_OUTPUT: 
'${POST_OUTPUT_DOMAIN_NAME}_{init?fmt=%Y%m%d%H?shift=-${time_lag}}f{lead?fmt=%HHH?shift=${time_lag}}_${VAR}_a${ACCUM_HH}h.nc' From d50b4a0ce7fe8eff7d3341b506b5b360636cd4db Mon Sep 17 00:00:00 2001 From: gerard ketefian Date: Fri, 27 Sep 2024 19:47:24 -0600 Subject: [PATCH 085/131] Change name of cycledefs for obs days as was done in other files (python scripts) in previous commits. --- parm/wflow/verify_pre.yaml | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/parm/wflow/verify_pre.yaml b/parm/wflow/verify_pre.yaml index 80831f6f29..3ce65da55d 100644 --- a/parm/wflow/verify_pre.yaml +++ b/parm/wflow/verify_pre.yaml @@ -1,7 +1,7 @@ default_task_verify_pre: &default_task_verify_pre account: '&ACCOUNT;' attrs: - cycledefs: cycledef_obs_days_inst + cycledefs: cycledefs_obs_days_inst maxtries: '1' envars: &default_vars GLOBAL_VAR_DEFNS_FP: '&GLOBAL_VAR_DEFNS_FP;' @@ -24,7 +24,7 @@ default_task_verify_pre: &default_task_verify_pre task_get_obs_ccpa: <<: *default_task_verify_pre attrs: - cycledefs: cycledef_obs_days_cumul + cycledefs: cycledefs_obs_days_cumul maxtries: '1' command: '&LOAD_MODULES_RUN_TASK; "get_obs" "&JOBSdir;/JREGIONAL_GET_VERIF_OBS"' envars: @@ -76,7 +76,7 @@ task_get_obs_ndas: task_run_MET_Pb2nc_obs: <<: *default_task_verify_pre attrs: - cycledefs: cycledef_obs_days_inst + cycledefs: cycledefs_obs_days_inst maxtries: '2' command: '&LOAD_MODULES_RUN_TASK; "run_vx" "&JOBSdir;/JREGIONAL_RUN_MET_PB2NC_OBS"' envars: From 5b5e71f22e7290e1695c9cb9f382487a22ad908a Mon Sep 17 00:00:00 2001 From: gerard ketefian Date: Fri, 27 Sep 2024 19:49:38 -0600 Subject: [PATCH 086/131] Increase walltime for PcpCombine_fcst tasks since some WE2E tests are running out of time. 
--- parm/wflow/verify_pre.yaml | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/parm/wflow/verify_pre.yaml b/parm/wflow/verify_pre.yaml index 3ce65da55d..102eb9dafa 100644 --- a/parm/wflow/verify_pre.yaml +++ b/parm/wflow/verify_pre.yaml @@ -246,7 +246,7 @@ metatask_PcpCombine_fcst_APCP_all_accums_all_mems: attrs: age: 00:00:00:30 text: !cycstr '{{ workflow.EXPTDIR }}/@Y@m@d@H/post_files_exist_mem#mem#.txt' - walltime: 00:10:00 + walltime: 00:30:00 metatask_PcpCombine_fcst_ASNOW_all_accums_all_mems: var: @@ -274,4 +274,4 @@ metatask_PcpCombine_fcst_ASNOW_all_accums_all_mems: attrs: age: 00:00:00:30 text: !cycstr '{{ workflow.EXPTDIR }}/@Y@m@d@H/post_files_exist_mem#mem#.txt' - walltime: 00:10:00 + walltime: 00:30:00 From 2abd9df2df01426924de5641cf0b1cd7281035ee Mon Sep 17 00:00:00 2001 From: gerard ketefian Date: Fri, 27 Sep 2024 19:53:02 -0600 Subject: [PATCH 087/131] Remove unneeded environment variables from some tasks. --- parm/wflow/verify_det.yaml | 2 -- parm/wflow/verify_pre.yaml | 1 - 2 files changed, 3 deletions(-) diff --git a/parm/wflow/verify_det.yaml b/parm/wflow/verify_det.yaml index c4f420f10c..47be6bb42b 100644 --- a/parm/wflow/verify_det.yaml +++ b/parm/wflow/verify_det.yaml @@ -102,7 +102,6 @@ metatask_GridStat_MRMS_all_mems: <<: *default_vars OBS_DIR: '&MRMS_OBS_DIR;' VAR: '#VAR#' - ACCUM_HH: '01' METPLUSTOOLNAME: 'GRIDSTAT' OBTYPE: 'MRMS' ENSMEM_INDX: "#mem#" @@ -152,7 +151,6 @@ metatask_PointStat_NDAS_all_mems: VAR: '#VAR#' METPLUSTOOLNAME: 'POINTSTAT' OBTYPE: 'NDAS' - ACCUM_HH: '01' ENSMEM_INDX: "#mem#" SLASH_ENSMEM_SUBDIR_OR_NULL: '{% if global.DO_ENSEMBLE %}{{ "/mem#mem#" }}{% endif %}' OBS_AVAIL_INTVL_HRS: '{{- verification.NDAS_OBS_AVAIL_INTVL_HRS }}' diff --git a/parm/wflow/verify_pre.yaml b/parm/wflow/verify_pre.yaml index 102eb9dafa..6561954d8f 100644 --- a/parm/wflow/verify_pre.yaml +++ b/parm/wflow/verify_pre.yaml @@ -154,7 +154,6 @@ metatask_check_post_output_all_mems: envars: <<: *default_vars VAR: APCP - 
ACCUM_HH: '01' ENSMEM_INDX: '#mem#' dependency: # This "or" checks that the necessary stand-alone post tasks or forecast From 97f0a9c43a4e6a743c5a53d1d32aab352a7f2c90 Mon Sep 17 00:00:00 2001 From: gerard ketefian Date: Fri, 27 Sep 2024 19:58:52 -0600 Subject: [PATCH 088/131] Bug fix for PcpCombine path. --- scripts/exregional_run_met_gridstat_or_pointstat_vx.sh | 7 +++++-- scripts/exregional_run_met_pcpcombine.sh | 9 +++++++-- 2 files changed, 12 insertions(+), 4 deletions(-) diff --git a/scripts/exregional_run_met_gridstat_or_pointstat_vx.sh b/scripts/exregional_run_met_gridstat_or_pointstat_vx.sh index 263d22053f..aca2795018 100755 --- a/scripts/exregional_run_met_gridstat_or_pointstat_vx.sh +++ b/scripts/exregional_run_met_gridstat_or_pointstat_vx.sh @@ -144,6 +144,7 @@ ensmem_name="mem${ensmem_indx}" if [ "${RUN_ENVIR}" = "nco" ]; then slash_cdate_or_null="" slash_ensmem_subdir_or_null="" + slash_obs_or_null="" else slash_cdate_or_null="/${CDATE}" # @@ -160,8 +161,10 @@ else # if [ $(boolify "${DO_ENSEMBLE}") = "TRUE" ]; then slash_ensmem_subdir_or_null="/${ensmem_name}" + slash_obs_or_null="/obs" else slash_ensmem_subdir_or_null="" + slash_obs_or_null="" fi fi @@ -169,13 +172,13 @@ if [ "${grid_or_point}" = "grid" ]; then case "${FIELDNAME_IN_MET_FILEDIR_NAMES}" in "APCP"*) - OBS_INPUT_DIR="${vx_output_basedir}${slash_cdate_or_null}/obs/metprd/PcpCombine_obs" + OBS_INPUT_DIR="${vx_output_basedir}${slash_cdate_or_null}${slash_obs_or_null}/metprd/PcpCombine_obs" OBS_INPUT_FN_TEMPLATE="${OBS_CCPA_APCP_FN_TEMPLATE_PCPCOMBINE_OUTPUT}" FCST_INPUT_DIR="${vx_output_basedir}${slash_cdate_or_null}${slash_ensmem_subdir_or_null}/metprd/PcpCombine_fcst" FCST_INPUT_FN_TEMPLATE="${FCST_FN_TEMPLATE_PCPCOMBINE_OUTPUT}" ;; "ASNOW"*) - OBS_INPUT_DIR="${vx_output_basedir}${slash_cdate_or_null}/obs/metprd/PcpCombine_obs" + OBS_INPUT_DIR="${vx_output_basedir}${slash_cdate_or_null}${slash_obs_or_null}/metprd/PcpCombine_obs" 
OBS_INPUT_FN_TEMPLATE="${OBS_NOHRSC_ASNOW_FN_TEMPLATE_PCPCOMBINE_OUTPUT}" FCST_INPUT_DIR="${vx_output_basedir}${slash_cdate_or_null}${slash_ensmem_subdir_or_null}/metprd/PcpCombine_fcst" FCST_INPUT_FN_TEMPLATE="${FCST_FN_TEMPLATE_PCPCOMBINE_OUTPUT}" diff --git a/scripts/exregional_run_met_pcpcombine.sh b/scripts/exregional_run_met_pcpcombine.sh index 97d156aa62..89d375b7c9 100755 --- a/scripts/exregional_run_met_pcpcombine.sh +++ b/scripts/exregional_run_met_pcpcombine.sh @@ -147,6 +147,7 @@ if [ "${FCST_OR_OBS}" = "FCST" ]; then if [ "${RUN_ENVIR}" = "nco" ]; then slash_cdate_or_null="" slash_ensmem_subdir_or_null="" + slash_obs_or_null="" else slash_cdate_or_null="/${CDATE}" # @@ -169,7 +170,11 @@ if [ "${FCST_OR_OBS}" = "FCST" ]; then fi elif [ "${FCST_OR_OBS}" = "OBS" ]; then slash_cdate_or_null="/${CDATE}" - slash_ensmem_subdir_or_null="/obs" + if [ $(boolify "${DO_ENSEMBLE}") = "TRUE" ]; then + slash_obs_or_null="/obs" + else + slash_obs_or_null="" + fi fi OBS_INPUT_DIR="" @@ -193,7 +198,7 @@ elif [ "${FCST_OR_OBS}" = "OBS" ]; then fn_template=$(eval echo \${OBS_${OBTYPE}_${VAR}_FN_TEMPLATE}) OBS_INPUT_FN_TEMPLATE=$( eval echo ${fn_template} ) - OUTPUT_BASE="${vx_output_basedir}${slash_cdate_or_null}${slash_ensmem_subdir_or_null}" + OUTPUT_BASE="${vx_output_basedir}${slash_cdate_or_null}${slash_obs_or_null}" OUTPUT_DIR="${OUTPUT_BASE}/metprd/${MetplusToolName}_obs" fn_template=$(eval echo \${OBS_${OBTYPE}_${VAR}_FN_TEMPLATE_PCPCOMBINE_OUTPUT}) OUTPUT_FN_TEMPLATE=$( eval echo ${fn_template} ) From 1c20ad4f40962cd35f8f13af3654ed01b80a5649 Mon Sep 17 00:00:00 2001 From: gerard ketefian Date: Sat, 28 Sep 2024 08:36:22 -0600 Subject: [PATCH 089/131] Modify metatask and task names for clarity; fix paths; add pcpcombine task for NOHRSC; fix task dependencies. 
--- ...C_OBS => JREGIONAL_RUN_MET_PB2NC_OBS_NDAS} | 2 +- parm/wflow/verify_det.yaml | 12 ++--- parm/wflow/verify_ens.yaml | 27 +++++----- parm/wflow/verify_pre.yaml | 50 +++++++++++++++++-- ...h => exregional_run_met_pb2nc_obs_ndas.sh} | 9 ++-- scripts/exregional_run_met_pcpcombine.sh | 17 +++---- ush/setup.py | 4 +- 7 files changed, 78 insertions(+), 43 deletions(-) rename jobs/{JREGIONAL_RUN_MET_PB2NC_OBS => JREGIONAL_RUN_MET_PB2NC_OBS_NDAS} (98%) rename scripts/{exregional_run_met_pb2nc_obs.sh => exregional_run_met_pb2nc_obs_ndas.sh} (98%) diff --git a/jobs/JREGIONAL_RUN_MET_PB2NC_OBS b/jobs/JREGIONAL_RUN_MET_PB2NC_OBS_NDAS similarity index 98% rename from jobs/JREGIONAL_RUN_MET_PB2NC_OBS rename to jobs/JREGIONAL_RUN_MET_PB2NC_OBS_NDAS index 89c9bb73f4..a6ed90a1a3 100755 --- a/jobs/JREGIONAL_RUN_MET_PB2NC_OBS +++ b/jobs/JREGIONAL_RUN_MET_PB2NC_OBS_NDAS @@ -76,7 +76,7 @@ NDAS observations. # #----------------------------------------------------------------------- # -$SCRIPTSdir/exregional_run_met_pb2nc_obs.sh || \ +$SCRIPTSdir/exregional_run_met_pb2nc_obs_ndas.sh || \ print_err_msg_exit "\ Call to ex-script corresponding to J-job \"${scrfunc_fn}\" failed." 
# diff --git a/parm/wflow/verify_det.yaml b/parm/wflow/verify_det.yaml index 47be6bb42b..a08fe69e3e 100644 --- a/parm/wflow/verify_det.yaml +++ b/parm/wflow/verify_det.yaml @@ -50,7 +50,7 @@ metatask_GridStat_CCPA_all_accums_all_mems: and: taskdep_pcpcombine_obs: attrs: - task: run_MET_PcpCombine_obs_APCP#ACCUM_HH#h + task: run_MET_PcpCombine_obs_APCP#ACCUM_HH#h_CCPA taskdep_pcpcombine_fcst: attrs: task: run_MET_PcpCombine_fcst_APCP#ACCUM_HH#h_mem#mem# @@ -82,9 +82,9 @@ metatask_GridStat_NOHRSC_all_accums_all_mems: walltime: 02:00:00 dependency: and: - taskdep_get_obs_nohrsc: + taskdep_pcpcombine_obs: attrs: - task: get_obs_nohrsc + task: run_MET_PcpCombine_obs_ASNOW#ACCUM_HH#h_NOHRSC taskdep_pcpcombine_fcst: attrs: task: run_MET_PcpCombine_fcst_ASNOW#ACCUM_HH#h_mem#mem# @@ -159,7 +159,7 @@ metatask_PointStat_NDAS_all_mems: walltime: 01:00:00 dependency: and: - datadep_all_pb2nc_obs_complete: + datadep_all_pb2nc_obs_ndas_complete: attrs: age: 00:00:00:30 # Check that the flag files that indicate that the Pb2NC tasks are @@ -170,11 +170,11 @@ metatask_PointStat_NDAS_all_mems: {%- for n in range(0, num_obs_days) %} {%- set yyyymmdd = workflow.OBS_DAYS_ALL_CYCLES_INST[n] %} {%- if n == 0 %} - {{- workflow.WFLOW_FLAG_FILES_DIR ~ "/run_met_pb2nc_obs_" ~ yyyymmdd ~ "_complete.txt" }} + {{- workflow.WFLOW_FLAG_FILES_DIR ~ "/run_met_pb2nc_obs_ndas_" ~ yyyymmdd ~ "_complete.txt" }} {%- else %} {{- indent ~ "\n" }} {{- indent ~ "\n" }} - {{- indent_p2 ~ workflow.WFLOW_FLAG_FILES_DIR ~ "/run_met_pb2nc_obs_" ~ yyyymmdd ~ "_complete.txt" }} + {{- indent_p2 ~ workflow.WFLOW_FLAG_FILES_DIR ~ "/run_met_pb2nc_obs_ndas_" ~ yyyymmdd ~ "_complete.txt" }} {%- endif %} {%- if n != num_obs_days-1 %} {{- "\n" }} {%- endif %} {%- endfor %}' diff --git a/parm/wflow/verify_ens.yaml b/parm/wflow/verify_ens.yaml index 046849e126..f92aef4c60 100644 --- a/parm/wflow/verify_ens.yaml +++ b/parm/wflow/verify_ens.yaml @@ -48,9 +48,9 @@ metatask_GenEnsProd_EnsembleStat_CCPA: FCST_THRESH: 'none' 
dependency: and: - taskdep_pcpcombine_obs: &taskdep_pcpcombine_obs + taskdep_pcpcombine_obs_ccpa: &taskdep_pcpcombine_obs_ccpa attrs: - task: run_MET_PcpCombine_obs_APCP#ACCUM_HH#h + task: run_MET_PcpCombine_obs_APCP#ACCUM_HH#h_CCPA taskdep_genensprod: attrs: task: run_MET_GenEnsProd_vx_APCP#ACCUM_HH#h @@ -83,9 +83,9 @@ metatask_GenEnsProd_EnsembleStat_NOHRSC: FCST_THRESH: 'none' dependency: and: - taskdep: + taskdep_pcpcombine_obs_nohrsc: &taskdep_pcpcombine_obs_nohrsc attrs: - task: get_obs_nohrsc + task: run_MET_PcpCombine_obs_ASNOW#ACCUM_HH#h_NOHRSC taskdep_genensprod: attrs: task: run_MET_GenEnsProd_vx_ASNOW#ACCUM_HH#h @@ -169,7 +169,7 @@ metatask_GenEnsProd_EnsembleStat_NDAS: walltime: 01:00:00 dependency: and: - datadep_all_pb2nc_obs_complete: &all_pb2nc_obs_complete + datadep_all_pb2nc_obs_ndas_complete: &all_pb2nc_obs_ndas_complete attrs: age: 00:00:00:30 # Check that the flag files that indicate that the Pb2NC tasks are @@ -180,11 +180,11 @@ metatask_GenEnsProd_EnsembleStat_NDAS: {%- for n in range(0, num_obs_days) %} {%- set yyyymmdd = workflow.OBS_DAYS_ALL_CYCLES_INST[n] %} {%- if n == 0 %} - {{- workflow.WFLOW_FLAG_FILES_DIR ~ "/run_met_pb2nc_obs_" ~ yyyymmdd ~ "_complete.txt" }} + {{- workflow.WFLOW_FLAG_FILES_DIR ~ "/run_met_pb2nc_obs_ndas_" ~ yyyymmdd ~ "_complete.txt" }} {%- else %} {{- indent ~ "\n" }} {{- indent ~ "\n" }} - {{- indent_p2 ~ workflow.WFLOW_FLAG_FILES_DIR ~ "/run_met_pb2nc_obs_" ~ yyyymmdd ~ "_complete.txt" }} + {{- indent_p2 ~ workflow.WFLOW_FLAG_FILES_DIR ~ "/run_met_pb2nc_obs_ndas_" ~ yyyymmdd ~ "_complete.txt" }} {%- endif %} {%- if n != num_obs_days-1 %} {{- "\n" }} {%- endif %} {%- endfor %}' @@ -213,8 +213,8 @@ metatask_GridStat_CCPA_ensmeanprob_all_accums: FCST_THRESH: 'all' dependency: and: - taskdep_pcpcombine_obs: - <<: *taskdep_pcpcombine_obs + taskdep_pcpcombine_obs_ccpa: + <<: *taskdep_pcpcombine_obs_ccpa taskdep_genensprod: attrs: task: run_MET_GenEnsProd_vx_APCP#ACCUM_HH#h @@ -240,9 +240,8 @@ 
metatask_GridStat_NOHRSC_ensmeanprob_all_accums: FCST_THRESH: 'all' dependency: and: - taskdep: - attrs: - task: get_obs_nohrsc + taskdep_pcpcombine_obs_nohrsc: + <<: *taskdep_pcpcombine_obs_nohrsc taskdep_genensprod: attrs: task: run_MET_GenEnsProd_vx_ASNOW#ACCUM_HH#h @@ -291,8 +290,8 @@ metatask_PointStat_NDAS_ensmeanprob: FCST_THRESH: 'all' dependency: and: - datadep_all_pb2nc_obs_complete: - <<: *all_pb2nc_obs_complete + datadep_all_pb2nc_obs_ndas_complete: + <<: *all_pb2nc_obs_ndas_complete taskdep_genensprod: attrs: task: run_MET_GenEnsProd_vx_#VAR# diff --git a/parm/wflow/verify_pre.yaml b/parm/wflow/verify_pre.yaml index 6561954d8f..220b029412 100644 --- a/parm/wflow/verify_pre.yaml +++ b/parm/wflow/verify_pre.yaml @@ -73,12 +73,12 @@ task_get_obs_ndas: partition: '{% if platform.get("PARTITION_HPSS") %}&PARTITION_HPSS;{% else %}None{% endif %}' walltime: 02:00:00 -task_run_MET_Pb2nc_obs: +task_run_MET_Pb2nc_obs_NDAS: <<: *default_task_verify_pre attrs: cycledefs: cycledefs_obs_days_inst maxtries: '2' - command: '&LOAD_MODULES_RUN_TASK; "run_vx" "&JOBSdir;/JREGIONAL_RUN_MET_PB2NC_OBS"' + command: '&LOAD_MODULES_RUN_TASK; "run_vx" "&JOBSdir;/JREGIONAL_RUN_MET_PB2NC_OBS_NDAS"' envars: <<: *default_vars VAR: ADPSFC @@ -100,10 +100,10 @@ task_run_MET_Pb2nc_obs: attrs: task: get_obs_ndas -metatask_PcpCombine_obs: +metatask_PcpCombine_obs_APCP_all_accums_CCPA: var: ACCUM_HH: '{% for ah in verification.VX_APCP_ACCUMS_HRS %}{% if workflow.FCST_LEN_HRS >= ah %}{{ "%02d " % ah }}{% endif %}{% endfor %}' - task_run_MET_PcpCombine_obs_APCP#ACCUM_HH#h: + task_run_MET_PcpCombine_obs_APCP#ACCUM_HH#h_CCPA: <<: *default_task_verify_pre attrs: cycledefs: forecast @@ -142,6 +142,48 @@ metatask_PcpCombine_obs: {%- if n != num_obs_days-1 %} {{- "\n" }} {%- endif %} {%- endfor %}' +metatask_PcpCombine_obs_ASNOW_all_accums_NOHRSC: + var: + ACCUM_HH: '{% for ah in verification.VX_ASNOW_ACCUMS_HRS %}{% if workflow.FCST_LEN_HRS >= ah %}{{ "%02d " % ah }}{% endif %}{% endfor %}' + 
task_run_MET_PcpCombine_obs_ASNOW#ACCUM_HH#h_NOHRSC: + <<: *default_task_verify_pre + attrs: + cycledefs: forecast + maxtries: '2' + command: '&LOAD_MODULES_RUN_TASK; "run_vx" "&JOBSdir;/JREGIONAL_RUN_MET_PCPCOMBINE"' + envars: + <<: *default_vars + VAR: ASNOW + ACCUM_HH: '#ACCUM_HH#' + FCST_OR_OBS: OBS + OBTYPE: NOHRSC + OBS_DIR: '&NOHRSC_OBS_DIR;' + OBS_AVAIL_INTVL_HRS: '{{- verification.NOHRSC_OBS_AVAIL_INTVL_HRS }}' + METPLUSTOOLNAME: 'PCPCOMBINE' + dependency: + and: + datadep: + text: "&NOHRSC_OBS_DIR;" + datadep_all_get_obs_nohrsc_complete: + attrs: + age: 00:00:00:30 + # Check that the flag files that indicate that the get_obs_nohrsc tasks + # are complete are all present before launching any PcpCombine task. + text: '{%- set num_obs_days = workflow.OBS_DAYS_ALL_CYCLES_CUMUL|length %} + {%- set indent = " " %} + {%- set indent_p2 = indent + " " %} + {%- for n in range(0, num_obs_days) %} + {%- set yyyymmdd = workflow.OBS_DAYS_ALL_CYCLES_CUMUL[n] %} + {%- if n == 0 %} + {{- workflow.WFLOW_FLAG_FILES_DIR ~ "/get_obs_nohrsc_" ~ yyyymmdd ~ "_complete.txt" }} + {%- else %} + {{- indent ~ "\n" }} + {{- indent ~ "\n" }} + {{- indent_p2 ~ workflow.WFLOW_FLAG_FILES_DIR ~ "/get_obs_nohrsc_" ~ yyyymmdd ~ "_complete.txt" }} + {%- endif %} + {%- if n != num_obs_days-1 %} {{- "\n" }} {%- endif %} + {%- endfor %}' + metatask_check_post_output_all_mems: var: mem: '{% if global.DO_ENSEMBLE %}{% for m in range(1, global.NUM_ENS_MEMBERS+1) %}{{ "%03d "%m }}{%- endfor -%} {% else %}{{ "000"|string }}{% endif %}' diff --git a/scripts/exregional_run_met_pb2nc_obs.sh b/scripts/exregional_run_met_pb2nc_obs_ndas.sh similarity index 98% rename from scripts/exregional_run_met_pb2nc_obs.sh rename to scripts/exregional_run_met_pb2nc_obs_ndas.sh index fbf3ec1689..01e0362cc3 100755 --- a/scripts/exregional_run_met_pb2nc_obs.sh +++ b/scripts/exregional_run_met_pb2nc_obs_ndas.sh @@ -154,8 +154,7 @@ for yyyymmddhh in ${obs_retrieve_times_crnt_day[@]}; do fp="${OBS_INPUT_DIR}/${fn}" if [[ -f 
"${fp}" ]]; then print_info_msg " -Found ${OBTYPE} obs file corresponding to observation retrieval time -(yyyymmddhh): +Found ${OBTYPE} obs file corresponding to observation retrieval time (yyyymmddhh): yyyymmddhh = \"${yyyymmddhh}\" fp = \"${fp}\" " @@ -263,8 +262,8 @@ metplus_config_tmpl_fn="${MetplusToolName}_obs" # information, but we still include that info in the file name so that # the behavior in the two modes is as similar as possible. # -metplus_config_fn="${metplus_config_tmpl_fn}_${CDATE}" -metplus_log_fn="${metplus_config_fn}" +metplus_config_fn="${metplus_config_tmpl_fn}_NDAS_${CDATE}" +metplus_log_fn="${metplus_config_fn}_NDAS" # # Add prefixes and suffixes (extensions) to the base file names. # @@ -379,7 +378,7 @@ METplus configuration file used is: #----------------------------------------------------------------------- # mkdir -p ${WFLOW_FLAG_FILES_DIR} -touch "${WFLOW_FLAG_FILES_DIR}/run_met_pb2nc_obs_${PDY}_complete.txt" +touch "${WFLOW_FLAG_FILES_DIR}/run_met_pb2nc_obs_ndas_${PDY}_complete.txt" # #----------------------------------------------------------------------- # diff --git a/scripts/exregional_run_met_pcpcombine.sh b/scripts/exregional_run_met_pcpcombine.sh index 89d375b7c9..43da23ca2e 100755 --- a/scripts/exregional_run_met_pcpcombine.sh +++ b/scripts/exregional_run_met_pcpcombine.sh @@ -320,18 +320,13 @@ fi # First, set the base file names. # metplus_config_tmpl_fn="${MetplusToolName}" -metplus_config_fn="${metplus_config_tmpl_fn}_$(echo_lowercase ${FCST_OR_OBS})_${FIELDNAME_IN_MET_FILEDIR_NAMES}${ENSMEM_INDX:+_${ensmem_name}}" -metplus_log_fn="${metplus_config_fn}_$CDATE" -# -# If operating on observation files, append the cycle date to the name -# of the configuration file because in this case, the output files from -# METplus are not placed under cycle directories (so another method is -# necessary to associate the configuration file with the cycle for which -# it is used). 
-# -if [ "${FCST_OR_OBS}" = "OBS" ]; then - metplus_config_fn="${metplus_log_fn}" +if [ "${FCST_OR_OBS}" = "FCST" ]; then + suffix="${ENSMEM_INDX:+_${ensmem_name}}" +elif [ "${FCST_OR_OBS}" = "OBS" ]; then + suffix="_${OBTYPE}" fi +metplus_config_fn="${metplus_config_tmpl_fn}_$(echo_lowercase ${FCST_OR_OBS})_${FIELDNAME_IN_MET_FILEDIR_NAMES}${suffix}" +metplus_log_fn="${metplus_config_fn}_$CDATE" # # Add prefixes and suffixes (extensions) to the base file names. # diff --git a/ush/setup.py b/ush/setup.py index 8aaec0ef90..81e82cc9e6 100644 --- a/ush/setup.py +++ b/ush/setup.py @@ -656,7 +656,7 @@ def remove_tag(tasks, tag): vx_fields_all["CCPA"] = ["APCP"] vx_metatasks_all["CCPA"] = ["task_get_obs_ccpa", - "metatask_PcpCombine_obs_CCPA_all_accums", + "metatask_PcpCombine_obs_APCP_all_accums_CCPA", "metatask_PcpCombine_fcst_APCP_all_accums_all_mems", "metatask_GridStat_CCPA_all_accums_all_mems", "metatask_GenEnsProd_EnsembleStat_CCPA", @@ -664,7 +664,7 @@ def remove_tag(tasks, tag): vx_fields_all["NOHRSC"] = ["ASNOW"] vx_metatasks_all["NOHRSC"] = ["task_get_obs_nohrsc", - "metatask_PcpCombine_obs_NOHRSC_all_accums", + "metatask_PcpCombine_obs_ASNOW_all_accums_NOHRSC", "metatask_PcpCombine_fcst_ASNOW_all_accums_all_mems", "metatask_GridStat_NOHRSC_all_accums_all_mems", "metatask_GenEnsProd_EnsembleStat_NOHRSC", From 057ba700fb72c75a45d3b921ecef5cb5c72179a8 Mon Sep 17 00:00:00 2001 From: gerard ketefian Date: Sun, 29 Sep 2024 17:57:34 -0600 Subject: [PATCH 090/131] Make adjustments to ASNOW settings to account for the fact that for ASNOW obs, it's the netcdf files that come out of PcpCombine_obs tasks that are used for verification. 
--- parm/metplus/EnsembleStat.conf | 9 +-------- parm/metplus/GridStat_ensmean.conf | 9 +-------- parm/metplus/GridStat_ensprob.conf | 9 +-------- scripts/exregional_run_met_genensprod_or_ensemblestat.sh | 4 ++-- ...xregional_run_met_gridstat_or_pointstat_vx_ensprob.sh | 4 ++-- 5 files changed, 7 insertions(+), 28 deletions(-) diff --git a/parm/metplus/EnsembleStat.conf b/parm/metplus/EnsembleStat.conf index 3759d5d8a1..ce38b2d209 100644 --- a/parm/metplus/EnsembleStat.conf +++ b/parm/metplus/EnsembleStat.conf @@ -516,15 +516,8 @@ PcpCombine tool. In that file, the field name consists of the observation field name here (field_obs) with the accumulation period appended to it (separated by an underscore), so we must do the same here to get an exact match. - -Note: -Turns out for ASNOW, PcpCombine is not run for obs, so we exclude that -from the "if" clause here (so it goes into the "else"). For workflow -behavior uniformity between APCP and ASNOW, consider running PcpCombine -for ASNOW observations as well (just as it's run for APCP observations). - {%- if (input_field_group in ['APCP', 'ASNOW']) %} #} - {%- if (input_field_group in ['APCP']) %} + {%- if (input_field_group in ['APCP', 'ASNOW']) %} OBS_VAR{{ns.var_count}}_NAME = {{field_obs}}_{{accum_hh}} {%- else %} OBS_VAR{{ns.var_count}}_NAME = {{field_obs}} diff --git a/parm/metplus/GridStat_ensmean.conf b/parm/metplus/GridStat_ensmean.conf index 0cfaa707bf..21d23ac4eb 100644 --- a/parm/metplus/GridStat_ensmean.conf +++ b/parm/metplus/GridStat_ensmean.conf @@ -400,15 +400,8 @@ PcpCombine tool. In that file, the field name consists of the observation field name here (field_obs) with the accumulation period appended to it (separated by an underscore), so we must do the same here to get an exact match. - -Note: -Turns out for ASNOW, PcpCombine is not run for obs, so we exclude that -from the "if" clause here (so it goes into the "else"). 
For workflow -behavior uniformity between APCP and ASNOW, consider running PcpCombine -for ASNOW observations as well (just as it's run for APCP observations). - {%- if (input_field_group in ['APCP', 'ASNOW']) %} #} - {%- if (input_field_group in ['APCP']) %} + {%- if (input_field_group in ['APCP', 'ASNOW']) %} OBS_VAR{{ns.var_count}}_NAME = {{field_obs}}_{{accum_hh}} {%- else %} OBS_VAR{{ns.var_count}}_NAME = {{field_obs}} diff --git a/parm/metplus/GridStat_ensprob.conf b/parm/metplus/GridStat_ensprob.conf index 6c34eb6ba0..abde89ef4b 100644 --- a/parm/metplus/GridStat_ensprob.conf +++ b/parm/metplus/GridStat_ensprob.conf @@ -354,15 +354,8 @@ PcpCombine tool. In that file, the field name consists of the observation field name here (field_obs) with the accumulation period appended to it (separated by an underscore), so we must do the same here to get an exact match. - -Note: -Turns out for ASNOW, PcpCombine is not run for obs, so we exclude that -from the "if" clause here (so it goes into the "else"). For workflow -behavior uniformity between APCP and ASNOW, consider running PcpCombine -for ASNOW observations as well (just as it's run for APCP observations). 
- {%- if (input_field_group in ['APCP', 'ASNOW']) %} #} - {%- if (input_field_group in ['APCP']) %} + {%- if (input_field_group in ['APCP', 'ASNOW']) %} OBS_VAR{{ns.var_count}}_NAME = {{field_obs}}_{{accum_hh}} {%- else %} OBS_VAR{{ns.var_count}}_NAME = {{field_obs}} diff --git a/scripts/exregional_run_met_genensprod_or_ensemblestat.sh b/scripts/exregional_run_met_genensprod_or_ensemblestat.sh index 73d98754b4..9e1d0bd390 100755 --- a/scripts/exregional_run_met_genensprod_or_ensemblestat.sh +++ b/scripts/exregional_run_met_genensprod_or_ensemblestat.sh @@ -140,8 +140,8 @@ if [ "${grid_or_point}" = "grid" ]; then FCST_INPUT_DIR="${vx_output_basedir}" ;; "ASNOW"*) - OBS_INPUT_DIR="${OBS_DIR}" - OBS_INPUT_FN_TEMPLATE="${OBS_NOHRSC_ASNOW_FN_TEMPLATE}" + OBS_INPUT_DIR="${vx_output_basedir}${slash_cdate_or_null}/obs/metprd/PcpCombine_obs" + OBS_INPUT_FN_TEMPLATE="${OBS_NOHRSC_ASNOW_FN_TEMPLATE_PCPCOMBINE_OUTPUT}" FCST_INPUT_DIR="${vx_output_basedir}" ;; "REFC") diff --git a/scripts/exregional_run_met_gridstat_or_pointstat_vx_ensprob.sh b/scripts/exregional_run_met_gridstat_or_pointstat_vx_ensprob.sh index 9a8c35d1cb..6a8da3166d 100755 --- a/scripts/exregional_run_met_gridstat_or_pointstat_vx_ensprob.sh +++ b/scripts/exregional_run_met_gridstat_or_pointstat_vx_ensprob.sh @@ -127,8 +127,8 @@ if [ "${grid_or_point}" = "grid" ]; then OBS_INPUT_FN_TEMPLATE="${OBS_CCPA_APCP_FN_TEMPLATE_PCPCOMBINE_OUTPUT}" ;; "ASNOW"*) - OBS_INPUT_DIR="${OBS_DIR}" - OBS_INPUT_FN_TEMPLATE="${OBS_NOHRSC_ASNOW_FN_TEMPLATE}" + OBS_INPUT_DIR="${vx_output_basedir}${slash_cdate_or_null}/obs/metprd/PcpCombine_obs" + OBS_INPUT_FN_TEMPLATE="${OBS_NOHRSC_ASNOW_FN_TEMPLATE_PCPCOMBINE_OUTPUT}" ;; "REFC") OBS_INPUT_DIR="${OBS_DIR}" From abf2014db39ae292d29264c9e6d592708893e8e7 Mon Sep 17 00:00:00 2001 From: gerard ketefian Date: Tue, 1 Oct 2024 16:45:26 -0600 Subject: [PATCH 091/131] Clarify informational message. 
--- ush/mrms_pull_topofhour.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/ush/mrms_pull_topofhour.py b/ush/mrms_pull_topofhour.py index 310c5d97f9..32f511c393 100644 --- a/ush/mrms_pull_topofhour.py +++ b/ush/mrms_pull_topofhour.py @@ -43,7 +43,7 @@ def main(): valid = datetime.datetime(YYYY, MM, DD, HH, 0, 0) valid_str = valid.strftime("%Y%m%d") - print(f"Pulling {args.valid_time} MRMS data") + print(f"Pulling MRMS product {args.product} for valid time: {args.valid_time}") # Set up working directory From 8937a8c472987d4f2f010d1ca39d5c436d29ab2a Mon Sep 17 00:00:00 2001 From: gerard ketefian Date: Tue, 1 Oct 2024 16:46:00 -0600 Subject: [PATCH 092/131] Comment out debugging lines to reduce clutter. --- ush/bash_utils/eval_METplus_timestr_tmpl.sh | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/ush/bash_utils/eval_METplus_timestr_tmpl.sh b/ush/bash_utils/eval_METplus_timestr_tmpl.sh index ae4a1c7ebf..a4421958ee 100644 --- a/ush/bash_utils/eval_METplus_timestr_tmpl.sh +++ b/ush/bash_utils/eval_METplus_timestr_tmpl.sh @@ -62,7 +62,7 @@ function eval_METplus_timestr_tmpl() { # #----------------------------------------------------------------------- # - print_input_args "valid_args" +# print_input_args "valid_args" # #----------------------------------------------------------------------- # @@ -217,7 +217,7 @@ function eval_single_METplus_timefmt() { # #----------------------------------------------------------------------- # - print_input_args "valid_args" +# print_input_args "valid_args" # #----------------------------------------------------------------------- # From 841e141853d7cd9d19c7b79d5f306ae28ceb99f4 Mon Sep 17 00:00:00 2001 From: gerard ketefian Date: Wed, 2 Oct 2024 16:31:43 -0600 Subject: [PATCH 093/131] Ensure that the observation file name templates specified in the SRW App configuration file are used when running the Pb2nc task (as opposed to hard-coding file names). 
--- scripts/exregional_run_met_pb2nc_obs_ndas.sh | 18 ++++++++++++++++-- 1 file changed, 16 insertions(+), 2 deletions(-) diff --git a/scripts/exregional_run_met_pb2nc_obs_ndas.sh b/scripts/exregional_run_met_pb2nc_obs_ndas.sh index 01e0362cc3..77cdb1221d 100755 --- a/scripts/exregional_run_met_pb2nc_obs_ndas.sh +++ b/scripts/exregional_run_met_pb2nc_obs_ndas.sh @@ -79,6 +79,11 @@ to convert NDAS prep buffer observation files to NetCDF format. #----------------------------------------------------------------------- # yyyymmdd_task=${PDY} + +# Seconds since some reference time that the DATE_UTIL utility uses of +# the day of the current task. This will be used below to find hours +# since the start of this day. +sec_since_ref_task=$(${DATE_UTIL} --date "${yyyymmdd_task} 0 hours" +%s) # #----------------------------------------------------------------------- # @@ -150,8 +155,17 @@ num_missing_files=0 for yyyymmddhh in ${obs_retrieve_times_crnt_day[@]}; do yyyymmdd=$(echo ${yyyymmddhh} | cut -c1-8) hh=$(echo ${yyyymmddhh} | cut -c9-10) - fn="prepbufr.ndas.${yyyymmddhh}" - fp="${OBS_INPUT_DIR}/${fn}" + + # Set the full path to the final processed obs file (fp_proc) we want to + # create. 
+ sec_since_ref=$(${DATE_UTIL} --date "${yyyymmdd} ${hh} hours" +%s) + lhr=$(( (sec_since_ref - sec_since_ref_task)/3600 )) + eval_METplus_timestr_tmpl \ + init_time="${yyyymmdd_task}00" \ + fhr="${lhr}" \ + METplus_timestr_tmpl="${OBS_DIR}/${OBS_NDAS_ADPSFCorADPUPA_FN_TEMPLATE}" \ + outvarname_evaluated_timestr="fp" + if [[ -f "${fp}" ]]; then print_info_msg " Found ${OBTYPE} obs file corresponding to observation retrieval time (yyyymmddhh): From 2e6299ff3d2d8258b0318cd38059eba40358a8f1 Mon Sep 17 00:00:00 2001 From: gerard ketefian Date: Wed, 2 Oct 2024 16:44:03 -0600 Subject: [PATCH 094/131] Add argument to mrms_pull_topofhour.py that specifies whether a subdirectory having the name of the valid day (of the form "YYYYMMDD") should be assumed to exist under the specified source directory, and whether such a subdirectory should be created under the specified output directory. Previously, such a subdirectory was always assumed to exist/created; now, it is an option. --- ush/mrms_pull_topofhour.py | 12 +++++++++--- 1 file changed, 9 insertions(+), 3 deletions(-) diff --git a/ush/mrms_pull_topofhour.py b/ush/mrms_pull_topofhour.py index 32f511c393..3e5b5ddb6e 100644 --- a/ush/mrms_pull_topofhour.py +++ b/ush/mrms_pull_topofhour.py @@ -20,6 +20,8 @@ def main(): help='Name of MRMS product') parser.add_argument('-l', '--level', type=str, help='MRMS product level', choices=['_00.50_','_18_00.50_']) + parser.add_argument('--add_vdate_subdir', default=True, required=False, action=argparse.BooleanOptionalAction, + help='Flag to add valid-date subdirectory to source and destination directories') parser.add_argument('-d', '--debug', action='store_true', help='Add additional debug output') args = parser.parse_args() @@ -47,14 +49,18 @@ def main(): # Set up working directory - dest_dir = os.path.join(args.outdir, valid_str) + valid_str_or_empty = '' + if args.add_vdate_subdir: + valid_str_or_empty = valid_str + + dest_dir = os.path.join(args.outdir, valid_str_or_empty) if not 
os.path.exists(dest_dir): os.makedirs(dest_dir) # Sort list of files for each MRMS product if args.debug: print(f"Valid date: {valid_str}") - search_path = f"{args.source}/{valid_str}/{args.product}*.gz" + search_path = os.path.join(args.source, valid_str_or_empty, args.product + "*.gz") file_list = [f for f in glob.glob(search_path)] if args.debug: print(f"Files found: \n{file_list}") @@ -78,7 +84,7 @@ def main(): if difference.total_seconds() <= 900: filename1 = f"{args.product}{args.level}{closest_timestamp.strftime('%Y%m%d-%H%M%S')}.grib2.gz" filename2 = f"{args.product}{args.level}{valid.strftime('%Y%m%d-%H')}0000.grib2" - origfile = os.path.join(args.source, valid_str, filename1) + origfile = os.path.join(args.source, valid_str_or_empty, filename1) target = os.path.join(dest_dir, filename2) if args.debug: From 8eed4a267f4c619146166d9564ca71d6273ef9d6 Mon Sep 17 00:00:00 2001 From: gerard ketefian Date: Wed, 2 Oct 2024 16:51:53 -0600 Subject: [PATCH 095/131] When retrieving files from different data stores (e.g. NOAA's HPSS), make sure that the get_obs tasks place the files (and name them) according to the file name templates specified for each obs type in the SRW App configuration file. Also, remove the variable basedir_proc since it is redundant. 
--- ush/get_obs_ccpa.sh | 114 +++++++++++++-------- ush/get_obs_mrms.sh | 196 ++++++++++++++++++++++++------------ ush/get_obs_ndas.sh | 227 ++++++++++++++++++++++++------------------ ush/get_obs_nohrsc.sh | 104 ++++++++++++------- 4 files changed, 406 insertions(+), 235 deletions(-) diff --git a/ush/get_obs_ccpa.sh b/ush/get_obs_ccpa.sh index aabb55e5a4..d3c486c607 100755 --- a/ush/get_obs_ccpa.sh +++ b/ush/get_obs_ccpa.sh @@ -167,7 +167,7 @@ if [ ${remainder} -ne 0 ]; then The obs availability interval CCPA_OBS_AVAIL_INTVL_HRS must divide evenly into 24 but doesn't: CCPA_OBS_AVAIL_INTVL_HRS = ${CCPA_OBS_AVAIL_INTVL_HRS} - mod(24, CCPA_OBS_AVAIL_INTVL_HRS) = ${remainder}" + 24 % CCPA_OBS_AVAIL_INTVL_HRS = ${remainder}" fi # Accumulation period to use when getting obs files. This is simply (a @@ -178,11 +178,10 @@ accum_obs_fmt=$( printf "%02d" "${CCPA_OBS_AVAIL_INTVL_HRS}" ) # task's cycledefs attribute in the ROCOTO xml. yyyymmdd_task=${PDY} -# Base directory in which the daily subdirectories containing the grib2 -# obs files will appear after this script is done. We refer to this as -# the "processed" base directory because it contains the files after all -# processing by this script is complete. -basedir_proc=${OBS_DIR} +# Seconds since some reference time that the DATE_UTIL utility uses of +# the day of the current task. This will be used below to find hours +# since the start of this day. +sec_since_ref_task=$(${DATE_UTIL} --date "${yyyymmdd_task} 0 hours" +%s) # #----------------------------------------------------------------------- # @@ -231,16 +230,26 @@ else arcv_hr_end=$(( arcv_hr_end*arcv_hr_incr )) fi -# Check whether any obs files already exist on disk. If so, adjust the -# starting archive hour. In the process, keep a count of the number of -# obs files that already exist on disk. +# Check whether any obs files already exist on disk in their processed +# (i.e. final) locations. 
Here, by "processed" we mean after any renaming +# and rearrangement of files that this script may do to the "raw" files, +# i.e. the files as they are named and arranged within the archive (tar) +# files on HPSS. If so, adjust the starting archive hour. In the process, +# keep a count of the number of obs files that already exist on disk. num_existing_files=0 for yyyymmddhh in ${obs_retrieve_times_crnt_day[@]}; do yyyymmdd=$(echo ${yyyymmddhh} | cut -c1-8) hh=$(echo ${yyyymmddhh} | cut -c9-10) - day_dir_proc="${basedir_proc}/${yyyymmdd}" - fn_proc="ccpa.t${hh}z.${accum_obs_fmt}h.hrap.conus.gb2" - fp_proc="${day_dir_proc}/${fn_proc}" + + # Set the full path to the final processed obs file (fp_proc). + sec_since_ref=$(${DATE_UTIL} --date "${yyyymmdd} ${hh} hours" +%s) + lhr=$(( (sec_since_ref - sec_since_ref_task)/3600 )) + eval_METplus_timestr_tmpl \ + init_time="${yyyymmdd_task}00" \ + fhr="${lhr}" \ + METplus_timestr_tmpl="${OBS_DIR}/${OBS_CCPA_APCP_FN_TEMPLATE}" \ + outvarname_evaluated_timestr="fp_proc" + if [[ -f ${fp_proc} ]]; then num_existing_files=$((num_existing_files+1)) print_info_msg " @@ -304,7 +313,8 @@ fi #----------------------------------------------------------------------- # -# Whether to move or copy files from raw to processed directories. +# Whether to move the files or copy them from their raw to their processed +# locations. #mv_or_cp="mv" mv_or_cp="cp" # Whether to remove raw observations after processed directories have @@ -317,11 +327,11 @@ if [[ $(boolify "${remove_raw_obs}") == "TRUE" ]]; then mv_or_cp="mv" fi -# Base directory that will contain the daily subdirectories in which the -# CCPA grib2 files retrieved from archive (tar) files will be placed. -# We refer to this as the "raw" base directory because it contains files +# Base directory that will contain the archive subdirectories in which +# the files extracted from each archive (tar) file will be placed. 
We +# refer to this as the "raw" base directory because it contains files # as they are found in the archives before any processing by this script. -basedir_raw="${basedir_proc}/${yyyymmdd_task}/raw" +basedir_raw="${OBS_DIR}/raw_${yyyymmdd_task}" for arcv_hr in ${arcv_hrs[@]}; do @@ -333,10 +343,10 @@ arcv_hr = ${arcv_hr}" yyyymmdd_arcv=$(echo ${yyyymmddhh_arcv} | cut -c1-8) hh_arcv=$(echo ${yyyymmddhh_arcv} | cut -c9-10) - # Directory that will contain the grib2 files retrieved from the current - # archive file. We refer to this as the "raw" archive directory because - # it will contain the files as they are in the archive before any processing - # by this script. + # Directory that will contain the files retrieved from the current archive + # file. We refer to this as the "raw" archive directory because it will + # contain the files as they are in the archive before any processing by + # this script. arcv_dir_raw="${basedir_raw}/${yyyymmddhh_arcv}" # Check whether any of the obs retrieval times for the day associated with @@ -378,12 +388,14 @@ The times at which obs need to be retrieved are: # The retrieve_data.py script first extracts the contents of the archive # file into the directory it was called from and then moves them to the - # specified output location (via the --output_path option). In order to - # avoid other get_obs_ccpa tasks (i.e. those associated with other days) - # from interfering with (clobbering) these files (because extracted files - # from different get_obs_ccpa tasks to have the same names or relative - # paths), we change location to the base raw directory so that files with - # same names are extracted into different directories. + # specified output location (via the --output_path option). Note that + # the relative paths of obs files within archives associated with different + # days may be the same.
Thus, if files with the same archive-relative + # paths are being simultaneously extracted from multiple archive files + # (by multiple get_obs tasks), they will likely clobber each other if the + # extraction is being carried out into the same location on disk. To avoid + # this, we first change location to the raw base directory (whose name is + # obs-day dependent) and then call the retrieve_data.py script. cd ${basedir_raw} # Pull obs from HPSS. This will get all the obs files in the current @@ -402,36 +414,56 @@ The times at which obs need to be retrieved are: print_info_msg "CALLING: ${cmd}" $cmd || print_err_msg_exit "Could not retrieve obs from HPSS." - # Create the processed CCPA grib2 files. This usually consists of just - # moving or copying the raw files to their processed location, but for - # times between 20180718 and 20210504 and hours-of-day 19 through the - # end of the day (i.e. hour 0 of the next day), it involves using wgrib2 - # to correct an error in the metadata of the raw file and writing the - # corrected data to a new grib2 file in the processed location. - for hrs_ago in $(seq 5 -1 0); do + # Loop over the raw obs files extracted from the current archive and + # generate from them the processed obs files. + # + # For CCPA obs, for most dates this consists of simply copying or moving + # the files from the raw archive directory to the processed directory, + # possibly renaming them in the process. However, for dates between + # 20180718 and 20210504 and hours-of-day 19 through the end of the day + # (i.e. hour 0 of the next day), it involves using wgrib2 to correct an + # error in the metadata of the raw file and writing the corrected data + # to a new grib2 file in the processed location.
+ for hrs_ago in $(seq 5 -${CCPA_OBS_AVAIL_INTVL_HRS} 0); do yyyymmddhh=$(${DATE_UTIL} --date "${yyyymmdd_arcv} ${hh_arcv} ${hrs_ago} hours ago" +%Y%m%d%H) yyyymmdd=$(echo ${yyyymmddhh} | cut -c1-8) hh=$(echo ${yyyymmddhh} | cut -c9-10) - # Create the processed grib2 obs file from the raw one (by moving, copying, - # or otherwise) only if the time of the current file in the current archive + # Create the processed obs file from the raw one (by moving, copying, or + # otherwise) only if the time of the current file in the current archive # also exists in the list of obs retrieval times for the current day. if [[ ${obs_retrieve_times_crnt_day[@]} =~ ${yyyymmddhh} ]]; then + + # The raw file name needs to be the same as what the retrieve_data.py + # script called above ends up retrieving. The list of possible templates + # for this name is given in parm/data_locations.yml, but which of those + # is actually used is not known until retrieve_data.py completes. Thus, + # that information needs to be passed back by the script and used here. + # For now, we hard-code the file name here. fn_raw="ccpa.t${hh}z.${accum_obs_fmt}h.hrap.conus.gb2" fp_raw="${arcv_dir_raw}/${fn_raw}" - day_dir_proc="${basedir_proc}/${yyyymmdd}" - mkdir -p ${day_dir_proc} - fn_proc="${fn_raw}" - fp_proc="${day_dir_proc}/${fn_proc}" - hh_noZero=$((10#${hh})) + + # Set the full path to the final processed obs file (fp_proc) we want to + # create. + sec_since_ref=$(${DATE_UTIL} --date "${yyyymmdd} ${hh} hours" +%s) + lhr=$(( (sec_since_ref - sec_since_ref_task)/3600 )) + eval_METplus_timestr_tmpl \ + init_time="${yyyymmdd_task}00" \ + fhr="${lhr}" \ + METplus_timestr_tmpl="${OBS_DIR}/${OBS_CCPA_APCP_FN_TEMPLATE}" \ + outvarname_evaluated_timestr="fp_proc" + mkdir -p $( dirname "${fp_proc}" ) + # CCPA files for 1-hour accumulation have incorrect metadata in the files # under the "00" directory from 20180718 to 20210504. After the data is # pulled, reorganize into correct yyyymmdd structure.
+ hh_noZero=$((10#${hh})) if [[ ${yyyymmdd} -ge 20180718 && ${yyyymmdd} -le 20210504 ]] && \ [[ (${hh_noZero} -ge 19 && ${hh_noZero} -le 23) || (${hh_noZero} -eq 0) ]]; then wgrib2 ${fp_raw} -set_date -24hr -grib ${fp_proc} -s else ${mv_or_cp} ${fp_raw} ${fp_proc} fi + fi done diff --git a/ush/get_obs_mrms.sh b/ush/get_obs_mrms.sh index a0d0590667..71eae52b9c 100755 --- a/ush/get_obs_mrms.sh +++ b/ush/get_obs_mrms.sh @@ -49,6 +49,18 @@ set -u #----------------------------------------------------------------------- # +# The time interval (in hours) at which the obs are available on HPSS +# must divide evenly into 24. Otherwise, different days would have obs +# available at different hours-of-day. Make sure this is the case. +remainder=$(( 24 % MRMS_OBS_AVAIL_INTVL_HRS )) +if [ ${remainder} -ne 0 ]; then + print_err_msg_exit "\ +The obs availability interval MRMS_OBS_AVAIL_INTVL_HRS must divide evenly +into 24 but doesn't: + MRMS_OBS_AVAIL_INTVL_HRS = ${MRMS_OBS_AVAIL_INTVL_HRS} + 24 % MRMS_OBS_AVAIL_INTVL_HRS = ${remainder}" +fi + # Create an array-valued counterpart of MRMS_FIELDS. MRMS_FIELDS is an # environment variable created in the ROCOTO XML. It is a scalar variable # because there doesn't seem to be a way to pass a bash array from the @@ -59,14 +71,17 @@ mrms_fields=($(printf "%s" "${MRMS_FIELDS}")) # corresponding to each. fields_in_filenames=() levels_in_filenames=() +obs_mrms_fp_templates=() for field in ${mrms_fields[@]}; do # Set field-dependent parameters needed in forming grib2 file names. 
if [ "${field}" = "REFC" ]; then fields_in_filenames+=("MergedReflectivityQCComposite") levels_in_filenames+=("00.50") + obs_mrms_fp_templates+=("${OBS_DIR}/${OBS_MRMS_REFC_FN_TEMPLATE}") elif [ "${field}" = "RETOP" ]; then fields_in_filenames+=("EchoTop") levels_in_filenames+=("18_00.50") + obs_mrms_fp_templates+=("${OBS_DIR}/${OBS_MRMS_RETOP_FN_TEMPLATE}") else print_err_msg_exit "\ Invalid field specified: @@ -79,11 +94,10 @@ done # task's cycledefs attribute in the ROCOTO xml. yyyymmdd_task=${PDY} -# Base directory in which the daily subdirectories containing the MRMS -# grib2 files will appear after this script is done. We refer to this -# as the "processed" base directory because it contains the files after -# all processing by this script is complete. -basedir_proc=${OBS_DIR} +# Seconds since some reference time that the DATE_UTIL utility uses of +# the day of the current task. This will be used below to find hours +# since the start of this day. +sec_since_ref_task=$(${DATE_UTIL} --date "${yyyymmdd_task} 0 hours" +%s) # #----------------------------------------------------------------------- # @@ -95,45 +109,44 @@ basedir_proc=${OBS_DIR} array_name="OBS_RETRIEVE_TIMES_${OBTYPE}_${yyyymmdd_task}" eval obs_retrieve_times_crnt_day=\( \${${array_name}[@]} \) -# If there are no observation retrieval times on the day of the current -# task, exit the script. -num_obs_retrieve_times_crnt_day=${#obs_retrieve_times_crnt_day[@]} -if [[ ${num_obs_retrieve_times_crnt_day} -eq 0 ]]; then - print_info_msg " -None of the observation retrieval times fall within the day associated -with the current task (yyyymmdd_task): - yyyymmdd_task = \"${yyyymmdd_task}\" -Thus, there is no need to retrieve any obs files." - exit -fi - # Check whether any obs files already exist on disk. If so, adjust the # starting archive hour. In the process, keep a count of the number of # obs files that already exist on disk. 
num_existing_files=0 num_mrms_fields=${#mrms_fields[@]} -for (( i=0; i<${num_mrms_fields}; i++ )); do - for yyyymmddhh in ${obs_retrieve_times_crnt_day[@]}; do +for yyyymmddhh in ${obs_retrieve_times_crnt_day[@]}; do + for (( i=0; i<${num_mrms_fields}; i++ )); do yyyymmdd=$(echo ${yyyymmddhh} | cut -c1-8) hh=$(echo ${yyyymmddhh} | cut -c9-10) - day_dir_proc="${basedir_proc}/${yyyymmdd}" - fn_proc="${fields_in_filenames[$i]}_${levels_in_filenames[$i]}_${yyyymmdd}-${hh}0000.grib2" - fp_proc="${day_dir_proc}/${fn_proc}" + + sec_since_ref=$(${DATE_UTIL} --date "${yyyymmdd} ${hh} hours" +%s) + lhr=$(( (sec_since_ref - sec_since_ref_task)/3600 )) + eval_METplus_timestr_tmpl \ + init_time="${yyyymmdd_task}00" \ + fhr="${lhr}" \ + METplus_timestr_tmpl="${obs_mrms_fp_templates[$i]}" \ + outvarname_evaluated_timestr="fp_proc" + if [[ -f ${fp_proc} ]]; then num_existing_files=$((num_existing_files+1)) print_info_msg " File already exists on disk: fp_proc = \"${fp_proc}\"" else - break + print_info_msg " +File does not exist on disk: + fp_proc = \"${fp_proc}\" +Will attempt to retrieve all obs files." + break 2 fi done done # If the number of obs files that already exist on disk is equal to the -# number of files needed, then there is no need to retrieve any files. -num_needed_files=$((num_obs_retrieve_times_crnt_day*num_mrms_fields)) -if [[ ${num_existing_files} -eq $((num_needed_files)) ]]; then +# number of obs files needed, then there is no need to retrieve any files. 
+num_obs_retrieve_times_crnt_day=${#obs_retrieve_times_crnt_day[@]} +if [[ ${num_existing_files} -eq ${num_obs_retrieve_times_crnt_day} ]]; then + print_info_msg " All obs files needed for the current day (yyyymmdd_task) already exist on disk: @@ -146,8 +159,9 @@ else At least some obs files needed needed for the current day (yyyymmdd_task) do not exist on disk: yyyymmdd_task = \"${yyyymmdd_task}\" -The number of obs files needed is: - num_needed_files = ${num_needed_files} +The number of obs files needed for the current day (which is equal to the +number of observation retrieval times for the current day) is: + num_obs_retrieve_times_crnt_day = ${num_obs_retrieve_times_crnt_day} The number of obs files that already exist on disk is: num_existing_files = ${num_existing_files} Will retrieve remaining files. @@ -162,7 +176,8 @@ fi #----------------------------------------------------------------------- # -# Whether to move or copy files from raw to processed directories. +# Whether to move the files or copy them from their raw to their processed +# locations. #mv_or_cp="mv" mv_or_cp="cp" # Whether to remove raw observations after processed directories have @@ -175,42 +190,52 @@ if [[ $(boolify "${remove_raw_obs}") == "TRUE" ]]; then mv_or_cp="mv" fi -# Base directory that will contain the daily subdirectories in which the -# MRMS grib2 files retrieved from archive (tar) files will be placed. -# We refer to this as the "raw" base directory because it contains files +# Base directory that will contain the archive subdirectories in which +# the files extracted from each archive (tar) file will be placed. We +# refer to this as the "raw" base directory because it contains files # as they are found in the archives before any processing by this script. -basedir_raw="${basedir_proc}/${yyyymmdd_task}/raw" +basedir_raw="${OBS_DIR}/raw_${yyyymmdd_task}" # Time associated with the archive. MRMS data have daily archives that # have the hour-of-day set to "00". 
yyyymmddhh_arcv="${yyyymmdd_task}00" -# Directory that will contain the MRMS grib2 files retrieved from the -# current 6-hourly archive file. We refer to this as the "raw" quarter- -# daily directory because it will contain the files as they are in the -# archive before any processing by this script. -day_dir_raw="${basedir_raw}/${yyyymmdd_task}" +# Directory that will contain the files retrieved from the current archive +# file. We refer to this as the "raw" archive directory because it will +# contain the files as they are in the archive before any processing by +# this script. +# +# Note: +# Normally, arcv_dir_raw should consist of basedir_raw and a subdirectory +# that depends on the archive date, e.g. +# +# arcv_dir_raw="${basedir_raw}/${yyyymmddhh_arcv}" +# +# but since for MRMS data there is only one archive per day, that directory +# is redundant, so simplicity we set arcv_dir_raw to just basedir_raw. +arcv_dir_raw="${basedir_raw}" -# Make sure the raw quarter-daily directory exists because it is used -# below as the output directory of the retrieve_data.py script (so if -# this directory doesn't already exist, that script will fail). Creating -# this directory also ensures that the raw base directory (basedir_raw) -# exists before we change location to it below. -mkdir -p ${day_dir_raw} +# Make sure the raw archive directory exists because it is used below as +# the output directory of the retrieve_data.py script (so if this directory +# doesn't already exist, that script will fail). Creating this directory +# also ensures that the raw base directory (basedir_raw) exists before we +# change location to it below. +mkdir -p ${arcv_dir_raw} # The retrieve_data.py script first extracts the contents of the archive # file into the directory it was called from and then moves them to the -# specified output location (via the --output_path option). In order to -# avoid other get_obs_ndas tasks (i.e. 
those associated with other days) -# from interfering with (clobbering) these files (because extracted files -# from different get_obs_ndas tasks to have the same names or relative -# paths), we change location to the base raw directory so that files with -# same names are extracted into different directories. +# specified output location (via the --output_path option). Note that +# the relative paths of obs files within archives associted with different +# days may be the same. Thus, if files with the same archive-relative +# paths are being simultaneously extracted from multiple archive files +# (by multiple get_obs tasks), they will likely clobber each other if the +# extracton is being carried out into the same location on disk. To avoid +# this, we first change location to the raw base directory (whose name is +# obs-day dependent) and then call the retrieve_data.py script. cd ${basedir_raw} -# Pull MRMS data from HPSS. This will get all 7 obs files in the current -# archive and place them in the raw quarter-daily directory, although we -# will make use of only 6 of these (we will not use the tm00 file). +# Pull obs from HPSS. This will get all the obs files in the current +# archive and place them in the raw archive directory. cmd=" python3 -u ${USHdir}/retrieve_data.py \ --debug \ @@ -219,7 +244,7 @@ python3 -u ${USHdir}/retrieve_data.py \ --cycle_date ${yyyymmddhh_arcv} \ --data_stores hpss \ --data_type MRMS_obs \ - --output_path ${day_dir_raw} \ + --output_path ${arcv_dir_raw} \ --summary_file retrieve_data.log" print_info_msg "CALLING: ${cmd}" @@ -233,19 +258,68 @@ $cmd || print_err_msg_exit "Could not retrieve obs from HPSS." #----------------------------------------------------------------------- # -# Loop through all hours of the day associated with the task. For each -# hour, find the gzipped grib2 file in the raw daily directory that is -# closest in time to this hour. 
Then gunzip the file and copy it (in the -# process renaming it) to the processed location. -for hr in $(seq 0 1 23); do +# Loop over the raw obs files extracted from the current archive and +# generate from them the processed obs files. +# +# For MRMS obs, the raw obs consist of gzipped grib2 files that are +# usually a few minutes apart in time. However, because forecast data +# is available at most every hour, the SRW App configuration parameter +# MRMS_OBS_AVAIL_INTVL_HRS is set to 1 hour instead of a few minutes. +# Below, we loop over the whole day using this 1-hourly interval. For +# each hour of the day, we call the script mrms_pull_topofhour.py to find +# the gzipped grib2 file in the raw archive directory that is closest in +# time to the hour and unzip it in a temporary directory. We then copy +# or move it to the processed directory, possibly renaming it in the +# process. +for hr in $(seq 0 ${MRMS_OBS_AVAIL_INTVL_HRS} 23); do yyyymmddhh=$(${DATE_UTIL} --date "${yyyymmdd_task} ${hr} hours" +%Y%m%d%H) + yyyymmdd=$(echo ${yyyymmddhh} | cut -c1-8) + hh=$(echo ${yyyymmddhh} | cut -c9-10) + # Create the processed obs file from the raw one (by moving, copying, or + # otherwise) only if the time of the current file in the current archive + # also exists in the list of obs retrieval times for the current day. if [[ ${obs_retrieve_times_crnt_day[@]} =~ ${yyyymmddhh} ]]; then for (( i=0; i<${num_mrms_fields}; i++ )); do + + # First, select from the set of raw files for the current day those that + # are nearest in time to the current hour. Unzip these in a temporary + # subdirectory under the raw base directory. + # + # Note that the script we call to do this (mrms_pull_topofhour.py) assumes + # a certain file naming convention. That convention must match the names + # of the files that the retrieve_data.py script called above ends up + # retrieving. 
The list of possible templates for these names is given
+      # in parm/data_locations.yml, but which of those is actually used is not
+      # known until retrieve_data.py completes.  Thus, that information needs
+      # to be passed back by retrieve_data.py and then passed to mrms_pull_topofhour.py.
+      # For now, we hard-code the file name here.
       python ${USHdir}/mrms_pull_topofhour.py \
         --valid_time ${yyyymmddhh} \
-        --outdir ${basedir_proc} \
         --source ${basedir_raw} \
-        --product ${fields_in_filenames[$i]}
+        --outdir ${basedir_raw}/topofhour \
+        --product ${fields_in_filenames[$i]} \
+        --no-add_vdate_subdir
+
+      # Set the name of and the full path to the raw obs file created by the
+      # mrms_pull_topofhour.py script.  This name is currently hard-coded to
+      # the output of that script.  In the future, it should be set in a more
+      # general way (e.g. obtain from a settings file).
+      fn_raw="${fields_in_filenames[$i]}_${levels_in_filenames[$i]}_${yyyymmdd_task}-${hh}0000.grib2"
+      fp_raw="${basedir_raw}/topofhour/${fn_raw}"
+
+      # Set the full path to the final processed obs file (fp_proc) we want to
+      # create.
+ sec_since_ref=$(${DATE_UTIL} --date "${yyyymmdd} ${hh} hours" +%s) + lhr=$(( (sec_since_ref - sec_since_ref_task)/3600 )) + eval_METplus_timestr_tmpl \ + init_time="${yyyymmdd_task}00" \ + fhr="${lhr}" \ + METplus_timestr_tmpl="${obs_mrms_fp_templates[$i]}" \ + outvarname_evaluated_timestr="fp_proc" + mkdir -p $( dirname "${fp_proc}" ) + + mv ${fp_raw} ${fp_proc} + done fi done diff --git a/ush/get_obs_ndas.sh b/ush/get_obs_ndas.sh index 7ab6fc652b..45338714a2 100755 --- a/ush/get_obs_ndas.sh +++ b/ush/get_obs_ndas.sh @@ -58,18 +58,17 @@ if [ ${remainder} -ne 0 ]; then The obs availability interval NDAS_OBS_AVAIL_INTVL_HRS must divide evenly into 24 but doesn't: NDAS_OBS_AVAIL_INTVL_HRS = ${NDAS_OBS_AVAIL_INTVL_HRS} - mod(24, NDAS_OBS_AVAIL_INTVL_HRS) = ${remainder}" + 24 % NDAS_OBS_AVAIL_INTVL_HRS = ${remainder}" fi # The day (in the form YYYMMDD) associated with the current task via the # task's cycledefs attribute in the ROCOTO xml. yyyymmdd_task=${PDY} -# Base directory in which the daily subdirectories containing the NDAS -# prepbufr files will appear after this script is done. We refer to this -# as the "processed" base directory because it contains the files after -# all processing by this script is complete. -basedir_proc=${OBS_DIR} +# Seconds since some reference time that the DATE_UTIL utility uses of +# the day of the current task. This will be used below to find hours +# since the start of this day. +sec_since_ref_task=$(${DATE_UTIL} --date "${yyyymmdd_task} 0 hours" +%s) # #----------------------------------------------------------------------- # @@ -80,23 +79,9 @@ basedir_proc=${OBS_DIR} # array_name="OBS_RETRIEVE_TIMES_${OBTYPE}_${yyyymmdd_task}" eval obs_retrieve_times_crnt_day=\( \${${array_name}[@]} \) - - - - - -# If there are no observation retrieval times on the day of the current -# task, exit the script. 
-num_obs_retrieve_times_crnt_day=${#obs_retrieve_times_crnt_day[@]} -if [[ ${num_obs_retrieve_times_crnt_day} -eq 0 ]]; then - print_info_msg " -None of the observation retrieval times fall within the day associated -with the current task (yyyymmdd_task): - yyyymmdd_task = \"${yyyymmdd_task}\" -Thus, there is no need to retrieve any obs files." - exit -fi - +# +#----------------------------------------------------------------------- +# # Obs files will be obtained by extracting them from the relevant 6-hourly # archives. Thus, we need the sequence of archive hours over which to # loop. In the simplest case, this sequence will be "6 12 18 24". This @@ -107,32 +92,45 @@ fi # # To generate this sequence, we first set its starting and ending values # as well as the interval. +# +#----------------------------------------------------------------------- +# # Sequence interval must be 6 hours because the archives are 6-hourly. arcv_hr_incr=6 -# Initial guess for starting archive hour. This is set to the archive -# hour containing obs at the first observation retrieval time of the day. +# Initial guess for starting archive hour. This is set to the archive +# hour containing obs at the first obs retrieval time of the day. hh_first=$(echo ${obs_retrieve_times_crnt_day[0]} | cut -c9-10) hr_first=$((10#${hh_first})) arcv_hr_start=$(( (hr_first/arcv_hr_incr + 1)*arcv_hr_incr )) # Ending archive hour. This is set to the archive hour containing obs at -# the last observation retrieval time of the day. +# the last obs retrieval time of the day. hh_last=$(echo ${obs_retrieve_times_crnt_day[-1]} | cut -c9-10) hr_last=$((10#${hh_last})) arcv_hr_end=$(( (hr_last/arcv_hr_incr + 1)*arcv_hr_incr )) -# Check whether any obs files already exist on disk. If so, adjust the -# starting archive hour. In the process, keep a count of the number of -# obs files that already exist on disk. +# Check whether any obs files already exist on disk in their processed +# (i.e. final) locations. 
Here, by "processed" we mean after any renaming +# and rearrangement of files that this script may do to the "raw" files, +# i.e. the files as they are named and arranged within the archive (tar) +# files on HPSS. If so, adjust the starting archive hour. In the process, +# keep a count of the number of obs files that already exist on disk. num_existing_files=0 for yyyymmddhh in ${obs_retrieve_times_crnt_day[@]}; do yyyymmdd=$(echo ${yyyymmddhh} | cut -c1-8) hh=$(echo ${yyyymmddhh} | cut -c9-10) - day_dir_proc="${basedir_proc}" - fn_proc="prepbufr.ndas.${yyyymmddhh}" - fp_proc="${day_dir_proc}/${fn_proc}" + + # Set the full path to the final processed obs file (fp_proc). + sec_since_ref=$(${DATE_UTIL} --date "${yyyymmdd} ${hh} hours" +%s) + lhr=$(( (sec_since_ref - sec_since_ref_task)/3600 )) + eval_METplus_timestr_tmpl \ + init_time="${yyyymmdd_task}00" \ + fhr="${lhr}" \ + METplus_timestr_tmpl="${OBS_DIR}/${OBS_NDAS_ADPSFCorADPUPA_FN_TEMPLATE}" \ + outvarname_evaluated_timestr="fp_proc" + if [[ -f ${fp_proc} ]]; then num_existing_files=$((num_existing_files+1)) print_info_msg " @@ -151,32 +149,39 @@ Setting the hour (since 00) of the first archive to retrieve to: done # If the number of obs files that already exist on disk is equal to the -# number of files needed, then there is no need to retrieve any files. -num_needed_files=$((num_obs_retrieve_times_crnt_day)) -if [[ ${num_existing_files} -eq ${num_needed_files} ]]; then +# number of obs files needed, then there is no need to retrieve any files. +num_obs_retrieve_times_crnt_day=${#obs_retrieve_times_crnt_day[@]} +if [[ ${num_existing_files} -eq ${num_obs_retrieve_times_crnt_day} ]]; then + print_info_msg " All obs files needed for the current day (yyyymmdd_task) already exist on disk: yyyymmdd_task = \"${yyyymmdd_task}\" Thus, there is no need to retrieve any files." exit -# Otherwise, will need to retrieve files. 
In this case, set the sequence -# of hours corresponding to the archives from which files will be retrieved. + +# If the number of obs files that already exist on disk is not equal to +# the number of obs files needed, then we will need to retrieve files. +# In this case, set the sequence of hours corresponding to the archives +# from which files will be retrieved. else + arcv_hrs=($(seq ${arcv_hr_start} ${arcv_hr_incr} ${arcv_hr_end})) arcv_hrs_str="( "$( printf "%s " "${arcv_hrs[@]}" )")" print_info_msg " At least some obs files needed needed for the current day (yyyymmdd_task) do not exist on disk: yyyymmdd_task = \"${yyyymmdd_task}\" -The number of obs files needed is: - num_needed_files = ${num_needed_files} +The number of obs files needed for the current day (which is equal to the +number of observation retrieval times for the current day) is: + num_obs_retrieve_times_crnt_day = ${num_obs_retrieve_times_crnt_day} The number of obs files that already exist on disk is: num_existing_files = ${num_existing_files} Will retrieve remaining files by looping over archives corresponding to the following hours (since 00 of this day): arcv_hrs = ${arcv_hrs_str} " + fi # #----------------------------------------------------------------------- @@ -188,7 +193,8 @@ fi #----------------------------------------------------------------------- # -# Whether to move or copy files from raw to processed directories. +# Whether to move the files or copy them from their raw to their processed +# locations. #mv_or_cp="mv" mv_or_cp="cp" # Whether to remove raw observations after processed directories have @@ -201,11 +207,11 @@ if [[ $(boolify "${remove_raw_obs}") == "TRUE" ]]; then mv_or_cp="mv" fi -# Base directory that will contain the daily subdirectories in which the -# NDAS prepbufr files retrieved from archive (tar) files will be placed. 
-# We refer to this as the "raw" base directory because it contains files +# Base directory that will contain the archive subdirectories in which +# the files extracted from each archive (tar) file will be placed. We +# refer to this as the "raw" base directory because it contains files # as they are found in the archives before any processing by this script. -basedir_raw="${basedir_proc}/raw_${yyyymmdd_task}" +basedir_raw="${OBS_DIR}/raw_${yyyymmdd_task}" for arcv_hr in ${arcv_hrs[@]}; do @@ -217,51 +223,65 @@ arcv_hr = ${arcv_hr}" yyyymmdd_arcv=$(echo ${yyyymmddhh_arcv} | cut -c1-8) hh_arcv=$(echo ${yyyymmddhh_arcv} | cut -c9-10) - # Directory that will contain the NDAS prepbufr files retrieved from the - # current 6-hourly archive file. We refer to this as the "raw" quarter- - # daily directory because it will contain the files as they are in the - # archive before any processing by this script. - qrtrday_dir_raw="${basedir_raw}/${yyyymmddhh_arcv}" + # Directory that will contain the files retrieved from the current archive + # file. We refer to this as the "raw" archive directory because it will + # contain the files as they are in the archive before any processing by + # this script. + arcv_dir_raw="${basedir_raw}/${yyyymmddhh_arcv}" - # Check whether any of the observation retrieval times for the day - # associated with this task fall in the time interval spanned by the - # current archive. If so, set the flag (do_retrieve) to retrieve the - # files in the current + # Check whether any of the obs retrieval times for the day associated with + # this task fall in the time interval spanned by the current archive. If + # so, set the flag (do_retrieve) to retrieve the files in the current # archive. 
- yyyymmddhh_qrtrday_start=$(${DATE_UTIL} --date "${yyyymmdd_arcv} ${hh_arcv} 6 hours ago" +%Y%m%d%H) - yyyymmddhh_qrtrday_end=$(${DATE_UTIL} --date "${yyyymmdd_arcv} ${hh_arcv} 1 hours ago" +%Y%m%d%H) + arcv_contents_yyyymmddhh_start=$(${DATE_UTIL} --date "${yyyymmdd_arcv} ${hh_arcv} 6 hours ago" +%Y%m%d%H) + arcv_contents_yyyymmddhh_end=$(${DATE_UTIL} --date "${yyyymmdd_arcv} ${hh_arcv} 1 hours ago" +%Y%m%d%H) do_retrieve="FALSE" for (( i=0; i<${num_obs_retrieve_times_crnt_day}; i++ )); do - retrieve_time=${obs_retrieve_times_crnt_day[i]} - if [[ "${retrieve_time}" -ge "${yyyymmddhh_qrtrday_start}" ]] && \ - [[ "${retrieve_time}" -le "${yyyymmddhh_qrtrday_end}" ]]; then + obs_retrieve_time=${obs_retrieve_times_crnt_day[i]} + if [[ "${obs_retrieve_time}" -ge "${arcv_contents_yyyymmddhh_start}" ]] && \ + [[ "${obs_retrieve_time}" -le "${arcv_contents_yyyymmddhh_end}" ]]; then do_retrieve="TRUE" break fi done - if [[ $(boolify "${do_retrieve}") == "TRUE" ]]; then + if [[ $(boolify "${do_retrieve}") != "TRUE" ]]; then + + print_info_msg " +None of the current day's observation retrieval times fall in the range +spanned by the current ${arcv_hr_incr}-hourly archive file. The bounds of the current +archive are: + arcv_contents_yyyymmddhh_start = \"${arcv_contents_yyyymmddhh_start}\" + arcv_contents_yyyymmddhh_end = \"${arcv_contents_yyyymmddhh_end}\" +The times at which obs need to be retrieved are: + obs_retrieve_times_crnt_day = ($(printf "\"%s\" " ${obs_retrieve_times_crnt_day[@]}))" + + else - # Make sure the raw quarter-daily directory exists because it is used - # below as the output directory of the retrieve_data.py script (so if - # this directory doesn't already exist, that script will fail). Creating - # this directory also ensures that the raw base directory (basedir_raw) - # exists before we change location to it below. 
- mkdir -p ${qrtrday_dir_raw} + # Make sure the raw archive directory exists because it is used below as + # the output directory of the retrieve_data.py script (so if this directory + # doesn't already exist, that script will fail). Creating this directory + # also ensures that the raw base directory (basedir_raw) exists before we + # change location to it below. + mkdir -p ${arcv_dir_raw} # The retrieve_data.py script first extracts the contents of the archive # file into the directory it was called from and then moves them to the - # specified output location (via the --output_path option). In order to - # avoid other get_obs_ndas tasks (i.e. those associated with other days) - # from interfering with (clobbering) these files (because extracted files - # from different get_obs_ndas tasks to have the same names or relative - # paths), we change location to the base raw directory so that files with - # same names are extracted into different directories. + # specified output location (via the --output_path option). Note that + # the relative paths of obs files within archives associted with different + # days may be the same. Thus, if files with the same archive-relative + # paths are being simultaneously extracted from multiple archive files + # (by multiple get_obs tasks), they will likely clobber each other if the + # extracton is being carried out into the same location on disk. To avoid + # this, we first change location to the raw base directory (whose name is + # obs-day dependent) and then call the retrieve_data.py script. cd ${basedir_raw} - # Pull NDAS data from HPSS. This will get all 7 obs files in the current - # archive and place them in the raw quarter-daily directory, although we - # will make use of only 6 of these (we will not use the tm00 file). + # Pull obs from HPSS. This will get all the obs files in the current + # archive and place them in the raw archive directory. 
+ # Note that for the specific case of NDAS obs, this will get all 7 obs + # files in the current archive, although we will make use of only 6 of + # these (we will not use the tm00 file). cmd=" python3 -u ${USHdir}/retrieve_data.py \ --debug \ @@ -270,43 +290,56 @@ arcv_hr = ${arcv_hr}" --cycle_date ${yyyymmddhh_arcv} \ --data_stores hpss \ --data_type NDAS_obs \ - --output_path ${qrtrday_dir_raw} \ + --output_path ${arcv_dir_raw} \ --summary_file retrieve_data.log" print_info_msg "CALLING: ${cmd}" $cmd || print_err_msg_exit "Could not retrieve obs from HPSS." - # Create the processed NDAS prepbufr files. This consists of simply - # copying or moving (and in the process renaming) them from the raw - # quarter-daily directory to the processed directory. Note that the - # tm06 files contain more/better observations than tm00 for the - # equivalent time, so we use those. - for hrs_ago in $(seq --format="%02g" 6 -1 1); do + # Loop over the raw obs files extracted from the current archive and + # generate from them the processed obs files. + # + # For NDAS obs, this consists of simply copying or moving the files from + # the raw archive directory to the processed directory, possibly renaming + # them in the process. + # + # Note that the tm06 file in a given archive contain more/better observations + # than the tm00 file in the next archive (their valid times are equivalent), + # so we use the tm06 files. + for hrs_ago in $(seq --format="%02g" 6 -${NDAS_OBS_AVAIL_INTVL_HRS} 1); do yyyymmddhh=$(${DATE_UTIL} --date "${yyyymmdd_arcv} ${hh_arcv} ${hrs_ago} hours ago" +%Y%m%d%H) yyyymmdd=$(echo ${yyyymmddhh} | cut -c1-8) hh=$(echo ${yyyymmddhh} | cut -c9-10) + # Create the processed obs file from the raw one (by moving, copying, or + # otherwise) only if the time of the current file in the current archive + # also exists in the list of obs retrieval times for the current day. 
if [[ ${obs_retrieve_times_crnt_day[@]} =~ ${yyyymmddhh} ]]; then + + # The raw file name needs to be the same as what the retrieve_data.py + # script called above ends up retrieving. The list of possibile templates + # for this name is given in parm/data_locations.yml, but which of those + # is actually used is not known until retrieve_data.py completes. Thus, + # that information needs to be passed back by the script and used here. + # For now, we hard-code the file name here. fn_raw="nam.t${hh_arcv}z.prepbufr.tm${hrs_ago}.nr" - fp_raw="${qrtrday_dir_raw}/${fn_raw}" - day_dir_proc="${basedir_proc}" - mkdir -p ${day_dir_proc} - fn_proc="prepbufr.ndas.${yyyymmddhh}" - fp_proc="${day_dir_proc}/${fn_proc}" + fp_raw="${arcv_dir_raw}/${fn_raw}" + + # Set the full path to the final processed obs file (fp_proc) we want to + # create. + sec_since_ref=$(${DATE_UTIL} --date "${yyyymmdd} ${hh} hours" +%s) + lhr=$(( (sec_since_ref - sec_since_ref_task)/3600 )) + eval_METplus_timestr_tmpl \ + init_time="${yyyymmdd_task}00" \ + fhr="${lhr}" \ + METplus_timestr_tmpl="${OBS_DIR}/${OBS_NDAS_ADPSFCorADPUPA_FN_TEMPLATE}" \ + outvarname_evaluated_timestr="fp_proc" + mkdir -p $( dirname "${fp_proc}" ) + ${mv_or_cp} ${fp_raw} ${fp_proc} + fi done - else - - print_info_msg " -None of the current day's observation retrieval times fall in the range -spanned by the current 6-hourly archive file. 
The bounds of the current -archive are: - yyyymmddhh_qrtrday_start = \"${yyyymmddhh_qrtrday_start}\" - yyyymmddhh_qrtrday_end = \"${yyyymmddhh_qrtrday_end}\" -The observation retrieval times are: - obs_retrieve_times_crnt_day = ($(printf "\"%s\" " ${obs_retrieve_times_crnt_day[@]}))" - fi done diff --git a/ush/get_obs_nohrsc.sh b/ush/get_obs_nohrsc.sh index c71266ed07..5c56f8a8df 100755 --- a/ush/get_obs_nohrsc.sh +++ b/ush/get_obs_nohrsc.sh @@ -55,7 +55,7 @@ if [ ${remainder} -ne 0 ]; then The obs availability interval NOHRSC_OBS_AVAIL_INTVL_HRS must divide evenly into 24 but doesn't: NOHRSC_OBS_AVAIL_INTVL_HRS = ${NOHRSC_OBS_AVAIL_INTVL_HRS} - mod(24, NOHRSC_OBS_AVAIL_INTVL_HRS) = ${remainder}" + 24 % NOHRSC_OBS_AVAIL_INTVL_HRS = ${remainder}" fi # Accumulation period to use when getting obs files. This is simply (a @@ -66,11 +66,10 @@ accum_obs_fmt=$( printf "%d" "${NOHRSC_OBS_AVAIL_INTVL_HRS}" ) # task's cycledefs attribute in the ROCOTO xml. yyyymmdd_task=${PDY} -# Base directory in which the daily subdirectories containing the grib2 -# obs files will appear after this script is done. We refer to this as -# the "processed" base directory because it contains the files after all -# processing by this script is complete. -basedir_proc=${OBS_DIR} +# Seconds since some reference time that the DATE_UTIL utility uses of +# the day of the current task. This will be used below to find hours +# since the start of this day. +sec_since_ref_task=$(${DATE_UTIL} --date "${yyyymmdd_task} 0 hours" +%s) # #----------------------------------------------------------------------- # @@ -119,16 +118,26 @@ else arcv_hr_end=$(( arcv_hr_end*arcv_hr_incr )) fi -# Check whether any obs files already exist on disk. If so, adjust the -# starting archive hour. In the process, keep a count of the number of -# obs files that already exist on disk. +# Check whether any obs files already exist on disk in their processed +# (i.e. final) locations. 
Here, by "processed" we mean after any renaming +# and rearrangement of files that this script may do to the "raw" files, +# i.e. the files as they are named and arranged within the archive (tar) +# files on HPSS. If so, adjust the starting archive hour. In the process, +# keep a count of the number of obs files that already exist on disk. num_existing_files=0 for yyyymmddhh in ${obs_retrieve_times_crnt_day[@]}; do yyyymmdd=$(echo ${yyyymmddhh} | cut -c1-8) hh=$(echo ${yyyymmddhh} | cut -c9-10) - day_dir_proc="${basedir_proc}" - fn_proc="sfav2_CONUS_${accum_obs_fmt}h_${yyyymmddhh}_grid184.grb2" - fp_proc="${day_dir_proc}/${fn_proc}" + + # Set the full path to the final processed obs file (fp_proc). + sec_since_ref=$(${DATE_UTIL} --date "${yyyymmdd} ${hh} hours" +%s) + lhr=$(( (sec_since_ref - sec_since_ref_task)/3600 )) + eval_METplus_timestr_tmpl \ + init_time="${yyyymmdd_task}00" \ + fhr="${lhr}" \ + METplus_timestr_tmpl="${OBS_DIR}/${OBS_NOHRSC_ASNOW_FN_TEMPLATE}" \ + outvarname_evaluated_timestr="fp_proc" + if [[ -f ${fp_proc} ]]; then num_existing_files=$((num_existing_files+1)) print_info_msg " @@ -205,7 +214,8 @@ fi #----------------------------------------------------------------------- # -# Whether to move or copy files from raw to processed directories. +# Whether to move the files or copy them from their raw to their processed +# locations. #mv_or_cp="mv" mv_or_cp="cp" # Whether to remove raw observations after processed directories have @@ -218,11 +228,11 @@ if [[ $(boolify "${remove_raw_obs}") == "TRUE" ]]; then mv_or_cp="mv" fi -# Base directory that will contain the daily subdirectories in which the -# NOHRSC grib2 files retrieved from archive (tar) files will be placed. -# We refer to this as the "raw" base directory because it contains files +# Base directory that will contain the archive subdirectories in which +# the files extracted from each archive (tar) file will be placed. 
We +# refer to this as the "raw" base directory because it contains files # as they are found in the archives before any processing by this script. -basedir_raw="${basedir_proc}/raw_${yyyymmdd_task}" +basedir_raw="${OBS_DIR}/raw_${yyyymmdd_task}" for arcv_hr in ${arcv_hrs[@]}; do @@ -234,10 +244,10 @@ arcv_hr = ${arcv_hr}" yyyymmdd_arcv=$(echo ${yyyymmddhh_arcv} | cut -c1-8) hh_arcv=$(echo ${yyyymmddhh_arcv} | cut -c9-10) - # Directory that will contain the grib2 files retrieved from the current - # archive file. We refer to this as the "raw" archive directory because - # it will contain the files as they are in the archive before any processing - # by this script. + # Directory that will contain the files retrieved from the current archive + # file. We refer to this as the "raw" archive directory because it will + # contain the files as they are in the archive before any processing by + # this script. arcv_dir_raw="${basedir_raw}/${yyyymmdd_arcv}" # Check whether any of the obs retrieval times for the day associated with @@ -279,12 +289,14 @@ The times at which obs need to be retrieved are: # The retrieve_data.py script first extracts the contents of the archive # file into the directory it was called from and then moves them to the - # specified output location (via the --output_path option). In order to - # avoid other get_obs_ccpa tasks (i.e. those associated with other days) - # from interfering with (clobbering) these files (because extracted files - # from different get_obs_ccpa tasks to have the same names or relative - # paths), we change location to the base raw directory so that files with - # same names are extracted into different directories. + # specified output location (via the --output_path option). Note that + # the relative paths of obs files within archives associted with different + # days may be the same. 
Thus, if files with the same archive-relative
+  # paths are being simultaneously extracted from multiple archive files
+  # (by multiple get_obs tasks), they will likely clobber each other if the
+  # extraction is being carried out into the same location on disk.  To avoid
+  # this, we first change location to the raw base directory (whose name is
+  # obs-day dependent) and then call the retrieve_data.py script.
   cd ${basedir_raw}
 
   # Pull obs from HPSS.  This will get all the obs files in the current
@@ -303,23 +315,43 @@ The times at which obs need to be retrieved are:
   print_info_msg "CALLING: ${cmd}"
   $cmd || print_err_msg_exit "Could not retrieve obs from HPSS."
 
-  # Create the processed NOHRSC grib2 files.  This consists of simply copying
-  # or moving them from the raw daily directory to the processed directory.
-  for hrs in $(seq 0 6 18); do
+  # Loop over the raw obs files extracted from the current archive and
+  # generate from them the processed obs files.
+  #
+  # For NOHRSC obs, this consists of simply copying or moving the files from
+  # the raw archive directory to the processed directory, possibly renaming
+  # them in the process.
+  for hrs in $(seq 0 ${NOHRSC_OBS_AVAIL_INTVL_HRS} 23); do
 
     yyyymmddhh=$(${DATE_UTIL} --date "${yyyymmdd_arcv} ${hh_arcv} ${hrs} hours" +%Y%m%d%H)
     yyyymmdd=$(echo ${yyyymmddhh} | cut -c1-8)
     hh=$(echo ${yyyymmddhh} | cut -c9-10)
 
-    # Create the processed grib2 obs file from the raw one (by moving, copying,
-    # or otherwise) only if the time of the current file in the current archive
+    # Create the processed obs file from the raw one (by moving, copying, or
+    # otherwise) only if the time of the current file in the current archive
     # also exists in the list of obs retrieval times for the current day.
     if [[ ${obs_retrieve_times_crnt_day[@]} =~ ${yyyymmddhh} ]]; then
+
+      # The raw file name needs to be the same as what the retrieve_data.py
+      # script called above ends up retrieving. 
The list of possibile templates + # for this name is given in parm/data_locations.yml, but which of those + # is actually used is not known until retrieve_data.py completes. Thus, + # that information needs to be passed back by the script and used here. + # For now, we hard-code the file name here. fn_raw="sfav2_CONUS_${accum_obs_fmt}h_${yyyymmddhh}_grid184.grb2" fp_raw="${arcv_dir_raw}/${fn_raw}" - day_dir_proc="${basedir_proc}" - mkdir -p ${day_dir_proc} - fn_proc="${fn_raw}" - fp_proc="${day_dir_proc}/${fn_proc}" + + # Set the full path to the final processed obs file (fp_proc) we want to + # create. + sec_since_ref=$(${DATE_UTIL} --date "${yyyymmdd} ${hh} hours" +%s) + lhr=$(( (sec_since_ref - sec_since_ref_task)/3600 )) + eval_METplus_timestr_tmpl \ + init_time="${yyyymmdd_task}00" \ + fhr="${lhr}" \ + METplus_timestr_tmpl="${OBS_DIR}/${OBS_NOHRSC_ASNOW_FN_TEMPLATE}" \ + outvarname_evaluated_timestr="fp_proc" + mkdir -p $( dirname "${fp_proc}" ) + ${mv_or_cp} ${fp_raw} ${fp_proc} + fi done From 2357cd3d041ea005ad88f4451c9b963e11d4e243 Mon Sep 17 00:00:00 2001 From: gerard ketefian Date: Thu, 3 Oct 2024 03:00:52 -0600 Subject: [PATCH 096/131] Fix bug introduced in previous set of changes. --- ush/get_obs_mrms.sh | 8 ++++++-- 1 file changed, 6 insertions(+), 2 deletions(-) diff --git a/ush/get_obs_mrms.sh b/ush/get_obs_mrms.sh index 71eae52b9c..d13e374620 100755 --- a/ush/get_obs_mrms.sh +++ b/ush/get_obs_mrms.sh @@ -116,6 +116,7 @@ num_existing_files=0 num_mrms_fields=${#mrms_fields[@]} for yyyymmddhh in ${obs_retrieve_times_crnt_day[@]}; do for (( i=0; i<${num_mrms_fields}; i++ )); do + yyyymmdd=$(echo ${yyyymmddhh} | cut -c1-8) hh=$(echo ${yyyymmddhh} | cut -c9-10) @@ -139,13 +140,16 @@ File does not exist on disk: Will attempt to retrieve all obs files." break 2 fi + done done # If the number of obs files that already exist on disk is equal to the -# number of obs files needed, then there is no need to retrieve any files. 
+# number of obs files needed (which is num_mrms_fields times the number +# of obs retrieval times in the current day), then there is no need to +# retrieve any files. num_obs_retrieve_times_crnt_day=${#obs_retrieve_times_crnt_day[@]} -if [[ ${num_existing_files} -eq ${num_obs_retrieve_times_crnt_day} ]]; then +if [[ ${num_existing_files} -eq $((num_mrms_fields*num_obs_retrieve_times_crnt_day)) ]]; then print_info_msg " All obs files needed for the current day (yyyymmdd_task) already exist From f36a86628ba8f2949fb3c83b6e5b4acd445eeca9 Mon Sep 17 00:00:00 2001 From: gerard ketefian Date: Thu, 3 Oct 2024 03:02:15 -0600 Subject: [PATCH 097/131] Modify existing vx WE2E test so it is able to find the staged NOHRSC obs file. --- .../config.MET_ensemble_verification_winter_wx.yaml | 3 +++ 1 file changed, 3 insertions(+) diff --git a/tests/WE2E/test_configs/verification/config.MET_ensemble_verification_winter_wx.yaml b/tests/WE2E/test_configs/verification/config.MET_ensemble_verification_winter_wx.yaml index 85a515f293..1845255f54 100644 --- a/tests/WE2E/test_configs/verification/config.MET_ensemble_verification_winter_wx.yaml +++ b/tests/WE2E/test_configs/verification/config.MET_ensemble_verification_winter_wx.yaml @@ -32,3 +32,6 @@ global: NUM_ENS_MEMBERS: 10 verification: VX_FIELDS: [ "APCP", "ASNOW", "REFC", "RETOP", "ADPSFC", "ADPUPA" ] + OBS_NOHRSC_ASNOW_FN_TEMPLATE: '{%- set data_intvl_hrs = "%02d" % NOHRSC_OBS_AVAIL_INTVL_HRS %} + {{- "{valid?fmt=%Y%m%d}/sfav2_CONUS_" ~ data_intvl_hrs ~ "h_{valid?fmt=%Y%m%d%H}_grid184.grb2" }}' + From 6ba25960f3239b8b51706366dc0a64561f2474b8 Mon Sep 17 00:00:00 2001 From: gerard ketefian Date: Sun, 6 Oct 2024 12:27:46 -0600 Subject: [PATCH 098/131] Bug fix. 
---
 scripts/exregional_run_met_pb2nc_obs_ndas.sh | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/scripts/exregional_run_met_pb2nc_obs_ndas.sh b/scripts/exregional_run_met_pb2nc_obs_ndas.sh
index 77cdb1221d..55244b7fdf 100755
--- a/scripts/exregional_run_met_pb2nc_obs_ndas.sh
+++ b/scripts/exregional_run_met_pb2nc_obs_ndas.sh
@@ -192,9 +192,9 @@ done
 if [ "${num_missing_files}" -gt "${NUM_MISSING_OBS_FILES_MAX}" ]; then
   print_err_msg_exit "\
 The number of missing ${OBTYPE} obs files (num_missing_files) is greater
-than the maximum allowed number (num_missing_files_max):
+than the maximum allowed number (NUM_MISSING_OBS_FILES_MAX):
   num_missing_files = ${num_missing_files}
-  num_missing_files_max = ${num_missing_files_max}
+  NUM_MISSING_OBS_FILES_MAX = ${NUM_MISSING_OBS_FILES_MAX}
 fi

 # Remove leading comma from LEADHR_LIST.

From e3a05bf92a7a4fc05e85662b7f5585b2eec74945 Mon Sep 17 00:00:00 2001
From: gerard ketefian
Date: Sun, 6 Oct 2024 12:31:24 -0600
Subject: [PATCH 099/131] Change config variable name for clarity. 
--- scripts/exregional_run_met_genensprod_or_ensemblestat.sh | 2 +- scripts/exregional_run_met_gridstat_or_pointstat_vx.sh | 2 +- ...exregional_run_met_gridstat_or_pointstat_vx_ensmean.sh | 2 +- ...exregional_run_met_gridstat_or_pointstat_vx_ensprob.sh | 2 +- scripts/exregional_run_met_pb2nc_obs_ndas.sh | 6 +++--- ush/config_defaults.yaml | 8 ++++---- 6 files changed, 11 insertions(+), 11 deletions(-) diff --git a/scripts/exregional_run_met_genensprod_or_ensemblestat.sh b/scripts/exregional_run_met_genensprod_or_ensemblestat.sh index 9e1d0bd390..67ae70c8b9 100755 --- a/scripts/exregional_run_met_genensprod_or_ensemblestat.sh +++ b/scripts/exregional_run_met_genensprod_or_ensemblestat.sh @@ -159,7 +159,7 @@ if [ "${grid_or_point}" = "grid" ]; then elif [ "${grid_or_point}" = "point" ]; then OBS_INPUT_DIR="${vx_output_basedir}/metprd/Pb2nc_obs" - OBS_INPUT_FN_TEMPLATE="${OBS_NDAS_ADPSFCorADPUPA_FN_TEMPLATE_PB2NC_OUTPUT}" + OBS_INPUT_FN_TEMPLATE="${OBS_NDAS_ADPSFCandADPUPA_FN_TEMPLATE_PB2NC_OUTPUT}" FCST_INPUT_DIR="${vx_fcst_input_basedir}" fi diff --git a/scripts/exregional_run_met_gridstat_or_pointstat_vx.sh b/scripts/exregional_run_met_gridstat_or_pointstat_vx.sh index aca2795018..e16b06cb46 100755 --- a/scripts/exregional_run_met_gridstat_or_pointstat_vx.sh +++ b/scripts/exregional_run_met_gridstat_or_pointstat_vx.sh @@ -200,7 +200,7 @@ if [ "${grid_or_point}" = "grid" ]; then elif [ "${grid_or_point}" = "point" ]; then OBS_INPUT_DIR="${vx_output_basedir}/metprd/Pb2nc_obs" - OBS_INPUT_FN_TEMPLATE="${OBS_NDAS_ADPSFCorADPUPA_FN_TEMPLATE_PB2NC_OUTPUT}" + OBS_INPUT_FN_TEMPLATE="${OBS_NDAS_ADPSFCandADPUPA_FN_TEMPLATE_PB2NC_OUTPUT}" FCST_INPUT_DIR="${vx_fcst_input_basedir}" FCST_INPUT_FN_TEMPLATE="${FCST_SUBDIR_TEMPLATE:+${FCST_SUBDIR_TEMPLATE}/}${FCST_FN_TEMPLATE}" diff --git a/scripts/exregional_run_met_gridstat_or_pointstat_vx_ensmean.sh b/scripts/exregional_run_met_gridstat_or_pointstat_vx_ensmean.sh index 5ad0560f28..adecb68bcd 100755 --- 
a/scripts/exregional_run_met_gridstat_or_pointstat_vx_ensmean.sh +++ b/scripts/exregional_run_met_gridstat_or_pointstat_vx_ensmean.sh @@ -144,7 +144,7 @@ if [ "${grid_or_point}" = "grid" ]; then elif [ "${grid_or_point}" = "point" ]; then OBS_INPUT_DIR="${vx_output_basedir}/metprd/Pb2nc_obs" - OBS_INPUT_FN_TEMPLATE="${OBS_NDAS_ADPSFCorADPUPA_FN_TEMPLATE_PB2NC_OUTPUT}" + OBS_INPUT_FN_TEMPLATE="${OBS_NDAS_ADPSFCandADPUPA_FN_TEMPLATE_PB2NC_OUTPUT}" FCST_INPUT_DIR="${vx_output_basedir}${slash_cdate_or_null}/metprd/GenEnsProd" fi diff --git a/scripts/exregional_run_met_gridstat_or_pointstat_vx_ensprob.sh b/scripts/exregional_run_met_gridstat_or_pointstat_vx_ensprob.sh index 6a8da3166d..2c27a9a597 100755 --- a/scripts/exregional_run_met_gridstat_or_pointstat_vx_ensprob.sh +++ b/scripts/exregional_run_met_gridstat_or_pointstat_vx_ensprob.sh @@ -143,7 +143,7 @@ if [ "${grid_or_point}" = "grid" ]; then elif [ "${grid_or_point}" = "point" ]; then OBS_INPUT_DIR="${vx_output_basedir}/metprd/Pb2nc_obs" - OBS_INPUT_FN_TEMPLATE="${OBS_NDAS_ADPSFCorADPUPA_FN_TEMPLATE_PB2NC_OUTPUT}" + OBS_INPUT_FN_TEMPLATE="${OBS_NDAS_ADPSFCandADPUPA_FN_TEMPLATE_PB2NC_OUTPUT}" fi OBS_INPUT_FN_TEMPLATE=$( eval echo ${OBS_INPUT_FN_TEMPLATE} ) diff --git a/scripts/exregional_run_met_pb2nc_obs_ndas.sh b/scripts/exregional_run_met_pb2nc_obs_ndas.sh index 55244b7fdf..e93387ed0a 100755 --- a/scripts/exregional_run_met_pb2nc_obs_ndas.sh +++ b/scripts/exregional_run_met_pb2nc_obs_ndas.sh @@ -136,11 +136,11 @@ set_vx_params \ vx_output_basedir=$( eval echo "${VX_OUTPUT_BASEDIR}" ) OBS_INPUT_DIR="${OBS_DIR}" -OBS_INPUT_FN_TEMPLATE=$( eval echo ${OBS_NDAS_ADPSFCorADPUPA_FN_TEMPLATE} ) +OBS_INPUT_FN_TEMPLATE=$( eval echo ${OBS_NDAS_ADPSFCandADPUPA_FN_TEMPLATE} ) OUTPUT_BASE="${vx_output_basedir}" OUTPUT_DIR="${OUTPUT_BASE}/metprd/${MetplusToolName}_obs" -OUTPUT_FN_TEMPLATE=$( eval echo ${OBS_NDAS_ADPSFCorADPUPA_FN_TEMPLATE_PB2NC_OUTPUT} ) +OUTPUT_FN_TEMPLATE=$( eval echo 
${OBS_NDAS_ADPSFCandADPUPA_FN_TEMPLATE_PB2NC_OUTPUT} ) STAGING_DIR="${OUTPUT_BASE}/stage/${MetplusToolName}_obs" # #----------------------------------------------------------------------- @@ -163,7 +163,7 @@ for yyyymmddhh in ${obs_retrieve_times_crnt_day[@]}; do eval_METplus_timestr_tmpl \ init_time="${yyyymmdd_task}00" \ fhr="${lhr}" \ - METplus_timestr_tmpl="${OBS_DIR}/${OBS_NDAS_ADPSFCorADPUPA_FN_TEMPLATE}" \ + METplus_timestr_tmpl="${OBS_DIR}/${OBS_NDAS_ADPSFCandADPUPA_FN_TEMPLATE}" \ outvarname_evaluated_timestr="fp" if [[ -f "${fp}" ]]; then diff --git a/ush/config_defaults.yaml b/ush/config_defaults.yaml index 56bd15b814..8a02964cc2 100644 --- a/ush/config_defaults.yaml +++ b/ush/config_defaults.yaml @@ -2448,7 +2448,7 @@ verification: # OBS_MRMS_RETOP_FN_TEMPLATE: # File name template for MRMS echo top observations. # - # OBS_NDAS_ADPSFCorADPUPA_FN_TEMPLATE: + # OBS_NDAS_ADPSFCandADPUPA_FN_TEMPLATE: # File name template for NDAS surface and upper air observations. # This template is used by the workflow tasks that call the METplus Pb2nc # tool on NDAS obs to find the input observation files containing ADP @@ -2461,7 +2461,7 @@ verification: {{- "sfav2_CONUS_" ~ data_intvl_hrs ~ "h_{valid?fmt=%Y%m%d%H}_grid184.grb2" }}' OBS_MRMS_REFC_FN_TEMPLATE: '{valid?fmt=%Y%m%d}/MergedReflectivityQCComposite_00.50_{valid?fmt=%Y%m%d}-{valid?fmt=%H%M%S}.grib2' OBS_MRMS_RETOP_FN_TEMPLATE: '{valid?fmt=%Y%m%d}/EchoTop_18_00.50_{valid?fmt=%Y%m%d}-{valid?fmt=%H%M%S}.grib2' - OBS_NDAS_ADPSFCorADPUPA_FN_TEMPLATE: 'prepbufr.ndas.{valid?fmt=%Y%m%d%H}' + OBS_NDAS_ADPSFCandADPUPA_FN_TEMPLATE: 'prepbufr.ndas.{valid?fmt=%Y%m%d%H}' # # OBS_CCPA_APCP_FN_TEMPLATE_PCPCOMBINE_OUTPUT: # Template used to specify the names of the output NetCDF observation @@ -2470,7 +2470,7 @@ verification: # APCP, both for 1 hour and for > 1 hour accumulation periods, in NetCDF # format.) 
# - # OBS_NDAS_ADPSFCorADPUPA_FN_TEMPLATE_PB2NC_OUTPUT: + # OBS_NDAS_ADPSFCandADPUPA_FN_TEMPLATE_PB2NC_OUTPUT: # Template used to specify the names of the output NetCDF observation # files generated by the worfklow verification tasks that call the # METplus Pb2nc tool on NDAS observations. (These files will contain @@ -2480,7 +2480,7 @@ verification: {{- "ccpa.t{valid?fmt=%H}z." ~ data_intvl_hrs ~ "h.hrap.conus.gb2_a${ACCUM_HH}h.nc" }}' OBS_NOHRSC_ASNOW_FN_TEMPLATE_PCPCOMBINE_OUTPUT: '{%- set data_intvl_hrs = "%d" % NOHRSC_OBS_AVAIL_INTVL_HRS %} {{- "sfav2_CONUS_" ~ data_intvl_hrs ~ "h_{valid?fmt=%Y%m%d%H}_grid184.grb2_a${ACCUM_HH}h.nc" }}' - OBS_NDAS_ADPSFCorADPUPA_FN_TEMPLATE_PB2NC_OUTPUT: '${OBS_NDAS_ADPSFCorADPUPA_FN_TEMPLATE}.nc' + OBS_NDAS_ADPSFCandADPUPA_FN_TEMPLATE_PB2NC_OUTPUT: '${OBS_NDAS_ADPSFCandADPUPA_FN_TEMPLATE}.nc' # # VX_FCST_MODEL_NAME: # String that specifies a descriptive name for the model being verified. From 84c54ba7ae450e0c9504ff11eeb944e24847cf1f Mon Sep 17 00:00:00 2001 From: gerard ketefian Date: Sun, 6 Oct 2024 16:37:40 -0600 Subject: [PATCH 100/131] Use new python script to get any of the allowed kinds of obs instead of the four separate shell scripts. 
--- scripts/exregional_get_verif_obs.sh | 102 +-- ush/get_obs.py | 885 +++++++++++++++++++++++++++ ush/get_obs_ccpa.sh | 484 --------------- ush/get_obs_mrms.sh | 341 ----------- ush/get_obs_ndas.sh | 357 ----------- ush/get_obs_nohrsc.sh | 372 ----------- ush/run_eval_METplus_timestr_tmpl.sh | 28 + 7 files changed, 926 insertions(+), 1643 deletions(-) create mode 100644 ush/get_obs.py delete mode 100755 ush/get_obs_ccpa.sh delete mode 100755 ush/get_obs_mrms.sh delete mode 100755 ush/get_obs_ndas.sh delete mode 100755 ush/get_obs_nohrsc.sh create mode 100755 ush/run_eval_METplus_timestr_tmpl.sh diff --git a/scripts/exregional_get_verif_obs.sh b/scripts/exregional_get_verif_obs.sh index 4e981b3958..158218889e 100755 --- a/scripts/exregional_get_verif_obs.sh +++ b/scripts/exregional_get_verif_obs.sh @@ -46,92 +46,8 @@ done # #----------------------------------------------------------------------- # -# This script performs several important tasks for preparing data for -# verification tasks. Depending on the value of the environment variable -# OBTYPE=(CCPA|MRMS|NDAS|NOHRSC), the script will prepare that particular data -# set. -# -# If data is not available on disk (in the location specified by -# CCPA_OBS_DIR, MRMS_OBS_DIR, NDAS_OBS_DIR, or NOHRSC_OBS_DIR respectively), -# the script attempts to retrieve the data from HPSS using the retrieve_data.py -# script. Depending on the data set, there are a few strange quirks and/or -# bugs in the way data is organized; see in-line comments for details. -# -# -# CCPA (Climatology-Calibrated Precipitation Analysis) precipitation accumulation obs -# ---------- -# If data is available on disk, it must be in the following -# directory structure and file name conventions expected by verification -# tasks: -# -# {CCPA_OBS_DIR}/{YYYYMMDD}/ccpa.t{HH}z.01h.hrap.conus.gb2 -# -# If data is retrieved from HPSS, it will be automatically staged by this -# script. -# -# Notes about the data and how it's used for verification: -# -# 1. 
Accumulation is currently hardcoded to 01h. The verification will -# use MET/pcp-combine to sum 01h files into desired accumulations. -# -# 2. There is a problem with the valid time in the metadata for files -# valid from 19 - 00 UTC (or files under the '00' directory). This is -# accounted for in this script for data retrieved from HPSS, but if you -# have manually staged data on disk you should be sure this is accounted -# for. See in-line comments below for details. -# -# -# MRMS (Multi-Radar Multi-Sensor) radar observations -# ---------- -# If data is available on disk, it must be in the following -# directory structure and file name conventions expected by verification -# tasks: -# -# {MRMS_OBS_DIR}/{YYYYMMDD}/[PREFIX]{YYYYMMDD}-{HH}0000.grib2, -# -# Where [PREFIX] is MergedReflectivityQCComposite_00.50_ for reflectivity -# data and EchoTop_18_00.50_ for echo top data. If data is not available -# at the top of the hour, you should rename the file closest in time to -# your hour(s) of interest to the above naming format. A script -# "ush/mrms_pull_topofhour.py" is provided for this purpose. -# -# If data is retrieved from HPSS, it will automatically staged by this -# this script. -# -# -# NDAS (NAM Data Assimilation System) conventional observations -# ---------- -# If data is available on disk, it must be in the following -# directory structure and file name conventions expected by verification -# tasks: -# -# {NDAS_OBS_DIR}/{YYYYMMDD}/prepbufr.ndas.{YYYYMMDDHH} -# -# Note that data retrieved from HPSS and other sources may be in a -# different format: nam.t{hh}z.prepbufr.tm{prevhour}.nr, where hh is -# either 00, 06, 12, or 18, and prevhour is the number of hours prior to -# hh (00 through 05). If using custom staged data, you will have to -# rename the files accordingly. -# -# If data is retrieved from HPSS, it will be automatically staged by this -# this script. 
-# -# -# NOHRSC snow accumulation observations -# ---------- -# If data is available on disk, it must be in the following -# directory structure and file name conventions expected by verification -# tasks: -# -# {NOHRSC_OBS_DIR}/{YYYYMMDD}/sfav2_CONUS_{AA}h_{YYYYMMDD}{HH}_grid184.grb2 -# -# where AA is the 2-digit accumulation duration in hours: 06 or 24 -# -# METplus is configured to verify snowfall using 06- and 24-h accumulated -# snowfall from 6- and 12-hourly NOHRSC files, respectively. -# -# If data is retrieved from HPSS, it will automatically staged by this -# this script. +# Make sure the obs type is valid. Then call the python script get_obs.py +# to get the obs files. # #----------------------------------------------------------------------- # @@ -144,8 +60,15 @@ Valid observation types are: $(printf "\"%s\" " ${valid_obtypes[@]}) " fi -script_bn="get_obs_$(echo_lowercase ${OBTYPE})" -$USHdir/${script_bn}.sh + +script_bn="get_obs" +cmd="\ +python3 -u ${USHdir}/${script_bn}.py \ +--var_defns_path "${GLOBAL_VAR_DEFNS_FP}" \ +--obtype ${OBTYPE} \ +--obs_day ${PDY}" +print_info_msg "CALLING: ${cmd}" +${cmd} || print_err_msg_exit "Error calling ${script_bn}.py." 
# #----------------------------------------------------------------------- # @@ -155,7 +78,8 @@ $USHdir/${script_bn}.sh #----------------------------------------------------------------------- # mkdir -p ${WFLOW_FLAG_FILES_DIR} -touch "${WFLOW_FLAG_FILES_DIR}/${script_bn}_${PDY}_complete.txt" +file_bn="get_obs_$(echo_lowercase ${OBTYPE})" +touch "${WFLOW_FLAG_FILES_DIR}/${file_bn}_${PDY}_complete.txt" # #----------------------------------------------------------------------- # diff --git a/ush/get_obs.py b/ush/get_obs.py new file mode 100644 index 0000000000..f6e2fed265 --- /dev/null +++ b/ush/get_obs.py @@ -0,0 +1,885 @@ +#!/usr/bin/env python3 + +import os +import sys +import shutil +import argparse +import logging +from pathlib import Path +import datetime as dt +from textwrap import dedent +from pprint import pprint +from math import ceil, floor +import subprocess +from python_utils import ( + load_yaml_config, +) + + +def get_obs_arcv_hr(obtype, arcv_intvl_hrs, hod): + """ + This file defines a function that, for the given observation type, obs + archive interval, and hour of day, returns the hour (counting from the + start of the day) corresponding to the archive file in which the obs file + for the given hour of day is included. + + Note that for cumulative fields (like CCPA and NOHRSC, as opposed to + instantaneous ones like MRMS and NDAS), the archive files corresponding + to hour 0 of the day represent accumulations over the previous day. Thus, + here, we never return an achive hour of 0 for cumulative fields. Instead, + if the specified hour-of-day is 0, we consider that to represent the 0th + hour of the NEXT day (i.e. the 24th hour of the current day) and set the + archive hour to 24. + + Args: + obtype: + The observation type. A string. + + arcv_intvl_hrs: + Time interval (in hours) between archive files. An integer. For example, + if the obs files are bundled into 6-hourly archives, then this will be + set to 6. 
This must be between 1 and 24 and must divide evenly into 24 + (this is checked for elsewhere). + + hod: + The hour of the day. An integer. This must be between 0 and 23. For + cumulative fields (CCPA and NOHRSC), hour 0 is treated as that of the + next day, i.e. as the 24th hour of the current day. + + Returns: + arcv_hr: + The hour since the start of day corresponding to the archive file containing + the obs file for the given hour of day. An integer. + """ + + valid_obtypes = ['CCPA', 'ccpa', 'NOHRSC', 'nohrsc', 'MRMS', 'mrms', 'NDAS', 'ndas'] + if obtype not in valid_obtypes: + msg = dedent(f""" + The specified observation type is not supported: + obtype = {obtype} + Valid observation types are: + {valid_obtypes} + """) + logging.error(msg) + raise Exception(msg) + + if (hod < 0) or (hod > 23): + msg = dedent(f""" + The specified hour-of-day must be between 0 and 23, inclusive but isn't: + hod = {hod} + """) + logging.error(msg) + raise Exception(msg) + + obtype_upper = obtype.upper() + if obtype_upper in ['CCPA']: + if hod == 0: + arcv_hr = 24 + else: + arcv_hr = ceil(hod/arcv_intvl_hrs)*arcv_intvl_hrs + elif obtype_upper in ['NOHRSC']: + if hod == 0: + arcv_hr = 24 + else: + arcv_hr = floor(hod/arcv_intvl_hrs)*arcv_intvl_hrs + elif obtype_upper in ['MRMS']: + arcv_hr = (floor(hod/arcv_intvl_hrs))*arcv_intvl_hrs + elif obtype_upper in ['NDAS']: + arcv_hr = (floor(hod/arcv_intvl_hrs) + 1)*arcv_intvl_hrs + + return arcv_hr + + +def get_obs(config, obtype, yyyymmdd_task): + """ +This script performs several important tasks for preparing data for +verification tasks. Depending on the value of the environment variable +OBTYPE=(CCPA|MRMS|NDAS|NOHRSC), the script will prepare that particular data +set. + +If data is not available on disk (in the location specified by +CCPA_OBS_DIR, MRMS_OBS_DIR, NDAS_OBS_DIR, or NOHRSC_OBS_DIR respectively), +the script attempts to retrieve the data from HPSS using the retrieve_data.py +script. 
Depending on the data set, there are a few strange quirks and/or +bugs in the way data is organized; see in-line comments for details. + + +CCPA (Climatology-Calibrated Precipitation Analysis) precipitation accumulation obs +---------- +If data is available on disk, it must be in the following +directory structure and file name conventions expected by verification +tasks: + +{CCPA_OBS_DIR}/{YYYYMMDD}/ccpa.t{HH}z.01h.hrap.conus.gb2 + +If data is retrieved from HPSS, it will be automatically staged by this +script. + +Notes about the data and how it's used for verification: + +1. Accumulation is currently hardcoded to 01h. The verification will +use MET/pcp-combine to sum 01h files into desired accumulations. + +2. There is a problem with the valid time in the metadata for files +valid from 19 - 00 UTC (or files under the '00' directory). This is +accounted for in this script for data retrieved from HPSS, but if you +have manually staged data on disk you should be sure this is accounted +for. See in-line comments below for details. + + +MRMS (Multi-Radar Multi-Sensor) radar observations +---------- +If data is available on disk, it must be in the following +directory structure and file name conventions expected by verification +tasks: + +{MRMS_OBS_DIR}/{YYYYMMDD}/[PREFIX]{YYYYMMDD}-{HH}0000.grib2, + +Where [PREFIX] is MergedReflectivityQCComposite_00.50_ for reflectivity +data and EchoTop_18_00.50_ for echo top data. If data is not available +at the top of the hour, you should rename the file closest in time to +your hour(s) of interest to the above naming format. A script +"ush/mrms_pull_topofhour.py" is provided for this purpose. + +If data is retrieved from HPSS, it will automatically staged by this +this script. 
+ + +NDAS (NAM Data Assimilation System) conventional observations +---------- +If data is available on disk, it must be in the following +directory structure and file name conventions expected by verification +tasks: + +{NDAS_OBS_DIR}/{YYYYMMDD}/prepbufr.ndas.{YYYYMMDDHH} + +Note that data retrieved from HPSS and other sources may be in a +different format: nam.t{hh}z.prepbufr.tm{prevhour}.nr, where hh is +either 00, 06, 12, or 18, and prevhour is the number of hours prior to +hh (00 through 05). If using custom staged data, you will have to +rename the files accordingly. + +If data is retrieved from HPSS, it will be automatically staged by this +this script. + + +NOHRSC snow accumulation observations +---------- +If data is available on disk, it must be in the following +directory structure and file name conventions expected by verification +tasks: + +{NOHRSC_OBS_DIR}/{YYYYMMDD}/sfav2_CONUS_{AA}h_{YYYYMMDD}{HH}_grid184.grb2 + +where AA is the 2-digit accumulation duration in hours: 06 or 24 + +METplus is configured to verify snowfall using 06- and 24-h accumulated +snowfall from 6- and 12-hourly NOHRSC files, respectively. + +If data is retrieved from HPSS, it will automatically staged by this +this script. + """ + + # Convert obtype to upper case to simplify code below. + obtype = obtype.upper() + + # For convenience, get the verification portion of the configuration + # dictionary. + vx_config = cfg['verification'] + + # Get the time interval (in hours) at which the obs are available. + key = obtype + '_OBS_AVAIL_INTVL_HRS' + obs_avail_intvl_hrs = vx_config[key] + + # The obs availability inerval must divide evenly into 24 hours. Otherwise, + # different days would have obs available at different hours-of-day. Make + # sure this is the case. 
+ remainder = 24 % obs_avail_intvl_hrs + if remainder != 0: + msg = dedent(f""" + The obs availability interval for obs of type {obtype} must divide evenly + into 24 but doesn't: + obs_avail_intvl_hrs = {obs_avail_intvl_hrs} + 24 % obs_avail_intvl_hrs = {remainder} + """) + raise Exception(msg) + + # For convenience, get obs availability interval as a datetime object. + obs_avail_intvl = dt.timedelta(hours=obs_avail_intvl_hrs) + + # Get the base directory for the observations. + key = obtype + '_OBS_DIR' + obs_dir = config['platform'][key] + + # Set the group of fields for each observation type. We assume there is + # a separate obs file type for each such field group in the observations. + if obtype == 'CCPA': + field_groups_in_obs = ['APCP'] + elif obtype == 'NOHRSC': + field_groups_in_obs = ['ASNOW'] + elif obtype == 'MRMS': + field_groups_in_obs = ['REFC', 'RETOP'] + elif obtype == 'NDAS': + field_groups_in_obs = ['ADPSFCandADPUPA'] + num_field_groups = len(field_groups_in_obs) + + # For each field group in the observations, get the METplus file name + # template for the observation files. Then combine these with the base + # directory to get the METplus template for the full path to the processed + # obs files. + fp_proc_templates = [] + for fg in field_groups_in_obs: + key = 'OBS_' + obtype + '_' + fg + '_FN_TEMPLATE' + fn_proc_template = vx_config[key] + fp_proc_templates.append(os.path.join(obs_dir, fn_proc_template)) + # + #----------------------------------------------------------------------- + # + # Set variables that are only needed for some obs types. + # + #----------------------------------------------------------------------- + # + + # For cumulative obs, set the accumulation period to use when getting obs + # files. This is simply (a properly formatted version of) the obs + # availability interval. 
+ accum_obs_formatted = None + if obtype == 'CCPA': + accum_obs_formatted = f'{obs_avail_intvl_hrs:02d}' + elif obtype == 'NOHRSC': + accum_obs_formatted = f'{obs_avail_intvl_hrs:d}' + + # For MRMS obs, set field-dependent parameters needed in forming grib2 + # file names. + fields_in_filenames = [] + levels_in_filenames = [] + if obtype == 'MRMS': + for fg in field_groups_in_obs: + if fg == 'REFC': + fields_in_filenames.append('MergedReflectivityQCComposite') + levels_in_filenames.append('00.50') + elif fg == 'RETOP': + fields_in_filenames.append('EchoTop') + levels_in_filenames.append('18_00.50') + else: + msg = dedent(f""" + Invalid field specified for obs type: + obtype = {obtype} + field = {field} + """) + logging.error(msg) + raise Exception(msg) + + # CCPA files for 1-hour accumulation have incorrect metadata in the files + # under the "00" directory from 20180718 to 20210504. Set these starting + # and ending dates as datetime objects for later use. + yyyymmdd_bad_metadata_start_str = None + yyyymmdd_bad_metadata_end_str = None + yyyymmdd_bad_metadata_start = None + yyyymmdd_bad_metadata_end = None + if obtype == 'CCPA': + yyyymmdd_bad_metadata_start_str = '20180718' + yyyymmdd_bad_metadata_end_str = '20210504' + yyyymmdd_bad_metadata_start = dt.datetime.strptime(yyyymmdd_bad_metadata_start_str, '%Y%m%d') + yyyymmdd_bad_metadata_end = dt.datetime.strptime(yyyymmdd_bad_metadata_end_str, '%Y%m%d') + # + #----------------------------------------------------------------------- + # + # Get the list of all the times in the current day at which to retrieve + # obs. This is an array with elements having format "YYYYMMDDHH". 
+ # + #----------------------------------------------------------------------- + # + yyyymmdd_task_str = dt.datetime.strftime(yyyymmdd_task, '%Y%m%d') + key = 'OBS_RETRIEVE_TIMES_' + obtype + '_' + yyyymmdd_task_str + obs_retrieve_times_crnt_day_str = vx_config[key] + obs_retrieve_times_crnt_day \ + = [dt.datetime.strptime(yyyymmddhh_str, '%Y%m%d%H') for yyyymmddhh_str in obs_retrieve_times_crnt_day_str] + # + #----------------------------------------------------------------------- + # + # Obs files will be obtained by extracting them from the relevant n-hourly + # archives, where n is the archive interval in hours (denoted below by the + # variable arcv_intvl_hrs). Thus, we must first obtain the sequence of + # hours (since hour 0 of the task day) corresponding to the archive files + # from which we must extract obs files. We refer to this as the sequence + # of archive hours. + # + # To generate this sequence, we first set the archive interval and then + # set the starting and ending archive hour values. + # + # For CCPA, the archive interval is 6 hours, i.e. the obs files are + # bundled into 6-hourly archives. This implies 4 archives per day. The + # archives are organized such that each one contains 6 files, so that the + # obs availability interval is + # + # obs_avail_intvl_hrs = (24 hrs)/[(4 archives)*(6 files/archive)] + # = 1 hr/file + # + # i.e. there is one obs file for each hour of the day containing the + # accumulation over that one hour. The archive corresponding to hour 0 + # of the current day contains 6 files representing accumulations during + # the 6 hours of the previous day. The archive corresponding to hour 6 + # of the current day corresponds to accumulations during the first 6 + # hours of the current day, and the archives corresponding to hours 12 + # and 18 of the current day correspond to accumulations during the 2nd + # and 3rd 6-hourly intervals of the current day. 
Thus, to obtain all the + # one-hour accumulations for the current day, we must extract all the obs + # files from the archives corresponding to hours 6, 12, and 18 of the + # current day and hour 0 of the next day. This corresponds to an archive + # hour sequence set below of [6, 12, 18, 24]. Thus, in the simplest case + # in which the observation retrieval times include all hours of the + # current task's day at which obs files are available and none of the obs + # files for this day already exist on disk, this sequence will be [6, 12, + # 18, 24]. In other cases, the sequence we loop over will be a subset of + # [6, 12, 18, 24]. + # + # For NOHRSC, the archive interval is 24 hours, i.e. the obs files are + # bundled into 24-hourly archives. This implies just 1 archive per day. + # The archives are organized such that each one contains 4 files, so that + # the obs availability interval is + # + # obs_avail_intvl_hrs = (24 hrs)/[(1 archive)*(4 files/archive)] + # = 6 hr/file + # + # i.e. there is one obs file for each 6-hour interval of the day containing + # the accumulation over those 6 hours. The 4 obs files within each archive + # correspond to hours 0, 6, 12, and 18 of the current day. The obs file + # for hour 0 contains accumulations during the last 6 hours of the previous + # day, while those for hours 6, 12, and 18 contain accumulations for the + # first, second, and third 6-hour chunks of the current day. Thus, to + # obtain all the 6-hour accumulations for the current day, we must extract + # from the archive for the current day the obs files for hours 6, 12, and + # 18 and from the archive for the next day the obs file for hour 0. This + # corresponds to an archive hour sequence set below of [0, 24]. Thus, in + # the simplest case in which the observation retrieval times include all + # hours of the current task's day at which obs files are available and + # none of the obs files for this day already exist on disk, this sequence + # will be [0, 24]. 
In other cases, the sequence we loop over will be a + # subset of [0, 24]. + # + # For NDAS, the archive interval is 6 hours, i.e. the obs files are + # bundled into 6-hourly archives. This implies 4 archives per day. The + # archives are organized such that each one contains 7 files (not say 6). + # The archive associated with time yyyymmddhh_arcv contains the hourly + # files at + # + # yyyymmddhh_arcv - 6 hours + # yyyymmddhh_arcv - 5 hours + # ... + # yyyymmddhh_arcv - 2 hours + # yyyymmddhh_arcv - 1 hours + # yyyymmddhh_arcv - 0 hours + # + # These are known as the tm06, tm05, ..., tm02, tm01, and tm00 files, + # respectively. Thus, the tm06 file from the current archive, say the + # one associated with time yyyymmddhh_arcv, has the same valid time as + # the tm00 file from the previous archive, i.e. the one associated with + # time (yyyymmddhh_arcv - 6 hours). It turns out the tm06 file from the + # current archive contains more/better observations than the tm00 file + # from the previous archive. Thus, for a given archive time yyyymmddhh_arcv, + # we use 6 of the 7 files at tm06, ..., tm01 but not the one at tm00, + # effectively resulting in an 6 files per archive for NDAS obs. The obs + # availability interval is then + # + # obs_avail_intvl_hrs = (24 hrs)/[(4 archives)*(6 files/archive)] + # = 1 hr/file + # + # i.e. there is one obs file for each hour of the day containing values + # at that hour. The archive corresponding to hour 0 of the current day + # contains 6 files valid at hours 18 through 23 of the previous day. The + # archive corresponding to hour 6 of the current day contains 6 files + # valid at hours 0 through 5 of the current day, and the archives + # corresponding to hours 12 and 18 of the current day each contain 6 + # files valid at hours 6 through 11 and 12 through 17 of the current day. 
+ # Thus, to obtain all the hourly values for the current day (from hour + # 0 to hour 23), we must extract the 6 obs files (excluding the tm00 + # ones) from the archives corresponding to hours 6, 12, and 18 of the + # current day and the archive corresponding to hour 0 of the next day. + # This corresponds to an archive hour sequence set below of [6, 12, 18, + # 24]. Thus, in the simplest case in which the observation retrieval + # times include all hours of the current task's day at which obs files + # are available and none of the obs files for this day already exist on + # disk, this sequence will be [6, 12, 18, 24]. In other cases, the + # sequence we loop over will be a subset of [6, 12, 18, 24]. + # + #----------------------------------------------------------------------- + # + if obtype == 'CCPA': + arcv_intvl_hrs = 6 + elif obtype == 'NOHRSC': + arcv_intvl_hrs = 24 + elif obtype == 'MRMS': + arcv_intvl_hrs = 24 + elif obtype == 'NDAS': + arcv_intvl_hrs = 6 + arcv_intvl = dt.timedelta(hours=arcv_intvl_hrs) + + # Number of obs files within each archive. + num_obs_files_per_arcv = int(arcv_intvl/obs_avail_intvl) + + # Initial guess for starting archive hour. This is set to the archive + # hour containing obs at the first obs retrieval time of the day. + hod_first = obs_retrieve_times_crnt_day[0].hour + arcv_hr_start = get_obs_arcv_hr(obtype, arcv_intvl_hrs, hod_first) + + # Ending archive hour. This is set to the archive hour containing obs at + # the last obs retrieval time of the day. + hod_last = obs_retrieve_times_crnt_day[-1].hour + arcv_hr_end = get_obs_arcv_hr(obtype, arcv_intvl_hrs, hod_last) + + # Set other variables needed below when evaluating the METplus template for + # the full path to the processed observation files. + one_hour = dt.timedelta(hours=1) + ushdir = config['user']['USHdir'] + + # Check whether any obs files already exist on disk in their processed + # (i.e. final) locations. 
Here, by "processed" we mean after any renaming + # and rearrangement of files that this script may do to the "raw" files, + # i.e. the files as they are named and arranged within the archive (tar) + # files on HPSS. If so, adjust the starting archive hour. In the process, + # keep a count of the number of obs files that already exist on disk. + num_existing_files = 0 + do_break = False + for yyyymmddhh in obs_retrieve_times_crnt_day: + + for fp_proc_templ in fp_proc_templates: + # Set the full path to the final processed obs file (fp_proc). + lhr = int((yyyymmddhh - yyyymmdd_task)/one_hour) + cmd = '; '.join(['export USHdir=' + ushdir, + 'export yyyymmdd_task=' + yyyymmdd_task_str, + 'export lhr=' + str(lhr), + 'export METplus_timestr_tmpl=' + fp_proc_templ, + os.path.join(ushdir, 'run_eval_METplus_timestr_tmpl.sh')]) + result = subprocess.run(cmd, shell=True, capture_output=True, text=True) + fp_proc = result.stdout.strip() + + # Check whether file already exists. + if os.path.isfile(fp_proc): + num_existing_files += 1 + msg = dedent(f""" + File already exists on disk: + fp_proc = {fp_proc} + """) + logging.info(msg) + else: + hod = yyyymmddhh.hour + arcv_hr_start = get_obs_arcv_hr(obtype, arcv_intvl_hrs, hod) + msg = dedent(f""" + File does not exist on disk: + fp_proc = {fp_proc} + Setting the hour (since hour 0 of the current task day) of the first + archive to retrieve to: + arcv_hr_start = {arcv_hr_start} + """) + logging.info(msg) + do_break = True + break + + if do_break: break + + # If the number of obs files that already exist on disk is equal to the + # number of obs files needed, then there is no need to retrieve any files. 
+ num_obs_retrieve_times_crnt_day = len(obs_retrieve_times_crnt_day)
+ num_files_needed = num_obs_retrieve_times_crnt_day*num_field_groups
+ if num_existing_files == num_files_needed:
+
+ msg = dedent(f"""
+ All obs files needed for the current day (yyyymmdd_task) already exist
+ on disk:
+ yyyymmdd_task = {yyyymmdd_task}
+ Thus, there is no need to retrieve any files.
+ """)
+ logging.info(msg)
+ return True
+
+ # If the number of obs files that already exist on disk is not equal to
+ # the number of obs files needed, then we will need to retrieve files.
+ # In this case, set the sequence of hours corresponding to the archives
+ # from which files will be retrieved.
+ else:
+
+ arcv_hrs = [hr for hr in range(arcv_hr_start, arcv_hr_end+arcv_intvl_hrs, arcv_intvl_hrs)]
+ msg = dedent(f"""
+ At least some obs files needed for the current day (yyyymmdd_task)
+ do not exist on disk:
+ yyyymmdd_task = {yyyymmdd_task}
+ The number of obs files needed for the current day is:
+ num_files_needed = {num_files_needed}
+ The number of obs files that already exist on disk is:
+ num_existing_files = {num_existing_files}
+ Will retrieve remaining files by looping over archives corresponding to
+ the following hours (since hour 0 of the current day):
+ arcv_hrs = {arcv_hrs}
+ """)
+ logging.info(msg)
+ #
+ #-----------------------------------------------------------------------
+ #
+ # At this point, at least some obs files for the current day need to be
+ # retrieved. Thus, loop over the relevant archives that contain obs for
+ # the day given by yyyymmdd_task and retrieve files as needed.
+ #
+ # Note that the NOHRSC data on HPSS are archived by day, with the archive
+ # for a given day containing 6-hour as well as 24-hour grib2 files. 
As + # described above, the four 6-hour files are for accumulated snowfall at + # hour 0 of the current day (which represents accumulation over the last + # 6 hours of the previous day) as well as hours 6, 12, and 18, while the + # two 24-hour files are at hour 0 (which represents accumulation over all + # 24 hours of the previous day) and 12 (which represents accumulation over + # the last 12 hours of the previous day plus the first 12 hours of the + # current day). Here, we will only obtain the 6-hour files. In other + # workflow tasks, the values in these 6-hour files will be added as + # necessary to obtain accumulations over longer periods (e.g. 24 hours). + # Since the four 6-hour files are in one archive and are relatively small + # (on the order of kilobytes), we get them all with a single call to the + # retrieve_data.py script. + # + #----------------------------------------------------------------------- + # + + # Whether to move the files or copy them from their raw to their processed + # locations. + mv_or_cp = 'cp' + # Whether to remove raw observations after processed directories have + # been created from them. + key = 'REMOVE_RAW_OBS_' + obtype + remove_raw_obs = config['platform'][key] + # If the raw directories and files are to be removed at the end of this + # script, no need to copy the files since the raw directories are going + # to be removed anyway. + if remove_raw_obs: + mv_or_cp = 'mv' + + # Base directory that will contain the archive subdirectories in which + # the files extracted from each archive (tar) file will be placed. We + # refer to this as the "raw" base directory because it contains files + # as they are found in the archives before any processing by this script. + basedir_raw = os.path.join(obs_dir, 'raw_' + yyyymmdd_task_str) + + for arcv_hr in arcv_hrs: + + msg = dedent(f""" + Processing archive hour {arcv_hr} ... + """) + logging.info(msg) + + # Calculate the time information for the current archive. 
+ yyyymmddhh_arcv = yyyymmdd_task + dt.timedelta(hours=arcv_hr) + yyyymmddhh_arcv_str = dt.datetime.strftime(yyyymmddhh_arcv, '%Y%m%d%H') + yyyymmdd_arcv_str = dt.datetime.strftime(yyyymmddhh_arcv, '%Y%m%d') + + # Directory that will contain the files retrieved from the current archive + # file. We refer to this as the "raw" archive directory because it will + # contain the files as they are in the archive before any processing by + # this script. + if obtype == 'CCPA': + arcv_dir_raw = os.path.join(basedir_raw, yyyymmddhh_arcv_str) + # For NOHRSC, the hour-of-day for the archive is irrelevant since there + # is only one archive per day, so don't include it in the raw archive + # directory's name. + elif obtype == 'NOHRSC': + arcv_dir_raw = os.path.join(basedir_raw, yyyymmdd_arcv_str) + # Since for MRMS data there is only one archive per day, that directory + # is redundant, so for simplicity we set arcv_dir_raw to just basedir_raw. + elif obtype == 'MRMS': + arcv_dir_raw = basedir_raw + elif obtype == 'NDAS': + arcv_dir_raw = os.path.join(basedir_raw, yyyymmddhh_arcv_str) + + # Check whether any of the obs retrieval times for the day associated with + # this task fall in the time interval spanned by the current archive. If + # so, set the flag (do_retrieve) to retrieve the files in the current + # archive. 
+ if obtype == 'CCPA': + arcv_contents_start = yyyymmddhh_arcv - (num_obs_files_per_arcv - 1)*obs_avail_intvl + arcv_contents_end = yyyymmddhh_arcv + elif obtype == 'NOHRSC': + arcv_contents_start = yyyymmddhh_arcv + arcv_contents_end = yyyymmddhh_arcv + (num_obs_files_per_arcv - 1)*obs_avail_intvl + elif obtype == 'MRMS': + arcv_contents_start = yyyymmddhh_arcv + arcv_contents_end = yyyymmddhh_arcv + (num_obs_files_per_arcv - 1)*obs_avail_intvl + elif obtype == 'NDAS': + arcv_contents_start = yyyymmddhh_arcv - num_obs_files_per_arcv*obs_avail_intvl + arcv_contents_end = yyyymmddhh_arcv - obs_avail_intvl + + do_retrieve = False + for obs_retrieve_time in obs_retrieve_times_crnt_day: + if (obs_retrieve_time >= arcv_contents_start) and \ + (obs_retrieve_time <= arcv_contents_end): + do_retrieve = True + break + + if not do_retrieve: + msg = dedent(f""" + None of the current day's observation retrieval times (possibly including + hour 0 of the next day if considering a cumulative obs type) fall in the + range spanned by the current {arcv_intvl_hrs}-hourly archive file. The + bounds of the data in the current archive are: + arcv_contents_start = {arcv_contents_start} + arcv_contents_end = {arcv_contents_end} + The times at which obs need to be retrieved are: + obs_retrieve_times_crnt_day = {obs_retrieve_times_crnt_day} + """) + logging.info(msg) + + else: + + # Make sure the raw archive directory exists because it is used below as + # the output directory of the retrieve_data.py script (so if this directory + # doesn't already exist, that script will fail). Creating this directory + # also ensures that the raw base directory (basedir_raw) exists before we + # change location to it below. + Path(arcv_dir_raw).mkdir(parents=True, exist_ok=True) + + # The retrieve_data.py script first extracts the contents of the archive + # file into the directory it was called from and then moves them to the + # specified output location (via the --output_path option). 
Note that
+ # the relative paths of obs files within archives associated with different
+ # days may be the same. Thus, if files with the same archive-relative
+ # paths are being simultaneously extracted from multiple archive files
+ # (by multiple get_obs tasks), they will likely clobber each other if the
+ # extraction is being carried out into the same location on disk. To avoid
+ # this, we first change location to the raw base directory (whose name is
+ # obs-day dependent) and then call the retrieve_data.py script.
+ os.chdir(basedir_raw)
+
+ # Pull obs from HPSS. This will get all the obs files in the current
+ # archive and place them in the raw archive directory.
+ #
+ # Note that for the specific case of NDAS obs, this will get all 7 obs
+ # files in the current archive, although we will make use of only 6 of
+ # these (we will not use the tm00 file).
+ parmdir = config['user']['PARMdir']
+ cmd = ' '.join(['python3', \
+ '-u', os.path.join(ushdir, 'retrieve_data.py'), \
+ '--debug', \
+ '--file_set', 'obs', \
+ '--config', os.path.join(parmdir, 'data_locations.yml'), \
+ '--cycle_date', yyyymmddhh_arcv_str, \
+ '--data_stores', 'hpss', \
+ '--data_type', obtype + '_obs', \
+ '--output_path', arcv_dir_raw, \
+ '--summary_file', 'retrieve_data.log'])
+ result = subprocess.run(cmd, shell=True, capture_output=True, text=True)
+ rc = result.returncode
+
+ # Loop over the raw obs files extracted from the current archive and
+ # generate from them the processed obs files.
+ #
+ # For CCPA obs, for most dates this consists of simply copying or moving
+ # the files from the raw archive directory to the processed directory,
+ # possibly renaming them in the process. However, for dates between
+ # 20180718 and 20210504 and hours-of-day 19 through the end of the day
+ # (i.e. hour 0 of the next day), it involves using wgrib2 to correct an
+ # error in the metadata of the raw file and writing the corrected data
+ # to a new grib2 file in the processed location. 
+ # + # For NOHRSC obs, this consists of simply copying or moving the files from + # the raw archive directory to the processed directory, possibly renaming + # them in the process. + # + # For NDAS obs, this consists of simply copying or moving the files from + # the raw archive directory to the processed directory, possibly renaming + # them in the process. Note that the tm06 file in a given archive contain + # more/better observations than the tm00 file in the next archive (their + # valid times are equivalent), so we use the tm06 files. + if obtype == 'CCPA': + in_arcv_times = [yyyymmddhh_arcv - i*obs_avail_intvl for i in range(0,num_obs_files_per_arcv)] + elif obtype == 'NOHRSC': + in_arcv_times = [yyyymmddhh_arcv + i*obs_avail_intvl for i in range(0,num_obs_files_per_arcv)] + elif obtype == 'MRMS': + in_arcv_times = [yyyymmddhh_arcv + i*obs_avail_intvl for i in range(0,num_obs_files_per_arcv)] + elif obtype == 'NDAS': + in_arcv_times = [yyyymmddhh_arcv - (i+1)*obs_avail_intvl for i in range(0,num_obs_files_per_arcv)] + in_arcv_times.sort() + + for yyyymmddhh in in_arcv_times: + + # Create the processed obs file from the raw one (by moving, copying, or + # otherwise) only if the time of the current file in the current archive + # also exists in the list of obs retrieval times for the current day. + if yyyymmddhh in obs_retrieve_times_crnt_day: + + for i, fp_proc_templ in enumerate(fp_proc_templates): + + # For MRMS obs, first select from the set of raw files for the current day + # those that are nearest in time to the current hour. Unzip these in a + # temporary subdirectory under the raw base directory. + # + # Note that the script we call to do this (mrms_pull_topofhour.py) assumes + # a certain file naming convention. That convention must match the names + # of the files that the retrieve_data.py script called above ends up + # retrieving. 
The list of possible templates for these names is given
+ # in parm/data_locations.yml, but which of those is actually used is not
+ # known until retrieve_data.py completes. Thus, that information needs
+ # to be passed back by retrieve_data.py and then passed to mrms_pull_topofhour.py.
+ # For now, we hard-code the file name here.
+ if obtype == 'MRMS':
+ yyyymmddhh_str = dt.datetime.strftime(yyyymmddhh, '%Y%m%d%H')
+ cmd = ' '.join(['python3', \
+ '-u', os.path.join(ushdir, 'mrms_pull_topofhour.py'), \
+ '--valid_time', yyyymmddhh_str, \
+ '--source', basedir_raw, \
+ '--outdir', os.path.join(basedir_raw, 'topofhour'), \
+ '--product', fields_in_filenames[i], \
+ '--no-add_vdate_subdir'])
+ result = subprocess.run(cmd, shell=True, capture_output=True, text=True)
+ rc = result.returncode
+
+ # The raw file name needs to be the same as what the retrieve_data.py
+ # script called above ends up retrieving. The list of possible templates
+ # for this name is given in parm/data_locations.yml, but which of those
+ # is actually used is not known until retrieve_data.py completes. Thus,
+ # that information needs to be passed back by the script and used here.
+ # For now, we hard-code the file name here.
+ if obtype == 'CCPA':
+ hr = yyyymmddhh.hour
+ fn_raw = 'ccpa.t' + f'{hr:02d}' + 'z.' 
+ accum_obs_formatted + 'h.hrap.conus.gb2' + elif obtype == 'NOHRSC': + yyyymmddhh_str = dt.datetime.strftime(yyyymmddhh, '%Y%m%d%H') + fn_raw = 'sfav2_CONUS_' + accum_obs_formatted + 'h_' + yyyymmddhh_str + '_grid184.grb2' + elif obtype == 'MRMS': + hr = yyyymmddhh.hour + fn_raw = fields_in_filenames[i] + '_' + levels_in_filenames[i] \ + + '_' + yyyymmdd_task_str + '-' + f'{hr:02d}' + '0000.grib2' + fn_raw = os.path.join('topofhour', fn_raw) + elif obtype == 'NDAS': + time_ago = yyyymmddhh_arcv - yyyymmddhh + hrs_ago = int(time_ago.seconds/3600) + hh_arcv_str = dt.datetime.strftime(yyyymmddhh_arcv, '%H') + fn_raw = 'nam.t' + hh_arcv_str + 'z.prepbufr.tm' + f'{hrs_ago:02d}' + '.nr' + fp_raw = os.path.join(arcv_dir_raw, fn_raw) + + # Set the full path to the final processed obs file (fp_proc) we want to + # create. + lhr = int((yyyymmddhh - yyyymmdd_task)/one_hour) + cmd = '; '.join(['export USHdir=' + ushdir, + 'export yyyymmdd_task=' + yyyymmdd_task_str, + 'export lhr=' + str(lhr), + 'export METplus_timestr_tmpl=' + fp_proc_templ, + os.path.join(ushdir, 'run_eval_METplus_timestr_tmpl.sh')]) + result = subprocess.run(cmd, shell=True, capture_output=True, text=True) + fp_proc = result.stdout.strip() + + # Make sure the directory in which the processed file will be created exists. + dir_proc = os.path.dirname(fp_proc) + Path(dir_proc).mkdir(parents=True, exist_ok=True) + + msg = dedent(f""" + Creating the processed obs file + {fp_proc} + from the raw file + {fp_raw} + ... + """) + logging.info(msg) + + yyyymmdd = yyyymmddhh.replace(hour=0, minute=0, second=0) + # CCPA files for 1-hour accumulation have incorrect metadata in the files + # under the "00" directory from 20180718 to 20210504. After the data is + # pulled, reorganize into correct yyyymmdd structure. 
+ if (obtype == 'CCPA') and \
+ ((yyyymmdd >= yyyymmdd_bad_metadata_start) and (yyyymmdd <= yyyymmdd_bad_metadata_end)) and \
+ (((hr >= 19) and (hr <= 23)) or (hr == 0)):
+ cmd = ' '.join(['wgrib2', fp_raw, '-set_date -24hr -grib', fp_proc, '-s'])
+ result = subprocess.run(cmd, shell=True, capture_output=True, text=True)
+ elif mv_or_cp == 'mv':
+ shutil.move(fp_raw, fp_proc)
+ elif mv_or_cp == 'cp':
+ shutil.copy(fp_raw, fp_proc)
+ #
+ #-----------------------------------------------------------------------
+ #
+ # Clean up raw obs directories.
+ #
+ #-----------------------------------------------------------------------
+ #
+ if remove_raw_obs:
+ msg = dedent(f"""
+ Removing raw obs directories ...
+ """)
+ logging.info(msg)
+ shutil.rmtree(basedir_raw)
+
+ return True
+
+
+
+def parse_args(argv):
+ """Parse command line arguments"""
+ parser = argparse.ArgumentParser(
+ description="Get observations."
+ )
+
+ parser.add_argument(
+ "--obtype",
+ dest="obtype",
+ type=str,
+ required=True,
+ choices=['CCPA', 'NOHRSC', 'MRMS', 'NDAS'],
+ help="Observation type.",
+ )
+
+ parser.add_argument(
+ "--obs_day",
+ dest="obs_day",
+ type=lambda d: dt.datetime.strptime(d, '%Y%m%d'),
+ required=True,
+ help="Date of observation day, in the form 'YYYYMMDD'.",
+ )
+
+ parser.add_argument(
+ "--var_defns_path",
+ dest="var_defns_path",
+ type=str,
+ required=True,
+ help="Path to variable definitions file.",
+ )
+
+ choices_log_level = [pair for lvl in list(logging._nameToLevel.keys())
+ for pair in (str.lower(lvl), str.upper(lvl))]
+ parser.add_argument(
+ "--log_level",
+ dest="log_level",
+ type=str,
+ required=False,
+ default='info',
+ choices=choices_log_level,
+ help=dedent(f"""
+ Logging level to use with the 'logging' module.
+ """))
+
+ parser.add_argument(
+ "--log_fp",
+ dest="log_fp",
+ type=str,
+ required=False,
+ default='',
+ help=dedent(f"""
+ Name of or path (absolute or relative) to log file. If not specified,
+ the output goes to screen. 
+ """)) + + return parser.parse_args(argv) + + +if __name__ == "__main__": + args = parse_args(sys.argv[1:]) + + # Set up logging. + # If the name/path of a log file has been specified in the command line + # arguments, place the logging output in it (existing log files of the + # same name are overwritten). Otherwise, direct the output to the screen. + log_level = str.upper(args.log_level) + msg_format = "[%(levelname)s:%(name)s: %(filename)s, line %(lineno)s: %(funcName)s()] %(message)s" + if args.log_fp: + logging.basicConfig(level=log_level, format=msg_format, filename=args.log_fp, filemode='w') + else: + logging.basicConfig(level=log_level, format=msg_format) + + cfg = load_yaml_config(args.var_defns_path) + get_obs(cfg, args.obtype, args.obs_day) + + diff --git a/ush/get_obs_ccpa.sh b/ush/get_obs_ccpa.sh deleted file mode 100755 index d3c486c607..0000000000 --- a/ush/get_obs_ccpa.sh +++ /dev/null @@ -1,484 +0,0 @@ -#!/usr/bin/env bash - -# -#----------------------------------------------------------------------- -# -# Source the variable definitions file and the bash utility functions. -# -#----------------------------------------------------------------------- -# -. $USHdir/source_util_funcs.sh -for sect in user platform verification ; do - source_yaml ${GLOBAL_VAR_DEFNS_FP} ${sect} -done - -set -u -#set -x -# -#----------------------------------------------------------------------- -# -# This script performs several important tasks for preparing data for -# verification tasks. Depending on the value of the environment variable -# OBTYPE=(CCPA|MRMS|NDAS|NOHRSC), the script will prepare that particular data -# set. -# -# If data is not available on disk (in the location specified by -# CCPA_OBS_DIR, MRMS_OBS_DIR, NDAS_OBS_DIR, or NOHRSC_OBS_DIR respectively), -# the script attempts to retrieve the data from HPSS using the retrieve_data.py -# script. 
Depending on the data set, there are a few strange quirks and/or -# bugs in the way data is organized; see in-line comments for details. -# -# -# CCPA (Climatology-Calibrated Precipitation Analysis) precipitation accumulation obs -# ---------- -# If data is available on disk, it must be in the following -# directory structure and file name conventions expected by verification -# tasks: -# -# {CCPA_OBS_DIR}/{YYYYMMDD}/ccpa.t{HH}z.01h.hrap.conus.gb2 -# -# If data is retrieved from HPSS, it will be automatically staged by this -# script. -# -# Notes about the data and how it's used for verification: -# -# 1. Accumulation is currently hardcoded to 01h. The verification will -# use MET/pcp-combine to sum 01h files into desired accumulations. -# -# 2. There is a problem with the valid time in the metadata for files -# valid from 19 - 00 UTC (or files under the '00' directory). This is -# accounted for in this script for data retrieved from HPSS, but if you -# have manually staged data on disk you should be sure this is accounted -# for. See in-line comments below for details. -# -#----------------------------------------------------------------------- -# - -# -#----------------------------------------------------------------------- -# -# Below, we will use the retrieve_data.py script to retrieve the CCPA -# grib2 file from a data store (e.g. HPSS). Before doing so, note the -# following: -# -# * The daily archive (tar) file containing CCPA obs has a name of the -# form -# -# [PREFIX].YYYYMMDD.tar -# -# where YYYYMMDD is a given year, month, and day combination, and -# [PREFIX] is a string that is not relevant to the discussion here -# (the value it can take on depends on which of several time periods -# YYYYMMDD falls in, and the retrieve_data.py tries various values -# until it finds one for which a tar file exists). 
Unintuitively, this -# archive file contains accumulation data for valid times starting at -# hour 19 of the PREVIOUS day (YYYYMM[DD-1]) to hour 18 of the current -# day (YYYYMMDD). In other words, the valid times of the contents of -# this archive file are shifted back by 6 hours relative to the time -# string appearing in the name of the file. See section "DETAILS..." -# for a detailed description of the directory structure in the CCPA -# archive files. -# -# * We call retrieve_data.py in a temporary cycle-specific subdirectory -# in order to prevent get_obs_ccpa tasks for different cycles from -# clobbering each other's output. We refer to this as the "raw" CCPA -# base directory because it contains files as they are found in the -# archives before any processing by this script. -# -# * In each (cycle-specific) raw base directory, the data is arranged in -# daily subdirectories with the same timing as in the archive (tar) -# files (which are described in the section "DETAILS..." below). In -# particular, each daily subdirectory has the form YYYYMDD, and it may -# contain CCPA grib2 files for accumulations valid at hour 19 of the -# previous day (YYYYMM[DD-1]) to hour 18 of the current day (YYYYMMDD). -# (Data valid at hours 19-23 of the current day (YYYYMMDD) go into the -# daily subdirectory for the next day, i.e. YYYYMM[DD+1].) We refer -# to these as raw daily (sub)directories to distinguish them from the -# processed daily subdirectories under the processed (final) CCPA base -# directory (basedir_proc). -# -# * For a given cycle, some of the valid times at which there is forecast -# output may not have a corresponding file under the raw base directory -# for that cycle. This is because another cycle that overlaps this cycle -# has already obtained the grib2 CCPA file for that valid time and placed -# it in its processed location; as a result, the retrieveal of that grib2 -# file for this cycle is skipped. 
-# -# * To obtain a more intuitive temporal arrangement of the data in the -# processed CCPA directory structure than the temporal arrangement used -# in the archives and raw directories, we process the raw files such -# that the data in the processed directory structure is shifted forward -# in time 6 hours relative to the data in the archives and raw directories. -# This results in a processed base directory that, like the raw base -# directory, also contains daily subdirectories of the form YYYYMMDD, -# but each such subdirectory may only contain CCPA data at valid hours -# within that day, i.e. at valid times YYYYMMDD[00, 01, ..., 23] (but -# may not contain data that is valid on the previous, next, or any other -# day). -# -# * For data between 20180718 and 20210504, the 01h accumulation data -# (which is the only accumulation we are retrieving) have incorrect -# metadata under the "00" directory in the archive files (meaning for -# hour 00 and hours 19-23, which are the ones in the "00" directory). -# Below, we use wgrib2 to make a correction for this when transferring -# (moving or copying) grib2 files from the raw daily directories to -# the processed daily directories. -# -# -# DETAILS OF DIRECTORY STRUCTURE IN CCPA ARCHIVE (TAR) FILES -# ---------------------------------------------------------- -# -# The daily archive file containing CCPA obs is named -# -# [PREFIX].YYYYMMDD.tar -# -# This file contains accumulation data for valid times starting at hour -# 19 of the PREVIOUS day (YYYYMM[DD-1]) to hour 18 of the current day -# (YYYYMMDD). In particular, when untarred, the daily archive file -# expands into four subdirectories: 00, 06, 12, and 18. The 06, 12, and -# 18 subdirectories contain grib2 files for accumulations valid at or -# below the hour-of-day given by the subdirectory name (and on YYYYMMDD). 
-# For example, the 06 directory contains data valid at: -# -# * YYYYMMDD[01, 02, 03, 04, 05, 06] for 01h accumulations; -# * YYYYMMDD[03, 06] for 03h accumulations; -# * YYYYMMDD[06] for 06h accumulations. -# -# The valid times for the data in the 12 and 18 subdirectories are -# analogous. However, the 00 subdirectory is different in that it -# contains accumulations at hour 00 on YYYYMMDD as well as ones BEFORE -# this time, i.e. the data for valid times other than YYYYMMDD00 are on -# the PREVIOUS day. Thus, the 00 subdirectory contains data valid at -# (note the DD-1, meaning one day prior): -# -# * YYYYMM[DD-1][19, 20, 21, 22, 23] and YYYYMMDD00 for 01h accumulations; -# * YYYYMM[DD-1][19] and YYYYMMDD00 for 03h accumulations; -# * YYYYMMDD00 for 06h accumulations. -# -#----------------------------------------------------------------------- -# - -# The time interval (in hours) at which the obs are available on HPSS -# must divide evenly into 24. Otherwise, different days would have obs -# available at different hours-of-day. Make sure this is the case. -remainder=$(( 24 % CCPA_OBS_AVAIL_INTVL_HRS )) -if [ ${remainder} -ne 0 ]; then - print_err_msg_exit "\ -The obs availability interval CCPA_OBS_AVAIL_INTVL_HRS must divide evenly -into 24 but doesn't: - CCPA_OBS_AVAIL_INTVL_HRS = ${CCPA_OBS_AVAIL_INTVL_HRS} - 24 % CCPA_OBS_AVAIL_INTVL_HRS = ${remainder}" -fi - -# Accumulation period to use when getting obs files. This is simply (a -# properly formatted version of) the obs availability interval. -accum_obs_fmt=$( printf "%02d" "${CCPA_OBS_AVAIL_INTVL_HRS}" ) - -# The day (in the form YYYMMDD) associated with the current task via the -# task's cycledefs attribute in the ROCOTO xml. -yyyymmdd_task=${PDY} - -# Seconds since some reference time that the DATE_UTIL utility uses of -# the day of the current task. This will be used below to find hours -# since the start of this day. 
-sec_since_ref_task=$(${DATE_UTIL} --date "${yyyymmdd_task} 0 hours" +%s) -# -#----------------------------------------------------------------------- -# -# Get the list of all the times in the current day at which to retrieve -# obs. This is an array with elements having format "YYYYMMDDHH". -# -#----------------------------------------------------------------------- -# -array_name="OBS_RETRIEVE_TIMES_${OBTYPE}_${yyyymmdd_task}" -eval obs_retrieve_times_crnt_day=\( \${${array_name}[@]} \) -# -#----------------------------------------------------------------------- -# -# Obs files will be obtained by extracting them from the relevant 6-hourly -# archives. Thus, we need the sequence of archive hours over which to -# loop. In the simplest case, this sequence will be "6 12 18 24". This -# will be the case if the observation retrieval times include all hours -# of the task's day and if none of the obs files for this day already -# exist on disk. In other cases, the sequence we loop over will be a -# subset of "6 12 18 24". -# -# To generate this sequence, we first set its starting and ending values -# as well as the interval. -# -#----------------------------------------------------------------------- -# - -# Sequence interval must be 6 hours because the archives are 6-hourly. -arcv_hr_incr=6 - -# Initial guess for starting archive hour. This is set to the archive -# hour containing obs at the first obs retrieval time of the day. -hh_first=$(echo ${obs_retrieve_times_crnt_day[0]} | cut -c9-10) -hr_first=$((10#${hh_first})) -arcv_hr_start=$(ceil ${hr_first} ${arcv_hr_incr}) -arcv_hr_start=$(( arcv_hr_start*arcv_hr_incr )) - -# Ending archive hour. This is set to the archive hour containing obs at -# the last obs retrieval time of the day. 
-hh_last=$(echo ${obs_retrieve_times_crnt_day[-1]} | cut -c9-10) -hr_last=$((10#${hh_last})) -if [[ ${hr_last} -eq 0 ]]; then - arcv_hr_end=24 -else - arcv_hr_end=$(ceil ${hr_last} ${arcv_hr_incr}) - arcv_hr_end=$(( arcv_hr_end*arcv_hr_incr )) -fi - -# Check whether any obs files already exist on disk in their processed -# (i.e. final) locations. Here, by "processed" we mean after any renaming -# and rearrangement of files that this script may do to the "raw" files, -# i.e. the files as they are named and arranged within the archive (tar) -# files on HPSS. If so, adjust the starting archive hour. In the process, -# keep a count of the number of obs files that already exist on disk. -num_existing_files=0 -for yyyymmddhh in ${obs_retrieve_times_crnt_day[@]}; do - yyyymmdd=$(echo ${yyyymmddhh} | cut -c1-8) - hh=$(echo ${yyyymmddhh} | cut -c9-10) - - # Set the full path to the final processed obs file (fp_proc). - sec_since_ref=$(${DATE_UTIL} --date "${yyyymmdd} ${hh} hours" +%s) - lhr=$(( (sec_since_ref - sec_since_ref_task)/3600 )) - eval_METplus_timestr_tmpl \ - init_time="${yyyymmdd_task}00" \ - fhr="${lhr}" \ - METplus_timestr_tmpl="${OBS_DIR}/${OBS_CCPA_APCP_FN_TEMPLATE}" \ - outvarname_evaluated_timestr="fp_proc" - - if [[ -f ${fp_proc} ]]; then - num_existing_files=$((num_existing_files+1)) - print_info_msg " -File already exists on disk: - fp_proc = \"${fp_proc}\"" - else - hr=$((10#${hh})) - arcv_hr_start=$(ceil ${hr} ${arcv_hr_incr}) - arcv_hr_start=$(( arcv_hr_start*arcv_hr_incr )) - print_info_msg " -File does not exist on disk: - fp_proc = \"${fp_proc}\" -Setting the hour (since 00) of the first archive to retrieve to: - arcv_hr_start = \"${arcv_hr_start}\"" - break - fi -done - -# If the number of obs files that already exist on disk is equal to the -# number of obs files needed, then there is no need to retrieve any files. 
-num_obs_retrieve_times_crnt_day=${#obs_retrieve_times_crnt_day[@]} -if [[ ${num_existing_files} -eq ${num_obs_retrieve_times_crnt_day} ]]; then - - print_info_msg " -All obs files needed for the current day (yyyymmdd_task) already exist -on disk: - yyyymmdd_task = \"${yyyymmdd_task}\" -Thus, there is no need to retrieve any files." - exit - -# If the number of obs files that already exist on disk is not equal to -# the number of obs files needed, then we will need to retrieve files. -# In this case, set the sequence of hours corresponding to the archives -# from which files will be retrieved. -else - - arcv_hrs=($(seq ${arcv_hr_start} ${arcv_hr_incr} ${arcv_hr_end})) - arcv_hrs_str="( "$( printf "%s " "${arcv_hrs[@]}" )")" - print_info_msg " -At least some obs files needed needed for the current day (yyyymmdd_task) -do not exist on disk: - yyyymmdd_task = \"${yyyymmdd_task}\" -The number of obs files needed for the current day (which is equal to the -number of observation retrieval times for the current day) is: - num_obs_retrieve_times_crnt_day = ${num_obs_retrieve_times_crnt_day} -The number of obs files that already exist on disk is: - num_existing_files = ${num_existing_files} -Will retrieve remaining files by looping over archives corresponding to -the following hours (since 00 of this day): - arcv_hrs = ${arcv_hrs_str} -" - -fi -# -#----------------------------------------------------------------------- -# -# At this point, at least some obs files for the current day need to be -# retrieved. Thus, loop over the relevant archives that contain obs for -# the day given by yyyymmdd_task and retrieve files as needed. -# -#----------------------------------------------------------------------- -# - -# Whether to move the files or copy them from their raw to their processed -# locations. -#mv_or_cp="mv" -mv_or_cp="cp" -# Whether to remove raw observations after processed directories have -# been created from them. 
-remove_raw_obs="${REMOVE_RAW_OBS_CCPA}" -# If the raw directories and files are to be removed at the end of this -# script, no need to copy the files since the raw directories are going -# to be removed anyway. -if [[ $(boolify "${remove_raw_obs}") == "TRUE" ]]; then - mv_or_cp="mv" -fi - -# Base directory that will contain the archive subdirectories in which -# the files extracted from each archive (tar) file will be placed. We -# refer to this as the "raw" base directory because it contains files -# as they are found in the archives before any processing by this script. -basedir_raw="${OBS_DIR}/raw_${yyyymmdd_task}" - -for arcv_hr in ${arcv_hrs[@]}; do - - print_info_msg " -arcv_hr = ${arcv_hr}" - - # Calculate the time information for the current archive. - yyyymmddhh_arcv=$(${DATE_UTIL} --date "${yyyymmdd_task} ${arcv_hr} hours" +%Y%m%d%H) - yyyymmdd_arcv=$(echo ${yyyymmddhh_arcv} | cut -c1-8) - hh_arcv=$(echo ${yyyymmddhh_arcv} | cut -c9-10) - - # Directory that will contain the files retrieved from the current archive - # file. We refer to this as the "raw" archive directory because it will - # contain the files as they are in the archive before any processing by - # this script. - arcv_dir_raw="${basedir_raw}/${yyyymmddhh_arcv}" - - # Check whether any of the obs retrieval times for the day associated with - # this task fall in the time interval spanned by the current archive. If - # so, set the flag (do_retrieve) to retrieve the files in the current - # archive. 
- hrs_ago=$((arcv_hr_incr - 1)) - arcv_contents_yyyymmddhh_start=$(${DATE_UTIL} --date "${yyyymmdd_arcv} ${hh_arcv} ${hrs_ago} hours ago" +%Y%m%d%H) - arcv_contents_yyyymmddhh_end=${yyyymmddhh_arcv} - do_retrieve="FALSE" - for (( i=0; i<${num_obs_retrieve_times_crnt_day}; i++ )); do - obs_retrieve_time=${obs_retrieve_times_crnt_day[i]} - if [[ "${obs_retrieve_time}" -ge "${arcv_contents_yyyymmddhh_start}" ]] && \ - [[ "${obs_retrieve_time}" -le "${arcv_contents_yyyymmddhh_end}" ]]; then - do_retrieve="TRUE" - break - fi - done - - if [[ $(boolify "${do_retrieve}") != "TRUE" ]]; then - - print_info_msg " -None of the times in the current day (or hour 00 of the next day) at which -obs need to be retrieved fall in the range spanned by the current ${arcv_hr_incr}-hourly -archive file. The bounds of the data in the current archive file are: - arcv_contents_yyyymmddhh_start = \"${arcv_contents_yyyymmddhh_start}\" - arcv_contents_yyyymmddhh_end = \"${arcv_contents_yyyymmddhh_end}\" -The times at which obs need to be retrieved are: - obs_retrieve_times_crnt_day = ($(printf "\"%s\" " ${obs_retrieve_times_crnt_day[@]}))" - - else - - # Make sure the raw archive directory exists because it is used below as - # the output directory of the retrieve_data.py script (so if this directory - # doesn't already exist, that script will fail). Creating this directory - # also ensures that the raw base directory (basedir_raw) exists before we - # change location to it below. - mkdir -p ${arcv_dir_raw} - - # The retrieve_data.py script first extracts the contents of the archive - # file into the directory it was called from and then moves them to the - # specified output location (via the --output_path option). Note that - # the relative paths of obs files within archives associted with different - # days may be the same. 
Thus, if files with the same archive-relative - # paths are being simultaneously extracted from multiple archive files - # (by multiple get_obs tasks), they will likely clobber each other if the - # extracton is being carried out into the same location on disk. To avoid - # this, we first change location to the raw base directory (whose name is - # obs-day dependent) and then call the retrieve_data.py script. - cd ${basedir_raw} - - # Pull obs from HPSS. This will get all the obs files in the current - # archive and place them in the raw archive directory. - cmd=" - python3 -u ${USHdir}/retrieve_data.py \ - --debug \ - --file_set obs \ - --config ${PARMdir}/data_locations.yml \ - --cycle_date ${yyyymmddhh_arcv} \ - --data_stores hpss \ - --data_type CCPA_obs \ - --output_path ${arcv_dir_raw} \ - --summary_file retrieve_data.log" - - print_info_msg "CALLING: ${cmd}" - $cmd || print_err_msg_exit "Could not retrieve obs from HPSS." - - # Loop over the raw obs files extracted from the current archive and - # generate from them the processed obs files. - # - # For CCPA obs, for most dates this consists of simply copying or moving - # the files from the raw archive directory to the processed directory, - # possibly renaming them in the process. However, for dates between - # 20180718 and 20210504 and hours-of-day 19 through the end of the day - # (i.e. hour 0 of the next day), it involves using wgrib2 to correct an - # error in the metadata of the raw file and writing the corrected data - # to a new grib2 file in the processed location. 
- for hrs_ago in $(seq 5 -${CCPA_OBS_AVAIL_INTVL_HRS} 0); do - yyyymmddhh=$(${DATE_UTIL} --date "${yyyymmdd_arcv} ${hh_arcv} ${hrs_ago} hours ago" +%Y%m%d%H) - yyyymmdd=$(echo ${yyyymmddhh} | cut -c1-8) - hh=$(echo ${yyyymmddhh} | cut -c9-10) - # Create the processed obs file from the raw one (by moving, copying, or - # otherwise) only if the time of the current file in the current archive - # also exists in the list of obs retrieval times for the current day. - if [[ ${obs_retrieve_times_crnt_day[@]} =~ ${yyyymmddhh} ]]; then - - # The raw file name needs to be the same as what the retrieve_data.py - # script called above ends up retrieving. The list of possibile templates - # for this name is given in parm/data_locations.yml, but which of those - # is actually used is not known until retrieve_data.py completes. Thus, - # that information needs to be passed back by the script and used here. - # For now, we hard-code the file name here. - fn_raw="ccpa.t${hh}z.${accum_obs_fmt}h.hrap.conus.gb2" - fp_raw="${arcv_dir_raw}/${fn_raw}" - - # Set the full path to the final processed obs file (fp_proc) we want to - # create. - sec_since_ref=$(${DATE_UTIL} --date "${yyyymmdd} ${hh} hours" +%s) - lhr=$(( (sec_since_ref - sec_since_ref_task)/3600 )) - eval_METplus_timestr_tmpl \ - init_time="${yyyymmdd_task}00" \ - fhr="${lhr}" \ - METplus_timestr_tmpl="${OBS_DIR}/${OBS_CCPA_APCP_FN_TEMPLATE}" \ - outvarname_evaluated_timestr="fp_proc" - mkdir -p $( dirname "${fp_proc}" ) - - # CCPA files for 1-hour accumulation have incorrect metadata in the files - # under the "00" directory from 20180718 to 20210504. After the data is - # pulled, reorganize into correct yyyymmdd structure. 
- hh_noZero=$((10#${hh})) - if [[ ${yyyymmdd} -ge 20180718 && ${yyyymmdd} -le 20210504 ]] && \ - [[ (${hh_noZero} -ge 19 && ${hh_noZero} -le 23) || (${hh_noZero} -eq 0) ]]; then - wgrib2 ${fp_raw} -set_date -24hr -grib ${fp_proc} -s - else - ${mv_or_cp} ${fp_raw} ${fp_proc} - fi - - fi - done - - fi - -done -# -#----------------------------------------------------------------------- -# -# Clean up raw obs directories. -# -#----------------------------------------------------------------------- -# -if [[ $(boolify "${remove_raw_obs}") == "TRUE" ]]; then - print_info_msg "Removing raw obs directories..." - rm -rf ${basedir_raw} || print_err_msg_exit "\ -Failed to remove raw obs directories." -fi diff --git a/ush/get_obs_mrms.sh b/ush/get_obs_mrms.sh deleted file mode 100755 index d13e374620..0000000000 --- a/ush/get_obs_mrms.sh +++ /dev/null @@ -1,341 +0,0 @@ -#!/usr/bin/env bash - -# -#----------------------------------------------------------------------- -# -# Source the variable definitions file and the bash utility functions. -# -#----------------------------------------------------------------------- -# -. $USHdir/source_util_funcs.sh -for sect in user platform verification ; do - source_yaml ${GLOBAL_VAR_DEFNS_FP} ${sect} -done - -set -u -#set -x -# -#----------------------------------------------------------------------- -# -# This script performs several important tasks for preparing data for -# verification tasks. Depending on the value of the environment variable -# OBTYPE=(CCPA|MRMS|NDAS|NOHRSC), the script will prepare that particular data -# set. -# -# If data is not available on disk (in the location specified by -# CCPA_OBS_DIR, MRMS_OBS_DIR, NDAS_OBS_DIR, or NOHRSC_OBS_DIR respectively), -# the script attempts to retrieve the data from HPSS using the retrieve_data.py -# script. Depending on the data set, there are a few strange quirks and/or -# bugs in the way data is organized; see in-line comments for details. 
-# -# -# MRMS (Multi-Radar Multi-Sensor) radar observations -# ---------- -# If data is available on disk, it must be in the following -# directory structure and file name conventions expected by verification -# tasks: -# -# {MRMS_OBS_DIR}/{YYYYMMDD}/[PREFIX]{YYYYMMDD}-{HH}0000.grib2, -# -# Where [PREFIX] is MergedReflectivityQCComposite_00.50_ for reflectivity -# data and EchoTop_18_00.50_ for echo top data. If data is not available -# at the top of the hour, you should rename the file closest in time to -# your hour(s) of interest to the above naming format. A script -# "ush/mrms_pull_topofhour.py" is provided for this purpose. -# -# If data is retrieved from HPSS, it will automatically staged by this -# this script. -# -#----------------------------------------------------------------------- -# - -# The time interval (in hours) at which the obs are available on HPSS -# must divide evenly into 24. Otherwise, different days would have obs -# available at different hours-of-day. Make sure this is the case. -remainder=$(( 24 % MRMS_OBS_AVAIL_INTVL_HRS )) -if [ ${remainder} -ne 0 ]; then - print_err_msg_exit "\ -The obs availability interval MRMS_OBS_AVAIL_INTVL_HRS must divide evenly -into 24 but doesn't: - MRMS_OBS_AVAIL_INTVL_HRS = ${MRMS_OBS_AVAIL_INTVL_HRS} - 24 % MRMS_OBS_AVAIL_INTVL_HRS = ${remainder}" -fi - -# Create an array-valued counterpart of MRMS_FIELDS. MRMS_FIELDS is an -# environment variable created in the ROCOTO XML. It is a scalar variable -# because there doesn't seem to be a way to pass a bash array from the -# XML to the task's script. -mrms_fields=($(printf "%s" "${MRMS_FIELDS}")) - -# Loop over the fields (REFC and RETOP) and set the file base name -# corresponding to each. -fields_in_filenames=() -levels_in_filenames=() -obs_mrms_fp_templates=() -for field in ${mrms_fields[@]}; do - # Set field-dependent parameters needed in forming grib2 file names. 
- if [ "${field}" = "REFC" ]; then - fields_in_filenames+=("MergedReflectivityQCComposite") - levels_in_filenames+=("00.50") - obs_mrms_fp_templates+=("${OBS_DIR}/${OBS_MRMS_REFC_FN_TEMPLATE}") - elif [ "${field}" = "RETOP" ]; then - fields_in_filenames+=("EchoTop") - levels_in_filenames+=("18_00.50") - obs_mrms_fp_templates+=("${OBS_DIR}/${OBS_MRMS_RETOP_FN_TEMPLATE}") - else - print_err_msg_exit "\ -Invalid field specified: - field = \"${field}\" -Valid options are 'REFC', 'RETOP'." - fi -done - -# The day (in the form YYYMMDD) associated with the current task via the -# task's cycledefs attribute in the ROCOTO xml. -yyyymmdd_task=${PDY} - -# Seconds since some reference time that the DATE_UTIL utility uses of -# the day of the current task. This will be used below to find hours -# since the start of this day. -sec_since_ref_task=$(${DATE_UTIL} --date "${yyyymmdd_task} 0 hours" +%s) -# -#----------------------------------------------------------------------- -# -# Get the list of all the times in the current day at which to retrieve -# obs. This is an array with elements having format "YYYYMMDDHH". -# -#----------------------------------------------------------------------- -# -array_name="OBS_RETRIEVE_TIMES_${OBTYPE}_${yyyymmdd_task}" -eval obs_retrieve_times_crnt_day=\( \${${array_name}[@]} \) - -# Check whether any obs files already exist on disk. If so, adjust the -# starting archive hour. In the process, keep a count of the number of -# obs files that already exist on disk. 
-num_existing_files=0 -num_mrms_fields=${#mrms_fields[@]} -for yyyymmddhh in ${obs_retrieve_times_crnt_day[@]}; do - for (( i=0; i<${num_mrms_fields}; i++ )); do - - yyyymmdd=$(echo ${yyyymmddhh} | cut -c1-8) - hh=$(echo ${yyyymmddhh} | cut -c9-10) - - sec_since_ref=$(${DATE_UTIL} --date "${yyyymmdd} ${hh} hours" +%s) - lhr=$(( (sec_since_ref - sec_since_ref_task)/3600 )) - eval_METplus_timestr_tmpl \ - init_time="${yyyymmdd_task}00" \ - fhr="${lhr}" \ - METplus_timestr_tmpl="${obs_mrms_fp_templates[$i]}" \ - outvarname_evaluated_timestr="fp_proc" - - if [[ -f ${fp_proc} ]]; then - num_existing_files=$((num_existing_files+1)) - print_info_msg " -File already exists on disk: - fp_proc = \"${fp_proc}\"" - else - print_info_msg " -File does not exist on disk: - fp_proc = \"${fp_proc}\" -Will attempt to retrieve all obs files." - break 2 - fi - - done -done - -# If the number of obs files that already exist on disk is equal to the -# number of obs files needed (which is num_mrms_fields times the number -# of obs retrieval times in the current day), then there is no need to -# retrieve any files. -num_obs_retrieve_times_crnt_day=${#obs_retrieve_times_crnt_day[@]} -if [[ ${num_existing_files} -eq $((num_mrms_fields*num_obs_retrieve_times_crnt_day)) ]]; then - - print_info_msg " -All obs files needed for the current day (yyyymmdd_task) already exist -on disk: - yyyymmdd_task = \"${yyyymmdd_task}\" -Thus, there is no need to retrieve any files." - exit -# Otherwise, will need to retrieve files. 
-else - print_info_msg " -At least some obs files needed needed for the current day (yyyymmdd_task) -do not exist on disk: - yyyymmdd_task = \"${yyyymmdd_task}\" -The number of obs files needed for the current day (which is equal to the -number of observation retrieval times for the current day) is: - num_obs_retrieve_times_crnt_day = ${num_obs_retrieve_times_crnt_day} -The number of obs files that already exist on disk is: - num_existing_files = ${num_existing_files} -Will retrieve remaining files. -" -fi -# -#----------------------------------------------------------------------- -# -# At this point, at least some obs files for the current day need to be -# retrieved. -# -#----------------------------------------------------------------------- -# - -# Whether to move the files or copy them from their raw to their processed -# locations. -#mv_or_cp="mv" -mv_or_cp="cp" -# Whether to remove raw observations after processed directories have -# been created from them. -remove_raw_obs="${REMOVE_RAW_OBS_MRMS}" -# If the raw directories and files are to be removed at the end of this -# script, no need to copy the files since the raw directories are going -# to be removed anyway. -if [[ $(boolify "${remove_raw_obs}") == "TRUE" ]]; then - mv_or_cp="mv" -fi - -# Base directory that will contain the archive subdirectories in which -# the files extracted from each archive (tar) file will be placed. We -# refer to this as the "raw" base directory because it contains files -# as they are found in the archives before any processing by this script. -basedir_raw="${OBS_DIR}/raw_${yyyymmdd_task}" - -# Time associated with the archive. MRMS data have daily archives that -# have the hour-of-day set to "00". -yyyymmddhh_arcv="${yyyymmdd_task}00" - -# Directory that will contain the files retrieved from the current archive -# file. We refer to this as the "raw" archive directory because it will -# contain the files as they are in the archive before any processing by -# this script. 
-# -# Note: -# Normally, arcv_dir_raw should consist of basedir_raw and a subdirectory -# that depends on the archive date, e.g. -# -# arcv_dir_raw="${basedir_raw}/${yyyymmddhh_arcv}" -# -# but since for MRMS data there is only one archive per day, that directory -# is redundant, so simplicity we set arcv_dir_raw to just basedir_raw. -arcv_dir_raw="${basedir_raw}" - -# Make sure the raw archive directory exists because it is used below as -# the output directory of the retrieve_data.py script (so if this directory -# doesn't already exist, that script will fail). Creating this directory -# also ensures that the raw base directory (basedir_raw) exists before we -# change location to it below. -mkdir -p ${arcv_dir_raw} - -# The retrieve_data.py script first extracts the contents of the archive -# file into the directory it was called from and then moves them to the -# specified output location (via the --output_path option). Note that -# the relative paths of obs files within archives associted with different -# days may be the same. Thus, if files with the same archive-relative -# paths are being simultaneously extracted from multiple archive files -# (by multiple get_obs tasks), they will likely clobber each other if the -# extracton is being carried out into the same location on disk. To avoid -# this, we first change location to the raw base directory (whose name is -# obs-day dependent) and then call the retrieve_data.py script. -cd ${basedir_raw} - -# Pull obs from HPSS. This will get all the obs files in the current -# archive and place them in the raw archive directory. -cmd=" -python3 -u ${USHdir}/retrieve_data.py \ - --debug \ - --file_set obs \ - --config ${PARMdir}/data_locations.yml \ - --cycle_date ${yyyymmddhh_arcv} \ - --data_stores hpss \ - --data_type MRMS_obs \ - --output_path ${arcv_dir_raw} \ - --summary_file retrieve_data.log" - -print_info_msg "CALLING: ${cmd}" -$cmd || print_err_msg_exit "Could not retrieve obs from HPSS." 
-# -#----------------------------------------------------------------------- -# -# Loop over the 24 hour period starting with the zeroth hour of the day -# associated with this task and ending with the 23rd hour. -# -#----------------------------------------------------------------------- -# - -# Loop over the raw obs files extracted from the current archive and -# generate from them the processed obs files. -# -# For MRMS obs, the raw obs consist of gzipped grib2 files that are -# usually a few minutes apart in time. However, because forecast data -# is available at most every hour, the SRW App configuration parameter -# MRMS_OBS_AVAIL_INTVL_HRS is set to 1 hour instead of a few minutes. -# Below, we loop over the whole day using this 1-hourly interval. For -# each hour of the day, we call the script mrms_pull_topofhour.py to find -# the gzipped grib2 file in the raw archive directory that is closest in -# time to the hour and unzip it in a temporary directory. We then copy -# or move it to the processed directory, possibly renaming it in the -# process. -for hr in $(seq 0 ${MRMS_OBS_AVAIL_INTVL_HRS} 23); do - yyyymmddhh=$(${DATE_UTIL} --date "${yyyymmdd_task} ${hr} hours" +%Y%m%d%H) - yyyymmdd=$(echo ${yyyymmddhh} | cut -c1-8) - hh=$(echo ${yyyymmddhh} | cut -c9-10) - # Create the processed obs file from the raw one (by moving, copying, or - # otherwise) only if the time of the current file in the current archive - # also exists in the list of obs retrieval times for the current day. - if [[ ${obs_retrieve_times_crnt_day[@]} =~ ${yyyymmddhh} ]]; then - for (( i=0; i<${num_mrms_fields}; i++ )); do - - # First, select from the set of raw files for the current day those that - # are nearest in time to the current hour. Unzip these in a temporary - # subdirectory under the raw base directory. - # - # Note that the script we call to do this (mrms_pull_topofhour.py) assumes - # a certain file naming convention. 
That convention must match the names - # of the files that the retrieve_data.py script called above ends up - # retrieving. The list of possibile templates for these names is given - # in parm/data_locations.yml, but which of those is actually used is not - # known until retrieve_data.py completes. Thus, that information needs - # to be passed back by retrieve_data.py and then passed to mrms_pull_topofhour.py. - # For now, we hard-code the file name here. - python ${USHdir}/mrms_pull_topofhour.py \ - --valid_time ${yyyymmddhh} \ - --source ${basedir_raw} \ - --outdir ${basedir_raw}/topofhour \ - --product ${fields_in_filenames[$i]} \ - --no-add_vdate_subdir - - # Set the name of and the full path to the raw obs file created by the - # mrms_pull_topofhour.py script. This name is currently hard-coded to - # the output of that script. In the future, it should be set in a more - # general way (e.g. obtain from a settings file). - fn_raw="${fields_in_filenames[$i]}_${levels_in_filenames[$i]}_${yyyymmdd_task}-${hh}0000.grib2" - fp_raw="${basedir_raw}/topofhour/${fn_raw}" - - # Set the full path to the final processed obs file (fp_proc) we want to - # create. - sec_since_ref=$(${DATE_UTIL} --date "${yyyymmdd} ${hh} hours" +%s) - lhr=$(( (sec_since_ref - sec_since_ref_task)/3600 )) - eval_METplus_timestr_tmpl \ - init_time="${yyyymmdd_task}00" \ - fhr="${lhr}" \ - METplus_timestr_tmpl="${obs_mrms_fp_templates[$i]}" \ - outvarname_evaluated_timestr="fp_proc" - mkdir -p $( dirname "${fp_proc}" ) - - mv ${fp_raw} ${fp_proc} - - done - fi -done -# -#----------------------------------------------------------------------- -# -# Clean up raw obs directories. -# -#----------------------------------------------------------------------- -# -if [[ $(boolify "${remove_raw_obs}") == "TRUE" ]]; then - print_info_msg "Removing raw obs directories..." - rm -rf ${basedir_raw} || print_err_msg_exit "\ -Failed to remove raw obs directories." 
-fi diff --git a/ush/get_obs_ndas.sh b/ush/get_obs_ndas.sh deleted file mode 100755 index 45338714a2..0000000000 --- a/ush/get_obs_ndas.sh +++ /dev/null @@ -1,357 +0,0 @@ -#!/usr/bin/env bash - -# -#----------------------------------------------------------------------- -# -# Source the variable definitions file and the bash utility functions. -# -#----------------------------------------------------------------------- -# -. $USHdir/source_util_funcs.sh -for sect in user platform verification ; do - source_yaml ${GLOBAL_VAR_DEFNS_FP} ${sect} -done - -set -u -#set -x -# -#----------------------------------------------------------------------- -# -# This script performs several important tasks for preparing data for -# verification tasks. Depending on the value of the environment variable -# OBTYPE=(CCPA|MRMS|NDAS|NOHRSC), the script will prepare that particular data -# set. -# -# If data is not available on disk (in the location specified by -# CCPA_OBS_DIR, MRMS_OBS_DIR, NDAS_OBS_DIR, or NOHRSC_OBS_DIR respectively), -# the script attempts to retrieve the data from HPSS using the retrieve_data.py -# script. Depending on the data set, there are a few strange quirks and/or -# bugs in the way data is organized; see in-line comments for details. -# -# -# NDAS (NAM Data Assimilation System) conventional observations -# ---------- -# If data is available on disk, it must be in the following -# directory structure and file name conventions expected by verification -# tasks: -# -# {NDAS_OBS_DIR}/{YYYYMMDD}/prepbufr.ndas.{YYYYMMDDHH} -# -# Note that data retrieved from HPSS and other sources may be in a -# different format: nam.t{hh}z.prepbufr.tm{prevhour}.nr, where hh is -# either 00, 06, 12, or 18, and prevhour is the number of hours prior to -# hh (00 through 05). If using custom staged data, you will have to -# rename the files accordingly. -# -# If data is retrieved from HPSS, it will be automatically staged by this -# this script. 
-# -#----------------------------------------------------------------------- -# - -# The time interval (in hours) at which the obs are available on HPSS -# must divide evenly into 24. Otherwise, different days would have obs -# available at different hours-of-day. Make sure this is the case. -remainder=$(( 24 % NDAS_OBS_AVAIL_INTVL_HRS )) -if [ ${remainder} -ne 0 ]; then - print_err_msg_exit "\ -The obs availability interval NDAS_OBS_AVAIL_INTVL_HRS must divide evenly -into 24 but doesn't: - NDAS_OBS_AVAIL_INTVL_HRS = ${NDAS_OBS_AVAIL_INTVL_HRS} - 24 % NDAS_OBS_AVAIL_INTVL_HRS = ${remainder}" -fi - -# The day (in the form YYYMMDD) associated with the current task via the -# task's cycledefs attribute in the ROCOTO xml. -yyyymmdd_task=${PDY} - -# Seconds since some reference time that the DATE_UTIL utility uses of -# the day of the current task. This will be used below to find hours -# since the start of this day. -sec_since_ref_task=$(${DATE_UTIL} --date "${yyyymmdd_task} 0 hours" +%s) -# -#----------------------------------------------------------------------- -# -# Get the list of all the times in the current day at which to retrieve -# obs. This is an array with elements having format "YYYYMMDDHH". -# -#----------------------------------------------------------------------- -# -array_name="OBS_RETRIEVE_TIMES_${OBTYPE}_${yyyymmdd_task}" -eval obs_retrieve_times_crnt_day=\( \${${array_name}[@]} \) -# -#----------------------------------------------------------------------- -# -# Obs files will be obtained by extracting them from the relevant 6-hourly -# archives. Thus, we need the sequence of archive hours over which to -# loop. In the simplest case, this sequence will be "6 12 18 24". This -# will be the case if the observation retrieval times include all hours -# of the task's day and if none of the obs files for this day already -# exist on disk. In other cases, the sequence we loop over will be a -# subset of "6 12 18 24". 
-# -# To generate this sequence, we first set its starting and ending values -# as well as the interval. -# -#----------------------------------------------------------------------- -# - -# Sequence interval must be 6 hours because the archives are 6-hourly. -arcv_hr_incr=6 - -# Initial guess for starting archive hour. This is set to the archive -# hour containing obs at the first obs retrieval time of the day. -hh_first=$(echo ${obs_retrieve_times_crnt_day[0]} | cut -c9-10) -hr_first=$((10#${hh_first})) -arcv_hr_start=$(( (hr_first/arcv_hr_incr + 1)*arcv_hr_incr )) - -# Ending archive hour. This is set to the archive hour containing obs at -# the last obs retrieval time of the day. -hh_last=$(echo ${obs_retrieve_times_crnt_day[-1]} | cut -c9-10) -hr_last=$((10#${hh_last})) -arcv_hr_end=$(( (hr_last/arcv_hr_incr + 1)*arcv_hr_incr )) - -# Check whether any obs files already exist on disk in their processed -# (i.e. final) locations. Here, by "processed" we mean after any renaming -# and rearrangement of files that this script may do to the "raw" files, -# i.e. the files as they are named and arranged within the archive (tar) -# files on HPSS. If so, adjust the starting archive hour. In the process, -# keep a count of the number of obs files that already exist on disk. -num_existing_files=0 -for yyyymmddhh in ${obs_retrieve_times_crnt_day[@]}; do - yyyymmdd=$(echo ${yyyymmddhh} | cut -c1-8) - hh=$(echo ${yyyymmddhh} | cut -c9-10) - - # Set the full path to the final processed obs file (fp_proc). 
- sec_since_ref=$(${DATE_UTIL} --date "${yyyymmdd} ${hh} hours" +%s) - lhr=$(( (sec_since_ref - sec_since_ref_task)/3600 )) - eval_METplus_timestr_tmpl \ - init_time="${yyyymmdd_task}00" \ - fhr="${lhr}" \ - METplus_timestr_tmpl="${OBS_DIR}/${OBS_NDAS_ADPSFCorADPUPA_FN_TEMPLATE}" \ - outvarname_evaluated_timestr="fp_proc" - - if [[ -f ${fp_proc} ]]; then - num_existing_files=$((num_existing_files+1)) - print_info_msg " -File already exists on disk: - fp_proc = \"${fp_proc}\"" - else - hr=$((10#${hh})) - arcv_hr_start=$(( (hr/arcv_hr_incr + 1)*arcv_hr_incr )) - print_info_msg " -File does not exist on disk: - fp_proc = \"${fp_proc}\" -Setting the hour (since 00) of the first archive to retrieve to: - arcv_hr_start = \"${arcv_hr_start}\"" - break - fi -done - -# If the number of obs files that already exist on disk is equal to the -# number of obs files needed, then there is no need to retrieve any files. -num_obs_retrieve_times_crnt_day=${#obs_retrieve_times_crnt_day[@]} -if [[ ${num_existing_files} -eq ${num_obs_retrieve_times_crnt_day} ]]; then - - print_info_msg " -All obs files needed for the current day (yyyymmdd_task) already exist -on disk: - yyyymmdd_task = \"${yyyymmdd_task}\" -Thus, there is no need to retrieve any files." - exit - -# If the number of obs files that already exist on disk is not equal to -# the number of obs files needed, then we will need to retrieve files. -# In this case, set the sequence of hours corresponding to the archives -# from which files will be retrieved. 
-else - - arcv_hrs=($(seq ${arcv_hr_start} ${arcv_hr_incr} ${arcv_hr_end})) - arcv_hrs_str="( "$( printf "%s " "${arcv_hrs[@]}" )")" - print_info_msg " -At least some obs files needed needed for the current day (yyyymmdd_task) -do not exist on disk: - yyyymmdd_task = \"${yyyymmdd_task}\" -The number of obs files needed for the current day (which is equal to the -number of observation retrieval times for the current day) is: - num_obs_retrieve_times_crnt_day = ${num_obs_retrieve_times_crnt_day} -The number of obs files that already exist on disk is: - num_existing_files = ${num_existing_files} -Will retrieve remaining files by looping over archives corresponding to -the following hours (since 00 of this day): - arcv_hrs = ${arcv_hrs_str} -" - -fi -# -#----------------------------------------------------------------------- -# -# At this point, at least some obs files for the current day need to be -# retrieved. Thus, loop over the relevant archives that contain obs for -# the day given by yyyymmdd_task and retrieve files as needed. -# -#----------------------------------------------------------------------- -# - -# Whether to move the files or copy them from their raw to their processed -# locations. -#mv_or_cp="mv" -mv_or_cp="cp" -# Whether to remove raw observations after processed directories have -# been created from them. -remove_raw_obs="${REMOVE_RAW_OBS_NDAS}" -# If the raw directories and files are to be removed at the end of this -# script, no need to copy the files since the raw directories are going -# to be removed anyway. -if [[ $(boolify "${remove_raw_obs}") == "TRUE" ]]; then - mv_or_cp="mv" -fi - -# Base directory that will contain the archive subdirectories in which -# the files extracted from each archive (tar) file will be placed. We -# refer to this as the "raw" base directory because it contains files -# as they are found in the archives before any processing by this script. 
-basedir_raw="${OBS_DIR}/raw_${yyyymmdd_task}" - -for arcv_hr in ${arcv_hrs[@]}; do - - print_info_msg " -arcv_hr = ${arcv_hr}" - - # Calculate the time information for the current archive. - yyyymmddhh_arcv=$(${DATE_UTIL} --date "${yyyymmdd_task} ${arcv_hr} hours" +%Y%m%d%H) - yyyymmdd_arcv=$(echo ${yyyymmddhh_arcv} | cut -c1-8) - hh_arcv=$(echo ${yyyymmddhh_arcv} | cut -c9-10) - - # Directory that will contain the files retrieved from the current archive - # file. We refer to this as the "raw" archive directory because it will - # contain the files as they are in the archive before any processing by - # this script. - arcv_dir_raw="${basedir_raw}/${yyyymmddhh_arcv}" - - # Check whether any of the obs retrieval times for the day associated with - # this task fall in the time interval spanned by the current archive. If - # so, set the flag (do_retrieve) to retrieve the files in the current - # archive. - arcv_contents_yyyymmddhh_start=$(${DATE_UTIL} --date "${yyyymmdd_arcv} ${hh_arcv} 6 hours ago" +%Y%m%d%H) - arcv_contents_yyyymmddhh_end=$(${DATE_UTIL} --date "${yyyymmdd_arcv} ${hh_arcv} 1 hours ago" +%Y%m%d%H) - do_retrieve="FALSE" - for (( i=0; i<${num_obs_retrieve_times_crnt_day}; i++ )); do - obs_retrieve_time=${obs_retrieve_times_crnt_day[i]} - if [[ "${obs_retrieve_time}" -ge "${arcv_contents_yyyymmddhh_start}" ]] && \ - [[ "${obs_retrieve_time}" -le "${arcv_contents_yyyymmddhh_end}" ]]; then - do_retrieve="TRUE" - break - fi - done - - if [[ $(boolify "${do_retrieve}") != "TRUE" ]]; then - - print_info_msg " -None of the current day's observation retrieval times fall in the range -spanned by the current ${arcv_hr_incr}-hourly archive file. 
The bounds of the current -archive are: - arcv_contents_yyyymmddhh_start = \"${arcv_contents_yyyymmddhh_start}\" - arcv_contents_yyyymmddhh_end = \"${arcv_contents_yyyymmddhh_end}\" -The times at which obs need to be retrieved are: - obs_retrieve_times_crnt_day = ($(printf "\"%s\" " ${obs_retrieve_times_crnt_day[@]}))" - - else - - # Make sure the raw archive directory exists because it is used below as - # the output directory of the retrieve_data.py script (so if this directory - # doesn't already exist, that script will fail). Creating this directory - # also ensures that the raw base directory (basedir_raw) exists before we - # change location to it below. - mkdir -p ${arcv_dir_raw} - - # The retrieve_data.py script first extracts the contents of the archive - # file into the directory it was called from and then moves them to the - # specified output location (via the --output_path option). Note that - # the relative paths of obs files within archives associted with different - # days may be the same. Thus, if files with the same archive-relative - # paths are being simultaneously extracted from multiple archive files - # (by multiple get_obs tasks), they will likely clobber each other if the - # extracton is being carried out into the same location on disk. To avoid - # this, we first change location to the raw base directory (whose name is - # obs-day dependent) and then call the retrieve_data.py script. - cd ${basedir_raw} - - # Pull obs from HPSS. This will get all the obs files in the current - # archive and place them in the raw archive directory. - # Note that for the specific case of NDAS obs, this will get all 7 obs - # files in the current archive, although we will make use of only 6 of - # these (we will not use the tm00 file). 
- cmd=" - python3 -u ${USHdir}/retrieve_data.py \ - --debug \ - --file_set obs \ - --config ${PARMdir}/data_locations.yml \ - --cycle_date ${yyyymmddhh_arcv} \ - --data_stores hpss \ - --data_type NDAS_obs \ - --output_path ${arcv_dir_raw} \ - --summary_file retrieve_data.log" - - print_info_msg "CALLING: ${cmd}" - $cmd || print_err_msg_exit "Could not retrieve obs from HPSS." - - # Loop over the raw obs files extracted from the current archive and - # generate from them the processed obs files. - # - # For NDAS obs, this consists of simply copying or moving the files from - # the raw archive directory to the processed directory, possibly renaming - # them in the process. - # - # Note that the tm06 file in a given archive contain more/better observations - # than the tm00 file in the next archive (their valid times are equivalent), - # so we use the tm06 files. - for hrs_ago in $(seq --format="%02g" 6 -${NDAS_OBS_AVAIL_INTVL_HRS} 1); do - yyyymmddhh=$(${DATE_UTIL} --date "${yyyymmdd_arcv} ${hh_arcv} ${hrs_ago} hours ago" +%Y%m%d%H) - yyyymmdd=$(echo ${yyyymmddhh} | cut -c1-8) - hh=$(echo ${yyyymmddhh} | cut -c9-10) - # Create the processed obs file from the raw one (by moving, copying, or - # otherwise) only if the time of the current file in the current archive - # also exists in the list of obs retrieval times for the current day. - if [[ ${obs_retrieve_times_crnt_day[@]} =~ ${yyyymmddhh} ]]; then - - # The raw file name needs to be the same as what the retrieve_data.py - # script called above ends up retrieving. The list of possibile templates - # for this name is given in parm/data_locations.yml, but which of those - # is actually used is not known until retrieve_data.py completes. Thus, - # that information needs to be passed back by the script and used here. - # For now, we hard-code the file name here. 
- fn_raw="nam.t${hh_arcv}z.prepbufr.tm${hrs_ago}.nr" - fp_raw="${arcv_dir_raw}/${fn_raw}" - - # Set the full path to the final processed obs file (fp_proc) we want to - # create. - sec_since_ref=$(${DATE_UTIL} --date "${yyyymmdd} ${hh} hours" +%s) - lhr=$(( (sec_since_ref - sec_since_ref_task)/3600 )) - eval_METplus_timestr_tmpl \ - init_time="${yyyymmdd_task}00" \ - fhr="${lhr}" \ - METplus_timestr_tmpl="${OBS_DIR}/${OBS_NDAS_ADPSFCorADPUPA_FN_TEMPLATE}" \ - outvarname_evaluated_timestr="fp_proc" - mkdir -p $( dirname "${fp_proc}" ) - - ${mv_or_cp} ${fp_raw} ${fp_proc} - - fi - done - - fi - -done -# -#----------------------------------------------------------------------- -# -# Clean up raw obs directories. -# -#----------------------------------------------------------------------- -# -if [[ $(boolify "${remove_raw_obs}") == "TRUE" ]]; then - print_info_msg "Removing raw obs directories..." - rm -rf ${basedir_raw} || print_err_msg_exit "\ -Failed to remove raw obs directories." -fi diff --git a/ush/get_obs_nohrsc.sh b/ush/get_obs_nohrsc.sh deleted file mode 100755 index 5c56f8a8df..0000000000 --- a/ush/get_obs_nohrsc.sh +++ /dev/null @@ -1,372 +0,0 @@ -#!/usr/bin/env bash - -# -#----------------------------------------------------------------------- -# -# Source the variable definitions file and the bash utility functions. -# -#----------------------------------------------------------------------- -# -. $USHdir/source_util_funcs.sh -for sect in user platform verification ; do - source_yaml ${GLOBAL_VAR_DEFNS_FP} ${sect} -done - -set -u -#set -x -# -#----------------------------------------------------------------------- -# -# This script performs several important tasks for preparing data for -# verification tasks. Depending on the value of the environment variable -# OBTYPE=(CCPA|MRMS|NDAS|NOHRSC), the script will prepare that particular data -# set. 
-# -# If data is not available on disk (in the location specified by -# CCPA_OBS_DIR, MRMS_OBS_DIR, NDAS_OBS_DIR, or NOHRSC_OBS_DIR respectively), -# the script attempts to retrieve the data from HPSS using the retrieve_data.py -# script. Depending on the data set, there are a few strange quirks and/or -# bugs in the way data is organized; see in-line comments for details. -# -# NOHRSC snow accumulation observations -# ---------- -# If data is available on disk, it must be in the following -# directory structure and file name conventions expected by verification -# tasks: -# -# {NOHRSC_OBS_DIR}/{YYYYMMDD}/sfav2_CONUS_{AA}h_{YYYYMMDD}{HH}_grid184.grb2 -# -# where AA is the 2-digit accumulation duration in hours: 06 or 24 -# -# METplus is configured to verify snowfall using 06- and 24-h accumulated -# snowfall from 6- and 12-hourly NOHRSC files, respectively. -# -# If data is retrieved from HPSS, it will automatically staged by this -# this script. -#----------------------------------------------------------------------- -# - -# The time interval (in hours) at which the obs are available on HPSS -# must divide evenly into 24. Otherwise, different days would have obs -# available at different hours-of-day. Make sure this is the case. -remainder=$(( 24 % NOHRSC_OBS_AVAIL_INTVL_HRS )) -if [ ${remainder} -ne 0 ]; then - print_err_msg_exit "\ -The obs availability interval NOHRSC_OBS_AVAIL_INTVL_HRS must divide evenly -into 24 but doesn't: - NOHRSC_OBS_AVAIL_INTVL_HRS = ${NOHRSC_OBS_AVAIL_INTVL_HRS} - 24 % NOHRSC_OBS_AVAIL_INTVL_HRS = ${remainder}" -fi - -# Accumulation period to use when getting obs files. This is simply (a -# properly formatted version of) the obs availability interval. -accum_obs_fmt=$( printf "%d" "${NOHRSC_OBS_AVAIL_INTVL_HRS}" ) - -# The day (in the form YYYMMDD) associated with the current task via the -# task's cycledefs attribute in the ROCOTO xml. 
-yyyymmdd_task=${PDY} - -# Seconds since some reference time that the DATE_UTIL utility uses of -# the day of the current task. This will be used below to find hours -# since the start of this day. -sec_since_ref_task=$(${DATE_UTIL} --date "${yyyymmdd_task} 0 hours" +%s) -# -#----------------------------------------------------------------------- -# -# Get the list of all the times in the current day at which to retrieve -# obs. This is an array with elements having format "YYYYMMDDHH". -# -#----------------------------------------------------------------------- -# -array_name="OBS_RETRIEVE_TIMES_${OBTYPE}_${yyyymmdd_task}" -eval obs_retrieve_times_crnt_day=\( \${${array_name}[@]} \) -# -#----------------------------------------------------------------------- -# -# Obs files will be obtained by extracting them from the relevant 24-hourly -# archives. Thus, we need the sequence of archive hours over which to -# loop. In the simplest case, this sequence will be "0 24". This will -# be the case if the observation retrieval times include all hours of the -# task's day and if none of the obs files for this day already exist on -# disk. In other cases, the sequence we loop over will be a subset of -# "0 24", e.g. just "0" or just "24". -# -# To generate this sequence, we first set its starting and ending values -# as well as the interval. -# -#----------------------------------------------------------------------- -# - -# Sequence interval must be 24 hours because the archives are 24-hourly. -arcv_hr_incr=24 - -# Initial guess for starting archive hour. This is set to the archive -# hour containing obs at the first obs retrieval time of the day. -hh_first=$(echo ${obs_retrieve_times_crnt_day[0]} | cut -c9-10) -hr_first=$((10#${hh_first})) -arcv_hr_start=$(( hr_first/arcv_hr_incr )) -arcv_hr_start=$(( arcv_hr_start*arcv_hr_incr )) - -# Ending archive hour. This is set to the archive hour containing obs at -# the last obs retrieval time of the day. 
-hh_last=$(echo ${obs_retrieve_times_crnt_day[-1]} | cut -c9-10) -hr_last=$((10#${hh_last})) -if [[ ${hr_last} -eq 0 ]]; then - arcv_hr_end=24 -else - arcv_hr_end=$(( hr_last/arcv_hr_incr )) - arcv_hr_end=$(( arcv_hr_end*arcv_hr_incr )) -fi - -# Check whether any obs files already exist on disk in their processed -# (i.e. final) locations. Here, by "processed" we mean after any renaming -# and rearrangement of files that this script may do to the "raw" files, -# i.e. the files as they are named and arranged within the archive (tar) -# files on HPSS. If so, adjust the starting archive hour. In the process, -# keep a count of the number of obs files that already exist on disk. -num_existing_files=0 -for yyyymmddhh in ${obs_retrieve_times_crnt_day[@]}; do - yyyymmdd=$(echo ${yyyymmddhh} | cut -c1-8) - hh=$(echo ${yyyymmddhh} | cut -c9-10) - - # Set the full path to the final processed obs file (fp_proc). - sec_since_ref=$(${DATE_UTIL} --date "${yyyymmdd} ${hh} hours" +%s) - lhr=$(( (sec_since_ref - sec_since_ref_task)/3600 )) - eval_METplus_timestr_tmpl \ - init_time="${yyyymmdd_task}00" \ - fhr="${lhr}" \ - METplus_timestr_tmpl="${OBS_DIR}/${OBS_NOHRSC_ASNOW_FN_TEMPLATE}" \ - outvarname_evaluated_timestr="fp_proc" - - if [[ -f ${fp_proc} ]]; then - num_existing_files=$((num_existing_files+1)) - print_info_msg " -File already exists on disk: - fp_proc = \"${fp_proc}\"" - else - hr=$((10#${hh})) - arcv_hr_start=$(( hr/arcv_hr_incr )) - arcv_hr_start=$(( arcv_hr_start*arcv_hr_incr )) - print_info_msg " -File does not exist on disk: - fp_proc = \"${fp_proc}\" -Setting the hour (since 00) of the first archive to retrieve to: - arcv_hr_start = \"${arcv_hr_start}\"" - break - fi -done - -# If the number of obs files that already exist on disk is equal to the -# number of obs files needed, then there is no need to retrieve any files. 
-num_obs_retrieve_times_crnt_day=${#obs_retrieve_times_crnt_day[@]} -if [[ ${num_existing_files} -eq ${num_obs_retrieve_times_crnt_day} ]]; then - - print_info_msg " -All obs files needed for the current day (yyyymmdd_task) already exist -on disk: - yyyymmdd_task = \"${yyyymmdd_task}\" -Thus, there is no need to retrieve any files." - exit - -# If the number of obs files that already exist on disk is not equal to -# the number of obs files needed, then we will need to retrieve files. -# In this case, set the sequence of hours corresponding to the archives -# from which files will be retrieved. -else - - arcv_hrs=($(seq ${arcv_hr_start} ${arcv_hr_incr} ${arcv_hr_end})) - arcv_hrs_str="( "$( printf "%s " "${arcv_hrs[@]}" )")" - print_info_msg " -At least some obs files needed needed for the current day (yyyymmdd_task) -do not exist on disk: - yyyymmdd_task = \"${yyyymmdd_task}\" -The number of obs files needed for the current day (which is equal to the -number of observation retrieval times for the current day) is: - num_obs_retrieve_times_crnt_day = ${num_obs_retrieve_times_crnt_day} -The number of obs files that already exist on disk is: - num_existing_files = ${num_existing_files} -Will retrieve remaining files by looping over archives corresponding to -the following hours (since 00 of this day): - arcv_hrs = ${arcv_hrs_str} -" - -fi -# -#----------------------------------------------------------------------- -# -# At this point, at least some obs files for the current day need to be -# retrieved. The NOHRSC data on HPSS are archived by day, with the -# archive for a given day containing 6-hour as well as 24-hour grib2 -# files. 
The four 6-hour files are for accumulated snowfall at 00z -# (which represents accumulation over the last 6 hours of the previous -# day), 06z, 12z, and 18z, while the two 24-hour files are at 00z (which -# represents accumulation over all 24 hours of the previous day) and 12z -# (which represents accumulation over the last 12 hours of the previous -# day plus the first 12 hours of the current day). -# -# Here, we will only obtain the 6-hour files. In other workflow tasks, -# the values in these 6-hour files will be added as necessary to obtain -# accumulations over longer periods (e.g. 24 hours). Since the four -# 6-hour files are in one archive and are relatively small (on the order -# of kilobytes), we get them all with a single call to the retrieve_data.py -# script. -# -#----------------------------------------------------------------------- -# - -# Whether to move the files or copy them from their raw to their processed -# locations. -#mv_or_cp="mv" -mv_or_cp="cp" -# Whether to remove raw observations after processed directories have -# been created from them. -remove_raw_obs="${REMOVE_RAW_OBS_NOHRSC}" -# If the raw directories and files are to be removed at the end of this -# script, no need to copy the files since the raw directories are going -# to be removed anyway. -if [[ $(boolify "${remove_raw_obs}") == "TRUE" ]]; then - mv_or_cp="mv" -fi - -# Base directory that will contain the archive subdirectories in which -# the files extracted from each archive (tar) file will be placed. We -# refer to this as the "raw" base directory because it contains files -# as they are found in the archives before any processing by this script. -basedir_raw="${OBS_DIR}/raw_${yyyymmdd_task}" - -for arcv_hr in ${arcv_hrs[@]}; do - - print_info_msg " -arcv_hr = ${arcv_hr}" - - # Calculate the time information for the current archive. 
- yyyymmddhh_arcv=$(${DATE_UTIL} --date "${yyyymmdd_task} ${arcv_hr} hours" +%Y%m%d%H) - yyyymmdd_arcv=$(echo ${yyyymmddhh_arcv} | cut -c1-8) - hh_arcv=$(echo ${yyyymmddhh_arcv} | cut -c9-10) - - # Directory that will contain the files retrieved from the current archive - # file. We refer to this as the "raw" archive directory because it will - # contain the files as they are in the archive before any processing by - # this script. - arcv_dir_raw="${basedir_raw}/${yyyymmdd_arcv}" - - # Check whether any of the obs retrieval times for the day associated with - # this task fall in the time interval spanned by the current archive. If - # so, set the flag (do_retrieve) to retrieve the files in the current - # archive. - arcv_contents_yyyymmddhh_start=$(${DATE_UTIL} --date "${yyyymmdd_arcv} ${hh_arcv}" +%Y%m%d%H) - hrs=$((arcv_hr_incr - 1)) - arcv_contents_yyyymmddhh_end=$(${DATE_UTIL} --date "${yyyymmdd_arcv} ${hh_arcv} ${hrs} hours" +%Y%m%d%H) - do_retrieve="FALSE" - for (( i=0; i<${num_obs_retrieve_times_crnt_day}; i++ )); do - obs_retrieve_time=${obs_retrieve_times_crnt_day[i]} - if [[ "${obs_retrieve_time}" -ge "${arcv_contents_yyyymmddhh_start}" ]] && \ - [[ "${obs_retrieve_time}" -le "${arcv_contents_yyyymmddhh_end}" ]]; then - do_retrieve="TRUE" - break - fi - done - - if [[ $(boolify "${do_retrieve}") != "TRUE" ]]; then - - print_info_msg " -None of the times in the current day (or hour 00 of the next day) at which -obs need to be retrieved fall in the range spanned by the current ${arcv_hr_incr}-hourly -archive file. 
The bounds of the data in the current archive file are: - arcv_contents_yyyymmddhh_start = \"${arcv_contents_yyyymmddhh_start}\" - arcv_contents_yyyymmddhh_end = \"${arcv_contents_yyyymmddhh_end}\" -The times at which obs need to be retrieved are: - obs_retrieve_times_crnt_day = ($(printf "\"%s\" " ${obs_retrieve_times_crnt_day[@]}))" - - else - - # Make sure the raw archive directory exists because it is used below as - # the output directory of the retrieve_data.py script (so if this directory - # doesn't already exist, that script will fail). Creating this directory - # also ensures that the raw base directory (basedir_raw) exists before we - # change location to it below. - mkdir -p ${arcv_dir_raw} - - # The retrieve_data.py script first extracts the contents of the archive - # file into the directory it was called from and then moves them to the - # specified output location (via the --output_path option). Note that - # the relative paths of obs files within archives associted with different - # days may be the same. Thus, if files with the same archive-relative - # paths are being simultaneously extracted from multiple archive files - # (by multiple get_obs tasks), they will likely clobber each other if the - # extracton is being carried out into the same location on disk. To avoid - # this, we first change location to the raw base directory (whose name is - # obs-day dependent) and then call the retrieve_data.py script. - cd ${basedir_raw} - - # Pull obs from HPSS. This will get all the obs files in the current - # archive and place them in the raw archive directory. - cmd=" - python3 -u ${USHdir}/retrieve_data.py \ - --debug \ - --file_set obs \ - --config ${PARMdir}/data_locations.yml \ - --cycle_date ${yyyymmddhh_arcv} \ - --data_stores hpss \ - --data_type NOHRSC_obs \ - --output_path ${arcv_dir_raw} \ - --summary_file retrieve_data.log" - - print_info_msg "CALLING: ${cmd}" - $cmd || print_err_msg_exit "Could not retrieve obs from HPSS." 
- - # Loop over the raw obs files extracted from the current archive and - # generate from them the processed obs files. - # - # For NOHRSC obs, this consists of simply copying or moving the files from - # the raw archive directory to the processed directory, possibly renaming - # them in the process. - for hrs in $(seq 0 ${NOHRSC_OBS_AVAIL_INTVL_HRS} 23); do - yyyymmddhh=$(${DATE_UTIL} --date "${yyyymmdd_arcv} ${hh_arcv} ${hrs} hours" +%Y%m%d%H) - yyyymmdd=$(echo ${yyyymmddhh} | cut -c1-8) - hh=$(echo ${yyyymmddhh} | cut -c9-10) - # Create the processed obs file from the raw one (by moving, copying, or - # otherwise) only if the time of the current file in the current archive - # also exists in the list of obs retrieval times for the current day. - if [[ ${obs_retrieve_times_crnt_day[@]} =~ ${yyyymmddhh} ]]; then - - # The raw file name needs to be the same as what the retrieve_data.py - # script called above ends up retrieving. The list of possibile templates - # for this name is given in parm/data_locations.yml, but which of those - # is actually used is not known until retrieve_data.py completes. Thus, - # that information needs to be passed back by the script and used here. - # For now, we hard-code the file name here. - fn_raw="sfav2_CONUS_${accum_obs_fmt}h_${yyyymmddhh}_grid184.grb2" - fp_raw="${arcv_dir_raw}/${fn_raw}" - - # Set the full path to the final processed obs file (fp_proc) we want to - # create. - sec_since_ref=$(${DATE_UTIL} --date "${yyyymmdd} ${hh} hours" +%s) - lhr=$(( (sec_since_ref - sec_since_ref_task)/3600 )) - eval_METplus_timestr_tmpl \ - init_time="${yyyymmdd_task}00" \ - fhr="${lhr}" \ - METplus_timestr_tmpl="${OBS_DIR}/${OBS_NOHRSC_ASNOW_FN_TEMPLATE}" \ - outvarname_evaluated_timestr="fp_proc" - mkdir -p $( dirname "${fp_proc}" ) - - ${mv_or_cp} ${fp_raw} ${fp_proc} - - fi - done - - fi - -done -# -#----------------------------------------------------------------------- -# -# Clean up raw obs directories. 
-# -#----------------------------------------------------------------------- -# -if [[ $(boolify "${remove_raw_obs}") == "TRUE" ]]; then - print_info_msg "Removing raw obs directories..." - rm -rf ${basedir_raw} || print_err_msg_exit "\ -Failed to remove raw obs directories." -fi diff --git a/ush/run_eval_METplus_timestr_tmpl.sh b/ush/run_eval_METplus_timestr_tmpl.sh new file mode 100755 index 0000000000..b2df03c56c --- /dev/null +++ b/ush/run_eval_METplus_timestr_tmpl.sh @@ -0,0 +1,28 @@ +#!/usr/bin/env bash + +# +#----------------------------------------------------------------------- +# +# Source the variable definitions file and the bash utility functions. +# +#----------------------------------------------------------------------- +# +#OBS_DIR="/scratch2/BMC/fv3lam/Gerard.Ketefian/AGILE/expt_dirs/get_obs_hpss.do_vx_det.singlecyc.init_00z_fcstlen_36hr.winter_wx.SRW/obs_data/ccpa" +#OBS_CCPA_APCP_FN_TEMPLATE="{valid?fmt=%Y%m%d}/ccpa.t{valid?fmt=%H}z.01h.hrap.conus.gb2" + +#USHdir="/scratch2/BMC/fv3lam/Gerard.Ketefian/AGILE/ufs-srweather-app/ush" +#yyyymmdd_task="20230217" +#lhr="22" +#METplus_timestr_tmpl="/scratch2/BMC/fv3lam/Gerard.Ketefian/AGILE/expt_dirs/get_obs_hpss.do_vx_det.singlecyc.init_00z_fcstlen_36hr.winter_wx.SRW/obs_data/ccpa/{valid?fmt=%Y%m%d}/ccpa.t{valid?fmt=%H}z.01h.hrap.conus.gb2" + +#USHdir="/scratch2/BMC/fv3lam/Gerard.Ketefian/AGILE/ufs-srweather-app/ush"; yyyymmdd_task="20230217"; lhr="22"; METplus_timestr_tmpl="/scratch2/BMC/fv3lam/Gerard.Ketefian/AGILE/expt_dirs/get_obs_hpss.do_vx_det.singlecyc.init_00z_fcstlen_36hr.winter_wx.SRW/obs_data/ccpa/{valid?fmt=%Y%m%d}/ccpa.t{valid?fmt=%H}z.01h.hrap.conus.gb2" +set -u +. 
$USHdir/source_util_funcs.sh +eval_METplus_timestr_tmpl \ + init_time="${yyyymmdd_task}00" \ + fhr="${lhr}" \ + METplus_timestr_tmpl="${METplus_timestr_tmpl}" \ + outvarname_evaluated_timestr="fp_proc" +echo "${fp_proc}" + +# METplus_timestr_tmpl="${OBS_DIR}/${OBS_CCPA_APCP_FN_TEMPLATE}" \ From 7d684057f7e73b75804549735fdd1fbf3830b5e7 Mon Sep 17 00:00:00 2001 From: gerard ketefian Date: Mon, 7 Oct 2024 12:51:33 -0600 Subject: [PATCH 101/131] Clean up and clarify comments; calculate list of processed obs file paths only once and save for later use; other minor code improvements. --- ush/get_obs.py | 202 ++++++++++++++++++++++++++++++++----------------- 1 file changed, 132 insertions(+), 70 deletions(-) diff --git a/ush/get_obs.py b/ush/get_obs.py index f6e2fed265..84e49e6f40 100644 --- a/ush/get_obs.py +++ b/ush/get_obs.py @@ -19,8 +19,8 @@ def get_obs_arcv_hr(obtype, arcv_intvl_hrs, hod): """ This file defines a function that, for the given observation type, obs - archive interval, and hour of day, returns the hour (counting from the - start of the day) corresponding to the archive file in which the obs file + archive interval, and hour of day, returns the hour (counting from hour + zero of the day) corresponding to the archive file in which the obs file for the given hour of day is included. Note that for cumulative fields (like CCPA and NOHRSC, as opposed to @@ -38,8 +38,7 @@ def get_obs_arcv_hr(obtype, arcv_intvl_hrs, hod): arcv_intvl_hrs: Time interval (in hours) between archive files. An integer. For example, if the obs files are bundled into 6-hourly archives, then this will be - set to 6. This must be between 1 and 24 and must divide evenly into 24 - (this is checked for elsewhere). + set to 6. This must be between 1 and 24 and must divide evenly into 24. hod: The hour of the day. An integer. This must be between 0 and 23. For @@ -52,26 +51,45 @@ def get_obs_arcv_hr(obtype, arcv_intvl_hrs, hod): the obs file for the given hour of day. An integer. 
""" - valid_obtypes = ['CCPA', 'ccpa', 'NOHRSC', 'nohrsc', 'MRMS', 'mrms', 'NDAS', 'ndas'] - if obtype not in valid_obtypes: + valid_obtypes = ['CCPA', 'NOHRSC', 'MRMS', 'NDAS'] + obtype_upper = obtype.upper() + if obtype_upper not in valid_obtypes: msg = dedent(f""" - The specified observation type is not supported: - obtype = {obtype} + The specified observation type (after converting to upper case) is not + supported: + obtype_upper = {obtype_upper} Valid observation types are: - {valid_obtypes} + {valid_obtypes} """) logging.error(msg) raise Exception(msg) + # Ensure that the archive inerval divides evenly into 24 hours. + remainder = 24 % arcv_intvl_hrs + if remainder != 0: + msg = dedent(f""" + The archive interval for obs of type {obtype} must divide evenly into 24 + but doesn't: + arcv_intvl_hrs = {arcv_intvl_hrs} + 24 % arcv_intvl_hrs = {remainder} + """) + logging.error(msg) + raise Exception(msg) + if (hod < 0) or (hod > 23): msg = dedent(f""" - The specified hour-of-day must be between 0 and 23, inclusive but isn't: - hod = {hod} + The specified hour-of-day must be between 0 and 23, inclusive, but isn't: + hod = {hod} """) logging.error(msg) raise Exception(msg) - obtype_upper = obtype.upper() + # Set the archive hour. This depends on the obs type because each obs + # type can organize its observation files into archives in a different + # way, e.g. a cumulative obs type may put the obs files for hours 1 + # through 6 of the day in the archive labeled with hour 6 while an + # instantaneous obs type may put the obs files for hours 0 through 5 of + # the day in the archive labeled with hour 6. 
if obtype_upper in ['CCPA']: if hod == 0: arcv_hr = 24 @@ -199,20 +217,48 @@ def get_obs(config, obtype, yyyymmdd_task): msg = dedent(f""" The obs availability interval for obs of type {obtype} must divide evenly into 24 but doesn't: - obs_avail_intvl_hrs = {obs_avail_intvl_hrs} - 24 % obs_avail_intvl_hrs = {remainder} + obs_avail_intvl_hrs = {obs_avail_intvl_hrs} + 24 % obs_avail_intvl_hrs = {remainder} """) + logging.error(msg) raise Exception(msg) - # For convenience, get obs availability interval as a datetime object. + # For convenience, convert the obs availability interval to a datetime + # object. obs_avail_intvl = dt.timedelta(hours=obs_avail_intvl_hrs) # Get the base directory for the observations. key = obtype + '_OBS_DIR' obs_dir = config['platform'][key] - # Set the group of fields for each observation type. We assume there is - # a separate obs file type for each such field group in the observations. + # For each observation type, set the group of fields contained in those + # observation files that we need for verification. Each group of fields + # is one that is verified together in the workflow. We assume there is + # a separate set of obs files for each such field group in the observations, + # and in the code below we loop over these sets of files as necessary. + # There are several scenarios to consider: + # + # * An obs type consists of only one set of files containing only one + # field. + # This is the case for CCPA and NOHRSC obs. CCPA obs consist only one + # set of files that contain APCP data, and NOHRSC obs consist of only + # one set of files that contain ASNOW data. + # + # * An obs type consists of more than one set of files, with each file + # containing a different field. + # This is the case for MRMS obs. These consist of two sets of files. + # The first set contains REFC data, and the second contains RETOP data. 
+ # + # * An obs type consists of only one set of files, but each file contains + # multiple groups of fields needed for verification. + # This is the case for NDAS obs. These consist of a single set of files, + # but each file contains both the ADPSFC fields (like 2-m temperature) + # and ADPUPA fields (like 500-mb temperature) that are verified separately + # in the workflow tasks and thus are considered separate field groups. + # + # Other obs type and field group scenarios are also possible, but we do + # not describe them since they are not applicable to any of the obs types + # considered here. if obtype == 'CCPA': field_groups_in_obs = ['APCP'] elif obtype == 'NOHRSC': @@ -225,8 +271,9 @@ def get_obs(config, obtype, yyyymmdd_task): # For each field group in the observations, get the METplus file name # template for the observation files. Then combine these with the base - # directory to get the METplus template for the full path to the processed - # obs files. + # directory to get the METplus template for the full path on disk to + # the processed obs files. If obs files do not already exist at these + # locations, they will be retrieved from HPSS and placed at these locations. fp_proc_templates = [] for fg in field_groups_in_obs: key = 'OBS_' + obtype + '_' + fg + '_FN_TEMPLATE' @@ -241,8 +288,8 @@ def get_obs(config, obtype, yyyymmdd_task): # # For cumulative obs, set the accumulation period to use when getting obs - # files. This is simply (a properly formatted version of) the obs - # availability interval. + # files. This is simply a properly formatted version of the obs availability + # interval. 
accum_obs_formatted = None if obtype == 'CCPA': accum_obs_formatted = f'{obs_avail_intvl_hrs:02d}' @@ -264,8 +311,8 @@ def get_obs(config, obtype, yyyymmdd_task): else: msg = dedent(f""" Invalid field specified for obs type: - obtype = {obtype} - field = {field} + obtype = {obtype} + field = {field} """) logging.error(msg) raise Exception(msg) @@ -433,19 +480,23 @@ def get_obs(config, obtype, yyyymmdd_task): one_hour = dt.timedelta(hours=1) ushdir = config['user']['USHdir'] - # Check whether any obs files already exist on disk in their processed - # (i.e. final) locations. Here, by "processed" we mean after any renaming - # and rearrangement of files that this script may do to the "raw" files, - # i.e. the files as they are named and arranged within the archive (tar) - # files on HPSS. If so, adjust the starting archive hour. In the process, - # keep a count of the number of obs files that already exist on disk. - num_existing_files = 0 - do_break = False - for yyyymmddhh in obs_retrieve_times_crnt_day: - - for fp_proc_templ in fp_proc_templates: - # Set the full path to the final processed obs file (fp_proc). + # Create dictionary containing the paths to all the processed obs files + # that should exist once this script successfully completes. In this + # dictionary, the keys are the field groups, and the values are lists of + # paths. Here, by "paths to processed files" we mean the paths after any + # renaming and rearrangement of files that this script may do to the "raw" + # files, i.e. the files as they are named and arranged within the archive + # (tar) files on HPSS. + all_fp_proc_dict = {} + for fg, fp_proc_templ in zip(field_groups_in_obs, fp_proc_templates): + all_fp_proc_dict[fg] = [] + for yyyymmddhh in obs_retrieve_times_crnt_day: + # Set the lead hour, i.e. the number of hours from the beginning of the + # day at which the file is valid. 
lhr = int((yyyymmddhh - yyyymmdd_task)/one_hour) + # Call a bash script to evaluate the template for the full path to the + # file containing METplus timestrings at the current time. This should + # be upgraded to a python script at some point. cmd = '; '.join(['export USHdir=' + ushdir, 'export yyyymmdd_task=' + yyyymmdd_task_str, 'export lhr=' + str(lhr), @@ -453,13 +504,22 @@ def get_obs(config, obtype, yyyymmdd_task): os.path.join(ushdir, 'run_eval_METplus_timestr_tmpl.sh')]) result = subprocess.run(cmd, shell=True, capture_output=True, text=True) fp_proc = result.stdout.strip() + all_fp_proc_dict[fg].append(fp_proc) - # Check whether file already exists. + # Check whether any obs files already exist on disk in their processed + # (i.e. final) locations. If so, adjust the starting archive hour. In + # the process, keep a count of the number of obs files that already exist + # on disk. + num_existing_files = 0 + do_break = False + for fg in field_groups_in_obs: + for yyyymmddhh, fp_proc in zip(obs_retrieve_times_crnt_day, all_fp_proc_dict[fg]): + # Check whether the processed file already exists. 
if os.path.isfile(fp_proc): num_existing_files += 1 msg = dedent(f""" File already exists on disk: - fp_proc = {fp_proc} + fp_proc = {fp_proc} """) logging.info(msg) else: @@ -467,15 +527,14 @@ def get_obs(config, obtype, yyyymmdd_task): arcv_hr_start = get_obs_arcv_hr(obtype, arcv_intvl_hrs, hod) msg = dedent(f""" File does not exist on disk: - fp_proc = {fp_proc} + fp_proc = {fp_proc} Setting the hour (since hour 0 of the current task day) of the first archive to retrieve to: - arcv_hr_start = {arcv_hr_start} + arcv_hr_start = {arcv_hr_start} """) logging.info(msg) do_break = True break - if do_break: break # If the number of obs files that already exist on disk is equal to the @@ -487,7 +546,7 @@ def get_obs(config, obtype, yyyymmdd_task): msg = dedent(f""" All obs files needed for the current day (yyyymmdd_task) already exist on disk: - yyyymmdd_task = {yyyymmdd_task} + yyyymmdd_task = {yyyymmdd_task} Thus, there is no need to retrieve any files. """) logging.info(msg) @@ -503,14 +562,14 @@ def get_obs(config, obtype, yyyymmdd_task): msg = dedent(f""" At least some obs files needed needed for the current day (yyyymmdd_task) do not exist on disk: - yyyymmdd_task = {yyyymmdd_task} + yyyymmdd_task = {yyyymmdd_task} The number of obs files needed for the current day is: - num_files_needed = {num_files_needed} + num_files_needed = {num_files_needed} The number of obs files that already exist on disk is: - num_existing_files = {num_existing_files} + num_existing_files = {num_existing_files} Will retrieve remaining files by looping over archives corresponding to the following hours (since hour 0 of the current day): - arcv_hrs = {arcv_hrs} + arcv_hrs = {arcv_hrs} """) logging.info(msg) # @@ -617,10 +676,10 @@ def get_obs(config, obtype, yyyymmdd_task): hour 0 of the next day if considering a cumulative obs type) fall in the range spanned by the current {arcv_intvl_hrs}-hourly archive file. 
The bounds of the data in the current archive are: - arcv_contents_start = {arcv_contents_start} - arcv_contents_end = {arcv_contents_end} + arcv_contents_start = {arcv_contents_start} + arcv_contents_end = {arcv_contents_end} The times at which obs need to be retrieved are: - obs_retrieve_times_crnt_day = {obs_retrieve_times_crnt_day} + obs_retrieve_times_crnt_day = {obs_retrieve_times_crnt_day} """) logging.info(msg) @@ -665,6 +724,18 @@ def get_obs(config, obtype, yyyymmdd_task): result = subprocess.run(cmd, shell=True, capture_output=True, text=True) rc = result.returncode + # Get the list of times corresponding to the obs files in the current + # archive. This is a list of datetime objects. + if obtype == 'CCPA': + obs_times_in_arcv = [yyyymmddhh_arcv - i*obs_avail_intvl for i in range(0,num_obs_files_per_arcv)] + elif obtype == 'NOHRSC': + obs_times_in_arcv = [yyyymmddhh_arcv + i*obs_avail_intvl for i in range(0,num_obs_files_per_arcv)] + elif obtype == 'MRMS': + obs_times_in_arcv = [yyyymmddhh_arcv + i*obs_avail_intvl for i in range(0,num_obs_files_per_arcv)] + elif obtype == 'NDAS': + obs_times_in_arcv = [yyyymmddhh_arcv - (i+1)*obs_avail_intvl for i in range(0,num_obs_files_per_arcv)] + obs_times_in_arcv.sort() + # Loop over the raw obs files extracted from the current archive and # generate from them the processed obs files. # @@ -685,24 +756,21 @@ def get_obs(config, obtype, yyyymmdd_task): # them in the process. Note that the tm06 file in a given archive contain # more/better observations than the tm00 file in the next archive (their # valid times are equivalent), so we use the tm06 files. 
- if obtype == 'CCPA': - in_arcv_times = [yyyymmddhh_arcv - i*obs_avail_intvl for i in range(0,num_obs_files_per_arcv)] - elif obtype == 'NOHRSC': - in_arcv_times = [yyyymmddhh_arcv + i*obs_avail_intvl for i in range(0,num_obs_files_per_arcv)] - elif obtype == 'MRMS': - in_arcv_times = [yyyymmddhh_arcv + i*obs_avail_intvl for i in range(0,num_obs_files_per_arcv)] - elif obtype == 'NDAS': - in_arcv_times = [yyyymmddhh_arcv - (i+1)*obs_avail_intvl for i in range(0,num_obs_files_per_arcv)] - in_arcv_times.sort() - - for yyyymmddhh in in_arcv_times: + for yyyymmddhh in obs_times_in_arcv: # Create the processed obs file from the raw one (by moving, copying, or # otherwise) only if the time of the current file in the current archive - # also exists in the list of obs retrieval times for the current day. + # also exists in the list of obs retrieval times for the current day. We + # need to check this because it is possible that some of the obs retrieval + # times come before the range of times spanned by the current archive while + # the others come after, but none fall within that range. This can happen + # because the set of archive hours over which we are looping were constructed + # above without considering whether there are obs retrieve time gaps that + # make it unnecessary to retrieve some of the archives between the first + # and last ones that must be retrieved. if yyyymmddhh in obs_retrieve_times_crnt_day: - for i, fp_proc_templ in enumerate(fp_proc_templates): + for i, fg in enumerate(field_groups_in_obs): # For MRMS obs, first select from the set of raw files for the current day # those that are nearest in time to the current hour. 
Unzip these in a @@ -752,16 +820,10 @@ def get_obs(config, obtype, yyyymmdd_task): fn_raw = 'nam.t' + hh_arcv_str + 'z.prepbufr.tm' + f'{hrs_ago:02d}' + '.nr' fp_raw = os.path.join(arcv_dir_raw, fn_raw) - # Set the full path to the final processed obs file (fp_proc) we want to + # Get the full path to the final processed obs file (fp_proc) we want to # create. - lhr = int((yyyymmddhh - yyyymmdd_task)/one_hour) - cmd = '; '.join(['export USHdir=' + ushdir, - 'export yyyymmdd_task=' + yyyymmdd_task_str, - 'export lhr=' + str(lhr), - 'export METplus_timestr_tmpl=' + fp_proc_templ, - os.path.join(ushdir, 'run_eval_METplus_timestr_tmpl.sh')]) - result = subprocess.run(cmd, shell=True, capture_output=True, text=True) - fp_proc = result.stdout.strip() + indx = obs_retrieve_times_crnt_day.index(yyyymmddhh) + fp_proc = all_fp_proc_dict[fg][indx] # Make sure the directory in which the processed file will be created exists. dir_proc = os.path.dirname(fp_proc) From 2b4c9569bcbb70656ed99edc0c16b4162ea61b6c Mon Sep 17 00:00:00 2001 From: gerard ketefian Date: Mon, 7 Oct 2024 16:22:52 -0600 Subject: [PATCH 102/131] Minor cleanup. --- scripts/exregional_get_verif_obs.sh | 7 ++++--- 1 file changed, 4 insertions(+), 3 deletions(-) diff --git a/scripts/exregional_get_verif_obs.sh b/scripts/exregional_get_verif_obs.sh index 158218889e..d1ee4116e8 100755 --- a/scripts/exregional_get_verif_obs.sh +++ b/scripts/exregional_get_verif_obs.sh @@ -46,8 +46,8 @@ done # #----------------------------------------------------------------------- # -# Make sure the obs type is valid. Then call the python script get_obs.py -# to get the obs files. +# Make sure the obs type is valid. Then call a python script to check +# for the presence of obs files on disk and get them if needed. 
# #----------------------------------------------------------------------- # @@ -67,7 +67,8 @@ python3 -u ${USHdir}/${script_bn}.py \ --var_defns_path "${GLOBAL_VAR_DEFNS_FP}" \ --obtype ${OBTYPE} \ --obs_day ${PDY}" -print_info_msg "CALLING: ${cmd}" +print_info_msg " +CALLING: ${cmd}" ${cmd} || print_err_msg_exit "Error calling ${script_bn}.py." # #----------------------------------------------------------------------- From a35f240400709838ebda05198891e2fde5e804cc Mon Sep 17 00:00:00 2001 From: gerard ketefian Date: Mon, 7 Oct 2024 16:32:47 -0600 Subject: [PATCH 103/131] Remove unneeded environment variables. --- parm/wflow/verify_pre.yaml | 4 ---- 1 file changed, 4 deletions(-) diff --git a/parm/wflow/verify_pre.yaml b/parm/wflow/verify_pre.yaml index 220b029412..567f045188 100644 --- a/parm/wflow/verify_pre.yaml +++ b/parm/wflow/verify_pre.yaml @@ -29,7 +29,6 @@ task_get_obs_ccpa: command: '&LOAD_MODULES_RUN_TASK; "get_obs" "&JOBSdir;/JREGIONAL_GET_VERIF_OBS"' envars: <<: *default_vars - OBS_DIR: '&CCPA_OBS_DIR;' OBTYPE: 'CCPA' native: '{% if platform.get("SCHED_NATIVE_CMD_HPSS") %}{{ platform.SCHED_NATIVE_CMD_HPSS }}{% else %}{{ platform.SCHED_NATIVE_CMD}}{% endif %}' partition: '{% if platform.get("PARTITION_HPSS") %}&PARTITION_HPSS;{% else %}None{% endif %}' @@ -41,7 +40,6 @@ task_get_obs_nohrsc: command: '&LOAD_MODULES_RUN_TASK; "get_obs" "&JOBSdir;/JREGIONAL_GET_VERIF_OBS"' envars: <<: *default_vars - OBS_DIR: '&NOHRSC_OBS_DIR;' OBTYPE: 'NOHRSC' native: '{% if platform.get("SCHED_NATIVE_CMD_HPSS") %}{{ platform.SCHED_NATIVE_CMD_HPSS }}{% else %}{{ platform.SCHED_NATIVE_CMD}}{% endif %}' partition: '{% if platform.get("PARTITION_HPSS") %}&PARTITION_HPSS;{% else %}None{% endif %}' @@ -53,7 +51,6 @@ task_get_obs_mrms: command: '&LOAD_MODULES_RUN_TASK; "get_obs" "&JOBSdir;/JREGIONAL_GET_VERIF_OBS"' envars: <<: *default_vars - OBS_DIR: '&MRMS_OBS_DIR;' OBTYPE: 'MRMS' MRMS_FIELDS: 'REFC RETOP' native: '{% if platform.get("SCHED_NATIVE_CMD_HPSS") %}{{ 
platform.SCHED_NATIVE_CMD_HPSS }}{% else %}{{ platform.SCHED_NATIVE_CMD}}{% endif %}' @@ -66,7 +63,6 @@ task_get_obs_ndas: command: '&LOAD_MODULES_RUN_TASK; "get_obs" "&JOBSdir;/JREGIONAL_GET_VERIF_OBS"' envars: <<: *default_vars - OBS_DIR: '&NDAS_OBS_DIR;' OBTYPE: 'NDAS' queue: "&QUEUE_HPSS;" native: '{% if platform.get("SCHED_NATIVE_CMD_HPSS") %}{{ platform.SCHED_NATIVE_CMD_HPSS }}{% else %}{{ platform.SCHED_NATIVE_CMD}}{% endif %}' From 9d7c0478b09e3637ad8e8967cb5ea4f0582030af Mon Sep 17 00:00:00 2001 From: gerard ketefian Date: Mon, 7 Oct 2024 16:38:18 -0600 Subject: [PATCH 104/131] Move the two sets of variables [CCPA|NOHRSC|MRMS|NDAS]_OBS_DIR and REMOVE_RAW_OBS_[CCPA|NOHRSC|MRMS|NDAS] in the default app configuration file from the "platform" section to the "verification" section so that they are closer to the METplus file name template variables OBS_[...]_FN_TEMPLATE that they are closely coupled with. --- parm/wflow/default_workflow.yaml | 8 +- tests/WE2E/run_WE2E_tests.py | 6 +- ush/config_defaults.yaml | 186 +++++++++++++++---------------- ush/get_obs.py | 4 +- 4 files changed, 99 insertions(+), 105 deletions(-) diff --git a/parm/wflow/default_workflow.yaml b/parm/wflow/default_workflow.yaml index 39b66fc95c..4ffb6f288a 100644 --- a/parm/wflow/default_workflow.yaml +++ b/parm/wflow/default_workflow.yaml @@ -4,7 +4,7 @@ rocoto: entities: ACCOUNT: '{{ user.ACCOUNT }}' - CCPA_OBS_DIR: '{{ platform.CCPA_OBS_DIR }}' + CCPA_OBS_DIR: '{{ verification.CCPA_OBS_DIR }}' COLDSTART: '{{ workflow.COLDSTART }}' COMINgfs: '{{ platform.get("COMINgfs") }}' GLOBAL_VAR_DEFNS_FP: '{{ workflow.GLOBAL_VAR_DEFNS_FP }}' @@ -14,10 +14,10 @@ rocoto: LOAD_MODULES_RUN_TASK: '{{ workflow.LOAD_MODULES_RUN_TASK_FP }} {{ user.MACHINE }}' LOGEXT: ".log" NET: '{{ nco.NET_default }}' - MRMS_OBS_DIR: '{{ platform.MRMS_OBS_DIR }}' + MRMS_OBS_DIR: '{{ verification.MRMS_OBS_DIR }}' NCORES_PER_NODE: '{{ platform.NCORES_PER_NODE }}' - NDAS_OBS_DIR: '{{ platform.NDAS_OBS_DIR }}' - NOHRSC_OBS_DIR: 
'{{ platform.NOHRSC_OBS_DIR }}' + NDAS_OBS_DIR: '{{ verification.NDAS_OBS_DIR }}' + NOHRSC_OBS_DIR: '{{ verification.NOHRSC_OBS_DIR }}' PARTITION_DEFAULT: '{{ platform.get("PARTITION_DEFAULT") }}' PARTITION_FCST: '{{ platform.get("PARTITION_FCST") }}' PARTITION_HPSS: '{{ platform.get("PARTITION_HPSS") }}' diff --git a/tests/WE2E/run_WE2E_tests.py b/tests/WE2E/run_WE2E_tests.py index d3c2cb98ab..6a3e3bc7f4 100755 --- a/tests/WE2E/run_WE2E_tests.py +++ b/tests/WE2E/run_WE2E_tests.py @@ -207,13 +207,11 @@ def run_we2e_tests(homedir, args) -> None: # obs. If so, and if the config file does not explicitly set the observation locations, # fill these in with defaults from the machine files obs_vars = ['CCPA_OBS_DIR','MRMS_OBS_DIR','NDAS_OBS_DIR','NOHRSC_OBS_DIR'] - if 'platform' not in test_cfg: - test_cfg['platform'] = {} for obvar in obs_vars: mach_path = machine_defaults['platform'].get('TEST_'+obvar) - if not test_cfg['platform'].get(obvar) and mach_path: + if not test_cfg['verification'].get(obvar) and mach_path: logging.debug(f'Setting {obvar} = {mach_path} from machine file') - test_cfg['platform'][obvar] = mach_path + test_cfg['verification'][obvar] = mach_path if args.compiler == "gnu": # 2D decomposition doesn't work with GNU compilers. Deactivate 2D decomposition for GNU diff --git a/ush/config_defaults.yaml b/ush/config_defaults.yaml index 8a02964cc2..9750724494 100644 --- a/ush/config_defaults.yaml +++ b/ush/config_defaults.yaml @@ -276,72 +276,6 @@ platform: # #----------------------------------------------------------------------- # - # Set METplus parameters. Definitions: - # - # CCPA_OBS_DIR: - # User-specified location of the directory where CCPA hourly - # precipitation files used by METplus are located (or, if - # retrieved by the workflow, where they will be placed). 
See comments - # in file scripts/exregional_get_verif_obs.sh for more details about - # files and directory structure, as well as important caveats about - # errors in the metadata and file names. - # NOTE: Do not set this to the same path as other *_OBS_DIR variables; - # otherwise unexpected results and data loss may occur. - # - # NOHRSC_OBS_DIR: - # User-specified location of top-level directory where NOHRSC 6- and - # 24-hour snowfall accumulation files used by METplus are located (or, - # if retrieved by the workflow, where they will be placed). See comments - # in file scripts/exregional_get_verif_obs.sh for more details about - # files and directory structure - # NOTE: Do not set this to the same path as other *_OBS_DIR variables; - # otherwise unexpected results and data loss may occur. - # - # MRMS_OBS_DIR: - # User-specified location of the directory where MRMS composite - # reflectivity and echo top files used by METplus are located (or, if - # retrieved by the workflow, where they will be placed). See comments - # in the scripts/exregional_get_verif_obs.sh for more details about - # files and directory structure. - # NOTE: Do not set this to the same path as other *_OBS_DIR variables; - # otherwise unexpected results and data loss may occur. - # - # NDAS_OBS_DIR: - # User-specified location of top-level directory where NDAS prepbufr - # files used by METplus are located (or, if retrieved by the workflow, - # where they will be placed). See comments in file - # scripts/exregional_get_verif_obs.sh for more details about files - # and directory structure. - # NOTE: Do not set this to the same path as other *_OBS_DIR variables; - # otherwise unexpected results and data loss may occur. 
- # - #----------------------------------------------------------------------- - # - CCPA_OBS_DIR: "{{ workflow.EXPTDIR }}/obs_data/ccpa" - NOHRSC_OBS_DIR: "{{ workflow.EXPTDIR }}/obs_data/nohrsc" - MRMS_OBS_DIR: "{{ workflow.EXPTDIR }}/obs_data/mrms" - NDAS_OBS_DIR: "{{ workflow.EXPTDIR }}/obs_data/ndas" - # - #----------------------------------------------------------------------- - # - # REMOVE_RAW_OBS_DIRS_[CCPA|MRMS|NDAS|NOHRSC]: - # Boolean flag specifying whether to remove the "raw" observation - # directories after pulling the specified type of obs (CCPA, MRMS, - # NDAS, or NOHRSC). The raw directories are the ones in which the - # observation files are placed immediately after pulling them from - # a data store (e.g. NOAA's HPSS) but before performing any processing - # on them (e.g. renaming the files or reorganizing their directory - # structure). - # - #----------------------------------------------------------------------- - # - REMOVE_RAW_OBS_CCPA: true - REMOVE_RAW_OBS_MRMS: true - REMOVE_RAW_OBS_NDAS: true - REMOVE_RAW_OBS_NOHRSC: true - # - #----------------------------------------------------------------------- - # # DOMAIN_PREGEN_BASEDIR: # The base directory containing pregenerated grid, orography, and surface # climatology files. This is an alternative for setting GRID_DIR, @@ -2423,37 +2357,66 @@ verification: # METPLUS_VERBOSITY_LEVEL: 2 # - # Time interval (in hours) at which various types of obs are available on - # NOAA's HPSS. - CCPA_OBS_AVAIL_INTVL_HRS: 1 - NOHRSC_OBS_AVAIL_INTVL_HRS: 6 - MRMS_OBS_AVAIL_INTVL_HRS: 1 - NDAS_OBS_AVAIL_INTVL_HRS: 1 + # [CCPA|NOHRSC|MRMS|NDAS]_OBS_DIR: + # Base directory in which CCPA, NOHRSC, MRMS, or NDAS obs files needed by + # the verification tasks are located. If the files do not exist, they + # will be retrieved and placed under this directory. # - # Templates for CCPA, MRMS, and NDAS observation files. 
- # - # OBS_CCPA_APCP_FN_TEMPLATE: - # File name template for CCPA accumulated precipitation (APCP) observations. - # This template is used by the workflow tasks that call the METplus PcpCombine - # tool on CCPA obs to find the input observation files containing 1-hour - # APCP and then generate NetCDF files containing either 1-hour or greater - # than 1-hour APCP. - # - # OBS_NOHRSC_ASNOW_FN_TEMPLATE: - # File name template for NOHRSC snow observations. - # - # OBS_MRMS_REFC_FN_TEMPLATE: - # File name template for MRMS reflectivity observations. - # - # OBS_MRMS_RETOP_FN_TEMPLATE: - # File name template for MRMS echo top observations. + # Notes: # - # OBS_NDAS_ADPSFCandADPUPA_FN_TEMPLATE: - # File name template for NDAS surface and upper air observations. - # This template is used by the workflow tasks that call the METplus Pb2nc - # tool on NDAS obs to find the input observation files containing ADP - # surface (ADPSFC) or ADP upper air (ADPUPA) fields and then generate - # NetCDF versions of these files. + # * If the obs files need to be retrieved (e.g. from NOAA's HPSS), then + # the user must have write permission to this directory. Otherwise, + # the get_obs tasks that attempt to create these files will fail. + # + # * Do not set two or more of these directories to the same location. + # Otherwise, unexpected results and data loss may occur. + # + # * The script ush/get_obs.py contains further details on the files and + # directory structure of each obs type. + # + # * CCPA obs contain errors in the metadata for a certain range of dates + # that need to be corrected during obs retrieval. This is described + # in more detail in ush/get_obs.py. 
+ # + CCPA_OBS_DIR: "{{ workflow.EXPTDIR }}/obs_data/ccpa" + NOHRSC_OBS_DIR: "{{ workflow.EXPTDIR }}/obs_data/nohrsc" + MRMS_OBS_DIR: "{{ workflow.EXPTDIR }}/obs_data/mrms" + NDAS_OBS_DIR: "{{ workflow.EXPTDIR }}/obs_data/ndas" + # + # OBS_[CCPA_APCP|NOHRSC_ASNOW|MRMS_[REFC|RETOP]|NDAS_ADPSFCandADPUPA]_FN_TEMPLATE: + # File name templates for various obs type and vx field group combinations. + # + # Notes: + # + # * These are relative to the obs base directories + # [CCPA|NOHRSC|MRMS|NDAS]_OBS_DIR + # defined above. Thus, the full template to the obs files is given, e.g. + # for CCPA obs, by {CCPA_OBS_DIR}/{OBS_CCPA_APCP_FN_TEMPLATE}. + # + # * These may represent file names only, or they may include relative paths + # before the file names. + # + # * These templates must contain full information about the year, month, + # day, and hour by including METplus time strings that serve as templates + # for this information. Some of this information may be in the relative + # directory portion and the rest in the file name, or there may be no + # relative directory portion and all of it may be in the file name, but + # all four pieces of timing information must be present somewhere in + # this template as METplus time strings. Otherwise, obs files created + # by the get_obs tasks for different days might overwrite each other. + # + # * If one or more of the obs files specified by this full path do not + # exist on disk, all the files will be created by first retrieving "raw" + # versions of them from a data store (e.g. NOAA's HPSS) and then placing + # these raw files in the locations specified by this full path template. + # + # * The raw obs files, i.e. the obs files as they are named and arranged + # in the data stores, may be different than the file path/name specified + # in these variables. The list of templates for raw files to search + # for in the data stores is given in the data retrieval configuration + # file at parm/data_locations.yml. 
Once retrieved, these raw files are + # renamed and relocated on disk to the locations specified by + # {..._OBS_DIR}/{..._FN_TEMPLATE}. # OBS_CCPA_APCP_FN_TEMPLATE: '{%- set data_intvl_hrs = "%02d" % CCPA_OBS_AVAIL_INTVL_HRS %} {{- "{valid?fmt=%Y%m%d}/ccpa.t{valid?fmt=%H}z." ~ data_intvl_hrs ~ "h.hrap.conus.gb2" }}' @@ -2462,6 +2425,32 @@ verification: OBS_MRMS_REFC_FN_TEMPLATE: '{valid?fmt=%Y%m%d}/MergedReflectivityQCComposite_00.50_{valid?fmt=%Y%m%d}-{valid?fmt=%H%M%S}.grib2' OBS_MRMS_RETOP_FN_TEMPLATE: '{valid?fmt=%Y%m%d}/EchoTop_18_00.50_{valid?fmt=%Y%m%d}-{valid?fmt=%H%M%S}.grib2' OBS_NDAS_ADPSFCandADPUPA_FN_TEMPLATE: 'prepbufr.ndas.{valid?fmt=%Y%m%d%H}' + # + # Time interval (in hours) at which various types of obs are available on + # NOAA's HPSS. + # + # Note that MRMS files are in fact available every few minutes, but here + # we set the obs availability interval to 1 hour because the forecast + # cannot (yet) support sub-hourly output. + # + CCPA_OBS_AVAIL_INTVL_HRS: 1 + NOHRSC_OBS_AVAIL_INTVL_HRS: 6 + MRMS_OBS_AVAIL_INTVL_HRS: 1 + NDAS_OBS_AVAIL_INTVL_HRS: 1 + # + # REMOVE_RAW_OBS_DIRS_[CCPA|MRMS|NDAS|NOHRSC]: + # Boolean flag specifying whether to remove the "raw" observation + # directories after pulling the specified type of obs (CCPA, NOHRSC, + # MRMS, or NOHRSC). The raw directories are the ones in which the + # observation files are placed immediately after pulling them from + # a data store (e.g. NOAA's HPSS) but before performing any processing + # on them (e.g. renaming the files or reorganizing their directory + # structure). + # + REMOVE_RAW_OBS_CCPA: true + REMOVE_RAW_OBS_MRMS: true + REMOVE_RAW_OBS_NDAS: true + REMOVE_RAW_OBS_NOHRSC: true # # OBS_CCPA_APCP_FN_TEMPLATE_PCPCOMBINE_OUTPUT: # Template used to specify the names of the output NetCDF observation @@ -2470,6 +2459,13 @@ verification: # APCP, both for 1 hour and for > 1 hour accumulation periods, in NetCDF # format.) 
# + # OBS_NOHRSC_ASNOW_FN_TEMPLATE_PCPCOMBINE_OUTPUT: + # Template used to specify the names of the output NetCDF observation + # files generated by the worfklow verification tasks that call the METplus + # PcpCombine tool on NOHRSC observations. (These files will contain obs + # APCP, both for 1 hour and for > 1 hour accumulation periods, in NetCDF + # format.) + # # OBS_NDAS_ADPSFCandADPUPA_FN_TEMPLATE_PB2NC_OUTPUT: # Template used to specify the names of the output NetCDF observation # files generated by the worfklow verification tasks that call the diff --git a/ush/get_obs.py b/ush/get_obs.py index 84e49e6f40..b70d8c3ea9 100644 --- a/ush/get_obs.py +++ b/ush/get_obs.py @@ -229,7 +229,7 @@ def get_obs(config, obtype, yyyymmdd_task): # Get the base directory for the observations. key = obtype + '_OBS_DIR' - obs_dir = config['platform'][key] + obs_dir = vx_config[key] # For each observation type, set the group of fields contained in those # observation files that we need for verification. Each group of fields @@ -603,7 +603,7 @@ def get_obs(config, obtype, yyyymmdd_task): # Whether to remove raw observations after processed directories have # been created from them. key = 'REMOVE_RAW_OBS_' + obtype - remove_raw_obs = config['platform'][key] + remove_raw_obs = vx_config[key] # If the raw directories and files are to be removed at the end of this # script, no need to copy the files since the raw directories are going # to be removed anyway. From 9ac85a30cf2cb20682398c9b488acb160e34ee08 Mon Sep 17 00:00:00 2001 From: gerard ketefian Date: Mon, 7 Oct 2024 17:21:33 -0600 Subject: [PATCH 105/131] Fixes to WE2E test config files to reflect moving of variables from "platform" section to "verification" section in config_defaults.yaml. 
--- ...g.custom_ESGgrid_Great_Lakes_snow_8km.yaml | 5 +-- ...nsemble_verification_only_vx_time_lag.yaml | 10 ++--- ...7hr_inits_vary_fcstlen_09hr.ncep-hrrr.yaml | 37 ++++++++++--------- ...1hr_inits_vary_fcstlen_03hr.ncep-hrrr.yaml | 37 ++++++++++--------- ...24hr_inits_00z_fcstlen_03hr.ncep-hrrr.yaml | 37 ++++++++++--------- ...24hr_inits_12z_fcstlen_03hr.nssl-mpas.yaml | 37 ++++++++++--------- ...24hr_inits_12z_fcstlen_48hr.nssl-mpas.yaml | 37 ++++++++++--------- ...24hr_inits_21z_fcstlen_03hr.ncep-hrrr.yaml | 37 ++++++++++--------- ...96hr_inits_12z_fcstlen_48hr.nssl-mpas.yaml | 37 ++++++++++--------- ...c.init_00z_fcstlen_36hr.winter_wx.SRW.yaml | 37 ++++++++++--------- 10 files changed, 159 insertions(+), 152 deletions(-) diff --git a/tests/WE2E/test_configs/custom_grids/config.custom_ESGgrid_Great_Lakes_snow_8km.yaml b/tests/WE2E/test_configs/custom_grids/config.custom_ESGgrid_Great_Lakes_snow_8km.yaml index d773c632e2..0caffe5a46 100644 --- a/tests/WE2E/test_configs/custom_grids/config.custom_ESGgrid_Great_Lakes_snow_8km.yaml +++ b/tests/WE2E/test_configs/custom_grids/config.custom_ESGgrid_Great_Lakes_snow_8km.yaml @@ -55,10 +55,9 @@ task_run_fcst: task_run_post: POST_OUTPUT_DOMAIN_NAME: custom_ESGgrid_Michigan_Ontario verification: - VX_FCST_MODEL_NAME: Michigan_Ontario_snow_8km - VX_FIELDS: [ "APCP", "REFC", "RETOP", "ADPSFC", "ADPUPA", "ASNOW" ] -platform: CCPA_OBS_DIR: '{{ workflow.EXPTDIR }}/CCPA_obs' MRMS_OBS_DIR: '{{ workflow.EXPTDIR }}/MRMS_obs' NDAS_OBS_DIR: '{{ workflow.EXPTDIR }}/NDAS_obs' NOHRSC_OBS_DIR: '{{ workflow.EXPTDIR }}/NOHRSC_obs' + VX_FCST_MODEL_NAME: Michigan_Ontario_snow_8km + VX_FIELDS: [ "APCP", "REFC", "RETOP", "ADPSFC", "ADPUPA", "ASNOW" ] diff --git a/tests/WE2E/test_configs/verification/config.MET_ensemble_verification_only_vx_time_lag.yaml b/tests/WE2E/test_configs/verification/config.MET_ensemble_verification_only_vx_time_lag.yaml index d0edccca01..f26ae7db21 100644 --- 
a/tests/WE2E/test_configs/verification/config.MET_ensemble_verification_only_vx_time_lag.yaml +++ b/tests/WE2E/test_configs/verification/config.MET_ensemble_verification_only_vx_time_lag.yaml @@ -28,14 +28,14 @@ global: NUM_ENS_MEMBERS: 2 ENS_TIME_LAG_HRS: '[ 0, 12 ]' -# If the following is commented out, then the obs files staged on each -# platform will be (found and) used. -platform: + +verification: + # If the following is commented out, then the obs files staged on each + # platform will be (found and) used. CCPA_OBS_DIR: '{{ workflow.EXPTDIR }}/obs_data/ccpa/proc' MRMS_OBS_DIR: '{{ workflow.EXPTDIR }}/obs_data/mrms/proc' NDAS_OBS_DIR: '{{ workflow.EXPTDIR }}/obs_data/ndas/proc' - -verification: + # VX_FCST_MODEL_NAME: FV3_GFS_v15p2_CONUS_25km VX_FCST_INPUT_BASEDIR: '{{ platform.get("TEST_VX_FCST_INPUT_BASEDIR") }}' VX_NDIGITS_ENSMEM_NAMES: 1 diff --git a/tests/WE2E/test_configs/verification/config.get_obs_hpss.do_vx_det.multicyc.cycintvl_07hr_inits_vary_fcstlen_09hr.ncep-hrrr.yaml b/tests/WE2E/test_configs/verification/config.get_obs_hpss.do_vx_det.multicyc.cycintvl_07hr_inits_vary_fcstlen_09hr.ncep-hrrr.yaml index 418e47e95e..3286066021 100644 --- a/tests/WE2E/test_configs/verification/config.get_obs_hpss.do_vx_det.multicyc.cycintvl_07hr_inits_vary_fcstlen_09hr.ncep-hrrr.yaml +++ b/tests/WE2E/test_configs/verification/config.get_obs_hpss.do_vx_det.multicyc.cycintvl_07hr_inits_vary_fcstlen_09hr.ncep-hrrr.yaml @@ -15,24 +15,6 @@ user: platform: EXTRN_MDL_DATA_STORES: hpss - # Base directories in which to look for obs. If these do not exist and/or - # do not contain the requied obs, create and populate them. - # Note that when performing WE2E tests, the default behavior is not to - # get obs files from HPSS but to use staged obs files. This is done by - # setting these variables to the (platform-specific) locations of these - # staged files. 
To force the WE2E testing system to get the obs from - HPSS, here we reset these variables to their default values in the SRW - workflow configuration file config_defaults.yaml. - CCPA_OBS_DIR: "{{ workflow.EXPTDIR }}/obs_data/ccpa" - NOHRSC_OBS_DIR: "{{ workflow.EXPTDIR }}/obs_data/nohrsc" - MRMS_OBS_DIR: "{{ workflow.EXPTDIR }}/obs_data/mrms" - NDAS_OBS_DIR: "{{ workflow.EXPTDIR }}/obs_data/ndas" - # Do not remove raw obs files to be able to verify that only the necessary - # raw files are fetched from HPSS. - REMOVE_RAW_OBS_CCPA: false - REMOVE_RAW_OBS_MRMS: false - REMOVE_RAW_OBS_NDAS: false - REMOVE_RAW_OBS_NOHRSC: false workflow: PREEXISTING_DIR_METHOD: rename @@ -54,6 +36,25 @@ task_run_post: verification: METPLUS_VERBOSITY_LEVEL: 5 + # Base directories in which to look for obs. If these do not exist and/or + # do not contain the required obs, create and populate them. + # Note that when performing WE2E tests, the default behavior is not to + # get obs files from HPSS but to use staged obs files. This is done by + # setting these variables to the (platform-specific) locations of these + # staged files. To force the WE2E testing system to get the obs from + # HPSS, here we reset these variables to their default values in the SRW + # workflow configuration file config_defaults.yaml. + CCPA_OBS_DIR: "{{ workflow.EXPTDIR }}/obs_data/ccpa" + NOHRSC_OBS_DIR: "{{ workflow.EXPTDIR }}/obs_data/nohrsc" + MRMS_OBS_DIR: "{{ workflow.EXPTDIR }}/obs_data/mrms" + NDAS_OBS_DIR: "{{ workflow.EXPTDIR }}/obs_data/ndas" + # Do not remove raw obs files to be able to verify that only the necessary + # raw files are fetched from HPSS.
+ REMOVE_RAW_OBS_CCPA: false + REMOVE_RAW_OBS_MRMS: false + REMOVE_RAW_OBS_NDAS: false + REMOVE_RAW_OBS_NOHRSC: false + # VX_FCST_MODEL_NAME: 'hrrr_ncep' VX_FCST_INPUT_BASEDIR: '/scratch2/BMC/fv3lam/Gerard.Ketefian/forecasts_for_SRW_WE2E_tests.linked/{{- verification.VX_FCST_MODEL_NAME }}' FCST_FN_TEMPLATE: '${POST_OUTPUT_DOMAIN_NAME}_{init?fmt=%Y%m%d%H?shift=-${time_lag}}f{lead?fmt=%HHH?shift=${time_lag}}.grib2' diff --git a/tests/WE2E/test_configs/verification/config.get_obs_hpss.do_vx_det.multicyc.cycintvl_11hr_inits_vary_fcstlen_03hr.ncep-hrrr.yaml b/tests/WE2E/test_configs/verification/config.get_obs_hpss.do_vx_det.multicyc.cycintvl_11hr_inits_vary_fcstlen_03hr.ncep-hrrr.yaml index 913d5093bb..3963a616b4 100644 --- a/tests/WE2E/test_configs/verification/config.get_obs_hpss.do_vx_det.multicyc.cycintvl_11hr_inits_vary_fcstlen_03hr.ncep-hrrr.yaml +++ b/tests/WE2E/test_configs/verification/config.get_obs_hpss.do_vx_det.multicyc.cycintvl_11hr_inits_vary_fcstlen_03hr.ncep-hrrr.yaml @@ -17,24 +17,6 @@ user: platform: EXTRN_MDL_DATA_STORES: hpss - # Base directories in which to look for obs. If these do not exist and/or - # do not contain the requied obs, create and populate them. - # Note that when performing WE2E tests, the default behavior is not to - # get obs files from HPSS but to use staged obs files. This is done by - # setting these variables to the (platform-specific) locations of these - # staged files. To force the WE2E testing system to get the obs from - # HPSS, here we reset these variables to their default values in the SRW - # workflow configuration file config_defaults.yaml. - CCPA_OBS_DIR: "{{ workflow.EXPTDIR }}/obs_data/ccpa" - NOHRSC_OBS_DIR: "{{ workflow.EXPTDIR }}/obs_data/nohrsc" - MRMS_OBS_DIR: "{{ workflow.EXPTDIR }}/obs_data/mrms" - NDAS_OBS_DIR: "{{ workflow.EXPTDIR }}/obs_data/ndas" - # Do not remove raw obs files to be able to verify that only the necessary - # raw files are fetched from HPSS. 
- REMOVE_RAW_OBS_CCPA: false - REMOVE_RAW_OBS_MRMS: false - REMOVE_RAW_OBS_NDAS: false - REMOVE_RAW_OBS_NOHRSC: false workflow: PREEXISTING_DIR_METHOD: rename @@ -56,6 +38,25 @@ task_run_post: verification: METPLUS_VERBOSITY_LEVEL: 5 + # Base directories in which to look for obs. If these do not exist and/or + # do not contain the requied obs, create and populate them. + # Note that when performing WE2E tests, the default behavior is not to + # get obs files from HPSS but to use staged obs files. This is done by + # setting these variables to the (platform-specific) locations of these + # staged files. To force the WE2E testing system to get the obs from + # HPSS, here we reset these variables to their default values in the SRW + # workflow configuration file config_defaults.yaml. + CCPA_OBS_DIR: "{{ workflow.EXPTDIR }}/obs_data/ccpa" + NOHRSC_OBS_DIR: "{{ workflow.EXPTDIR }}/obs_data/nohrsc" + MRMS_OBS_DIR: "{{ workflow.EXPTDIR }}/obs_data/mrms" + NDAS_OBS_DIR: "{{ workflow.EXPTDIR }}/obs_data/ndas" + # Do not remove raw obs files to be able to verify that only the necessary + # raw files are fetched from HPSS. 
+ REMOVE_RAW_OBS_CCPA: false + REMOVE_RAW_OBS_MRMS: false + REMOVE_RAW_OBS_NDAS: false + REMOVE_RAW_OBS_NOHRSC: false + # VX_FCST_MODEL_NAME: 'hrrr_ncep' VX_FCST_INPUT_BASEDIR: '/scratch2/BMC/fv3lam/Gerard.Ketefian/forecasts_for_SRW_WE2E_tests.linked/{{- verification.VX_FCST_MODEL_NAME }}' FCST_FN_TEMPLATE: '${POST_OUTPUT_DOMAIN_NAME}_{init?fmt=%Y%m%d%H?shift=-${time_lag}}f{lead?fmt=%HHH?shift=${time_lag}}.grib2' diff --git a/tests/WE2E/test_configs/verification/config.get_obs_hpss.do_vx_det.multicyc.cycintvl_24hr_inits_00z_fcstlen_03hr.ncep-hrrr.yaml b/tests/WE2E/test_configs/verification/config.get_obs_hpss.do_vx_det.multicyc.cycintvl_24hr_inits_00z_fcstlen_03hr.ncep-hrrr.yaml index a859a03ac8..23035f3a92 100644 --- a/tests/WE2E/test_configs/verification/config.get_obs_hpss.do_vx_det.multicyc.cycintvl_24hr_inits_00z_fcstlen_03hr.ncep-hrrr.yaml +++ b/tests/WE2E/test_configs/verification/config.get_obs_hpss.do_vx_det.multicyc.cycintvl_24hr_inits_00z_fcstlen_03hr.ncep-hrrr.yaml @@ -15,24 +15,6 @@ user: platform: EXTRN_MDL_DATA_STORES: hpss - # Base directories in which to look for obs. If these do not exist and/or - # do not contain the requied obs, create and populate them. - # Note that when performing WE2E tests, the default behavior is not to - # get obs files from HPSS but to use staged obs files. This is done by - # setting these variables to the (platform-specific) locations of these - # staged files. To force the WE2E testing system to get the obs from - # HPSS, here we reset these variables to their default values in the SRW - # workflow configuration file config_defaults.yaml. - CCPA_OBS_DIR: "{{ workflow.EXPTDIR }}/obs_data/ccpa" - NOHRSC_OBS_DIR: "{{ workflow.EXPTDIR }}/obs_data/nohrsc" - MRMS_OBS_DIR: "{{ workflow.EXPTDIR }}/obs_data/mrms" - NDAS_OBS_DIR: "{{ workflow.EXPTDIR }}/obs_data/ndas" - # Do not remove raw obs files to be able to verify that only the necessary - # raw files are fetched from HPSS. 
- REMOVE_RAW_OBS_CCPA: false - REMOVE_RAW_OBS_MRMS: false - REMOVE_RAW_OBS_NDAS: false - REMOVE_RAW_OBS_NOHRSC: false workflow: PREEXISTING_DIR_METHOD: rename @@ -54,6 +36,25 @@ task_run_post: verification: METPLUS_VERBOSITY_LEVEL: 5 + # Base directories in which to look for obs. If these do not exist and/or + # do not contain the requied obs, create and populate them. + # Note that when performing WE2E tests, the default behavior is not to + # get obs files from HPSS but to use staged obs files. This is done by + # setting these variables to the (platform-specific) locations of these + # staged files. To force the WE2E testing system to get the obs from + # HPSS, here we reset these variables to their default values in the SRW + # workflow configuration file config_defaults.yaml. + CCPA_OBS_DIR: "{{ workflow.EXPTDIR }}/obs_data/ccpa" + NOHRSC_OBS_DIR: "{{ workflow.EXPTDIR }}/obs_data/nohrsc" + MRMS_OBS_DIR: "{{ workflow.EXPTDIR }}/obs_data/mrms" + NDAS_OBS_DIR: "{{ workflow.EXPTDIR }}/obs_data/ndas" + # Do not remove raw obs files to be able to verify that only the necessary + # raw files are fetched from HPSS. 
+ REMOVE_RAW_OBS_CCPA: false + REMOVE_RAW_OBS_MRMS: false + REMOVE_RAW_OBS_NDAS: false + REMOVE_RAW_OBS_NOHRSC: false + # VX_FCST_MODEL_NAME: 'hrrr_ncep' VX_FCST_INPUT_BASEDIR: '/scratch2/BMC/fv3lam/Gerard.Ketefian/forecasts_for_SRW_WE2E_tests.linked/{{- verification.VX_FCST_MODEL_NAME }}' FCST_FN_TEMPLATE: '${POST_OUTPUT_DOMAIN_NAME}_{init?fmt=%Y%m%d%H?shift=-${time_lag}}f{lead?fmt=%HHH?shift=${time_lag}}.grib2' diff --git a/tests/WE2E/test_configs/verification/config.get_obs_hpss.do_vx_det.multicyc.cycintvl_24hr_inits_12z_fcstlen_03hr.nssl-mpas.yaml b/tests/WE2E/test_configs/verification/config.get_obs_hpss.do_vx_det.multicyc.cycintvl_24hr_inits_12z_fcstlen_03hr.nssl-mpas.yaml index 2e180e2714..10ceddd9a8 100644 --- a/tests/WE2E/test_configs/verification/config.get_obs_hpss.do_vx_det.multicyc.cycintvl_24hr_inits_12z_fcstlen_03hr.nssl-mpas.yaml +++ b/tests/WE2E/test_configs/verification/config.get_obs_hpss.do_vx_det.multicyc.cycintvl_24hr_inits_12z_fcstlen_03hr.nssl-mpas.yaml @@ -17,24 +17,6 @@ user: platform: EXTRN_MDL_DATA_STORES: hpss - # Base directories in which to look for obs. If these do not exist and/or - # do not contain the requied obs, create and populate them. - # Note that when performing WE2E tests, the default behavior is not to - # get obs files from HPSS but to use staged obs files. This is done by - # setting these variables to the (platform-specific) locations of these - # staged files. To force the WE2E testing system to get the obs from - # HPSS, here we reset these variables to their default values in the SRW - # workflow configuration file config_defaults.yaml. - CCPA_OBS_DIR: "{{ workflow.EXPTDIR }}/obs_data/ccpa" - NOHRSC_OBS_DIR: "{{ workflow.EXPTDIR }}/obs_data/nohrsc" - MRMS_OBS_DIR: "{{ workflow.EXPTDIR }}/obs_data/mrms" - NDAS_OBS_DIR: "{{ workflow.EXPTDIR }}/obs_data/ndas" - # Do not remove raw obs files to be able to verify that only the necessary - # raw files are fetched from HPSS. 
- REMOVE_RAW_OBS_CCPA: false - REMOVE_RAW_OBS_MRMS: false - REMOVE_RAW_OBS_NDAS: false - REMOVE_RAW_OBS_NOHRSC: false workflow: PREEXISTING_DIR_METHOD: rename @@ -56,6 +38,25 @@ task_run_post: verification: METPLUS_VERBOSITY_LEVEL: 5 + # Base directories in which to look for obs. If these do not exist and/or + # do not contain the requied obs, create and populate them. + # Note that when performing WE2E tests, the default behavior is not to + # get obs files from HPSS but to use staged obs files. This is done by + # setting these variables to the (platform-specific) locations of these + # staged files. To force the WE2E testing system to get the obs from + # HPSS, here we reset these variables to their default values in the SRW + # workflow configuration file config_defaults.yaml. + CCPA_OBS_DIR: "{{ workflow.EXPTDIR }}/obs_data/ccpa" + NOHRSC_OBS_DIR: "{{ workflow.EXPTDIR }}/obs_data/nohrsc" + MRMS_OBS_DIR: "{{ workflow.EXPTDIR }}/obs_data/mrms" + NDAS_OBS_DIR: "{{ workflow.EXPTDIR }}/obs_data/ndas" + # Do not remove raw obs files to be able to verify that only the necessary + # raw files are fetched from HPSS. 
+ REMOVE_RAW_OBS_CCPA: false + REMOVE_RAW_OBS_MRMS: false + REMOVE_RAW_OBS_NDAS: false + REMOVE_RAW_OBS_NOHRSC: false + # VX_FCST_MODEL_NAME: 'NSSL-MPAS-HN' VX_FCST_INPUT_BASEDIR: '/scratch2/BMC/fv3lam/Gerard.Ketefian/forecasts_for_SRW_WE2E_tests.linked/{{- verification.VX_FCST_MODEL_NAME }}' FCST_FN_TEMPLATE: '${POST_OUTPUT_DOMAIN_NAME}_{init?fmt=%Y%m%d%H?shift=-${time_lag}}f{lead?fmt=%H?shift=${time_lag}}.grib2' diff --git a/tests/WE2E/test_configs/verification/config.get_obs_hpss.do_vx_det.multicyc.cycintvl_24hr_inits_12z_fcstlen_48hr.nssl-mpas.yaml b/tests/WE2E/test_configs/verification/config.get_obs_hpss.do_vx_det.multicyc.cycintvl_24hr_inits_12z_fcstlen_48hr.nssl-mpas.yaml index 37c3eceb24..c4f62a679d 100644 --- a/tests/WE2E/test_configs/verification/config.get_obs_hpss.do_vx_det.multicyc.cycintvl_24hr_inits_12z_fcstlen_48hr.nssl-mpas.yaml +++ b/tests/WE2E/test_configs/verification/config.get_obs_hpss.do_vx_det.multicyc.cycintvl_24hr_inits_12z_fcstlen_48hr.nssl-mpas.yaml @@ -16,24 +16,6 @@ user: platform: EXTRN_MDL_DATA_STORES: hpss - # Base directories in which to look for obs. If these do not exist and/or - # do not contain the requied obs, create and populate them. - # Note that when performing WE2E tests, the default behavior is not to - # get obs files from HPSS but to use staged obs files. This is done by - # setting these variables to the (platform-specific) locations of these - # staged files. To force the WE2E testing system to get the obs from - # HPSS, here we reset these variables to their default values in the SRW - # workflow configuration file config_defaults.yaml. - CCPA_OBS_DIR: "{{ workflow.EXPTDIR }}/obs_data/ccpa" - NOHRSC_OBS_DIR: "{{ workflow.EXPTDIR }}/obs_data/nohrsc" - MRMS_OBS_DIR: "{{ workflow.EXPTDIR }}/obs_data/mrms" - NDAS_OBS_DIR: "{{ workflow.EXPTDIR }}/obs_data/ndas" - # Do not remove raw obs files to be able to verify that only the necessary - # raw files are fetched from HPSS. 
- REMOVE_RAW_OBS_CCPA: false - REMOVE_RAW_OBS_MRMS: false - REMOVE_RAW_OBS_NDAS: false - REMOVE_RAW_OBS_NOHRSC: false workflow: PREEXISTING_DIR_METHOD: rename @@ -55,6 +37,25 @@ task_run_post: verification: METPLUS_VERBOSITY_LEVEL: 5 + # Base directories in which to look for obs. If these do not exist and/or + # do not contain the requied obs, create and populate them. + # Note that when performing WE2E tests, the default behavior is not to + # get obs files from HPSS but to use staged obs files. This is done by + # setting these variables to the (platform-specific) locations of these + # staged files. To force the WE2E testing system to get the obs from + # HPSS, here we reset these variables to their default values in the SRW + # workflow configuration file config_defaults.yaml. + CCPA_OBS_DIR: "{{ workflow.EXPTDIR }}/obs_data/ccpa" + NOHRSC_OBS_DIR: "{{ workflow.EXPTDIR }}/obs_data/nohrsc" + MRMS_OBS_DIR: "{{ workflow.EXPTDIR }}/obs_data/mrms" + NDAS_OBS_DIR: "{{ workflow.EXPTDIR }}/obs_data/ndas" + # Do not remove raw obs files to be able to verify that only the necessary + # raw files are fetched from HPSS. 
+ REMOVE_RAW_OBS_CCPA: false + REMOVE_RAW_OBS_MRMS: false + REMOVE_RAW_OBS_NDAS: false + REMOVE_RAW_OBS_NOHRSC: false + # VX_FCST_MODEL_NAME: 'NSSL-MPAS-HN' VX_FCST_INPUT_BASEDIR: '/scratch2/BMC/fv3lam/Gerard.Ketefian/forecasts_for_SRW_WE2E_tests.linked/{{- verification.VX_FCST_MODEL_NAME }}' FCST_FN_TEMPLATE: '${POST_OUTPUT_DOMAIN_NAME}_{init?fmt=%Y%m%d%H?shift=-${time_lag}}f{lead?fmt=%H?shift=${time_lag}}.grib2' diff --git a/tests/WE2E/test_configs/verification/config.get_obs_hpss.do_vx_det.multicyc.cycintvl_24hr_inits_21z_fcstlen_03hr.ncep-hrrr.yaml b/tests/WE2E/test_configs/verification/config.get_obs_hpss.do_vx_det.multicyc.cycintvl_24hr_inits_21z_fcstlen_03hr.ncep-hrrr.yaml index 563b8852a8..55cbf5b13f 100644 --- a/tests/WE2E/test_configs/verification/config.get_obs_hpss.do_vx_det.multicyc.cycintvl_24hr_inits_21z_fcstlen_03hr.ncep-hrrr.yaml +++ b/tests/WE2E/test_configs/verification/config.get_obs_hpss.do_vx_det.multicyc.cycintvl_24hr_inits_21z_fcstlen_03hr.ncep-hrrr.yaml @@ -15,24 +15,6 @@ user: platform: EXTRN_MDL_DATA_STORES: hpss - # Base directories in which to look for obs. If these do not exist and/or - # do not contain the requied obs, create and populate them. - # Note that when performing WE2E tests, the default behavior is not to - # get obs files from HPSS but to use staged obs files. This is done by - # setting these variables to the (platform-specific) locations of these - # staged files. To force the WE2E testing system to get the obs from - # HPSS, here we reset these variables to their default values in the SRW - # workflow configuration file config_defaults.yaml. - CCPA_OBS_DIR: "{{ workflow.EXPTDIR }}/obs_data/ccpa" - NOHRSC_OBS_DIR: "{{ workflow.EXPTDIR }}/obs_data/nohrsc" - MRMS_OBS_DIR: "{{ workflow.EXPTDIR }}/obs_data/mrms" - NDAS_OBS_DIR: "{{ workflow.EXPTDIR }}/obs_data/ndas" - # Do not remove raw obs files to be able to verify that only the necessary - # raw files are fetched from HPSS. 
- REMOVE_RAW_OBS_CCPA: false - REMOVE_RAW_OBS_MRMS: false - REMOVE_RAW_OBS_NDAS: false - REMOVE_RAW_OBS_NOHRSC: false workflow: PREEXISTING_DIR_METHOD: rename @@ -54,6 +36,25 @@ task_run_post: verification: METPLUS_VERBOSITY_LEVEL: 5 + # Base directories in which to look for obs. If these do not exist and/or + # do not contain the requied obs, create and populate them. + # Note that when performing WE2E tests, the default behavior is not to + # get obs files from HPSS but to use staged obs files. This is done by + # setting these variables to the (platform-specific) locations of these + # staged files. To force the WE2E testing system to get the obs from + # HPSS, here we reset these variables to their default values in the SRW + # workflow configuration file config_defaults.yaml. + CCPA_OBS_DIR: "{{ workflow.EXPTDIR }}/obs_data/ccpa" + NOHRSC_OBS_DIR: "{{ workflow.EXPTDIR }}/obs_data/nohrsc" + MRMS_OBS_DIR: "{{ workflow.EXPTDIR }}/obs_data/mrms" + NDAS_OBS_DIR: "{{ workflow.EXPTDIR }}/obs_data/ndas" + # Do not remove raw obs files to be able to verify that only the necessary + # raw files are fetched from HPSS. 
+ REMOVE_RAW_OBS_CCPA: false + REMOVE_RAW_OBS_MRMS: false + REMOVE_RAW_OBS_NDAS: false + REMOVE_RAW_OBS_NOHRSC: false + # VX_FCST_MODEL_NAME: 'hrrr_ncep' VX_FCST_INPUT_BASEDIR: '/scratch2/BMC/fv3lam/Gerard.Ketefian/forecasts_for_SRW_WE2E_tests.linked/{{- verification.VX_FCST_MODEL_NAME }}' FCST_FN_TEMPLATE: '${POST_OUTPUT_DOMAIN_NAME}_{init?fmt=%Y%m%d%H?shift=-${time_lag}}f{lead?fmt=%HHH?shift=${time_lag}}.grib2' diff --git a/tests/WE2E/test_configs/verification/config.get_obs_hpss.do_vx_det.multicyc.cycintvl_96hr_inits_12z_fcstlen_48hr.nssl-mpas.yaml b/tests/WE2E/test_configs/verification/config.get_obs_hpss.do_vx_det.multicyc.cycintvl_96hr_inits_12z_fcstlen_48hr.nssl-mpas.yaml index 514dbed8d3..20cab966ef 100644 --- a/tests/WE2E/test_configs/verification/config.get_obs_hpss.do_vx_det.multicyc.cycintvl_96hr_inits_12z_fcstlen_48hr.nssl-mpas.yaml +++ b/tests/WE2E/test_configs/verification/config.get_obs_hpss.do_vx_det.multicyc.cycintvl_96hr_inits_12z_fcstlen_48hr.nssl-mpas.yaml @@ -17,24 +17,6 @@ user: platform: EXTRN_MDL_DATA_STORES: hpss - # Base directories in which to look for obs. If these do not exist and/or - # do not contain the requied obs, create and populate them. - # Note that when performing WE2E tests, the default behavior is not to - # get obs files from HPSS but to use staged obs files. This is done by - # setting these variables to the (platform-specific) locations of these - # staged files. To force the WE2E testing system to get the obs from - # HPSS, here we reset these variables to their default values in the SRW - # workflow configuration file config_defaults.yaml. - CCPA_OBS_DIR: "{{ workflow.EXPTDIR }}/obs_data/ccpa" - NOHRSC_OBS_DIR: "{{ workflow.EXPTDIR }}/obs_data/nohrsc" - MRMS_OBS_DIR: "{{ workflow.EXPTDIR }}/obs_data/mrms" - NDAS_OBS_DIR: "{{ workflow.EXPTDIR }}/obs_data/ndas" - # Do not remove raw obs files to be able to verify that only the necessary - # raw files are fetched from HPSS. 
- REMOVE_RAW_OBS_CCPA: false - REMOVE_RAW_OBS_MRMS: false - REMOVE_RAW_OBS_NDAS: false - REMOVE_RAW_OBS_NOHRSC: false workflow: PREEXISTING_DIR_METHOD: rename @@ -56,6 +38,25 @@ task_run_post: verification: METPLUS_VERBOSITY_LEVEL: 5 + # Base directories in which to look for obs. If these do not exist and/or + # do not contain the requied obs, create and populate them. + # Note that when performing WE2E tests, the default behavior is not to + # get obs files from HPSS but to use staged obs files. This is done by + # setting these variables to the (platform-specific) locations of these + # staged files. To force the WE2E testing system to get the obs from + # HPSS, here we reset these variables to their default values in the SRW + # workflow configuration file config_defaults.yaml. + CCPA_OBS_DIR: "{{ workflow.EXPTDIR }}/obs_data/ccpa" + NOHRSC_OBS_DIR: "{{ workflow.EXPTDIR }}/obs_data/nohrsc" + MRMS_OBS_DIR: "{{ workflow.EXPTDIR }}/obs_data/mrms" + NDAS_OBS_DIR: "{{ workflow.EXPTDIR }}/obs_data/ndas" + # Do not remove raw obs files to be able to verify that only the necessary + # raw files are fetched from HPSS. 
+ REMOVE_RAW_OBS_CCPA: false + REMOVE_RAW_OBS_MRMS: false + REMOVE_RAW_OBS_NDAS: false + REMOVE_RAW_OBS_NOHRSC: false + # VX_FCST_MODEL_NAME: 'NSSL-MPAS-HN' VX_FCST_INPUT_BASEDIR: '/scratch2/BMC/fv3lam/Gerard.Ketefian/forecasts_for_SRW_WE2E_tests.linked/{{- verification.VX_FCST_MODEL_NAME }}' FCST_FN_TEMPLATE: '${POST_OUTPUT_DOMAIN_NAME}_{init?fmt=%Y%m%d%H?shift=-${time_lag}}f{lead?fmt=%H?shift=${time_lag}}.grib2' diff --git a/tests/WE2E/test_configs/verification/config.get_obs_hpss.do_vx_det.singlecyc.init_00z_fcstlen_36hr.winter_wx.SRW.yaml b/tests/WE2E/test_configs/verification/config.get_obs_hpss.do_vx_det.singlecyc.init_00z_fcstlen_36hr.winter_wx.SRW.yaml index 6069ce8212..10ff318dd9 100644 --- a/tests/WE2E/test_configs/verification/config.get_obs_hpss.do_vx_det.singlecyc.init_00z_fcstlen_36hr.winter_wx.SRW.yaml +++ b/tests/WE2E/test_configs/verification/config.get_obs_hpss.do_vx_det.singlecyc.init_00z_fcstlen_36hr.winter_wx.SRW.yaml @@ -16,24 +16,6 @@ user: platform: EXTRN_MDL_DATA_STORES: hpss - # Base directories in which to look for obs. If these do not exist and/or - # do not contain the requied obs, create and populate them. - # Note that when performing WE2E tests, the default behavior is not to - # get obs files from HPSS but to use staged obs files. This is done by - # setting these variables to the (platform-specific) locations of these - # staged files. To force the WE2E testing system to get the obs from - # HPSS, here we reset these variables to their default values in the SRW - # workflow configuration file config_defaults.yaml. - CCPA_OBS_DIR: "{{ workflow.EXPTDIR }}/obs_data/ccpa" - NOHRSC_OBS_DIR: "{{ workflow.EXPTDIR }}/obs_data/nohrsc" - MRMS_OBS_DIR: "{{ workflow.EXPTDIR }}/obs_data/mrms" - NDAS_OBS_DIR: "{{ workflow.EXPTDIR }}/obs_data/ndas" - # Do not remove raw obs files to be able to verify that only the necessary - # raw files are fetched from HPSS. 
- REMOVE_RAW_OBS_CCPA: false - REMOVE_RAW_OBS_MRMS: false - REMOVE_RAW_OBS_NDAS: false - REMOVE_RAW_OBS_NOHRSC: false workflow: PREEXISTING_DIR_METHOD: rename @@ -56,6 +38,25 @@ task_run_post: POST_OUTPUT_DOMAIN_NAME: 'custom_ESGgrid_Michigan_Ontario' verification: + # Base directories in which to look for obs. If these do not exist and/or + # do not contain the requied obs, create and populate them. + # Note that when performing WE2E tests, the default behavior is not to + # get obs files from HPSS but to use staged obs files. This is done by + # setting these variables to the (platform-specific) locations of these + # staged files. To force the WE2E testing system to get the obs from + # HPSS, here we reset these variables to their default values in the SRW + # workflow configuration file config_defaults.yaml. + CCPA_OBS_DIR: "{{ workflow.EXPTDIR }}/obs_data/ccpa" + NOHRSC_OBS_DIR: "{{ workflow.EXPTDIR }}/obs_data/nohrsc" + MRMS_OBS_DIR: "{{ workflow.EXPTDIR }}/obs_data/mrms" + NDAS_OBS_DIR: "{{ workflow.EXPTDIR }}/obs_data/ndas" + # Do not remove raw obs files to be able to verify that only the necessary + # raw files are fetched from HPSS. + REMOVE_RAW_OBS_CCPA: false + REMOVE_RAW_OBS_MRMS: false + REMOVE_RAW_OBS_NDAS: false + REMOVE_RAW_OBS_NOHRSC: false + # VX_FCST_MODEL_NAME: 'Michigan_Ontario_snow_8km' VX_FIELDS: [ "APCP", "ASNOW", "REFC", "RETOP", "ADPSFC", "ADPUPA" ] VX_FCST_INPUT_BASEDIR: '/scratch2/BMC/fv3lam/Gerard.Ketefian/forecasts_for_SRW_WE2E_tests.linked/SRW' From 09f8531580c6eee8f806aa6cb8bb99f110bdd7aa Mon Sep 17 00:00:00 2001 From: gerard ketefian Date: Mon, 7 Oct 2024 19:51:05 -0600 Subject: [PATCH 106/131] Fix bug found in latest develop branch. 
--- ush/get_crontab_contents.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/ush/get_crontab_contents.py b/ush/get_crontab_contents.py index 5c651f3b0c..82bb350a0e 100644 --- a/ush/get_crontab_contents.py +++ b/ush/get_crontab_contents.py @@ -224,7 +224,7 @@ def _parse_args(argv): ) # Check that inputs are correct and consistent - args = parser._parse_args(argv) + args = parser.parse_args(argv) if args.remove: if args.line is None: From b43a9d223b5054e74dd7e0f6d2a3e89e78ed9574 Mon Sep 17 00:00:00 2001 From: gerard ketefian Date: Tue, 8 Oct 2024 10:15:22 -0600 Subject: [PATCH 107/131] Fix up documentation and comments. Minor code changes. --- ush/config_defaults.yaml | 3 +- ush/get_obs.py | 465 ++++++++++++++++++++++----------------- 2 files changed, 263 insertions(+), 205 deletions(-) diff --git a/ush/config_defaults.yaml b/ush/config_defaults.yaml index e50e51406d..1e967ef9e4 100644 --- a/ush/config_defaults.yaml +++ b/ush/config_defaults.yaml @@ -2419,7 +2419,8 @@ verification: # NOAA's HPSS. # # Note that MRMS files are in fact available every few minutes, but here - # we set the obs availability interval to 1 hour because the forecast + # we set the obs availability interval to 1 hour because currently that + # is the shortest output interval for the forecast, i.e. the forecast # cannot (yet) support sub-hourly output. # CCPA_OBS_AVAIL_INTVL_HRS: 1 diff --git a/ush/get_obs.py b/ush/get_obs.py index b70d8c3ea9..c831ad909e 100644 --- a/ush/get_obs.py +++ b/ush/get_obs.py @@ -110,92 +110,184 @@ def get_obs_arcv_hr(obtype, arcv_intvl_hrs, hod): def get_obs(config, obtype, yyyymmdd_task): """ -This script performs several important tasks for preparing data for -verification tasks. Depending on the value of the environment variable -OBTYPE=(CCPA|MRMS|NDAS|NOHRSC), the script will prepare that particular data -set. 
+ This script checks for the existence of obs files of the specified type + at the locations specified by variables in the SRW App's configuration + file. If one or more of these files do not exist, it retrieves them from + a data store and places them in the locations specified by the configuration + variables, renaming them if necessary. -If data is not available on disk (in the location specified by -CCPA_OBS_DIR, MRMS_OBS_DIR, NDAS_OBS_DIR, or NOHRSC_OBS_DIR respectively), -the script attempts to retrieve the data from HPSS using the retrieve_data.py -script. Depending on the data set, there are a few strange quirks and/or -bugs in the way data is organized; see in-line comments for details. - - -CCPA (Climatology-Calibrated Precipitation Analysis) precipitation accumulation obs ----------- -If data is available on disk, it must be in the following -directory structure and file name conventions expected by verification -tasks: - -{CCPA_OBS_DIR}/{YYYYMMDD}/ccpa.t{HH}z.01h.hrap.conus.gb2 - -If data is retrieved from HPSS, it will be automatically staged by this -script. - -Notes about the data and how it's used for verification: - -1. Accumulation is currently hardcoded to 01h. The verification will -use MET/pcp-combine to sum 01h files into desired accumulations. - -2. There is a problem with the valid time in the metadata for files -valid from 19 - 00 UTC (or files under the '00' directory). This is -accounted for in this script for data retrieved from HPSS, but if you -have manually staged data on disk you should be sure this is accounted -for. See in-line comments below for details. 
- - -MRMS (Multi-Radar Multi-Sensor) radar observations ----------- -If data is available on disk, it must be in the following -directory structure and file name conventions expected by verification -tasks: - -{MRMS_OBS_DIR}/{YYYYMMDD}/[PREFIX]{YYYYMMDD}-{HH}0000.grib2, - -Where [PREFIX] is MergedReflectivityQCComposite_00.50_ for reflectivity -data and EchoTop_18_00.50_ for echo top data. If data is not available -at the top of the hour, you should rename the file closest in time to -your hour(s) of interest to the above naming format. A script -"ush/mrms_pull_topofhour.py" is provided for this purpose. - -If data is retrieved from HPSS, it will automatically staged by this -this script. - - -NDAS (NAM Data Assimilation System) conventional observations ----------- -If data is available on disk, it must be in the following -directory structure and file name conventions expected by verification -tasks: - -{NDAS_OBS_DIR}/{YYYYMMDD}/prepbufr.ndas.{YYYYMMDDHH} - -Note that data retrieved from HPSS and other sources may be in a -different format: nam.t{hh}z.prepbufr.tm{prevhour}.nr, where hh is -either 00, 06, 12, or 18, and prevhour is the number of hours prior to -hh (00 through 05). If using custom staged data, you will have to -rename the files accordingly. - -If data is retrieved from HPSS, it will be automatically staged by this -this script. - - -NOHRSC snow accumulation observations ----------- -If data is available on disk, it must be in the following -directory structure and file name conventions expected by verification -tasks: - -{NOHRSC_OBS_DIR}/{YYYYMMDD}/sfav2_CONUS_{AA}h_{YYYYMMDD}{HH}_grid184.grb2 + Args: + config: + The final configuration dictionary (obtained from var_defns.yaml). -where AA is the 2-digit accumulation duration in hours: 06 or 24 + obtype: + The observation type. A string. -METplus is configured to verify snowfall using 06- and 24-h accumulated -snowfall from 6- and 12-hourly NOHRSC files, respectively. 
+ yyyymmdd_task: + The date for which obs may be needed. A datetime object. -If data is retrieved from HPSS, it will automatically staged by this -this script. + Returns: + True if all goes well. + + + Detailed Description: + + In this script, the main (outer) loop to obtain obs files is over a + sequence of archive hours, where each archive hour in the sequence + represents one archive (tar) file in the data store, and archive hours + are with respect to hour 0 of the day. The number of archive hours in + this sequence depends on how the obs files are arranged into archives + for the given obs type. For example, if the obs files for a given day + are arranged into four archives, then the archive interval is 6 hours, + and in order to get all the obs files for that day, the loop must + iterate over a sequence of 4 hours, either [0, 6, 12, 18] or [6, 12, + 18, 24] (which of these it will be depends on how the obs files are + arranged into the archives). + + Below, we give a description of archive layout for each obs type and + give the archive hours to loop over for the case in which we need to + obtain all available obs for the current day. + + + CCPA (Climatology-Calibrated Precipitation Analysis) precipitation + accumulation obs: + ---------- + For CCPA, the archive interval is 6 hours, i.e. the obs files are bundled + into 6-hourly archives. The archives are organized such that each one + contains 6 files, so that the obs availability interval is + + obs_avail_intvl_hrs = (24 hrs)/[(4 archives)*(6 files/archive)] + = 1 hr/file + + i.e. there is one obs file for each hour of the day containing the + accumulation over that one hour. The archive corresponding to hour 0 + of the current day contains 6 files representing accumulations during + the 6 hours of the previous day. 
The archive corresponding to hour 6 + of the current day contains 6 files for the accumulations during the + first 6 hours of the current day, and the archives corresponding to + hours 12 and 18 of the current day each contain 6 files for accumulations + during hours 6-12 and 12-18, respectively, of the current day. Thus, + to obtain all the one-hour accumulations for the current day, we must + extract all the obs files from the three archives corresponding to hours + 6, 12, and 18 of the current day and from the archive corresponding to + hour 0 of the next day. This corresponds to an archive hour sequence + of [6, 12, 18, 24]. Thus, in the simplest case in which the observation + retrieval times include all hours of the current task's day at which + obs files are available and none of the obs files for this day already + exist on disk, this sequence will be [6, 12, 18, 24]. In other cases, + the sequence we loop over will be a subset of [6, 12, 18, 24]. + + Note that CCPA files for 1-hour accumulation have incorrect metadata in + the files under the "00" directory (i.e. for hours-of-day 19 to 00 of + the next day) from 20180718 to 20210504. This script corrects these + errors if getting CCPA obs at these times. + + + NOHRSC (National Operational Hydrologic Remote Sensing Center) snow + accumulation observations: + ---------- + For NOHRSC, the archive interval is 24 hours, i.e. the obs files are + bundled into 24-hourly archives. The archives are organized such that + each one contains 4 files, so that the obs availability interval is + + obs_avail_intvl_hrs = (24 hrs)/[(1 archive)*(4 files/archive)] + = 6 hr/file + + i.e. there is one obs file for each 6-hour interval of the day containing + the accumulation over those 6 hours. The 4 obs files within each archive + correspond to hours 0, 6, 12, and 18 of the current day. 
The obs file + for hour 0 contains accumulations during the last 6 hours of the previous + day, while those for hours 6, 12, and 18 contain accumulations for the + first, second, and third 6-hour chunks of the current day. Thus, to + obtain all the 6-hour accumulations for the current day, we must extract + from the archive for the current day the obs files for hours 6, 12, and + 18 and from the archive for the next day the obs file for hour 0. This + corresponds to an archive hour sequence of [0, 24]. Thus, in the simplest + case in which the observation retrieval times include all hours of the + current task's day at which obs files are available and none of the obs + files for this day already exist on disk, this sequence will be [0, 24]. + In other cases, the sequence we loop over will be a subset of [0, 24]. + + + MRMS (Multi-Radar Multi-Sensor) radar observations: + ---------- + For MRMS, the archive interval is 24 hours, i.e. the obs files are + bundled into 24-hourly archives. The archives are organized such that + each contains gzipped grib2 files for that day that are usually only a + few minutes apart. However, since the forecasts cannot (yet) perform + sub-hourly output, we filter this data in time by using only those obs + files that are closest to each hour of the day for which obs are needed. + This effectively sets the obs availability interval for MRMS to one + hour, i.e. + + obs_avail_intvl_hrs = 1 hr/file + + i.e. there is one obs file for each hour of the day containing values + at that hour (but only after filtering in time; also see notes for + MRMS_OBS_AVAIL_INTVL_HRS in config_defaults.yaml). Thus, to obtain the + obs at all hours of the day, we only need to extract files from one + archive. 
Thus, in the simplest case in which the observation retrieval + times include all hours of the current task's day at which obs files + are available and none of the obs files for this day already exist on + disk, the sequence of archive hours over which we loop will be just + [0]. Note that: + + * For cases in which MRMS data are not needed for all hours of the day, + we still need to retrieve and extract from this single daily archive. + Thus, the archive hour sequence over which we loop over will always + be just [0] for MRMS obs. + + * Because MRMS obs are split into two sets of archives -- one for + composite reflectivity (REFC) and another for echo top (RETOP) -- + on any given day (and with an archive hour of 0) we actually retrive + and extract two different archive files (one per field). + + + NDAS (NAM Data Assimilation System) conventional observations: + ---------- + For NDAS, the archive interval is 6 hours, i.e. the obs files are + bundled into 6-hourly archives. The archives are organized such that + each one contains 7 files (not say 6). The archive associated with + time yyyymmddhh_arcv contains the hourly files at + + yyyymmddhh_arcv - 6 hours + yyyymmddhh_arcv - 5 hours + ... + yyyymmddhh_arcv - 2 hours + yyyymmddhh_arcv - 1 hours + yyyymmddhh_arcv - 0 hours + + These are known as the tm06, tm05, ..., tm02, tm01, and tm00 files, + respectively. Thus, the tm06 file from the current archive, say the + one associated with time yyyymmddhh_arcv, has the same valid time as + the tm00 file from the previous archive, i.e. the one associated with + time (yyyymmddhh_arcv - 6 hours). It turns out that the tm06 file from + the current archive contains more/better observations than the tm00 + file from the previous archive. Thus, for a given archive time + yyyymmddhh_arcv, we use 6 of the 7 files at tm06, ..., tm01 but not + the one at tm00, effectively resulting in 6 files per archive for NDAS + obs. 
The obs availability interval is then + + obs_avail_intvl_hrs = (24 hrs)/[(4 archives)*(6 files/archive)] + = 1 hr/file + + i.e. there is one obs file for each hour of the day containing values + at that hour. The archive corresponding to hour 0 of the current day + contains 6 files valid at hours 18 through 23 of the previous day. The + archive corresponding to hour 6 of the current day contains 6 files + valid at hours 0 through 5 of the current day, and the archives + corresponding to hours 12 and 18 of the current day each contain 6 + files valid at hours 6 through 11 and 12 through 17 of the current day. + Thus, to obtain all the hourly values for the current day (from hour + 0 to hour 23), we must extract the 6 obs files (excluding the tm00 + ones) from the three archives corresponding to hours 6, 12, and 18 of + the current day and the archive corresponding to hour 0 of the next + day. This corresponds to an archive hour sequence set below of [6, 12, + 18, 24]. Thus, in the simplest case in which the observation retrieval + times include all hours of the current task's day at which obs files + are available and none of the obs files for this day already exist on + disk, this sequence will be [6, 12, 18, 24]. In other cases, the + sequence we loop over will be a subset of [6, 12, 18, 24]. """ # Convert obtype to upper case to simplify code below. @@ -355,100 +447,6 @@ def get_obs(config, obtype, yyyymmdd_task): # To generate this sequence, we first set the archive interval and then # set the starting and ending archive hour values. # - # For CCPA, the archive interval is 6 hours, i.e. the obs files are - # bundled into 6-hourly archives. This implies 4 archives per day. The - # archives are organized such that each one contains 6 files, so that the - # obs availability interval is - # - # obs_avail_intvl_hrs = (24 hrs)/[(4 archives)*(6 files/archive)] - # = 1 hr/file - # - # i.e. 
there is one obs file for each hour of the day containing the - # accumulation over that one hour. The archive corresponding to hour 0 - # of the current day contains 6 files representing accumulations during - # the 6 hours of the previous day. The archive corresponding to hour 6 - # of the current day corresponds to accumulations during the first 6 - # hours of the current day, and the archives corresponding to hours 12 - # and 18 of the current day correspond to accumulations during the 2nd - # and 3rd 6-hourly intervals of the current day. Thus, to obtain all the - # one-hour accumulations for the current day, we must extract all the obs - # files from the archives corresponding to hours 6, 12, and 18 of the - # current day and hour 0 of the next day. This corresponds to an archive - # hour sequence set below of [6, 12, 18, 24]. Thus, in the simplest case - # in which the observation retrieval times include all hours of the - # current task's day at which obs files are available and none of the obs - # files for this day already exist on disk, this sequence will be [6, 12, - # 18, 24]. In other cases, the sequence we loop over will be a subset of - # [6, 12, 18, 24]. - # - # For NOHRSC, the archive interval is 24 hours, i.e. the obs files are - # bundled into 24-hourly archives. This implies just 1 archive per day. - # The archives are organized such that each one contains 4 files, so that - # the obs availability interval is - # - # obs_avail_intvl_hrs = (24 hrs)/[(1 archive)*(4 files/archive)] - # = 6 hr/file - # - # i.e. there is one obs file for each 6-hour interval of the day containing - # the accumulation over those 6 hours. The 4 obs files within each archive - # correspond to hours 0, 6, 12, and 18 of the current day. The obs file - # for hour 0 contains accumulations during the last 6 hours of the previous - # day, while those for hours 6, 12, and 18 contain accumulations for the - # first, second, and third 6-hour chunks of the current day. 
Thus, to - # obtain all the 6-hour accumulations for the current day, we must extract - # from the archive for the current day the obs files for hours 6, 12, and - # 18 and from the archive for the next day the obs file for hour 0. This - # corresponds to an archive hour sequence set below of [0, 24]. Thus, in - # the simplest case in which the observation retrieval times include all - # hours of the current task's day at which obs files are available and - # none of the obs files for this day already exist on disk, this sequence - # will be [0, 24]. In other cases, the sequence we loop over will be a - # subset of [0, 24]. - # - # For NDAS, the archive interval is 6 hours, i.e. the obs files are - # bundled into 6-hourly archives. This implies 4 archives per day. The - # archives are organized such that each one contains 7 files (not say 6). - # The archive associated with time yyyymmddhh_arcv contains the hourly - # files at - # - # yyyymmddhh_arcv - 6 hours - # yyyymmddhh_arcv - 5 hours - # ... - # yyyymmddhh_arcv - 2 hours - # yyyymmddhh_arcv - 1 hours - # yyyymmddhh_arcv - 0 hours - # - # These are known as the tm06, tm05, ..., tm02, tm01, and tm00 files, - # respectively. Thus, the tm06 file from the current archive, say the - # one associated with time yyyymmddhh_arcv, has the same valid time as - # the tm00 file from the previous archive, i.e. the one associated with - # time (yyyymmddhh_arcv - 6 hours). It turns out the tm06 file from the - # current archive contains more/better observations than the tm00 file - # from the previous archive. Thus, for a given archive time yyyymmddhh_arcv, - # we use 6 of the 7 files at tm06, ..., tm01 but not the one at tm00, - # effectively resulting in an 6 files per archive for NDAS obs. The obs - # availability interval is then - # - # obs_avail_intvl_hrs = (24 hrs)/[(4 archives)*(6 files/archive)] - # = 1 hr/file - # - # i.e. there is one obs file for each hour of the day containing values - # at that hour. 
The archive corresponding to hour 0 of the current day - # contains 6 files valid at hours 18 through 23 of the previous day. The - # archive corresponding to hour 6 of the current day contains 6 files - # valid at hours 0 through 5 of the current day, and the archives - # corresponding to hours 12 and 18 of the current day each contain 6 - # files valid at hours 6 through 11 and 12 through 17 of the current day. - # Thus, to obtain all the hourly values for the current day (from hour - # 0 to hour 23), we must extract the 6 obs files (excluding the tm00 - # ones) from the archives corresponding to hours 6, 12, and 18 of the - # current day and the archive corresponding to hour 0 of the next day. - # This corresponds to an archive hour sequence set below of [6, 12, 18, - # 24]. Thus, in the simplest case in which the observation retrieval - # times include all hours of the current task's day at which obs files - # are available and none of the obs files for this day already exist on - # disk, this sequence will be [6, 12, 18, 24]. In other cases, the - # sequence we loop over will be a subset of [6, 12, 18, 24]. # #----------------------------------------------------------------------- # @@ -628,24 +626,62 @@ def get_obs(config, obtype, yyyymmdd_task): yyyymmddhh_arcv_str = dt.datetime.strftime(yyyymmddhh_arcv, '%Y%m%d%H') yyyymmdd_arcv_str = dt.datetime.strftime(yyyymmddhh_arcv, '%Y%m%d') - # Directory that will contain the files retrieved from the current archive - # file. We refer to this as the "raw" archive directory because it will - # contain the files as they are in the archive before any processing by - # this script. + # Set the subdirectory under the raw base directory that will contain the + # files retrieved from the current archive. We refer to this as the "raw" + # archive sudirectory because it will contain the files as they are in + # the archive before any processing by this script. 
Later below, this + # will be combined with the raw base directory (whose name depends on the + # year, month, and day of the current obs day) to obtain the full path to + # the raw archive directory (arcv_dir_raw). + # + # Notes on each obs type: + # + # CCPA: + # The raw subdirectory name must include the year, month, day, and hour + # in order to avoid get_obs tasks for different days clobbering each + # others' obs files. + # + # NOHRSC: + # The hour-of-day of the archive is irrelevant because there is only one + # archive per day, so we don't include it in the raw archive subdirectory's + # name. However, we still need a subdirectory that contains the year, + # month, and day information of the archive because in the simplest case + # of having to get the NOHRSC obs for all hours of the current obs day, + # we need to extract obs files from two archives -- one for the current + # day (which includes the files for accumulations over hours 0-6, 6-12, + # and 12-18 of the current day) and another for the next day (which + # includes the file for accumulations over hours 18-24 of the current + # day). To distinguish between the raw obs files from these two archives, + # we create an archive-time dependent raw subdirectory for each possible + # archive. + # + # MRMS: + # There is only one archive per day, and it contains all the raw obs + # files needed to generate processed obs files for all hours of the + # current day. Thus, we will only ever need this one archive, so there + # is no need to include the archive's hour information (there really + # isn't any) in the raw subdirectory name. In addition, the archive's + # year, month, and day is the same as that of the obs day's, so it is + # already included in the name of the raw base directory. Sine this is + # the only info we need to avoid differnt get_obs tasks clobbering each + # other's output obs files, for simplicity we simply do not create a raw + # archive subdirectory. 
+ # + # NDAS: + # Same as for CCPA. if obtype == 'CCPA': - arcv_dir_raw = os.path.join(basedir_raw, yyyymmddhh_arcv_str) - # For NOHRSC, the hour-of-day for the archive is irrelevant since there - # is only one archive per day, so don't include it in the raw archive - # directory's name. + arcv_subdir_raw = yyyymmddhh_arcv_str elif obtype == 'NOHRSC': - arcv_dir_raw = os.path.join(basedir_raw, yyyymmdd_arcv_str) - # Since for MRMS data there is only one archive per day, that directory - # is redundant, so for simplicity we set arcv_dir_raw to just basedir_raw. + arcv_subdir_raw = yyyymmdd_arcv_str elif obtype == 'MRMS': - arcv_dir_raw = basedir_raw + arcv_subdir_raw = '' elif obtype == 'NDAS': arcv_dir_raw = os.path.join(basedir_raw, yyyymmddhh_arcv_str) + # Combine the raw archive base directory with the raw archive subdirectory + # name to obtain the full path to the raw archive directory. + arcv_dir_raw = os.path.join(basedir_raw, arcv_subdir_raw) + # Check whether any of the obs retrieval times for the day associated with # this task fall in the time interval spanned by the current archive. If # so, set the flag (do_retrieve) to retrieve the files in the current @@ -739,23 +775,44 @@ def get_obs(config, obtype, yyyymmdd_task): # Loop over the raw obs files extracted from the current archive and # generate from them the processed obs files. # - # For CCPA obs, for most dates this consists of simply copying or moving - # the files from the raw archive directory to the processed directory, - # possibly renaming them in the process. However, for dates between - # 20180718 and 20210504 and hours-of-day 19 through the end of the day - # (i.e. hour 0 of the next day), it involves using wgrib2 to correct an + # Notes on each obs type: + # + # CCPA: + # For most dates, generating the processed obs files consists of simply + # copying or moving the files from the raw archive directory to the processed + # directory, possibly renaming them in the process. 
However, for dates + # between 20180718 and 20210504 and hours-of-day 19 through the end of the + # day (i.e. hour 0 of the next day), it involves using wgrib2 to correct an # error in the metadata of the raw file and writing the corrected data # to a new grib2 file in the processed location. - # - # For NOHRSC obs, this consists of simply copying or moving the files from - # the raw archive directory to the processed directory, possibly renaming - # them in the process. - # - # For NDAS obs, this consists of simply copying or moving the files from - # the raw archive directory to the processed directory, possibly renaming - # them in the process. Note that the tm06 file in a given archive contain - # more/better observations than the tm00 file in the next archive (their - # valid times are equivalent), so we use the tm06 files. + # + # NOHRSC: + # Generating the processed obs files consists of simply copying or moving + # the files from the raw archive directory to the processed directory, + # possibly renaming them in the process. + # + # MRMS: + # The MRMS obs are in fact available every few minutes, but the smallest + # value we allow the obs availability interval to be set to is 1 hour + # because the forecasts cannot (yet) perform sub-hourly output (also see + # notes for MRMS_OBS_AVAIL_INTVL_HRS in config_defaults.yaml). For this + # reason, MRMS obs require an extra processing step on the raw files (before + # creating the processed files). In this step, at each obs retrieval time + # we first generate an intermediate grib2 file from the set of all raw (and + # gzipped) grib2 files for the current day (the latter usually being only a + # few minutes apart) the file that is nearest in time to the obs retrieval + # time. After selecting this gzipped grib2 file, we unzip it and place it + # in a temporary subdirectory under the raw base directory. 
Only after this + # step do we then generate the processed file by moving this intermediate + # file to the processed directory, possibly renaming it in the process. + # + # NDAS: + # Generating the processed obs files consists of simply copying or moving + # the files from the raw archive directory to the processed directory, + # possibly renaming them in the process. Note that for a given NDAS archive, + # the tm06 file in a contains more/better observations than the tm00 file + # in the previous archive (their valid times being equivalent), so we always + # use the tm06 files. for yyyymmddhh in obs_times_in_arcv: # Create the processed obs file from the raw one (by moving, copying, or @@ -870,7 +927,7 @@ def get_obs(config, obtype, yyyymmdd_task): def parse_args(argv): - """Parse command line arguments""" + """Parse command line arguments.""" parser = argparse.ArgumentParser( description="Get observations." ) From 50729f5dc02a20c982653a97025bbdfecd256d90 Mon Sep 17 00:00:00 2001 From: gerard ketefian Date: Tue, 8 Oct 2024 10:56:36 -0600 Subject: [PATCH 108/131] Bug fix. --- ush/get_obs.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/ush/get_obs.py b/ush/get_obs.py index c831ad909e..d7833a70f7 100644 --- a/ush/get_obs.py +++ b/ush/get_obs.py @@ -676,7 +676,7 @@ def get_obs(config, obtype, yyyymmdd_task): elif obtype == 'MRMS': arcv_subdir_raw = '' elif obtype == 'NDAS': - arcv_dir_raw = os.path.join(basedir_raw, yyyymmddhh_arcv_str) + arcv_subdir_raw = yyyymmddhh_arcv_str # Combine the raw archive base directory with the raw archive subdirectory # name to obtain the full path to the raw archive directory. From 601284359644fd5547d91be0dc6c828bfa26814a Mon Sep 17 00:00:00 2001 From: gerard ketefian Date: Tue, 8 Oct 2024 11:30:29 -0600 Subject: [PATCH 109/131] Remove commented-out lines; remove trailing whitespace. 
--- parm/metplus/PcpCombine.conf | 1 - ush/get_obs.py | 76 ++++++++++++++++++------------------ 2 files changed, 38 insertions(+), 39 deletions(-) diff --git a/parm/metplus/PcpCombine.conf b/parm/metplus/PcpCombine.conf index de99871bed..04562dc14b 100644 --- a/parm/metplus/PcpCombine.conf +++ b/parm/metplus/PcpCombine.conf @@ -126,7 +126,6 @@ FCST_PCP_COMBINE_RUN = False # # Accumulation interval available in the input data. # -#{{FCST_OR_OBS}}_PCP_COMBINE_INPUT_ACCUMS = 01 {{FCST_OR_OBS}}_PCP_COMBINE_INPUT_ACCUMS = {{input_accum_hh}} # # Accumulation interval to generate in the output file. diff --git a/ush/get_obs.py b/ush/get_obs.py index d7833a70f7..50b7c45ae3 100644 --- a/ush/get_obs.py +++ b/ush/get_obs.py @@ -42,7 +42,7 @@ def get_obs_arcv_hr(obtype, arcv_intvl_hrs, hod): hod: The hour of the day. An integer. This must be between 0 and 23. For - cumulative fields (CCPA and NOHRSC), hour 0 is treated as that of the + cumulative fields (CCPA and NOHRSC), hour 0 is treated as that of the next day, i.e. as the 24th hour of the current day. Returns: @@ -132,7 +132,7 @@ def get_obs(config, obtype, yyyymmdd_task): Detailed Description: - In this script, the main (outer) loop to obtain obs files is over a + In this script, the main (outer) loop to obtain obs files is over a sequence of archive hours, where each archive hour in the sequence represents one archive (tar) file in the data store, and archive hours are with respect to hour 0 of the day. The number of archive hours in @@ -143,22 +143,22 @@ def get_obs(config, obtype, yyyymmdd_task): iterate over a sequence of 4 hours, either [0, 6, 12, 18] or [6, 12, 18, 24] (which of these it will be depends on how the obs files are arranged into the archives). - + Below, we give a description of archive layout for each obs type and give the archive hours to loop over for the case in which we need to obtain all available obs for the current day. 
- - + + CCPA (Climatology-Calibrated Precipitation Analysis) precipitation accumulation obs: ---------- For CCPA, the archive interval is 6 hours, i.e. the obs files are bundled into 6-hourly archives. The archives are organized such that each one contains 6 files, so that the obs availability interval is - + obs_avail_intvl_hrs = (24 hrs)/[(4 archives)*(6 files/archive)] = 1 hr/file - + i.e. there is one obs file for each hour of the day containing the accumulation over that one hour. The archive corresponding to hour 0 of the current day contains 6 files representing accumulations during @@ -176,23 +176,23 @@ def get_obs(config, obtype, yyyymmdd_task): obs files are available and none of the obs files for this day already exist on disk, this sequence will be [6, 12, 18, 24]. In other cases, the sequence we loop over will be a subset of [6, 12, 18, 24]. - + Note that CCPA files for 1-hour accumulation have incorrect metadata in the files under the "00" directory (i.e. for hours-of-day 19 to 00 of the next day) from 20180718 to 20210504. This script corrects these errors if getting CCPA obs at these times. - - + + NOHRSC (National Operational Hydrologic Remote Sensing Center) snow accumulation observations: ---------- For NOHRSC, the archive interval is 24 hours, i.e. the obs files are bundled into 24-hourly archives. The archives are organized such that - each one contains 4 files, so that the obs availability interval is - + each one contains 4 files, so that the obs availability interval is + obs_avail_intvl_hrs = (24 hrs)/[(1 archive)*(4 files/archive)] = 6 hr/file - + i.e. there is one obs file for each 6-hour interval of the day containing the accumulation over those 6 hours. The 4 obs files within each archive correspond to hours 0, 6, 12, and 18 of the current day. The obs file @@ -201,14 +201,14 @@ def get_obs(config, obtype, yyyymmdd_task): first, second, and third 6-hour chunks of the current day. 
Thus, to obtain all the 6-hour accumulations for the current day, we must extract from the archive for the current day the obs files for hours 6, 12, and - 18 and from the archive for the next day the obs file for hour 0. This + 18 and from the archive for the next day the obs file for hour 0. This corresponds to an archive hour sequence of [0, 24]. Thus, in the simplest case in which the observation retrieval times include all hours of the current task's day at which obs files are available and none of the obs files for this day already exist on disk, this sequence will be [0, 24]. In other cases, the sequence we loop over will be a subset of [0, 24]. - - + + MRMS (Multi-Radar Multi-Sensor) radar observations: ---------- For MRMS, the archive interval is 24 hours, i.e. the obs files are @@ -219,9 +219,9 @@ def get_obs(config, obtype, yyyymmdd_task): files that are closest to each hour of the day for which obs are needed. This effectively sets the obs availability interval for MRMS to one hour, i.e. - + obs_avail_intvl_hrs = 1 hr/file - + i.e. there is one obs file for each hour of the day containing values at that hour (but only after filtering in time; also see notes for MRMS_OBS_AVAIL_INTVL_HRS in config_defaults.yaml). Thus, to obtain the @@ -231,33 +231,33 @@ def get_obs(config, obtype, yyyymmdd_task): are available and none of the obs files for this day already exist on disk, the sequence of archive hours over which we loop will be just [0]. Note that: - + * For cases in which MRMS data are not needed for all hours of the day, we still need to retrieve and extract from this single daily archive. Thus, the archive hour sequence over which we loop over will always be just [0] for MRMS obs. - + * Because MRMS obs are split into two sets of archives -- one for composite reflectivity (REFC) and another for echo top (RETOP) -- on any given day (and with an archive hour of 0) we actually retrive and extract two different archive files (one per field). 
- - + + NDAS (NAM Data Assimilation System) conventional observations: ---------- For NDAS, the archive interval is 6 hours, i.e. the obs files are bundled into 6-hourly archives. The archives are organized such that each one contains 7 files (not say 6). The archive associated with - time yyyymmddhh_arcv contains the hourly files at - + time yyyymmddhh_arcv contains the hourly files at + yyyymmddhh_arcv - 6 hours yyyymmddhh_arcv - 5 hours ... yyyymmddhh_arcv - 2 hours yyyymmddhh_arcv - 1 hours yyyymmddhh_arcv - 0 hours - - These are known as the tm06, tm05, ..., tm02, tm01, and tm00 files, + + These are known as the tm06, tm05, ..., tm02, tm01, and tm00 files, respectively. Thus, the tm06 file from the current archive, say the one associated with time yyyymmddhh_arcv, has the same valid time as the tm00 file from the previous archive, i.e. the one associated with @@ -267,10 +267,10 @@ def get_obs(config, obtype, yyyymmdd_task): yyyymmddhh_arcv, we use 6 of the 7 files at tm06, ..., tm01 but not the one at tm00, effectively resulting in 6 files per archive for NDAS obs. The obs availability interval is then - + obs_avail_intvl_hrs = (24 hrs)/[(4 archives)*(6 files/archive)] = 1 hr/file - + i.e. there is one obs file for each hour of the day containing values at that hour. The archive corresponding to hour 0 of the current day contains 6 files valid at hours 18 through 23 of the previous day. The @@ -327,7 +327,7 @@ def get_obs(config, obtype, yyyymmdd_task): # observation files that we need for verification. Each group of fields # is one that is verified together in the workflow. We assume there is # a separate set of obs files for each such field group in the observations, - # and in the code below we loop over these sets of files as necessary. + # and in the code below we loop over these sets of files as necessary. 
# There are several scenarios to consider: # # * An obs type consists of only one set of files containing only one @@ -336,7 +336,7 @@ def get_obs(config, obtype, yyyymmdd_task): # set of files that contain APCP data, and NOHRSC obs consist of only # one set of files that contain ASNOW data. # - # * An obs type consists of more than one set of files, with each file + # * An obs type consists of more than one set of files, with each file # containing a different field. # This is the case for MRMS obs. These consist of two sets of files. # The first set contains REFC data, and the second contains RETOP data. @@ -344,13 +344,13 @@ def get_obs(config, obtype, yyyymmdd_task): # * An obs type consists of only one set of files, but each file contains # multiple groups of fields needed for verification. # This is the case for NDAS obs. These consist of a single set of files, - # but each file contains both the ADPSFC fields (like 2-m temperature) + # but each file contains both the ADPSFC fields (like 2-m temperature) # and ADPUPA fields (like 500-mb temperature) that are verified separately # in the workflow tasks and thus are considered separate field groups. # # Other obs type and field group scenarios are also possible, but we do # not describe them since they are not applicable to any of the obs types - # considered here. + # considered here. if obtype == 'CCPA': field_groups_in_obs = ['APCP'] elif obtype == 'NOHRSC': @@ -659,7 +659,7 @@ def get_obs(config, obtype, yyyymmdd_task): # There is only one archive per day, and it contains all the raw obs # files needed to generate processed obs files for all hours of the # current day. Thus, we will only ever need this one archive, so there - # is no need to include the archive's hour information (there really + # is no need to include the archive's hour information (there really # isn't any) in the raw subdirectory name. 
In addition, the archive's # year, month, and day is the same as that of the obs day's, so it is # already included in the name of the raw base directory. Sine this is @@ -785,12 +785,12 @@ def get_obs(config, obtype, yyyymmdd_task): # day (i.e. hour 0 of the next day), it involves using wgrib2 to correct an # error in the metadata of the raw file and writing the corrected data # to a new grib2 file in the processed location. - # + # # NOHRSC: # Generating the processed obs files consists of simply copying or moving # the files from the raw archive directory to the processed directory, # possibly renaming them in the process. - # + # # MRMS: # The MRMS obs are in fact available every few minutes, but the smallest # value we allow the obs availability interval to be set to is 1 hour @@ -800,12 +800,12 @@ def get_obs(config, obtype, yyyymmdd_task): # creating the processed files). In this step, at each obs retrieval time # we first generate an intermediate grib2 file from the set of all raw (and # gzipped) grib2 files for the current day (the latter usually being only a - # few minutes apart) the file that is nearest in time to the obs retrieval + # few minutes apart) the file that is nearest in time to the obs retrieval # time. After selecting this gzipped grib2 file, we unzip it and place it # in a temporary subdirectory under the raw base directory. Only after this # step do we then generate the processed file by moving this intermediate # file to the processed directory, possibly renaming it in the process. - # + # # NDAS: # Generating the processed obs files consists of simply copying or moving # the files from the raw archive directory to the processed directory, @@ -832,7 +832,7 @@ def get_obs(config, obtype, yyyymmdd_task): # For MRMS obs, first select from the set of raw files for the current day # those that are nearest in time to the current hour. Unzip these in a # temporary subdirectory under the raw base directory. 
- # + # # Note that the script we call to do this (mrms_pull_topofhour.py) assumes # a certain file naming convention. That convention must match the names # of the files that the retrieve_data.py script called above ends up From 1c924a896664e6a815398c11299709bf4bf82465 Mon Sep 17 00:00:00 2001 From: gerard ketefian Date: Tue, 8 Oct 2024 14:13:08 -0600 Subject: [PATCH 110/131] Remove unnecessary bash utility function. --- ush/bash_utils/ceil.sh | 122 --------------------------------------- ush/source_util_funcs.sh | 9 --- 2 files changed, 131 deletions(-) delete mode 100644 ush/bash_utils/ceil.sh diff --git a/ush/bash_utils/ceil.sh b/ush/bash_utils/ceil.sh deleted file mode 100644 index dc8a21c90d..0000000000 --- a/ush/bash_utils/ceil.sh +++ /dev/null @@ -1,122 +0,0 @@ -# -#----------------------------------------------------------------------- -# -# This function returns the ceiling of the quotient of two numbers. The -# ceiling of a number is the number rounded up to the nearest integer. -# -#----------------------------------------------------------------------- -# -function ceil() { -# -#----------------------------------------------------------------------- -# -# Save current shell options (in a global array). Then set new options -# for this script/function. -# -#----------------------------------------------------------------------- -# - { save_shell_opts; . ${USHdir}/preamble.sh; } > /dev/null 2>&1 -# -#----------------------------------------------------------------------- -# -# Get the full path to the file in which this script/function is located -# (scrfunc_fp), the name of that file (scrfunc_fn), and the directory in -# which the file is located (scrfunc_dir). 
-# -#----------------------------------------------------------------------- -# - local scrfunc_fp=$( $READLINK -f "${BASH_SOURCE[0]}" ) - local scrfunc_fn=$( basename "${scrfunc_fp}" ) - local scrfunc_dir=$( dirname "${scrfunc_fp}" ) -# -#----------------------------------------------------------------------- -# -# Get the name of this function. -# -#----------------------------------------------------------------------- -# - local func_name="${FUNCNAME[0]}" -# -#----------------------------------------------------------------------- -# -# Check number of arguments. -# -#----------------------------------------------------------------------- -# - if [ "$#" -ne 2 ]; then - - print_err_msg_exit " -Incorrect number of arguments specified: - - Function name: \"${func_name}\" - Number of arguments specified: $# - -Usage: - - ${func_name} numer denom - -where denom is a nonnegative integer and denom is a positive integer. -" - - fi -# -#----------------------------------------------------------------------- -# -# Make sure arguments are of the right form. -# -#----------------------------------------------------------------------- -# - local numer="$1" - local denom="$2" - - if ! [[ "${numer}" =~ ^[0-9]+$ ]]; then - print_err_msg_exit " -The first argument to the \"${func_name}\" function (numer) must be a nonnegative -integer but isn't: - numer = ${numer} -" - fi - - if [[ "${denom}" -eq 0 ]]; then - print_err_msg_exit " -The second argument to the \"${func_name}\" function (denom) cannot be zero: - denom = ${denom} -" - fi - - if ! [[ "${denom}" =~ ^[0-9]+$ ]]; then - print_err_msg_exit " -The second argument to the \"${func_name}\" function (denom) must be a positive -integer but isn't: - denom = ${denom} -" - fi -# -#----------------------------------------------------------------------- -# -# Let ceil(a,b) denote the ceiling of the quotient of a and b. 
It can be -# shown that for two positive integers a and b, we have: -# -# ceil(a,b) = floor((a+b-1)/b) -# -# where floor(a,b) is the integer obtained by rounding the quotient of -# a and b (i.e. a/b) down to the nearest integer. Since in bash a -# division returns only the integer part of the result, it is effectively -# the floor function. Thus the following. -# -#----------------------------------------------------------------------- -# - result=$(( (numer+denom-1)/denom )) - print_info_msg "${result}" -# -#----------------------------------------------------------------------- -# -# Restore the shell options saved at the beginning of this script/func- -# tion. -# -#----------------------------------------------------------------------- -# - { restore_shell_opts; } > /dev/null 2>&1 - -} - diff --git a/ush/source_util_funcs.sh b/ush/source_util_funcs.sh index ef7c669910..9feceaf68e 100644 --- a/ush/source_util_funcs.sh +++ b/ush/source_util_funcs.sh @@ -96,15 +96,6 @@ function source_util_funcs() { # #----------------------------------------------------------------------- # -# Source the file containing the function that returns the ceiling of -# the quotient of two positive integers. -# -#----------------------------------------------------------------------- -# - . ${bashutils_dir}/ceil.sh -# -#----------------------------------------------------------------------- -# # Source the file containing the functions that will echo given strings # as uppercase or lowercase # From 9435f7f29accbee047ac5a8c0c679996e03ac9d7 Mon Sep 17 00:00:00 2001 From: gerard ketefian Date: Tue, 8 Oct 2024 14:14:10 -0600 Subject: [PATCH 111/131] Clean up comments, remove commented-out code. 
--- ush/run_eval_METplus_timestr_tmpl.sh | 16 +++------------- 1 file changed, 3 insertions(+), 13 deletions(-) diff --git a/ush/run_eval_METplus_timestr_tmpl.sh b/ush/run_eval_METplus_timestr_tmpl.sh index b2df03c56c..f5438be2f4 100755 --- a/ush/run_eval_METplus_timestr_tmpl.sh +++ b/ush/run_eval_METplus_timestr_tmpl.sh @@ -1,21 +1,13 @@ #!/usr/bin/env bash - # #----------------------------------------------------------------------- # -# Source the variable definitions file and the bash utility functions. +# This script is simply a wrapper to the eval_METplus_timestr_tmpl bash +# function. It is needed in order to enable the function to be called +# from a python script. # #----------------------------------------------------------------------- # -#OBS_DIR="/scratch2/BMC/fv3lam/Gerard.Ketefian/AGILE/expt_dirs/get_obs_hpss.do_vx_det.singlecyc.init_00z_fcstlen_36hr.winter_wx.SRW/obs_data/ccpa" -#OBS_CCPA_APCP_FN_TEMPLATE="{valid?fmt=%Y%m%d}/ccpa.t{valid?fmt=%H}z.01h.hrap.conus.gb2" - -#USHdir="/scratch2/BMC/fv3lam/Gerard.Ketefian/AGILE/ufs-srweather-app/ush" -#yyyymmdd_task="20230217" -#lhr="22" -#METplus_timestr_tmpl="/scratch2/BMC/fv3lam/Gerard.Ketefian/AGILE/expt_dirs/get_obs_hpss.do_vx_det.singlecyc.init_00z_fcstlen_36hr.winter_wx.SRW/obs_data/ccpa/{valid?fmt=%Y%m%d}/ccpa.t{valid?fmt=%H}z.01h.hrap.conus.gb2" - -#USHdir="/scratch2/BMC/fv3lam/Gerard.Ketefian/AGILE/ufs-srweather-app/ush"; yyyymmdd_task="20230217"; lhr="22"; METplus_timestr_tmpl="/scratch2/BMC/fv3lam/Gerard.Ketefian/AGILE/expt_dirs/get_obs_hpss.do_vx_det.singlecyc.init_00z_fcstlen_36hr.winter_wx.SRW/obs_data/ccpa/{valid?fmt=%Y%m%d}/ccpa.t{valid?fmt=%H}z.01h.hrap.conus.gb2" set -u . 
$USHdir/source_util_funcs.sh eval_METplus_timestr_tmpl \ @@ -24,5 +16,3 @@ eval_METplus_timestr_tmpl \ METplus_timestr_tmpl="${METplus_timestr_tmpl}" \ outvarname_evaluated_timestr="fp_proc" echo "${fp_proc}" - -# METplus_timestr_tmpl="${OBS_DIR}/${OBS_CCPA_APCP_FN_TEMPLATE}" \ From 2218ca4e1ceabc9949fe8c1901066f8dcb1b0899 Mon Sep 17 00:00:00 2001 From: gerard ketefian Date: Tue, 8 Oct 2024 14:44:40 -0600 Subject: [PATCH 112/131] Remove unneeded variable from task. --- parm/wflow/verify_pre.yaml | 1 - 1 file changed, 1 deletion(-) diff --git a/parm/wflow/verify_pre.yaml b/parm/wflow/verify_pre.yaml index 567f045188..c239eae8d3 100644 --- a/parm/wflow/verify_pre.yaml +++ b/parm/wflow/verify_pre.yaml @@ -52,7 +52,6 @@ task_get_obs_mrms: envars: <<: *default_vars OBTYPE: 'MRMS' - MRMS_FIELDS: 'REFC RETOP' native: '{% if platform.get("SCHED_NATIVE_CMD_HPSS") %}{{ platform.SCHED_NATIVE_CMD_HPSS }}{% else %}{{ platform.SCHED_NATIVE_CMD}}{% endif %}' partition: '{% if platform.get("PARTITION_HPSS") %}&PARTITION_HPSS;{% else %}None{% endif %}' queue: "&QUEUE_HPSS;" From ee5566b1aac7ee9ca27a2f98d50a3159c58031bd Mon Sep 17 00:00:00 2001 From: gerard ketefian Date: Wed, 9 Oct 2024 10:22:19 -0600 Subject: [PATCH 113/131] Fix typo. --- ush/set_cycle_and_obs_timeinfo.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/ush/set_cycle_and_obs_timeinfo.py b/ush/set_cycle_and_obs_timeinfo.py index 634e646745..cae3bc37ee 100644 --- a/ush/set_cycle_and_obs_timeinfo.py +++ b/ush/set_cycle_and_obs_timeinfo.py @@ -72,7 +72,7 @@ def check_temporal_consistency_cumul_fields( This function reads in a subset of the parameters in the verification configuration dictionary and ensures that certain temporal constraints on these parameters are satisfied. It then returns an updated version of - the verification configuration dictionary that satisfies these constranints. + the verification configuration dictionary that satisfies these constraints. 
The constraints are on the accumulation intervals associated with the cumulative forecast fields and corresponding observation type pairs that From befe769c7a8fe8c01b3119f18ee17744713fc7e3 Mon Sep 17 00:00:00 2001 From: gerard ketefian Date: Wed, 9 Oct 2024 16:43:31 -0600 Subject: [PATCH 114/131] Fix typo. --- ...ulticyc.cycintvl_07hr_inits_vary_fcstlen_09hr.ncep-hrrr.yaml | 2 +- ...ulticyc.cycintvl_11hr_inits_vary_fcstlen_03hr.ncep-hrrr.yaml | 2 +- ...multicyc.cycintvl_24hr_inits_00z_fcstlen_03hr.ncep-hrrr.yaml | 2 +- ...multicyc.cycintvl_24hr_inits_12z_fcstlen_03hr.nssl-mpas.yaml | 2 +- ...multicyc.cycintvl_24hr_inits_12z_fcstlen_48hr.nssl-mpas.yaml | 2 +- ...multicyc.cycintvl_24hr_inits_21z_fcstlen_03hr.ncep-hrrr.yaml | 2 +- ...multicyc.cycintvl_96hr_inits_12z_fcstlen_48hr.nssl-mpas.yaml | 2 +- ...do_vx_det.singlecyc.init_00z_fcstlen_36hr.winter_wx.SRW.yaml | 2 +- 8 files changed, 8 insertions(+), 8 deletions(-) diff --git a/tests/WE2E/test_configs/verification/config.get_obs_hpss.do_vx_det.multicyc.cycintvl_07hr_inits_vary_fcstlen_09hr.ncep-hrrr.yaml b/tests/WE2E/test_configs/verification/config.get_obs_hpss.do_vx_det.multicyc.cycintvl_07hr_inits_vary_fcstlen_09hr.ncep-hrrr.yaml index 3286066021..ced46215d0 100644 --- a/tests/WE2E/test_configs/verification/config.get_obs_hpss.do_vx_det.multicyc.cycintvl_07hr_inits_vary_fcstlen_09hr.ncep-hrrr.yaml +++ b/tests/WE2E/test_configs/verification/config.get_obs_hpss.do_vx_det.multicyc.cycintvl_07hr_inits_vary_fcstlen_09hr.ncep-hrrr.yaml @@ -37,7 +37,7 @@ task_run_post: verification: METPLUS_VERBOSITY_LEVEL: 5 # Base directories in which to look for obs. If these do not exist and/or - # do not contain the requied obs, create and populate them. + # do not contain the required obs, create and populate them. # Note that when performing WE2E tests, the default behavior is not to # get obs files from HPSS but to use staged obs files. 
This is done by # setting these variables to the (platform-specific) locations of these diff --git a/tests/WE2E/test_configs/verification/config.get_obs_hpss.do_vx_det.multicyc.cycintvl_11hr_inits_vary_fcstlen_03hr.ncep-hrrr.yaml b/tests/WE2E/test_configs/verification/config.get_obs_hpss.do_vx_det.multicyc.cycintvl_11hr_inits_vary_fcstlen_03hr.ncep-hrrr.yaml index 3963a616b4..97e1393864 100644 --- a/tests/WE2E/test_configs/verification/config.get_obs_hpss.do_vx_det.multicyc.cycintvl_11hr_inits_vary_fcstlen_03hr.ncep-hrrr.yaml +++ b/tests/WE2E/test_configs/verification/config.get_obs_hpss.do_vx_det.multicyc.cycintvl_11hr_inits_vary_fcstlen_03hr.ncep-hrrr.yaml @@ -39,7 +39,7 @@ task_run_post: verification: METPLUS_VERBOSITY_LEVEL: 5 # Base directories in which to look for obs. If these do not exist and/or - # do not contain the requied obs, create and populate them. + # do not contain the required obs, create and populate them. # Note that when performing WE2E tests, the default behavior is not to # get obs files from HPSS but to use staged obs files. This is done by # setting these variables to the (platform-specific) locations of these diff --git a/tests/WE2E/test_configs/verification/config.get_obs_hpss.do_vx_det.multicyc.cycintvl_24hr_inits_00z_fcstlen_03hr.ncep-hrrr.yaml b/tests/WE2E/test_configs/verification/config.get_obs_hpss.do_vx_det.multicyc.cycintvl_24hr_inits_00z_fcstlen_03hr.ncep-hrrr.yaml index 23035f3a92..3ce4ff5f08 100644 --- a/tests/WE2E/test_configs/verification/config.get_obs_hpss.do_vx_det.multicyc.cycintvl_24hr_inits_00z_fcstlen_03hr.ncep-hrrr.yaml +++ b/tests/WE2E/test_configs/verification/config.get_obs_hpss.do_vx_det.multicyc.cycintvl_24hr_inits_00z_fcstlen_03hr.ncep-hrrr.yaml @@ -37,7 +37,7 @@ task_run_post: verification: METPLUS_VERBOSITY_LEVEL: 5 # Base directories in which to look for obs. If these do not exist and/or - # do not contain the requied obs, create and populate them. 
+ # do not contain the required obs, create and populate them. # Note that when performing WE2E tests, the default behavior is not to # get obs files from HPSS but to use staged obs files. This is done by # setting these variables to the (platform-specific) locations of these diff --git a/tests/WE2E/test_configs/verification/config.get_obs_hpss.do_vx_det.multicyc.cycintvl_24hr_inits_12z_fcstlen_03hr.nssl-mpas.yaml b/tests/WE2E/test_configs/verification/config.get_obs_hpss.do_vx_det.multicyc.cycintvl_24hr_inits_12z_fcstlen_03hr.nssl-mpas.yaml index 10ceddd9a8..3264c93eca 100644 --- a/tests/WE2E/test_configs/verification/config.get_obs_hpss.do_vx_det.multicyc.cycintvl_24hr_inits_12z_fcstlen_03hr.nssl-mpas.yaml +++ b/tests/WE2E/test_configs/verification/config.get_obs_hpss.do_vx_det.multicyc.cycintvl_24hr_inits_12z_fcstlen_03hr.nssl-mpas.yaml @@ -39,7 +39,7 @@ task_run_post: verification: METPLUS_VERBOSITY_LEVEL: 5 # Base directories in which to look for obs. If these do not exist and/or - # do not contain the requied obs, create and populate them. + # do not contain the required obs, create and populate them. # Note that when performing WE2E tests, the default behavior is not to # get obs files from HPSS but to use staged obs files. 
This is done by # setting these variables to the (platform-specific) locations of these diff --git a/tests/WE2E/test_configs/verification/config.get_obs_hpss.do_vx_det.multicyc.cycintvl_24hr_inits_12z_fcstlen_48hr.nssl-mpas.yaml b/tests/WE2E/test_configs/verification/config.get_obs_hpss.do_vx_det.multicyc.cycintvl_24hr_inits_12z_fcstlen_48hr.nssl-mpas.yaml index c4f62a679d..a7af3f27c9 100644 --- a/tests/WE2E/test_configs/verification/config.get_obs_hpss.do_vx_det.multicyc.cycintvl_24hr_inits_12z_fcstlen_48hr.nssl-mpas.yaml +++ b/tests/WE2E/test_configs/verification/config.get_obs_hpss.do_vx_det.multicyc.cycintvl_24hr_inits_12z_fcstlen_48hr.nssl-mpas.yaml @@ -38,7 +38,7 @@ task_run_post: verification: METPLUS_VERBOSITY_LEVEL: 5 # Base directories in which to look for obs. If these do not exist and/or - # do not contain the requied obs, create and populate them. + # do not contain the required obs, create and populate them. # Note that when performing WE2E tests, the default behavior is not to # get obs files from HPSS but to use staged obs files. This is done by # setting these variables to the (platform-specific) locations of these diff --git a/tests/WE2E/test_configs/verification/config.get_obs_hpss.do_vx_det.multicyc.cycintvl_24hr_inits_21z_fcstlen_03hr.ncep-hrrr.yaml b/tests/WE2E/test_configs/verification/config.get_obs_hpss.do_vx_det.multicyc.cycintvl_24hr_inits_21z_fcstlen_03hr.ncep-hrrr.yaml index 55cbf5b13f..a0f10d8b05 100644 --- a/tests/WE2E/test_configs/verification/config.get_obs_hpss.do_vx_det.multicyc.cycintvl_24hr_inits_21z_fcstlen_03hr.ncep-hrrr.yaml +++ b/tests/WE2E/test_configs/verification/config.get_obs_hpss.do_vx_det.multicyc.cycintvl_24hr_inits_21z_fcstlen_03hr.ncep-hrrr.yaml @@ -37,7 +37,7 @@ task_run_post: verification: METPLUS_VERBOSITY_LEVEL: 5 # Base directories in which to look for obs. If these do not exist and/or - # do not contain the requied obs, create and populate them. + # do not contain the required obs, create and populate them. 
# Note that when performing WE2E tests, the default behavior is not to # get obs files from HPSS but to use staged obs files. This is done by # setting these variables to the (platform-specific) locations of these diff --git a/tests/WE2E/test_configs/verification/config.get_obs_hpss.do_vx_det.multicyc.cycintvl_96hr_inits_12z_fcstlen_48hr.nssl-mpas.yaml b/tests/WE2E/test_configs/verification/config.get_obs_hpss.do_vx_det.multicyc.cycintvl_96hr_inits_12z_fcstlen_48hr.nssl-mpas.yaml index 20cab966ef..429e8e0086 100644 --- a/tests/WE2E/test_configs/verification/config.get_obs_hpss.do_vx_det.multicyc.cycintvl_96hr_inits_12z_fcstlen_48hr.nssl-mpas.yaml +++ b/tests/WE2E/test_configs/verification/config.get_obs_hpss.do_vx_det.multicyc.cycintvl_96hr_inits_12z_fcstlen_48hr.nssl-mpas.yaml @@ -39,7 +39,7 @@ task_run_post: verification: METPLUS_VERBOSITY_LEVEL: 5 # Base directories in which to look for obs. If these do not exist and/or - # do not contain the requied obs, create and populate them. + # do not contain the required obs, create and populate them. # Note that when performing WE2E tests, the default behavior is not to # get obs files from HPSS but to use staged obs files. This is done by # setting these variables to the (platform-specific) locations of these diff --git a/tests/WE2E/test_configs/verification/config.get_obs_hpss.do_vx_det.singlecyc.init_00z_fcstlen_36hr.winter_wx.SRW.yaml b/tests/WE2E/test_configs/verification/config.get_obs_hpss.do_vx_det.singlecyc.init_00z_fcstlen_36hr.winter_wx.SRW.yaml index 10ff318dd9..aa4b731e3a 100644 --- a/tests/WE2E/test_configs/verification/config.get_obs_hpss.do_vx_det.singlecyc.init_00z_fcstlen_36hr.winter_wx.SRW.yaml +++ b/tests/WE2E/test_configs/verification/config.get_obs_hpss.do_vx_det.singlecyc.init_00z_fcstlen_36hr.winter_wx.SRW.yaml @@ -39,7 +39,7 @@ task_run_post: verification: # Base directories in which to look for obs. If these do not exist and/or - # do not contain the requied obs, create and populate them. 
+ # do not contain the required obs, create and populate them. # Note that when performing WE2E tests, the default behavior is not to # get obs files from HPSS but to use staged obs files. This is done by # setting these variables to the (platform-specific) locations of these From 6dd8e20723f39d5e2cf628d18729614bea824b99 Mon Sep 17 00:00:00 2001 From: gerard ketefian Date: Thu, 10 Oct 2024 09:01:14 -0600 Subject: [PATCH 115/131] Modify old test for set_cycle_dates to fit new version of this function. This includes adding a new test for the case in which the output should be a list of datetime objects (the default is for the output to be a list of strings). --- tests/test_python/test_set_cycle_dates.py | 39 +++++++++++++++++++---- 1 file changed, 32 insertions(+), 7 deletions(-) diff --git a/tests/test_python/test_set_cycle_dates.py b/tests/test_python/test_set_cycle_dates.py index eb76f579c6..8baae643ac 100644 --- a/tests/test_python/test_set_cycle_dates.py +++ b/tests/test_python/test_set_cycle_dates.py @@ -1,20 +1,22 @@ """ Test set_cycle_dates.py """ -from datetime import datetime +from datetime import datetime, timedelta import unittest -from set_cycle_dates import set_cycle_dates +from set_cycle_and_obs_timeinfo import set_cycle_dates class Testing(unittest.TestCase): """ Define the tests""" - def test_set_cycle_dates(self): + + def test_set_cycle_dates_string(self): """ Test that the proper list of dates are produced given the - intput data""" + input data and return_type left to its default value (so the + output should be a list of strings)""" cdates = set_cycle_dates( - date_start=datetime(2022, 1, 1, 6), - date_end=datetime(2022, 1, 2, 12), - incr_cycl_freq=6, + start_time_first_cycl=datetime(2022, 1, 1, 6), + start_time_last_cycl=datetime(2022, 1, 2, 12), + cycl_intvl=timedelta(hours=6), ) self.assertEqual( cdates, @@ -27,3 +29,26 @@ def test_set_cycle_dates(self): "2022010212", ], ) + + def test_set_cycle_dates_datetime(self): + + """ Test that the 
proper list of dates are produced given the + input data and return_type left set to "datetime" (so the output + should be a list of datetime objects)""" + cdates = set_cycle_dates( + start_time_first_cycl=datetime(2022, 1, 1, 6), + start_time_last_cycl=datetime(2022, 1, 2, 12), + cycl_intvl=timedelta(hours=6), + return_type="datetime", + ) + self.assertEqual( + cdates, + [ + datetime(2022, 1, 1, 6), + datetime(2022, 1, 1, 12), + datetime(2022, 1, 1, 18), + datetime(2022, 1, 2, 0), + datetime(2022, 1, 2, 6), + datetime(2022, 1, 2, 12), + ], + ) From 03d2ab6f4b7ae2d9de74fe355019d9cf8611f6d4 Mon Sep 17 00:00:00 2001 From: gerard ketefian Date: Thu, 10 Oct 2024 12:04:35 -0600 Subject: [PATCH 116/131] First attempt at modifying documentation to see if I can view it in the PR page. --- .../CustomizingTheWorkflow/ConfigWorkflow.rst | 85 ++++++++++--------- 1 file changed, 43 insertions(+), 42 deletions(-) diff --git a/doc/UsersGuide/CustomizingTheWorkflow/ConfigWorkflow.rst b/doc/UsersGuide/CustomizingTheWorkflow/ConfigWorkflow.rst index 4d88173028..14fccdd5e5 100644 --- a/doc/UsersGuide/CustomizingTheWorkflow/ConfigWorkflow.rst +++ b/doc/UsersGuide/CustomizingTheWorkflow/ConfigWorkflow.rst @@ -168,48 +168,6 @@ These settings define platform-specific run commands. Users should set run comma ``PRE_TASK_CMDS``: (Default: "") Pre-task commands such as ``ulimit`` needed by tasks. For example: ``'{ ulimit -s unlimited; ulimit -a; }'`` -METplus Parameters ----------------------- - -:ref:`METplus ` is a scientific verification framework that spans a wide range of temporal and spatial scales. Many of the METplus parameters are described below, but additional documentation for the METplus components is available on the `METplus website `__. - -.. _METParamNote: - -.. 
note:: - Where a date field is required: - * ``YYYY`` refers to the 4-digit valid year - * ``MM`` refers to the 2-digit valid month - * ``DD`` refers to the 2-digit valid day of the month - * ``HH`` refers to the 2-digit valid hour of the day - * ``mm`` refers to the 2-digit valid minutes of the hour - * ``SS`` refers to the two-digit valid seconds of the hour - -``CCPA_OBS_DIR``: (Default: ``"{{ workflow.EXPTDIR }}/obs_data/ccpa/proc"``) - User-specified location of the directory where :term:`CCPA` hourly precipitation files used by METplus are located (or, if retrieved by the workflow, where they will be placed). See comments in file ``scripts/exregional_get_verif_obs.sh`` for more details about files and directory structure, as well as important caveats about errors in the metadata and file names. - - .. attention:: - Do not set this to the same path as other ``*_OBS_DIR`` variables; otherwise unexpected results and data loss may occur. - -``NOHRSC_OBS_DIR``: (Default: ``"{{ workflow.EXPTDIR }}/obs_data/nohrsc/proc"``) - User-specified location of top-level directory where NOHRSC 6- and 24-hour snowfall accumulation files used by METplus are located (or, if retrieved by the workflow, where they will be placed). See comments in file scripts/exregional_get_verif_obs.sh for more details about files and directory structure - - .. attention:: - Do not set this to the same path as other ``*_OBS_DIR`` variables; otherwise unexpected results and data loss may occur. - - .. note:: - Due to limited availability of NOHRSC observation data on NOAA :term:`HPSS` and the likelihood that snowfall accumulation verification will not be desired outside of winter cases, this verification option is currently not present in the workflow by default. In order to use it, the verification environment variable ``VX_FIELDS`` should be updated to include ``ASNOW``. This will allow the related workflow tasks to be run. 
- -``MRMS_OBS_DIR``: (Default: ``"{{ workflow.EXPTDIR }}/obs_data/mrms/proc"``) - User-specified location of the directory where :term:`MRMS` composite reflectivity and echo top files used by METplus are located (or, if retrieved by the workflow, where they will be placed). See comments in the ``scripts/exregional_get_verif_obs.sh`` for more details about files and directory structure. - - .. attention:: - Do not set this to the same path as other ``*_OBS_DIR`` variables; otherwise unexpected results and data loss may occur. - -``NDAS_OBS_DIR``: (Default: ``"{{ workflow.EXPTDIR }}/obs_data/ndas/proc"``) - User-specified location of top-level directory where :term:`NDAS` prepbufr files used by METplus are located (or, if retrieved by the workflow, where they will be placed). See comments in file ``scripts/exregional_get_verif_obs.sh`` for more details about files and directory structure. - - .. attention:: - Do not set this to the same path as other ``*_OBS_DIR`` variables; otherwise unexpected results and data loss may occur. Other Platform-Specific Directories -------------------------------------- @@ -1635,6 +1593,49 @@ General Verification Parameters ``METPLUS_VERBOSITY_LEVEL``: (Default: ``2``) Logging verbosity level used by METplus verification tools. Valid values: 0 to 5, with 0 quiet and 5 loud. +METplus Parameters +---------------------- + +:ref:`METplus ` is a scientific verification framework that spans a wide range of temporal and spatial scales. Many of the METplus parameters are described below, but additional documentation for the METplus components is available on the `METplus website `__. + +.. _METParamNote: + +.. 
note:: + Where a date field is required: + * ``YYYY`` refers to the 4-digit valid year + * ``MM`` refers to the 2-digit valid month + * ``DD`` refers to the 2-digit valid day of the month + * ``HH`` refers to the 2-digit valid hour of the day + * ``mm`` refers to the 2-digit valid minutes of the hour + * ``SS`` refers to the two-digit valid seconds of the hour + +``CCPA_OBS_DIR``: (Default: ``"{{ workflow.EXPTDIR }}/obs_data/ccpa/proc"``) + User-specified location of the directory where :term:`CCPA` hourly precipitation files used by METplus are located (or, if retrieved by the workflow, where they will be placed). See comments in file ``scripts/exregional_get_verif_obs.sh`` for more details about files and directory structure, as well as important caveats about errors in the metadata and file names. + + .. attention:: + Do not set this to the same path as other ``*_OBS_DIR`` variables; otherwise unexpected results and data loss may occur. + +``NOHRSC_OBS_DIR``: (Default: ``"{{ workflow.EXPTDIR }}/obs_data/nohrsc/proc"``) + User-specified location of top-level directory where NOHRSC 6- and 24-hour snowfall accumulation files used by METplus are located (or, if retrieved by the workflow, where they will be placed). See comments in file scripts/exregional_get_verif_obs.sh for more details about files and directory structure + + .. attention:: + Do not set this to the same path as other ``*_OBS_DIR`` variables; otherwise unexpected results and data loss may occur. + + .. note:: + Due to limited availability of NOHRSC observation data on NOAA :term:`HPSS` and the likelihood that snowfall accumulation verification will not be desired outside of winter cases, this verification option is currently not present in the workflow by default. In order to use it, the verification environment variable ``VX_FIELDS`` should be updated to include ``ASNOW``. This will allow the related workflow tasks to be run. 
+ +``MRMS_OBS_DIR``: (Default: ``"{{ workflow.EXPTDIR }}/obs_data/mrms/proc"``) + User-specified location of the directory where :term:`MRMS` composite reflectivity and echo top files used by METplus are located (or, if retrieved by the workflow, where they will be placed). See comments in the ``scripts/exregional_get_verif_obs.sh`` for more details about files and directory structure. + + .. attention:: + Do not set this to the same path as other ``*_OBS_DIR`` variables; otherwise unexpected results and data loss may occur. + +``NDAS_OBS_DIR``: (Default: ``"{{ workflow.EXPTDIR }}/obs_data/ndas/proc"``) + User-specified location of top-level directory where :term:`NDAS` prepbufr files used by METplus are located (or, if retrieved by the workflow, where they will be placed). See comments in file ``scripts/exregional_get_verif_obs.sh`` for more details about files and directory structure. + + .. attention:: + Do not set this to the same path as other ``*_OBS_DIR`` variables; otherwise unexpected results and data loss may occur. + Templates for Observation Files --------------------------------- From c0a841e712a3555a93677cc1e6ad982efc9f5303 Mon Sep 17 00:00:00 2001 From: gerard ketefian Date: Fri, 11 Oct 2024 06:42:51 -0600 Subject: [PATCH 117/131] Bug fix. --- ush/set_cycle_and_obs_timeinfo.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/ush/set_cycle_and_obs_timeinfo.py b/ush/set_cycle_and_obs_timeinfo.py index cae3bc37ee..9a7644ed29 100644 --- a/ush/set_cycle_and_obs_timeinfo.py +++ b/ush/set_cycle_and_obs_timeinfo.py @@ -261,7 +261,7 @@ def check_temporal_consistency_cumul_fields( field_fcst = {field_fcst} obtype = {obtype} accum_hrs = {accum_hrs} hr - fcst_output_intvl_hrs = {forecast_output_intvl} hr + fcst_output_intvl_hrs = {fcst_output_intvl} hr accum_hrs % fcst_output_intvl_hrs = {rem_fcst} Thus, this forecast field cannot be accumulated over this interval. 
Will remove this accumulation interval from the list of accumulation From d3485729fec1b4699ecb6f5f4f045c34a67fdfd7 Mon Sep 17 00:00:00 2001 From: gerard ketefian Date: Fri, 11 Oct 2024 09:23:43 -0600 Subject: [PATCH 118/131] Fix up comments. --- ush/set_cycle_and_obs_timeinfo.py | 27 ++++++++++++++++----------- 1 file changed, 16 insertions(+), 11 deletions(-) diff --git a/ush/set_cycle_and_obs_timeinfo.py b/ush/set_cycle_and_obs_timeinfo.py index 9a7644ed29..52271d2362 100644 --- a/ush/set_cycle_and_obs_timeinfo.py +++ b/ush/set_cycle_and_obs_timeinfo.py @@ -75,7 +75,7 @@ def check_temporal_consistency_cumul_fields( the verification configuration dictionary that satisfies these constraints. The constraints are on the accumulation intervals associated with the - cumulative forecast fields and corresponding observation type pairs that + cumulative forecast fields (and corresponding observation type pairs) that are to be verified. The constraints on each such accumulation interval are as follows: @@ -85,14 +85,18 @@ def check_temporal_consistency_cumul_fields( 2) The obs availability interval evenly divides the accumulation interval. This ensures that the obs can be added together to obtain accumulated - values of the obs field, e.g. the 6-hourly NOHRSC obs can be added - to obtain 24-hour observed snowfall accumulations. + values of the obs field, e.g. the 6-hourly NOHRSC obs can be added to + obtain 24-hour observed snowfall accumulations. Note that this also + ensures that the accumulation interval is greater than or equal to the + obs availability interval. 3) The forecast output interval evenly divides the accumulation interval. This ensures that the forecast output can be added together to obtain accumulated values of the forecast field, e.g. if the forecast output - interval is 3 hours, the resulting 3-hourly APCP outputs from the - forecast can be added to obtain 6-hourly forecast APCP. 
+ interval is 3 hours, the resulting 3-hourly APCP outputs from the forecast + can be added to obtain 6-hourly forecast APCP. Note that this also ensures + that the accumulation interval is greater than or equal to the forecast + output interval. 4) The hour-of-day at which the accumulated forecast values will be available are a subset of the ones at which the accumulated obs @@ -207,7 +211,8 @@ def check_temporal_consistency_cumul_fields( # Initialize a sub-sub-dictionary in one of the dictionaries to be returned. fcst_obs_matched_times_all_cycles_cumul[field_fcst][accum_hh] = [] # - # Check that accumulation inervals are shorter than the forecast length. + # Make sure that the accumulation interval is less than or equal to the + # forecast length. # if accum_hrs > fcst_len_hrs: msg = dedent(f""" @@ -225,7 +230,7 @@ def check_temporal_consistency_cumul_fields( logging.info(msg) accum_intvls_hrs.remove(accum_hrs) # - # Check that accumulation inervals are evenly divisible by the observation + # Make sure that accumulation interval is evenly divisible by the observation # availability interval. # if accum_hrs in accum_intvls_hrs: @@ -248,7 +253,7 @@ def check_temporal_consistency_cumul_fields( logging.info(msg) accum_intvls_hrs.remove(accum_hrs) # - # Check that accumulation inervals are evenly divisible by the forecast + # Make sure that accumulation interval is evenly divisible by the forecast # output interval. # if accum_hrs in accum_intvls_hrs: @@ -270,9 +275,9 @@ def check_temporal_consistency_cumul_fields( logging.info(msg) accum_intvls_hrs.remove(accum_hrs) # - # Check that the hours-of-day at which the current cumulative field will - # be output are a subset of the hours-of-day at which the corresponding - # obs type is output. + # Make sure that the hours-of-day at which the current cumulative field + # will be output are a subset of the hours-of-day at which the corresponding + # obs type is available. 
# if accum_hrs in accum_intvls_hrs: From 28140699caa8045b246998993d8183f11ffc6c9b Mon Sep 17 00:00:00 2001 From: gerard ketefian Date: Fri, 11 Oct 2024 16:24:33 -0600 Subject: [PATCH 119/131] In config.community.yaml, move [CCPA|MRMS|NDAS]_OBS_DIR variables from the "platform" to the "verification" section to be consistent with the changes in config_defaults.yaml. --- ush/config.community.yaml | 7 +++---- 1 file changed, 3 insertions(+), 4 deletions(-) diff --git a/ush/config.community.yaml b/ush/config.community.yaml index 417b9edb91..f380bd28cc 100644 --- a/ush/config.community.yaml +++ b/ush/config.community.yaml @@ -5,10 +5,6 @@ user: RUN_ENVIR: community MACHINE: hera ACCOUNT: an_account -platform: - CCPA_OBS_DIR: "" - MRMS_OBS_DIR: "" - NDAS_OBS_DIR: "" workflow: USE_CRON_TO_RELAUNCH: false EXPT_SUBDIR: test_community @@ -35,6 +31,9 @@ global: DO_ENSEMBLE: false NUM_ENS_MEMBERS: 2 verification: + CCPA_OBS_DIR: "" + MRMS_OBS_DIR: "" + NDAS_OBS_DIR: "" VX_FCST_MODEL_NAME: FV3_GFS_v16_CONUS_25km rocoto: tasks: From 52ebd99c75daf96097df4579c92adf49d0a0adce Mon Sep 17 00:00:00 2001 From: gerard ketefian Date: Tue, 15 Oct 2024 14:00:19 -0600 Subject: [PATCH 120/131] Bug fix: the get_obs_nohrsc tasks need to be based on obs days for cumulative fields, not obs days for instantaneous fields (which is the default cycledef in verify_pre.yaml). 
--- parm/wflow/verify_pre.yaml | 3 +++ 1 file changed, 3 insertions(+) diff --git a/parm/wflow/verify_pre.yaml b/parm/wflow/verify_pre.yaml index c239eae8d3..a3b49cc169 100644 --- a/parm/wflow/verify_pre.yaml +++ b/parm/wflow/verify_pre.yaml @@ -37,6 +37,9 @@ task_get_obs_ccpa: task_get_obs_nohrsc: <<: *default_task_verify_pre + attrs: + cycledefs: cycledefs_obs_days_cumul + maxtries: '1' command: '&LOAD_MODULES_RUN_TASK; "get_obs" "&JOBSdir;/JREGIONAL_GET_VERIF_OBS"' envars: <<: *default_vars From 42c3d6c06f0ebdc4c6b6b4111d5e410b40ded419 Mon Sep 17 00:00:00 2001 From: gerard ketefian Date: Wed, 16 Oct 2024 13:13:28 -0600 Subject: [PATCH 121/131] Add logging statements when exceptions occur; fix comments and code indentation. --- ush/set_cycle_and_obs_timeinfo.py | 19 +++++++++++-------- 1 file changed, 11 insertions(+), 8 deletions(-) diff --git a/ush/set_cycle_and_obs_timeinfo.py b/ush/set_cycle_and_obs_timeinfo.py index 52271d2362..ded2f92fe2 100644 --- a/ush/set_cycle_and_obs_timeinfo.py +++ b/ush/set_cycle_and_obs_timeinfo.py @@ -49,6 +49,7 @@ def set_cycle_dates(start_time_first_cycl, start_time_last_cycl, cycl_intvl, Valid values are: valid_values = {valid_values} """) + logging.error(msg) raise Exception(msg) # iterate over cycles @@ -184,6 +185,7 @@ def check_temporal_consistency_cumul_fields( obs_avail_intvl_hrs = {obs_avail_intvl_hrs} 24 % obs_avail_intvl_hrs = {remainder}" """) + logging.error(msg) raise Exception(msg) # Assume that the obs are available at hour 0 of the day regardless # of obs type. @@ -630,12 +632,12 @@ def get_obs_retrieve_times_by_day( """ # Convert string contents of input dictionaries to datetime objects. 
for time_type in ['cumul', 'inst']: - fcst_output_times_all_cycles[time_type] \ - = [datetime.strptime(fcst_output_times_all_cycles[time_type][i], "%Y%m%d%H") - for i in range(len(fcst_output_times_all_cycles[time_type]))] - obs_days_all_cycles[time_type] \ - = [datetime.strptime(obs_days_all_cycles[time_type][i], "%Y%m%d") - for i in range(len(obs_days_all_cycles[time_type]))] + fcst_output_times_all_cycles[time_type] \ + = [datetime.strptime(fcst_output_times_all_cycles[time_type][i], "%Y%m%d%H") + for i in range(len(fcst_output_times_all_cycles[time_type]))] + obs_days_all_cycles[time_type] \ + = [datetime.strptime(obs_days_all_cycles[time_type][i], "%Y%m%d") + for i in range(len(obs_days_all_cycles[time_type]))] # Get list of forecast fields to be verified. vx_fields = vx_config['VX_FIELDS'] @@ -650,8 +652,9 @@ def get_obs_retrieve_times_by_day( {'obtype': 'NDAS', 'fcst_fields': ['ADPSFC', 'ADPUPA']}] } - # Keep only those items in the dictionary above that have forecast fields - # that appear in the list of forecast fields to be verified. + # Keep only those items in the dictionary vx_field_info defined above + # that have forecast fields that appear in the list of forecast fields to + # be verified. for obs_time_type, obtypes_to_fcst_fields_dict_list in vx_field_info.copy().items(): for obtypes_to_fcst_fields_dict in obtypes_to_fcst_fields_dict_list.copy(): obtype = obtypes_to_fcst_fields_dict['obtype'] From 5a6da53c82088208589f8361eee302708e384257 Mon Sep 17 00:00:00 2001 From: gerard ketefian Date: Fri, 18 Oct 2024 10:03:31 -0600 Subject: [PATCH 122/131] Minor moving of config variable. 
--- ush/config_defaults.yaml | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/ush/config_defaults.yaml b/ush/config_defaults.yaml index 1e967ef9e4..b0a6438111 100644 --- a/ush/config_defaults.yaml +++ b/ush/config_defaults.yaml @@ -2428,7 +2428,7 @@ verification: MRMS_OBS_AVAIL_INTVL_HRS: 1 NDAS_OBS_AVAIL_INTVL_HRS: 1 # - # REMOVE_RAW_OBS_DIRS_[CCPA|MRMS|NDAS|NOHRSC]: + # REMOVE_RAW_OBS_DIRS_[CCPA|NOHRSC|MRMS|NDAS]: # Boolean flag specifying whether to remove the "raw" observation # directories after pulling the specified type of obs (CCPA, NOHRSC, # MRMS, or NOHRSC). The raw directories are the ones in which the @@ -2438,9 +2438,9 @@ verification: # structure). # REMOVE_RAW_OBS_CCPA: true + REMOVE_RAW_OBS_NOHRSC: true REMOVE_RAW_OBS_MRMS: true REMOVE_RAW_OBS_NDAS: true - REMOVE_RAW_OBS_NOHRSC: true # # OBS_CCPA_APCP_FN_TEMPLATE_PCPCOMBINE_OUTPUT: # Template used to specify the names of the output NetCDF observation From 7dc7db309eee5de53087e74d273647b182a8701d Mon Sep 17 00:00:00 2001 From: gerard ketefian Date: Wed, 9 Oct 2024 15:59:07 -0600 Subject: [PATCH 123/131] Add new parameter VX_FCST_OUTPUT_INTVL_HRS into config_defaults.yaml and use it as the forecast output interval when performing vx. 
--- scripts/exregional_check_post_output.sh | 2 +- ...egional_run_met_genensprod_or_ensemblestat.sh | 2 +- ...xregional_run_met_gridstat_or_pointstat_vx.sh | 2 +- ...l_run_met_gridstat_or_pointstat_vx_ensmean.sh | 2 +- ...l_run_met_gridstat_or_pointstat_vx_ensprob.sh | 2 +- scripts/exregional_run_met_pcpcombine.sh | 2 +- ush/config_defaults.yaml | 9 +++++++++ ush/setup.py | 16 ++++++---------- 8 files changed, 21 insertions(+), 16 deletions(-) diff --git a/scripts/exregional_check_post_output.sh b/scripts/exregional_check_post_output.sh index 433aba1e4e..f176c9a12e 100755 --- a/scripts/exregional_check_post_output.sh +++ b/scripts/exregional_check_post_output.sh @@ -126,7 +126,7 @@ set_leadhrs \ yyyymmddhh_init="${CDATE}" \ lhr_min="0" \ lhr_max="${FCST_LEN_HRS}" \ - lhr_intvl="${FCST_OUTPUT_INTVL_HRS}" \ + lhr_intvl="${VX_FCST_OUTPUT_INTVL_HRS}" \ base_dir="${VX_FCST_INPUT_BASEDIR}" \ fn_template="${FCST_INPUT_FN_TEMPLATE}" \ num_missing_files_max="${NUM_MISSING_FCST_FILES_MAX}" \ diff --git a/scripts/exregional_run_met_genensprod_or_ensemblestat.sh b/scripts/exregional_run_met_genensprod_or_ensemblestat.sh index 67ae70c8b9..475417ee53 100755 --- a/scripts/exregional_run_met_genensprod_or_ensemblestat.sh +++ b/scripts/exregional_run_met_genensprod_or_ensemblestat.sh @@ -226,7 +226,7 @@ case "$OBTYPE" in vx_hr_start="${vx_intvl}" ;; *) - vx_intvl="$((${FCST_OUTPUT_INTVL_HRS}))" + vx_intvl="$((${VX_FCST_OUTPUT_INTVL_HRS}))" vx_hr_start="0" ;; esac diff --git a/scripts/exregional_run_met_gridstat_or_pointstat_vx.sh b/scripts/exregional_run_met_gridstat_or_pointstat_vx.sh index e16b06cb46..a6130ba50d 100755 --- a/scripts/exregional_run_met_gridstat_or_pointstat_vx.sh +++ b/scripts/exregional_run_met_gridstat_or_pointstat_vx.sh @@ -227,7 +227,7 @@ case "$OBTYPE" in vx_hr_start="${vx_intvl}" ;; *) - vx_intvl="$((${FCST_OUTPUT_INTVL_HRS}))" + vx_intvl="$((${VX_FCST_OUTPUT_INTVL_HRS}))" vx_hr_start="0" ;; esac diff --git 
a/scripts/exregional_run_met_gridstat_or_pointstat_vx_ensmean.sh b/scripts/exregional_run_met_gridstat_or_pointstat_vx_ensmean.sh index adecb68bcd..75332e4929 100755 --- a/scripts/exregional_run_met_gridstat_or_pointstat_vx_ensmean.sh +++ b/scripts/exregional_run_met_gridstat_or_pointstat_vx_ensmean.sh @@ -170,7 +170,7 @@ case "$OBTYPE" in vx_hr_start="${vx_intvl}" ;; *) - vx_intvl="$((${FCST_OUTPUT_INTVL_HRS}))" + vx_intvl="$((${VX_FCST_OUTPUT_INTVL_HRS}))" vx_hr_start="0" ;; esac diff --git a/scripts/exregional_run_met_gridstat_or_pointstat_vx_ensprob.sh b/scripts/exregional_run_met_gridstat_or_pointstat_vx_ensprob.sh index 2c27a9a597..382bd71ac8 100755 --- a/scripts/exregional_run_met_gridstat_or_pointstat_vx_ensprob.sh +++ b/scripts/exregional_run_met_gridstat_or_pointstat_vx_ensprob.sh @@ -169,7 +169,7 @@ case "$OBTYPE" in vx_hr_start="${vx_intvl}" ;; *) - vx_intvl="$((${FCST_OUTPUT_INTVL_HRS}))" + vx_intvl="$((${VX_FCST_OUTPUT_INTVL_HRS}))" vx_hr_start="0" ;; esac diff --git a/scripts/exregional_run_met_pcpcombine.sh b/scripts/exregional_run_met_pcpcombine.sh index 43da23ca2e..590ceb43ef 100755 --- a/scripts/exregional_run_met_pcpcombine.sh +++ b/scripts/exregional_run_met_pcpcombine.sh @@ -229,7 +229,7 @@ set_leadhrs_no_missing \ if [ "${FCST_OR_OBS}" = "FCST" ]; then base_dir="${FCST_INPUT_DIR}" fn_template="${FCST_INPUT_FN_TEMPLATE}" - subintvl="${FCST_OUTPUT_INTVL_HRS}" + subintvl="${VX_FCST_OUTPUT_INTVL_HRS}" elif [ "${FCST_OR_OBS}" = "OBS" ]; then base_dir="${OBS_INPUT_DIR}" fn_template="${OBS_INPUT_FN_TEMPLATE}" diff --git a/ush/config_defaults.yaml b/ush/config_defaults.yaml index b0a6438111..b216ccdd72 100644 --- a/ush/config_defaults.yaml +++ b/ush/config_defaults.yaml @@ -2498,6 +2498,15 @@ verification: VX_APCP_ACCUMS_HRS: [ 1, 3, 6, 24 ] VX_ASNOW_ACCUMS_HRS: [ 6, 24 ] # + # Set the forecast output interval to use for verification purposes. + # If the forecasts to be verified are being run in the SRW (i.e. 
they + # are not staged from another forecast model), then this should be set + # set to the SRW's forecast output interval, but such a variable is + # currently not available in this configuration file. Instead, for + # now we set it to a default value of 1 hour. + # + VX_FCST_OUTPUT_INTVL_HRS: 1 + # # VX_FCST_INPUT_BASEDIR: # Template for top-level directory containing forecast (but not obs) # files that will be used as input into METplus for verification. diff --git a/ush/setup.py b/ush/setup.py index 3a034f7476..dfc59ffaba 100644 --- a/ush/setup.py +++ b/ush/setup.py @@ -578,22 +578,19 @@ def _remove_tag(tasks, tag): # # ----------------------------------------------------------------------- # + vx_config = expt_config["verification"] + date_first_cycl = workflow_config.get("DATE_FIRST_CYCL") date_last_cycl = workflow_config.get("DATE_LAST_CYCL") incr_cycl_freq = int(workflow_config.get("INCR_CYCL_FREQ")) fcst_len_hrs = workflow_config.get("FCST_LEN_HRS") - - # Set the forecast output interval. Ideally, this should be obtained - # from the SRW App's configuration file, but such a variable doesn't - # yet exist in that file. - fcst_output_intvl_hrs = 1 - workflow_config['FCST_OUTPUT_INTVL_HRS'] = fcst_output_intvl_hrs + vx_fcst_output_intvl_hrs = vx_config.get("VX_FCST_OUTPUT_INTVL_HRS") # To enable arithmetic with dates and times, convert various time # intervals from integer to datetime.timedelta objects. 
cycl_intvl_dt = datetime.timedelta(hours=incr_cycl_freq) fcst_len_dt = datetime.timedelta(hours=fcst_len_hrs) - fcst_output_intvl_dt = datetime.timedelta(hours=fcst_output_intvl_hrs) + vx_fcst_output_intvl_dt = datetime.timedelta(hours=vx_fcst_output_intvl_hrs) # # ----------------------------------------------------------------------- # @@ -605,12 +602,11 @@ def _remove_tag(tasks, tag): # # ----------------------------------------------------------------------- # - vx_config = expt_config["verification"] vx_config, fcst_obs_matched_times_all_cycles_cumul \ = check_temporal_consistency_cumul_fields( vx_config, date_first_cycl, date_last_cycl, cycl_intvl_dt, - fcst_len_dt, fcst_output_intvl_dt) + fcst_len_dt, vx_fcst_output_intvl_dt) expt_config["verification"] = vx_config # # ----------------------------------------------------------------------- @@ -628,7 +624,7 @@ def _remove_tag(tasks, tag): fcst_output_times_all_cycles, obs_days_all_cycles, \ = set_fcst_output_times_and_obs_days_all_cycles( date_first_cycl, date_last_cycl, cycl_intvl_dt, - fcst_len_dt, fcst_output_intvl_dt) + fcst_len_dt, vx_fcst_output_intvl_dt) workflow_config['OBS_DAYS_ALL_CYCLES_INST'] = obs_days_all_cycles['inst'] workflow_config['OBS_DAYS_ALL_CYCLES_CUMUL'] = obs_days_all_cycles['cumul'] From 57fcbc6e04ce57569fb10b00ff66861611c7279b Mon Sep 17 00:00:00 2001 From: gerard ketefian Date: Wed, 16 Oct 2024 13:29:26 -0600 Subject: [PATCH 124/131] Change arguments so the cycle start times don't need to be called multiple times by different functions. 
--- ush/set_cycle_and_obs_timeinfo.py | 12 +++--------- ush/setup.py | 18 +++++++++++++++--- 2 files changed, 18 insertions(+), 12 deletions(-) diff --git a/ush/set_cycle_and_obs_timeinfo.py b/ush/set_cycle_and_obs_timeinfo.py index ded2f92fe2..f345008f04 100644 --- a/ush/set_cycle_and_obs_timeinfo.py +++ b/ush/set_cycle_and_obs_timeinfo.py @@ -361,8 +361,7 @@ def check_temporal_consistency_cumul_fields( def set_fcst_output_times_and_obs_days_all_cycles( - start_time_first_cycl, start_time_last_cycl, cycl_intvl, - fcst_len, fcst_output_intvl): + cycle_start_times, fcst_len, fcst_output_intvl): """ This function returns forecast output times and observation days (i.e. days on which obs are needed because there is forecast output on those @@ -401,12 +400,6 @@ def set_fcst_output_times_and_obs_days_all_cycles( Each element of these lists is a string of the form 'YYYYMMDD'. """ - # Get the list containing the starting times of the cycles. Each element - # of the list will be a datetime object. - cycle_start_times \ - = set_cycle_dates(start_time_first_cycl, start_time_last_cycl, cycl_intvl, - return_type='datetime') - # Get the number of forecast output times per cycle/forecast. num_fcst_output_times_per_cycle = int(fcst_len/fcst_output_intvl + 1) @@ -603,7 +596,8 @@ def set_rocoto_cycledefs_for_obs_days(obs_days_all_cycles): def get_obs_retrieve_times_by_day( - vx_config, fcst_output_times_all_cycles, obs_days_all_cycles): + vx_config, cycle_start_times, fcst_len, + fcst_output_times_all_cycles, obs_days_all_cycles): """ This function generates dictionary of dictionaries that, for each combination of obs type needed and each obs day, contains a string list diff --git a/ush/setup.py b/ush/setup.py index dfc59ffaba..ce5dad28f3 100644 --- a/ush/setup.py +++ b/ush/setup.py @@ -611,6 +611,18 @@ def _remove_tag(tasks, tag): # # ----------------------------------------------------------------------- # + # Generate a list containing the starting times of the cycles. 
This will + # be needed in checking that the hours-of-day of the forecast output match + # those of the observations. + # + # ----------------------------------------------------------------------- + # + cycle_start_times \ + = set_cycle_dates(date_first_cycl, date_last_cycl, cycl_intvl_dt, + return_type='datetime') + # + # ----------------------------------------------------------------------- + # # Generate a list of forecast output times and a list of obs days (i.e. # days on which observations are needed to perform verification because # there is forecast output on those days) over all cycles, both for @@ -623,8 +635,7 @@ def _remove_tag(tasks, tag): # fcst_output_times_all_cycles, obs_days_all_cycles, \ = set_fcst_output_times_and_obs_days_all_cycles( - date_first_cycl, date_last_cycl, cycl_intvl_dt, - fcst_len_dt, vx_fcst_output_intvl_dt) + cycle_start_times, fcst_len_dt, vx_fcst_output_intvl_dt) workflow_config['OBS_DAYS_ALL_CYCLES_INST'] = obs_days_all_cycles['inst'] workflow_config['OBS_DAYS_ALL_CYCLES_CUMUL'] = obs_days_all_cycles['cumul'] @@ -658,7 +669,8 @@ def _remove_tag(tasks, tag): vx_config = expt_config["verification"] obs_retrieve_times_by_day \ = get_obs_retrieve_times_by_day( - vx_config, fcst_output_times_all_cycles, obs_days_all_cycles) + vx_config, cycle_start_times, fcst_len_dt, + fcst_output_times_all_cycles, obs_days_all_cycles) for obtype, obs_days_dict in obs_retrieve_times_by_day.items(): for obs_day, obs_retrieve_times in obs_days_dict.items(): From a3a7996844acb26f440a4da5278ecc828d983f01 Mon Sep 17 00:00:00 2001 From: gerard ketefian Date: Wed, 16 Oct 2024 14:02:49 -0600 Subject: [PATCH 125/131] Further changes to avoid calling the function that calculates the cycle start times multiple times. 
--- ush/set_cycle_and_obs_timeinfo.py | 40 +++++++++++-------------------- ush/setup.py | 28 ++++++++++------------ 2 files changed, 27 insertions(+), 41 deletions(-) diff --git a/ush/set_cycle_and_obs_timeinfo.py b/ush/set_cycle_and_obs_timeinfo.py index f345008f04..a354139352 100644 --- a/ush/set_cycle_and_obs_timeinfo.py +++ b/ush/set_cycle_and_obs_timeinfo.py @@ -66,9 +66,7 @@ def set_cycle_dates(start_time_first_cycl, start_time_last_cycl, cycl_intvl, def check_temporal_consistency_cumul_fields( - vx_config, - start_time_first_cycl, start_time_last_cycl, cycl_intvl, - fcst_len, fcst_output_intvl): + vx_config, cycle_start_times, fcst_len, fcst_output_intvl): """ This function reads in a subset of the parameters in the verification configuration dictionary and ensures that certain temporal constraints on @@ -113,14 +111,9 @@ def check_temporal_consistency_cumul_fields( vx_config: The verification configuration dictionary. - start_time_first_cycl: - Starting time of first cycle; a datetime object. - - start_time_last_cycl: - Starting time of last cycle; a datetime object. - - cycl_intvl: - Time interval between cycle starting times; a timedelta object. + cycle_start_times: + List containing the starting times of the cycles in the experiment; + each list element is a datetime object. fcst_len: The length of each forecast; a timedelta object. @@ -147,13 +140,6 @@ def check_temporal_consistency_cumul_fields( fcst_len_hrs = int(fcst_len/one_hour) fcst_output_intvl_hrs = int(fcst_output_intvl/one_hour) - # Generate a list containing the starting times of the cycles. This will - # be needed in checking that the hours-of-day of the forecast output match - # those of the observations. - cycle_start_times \ - = set_cycle_dates(start_time_first_cycl, start_time_last_cycl, cycl_intvl, - return_type='datetime') - # Initialize one of the variables that will be returned to an empty # dictionary. 
fcst_obs_matched_times_all_cycles_cumul = dict() @@ -372,14 +358,9 @@ def set_fcst_output_times_and_obs_days_all_cycles( accumulation interval smaller than this are obviously not allowed). Args: - start_time_first_cycl: - Starting time of first cycle; a datetime object. - - start_time_last_cycl: - Starting time of last cycle; a datetime object. - - cycl_intvl: - Time interval between cycle starting times; a timedelta object. + cycle_start_times: + List containing the starting times of the cycles in the experiment; + each list element is a datetime object. fcst_len: The length of each forecast; a timedelta object. @@ -608,6 +589,13 @@ def get_obs_retrieve_times_by_day( vx_config: The verification configuration dictionary. + cycle_start_times: + List containing the starting times of the cycles in the experiment; + each list element is a datetime object. + + fcst_len: + The length of each forecast; a timedelta object. + fcst_output_times_all_cycles: Dictionary containing a list of forecast output times over all cycles for instantaneous fields and a second analogous list for cumulative fields. diff --git a/ush/setup.py b/ush/setup.py index ce5dad28f3..a4ba2f0001 100644 --- a/ush/setup.py +++ b/ush/setup.py @@ -594,6 +594,18 @@ def _remove_tag(tasks, tag): # # ----------------------------------------------------------------------- # + # Generate a list containing the starting times of the cycles. This will + # be needed in checking that the hours-of-day of the forecast output match + # those of the observations. + # + # ----------------------------------------------------------------------- + # + cycle_start_times \ + = set_cycle_dates(date_first_cycl, date_last_cycl, cycl_intvl_dt, + return_type='datetime') + # + # ----------------------------------------------------------------------- + # # Ensure that the configuration parameters associated with cumulative # fields (e.g. APCP) in the verification section of the experiment # dicitonary are temporally consistent, e.g. 
that accumulation intervals @@ -604,25 +616,11 @@ def _remove_tag(tasks, tag): # vx_config, fcst_obs_matched_times_all_cycles_cumul \ = check_temporal_consistency_cumul_fields( - vx_config, - date_first_cycl, date_last_cycl, cycl_intvl_dt, - fcst_len_dt, vx_fcst_output_intvl_dt) + vx_config, cycle_start_times, fcst_len_dt, vx_fcst_output_intvl_dt) expt_config["verification"] = vx_config # # ----------------------------------------------------------------------- # - # Generate a list containing the starting times of the cycles. This will - # be needed in checking that the hours-of-day of the forecast output match - # those of the observations. - # - # ----------------------------------------------------------------------- - # - cycle_start_times \ - = set_cycle_dates(date_first_cycl, date_last_cycl, cycl_intvl_dt, - return_type='datetime') - # - # ----------------------------------------------------------------------- - # # Generate a list of forecast output times and a list of obs days (i.e. # days on which observations are needed to perform verification because # there is forecast output on those days) over all cycles, both for From 2685e37382d1da9a5a5de07bb3b5b917636ee115 Mon Sep 17 00:00:00 2001 From: gerard ketefian Date: Fri, 18 Oct 2024 13:43:00 -0600 Subject: [PATCH 126/131] Remove trailing whitespace. --- ush/setup.py | 56 ++++++++++++++++++++++++++-------------------------- 1 file changed, 28 insertions(+), 28 deletions(-) diff --git a/ush/setup.py b/ush/setup.py index a4ba2f0001..899f05586f 100644 --- a/ush/setup.py +++ b/ush/setup.py @@ -51,24 +51,24 @@ from link_fix import link_fix def load_config_for_setup(ushdir, default_config, user_config): - """Updates a Python dictionary in place with experiment configuration settings from the - default, machine, and user configuration files. + """Updates a Python dictionary in place with experiment configuration settings from the + default, machine, and user configuration files. 
Args: ushdir (str): Path to the ``ush`` directory for the SRW App default_config (str): Path to ``config_defaults.yaml`` - user_config (str): Path to the user-provided config YAML (usually named + user_config (str): Path to the user-provided config YAML (usually named ``config.yaml``) Returns: None - + Raises: - FileNotFoundError: If the user-provided configuration file or the machine file does not + FileNotFoundError: If the user-provided configuration file or the machine file does not exist. - Exception: If (1) the user-provided configuration file cannot be loaded or (2) it contains - invalid sections/keys or (3) it does not contain mandatory information or (4) - an invalid datetime format is used. + Exception: If (1) the user-provided configuration file cannot be loaded or (2) it contains + invalid sections/keys or (3) it does not contain mandatory information or (4) + an invalid datetime format is used. """ # Load the default config. @@ -268,7 +268,7 @@ def _add_jobname(tasks): Mandatory variable "{val}" not found in: user config file {user_config} OR - machine file {machine_file} + machine file {machine_file} """ ) ) @@ -300,17 +300,17 @@ def set_srw_paths(ushdir, expt_config): Other paths for the SRW App are set as defaults in ``config_defaults.yaml``. 
Args: - ushdir (str) : Path to the system location of the ``ush`` directory under the + ushdir (str) : Path to the system location of the ``ush`` directory under the SRW App clone expt_config (dict): Contains the configuration settings for the user-defined experiment Returns: Dictionary of configuration settings and system paths as keys/values - + Raises: - KeyError: If the external repository required is not listed in the externals + KeyError: If the external repository required is not listed in the externals configuration file (e.g., ``Externals.cfg``) - FileNotFoundError: If the ``ufs-weather-model`` code containing the FV3 source code has + FileNotFoundError: If the ``ufs-weather-model`` code containing the FV3 source code has not been cloned properly """ @@ -371,23 +371,23 @@ def setup(USHdir, user_config_fn="config.yaml", debug: bool = False): time. Args: - USHdir (str): The full path of the ``ush/`` directory where this script + USHdir (str): The full path of the ``ush/`` directory where this script (``setup.py``) is located - user_config_fn (str): The name of a user-provided configuration YAML (usually + user_config_fn (str): The name of a user-provided configuration YAML (usually ``config.yaml``) debug (bool): Enable extra output for debugging Returns: None - - Raises: - ValueError: If checked configuration values are invalid (e.g., forecast length, + + Raises: + ValueError: If checked configuration values are invalid (e.g., forecast length, ``EXPTDIR`` path) - FileExistsError: If ``EXPTDIR`` already exists, and ``PREEXISTING_DIR_METHOD`` is not + FileExistsError: If ``EXPTDIR`` already exists, and ``PREEXISTING_DIR_METHOD`` is not set to a compatible handling method - FileNotFoundError: If the path to a particular file does not exist or if the file itself + FileNotFoundError: If the path to a particular file does not exist or if the file itself does not exist at the expected path - TypeError: If ``USE_CUSTOM_POST_CONFIG_FILE`` or ``USE_CRTM`` are set to 
true but no + TypeError: If ``USE_CUSTOM_POST_CONFIG_FILE`` or ``USE_CRTM`` are set to true but no corresponding custom configuration file or CRTM fix file directory is set KeyError: If an invalid value is provided (i.e., for ``GRID_GEN_METHOD``) """ @@ -480,7 +480,7 @@ def setup(USHdir, user_config_fn="config.yaml", debug: bool = False): f""" EXPTDIR ({exptdir}) already exists, and PREEXISTING_DIR_METHOD = {preexisting_dir_method} - To ignore this error, delete the directory, or set + To ignore this error, delete the directory, or set PREEXISTING_DIR_METHOD = delete, or PREEXISTING_DIR_METHOD = rename in your config file. @@ -667,7 +667,7 @@ def _remove_tag(tasks, tag): vx_config = expt_config["verification"] obs_retrieve_times_by_day \ = get_obs_retrieve_times_by_day( - vx_config, cycle_start_times, fcst_len_dt, + vx_config, cycle_start_times, fcst_len_dt, fcst_output_times_all_cycles, obs_days_all_cycles) for obtype, obs_days_dict in obs_retrieve_times_by_day.items(): @@ -922,7 +922,7 @@ def _get_location(xcs, fmt, expt_cfg): if num_cycles != len(fcst_len_cycl): logger.error(f""" The number of entries in FCST_LEN_CYCL does not divide evenly into a 24 hour day or the number of cycles - in your experiment! + in your experiment! 
FCST_LEN_CYCL = {fcst_len_cycl} """ ) @@ -1303,7 +1303,7 @@ def _get_location(xcs, fmt, expt_cfg): post_output_domain_name = lowercase(post_output_domain_name) # Write updated value of POST_OUTPUT_DOMAIN_NAME back to dictionary - post_config["POST_OUTPUT_DOMAIN_NAME"] = post_output_domain_name + post_config["POST_OUTPUT_DOMAIN_NAME"] = post_output_domain_name # # ----------------------------------------------------------------------- @@ -1591,7 +1591,7 @@ def _dict_find(user_dict, substring): workflow_config["SDF_USES_THOMPSON_MP"] = has_tag_with_value(ccpp_suite_xml, "scheme", "mp_thompson") if workflow_config["SDF_USES_THOMPSON_MP"]: - + logging.debug(f'Selected CCPP suite ({workflow_config["CCPP_PHYS_SUITE"]}) uses Thompson MP') logging.debug(f'Setting up links for additional fix files') @@ -1701,8 +1701,8 @@ def clean_rocoto_dict(rocotodict): 1. A task dictionary containing no "command" key 2. A metatask dictionary containing no task dictionaries - - Args: + + Args: rocotodict (dict): A dictionary containing Rocoto workflow settings """ From dbcbcaf679e404b7d1a6553ed4e3c95042816eca Mon Sep 17 00:00:00 2001 From: gerard ketefian Date: Fri, 18 Oct 2024 13:45:29 -0600 Subject: [PATCH 127/131] Remove trailing whitespace. --- ush/set_cycle_and_obs_timeinfo.py | 10 +++++----- 1 file changed, 5 insertions(+), 5 deletions(-) diff --git a/ush/set_cycle_and_obs_timeinfo.py b/ush/set_cycle_and_obs_timeinfo.py index a354139352..108615516c 100644 --- a/ush/set_cycle_and_obs_timeinfo.py +++ b/ush/set_cycle_and_obs_timeinfo.py @@ -39,7 +39,7 @@ def set_cycle_dates(start_time_first_cycl, start_time_last_cycl, cycl_intvl, """ print_input_args(locals()) - + valid_values = ['string', 'datetime'] if return_type not in valid_values: raise ValueError("Invalid value for 'a'. Expected 1, 2, or 3.") @@ -72,7 +72,7 @@ def check_temporal_consistency_cumul_fields( configuration dictionary and ensures that certain temporal constraints on these parameters are satisfied. 
It then returns an updated version of the verification configuration dictionary that satisfies these constraints. - + The constraints are on the accumulation intervals associated with the cumulative forecast fields (and corresponding observation type pairs) that are to be verified. The constraints on each such accumulation interval @@ -181,7 +181,7 @@ def check_temporal_consistency_cumul_fields( obs_avail_hrs_of_day = [hr for hr in range(obs_avail_hr_start, obs_avail_hr_end, obs_avail_intvl_hrs)] obs_avail_hrs_of_day_str = ['%02d' % int(hr) for hr in obs_avail_hrs_of_day] # - # Get the array of accumulation intervals for the current cumulative field. + # Get the array of accumulation intervals for the current cumulative field. # Then loop over them to ensure that the constraints listed above are # satisfied. If for a given accumulation one or more of the constraints # is not satisfied, remove that accumulation from the list of accumulations @@ -199,7 +199,7 @@ def check_temporal_consistency_cumul_fields( # Initialize a sub-sub-dictionary in one of the dictionaries to be returned. fcst_obs_matched_times_all_cycles_cumul[field_fcst][accum_hh] = [] # - # Make sure that the accumulation interval is less than or equal to the + # Make sure that the accumulation interval is less than or equal to the # forecast length. # if accum_hrs > fcst_len_hrs: @@ -386,7 +386,7 @@ def set_fcst_output_times_and_obs_days_all_cycles( # Initialize dictionaries that will contain the various forecast output # time and obs day information. Note that we initialize the contents of - # these dictionaries as sets because that better suites the data manipulation + # these dictionaries as sets because that better suites the data manipulation # we will need to do, but these sets will later be converted to lists. 
fcst_output_times_all_cycles = dict() fcst_output_times_all_cycles['inst'] = set() From 7545d253888786b6d6ee3d8171dbd0b48ca29bdd Mon Sep 17 00:00:00 2001 From: gerard ketefian Date: Wed, 16 Oct 2024 14:19:52 -0600 Subject: [PATCH 128/131] In order for the temporal consistency checks on various vx parameters and corresponding adjustments to them to be effective (i.e. in order for any necessary adjustments to make it into the rocoto xml file), move the call to the function that performs these checks and adjustments to a place BEFORE the call to extend_yaml() that "freezes" (hard-codes) the accumulations for which the PcpCombine and other tasks are run (this freezing should happen AFTER any adjustments are made to the list of user-specified accumulations). --- ush/setup.py | 59 +++++++++++++++++++++++++++++++++++++++------------- 1 file changed, 44 insertions(+), 15 deletions(-) diff --git a/ush/setup.py b/ush/setup.py index 899f05586f..975c22264a 100644 --- a/ush/setup.py +++ b/ush/setup.py @@ -241,7 +241,46 @@ def _add_jobname(tasks): except: pass cfg_d["workflow"]["EXPT_BASEDIR"] = os.path.abspath(expt_basedir) + # + # ----------------------------------------------------------------------- + # + # Ensure that the configuration parameters associated with cumulative + # fields (e.g. APCP) in the verification section of the experiment + # dicitonary are temporally consistent, e.g. that accumulation intervals + # are less than or equal to the forecast length. Update the verification + # section of the dictionary to remove inconsistencies. 
+ # + # ----------------------------------------------------------------------- + # + vx_config = cfg_d["verification"] + workflow_config = cfg_d["workflow"] + + date_first_cycl = workflow_config.get("DATE_FIRST_CYCL") + date_last_cycl = workflow_config.get("DATE_LAST_CYCL") + incr_cycl_freq = int(workflow_config.get("INCR_CYCL_FREQ")) + fcst_len_hrs = workflow_config.get("FCST_LEN_HRS") + vx_fcst_output_intvl_hrs = vx_config.get("VX_FCST_OUTPUT_INTVL_HRS") + + # Convert various times and time intervals from integers or strings to + # datetime or timedelta objects. + date_first_cycl_dt = datetime.datetime.strptime(date_first_cycl, "%Y%m%d%H") + date_last_cycl_dt = datetime.datetime.strptime(date_last_cycl, "%Y%m%d%H") + cycl_intvl_dt = datetime.timedelta(hours=incr_cycl_freq) + fcst_len_dt = datetime.timedelta(hours=fcst_len_hrs) + vx_fcst_output_intvl_dt = datetime.timedelta(hours=vx_fcst_output_intvl_hrs) + + # Generate a list containing the starting times of the cycles. + cycle_start_times \ + = set_cycle_dates(date_first_cycl_dt, date_last_cycl_dt, cycl_intvl_dt, + return_type='datetime') + + # Call function that runs the consistency checks on the vx parameters. + vx_config, fcst_obs_matched_times_all_cycles_cumul \ + = check_temporal_consistency_cumul_fields( + vx_config, cycle_start_times, fcst_len_dt, vx_fcst_output_intvl_dt) + + cfg_d['verification'] = vx_config extend_yaml(cfg_d) # Do any conversions of data types @@ -603,21 +642,11 @@ def _remove_tag(tasks, tag): cycle_start_times \ = set_cycle_dates(date_first_cycl, date_last_cycl, cycl_intvl_dt, return_type='datetime') - # - # ----------------------------------------------------------------------- - # - # Ensure that the configuration parameters associated with cumulative - # fields (e.g. APCP) in the verification section of the experiment - # dicitonary are temporally consistent, e.g. that accumulation intervals - # are less than or equal to the forecast length. 
Update the verification - # section of the dictionary to remove inconsistencies. - # - # ----------------------------------------------------------------------- - # - vx_config, fcst_obs_matched_times_all_cycles_cumul \ - = check_temporal_consistency_cumul_fields( - vx_config, cycle_start_times, fcst_len_dt, vx_fcst_output_intvl_dt) - expt_config["verification"] = vx_config + print(f"") + print(f"IIIIIIIIIIIIIII") + print(f"cycle_start_times = ") + pprint(cycle_start_times) + #mnmnmnmnmnmnmn # # ----------------------------------------------------------------------- # From 21374ca6c643363cc09f8094a7704774a1816921 Mon Sep 17 00:00:00 2001 From: gerard ketefian Date: Fri, 18 Oct 2024 13:54:27 -0600 Subject: [PATCH 129/131] Remove debugging code and add a blank line. --- ush/set_cycle_and_obs_timeinfo.py | 1 + ush/setup.py | 5 ----- 2 files changed, 1 insertion(+), 5 deletions(-) diff --git a/ush/set_cycle_and_obs_timeinfo.py b/ush/set_cycle_and_obs_timeinfo.py index 108615516c..ddc948b583 100644 --- a/ush/set_cycle_and_obs_timeinfo.py +++ b/ush/set_cycle_and_obs_timeinfo.py @@ -612,6 +612,7 @@ def get_obs_retrieve_times_by_day( Dictionary of dictionaries containing times at which each type of obs is needed on each obs day. """ + # Convert string contents of input dictionaries to datetime objects. 
for time_type in ['cumul', 'inst']: fcst_output_times_all_cycles[time_type] \ diff --git a/ush/setup.py b/ush/setup.py index 975c22264a..0aae872b68 100644 --- a/ush/setup.py +++ b/ush/setup.py @@ -642,11 +642,6 @@ def _remove_tag(tasks, tag): cycle_start_times \ = set_cycle_dates(date_first_cycl, date_last_cycl, cycl_intvl_dt, return_type='datetime') - print(f"") - print(f"IIIIIIIIIIIIIII") - print(f"cycle_start_times = ") - pprint(cycle_start_times) - #mnmnmnmnmnmnmn # # ----------------------------------------------------------------------- # From 5401569f1904d9d1940b7a532b05bd3a778325b5 Mon Sep 17 00:00:00 2001 From: gerard ketefian Date: Sun, 20 Oct 2024 08:28:23 -0600 Subject: [PATCH 130/131] Drop the "_NDAS" and "_ndas" suffixes from pb2nc tasks since prepbufr files can come from sources other than NDAS (e.g. GDAS). --- ...IONAL_RUN_MET_PB2NC_OBS_NDAS => JREGIONAL_RUN_MET_PB2NC_OBS} | 2 +- parm/wflow/verify_pre.yaml | 2 +- ...un_met_pb2nc_obs_ndas.sh => exregional_run_met_pb2nc_obs.sh} | 0 3 files changed, 2 insertions(+), 2 deletions(-) rename jobs/{JREGIONAL_RUN_MET_PB2NC_OBS_NDAS => JREGIONAL_RUN_MET_PB2NC_OBS} (98%) rename scripts/{exregional_run_met_pb2nc_obs_ndas.sh => exregional_run_met_pb2nc_obs.sh} (100%) diff --git a/jobs/JREGIONAL_RUN_MET_PB2NC_OBS_NDAS b/jobs/JREGIONAL_RUN_MET_PB2NC_OBS similarity index 98% rename from jobs/JREGIONAL_RUN_MET_PB2NC_OBS_NDAS rename to jobs/JREGIONAL_RUN_MET_PB2NC_OBS index a6ed90a1a3..89c9bb73f4 100755 --- a/jobs/JREGIONAL_RUN_MET_PB2NC_OBS_NDAS +++ b/jobs/JREGIONAL_RUN_MET_PB2NC_OBS @@ -76,7 +76,7 @@ NDAS observations. # #----------------------------------------------------------------------- # -$SCRIPTSdir/exregional_run_met_pb2nc_obs_ndas.sh || \ +$SCRIPTSdir/exregional_run_met_pb2nc_obs.sh || \ print_err_msg_exit "\ Call to ex-script corresponding to J-job \"${scrfunc_fn}\" failed." 
# diff --git a/parm/wflow/verify_pre.yaml b/parm/wflow/verify_pre.yaml index a3b49cc169..d5ce7885e2 100644 --- a/parm/wflow/verify_pre.yaml +++ b/parm/wflow/verify_pre.yaml @@ -76,7 +76,7 @@ task_run_MET_Pb2nc_obs_NDAS: attrs: cycledefs: cycledefs_obs_days_inst maxtries: '2' - command: '&LOAD_MODULES_RUN_TASK; "run_vx" "&JOBSdir;/JREGIONAL_RUN_MET_PB2NC_OBS_NDAS"' + command: '&LOAD_MODULES_RUN_TASK; "run_vx" "&JOBSdir;/JREGIONAL_RUN_MET_PB2NC_OBS"' envars: <<: *default_vars VAR: ADPSFC diff --git a/scripts/exregional_run_met_pb2nc_obs_ndas.sh b/scripts/exregional_run_met_pb2nc_obs.sh similarity index 100% rename from scripts/exregional_run_met_pb2nc_obs_ndas.sh rename to scripts/exregional_run_met_pb2nc_obs.sh From 88e48e29bb06f2c829ce2eb5119bf3fbe1a39bf7 Mon Sep 17 00:00:00 2001 From: gerard ketefian Date: Mon, 21 Oct 2024 15:53:10 -0600 Subject: [PATCH 131/131] Modifications to address Mike K's PR review comments. --- scripts/exregional_get_verif_obs.sh | 3 +- ush/get_obs.py | 207 ++++++++++++---------------- ush/set_cycle_and_obs_timeinfo.py | 81 ++++++----- ush/setup.py | 1 - 4 files changed, 126 insertions(+), 166 deletions(-) diff --git a/scripts/exregional_get_verif_obs.sh b/scripts/exregional_get_verif_obs.sh index d1ee4116e8..a07deecc25 100755 --- a/scripts/exregional_get_verif_obs.sh +++ b/scripts/exregional_get_verif_obs.sh @@ -61,9 +61,8 @@ Valid observation types are: " fi -script_bn="get_obs" cmd="\ -python3 -u ${USHdir}/${script_bn}.py \ +python3 -u ${USHdir}/get_obs.py \ --var_defns_path "${GLOBAL_VAR_DEFNS_FP}" \ --obtype ${OBTYPE} \ --obs_day ${PDY}" diff --git a/ush/get_obs.py b/ush/get_obs.py index 50b7c45ae3..666c6f1298 100644 --- a/ush/get_obs.py +++ b/ush/get_obs.py @@ -11,6 +11,7 @@ from pprint import pprint from math import ceil, floor import subprocess +import retrieve_data from python_utils import ( load_yaml_config, ) @@ -26,7 +27,7 @@ def get_obs_arcv_hr(obtype, arcv_intvl_hrs, hod): Note that for cumulative fields (like CCPA and 
NOHRSC, as opposed to instantaneous ones like MRMS and NDAS), the archive files corresponding to hour 0 of the day represent accumulations over the previous day. Thus, - here, we never return an achive hour of 0 for cumulative fields. Instead, + here, we never return an archive hour of 0 for cumulative fields. Instead, if the specified hour-of-day is 0, we consider that to represent the 0th hour of the NEXT day (i.e. the 24th hour of the current day) and set the archive hour to 24. @@ -57,32 +58,32 @@ def get_obs_arcv_hr(obtype, arcv_intvl_hrs, hod): msg = dedent(f""" The specified observation type (after converting to upper case) is not supported: - obtype_upper = {obtype_upper} + {obtype_upper = } Valid observation types are: {valid_obtypes} """) logging.error(msg) - raise Exception(msg) + raise ValueError(msg) - # Ensure that the archive inerval divides evenly into 24 hours. + # Ensure that the archive interval divides evenly into 24 hours. remainder = 24 % arcv_intvl_hrs if remainder != 0: msg = dedent(f""" The archive interval for obs of type {obtype} must divide evenly into 24 but doesn't: - arcv_intvl_hrs = {arcv_intvl_hrs} + {arcv_intvl_hrs = } 24 % arcv_intvl_hrs = {remainder} """) logging.error(msg) - raise Exception(msg) + raise ValueError(msg) if (hod < 0) or (hod > 23): msg = dedent(f""" The specified hour-of-day must be between 0 and 23, inclusive, but isn't: - hod = {hod} + {hod = } """) logging.error(msg) - raise Exception(msg) + raise ValueError(msg) # Set the archive hour. This depends on the obs type because each obs # type can organize its observation files into archives in a different @@ -113,8 +114,10 @@ def get_obs(config, obtype, yyyymmdd_task): This script checks for the existence of obs files of the specified type at the locations specified by variables in the SRW App's configuration file. 
If one or more of these files do not exist, it retrieves them from - a data store and places them in the locations specified by the configuration - variables, renaming them if necessary. + a data store (using the retrieve_data.py script and as specified by the + configuration file parm/data_locations.yml for that script) and places + them in the locations specified by the App's configuration variables, + renaming them if necessary. Args: config: @@ -298,10 +301,9 @@ def get_obs(config, obtype, yyyymmdd_task): vx_config = cfg['verification'] # Get the time interval (in hours) at which the obs are available. - key = obtype + '_OBS_AVAIL_INTVL_HRS' - obs_avail_intvl_hrs = vx_config[key] + obs_avail_intvl_hrs = vx_config[f'{obtype}_OBS_AVAIL_INTVL_HRS'] - # The obs availability inerval must divide evenly into 24 hours. Otherwise, + # The obs availability interval must divide evenly into 24 hours. Otherwise, # different days would have obs available at different hours-of-day. Make # sure this is the case. remainder = 24 % obs_avail_intvl_hrs @@ -309,19 +311,18 @@ def get_obs(config, obtype, yyyymmdd_task): msg = dedent(f""" The obs availability interval for obs of type {obtype} must divide evenly into 24 but doesn't: - obs_avail_intvl_hrs = {obs_avail_intvl_hrs} + {obs_avail_intvl_hrs = } 24 % obs_avail_intvl_hrs = {remainder} """) logging.error(msg) - raise Exception(msg) + raise ValueError(msg) # For convenience, convert the obs availability interval to a datetime # object. obs_avail_intvl = dt.timedelta(hours=obs_avail_intvl_hrs) # Get the base directory for the observations. - key = obtype + '_OBS_DIR' - obs_dir = vx_config[key] + obs_dir = vx_config[f'{obtype}_OBS_DIR'] # For each observation type, set the group of fields contained in those # observation files that we need for verification. Each group of fields @@ -368,8 +369,7 @@ def get_obs(config, obtype, yyyymmdd_task): # locations, they will be retrieved from HPSS and placed at these locations. 
fp_proc_templates = [] for fg in field_groups_in_obs: - key = 'OBS_' + obtype + '_' + fg + '_FN_TEMPLATE' - fn_proc_template = vx_config[key] + fn_proc_template = vx_config[f'OBS_{obtype}_{fg}_FN_TEMPLATE'] fp_proc_templates.append(os.path.join(obs_dir, fn_proc_template)) # #----------------------------------------------------------------------- @@ -393,45 +393,41 @@ def get_obs(config, obtype, yyyymmdd_task): fields_in_filenames = [] levels_in_filenames = [] if obtype == 'MRMS': + valid_mrms_field_groups = ['REFC', 'RETOP'] for fg in field_groups_in_obs: + if fg not in valid_mrms_field_groups: + msg = dedent(f""" + Invalid field group specified for obs type: + {obtype = } + {fg = } + Valid field group are: + {valid_mrms_field_groups} + """) + logging.error(msg) + raise ValueError(msg) if fg == 'REFC': fields_in_filenames.append('MergedReflectivityQCComposite') levels_in_filenames.append('00.50') elif fg == 'RETOP': fields_in_filenames.append('EchoTop') levels_in_filenames.append('18_00.50') - else: - msg = dedent(f""" - Invalid field specified for obs type: - obtype = {obtype} - field = {field} - """) - logging.error(msg) - raise Exception(msg) # CCPA files for 1-hour accumulation have incorrect metadata in the files # under the "00" directory from 20180718 to 20210504. Set these starting # and ending dates as datetime objects for later use. 
- yyyymmdd_bad_metadata_start_str = None - yyyymmdd_bad_metadata_end_str = None - yyyymmdd_bad_metadata_start = None - yyyymmdd_bad_metadata_end = None - if obtype == 'CCPA': - yyyymmdd_bad_metadata_start_str = '20180718' - yyyymmdd_bad_metadata_end_str = '20210504' - yyyymmdd_bad_metadata_start = dt.datetime.strptime(yyyymmdd_bad_metadata_start_str, '%Y%m%d') - yyyymmdd_bad_metadata_end = dt.datetime.strptime(yyyymmdd_bad_metadata_end_str, '%Y%m%d') + ccpa_bad_metadata_start = dt.datetime.strptime('20180718', '%Y%m%d') + ccpa_bad_metadata_end = dt.datetime.strptime('20210504', '%Y%m%d') + # #----------------------------------------------------------------------- # - # Get the list of all the times in the current day at which to retrieve - # obs. This is an array with elements having format "YYYYMMDDHH". + # Form a string list of all the times in the current day (each in the + # format "YYYYMMDDHH") at which to retrieve obs. # #----------------------------------------------------------------------- # yyyymmdd_task_str = dt.datetime.strftime(yyyymmdd_task, '%Y%m%d') - key = 'OBS_RETRIEVE_TIMES_' + obtype + '_' + yyyymmdd_task_str - obs_retrieve_times_crnt_day_str = vx_config[key] + obs_retrieve_times_crnt_day_str = vx_config[f'OBS_RETRIEVE_TIMES_{obtype}_{yyyymmdd_task_str}'] obs_retrieve_times_crnt_day \ = [dt.datetime.strptime(yyyymmddhh_str, '%Y%m%d%H') for yyyymmddhh_str in obs_retrieve_times_crnt_day_str] # @@ -465,17 +461,14 @@ def get_obs(config, obtype, yyyymmdd_task): # Initial guess for starting archive hour. This is set to the archive # hour containing obs at the first obs retrieval time of the day. - hod_first = obs_retrieve_times_crnt_day[0].hour - arcv_hr_start = get_obs_arcv_hr(obtype, arcv_intvl_hrs, hod_first) + arcv_hr_start = get_obs_arcv_hr(obtype, arcv_intvl_hrs, obs_retrieve_times_crnt_day[0].hour) # Ending archive hour. This is set to the archive hour containing obs at # the last obs retrieval time of the day. 
- hod_last = obs_retrieve_times_crnt_day[-1].hour - arcv_hr_end = get_obs_arcv_hr(obtype, arcv_intvl_hrs, hod_last) + arcv_hr_end = get_obs_arcv_hr(obtype, arcv_intvl_hrs, obs_retrieve_times_crnt_day[-1].hour) # Set other variables needed below when evaluating the METplus template for # the full path to the processed observation files. - one_hour = dt.timedelta(hours=1) ushdir = config['user']['USHdir'] # Create dictionary containing the paths to all the processed obs files @@ -491,7 +484,7 @@ def get_obs(config, obtype, yyyymmdd_task): for yyyymmddhh in obs_retrieve_times_crnt_day: # Set the lead hour, i.e. the number of hours from the beginning of the # day at which the file is valid. - lhr = int((yyyymmddhh - yyyymmdd_task)/one_hour) + lhr = int((yyyymmddhh - yyyymmdd_task)/dt.timedelta(hours=1)) # Call a bash script to evaluate the template for the full path to the # file containing METplus timestrings at the current time. This should # be upgraded to a python script at some point. @@ -517,18 +510,17 @@ def get_obs(config, obtype, yyyymmdd_task): num_existing_files += 1 msg = dedent(f""" File already exists on disk: - fp_proc = {fp_proc} + {fp_proc = } """) - logging.info(msg) + logging.debug(msg) else: - hod = yyyymmddhh.hour - arcv_hr_start = get_obs_arcv_hr(obtype, arcv_intvl_hrs, hod) + arcv_hr_start = get_obs_arcv_hr(obtype, arcv_intvl_hrs, yyyymmddhh.hour) msg = dedent(f""" File does not exist on disk: - fp_proc = {fp_proc} + {fp_proc = } Setting the hour (since hour 0 of the current task day) of the first archive to retrieve to: - arcv_hr_start = {arcv_hr_start} + {arcv_hr_start = } """) logging.info(msg) do_break = True @@ -537,14 +529,13 @@ def get_obs(config, obtype, yyyymmdd_task): # If the number of obs files that already exist on disk is equal to the # number of obs files needed, then there is no need to retrieve any files. 
- num_obs_retrieve_times_crnt_day = len(obs_retrieve_times_crnt_day) - num_files_needed = num_obs_retrieve_times_crnt_day*num_field_groups + num_files_needed = len(obs_retrieve_times_crnt_day)*num_field_groups if num_existing_files == num_files_needed: msg = dedent(f""" All obs files needed for the current day (yyyymmdd_task) already exist on disk: - yyyymmdd_task = {yyyymmdd_task} + {yyyymmdd_task = } Thus, there is no need to retrieve any files. """) logging.info(msg) @@ -554,22 +545,20 @@ def get_obs(config, obtype, yyyymmdd_task): # the number of obs files needed, then we will need to retrieve files. # In this case, set the sequence of hours corresponding to the archives # from which files will be retrieved. - else: - - arcv_hrs = [hr for hr in range(arcv_hr_start, arcv_hr_end+arcv_intvl_hrs, arcv_intvl_hrs)] - msg = dedent(f""" - At least some obs files needed needed for the current day (yyyymmdd_task) - do not exist on disk: - yyyymmdd_task = {yyyymmdd_task} - The number of obs files needed for the current day is: - num_files_needed = {num_files_needed} - The number of obs files that already exist on disk is: - num_existing_files = {num_existing_files} - Will retrieve remaining files by looping over archives corresponding to - the following hours (since hour 0 of the current day): - arcv_hrs = {arcv_hrs} - """) - logging.info(msg) + arcv_hrs = [hr for hr in range(arcv_hr_start, arcv_hr_end+arcv_intvl_hrs, arcv_intvl_hrs)] + msg = dedent(f""" + At least some obs files needed needed for the current day (yyyymmdd_task) + do not exist on disk: + {yyyymmdd_task = } + The number of obs files needed for the current day is: + {num_files_needed = } + The number of obs files that already exist on disk is: + {num_existing_files = } + Will retrieve remaining files by looping over archives corresponding to + the following hours (since hour 0 of the current day): + {arcv_hrs = } + """) + logging.info(msg) # 
#----------------------------------------------------------------------- # @@ -595,18 +584,9 @@ def get_obs(config, obtype, yyyymmdd_task): #----------------------------------------------------------------------- # - # Whether to move the files or copy them from their raw to their processed - # locations. - mv_or_cp = 'cp' # Whether to remove raw observations after processed directories have # been created from them. - key = 'REMOVE_RAW_OBS_' + obtype - remove_raw_obs = vx_config[key] - # If the raw directories and files are to be removed at the end of this - # script, no need to copy the files since the raw directories are going - # to be removed anyway. - if remove_raw_obs: - mv_or_cp = 'mv' + remove_raw_obs = vx_config[f'REMOVE_RAW_OBS_{obtype}'] # Base directory that will contain the archive subdirectories in which # the files extracted from each archive (tar) file will be placed. We @@ -657,15 +637,9 @@ def get_obs(config, obtype, yyyymmdd_task): # # MRMS: # There is only one archive per day, and it contains all the raw obs - # files needed to generate processed obs files for all hours of the - # current day. Thus, we will only ever need this one archive, so there - # is no need to include the archive's hour information (there really - # isn't any) in the raw subdirectory name. In addition, the archive's - # year, month, and day is the same as that of the obs day's, so it is - # already included in the name of the raw base directory. Sine this is - # the only info we need to avoid differnt get_obs tasks clobbering each - # other's output obs files, for simplicity we simply do not create a raw - # archive subdirectory. + # files needed to generate processed obs files for the current day. + # Since we will only ever need this one archive for a given day, + # for simplicity we simply do not create a raw archive subdirectory. # # NDAS: # Same as for CCPA. 
@@ -703,8 +677,8 @@ def get_obs(config, obtype, yyyymmdd_task): for obs_retrieve_time in obs_retrieve_times_crnt_day: if (obs_retrieve_time >= arcv_contents_start) and \ (obs_retrieve_time <= arcv_contents_end): - do_retrieve = True - break + do_retrieve = True + break if not do_retrieve: msg = dedent(f""" @@ -712,10 +686,10 @@ def get_obs(config, obtype, yyyymmdd_task): hour 0 of the next day if considering a cumulative obs type) fall in the range spanned by the current {arcv_intvl_hrs}-hourly archive file. The bounds of the data in the current archive are: - arcv_contents_start = {arcv_contents_start} - arcv_contents_end = {arcv_contents_end} + {arcv_contents_start = } + {arcv_contents_end = } The times at which obs need to be retrieved are: - obs_retrieve_times_crnt_day = {obs_retrieve_times_crnt_day} + {obs_retrieve_times_crnt_day = } """) logging.info(msg) @@ -747,18 +721,15 @@ def get_obs(config, obtype, yyyymmdd_task): # files in the current archive, although we will make use of only 6 of # these (we will not use the tm00 file). parmdir = config['user']['PARMdir'] - cmd = ' '.join(['python3', \ - '-u', os.path.join(ushdir, 'retrieve_data.py'), \ - '--debug', \ - '--file_set', 'obs', \ - '--config', os.path.join(parmdir, 'data_locations.yml'), \ - '--cycle_date', yyyymmddhh_arcv_str, \ - '--data_stores', 'hpss', \ - '--data_type', obtype + '_obs', \ - '--output_path', arcv_dir_raw, \ - '--summary_file', 'retrieve_data.log']) - result = subprocess.run(cmd, shell=True, capture_output=True, text=True) - rc = result.returncode + args = ['--debug', \ + '--file_set', 'obs', \ + '--config', os.path.join(parmdir, 'data_locations.yml'), \ + '--cycle_date', yyyymmddhh_arcv_str, \ + '--data_stores', 'hpss', \ + '--data_type', obtype + '_obs', \ + '--output_path', arcv_dir_raw, \ + '--summary_file', 'retrieve_data.log'] + retrieve_data.main(args) # Get the list of times corresponding to the obs files in the current # archive. This is a list of datetime objects. 
@@ -836,7 +807,7 @@ def get_obs(config, obtype, yyyymmdd_task): # Note that the script we call to do this (mrms_pull_topofhour.py) assumes # a certain file naming convention. That convention must match the names # of the files that the retrieve_data.py script called above ends up - # retrieving. The list of possibile templates for these names is given + # retrieving. The list of possible templates for these names is given # in parm/data_locations.yml, but which of those is actually used is not # known until retrieve_data.py completes. Thus, that information needs # to be passed back by retrieve_data.py and then passed to mrms_pull_topofhour.py. @@ -854,7 +825,7 @@ def get_obs(config, obtype, yyyymmdd_task): rc = result.returncode # The raw file name needs to be the same as what the retrieve_data.py - # script called above ends up retrieving. The list of possibile templates + # script called above ends up retrieving. The list of possible templates # for this name is given in parm/data_locations.yml, but which of those # is actually used is not known until retrieve_data.py completes. Thus, # that information needs to be passed back by the script and used here. @@ -893,20 +864,20 @@ def get_obs(config, obtype, yyyymmdd_task): {fp_raw} ... """) - logging.info(msg) + logging.debug(msg) yyyymmdd = yyyymmddhh.replace(hour=0, minute=0, second=0) # CCPA files for 1-hour accumulation have incorrect metadata in the files # under the "00" directory from 20180718 to 20210504. After the data is # pulled, reorganize into correct yyyymmdd structure. 
if (obtype == 'CCPA') and \ - ((yyyymmdd >= yyyymmdd_bad_metadata_start) and (yyyymmdd <= yyyymmdd_bad_metadata_end)) and \ + ((yyyymmdd >= ccpa_bad_metadata_start) and (yyyymmdd <= ccpa_bad_metadata_end)) and \ (((hr >= 19) and (hr <= 23)) or (hr == 0)): cmd = ' '.join(['wgrib2', fp_raw, '-set_date -24hr -grib', fp_proc, '-s']) result = subprocess.run(cmd, shell=True, capture_output=True, text=True) - elif mv_or_cp == 'mv': + elif remove_raw_obs: shutil.move(fp_raw, fp_proc) - elif mv_or_cp == 'cp': + else: shutil.copy(fp_raw, fp_proc) # #----------------------------------------------------------------------- @@ -916,10 +887,7 @@ def get_obs(config, obtype, yyyymmdd_task): #----------------------------------------------------------------------- # if remove_raw_obs: - msg = dedent(f""" - Removing raw obs directories ..." - """) - logging.info(msg) + logging.info("Removing raw obs directories ...") shutil.rmtree(basedir_raw) return True @@ -934,7 +902,6 @@ def parse_args(argv): parser.add_argument( "--obtype", - dest="obtype", type=str, required=True, choices=['CCPA', 'NOHRSC', 'MRMS', 'NDAS'], @@ -943,7 +910,6 @@ def parse_args(argv): parser.add_argument( "--obs_day", - dest="obs_day", type=lambda d: dt.datetime.strptime(d, '%Y%m%d'), required=True, help="Date of observation day, in the form 'YYYMMDD'.", @@ -951,7 +917,6 @@ def parse_args(argv): parser.add_argument( "--var_defns_path", - dest="var_defns_path", type=str, required=True, help="Path to variable definitions file.", @@ -961,7 +926,6 @@ def parse_args(argv): for pair in (str.lower(lvl), str.upper(lvl))] parser.add_argument( "--log_level", - dest="log_level", type=str, required=False, default='info', @@ -972,7 +936,6 @@ def parse_args(argv): parser.add_argument( "--log_fp", - dest="log_fp", type=str, required=False, default='', diff --git a/ush/set_cycle_and_obs_timeinfo.py b/ush/set_cycle_and_obs_timeinfo.py index ddc948b583..9029731a94 100644 --- a/ush/set_cycle_and_obs_timeinfo.py +++ 
b/ush/set_cycle_and_obs_timeinfo.py @@ -42,15 +42,14 @@ def set_cycle_dates(start_time_first_cycl, start_time_last_cycl, cycl_intvl, valid_values = ['string', 'datetime'] if return_type not in valid_values: - raise ValueError("Invalid value for 'a'. Expected 1, 2, or 3.") msg = dedent(f""" Invalid value for optional argument "return_type": - return_type = {return_type} + {return_type = } Valid values are: - valid_values = {valid_values} + {valid_values = } """) logging.error(msg) - raise Exception(msg) + raise ValueError(msg) # iterate over cycles all_cdates = [] @@ -168,17 +167,17 @@ def check_temporal_consistency_cumul_fields( msg = dedent(f""" The obs availability interval for obs of type {obtype} must divide evenly into 24 but doesn't: - obs_avail_intvl_hrs = {obs_avail_intvl_hrs} - 24 % obs_avail_intvl_hrs = {remainder}" + {obs_avail_intvl_hrs = } + 24 % obs_avail_intvl_hrs = {remainder}" """) logging.error(msg) - raise Exception(msg) + raise ValueError(msg) # Assume that the obs are available at hour 0 of the day regardless # of obs type. obs_avail_hr_start = 0 obs_avail_hr_end = obs_avail_hr_start + 24 # Construct list of obs availability hours-of-day. - obs_avail_hrs_of_day = [hr for hr in range(obs_avail_hr_start, obs_avail_hr_end, obs_avail_intvl_hrs)] + obs_avail_hrs_of_day = list(range(obs_avail_hr_start, obs_avail_hr_end, obs_avail_intvl_hrs)) obs_avail_hrs_of_day_str = ['%02d' % int(hr) for hr in obs_avail_hrs_of_day] # # Get the array of accumulation intervals for the current cumulative field. @@ -207,13 +206,13 @@ def check_temporal_consistency_cumul_fields( The accumulation interval (accum_hrs) for the current cumulative forecast field (field_fcst) and corresponding observation type (obtype) is greater than the forecast length (fcst_len_hrs): - field_fcst = {field_fcst} - obtype = {obtype} - accum_hrs = {accum_hrs} - fcst_len_hrs = {fcst_len_hrs} - Thus, this forecast field cannot be accumulated over this interval. 
- Will remove this accumulation interval from the list of accumulation - intervals to verify for this field/obtype. + {field_fcst = } + {obtype = } + {accum_hrs = } + {fcst_len_hrs = } + Thus, this forecast field cannot be accumulated over this interval. Will + remove this accumulation interval from the list of accumulation intervals + to verify for this field/obtype. """) logging.info(msg) accum_intvls_hrs.remove(accum_hrs) @@ -229,11 +228,11 @@ def check_temporal_consistency_cumul_fields( field (field_fcst) and corresponding observation type (obtype) is not evenly divisible by the observation type's availability interval (obs_avail_intvl_hrs): - field_fcst = {field_fcst} - obtype = {obtype} - accum_hrs = {accum_hrs} - obs_avail_intvl_hrs = {obs_avail_intvl_hrs} - accum_hrs % obs_avail_intvl_hrs = {rem_obs} + {field_fcst = } + {obtype = } + {accum_hrs = } + {obs_avail_intvl_hrs = } + accum_hrs % obs_avail_intvl_hrs = {rem_obs} Thus, this observation type cannot be accumulated over this interval. Will remove this accumulation interval from the list of accumulation intervals to verify for this field/obtype. @@ -251,14 +250,14 @@ def check_temporal_consistency_cumul_fields( The accumulation interval (accum_hrs) for the current cumulative forecast field (field_fcst) and corresponding observation type (obtype) is not evenly divisible by the forecast output interval (fcst_output_intvl): - field_fcst = {field_fcst} - obtype = {obtype} - accum_hrs = {accum_hrs} hr - fcst_output_intvl_hrs = {fcst_output_intvl} hr - accum_hrs % fcst_output_intvl_hrs = {rem_fcst} - Thus, this forecast field cannot be accumulated over this interval. - Will remove this accumulation interval from the list of accumulation - intervals to verify for this field/obtype. + {field_fcst = } + {obtype = } + {accum_hrs = } + {fcst_output_intvl_hrs = } + accum_hrs % fcst_output_intvl_hrs = {rem_fcst} + Thus, this forecast field cannot be accumulated over this interval. 
Will + remove this accumulation interval from the list of accumulation intervals + to verify for this field/obtype. """) logging.info(msg) accum_intvls_hrs.remove(accum_hrs) @@ -303,17 +302,17 @@ def check_temporal_consistency_cumul_fields( field (field_fcst) is such that the forecast will output the field on at least one of hour-of-day on which the corresponding observation type is not available: - field_fcst = {field_fcst} - obtype = {obtype} - accum_hrs = {accum_hrs} hr + {field_fcst = } + {obtype = } + {accum_hrs = } The forecast output hours-of-day for this field/accumulation interval combination are: - fcst_output_hrs_of_day_str = {fcst_output_hrs_of_day_str} + {fcst_output_hrs_of_day_str = } The hours-of-day at which the obs are available are: - obs_avail_hrs_of_day_str = {obs_avail_hrs_of_day_str} - Thus, at least some of the forecast output cannot be verified. - Will remove this accumulation interval from the list of accumulation - intervals to verify for this field/obtype. + {obs_avail_hrs_of_day_str = } + Thus, at least some of the forecast output cannot be verified. Will remove + this accumulation interval from the list of accumulation intervals to + verify for this field/obtype. """) logging.info(msg) accum_intvls_hrs.remove(accum_hrs) @@ -335,8 +334,8 @@ def check_temporal_consistency_cumul_fields( msg = dedent(f""" The list of accumulation intervals (accum_intvls_hrs) for the current cumulative field to verify (field_fcst) is empty: - field_fcst = {field_fcst} - accum_intvls_hrs = {accum_intvls_hrs} + {field_fcst = } + {accum_intvls_hrs = } Removing this field from the list of fields to verify. 
The updated list is: {vx_config["VX_FIELDS"]} @@ -683,10 +682,10 @@ def get_obs_retrieve_times_by_day( msg = dedent(f""" The obs availability interval for obs of type {obtype} must divide evenly into 24 but doesn't: - obs_avail_intvl_hrs = {obs_avail_intvl_hrs} - 24 % obs_avail_intvl_hrs = {remainder}" + {obs_avail_intvl_hrs = } + 24 % obs_avail_intvl_hrs = {remainder}" """) - raise Exception(msg) + raise ValueError(msg) obs_avail_intvl = timedelta(hours=obs_avail_intvl_hrs) num_obs_avail_times_per_day = int(24/obs_avail_intvl_hrs) diff --git a/ush/setup.py b/ush/setup.py index 0aae872b68..703bc094fd 100644 --- a/ush/setup.py +++ b/ush/setup.py @@ -740,7 +740,6 @@ def _remove_tag(tasks, tag): # If there are no vx fields specified, remove those tasks that are necessary # for all observation types. - vx_config = expt_config["verification"] vx_fields = vx_config["VX_FIELDS"] if not vx_fields: metatask = "metatask_check_post_output_all_mems"