From f8c9602cf39d7e8a0afafa5d350cf4830a14662c Mon Sep 17 00:00:00 2001 From: MinsukJi-NOAA Date: Tue, 29 Sep 2020 21:42:58 +0000 Subject: [PATCH 1/4] Remove CNTLMED_DIR --- tests/default_vars.sh | 1 - tests/rt.sh | 2 - tests/rt_utils.sh | 64 ++++++------------- ...d_fv3_ccpp_384_mom6_cice_cmeps_1d_bmark_rt | 1 - ...d_fv3_ccpp_384_mom6_cice_cmeps_2d_atm_flux | 1 - ...fv3_ccpp_384_mom6_cice_cmeps_bmark_restart | 1 - ...3_ccpp_384_mom6_cice_cmeps_ww3_1d_bmark_rt | 1 - .../cpld_fv3_ccpp_mom6_cice_cmeps_1d_satmedmf | 1 - ...v3_ccpp_mom6_cice_cmeps_1stepcold_atm_flux | 1 - .../cpld_fv3_ccpp_mom6_cice_cmeps_2d_2threads | 1 - .../cpld_fv3_ccpp_mom6_cice_cmeps_2d_atm_flux | 1 - .../cpld_fv3_ccpp_mom6_cice_cmeps_2d_decomp | 1 - .../cpld_fv3_ccpp_mom6_cice_cmeps_3d_atm_flux | 1 - .../cpld_fv3_ccpp_mom6_cice_cmeps_6h_debug | 1 - .../cpld_fv3_ccpp_mom6_cice_cmeps_restart | 1 - 15 files changed, 19 insertions(+), 60 deletions(-) diff --git a/tests/default_vars.sh b/tests/default_vars.sh index 172a6b6e..282ee090 100755 --- a/tests/default_vars.sh +++ b/tests/default_vars.sh @@ -348,6 +348,5 @@ export RESTART_SUFFIX='' export_35d_run () { export CNTL_DIR="" -export CNTLMED_DIR="" export LIST_FILES="" } diff --git a/tests/rt.sh b/tests/rt.sh index c3353491..e4295903 100755 --- a/tests/rt.sh +++ b/tests/rt.sh @@ -457,8 +457,6 @@ if [[ $CREATE_BASELINE == true ]]; then #rsync -a "${RTPWD}"/fv3_stretched/INPUT "${NEW_BASELINE}"/fv3_stretched/ #rsync -a "${RTPWD}"/fv3_stretched_nest/INPUT "${NEW_BASELINE}"/fv3_stretched_nest/ #rsync -a "${RTPWD}"/fv3_stretched_nest_quilt/INPUT "${NEW_BASELINE}"/fv3_stretched_nest_quilt/ - - RTPWD=${NEW_BASELINE} fi COMPILE_LOG=${PATHRT}/Compile_$MACHINE_ID.log diff --git a/tests/rt_utils.sh b/tests/rt_utils.sh index 2d95852c..493d0447 100755 --- a/tests/rt_utils.sh +++ b/tests/rt_utils.sh @@ -324,12 +324,10 @@ check_results() { echo > ${REGRESSIONTEST_LOG} echo "baseline dir = ${RTPWD}/${CNTL_DIR}" >> ${REGRESSIONTEST_LOG} - echo "mediator baseline dir = ${RTPWD}/${CNTLMED_DIR}" >> ${REGRESSIONTEST_LOG} echo "working dir = ${RUNDIR}" >> ${REGRESSIONTEST_LOG} echo "Checking test ${TEST_NR} ${TEST_NAME} results ...." >> ${REGRESSIONTEST_LOG} echo echo "baseline dir = ${RTPWD}/${CNTL_DIR}" - echo "mediator baseline dir = ${RTPWD}/${CNTLMED_DIR}" echo "working dir = ${RUNDIR}" echo "Checking test ${TEST_NR} ${TEST_NAME} results ...." @@ -341,48 +339,29 @@ check_results() { printf %s " Comparing " $i " ....." >> ${REGRESSIONTEST_LOG} printf %s " Comparing " $i " ....." - crst='' - if [[ $i =~ RESTART/ ]]; then - crst=RESTART/$(basename $i) - fi - if [[ ! -f ${RUNDIR}/$i ]] ; then echo ".......MISSING file" >> ${REGRESSIONTEST_LOG} echo ".......MISSING file" test_status='FAIL' - elif [[ ! -f ${RTPWD}/${CNTL_DIR}/$i && ! -f ${RTPWD}/${CNTLMED_DIR}/$i && ! -f ${RTPWD}/${CNTL_DIR}/$crst ]] ; then + elif [[ ! -f ${RTPWD}/${CNTL_DIR}/$i ]] ; then echo ".......MISSING baseline" >> ${REGRESSIONTEST_LOG} echo ".......MISSING baseline" test_status='FAIL' - elif [[ ( $COMPILER == "gnu" || $COMPILER == "pgi" ) && $i == "RESTART/fv_core.res.nc" ]] ; then + elif [[ $COMPILER == "gnu" && $i == "RESTART/fv_core.res.nc" ]] ; then # Although identical in ncdiff, RESTART/fv_core.res.nc differs in byte 469, line 3, # for the fv3_control_32bit test between each run (without changing the source code) # for GNU and PGI compilers - skip comparison. - echo ".......SKIP for gnu/pgi compilers" >> ${REGRESSIONTEST_LOG} - echo ".......SKIP for gnu/pgi compilers" - - elif [[ $COMPILER == "pgi" && ( $i == "RESTART/fv_BC_sw.res.nest02.nc" || $i == "RESTART/fv_BC_ne.res.nest02.nc" ) ]] ; then - - # Although identical in ncdiff, RESTART/fv_BC_sw.res.nest02.nc differs in byte 6897, line 17 - # (similar for fv_BC_ne.res.nest02.nc) for the fv3_stretched_nest test between each run - # (without changing the source code) for the PGI compiler - skip comparison. - echo ".......SKIP for pgi compiler" >> ${REGRESSIONTEST_LOG} - echo ".......SKIP for pgi compiler" + echo ".......SKIP for gnu compilers" >> ${REGRESSIONTEST_LOG} + echo ".......SKIP for gnu compilers" else - if [[ $i =~ ufs.s2s ]]; then - d=$( cmp ${RTPWD}/${CNTLMED_DIR}/$i ${RUNDIR}/$i | wc -l ) - elif [[ $i =~ RESTART/ ]]; then - d=$( cmp ${RTPWD}/${CNTL_DIR}/$crst ${RUNDIR}/$i | wc -l ) - else - d=$( cmp ${RTPWD}/${CNTL_DIR}/$i ${RUNDIR}/$i | wc -l ) - fi + d=$( cmp ${RTPWD}/${CNTL_DIR}/$i ${RUNDIR}/$i | wc -l ) if [[ $d -ne 0 ]] ; then echo ".......NOT OK" >> ${REGRESSIONTEST_LOG} @@ -406,24 +385,17 @@ check_results() { echo " mkdir -p ${NEW_BASELINE}/${CNTL_DIR}/RESTART" >> ${REGRESSIONTEST_LOG} mkdir -p ${NEW_BASELINE}/${CNTL_DIR}/RESTART fi - if [[ ${CNTLMED_DIR} =~ MEDIATOR && ! -d ${NEW_BASELINE}/${CNTLMED_DIR} ]]; then - echo " mkdir -p ${NEW_BASELINE}/${CNTLMED_DIR}" >> ${REGRESSIONTEST_LOG} - mkdir -p ${NEW_BASELINE}/${CNTLMED_DIR} - fi for i in ${LIST_FILES} ; do + printf %s " Moving " $i " ....." printf %s " Moving " $i " ....." >> ${REGRESSIONTEST_LOG} if [[ -f ${RUNDIR}/$i ]] ; then - if [[ $i =~ RESTART/ ]]; then - cp ${RUNDIR}/$i ${NEW_BASELINE}/${CNTL_DIR}/RESTART/$(basename $i) - elif [[ $i =~ ufs.s2s ]]; then - cp ${RUNDIR}/$i ${NEW_BASELINE}/${CNTLMED_DIR} - else - cp ${RUNDIR}/${i} ${NEW_BASELINE}/${CNTL_DIR}/${i} - fi + cp ${RUNDIR}/${i} ${NEW_BASELINE}/${CNTL_DIR}/${i} + echo ".... OK" + echo ".... OK" >> ${REGRESSIONTEST_LOG} else - echo "Missing " ${RUNDIR}/$i " output file" - echo;echo " Set ${TEST_NR} ${TEST_NAME} failed" + echo ".... missing " ${RUNDIR}/$i + echo ".... missing " ${RUNDIR}/$i >> ${REGRESSIONTEST_LOG} test_status='FAIL' fi done @@ -538,7 +510,7 @@ EOF rocoto_create_run_task() { - if [[ $CREATE_BASELINE == true && $DEP_RUN != '' ]] || [[ $WARM_START == .T. && $DEP_RUN != '' ]]; then + if [[ $DEP_RUN != '' ]]; then DEP_STRING=" " else DEP_STRING="" @@ -638,13 +610,15 @@ EOF echo " task ${TEST_NAME}${RT_SUFFIX}" >> ${ECFLOW_RUN}/${ECFLOW_SUITE}.def echo " inlimit max_jobs" >> ${ECFLOW_RUN}/${ECFLOW_SUITE}.def - if [[ ${UNIT_TEST} == true && $DEP_RUN != '' ]]; then - echo " trigger compile_${COMPILE_NR} == complete and ${DEP_RUN} == complete" >> ${ECFLOW_RUN}/${ECFLOW_SUITE}.def - elif [[ $CREATE_BASELINE == true && $DEP_RUN != '' ]] || [[ $WARM_START == .T. && $DEP_RUN != '' ]]; then - echo " trigger compile_${COMPILE_NR} == complete and ${DEP_RUN}${RT_SUFFIX} == complete" >> ${ECFLOW_RUN}/${ECFLOW_SUITE}.def + if [[ $DEP_RUN != '' ]]; then + if [[ ${UNIT_TEST} == false ]]; then + echo " trigger compile_${COMPILE_NR} == complete and ${DEP_RUN}${RT_SUFFIX} == complete" >> ${ECFLOW_RUN}/${ECFLOW_SUITE}.def + else + echo " trigger compile_${COMPILE_NR} == complete and ${DEP_RUN} == complete" >> ${ECFLOW_RUN}/${ECFLOW_SUITE}.def + fi else echo " trigger compile_${COMPILE_NR} == complete" >> ${ECFLOW_RUN}/${ECFLOW_SUITE}.def - fi + fi } ecflow_run() { diff --git a/tests/tests/cpld_fv3_ccpp_384_mom6_cice_cmeps_1d_bmark_rt b/tests/tests/cpld_fv3_ccpp_384_mom6_cice_cmeps_1d_bmark_rt index 6c4fe70f..25fd7793 100644 --- a/tests/tests/cpld_fv3_ccpp_384_mom6_cice_cmeps_1d_bmark_rt +++ b/tests/tests/cpld_fv3_ccpp_384_mom6_cice_cmeps_1d_bmark_rt @@ -5,7 +5,6 @@ export TEST_DESCR="Fully coupled FV3-CCPP-MOM6-CICE-CMEPS 1 day run" export CNTL_DIR="RT-Baselines_1d_bmrt_ccpp_cmeps" -export CNTLMED_DIR="RT-Baselines_1d_bmrt_ccpp_cmeps/RESTART" export LIST_FILES="phyf024.tile1.nc \ phyf024.tile2.nc \ diff --git a/tests/tests/cpld_fv3_ccpp_384_mom6_cice_cmeps_2d_atm_flux b/tests/tests/cpld_fv3_ccpp_384_mom6_cice_cmeps_2d_atm_flux index 067dc092..f47da53f 100644 --- a/tests/tests/cpld_fv3_ccpp_384_mom6_cice_cmeps_2d_atm_flux +++ b/tests/tests/cpld_fv3_ccpp_384_mom6_cice_cmeps_2d_atm_flux @@ -5,7 +5,6 @@ export TEST_DESCR="Fully coupled FV3-CCPP-MOM6-CICE-CMEPS system - 2d_warm" export CNTL_DIR="RT-Baselines_2d_warm_ccpp384_cmeps" -export CNTLMED_DIR="RT-Baselines_2d_warm_ccpp384_cmeps/RESTART" export LIST_FILES="phyf048.tile1.nc \ phyf048.tile2.nc \ diff --git a/tests/tests/cpld_fv3_ccpp_384_mom6_cice_cmeps_bmark_restart b/tests/tests/cpld_fv3_ccpp_384_mom6_cice_cmeps_bmark_restart index 6c323f09..620b02f8 100644 --- a/tests/tests/cpld_fv3_ccpp_384_mom6_cice_cmeps_bmark_restart +++ b/tests/tests/cpld_fv3_ccpp_384_mom6_cice_cmeps_bmark_restart @@ -5,7 +5,6 @@ export TEST_DESCR="Fully coupled FV3-CCPP-MOM6-CICE-CMEPS 0.5 day restart run" export CNTL_DIR="RT-Baselines_1d_bmrt_ccpp_cmeps" -export CNTLMED_DIR="RT-Baselines_1d_bmrt_ccpp_cmeps/RESTART" export LIST_FILES="phyf024.tile1.nc \ phyf024.tile2.nc \ diff --git a/tests/tests/cpld_fv3_ccpp_384_mom6_cice_cmeps_ww3_1d_bmark_rt b/tests/tests/cpld_fv3_ccpp_384_mom6_cice_cmeps_ww3_1d_bmark_rt index 3fd070f5..e54755df 100644 --- a/tests/tests/cpld_fv3_ccpp_384_mom6_cice_cmeps_ww3_1d_bmark_rt +++ b/tests/tests/cpld_fv3_ccpp_384_mom6_cice_cmeps_ww3_1d_bmark_rt @@ -5,7 +5,6 @@ export TEST_DESCR="Coupled FV3-CCPP-MOM6-CICE-CMEPS-WW3 system - 1 day" export CNTL_DIR="RT-Baselines_1d_bmwav_ccpp_cmeps" -export CNTLMED_DIR="RT-Baselines_1d_bmwav_ccpp_cmeps/RESTART" export LIST_FILES="phyf024.tile1.nc \ phyf024.tile2.nc \ diff --git a/tests/tests/cpld_fv3_ccpp_mom6_cice_cmeps_1d_satmedmf b/tests/tests/cpld_fv3_ccpp_mom6_cice_cmeps_1d_satmedmf index 8b67e508..15f6da3f 100644 --- a/tests/tests/cpld_fv3_ccpp_mom6_cice_cmeps_1d_satmedmf +++ b/tests/tests/cpld_fv3_ccpp_mom6_cice_cmeps_1d_satmedmf @@ -5,7 +5,6 @@ export TEST_DESCR="Fully coupled FV3-CCPP-MOM6-CICE-CMEPS system - satmedmf" export CNTL_DIR="RT-Baselines_1d_warm_satmedmf_ccpp_cmeps" -export CNTLMED_DIR="RT-Baselines_1d_warm_satmedmf_ccpp_cmeps/RESTART" export LIST_FILES="phyf024.tile1.nc \ phyf024.tile2.nc \ diff --git a/tests/tests/cpld_fv3_ccpp_mom6_cice_cmeps_1stepcold_atm_flux b/tests/tests/cpld_fv3_ccpp_mom6_cice_cmeps_1stepcold_atm_flux index 8f5d37f4..1e794d36 100644 --- a/tests/tests/cpld_fv3_ccpp_mom6_cice_cmeps_1stepcold_atm_flux +++ b/tests/tests/cpld_fv3_ccpp_mom6_cice_cmeps_1stepcold_atm_flux @@ -5,7 +5,6 @@ export TEST_DESCR="Fully coupled FV3-CCPP-MOM6-CICE-CMEPS system - 1 step cold start" export CNTL_DIR="RT-Baselines_1stepcold_ccpp_cmeps" -export CNTLMED_DIR="RT-Baselines_1stepcold_ccpp_cmeps/RESTART" export LIST_FILES="phyf048.tile1.nc \ phyf048.tile2.nc \ diff --git a/tests/tests/cpld_fv3_ccpp_mom6_cice_cmeps_2d_2threads b/tests/tests/cpld_fv3_ccpp_mom6_cice_cmeps_2d_2threads index 8c6d5db6..8d6b370d 100644 --- a/tests/tests/cpld_fv3_ccpp_mom6_cice_cmeps_2d_2threads +++ b/tests/tests/cpld_fv3_ccpp_mom6_cice_cmeps_2d_2threads @@ -5,7 +5,6 @@ export TEST_DESCR="Fully coupled FV3-CCPP-MOM6-CICE-CMEPS system - 2 threads" export CNTL_DIR="RT-Baselines_2d_warm_ccpp_cmeps" -export CNTLMED_DIR="RT-Baselines_2d_warm_ccpp_cmeps/RESTART" export LIST_FILES="phyf048.tile1.nc \ phyf048.tile2.nc \ diff --git a/tests/tests/cpld_fv3_ccpp_mom6_cice_cmeps_2d_atm_flux b/tests/tests/cpld_fv3_ccpp_mom6_cice_cmeps_2d_atm_flux index c326c2f4..a3a900ba 100644 --- a/tests/tests/cpld_fv3_ccpp_mom6_cice_cmeps_2d_atm_flux +++ b/tests/tests/cpld_fv3_ccpp_mom6_cice_cmeps_2d_atm_flux @@ -5,7 +5,6 @@ export TEST_DESCR="Fully coupled FV3-CCPP-MOM6-CICE-CMEPS system - 2d_warm" export CNTL_DIR="RT-Baselines_2d_warm_ccpp_cmeps" -export CNTLMED_DIR="RT-Baselines_2d_warm_ccpp_cmeps/RESTART" export LIST_FILES="phyf048.tile1.nc \ phyf048.tile2.nc \ diff --git a/tests/tests/cpld_fv3_ccpp_mom6_cice_cmeps_2d_decomp b/tests/tests/cpld_fv3_ccpp_mom6_cice_cmeps_2d_decomp index 77a2cc40..ec1b2a0e 100644 --- a/tests/tests/cpld_fv3_ccpp_mom6_cice_cmeps_2d_decomp +++ b/tests/tests/cpld_fv3_ccpp_mom6_cice_cmeps_2d_decomp @@ -5,7 +5,6 @@ export TEST_DESCR="Fully coupled FV3-CCPP-MOM6-CICE-CMEPS system - decomp" export CNTL_DIR="RT-Baselines_2d_warm_ccpp_cmeps" -export CNTLMED_DIR="RT-Baselines_2d_warm_ccpp_cmeps/RESTART" export LIST_FILES="phyf048.tile1.nc \ phyf048.tile2.nc \ diff --git a/tests/tests/cpld_fv3_ccpp_mom6_cice_cmeps_3d_atm_flux b/tests/tests/cpld_fv3_ccpp_mom6_cice_cmeps_3d_atm_flux index 20d8b0d9..29738ac6 100644 --- a/tests/tests/cpld_fv3_ccpp_mom6_cice_cmeps_3d_atm_flux +++ b/tests/tests/cpld_fv3_ccpp_mom6_cice_cmeps_3d_atm_flux @@ -5,7 +5,6 @@ export TEST_DESCR="Fully coupled FV3-CCPP-MOM6-CICE-CMEPS system - 3d_warm" export CNTL_DIR="RT-Baselines_3d_warm_ccpp_cmeps" -export CNTLMED_DIR="RT-Baselines_3d_warm_ccpp_cmeps/RESTART" export LIST_FILES="phyf072.tile1.nc \ phyf072.tile2.nc \ diff --git a/tests/tests/cpld_fv3_ccpp_mom6_cice_cmeps_6h_debug b/tests/tests/cpld_fv3_ccpp_mom6_cice_cmeps_6h_debug index 3f3ba78b..6b3dddcf 100644 --- a/tests/tests/cpld_fv3_ccpp_mom6_cice_cmeps_6h_debug +++ b/tests/tests/cpld_fv3_ccpp_mom6_cice_cmeps_6h_debug @@ -5,7 +5,6 @@ export TEST_DESCR="Fully coupled FV3-CCPP-MOM6-CICE-CMEPS system - warm debug" export CNTL_DIR="RT-Baselines_6h_warm_debug_ccpp_cmeps" -export CNTLMED_DIR="RT-Baselines_6h_warm_debug_ccpp_cmeps/RESTART" export LIST_FILES="phyf006.tile1.nc \ phyf006.tile2.nc \ diff --git a/tests/tests/cpld_fv3_ccpp_mom6_cice_cmeps_restart b/tests/tests/cpld_fv3_ccpp_mom6_cice_cmeps_restart index 45b99dae..9429f9eb 100644 --- a/tests/tests/cpld_fv3_ccpp_mom6_cice_cmeps_restart +++ b/tests/tests/cpld_fv3_ccpp_mom6_cice_cmeps_restart @@ -5,7 +5,6 @@ export TEST_DESCR="Fully coupled FV3-CCPP-MOM6-CICE-CMEPS system - 1d restart" export CNTL_DIR="RT-Baselines_3d_warm_ccpp_cmeps" -export CNTLMED_DIR="RT-Baselines_3d_warm_ccpp_cmeps/RESTART" export LIST_FILES="phyf072.tile1.nc \ phyf072.tile2.nc \ From 101d2ccdee145210603514426be4d4724d62e5d6 Mon Sep 17 00:00:00 2001 From: MinsukJi-NOAA Date: Wed, 30 Sep 2020 20:12:51 -0500 Subject: [PATCH 2/4] Modify RT-related files to closely match the weather-model files --- tests/abort_dep_tasks.py | 9 +- tests/detect_machine.sh | 24 --- tests/head.h | 25 ++- tests/rt_fv3.sh | 97 ---------- tests/rt_utils.sh | 374 +++++++++++++++------------------------ tests/run_test.sh | 121 +++++++++++-- tests/tail.h | 2 +- 7 files changed, 274 insertions(+), 378 deletions(-) delete mode 100755 tests/rt_fv3.sh diff --git a/tests/abort_dep_tasks.py b/tests/abort_dep_tasks.py index b541c7b8..039f1838 100755 --- a/tests/abort_dep_tasks.py +++ b/tests/abort_dep_tasks.py @@ -1,4 +1,5 @@ #!/usr/bin/env python +from __future__ import print_function import ecflow import re @@ -33,7 +34,7 @@ def __check_task(self, node): task = self.__defs.find_abs_node( self.__suite.get_abs_node_path() + "/" + t) if task.get_state() == ecflow.State.aborted: if node.get_state() != ecflow.State.aborted: - print "Will force aborted state for task", node.get_abs_node_path() + print("Will force aborted state for task", node.get_abs_node_path()) self.__ci.force_state(node.get_abs_node_path(), ecflow.State.aborted) try: @@ -48,11 +49,11 @@ def __check_task(self, node): server_defs = ci.get_defs() if server_defs == None : - print "The server has no definition" + print("The server has no definition") exit(1) traverser = DefsTraverser(server_defs, ci) traverser.force_abort() -except RuntimeError, e: - print "failed: " + str(e) +except RuntimeError as e: + print("failed: " + str(e)) diff --git a/tests/detect_machine.sh b/tests/detect_machine.sh index c8ca639b..57e1d055 100755 --- a/tests/detect_machine.sh +++ b/tests/detect_machine.sh @@ -5,30 +5,6 @@ export ACCNR=${ACCNR:-nems} case $(hostname -f) in - g10a1.ncep.noaa.gov) MACHINE_ID=wcoss ; export pex=1;; ### gyre 1 - g10a2.ncep.noaa.gov) MACHINE_ID=wcoss ; export pex=1;; ### gyre 2 - g14a1.ncep.noaa.gov) MACHINE_ID=wcoss ; export pex=1;; ### gyre 3 - g14a2.ncep.noaa.gov) MACHINE_ID=wcoss ; export pex=1;; ### gyre 4 - - t10a1.ncep.noaa.gov) MACHINE_ID=wcoss ; export pex=1;; ### tide 1 - t10a2.ncep.noaa.gov) MACHINE_ID=wcoss ; export pex=1;; ### tide 2 - t14a1.ncep.noaa.gov) MACHINE_ID=wcoss ; export pex=1;; ### tide 3 - t14a2.ncep.noaa.gov) MACHINE_ID=wcoss ; export pex=1;; ### tide 4 - - g20a1.ncep.noaa.gov) MACHINE_ID=wcoss ; export pex=2;; ### gyre phase2 - g20a2.ncep.noaa.gov) MACHINE_ID=wcoss ; export pex=2;; ### gyre phase2 - g20a3.ncep.noaa.gov) MACHINE_ID=wcoss ; export pex=2;; ### gyre phase2 - g21a1.ncep.noaa.gov) MACHINE_ID=wcoss ; export pex=2;; ### gyre phase2 - g21a2.ncep.noaa.gov) MACHINE_ID=wcoss ; export pex=2;; ### gyre phase2 - g21a3.ncep.noaa.gov) MACHINE_ID=wcoss ; export pex=2;; ### gyre phase2 - - t20a1.ncep.noaa.gov) MACHINE_ID=wcoss ; export pex=2;; ### tide phase2 - t20a2.ncep.noaa.gov) MACHINE_ID=wcoss ; export pex=2;; ### tide phase2 - t20a3.ncep.noaa.gov) MACHINE_ID=wcoss ; export pex=2;; ### tide phase2 - t21a1.ncep.noaa.gov) MACHINE_ID=wcoss ; export pex=2;; ### tide phase2 - t21a2.ncep.noaa.gov) MACHINE_ID=wcoss ; export pex=2;; ### tide phase2 - t21a3.ncep.noaa.gov) MACHINE_ID=wcoss ; export pex=2;; ### tide phase2 - llogin1) MACHINE_ID=wcoss_cray ;; ### luna llogin2) MACHINE_ID=wcoss_cray ;; ### luna llogin3) MACHINE_ID=wcoss_cray ;; ### luna diff --git a/tests/head.h b/tests/head.h index 9935a747..bcae862c 100644 --- a/tests/head.h +++ b/tests/head.h @@ -23,9 +23,26 @@ ecflow_client --init=$$ # Define a error handler -ERROR() { +handle_error() { set +e # Clear -e flag, so we don't fail - kill $(jobs -p) + wait # wait for background process to stop + + ecflow_client --ping --host=${ECF_HOST} --port=${ECF_PORT} + not_running=$? + if [[ $not_running -eq 0 ]]; then + export ECF_TIMEOUT=5 + ecflow_client --abort=error # Notify ecFlow that something went wrong, using 'trap' as the reason + fi + sleep 5 + trap 0 # Remove the trap + exit 0 # End the script +} + +# Define a signal handler +handle_signal() { + set +e # Clear -e flag, so we don't fail + jobs -l + [[ -z "$(jobs -p)" ]] || kill $(jobs -p) wait # wait for background process to stop ecflow_client --ping --host=${ECF_HOST} --port=${ECF_PORT} @@ -41,9 +58,9 @@ ERROR() { # Trap any calls to exit and errors caught by the -e flag -trap ERROR 0 +trap handle_error 0 # Trap any signal that may cause the script to fail -trap '{ echo "$0 Killed by a signal"; ERROR ; }' 1 2 3 4 5 6 7 8 10 12 13 15 +trap '{ echo "$0 Killed by a signal"; handle_signal ; }' 1 2 3 4 5 6 7 8 10 12 13 15 ### head.h end diff --git a/tests/rt_fv3.sh b/tests/rt_fv3.sh deleted file mode 100755 index 0c8d237a..00000000 --- a/tests/rt_fv3.sh +++ /dev/null @@ -1,97 +0,0 @@ -#!/bin/bash -set -eux - -source rt_utils.sh -source atparse.bash -source edit_inputs.sh - -mkdir -p ${RUNDIR} -cd $RUNDIR - -############################################################################### -# Make configure and run files -############################################################################### - -# FV3 executable: -cp ${PATHRT}/$FV3X fcst.exe - -# modulefile for FV3 prerequisites: -cp ${PATHRT}/modules.fcst_${COMPILE_NR} modules.fcst - -# Get the shell file that loads the "module" command and purges modules: -cp ${PATHRT}/../NEMS/src/conf/module-setup.sh.inc module-setup.sh -#cp ${PATHTR}/parm/post_itag itag -#cp ${PATHTR}/parm/postxconfig-NT.txt postxconfig-NT.txt -#cp ${PATHTR}/parm/postxconfig-NT_FH00.txt postxconfig-NT_FH00.txt -#cp ${PATHTR}/parm/params_grib2_tbl_new params_grib2_tbl_new - -SRCD="${PATHTR}" -RUND="${RUNDIR}" - -# Set up the run directory -atparse < ${PATHRT}/fv3_conf/${FV3_RUN:-fv3_run.IN} > fv3_run -source ./fv3_run -atparse < ${PATHTR}/parm/${INPUT_NML:-input.nml.IN} > input.nml -atparse < ${PATHTR}/parm/${MODEL_CONFIGURE:-model_configure.IN} > model_configure -atparse < ${PATHTR}/parm/${NEMS_CONFIGURE:-nems.configure} > nems.configure - -edit_ice_in < ${PATHTR}/parm/ice_in_template > ice_in -edit_mom_input < ${PATHTR}/parm/${MOM_INPUT:-MOM_input_template_$OCNRES} > INPUT/MOM_input -edit_diag_table < ${PATHTR}/parm/diag_table_template > diag_table -edit_data_table < ${PATHTR}/parm/data_table_template > data_table -# CMEPS -cp ${PATHTR}/parm/fd_nems.yaml fd_nems.yaml -cp ${PATHTR}/parm/pio_in pio_in -cp ${PATHTR}/parm/med_modelio.nml med_modelio.nml - -if [[ "Q${INPUT_NEST02_NML:-}" != Q ]] ; then - atparse < ${PATHTR}/parm/${INPUT_NEST02_NML} > input_nest02.nml -fi - -if [[ $SCHEDULER = 'moab' ]]; then - atparse < $PATHRT/fv3_conf/fv3_msub.IN > job_card -elif [[ $SCHEDULER = 'pbs' ]]; then - NODES=$(( TASKS / TPN )) - if (( NODES * TPN < TASKS )); then - NODES=$(( NODES + 1 )) - fi - atparse < $PATHRT/fv3_conf/fv3_qsub.IN > job_card -elif [[ $SCHEDULER = 'sbatch' ]]; then - NODES=$(( TASKS / TPN )) - if (( NODES * TPN < TASKS )); then - NODES=$(( NODES + 1 )) - fi - atparse < $PATHRT/fv3_conf/fv3_qsub.IN > job_card -elif [[ $SCHEDULER = 'slurm' ]]; then - NODES=$(( TASKS / TPN )) - if (( NODES * TPN < TASKS )); then - NODES=$(( NODES + 1 )) - fi - atparse < $PATHRT/fv3_conf/fv3_slurm.IN > job_card -elif [[ $SCHEDULER = 'lsf' ]]; then - if (( TASKS < TPN )); then - TPN=${TASKS} - fi - atparse < $PATHRT/fv3_conf/fv3_bsub.IN > job_card -fi - -atparse < ${PATHTR}/parm/${NEMS_CONFIGURE:-nems.configure} > nems.configure - -################################################################################ -# Submit test -################################################################################ - -if [[ $ROCOTO = 'false' ]]; then - submit_and_wait job_card -else - chmod u+x job_card - ./job_card -fi - -check_results - -################################################################################ -# End test -################################################################################ - -exit 0 diff --git a/tests/rt_utils.sh b/tests/rt_utils.sh index 493d0447..ade7413d 100755 --- a/tests/rt_utils.sh +++ b/tests/rt_utils.sh @@ -1,6 +1,3 @@ -# -# DH* TODO - COMBINE SBATCH AND SLURM? -# set -eu if [[ "$0" = "${BASH_SOURCE[0]}" ]]; then @@ -10,6 +7,26 @@ fi UNIT_TEST=${UNIT_TEST:-false} +qsub_id=0 +slurm_id=0 +bsub_id=0 + +interrupt_job() { + set -x + if [[ $SCHEDULER = 'pbs' ]]; then + echo "run_util.sh: interrupt_job qsub_id = ${qsub_id}" + qdel ${qsub_id} + elif [[ $SCHEDULER = 'slurm' ]]; then + echo "run_util.sh: interrupt_job slurm_id = ${slurm_id}" + scancel ${slurm_id} + elif [[ $SCHEDULER = 'lsf' ]]; then + echo "run_util.sh: interrupt_job bsub_id = ${bsub_id}" + bkill ${bsub_id} + else + echo "run_util.sh: interrupt_job unknown SCHEDULER $SCHEDULER" + fi +} + submit_and_wait() { [[ -z $1 ]] && exit 1 @@ -20,41 +37,25 @@ submit_and_wait() { local -r job_card=$1 ROCOTO=${ROCOTO:-false} + ECFLOW=${ECFLOW:-false} local test_status='PASS' - if [[ $SCHEDULER = 'moab' ]]; then - msub $job_card - elif [[ $SCHEDULER = 'pbs' ]]; then + if [[ $SCHEDULER = 'pbs' ]]; then qsubout=$( qsub $job_card ) - if [[ ${MACHINE_ID} = cheyenne.* ]]; then - re='^([0-9]+\.[a-zA-Z0-9\.]+)$' - else - re='^([0-9]+\.[a-zA-Z0-9]+)$' - fi - qsub_id=0 + re='^([0-9]+)(\.[a-zA-Z0-9\.-]+)$' [[ "${qsubout}" =~ $re ]] && qsub_id=${BASH_REMATCH[1]} - if [[ ${MACHINE_ID} = cheyenne.* ]]; then - qsub_id="${qsub_id%.chadm*}" - fi - elif [[ $SCHEDULER = 'sbatch' ]]; then - qsubout=$( sbatch $job_card ) - re='^([0-9]+\.[a-zA-Z0-9]+)$' - qsub_id=0 - [[ "${qsubout}" =~ $re ]] && qsub_id=${BASH_REMATCH[1]} - if [[ ${MACHINE_ID} = stampede.* ]]; then - qsub_id="${qsub_id}" - fi + echo "Job id ${qsub_id}" elif [[ $SCHEDULER = 'slurm' ]]; then slurmout=$( sbatch $job_card ) re='Submitted batch job ([0-9]+)' - slurm_id=0 [[ "${slurmout}" =~ $re ]] && slurm_id=${BASH_REMATCH[1]} + echo "Job id ${slurm_id}" elif [[ $SCHEDULER = 'lsf' ]]; then bsubout=$( bsub < $job_card ) re='Job <([0-9]+)> is submitted to queue <(.+)>.' - bsub_id=0 [[ "${bsubout}" =~ $re ]] && bsub_id=${BASH_REMATCH[1]} + echo "Job id ${bsub_id}" else echo "Unknown SCHEDULER $SCHEDULER" exit 1 @@ -66,69 +67,38 @@ submit_and_wait() { until [[ $job_running -eq 1 ]] do echo "TEST ${TEST_NR} ${TEST_NAME} is waiting to enter the queue" - if [[ $SCHEDULER = 'moab' ]]; then - job_running=$( showq -u ${USER} -n | grep ${JBNME} | wc -l); sleep 5 - elif [[ $SCHEDULER = 'pbs' ]]; then - if [[ ${MACHINE_ID} = cheyenne.* ]]; then - job_running=$( qstat ${qsub_id} | grep ${qsub_id} | wc -l); sleep 5 - else - job_running=$( qstat -u ${USER} -n | grep ${JBNME} | wc -l); sleep 5 - fi - elif [[ $SCHEDULER = 'sbatch' ]]; then - if [[ ${MACHINE_ID} = stampede.* ]]; then - job_running=$( squeue ${qsub_id} | grep ${qsub_id} | wc -l); sleep 5 - else - job_running=$( squeue -u ${USER} -n | grep ${JBNME} | wc -l); sleep 5 - fi + [[ ${ECFLOW:-false} == true ]] && ecflow_client --label=job_status "waiting to enter the queue" + if [[ $SCHEDULER = 'pbs' ]]; then + job_running=$( qstat ${qsub_id} | grep ${qsub_id} | wc -l ) elif [[ $SCHEDULER = 'slurm' ]]; then - job_running=$( squeue -u ${USER} -j ${slurm_id} | grep ${slurm_id} | wc -l); sleep 5 + job_running=$( squeue -u ${USER} -j ${slurm_id} | grep ${slurm_id} | wc -l) elif [[ $SCHEDULER = 'lsf' ]]; then - job_running=$( bjobs -u ${USER} -J ${JBNME} 2>/dev/null | grep ${QUEUE} | wc -l); sleep 5 + job_running=$( bjobs ${bsub_id} | grep ${bsub_id} | wc -l) else echo "Unknown SCHEDULER $SCHEDULER" exit 1 fi + sleep 5 (( count=count+1 )) if [[ $count -eq 13 ]]; then echo "No job in queue after one minute, exiting..."; exit 2; fi done # find jobid - if [[ $SCHEDULER = 'moab' ]]; then - : - elif [[ $SCHEDULER = 'pbs' ]]; then - if [[ ${MACHINE_ID} = cheyenne.* ]]; then - jobid=$( qstat ${qsub_id} | grep ${qsub_id} | awk '{print $1}' ) - jobid="${jobid%.chadm*}" - else - jobid=$( qstat -u ${USER} | grep ${JBNME} | awk '{print $1}' ) - fi - trap 'echo "Job ${jobid} killed"; qdel ${jobid}; trap 0; exit' 1 2 3 4 5 6 7 8 10 12 13 15 - if [[ ${qsub_id} != ${jobid} ]]; then - echo "Warning: qsub_id is not equal to jobid" - fi - elif [[ $SCHEDULER = 'sbatch' ]]; then - if [[ ${MACHINE_ID} = stampede.* ]]; then - jobid=$( squeue ${qsub_id} | grep ${qsub_id} | awk '{print $1}' ) - jobid="${jobid}" - else - jobid=$( squeue -u ${USER} | grep ${JBNME} | awk '{print $1}' ) - fi - trap 'echo "Job ${jobid} killed"; qdel ${jobid}; trap 0; exit' 1 2 3 4 5 6 7 8 10 12 13 15 - if [[ ${qsub_id} != ${jobid} ]]; then - echo "Warning: qsub_id is not equal to jobid" - fi + if [[ $SCHEDULER = 'pbs' ]]; then + jobid=${qsub_id} elif [[ $SCHEDULER = 'slurm' ]]; then jobid=${slurm_id} elif [[ $SCHEDULER = 'lsf' ]]; then - jobid=$( bjobs -u ${USER} -J ${JBNME} -noheader -o "jobid" ) - trap 'echo "Job ${jobid} killed"; bkill ${jobid}; trap 0; exit' 1 2 3 4 5 6 7 8 10 12 13 15 - if [[ ${bsub_id} -ne ${jobid} ]]; then - echo "Warning: bsub_id is not equal to jobid" - fi + jobid=${bsub_id} else echo "Unknown SCHEDULER $SCHEDULER" exit 1 fi + echo "TEST ${TEST_NR} ${TEST_NAME} is submitted " + if [[ ${ECFLOW:-false} == true ]]; then + ecflow_client --label=job_id "${jobid}" + ecflow_client --label=job_status "submitted" + fi # wait for the job to finish and compare results job_running=1 @@ -136,151 +106,77 @@ submit_and_wait() { until [[ $job_running -eq 0 ]] do - sleep 60 & wait $! - - if [[ $SCHEDULER = 'moab' ]]; then - job_running=$( showq -u ${USER} -n | grep ${JBNME} | wc -l) - elif [[ $SCHEDULER = 'pbs' ]]; then - if [[ ${MACHINE_ID} = cheyenne.* ]]; then - job_running=$( qstat ${qsub_id} | grep ${qsub_id} | wc -l); sleep 5 - else - job_running=$( qstat -u ${USER} -n | grep ${JBNME} | wc -l) - fi - elif [[ $SCHEDULER = 'sbatch' ]]; then - if [[ ${MACHINE_ID} = stampede.* ]]; then - job_running=$( squeue ${qsub_id} | grep ${qsub_id} | wc -l); sleep 5 - else - job_running=$( squeue -u ${USER} -n | grep ${JBNME} | wc -l) - fi + if [[ $SCHEDULER = 'pbs' ]]; then + job_running=$( qstat ${qsub_id} | grep ${qsub_id} | wc -l ) elif [[ $SCHEDULER = 'slurm' ]]; then job_running=$( squeue -u ${USER} -j ${slurm_id} | grep ${slurm_id} | wc -l) elif [[ $SCHEDULER = 'lsf' ]]; then - job_running=$( bjobs -u ${USER} -J ${JBNME} 2>/dev/null | wc -l) + job_running=$( bjobs ${bsub_id} | grep ${bsub_id} | wc -l) else echo "Unknown SCHEDULER $SCHEDULER" exit 1 fi - if [[ $SCHEDULER = 'moab' ]]; then - - status=$( showq -u ${USER} -n | grep ${JBNME} | awk '{print $3}'); status=${status:--} - if [[ -f ${RUNDIR}/err ]] ; then FnshHrs=$( grep Finished ${RUNDIR}/err | tail -1 | awk '{ print $9 }'); fi - FnshHrs=${FnshHrs:-0} - if [[ $status = 'Idle' ]]; then echo "$n min. TEST ${TEST_NR} ${TEST_NAME} is waiting in a queue, Status: $status" - elif [[ $status = 'Running' ]]; then echo "$n min. TEST ${TEST_NR} ${TEST_NAME} is running, Status: $status , Finished $FnshHrs hours" - elif [[ $status = 'Starting' ]]; then echo "$n min. TEST ${TEST_NR} ${TEST_NAME} is ready to run, Status: $status , Finished $FnshHrs hours" - elif [[ $status = 'Completed' ]]; then echo "$n min. TEST ${TEST_NR} ${TEST_NAME} is finished, Status: $status" ; job_running=0 - else echo "$n min. TEST ${TEST_NR} ${TEST_NAME} is finished, Status: $status , Finished $FnshHrs hours" - fi - - elif [[ $SCHEDULER = 'pbs' ]]; then + if [[ $SCHEDULER = 'pbs' ]]; then - #status=$( qstat -u ${USER} -n | grep ${JBNME} | awk '{print $"10"}' ); status=${status:--} PJP comment out to speed up regression test - if [[ ${MACHINE_ID} = cheyenne.* ]]; then - status=$( qstat ${qsub_id} | grep ${qsub_id} | awk '{print $5}' ); status=${status:--} - else - status=$( qstat -u ${USER} -n | grep ${JBNME} | awk '{print $10}' ); status=${status:--} - fi - if [[ -f ${RUNDIR}/err ]] ; then FnshHrs=$( tail -100 ${RUNDIR}/err | grep Finished | tail -1 | awk '{ print $9 }' ); fi - FnshHrs=${FnshHrs:-0} - if [[ $status = 'Q' ]]; then echo "$n min. TEST ${TEST_NR} ${TEST_NAME} is waiting in a queue, Status: $status jobid ${jobid}" - elif [[ $status = 'H' ]]; then echo "$n min. TEST ${TEST_NR} ${TEST_NAME} is held in a queue, Status: $status" - elif [[ $status = 'R' ]]; then echo "$n min. TEST ${TEST_NR} ${TEST_NAME} is running, Status: $status , Finished $FnshHrs hours" + status=$( qstat ${qsub_id} | grep ${qsub_id} | awk '{print $5}' ); status=${status:--} + if [[ $status = 'Q' ]]; then + status_label='waiting in a queue' + elif [[ $status = 'H' ]]; then + status_label='held in a queue' + elif [[ $status = 'R' ]]; then + status_label='running' elif [[ $status = 'E' ]] || [[ $status = 'C' ]]; then - if [[ ${MACHINE_ID} = cheyenne.* ]]; then - exit_status=$( qstat ${jobid} -x -f | grep Exit_status | awk '{print $3}') - else - jobid=$( qstat -u ${USER} | grep ${JBNME} | awk '{print $1}') - exit_status=$( qstat ${jobid} -f | grep exit_status | awk '{print $3}') - fi + status_label='finished' + test_status='DONE' + exit_status=$( qstat ${jobid} -x -f | grep Exit_status | awk '{print $3}') if [[ $exit_status != 0 ]]; then - echo "Test ${TEST_NR} ${TEST_NAME} FAIL" >> ${REGRESSIONTEST_LOG} - echo >> ${REGRESSIONTEST_LOG} - echo "Test ${TEST_NR} ${TEST_NAME} FAIL" - echo test_status='FAIL' - break fi - echo "$n min. TEST ${TEST_NR} ${TEST_NAME} is finished, Status: $status" - job_running=0 - elif [[ $status = 'C' ]]; then echo "$n min. TEST ${TEST_NR} ${TEST_NAME} is finished, Status: $status" ; job_running=0 - else echo "$n min. TEST ${TEST_NR} ${TEST_NAME} is finished, Status: $status , Finished $FnshHrs hours" + else + status_label='finished' fi elif [[ $SCHEDULER = 'slurm' ]]; then status=$( squeue -u ${USER} -j ${slurm_id} 2>/dev/null | grep ${slurm_id} | awk '{print $5}' ); status=${status:--} - if [[ $status = 'R' ]]; then echo "$n min. TEST ${TEST_NR} ${TEST_NAME} is running, Status: $status" - elif [[ $status = 'F' ]]; then - echo "Test ${TEST_NR} ${TEST_NAME} FAIL" >> ${REGRESSIONTEST_LOG} - echo >> ${REGRESSIONTEST_LOG} - echo "Test ${TEST_NR} ${TEST_NAME} FAIL" - echo - echo "$n min. TEST ${TEST_NR} ${TEST_NAME} is failed, Status: $status" - job_running=0 - elif [[ $status = 'C' ]]; then echo "$n min. TEST ${TEST_NR} ${TEST_NAME} is finished, Status: $status" ; job_running=0 + if [[ $status = 'R' ]]; then + status_label='running' + elif [[ $status = 'PD' ]]; then + status_label='pending' + elif [[ $status = 'F' ]]; then + status_label='failed' + test_status='FAIL' + elif [[ $status = 'C' ]]; then + status_label='finished' + test_status='DONE' else - state=$( sacct -n -j ${slurm_id}.batch --format=JobID,state,Jobname | grep ${slurm_id} | awk '{print $2}' ) - echo "$n min. TEST ${TEST_NR} ${TEST_NAME} is ${state}" - fi - - elif [[ $SCHEDULER = 'sbatch' ]]; then - - #status=$( qstat -u ${USER} -n | grep ${JBNME} | awk '{print $"10"}' ); status=${status:--} PJP comment out to speed up regression test - if [[ ${MACHINE_ID} = stampede.* ]]; then - status=$( squeue ${qsub_id} | grep ${qsub_id} | awk '{print $5}' ); status=${status:--} - else - status=$( squeue -u ${USER} -n | grep ${JBNME} | awk '{print $10}' ); status=${status:--} - fi - if [[ -f ${RUNDIR}/err ]] ; then FnshHrs=$( tail -100 ${RUNDIR}/err | grep Finished | tail -1 | awk '{ print $9 }' ); fi - FnshHrs=${FnshHrs:-0} - if [[ $status = 'Q' ]]; then echo "$n min. TEST ${TEST_NR} ${TEST_NAME} is waiting in a queue, Status: $status jobid ${jobid}" - elif [[ $status = 'H' ]]; then echo "$n min. TEST ${TEST_NR} ${TEST_NAME} is held in a queue, Status: $status" - elif [[ $status = 'R' ]]; then echo "$n min. TEST ${TEST_NR} ${TEST_NAME} is running, Status: $status , Finished $FnshHrs hours" - elif [[ $status = 'E' ]] || [[ $status = 'C' ]]; then - if [[ ${MACHINE_ID} = stampede.* ]]; then - exit_status=$( squeue ${jobid} -x -f | grep Exit_status | awk '{print $3}') - else - jobid=$( squeue -u ${USER} | grep ${JBNME} | awk '{print $1}') - exit_status=$( qstat ${jobid} -f | grep exit_status | awk '{print $3}') - fi - if [[ $exit_status != 0 ]]; then - echo "Test ${TEST_NR} ${TEST_NAME} FAIL" >> ${REGRESSIONTEST_LOG} - echo >> ${REGRESSIONTEST_LOG} - echo "Test ${TEST_NR} ${TEST_NAME} FAIL" - echo - test_status='FAIL' - break - fi - echo "$n min. TEST ${TEST_NR} ${TEST_NAME} is finished, Status: $status" - job_running=0 - elif [[ $status = 'C' ]]; then echo "$n min. TEST ${TEST_NR} ${TEST_NAME} is finished, Status: $status" ; job_running=0 - else echo "$n min. TEST ${TEST_NR} ${TEST_NAME} is finished, Status: $status , Finished $FnshHrs hours" + echo "Slurm unknown status ${status}. Check sacct ..." + sacct -n -j ${slurm_id} --format=JobID,state%20,Jobname%20 + status_label=$( sacct -n -j ${slurm_id} --format=JobID,state%20,Jobname%20 | grep "^${slurm_id}" | grep ${JBNME} | awk '{print $2}' ) fi elif [[ $SCHEDULER = 'lsf' ]]; then - status=$( bjobs -u ${USER} -J ${JBNME} 2>/dev/null | grep ${QUEUE} | awk '{print $3}' ); status=${status:--} - if [[ -f ${RUNDIR}/err ]] ; then FnshHrs=$( grep Finished ${RUNDIR}/err | tail -1 | awk '{ print $9 }' ) ; fi - FnshHrs=${FnshHrs:-0} - if [[ $status = 'PEND' ]]; then echo "$n min. TEST ${TEST_NR} ${TEST_NAME} is waiting in a queue, Status: $status" - elif [[ $status = 'RUN' ]]; then echo "$n min. TEST ${TEST_NR} ${TEST_NAME} is running, Status: $status , Finished $FnshHrs hours" + status=$( bjobs ${bsub_id} 2>/dev/null | grep ${bsub_id} | awk '{print $3}' ); status=${status:--} + if [[ $status = 'PEND' ]]; then + status_label='pending' + elif [[ $status = 'RUN' ]]; then + status_label='running' + elif [[ $status = 'DONE' ]]; then + status_label='finished' + test_status='DONE' elif [[ $status = 'EXIT' ]]; then - echo "Test ${TEST_NR} ${TEST_NAME} FAIL" >> ${REGRESSIONTEST_LOG} - echo;echo;echo >> ${REGRESSIONTEST_LOG} - echo "Test ${TEST_NR} ${TEST_NAME} FAIL" - echo;echo;echo + status_label='failed' test_status='FAIL' - break - else echo "$n min. TEST ${TEST_NR} ${TEST_NAME} is finished, Status: $status , Finished $FnshHrs hours" - exit_status=$( bjobs -u ${USER} -J ${JBNME} -a 2>/dev/null | grep $QUEUE | awk '{print $3}' ) + else + echo "bsub unknown status ${status}" + status_label='finished' + test_status='DONE' + exit_status=$( bjobs ${bsub_id} 2>/dev/null | grep ${bsub_id} | awk '{print $3}' ); status=${status:--} if [[ $exit_status = 'EXIT' ]]; then - echo "Test ${TEST_NR} ${TEST_NAME} FAIL" >> ${REGRESSIONTEST_LOG} - echo;echo;echo >> ${REGRESSIONTEST_LOG} - echo "Test ${TEST_NR} ${TEST_NAME} FAIL" - echo;echo;echo + status_label='failed' test_status='FAIL' - break fi fi @@ -289,18 +185,30 @@ submit_and_wait() { exit 1 fi + + echo "$n min. TEST ${TEST_NR} ${TEST_NAME} is ${status_label}, status: $status jobid ${jobid}" + [[ ${ECFLOW:-false} == true ]] && ecflow_client --label=job_status "$status_label" + + if [[ $test_status = 'FAIL' || $test_status = 'DONE' ]]; then + break + fi + (( n=n+1 )) + sleep 60 & wait $! done if [[ $test_status = 'FAIL' ]]; then if [[ ${UNIT_TEST} == false ]]; then - echo $TEST_NAME >> $PATHRT/fail_test + echo "${TEST_NAME} ${TEST_NR} failed" >> $PATHRT/fail_test + echo "Test ${TEST_NR} ${TEST_NAME} FAIL" >> ${REGRESSIONTEST_LOG} + echo;echo;echo >> ${REGRESSIONTEST_LOG} + echo "Test ${TEST_NR} ${TEST_NAME} FAIL" else echo ${TEST_NR} $TEST_NAME >> $PATHRT/fail_unit_test fi - if [[ $ROCOTO == true ]]; then - exit 2 + if [[ $ROCOTO == true || $ECFLOW == true ]]; then + exit 1 fi fi @@ -313,9 +221,7 @@ check_results() { set +x ROCOTO=${ROCOTO:-false} - - # Default compiler "intel" - export COMPILER=${NEMS_COMPILER:-intel} + ECFLOW=${ECFLOW:-false} local test_status='PASS' @@ -333,7 +239,7 @@ check_results() { if [[ ${CREATE_BASELINE} = false ]]; then # - # --- regression test comparison ---- + # --- regression test comparison # for i in ${LIST_FILES} ; do printf %s " Comparing " $i " ....." >> ${REGRESSIONTEST_LOG} @@ -355,7 +261,7 @@ check_results() { # Although identical in ncdiff, RESTART/fv_core.res.nc differs in byte 469, line 3, # for the fv3_control_32bit test between each run (without changing the source code) - # for GNU and PGI compilers - skip comparison. + # for GNU compilers - skip comparison. echo ".......SKIP for gnu compilers" >> ${REGRESSIONTEST_LOG} echo ".......SKIP for gnu compilers" @@ -380,7 +286,8 @@ check_results() { # # --- create baselines # - echo;echo;echo "Moving set ${TEST_NR} ${TEST_NAME} files ...." + echo;echo "Moving baseline ${TEST_NR} ${TEST_NAME} files ...." + echo;echo "Moving baseline ${TEST_NR} ${TEST_NAME} files ...." >> ${REGRESSIONTEST_LOG} if [[ ! -d ${NEW_BASELINE}/${CNTL_DIR}/RESTART ]] ; then echo " mkdir -p ${NEW_BASELINE}/${CNTL_DIR}/RESTART" >> ${REGRESSIONTEST_LOG} mkdir -p ${NEW_BASELINE}/${CNTL_DIR}/RESTART @@ -389,13 +296,14 @@ check_results() { for i in ${LIST_FILES} ; do printf %s " Moving " $i " ....." printf %s " Moving " $i " ....." >> ${REGRESSIONTEST_LOG} + printf %s " Moving " $i " ....." if [[ -f ${RUNDIR}/$i ]] ; then cp ${RUNDIR}/${i} ${NEW_BASELINE}/${CNTL_DIR}/${i} - echo ".... OK" - echo ".... OK" >> ${REGRESSIONTEST_LOG} + echo "....OK" >>${REGRESSIONTEST_LOG} + echo "....OK" else - echo ".... missing " ${RUNDIR}/$i - echo ".... missing " ${RUNDIR}/$i >> ${REGRESSIONTEST_LOG} + echo "....NOT OK. Missing " ${RUNDIR}/$i >>${REGRESSIONTEST_LOG} + echo "....NOT OK. Missing " ${RUNDIR}/$i test_status='FAIL' fi done @@ -409,28 +317,27 @@ check_results() { if [[ $test_status = 'FAIL' ]]; then if [[ ${UNIT_TEST} == false ]]; then - echo $TEST_NAME >> $PATHRT/fail_test + echo "${TEST_NAME} ${TEST_NR} failed in check_result" >> $PATHRT/fail_test else echo ${TEST_NR} $TEST_NAME >> $PATHRT/fail_unit_test fi - if [[ $ROCOTO = true ]]; then - exit 2 + if [[ $ROCOTO = true || $ECFLOW == true ]]; then + exit 1 fi fi eval "$set_x" } + kill_job() { [[ -z $1 ]] && exit 1 local -r jobid=$1 - if [[ $SCHEDULER = 'moab' ]]; then - : - elif [[ $SCHEDULER = 'pbs' ]]; then + if [[ $SCHEDULER = 'pbs' ]]; then qdel ${jobid} elif [[ $SCHEDULER = 'slurm' ]]; then scancel ${jobid} @@ -439,7 +346,6 @@ kill_job() { fi } - rocoto_create_compile_task() { new_compile=true @@ -448,12 +354,12 @@ rocoto_create_compile_task() { echo " " >> $ROCOTO_XML fi - if [[ "Q$APP" != Q ]] ; then - rocoto_cmd="&PATHRT;/appbuild.sh &PATHTR;/FV3 $APP $COMPILE_NR" - else - #rocoto_cmd="&PATHRT;/compile_cmake.sh &PATHTR; $MACHINE_ID \"${NEMS_VER}\" $COMPILE_NR" - rocoto_cmd="$PATHRT/compile.sh ${PATHTR}/FV3 $MACHINE_ID \"${NEMS_VER}\" $COMPILE_NR" - #rocoto_cmd="&PATHRT;/compile.sh ${NEMS_VER} $COMPILE_NR" + rocoto_cmd="$PATHRT/compile.sh ${PATHTR}/FV3 $MACHINE_ID \"${NEMS_VER}\" $COMPILE_NR" + + # serialize WW3 builds. FIXME + DEP_STRING="" + if [[ ${MAKE_OPT^^} =~ "WW3=Y" && ${COMPILE_PREV_WW3_NR} != '' ]]; then + DEP_STRING="" fi NATIVE="" @@ -493,6 +399,7 @@ EOF else cat << EOF >> $ROCOTO_XML + $DEP_STRING $rocoto_cmd compile_${COMPILE_NR} ${ACCNR} @@ -507,7 +414,6 @@ EOF fi } - rocoto_create_run_task() { if [[ $DEP_RUN != '' ]]; then @@ -522,11 +428,8 @@ rocoto_create_run_task() { fi NATIVE="" - if [[ ${MACHINE_ID} == wcoss ]]; then - NATIVE="-a poe -R span[ptile=${TPN}]" - fi if [[ ${MACHINE_ID} == wcoss_dell_p3 ]]; then - NATIVE="-R span[ptile=${TPN}]" + NATIVE="28-R 'affinity[core(${THRD})]'" fi if [[ ${MACHINE_ID} == wcoss_cray ]]; then NATIVE="" @@ -540,7 +443,7 @@ rocoto_create_run_task() { ${ACCNR} ${QUEUE} ${PARTITION} - ${CORES} + ${NODES}:ppn=${TPN} 00:${WLCLK}:00 &LOG;/run_${TEST_NR}_${TEST_NAME}${RT_SUFFIX}.log ${NATIVE} @@ -549,7 +452,6 @@ EOF } - rocoto_kill() { for jobid in $( $ROCOTOSTAT -w $ROCOTO_XML -d $ROCOTO_DB | grep 197001010000 | grep -E 'QUEUED|RUNNING' | awk -F" " '{print $3}' ); do kill_job ${jobid} @@ -562,29 +464,24 @@ rocoto_run() { while [[ $state != "Done" ]] do $ROCOTORUN -v 10 -w $ROCOTO_XML -d $ROCOTO_DB - sleep 10 & wait $! + sleep 10 state=$($ROCOTOSTAT -w $ROCOTO_XML -d $ROCOTO_DB -s | grep 197001010000 | awk -F" " '{print $2}') dead_compile=$($ROCOTOSTAT -w $ROCOTO_XML -d $ROCOTO_DB | grep compile_ | grep DEAD | head -1 | awk -F" " '{print $2}') if [[ ! -z ${dead_compile} ]]; then echo "y" | ${ROCOTOCOMPLETE} -w $ROCOTO_XML -d $ROCOTO_DB -m ${dead_compile}_tasks ${ROCOTOCOMPLETE} -w $ROCOTO_XML -d $ROCOTO_DB -t ${dead_compile} fi - sleep 20 & wait $! + sleep 20 done } + ecflow_create_compile_task() { new_compile=true - if [[ "Q$APP" != Q ]] ; then - ecflow_cmd="$PATHRT/appbuild.sh ${PATHTR}/FV3 $APP $COMPILE_NR > ${LOG_DIR}/compile_${COMPILE_NR}.log 2>&1" - else - #ecflow_cmd="$PATHRT/compile_cmake.sh ${PATHTR} $MACHINE_ID \"${NEMS_VER}\" $COMPILE_NR > ${LOG_DIR}/compile_${COMPILE_NR}.log 2>&1" - ecflow_cmd="$PATHRT/compile.sh ${PATHTR}/FV3 $MACHINE_ID \"${NEMS_VER}\" $COMPILE_NR > ${LOG_DIR}/compile_${COMPILE_NR}.log 2>&1" - #ecflow_cmd="$PATHRT/compile.sh ${NEMS_VER} $COMPILE_NR > ${LOG_DIR}/compile_${COMPILE_NR}.log 2>&1" - fi + ecflow_cmd="$PATHRT/compile.sh ${PATHTR}/FV3 $MACHINE_ID \"${NEMS_VER}\" $COMPILE_NR > ${LOG_DIR}/compile_${COMPILE_NR}.log 2>&1 &" cat << EOF > ${ECFLOW_RUN}/${ECFLOW_SUITE}/compile_${COMPILE_NR}.ecf %include @@ -593,6 +490,9 @@ $ecflow_cmd EOF echo " task compile_${COMPILE_NR}" >> ${ECFLOW_RUN}/${ECFLOW_SUITE}.def + echo " label build_options '${NEMS_VER}'" >> ${ECFLOW_RUN}/${ECFLOW_SUITE}.def + echo " label job_id ''" >> ${ECFLOW_RUN}/${ECFLOW_SUITE}.def + echo " label job_status ''" >> ${ECFLOW_RUN}/${ECFLOW_SUITE}.def echo " inlimit max_builds" >> ${ECFLOW_RUN}/${ECFLOW_SUITE}.def # serialize WW3 builds. FIXME if [[ ${NEMS_VER^^} =~ "WW3=Y" && ${COMPILE_PREV_WW3_NR} != '' ]]; then @@ -604,21 +504,23 @@ ecflow_create_run_task() { cat << EOF > ${ECFLOW_RUN}/${ECFLOW_SUITE}/${TEST_NAME}${RT_SUFFIX}.ecf %include -$PATHRT/run_test.sh ${PATHRT} ${RUNDIR_ROOT} ${TEST_NAME} ${TEST_NR} ${COMPILE_NR} > ${LOG_DIR}/run_${TEST_NR}_${TEST_NAME}${RT_SUFFIX}.log 2>&1 +$PATHRT/run_test.sh ${PATHRT} ${RUNDIR_ROOT} ${TEST_NAME} ${TEST_NR} ${COMPILE_NR} > ${LOG_DIR}/run_${TEST_NR}_${TEST_NAME}${RT_SUFFIX}.log 2>&1 & %include EOF echo " task ${TEST_NAME}${RT_SUFFIX}" >> ${ECFLOW_RUN}/${ECFLOW_SUITE}.def + echo " label job_id ''" >> ${ECFLOW_RUN}/${ECFLOW_SUITE}.def + echo " label job_status ''" >> ${ECFLOW_RUN}/${ECFLOW_SUITE}.def echo " inlimit max_jobs" >> ${ECFLOW_RUN}/${ECFLOW_SUITE}.def if [[ $DEP_RUN != '' ]]; then if [[ ${UNIT_TEST} == false ]]; then echo " trigger compile_${COMPILE_NR} == complete and ${DEP_RUN}${RT_SUFFIX} == complete" >> ${ECFLOW_RUN}/${ECFLOW_SUITE}.def else echo " trigger compile_${COMPILE_NR} == complete and ${DEP_RUN} == complete" >> ${ECFLOW_RUN}/${ECFLOW_SUITE}.def - fi + fi else echo " trigger compile_${COMPILE_NR} == complete" >> ${ECFLOW_RUN}/${ECFLOW_SUITE}.def - fi + fi } ecflow_run() { @@ -663,15 +565,15 @@ ecflow_run() { ecflow_kill() { [[ ${ECFLOW_RUNNING:-false} == true ]] || return set +e - wait + ecflow_client --suspend /${ECFLOW_SUITE} ecflow_client --kill /${ECFLOW_SUITE} - sleep 10 + sleep 20 + ecflow_client --delete=force yes /${ECFLOW_SUITE} } ecflow_stop() { [[ ${ECFLOW_RUNNING:-false} == true ]] || return set +e - wait SUITES=$( ecflow_client --get | grep "^suite" ) echo "SUITES=${SUITES}" if [ -z "${SUITES}" ]; then diff --git a/tests/run_test.sh b/tests/run_test.sh index cbfb5923..5d538098 100755 --- a/tests/run_test.sh +++ b/tests/run_test.sh @@ -1,16 +1,28 @@ #!/bin/bash set -eux +echo "PID=$$" +SECONDS=0 + +trap '[ "$?" -eq 0 ] || write_fail_test' EXIT +trap 'echo "run_test.sh interrupted PID=$$"; cleanup' INT +trap 'echo "run_test.sh terminated PID=$$"; cleanup' TERM + +cleanup() { + [[ $ROCOTO = 'false' ]] && interrupt_job + trap 0 + exit +} + write_fail_test() { if [[ ${UNIT_TEST} == true ]]; then echo ${TEST_NR} $TEST_NAME >> $PATHRT/fail_unit_test else - echo $TEST_NAME >> $PATHRT/fail_test + echo "${TEST_NAME} ${TEST_NR} failed in run_test" >> $PATHRT/fail_test fi + exit 1 } -SECONDS=0 - if [[ $# != 5 ]]; then echo "Usage: $0 PATHRT RUNDIR_ROOT TEST_NAME TEST_NR COMPILE_NR" exit 1 @@ -36,10 +48,7 @@ export INPUT_DIR=${CNTL_DIR} export RUNDIR=${RUNDIR_ROOT}/${TEST_NAME}${RT_SUFFIX} export CNTL_DIR=${CNTL_DIR}${BL_SUFFIX} -JBNME=$(basename $RUNDIR_ROOT)_${TEST_NR} -export JBNME - -export FV3X=fcst_${COMPILE_NR}.exe +export JBNME=$(basename $RUNDIR_ROOT)_${TEST_NR} UNIT_TEST=${UNIT_TEST:-false} if [[ ${UNIT_TEST} == false ]]; then @@ -49,13 +58,101 @@ else fi export REGRESSIONTEST_LOG -# Submit the actual test run script echo "Test ${TEST_NR} ${TEST_NAME} ${TEST_DESCR}" -trap 'echo "run_test.sh: Test ${TEST_NAME} killed"; kill $(jobs -p); wait; trap 0; exit' 1 2 3 4 5 6 7 8 10 12 13 15 -trap '[ "$?" -eq 0 ] || write_fail_test' EXIT -RUN_SCRIPT=rt_fv3.sh -./${RUN_SCRIPT} > ${RUNDIR_ROOT}/${TEST_NAME}${RT_SUFFIX}.log 2>&1 +source rt_utils.sh +source atparse.bash +source edit_inputs.sh + +mkdir -p ${RUNDIR} +cd $RUNDIR + +############################################################################### +# Make configure and run files +############################################################################### + +# FV3 executable: +cp ${PATHRT}/fcst_${COMPILE_NR}.exe fcst.exe + +# modulefile for FV3 prerequisites: +cp ${PATHRT}/modules.fcst_${COMPILE_NR} modules.fcst + +# Get the shell file that loads the "module" command and purges modules: +cp ${PATHRT}/../NEMS/src/conf/module-setup.sh.inc module-setup.sh + +SRCD="${PATHTR}" +RUND="${RUNDIR}" + +# Set up the run directory +atparse < ${PATHRT}/fv3_conf/${FV3_RUN:-fv3_run.IN} > fv3_run +source ./fv3_run +atparse < ${PATHTR}/parm/${INPUT_NML:-input.nml.IN} > input.nml +atparse < ${PATHTR}/parm/${MODEL_CONFIGURE:-model_configure.IN} > model_configure +atparse < ${PATHTR}/parm/${NEMS_CONFIGURE:-nems.configure} > nems.configure + +edit_ice_in < ${PATHTR}/parm/ice_in_template > ice_in +edit_mom_input < ${PATHTR}/parm/${MOM_INPUT:-MOM_input_template_$OCNRES} > INPUT/MOM_input +edit_diag_table < ${PATHTR}/parm/diag_table_template > diag_table +edit_data_table < ${PATHTR}/parm/data_table_template > data_table + +cp ${PATHTR}/parm/fd_nems.yaml fd_nems.yaml +cp ${PATHTR}/parm/pio_in pio_in +cp ${PATHTR}/parm/med_modelio.nml med_modelio.nml + +if [[ "Q${INPUT_NEST02_NML:-}" != Q ]] ; then + atparse < ${PATHTR}/parm/${INPUT_NEST02_NML} > input_nest02.nml +fi + +if [[ $SCHEDULER = 'pbs' ]]; then + NODES=$(( TASKS / TPN )) + if (( NODES * TPN < TASKS )); then + NODES=$(( NODES + 1 )) + fi + atparse < $PATHRT/fv3_conf/fv3_qsub.IN > job_card +elif [[ $SCHEDULER = 'slurm' ]]; then + NODES=$(( TASKS / TPN )) + if (( NODES * TPN < TASKS )); then + NODES=$(( NODES + 1 )) + fi + atparse < $PATHRT/fv3_conf/fv3_slurm.IN > job_card +elif [[ $SCHEDULER = 'lsf' ]]; then + if (( TASKS < TPN )); then + TPN=${TASKS} + fi + NODES=$(( TASKS / TPN )) + if (( NODES * TPN < TASKS )); then + NODES=$(( NODES + 1 )) + fi + atparse < $PATHRT/fv3_conf/fv3_bsub.IN > job_card +fi + +atparse < ${PATHTR}/parm/${NEMS_CONFIGURE:-nems.configure} > nems.configure + +################################################################################ +# Submit test job +################################################################################ + +if [[ $SCHEDULER = 'none' ]]; then + + ulimit -s unlimited + mpiexec -n ${TASKS} ./fv3.exe >out 2> >(tee err >&3) + +else + + if [[ $ROCOTO = 'false' ]]; then + submit_and_wait job_card + else + chmod u+x job_card + ./job_card + fi + +fi + +check_results + +################################################################################ +# End test +################################################################################ elapsed=$SECONDS echo "Elapsed time $elapsed seconds. Test ${TEST_NAME}" diff --git a/tests/tail.h b/tests/tail.h index 57fde504..480941a8 100644 --- a/tests/tail.h +++ b/tests/tail.h @@ -1,6 +1,6 @@ ### tail.h start -wait # wait for background process to stop +wait $! # wait for background process to stop ecflow_client --ping --host=${ECF_HOST} --port=${ECF_PORT} not_running=$? From bdea56604f791af104c3e68f60eefd0bbd257114 Mon Sep 17 00:00:00 2001 From: MinsukJi-NOAA Date: Thu, 1 Oct 2020 11:45:37 -0500 Subject: [PATCH 3/4] Move ufs.s2s.r.*.nc file into RESTART directory --- parm/nems.configure.medcmeps_atm_ocn_ice.IN | 2 +- parm/nems.configure.medcmeps_atm_ocn_ice_wav.IN | 2 +- tests/tests/cpld_fv3_ccpp_384_mom6_cice_cmeps_1d_bmark_rt | 4 ++-- tests/tests/cpld_fv3_ccpp_384_mom6_cice_cmeps_2d_atm_flux | 2 +- tests/tests/cpld_fv3_ccpp_384_mom6_cice_cmeps_bmark_restart | 2 +- tests/tests/cpld_fv3_ccpp_384_mom6_cice_cmeps_ww3_1d_bmark_rt | 2 +- tests/tests/cpld_fv3_ccpp_mom6_cice_cmeps_1d_satmedmf | 2 +- tests/tests/cpld_fv3_ccpp_mom6_cice_cmeps_1stepcold_atm_flux | 2 +- tests/tests/cpld_fv3_ccpp_mom6_cice_cmeps_2d_2threads | 2 +- tests/tests/cpld_fv3_ccpp_mom6_cice_cmeps_2d_atm_flux | 2 +- tests/tests/cpld_fv3_ccpp_mom6_cice_cmeps_2d_decomp | 2 +- tests/tests/cpld_fv3_ccpp_mom6_cice_cmeps_3d_atm_flux | 2 +- tests/tests/cpld_fv3_ccpp_mom6_cice_cmeps_6h_debug | 2 +- tests/tests/cpld_fv3_ccpp_mom6_cice_cmeps_restart | 2 +- 14 files changed, 15 insertions(+), 15 deletions(-) diff --git a/parm/nems.configure.medcmeps_atm_ocn_ice.IN b/parm/nems.configure.medcmeps_atm_ocn_ice.IN index d12ad946..ce5e8e39 100644 --- a/parm/nems.configure.medcmeps_atm_ocn_ice.IN +++ b/parm/nems.configure.medcmeps_atm_ocn_ice.IN @@ -93,7 +93,7 @@ ALLCOMP_attributes:: ScalarFieldIdxGridNY = 2 ScalarFieldName = cpl_scalars start_type = @[RUNTYPE] - case_name = ufs.s2s + case_name = ./RESTART/ufs.s2s restart_n = @[FV3_RESTART_INTERVAL] restart_option = nhours restart_ymd = -999 diff --git a/parm/nems.configure.medcmeps_atm_ocn_ice_wav.IN b/parm/nems.configure.medcmeps_atm_ocn_ice_wav.IN index 7461a7b2..22ccc69e 100644 --- a/parm/nems.configure.medcmeps_atm_ocn_ice_wav.IN +++ b/parm/nems.configure.medcmeps_atm_ocn_ice_wav.IN @@ -107,7 +107,7 @@ ALLCOMP_attributes:: ScalarFieldIdxGridNY = 2 ScalarFieldName = cpl_scalars start_type = @[RUNTYPE] - case_name = ufs.s2s + case_name = ./RESTART/ufs.s2s restart_n = @[FV3_RESTART_INTERVAL] restart_option = nhours restart_ymd = -999 diff --git a/tests/tests/cpld_fv3_ccpp_384_mom6_cice_cmeps_1d_bmark_rt b/tests/tests/cpld_fv3_ccpp_384_mom6_cice_cmeps_1d_bmark_rt index 25fd7793..481c04b0 100644 --- a/tests/tests/cpld_fv3_ccpp_384_mom6_cice_cmeps_1d_bmark_rt +++ b/tests/tests/cpld_fv3_ccpp_384_mom6_cice_cmeps_1d_bmark_rt @@ -92,8 +92,8 @@ export LIST_FILES="phyf024.tile1.nc \ RESTART/MOM.res.2013-04-01-12-00-00_3.nc \ RESTART/iced.2013-04-02-00000.nc \ RESTART/iced.2013-04-01-43200.nc \ - ufs.s2s.cpl.r.2013-04-01-43200.nc \ - ufs.s2s.cpl.r.2013-04-02-00000.nc" + RESTART/ufs.s2s.cpl.r.2013-04-01-43200.nc \ + RESTART/ufs.s2s.cpl.r.2013-04-02-00000.nc" export_fv3 export_cpl diff --git a/tests/tests/cpld_fv3_ccpp_384_mom6_cice_cmeps_2d_atm_flux b/tests/tests/cpld_fv3_ccpp_384_mom6_cice_cmeps_2d_atm_flux index f47da53f..4b556fda 100644 --- a/tests/tests/cpld_fv3_ccpp_384_mom6_cice_cmeps_2d_atm_flux +++ b/tests/tests/cpld_fv3_ccpp_384_mom6_cice_cmeps_2d_atm_flux @@ -55,7 +55,7 @@ export LIST_FILES="phyf048.tile1.nc \ RESTART/MOM.res_2.nc \ RESTART/MOM.res_3.nc \ RESTART/iced.2016-10-05-00000.nc \ - ufs.s2s.cpl.r.2016-10-05-00000.nc" + RESTART/ufs.s2s.cpl.r.2016-10-05-00000.nc" export_fv3 export_cpl diff --git a/tests/tests/cpld_fv3_ccpp_384_mom6_cice_cmeps_bmark_restart b/tests/tests/cpld_fv3_ccpp_384_mom6_cice_cmeps_bmark_restart index 620b02f8..36d3db0e 100644 --- a/tests/tests/cpld_fv3_ccpp_384_mom6_cice_cmeps_bmark_restart +++ b/tests/tests/cpld_fv3_ccpp_384_mom6_cice_cmeps_bmark_restart @@ -55,7 +55,7 @@ export LIST_FILES="phyf024.tile1.nc \ RESTART/MOM.res_2.nc \ RESTART/MOM.res_3.nc \ RESTART/iced.2013-04-02-00000.nc \ - ufs.s2s.cpl.r.2013-04-02-00000.nc" + RESTART/ufs.s2s.cpl.r.2013-04-02-00000.nc" export_fv3 export_cpl diff --git a/tests/tests/cpld_fv3_ccpp_384_mom6_cice_cmeps_ww3_1d_bmark_rt b/tests/tests/cpld_fv3_ccpp_384_mom6_cice_cmeps_ww3_1d_bmark_rt index e54755df..2f904451 100644 --- a/tests/tests/cpld_fv3_ccpp_384_mom6_cice_cmeps_ww3_1d_bmark_rt +++ b/tests/tests/cpld_fv3_ccpp_384_mom6_cice_cmeps_ww3_1d_bmark_rt @@ -58,7 +58,7 @@ export LIST_FILES="phyf024.tile1.nc \ RESTART/MOM.res_2.nc \ RESTART/MOM.res_3.nc \ RESTART/iced.2013-04-02-00000.nc \ - ufs.s2s.cpl.r.2013-04-02-00000.nc" + RESTART/ufs.s2s.cpl.r.2013-04-02-00000.nc" export_fv3 export_cpl diff --git a/tests/tests/cpld_fv3_ccpp_mom6_cice_cmeps_1d_satmedmf b/tests/tests/cpld_fv3_ccpp_mom6_cice_cmeps_1d_satmedmf index 15f6da3f..0a424f51 100644 --- a/tests/tests/cpld_fv3_ccpp_mom6_cice_cmeps_1d_satmedmf +++ b/tests/tests/cpld_fv3_ccpp_mom6_cice_cmeps_1d_satmedmf @@ -55,7 +55,7 @@ export LIST_FILES="phyf024.tile1.nc \ RESTART/MOM.res_2.nc \ RESTART/MOM.res_3.nc \ RESTART/iced.2016-10-04-00000.nc \ - ufs.s2s.cpl.r.2016-10-04-00000.nc" + RESTART/ufs.s2s.cpl.r.2016-10-04-00000.nc" export_fv3 export_cpl diff --git a/tests/tests/cpld_fv3_ccpp_mom6_cice_cmeps_1stepcold_atm_flux b/tests/tests/cpld_fv3_ccpp_mom6_cice_cmeps_1stepcold_atm_flux index 1e794d36..dd5767ce 100644 --- a/tests/tests/cpld_fv3_ccpp_mom6_cice_cmeps_1stepcold_atm_flux +++ b/tests/tests/cpld_fv3_ccpp_mom6_cice_cmeps_1stepcold_atm_flux @@ -55,7 +55,7 @@ export LIST_FILES="phyf048.tile1.nc \ RESTART/MOM.res_2.nc \ RESTART/MOM.res_3.nc \ RESTART/iced.2016-10-05-00000.nc \ - ufs.s2s.cpl.r.2016-10-05-00000.nc" + RESTART/ufs.s2s.cpl.r.2016-10-05-00000.nc" export_fv3 export_cpl diff --git a/tests/tests/cpld_fv3_ccpp_mom6_cice_cmeps_2d_2threads b/tests/tests/cpld_fv3_ccpp_mom6_cice_cmeps_2d_2threads index 8d6b370d..1d9bc8b4 100644 --- a/tests/tests/cpld_fv3_ccpp_mom6_cice_cmeps_2d_2threads +++ b/tests/tests/cpld_fv3_ccpp_mom6_cice_cmeps_2d_2threads @@ -55,7 +55,7 @@ export LIST_FILES="phyf048.tile1.nc \ RESTART/MOM.res_2.nc \ RESTART/MOM.res_3.nc \ RESTART/iced.2016-10-05-00000.nc \ - ufs.s2s.cpl.r.2016-10-05-00000.nc" + RESTART/ufs.s2s.cpl.r.2016-10-05-00000.nc" export_fv3 export_cpl diff --git a/tests/tests/cpld_fv3_ccpp_mom6_cice_cmeps_2d_atm_flux b/tests/tests/cpld_fv3_ccpp_mom6_cice_cmeps_2d_atm_flux index a3a900ba..8fc7bcb7 100644 --- a/tests/tests/cpld_fv3_ccpp_mom6_cice_cmeps_2d_atm_flux +++ b/tests/tests/cpld_fv3_ccpp_mom6_cice_cmeps_2d_atm_flux @@ -55,7 +55,7 @@ export LIST_FILES="phyf048.tile1.nc \ RESTART/MOM.res_2.nc \ RESTART/MOM.res_3.nc \ RESTART/iced.2016-10-05-00000.nc \ - ufs.s2s.cpl.r.2016-10-05-00000.nc" + RESTART/ufs.s2s.cpl.r.2016-10-05-00000.nc" export_fv3 export_cpl diff --git a/tests/tests/cpld_fv3_ccpp_mom6_cice_cmeps_2d_decomp b/tests/tests/cpld_fv3_ccpp_mom6_cice_cmeps_2d_decomp index ec1b2a0e..52354e48 100644 --- a/tests/tests/cpld_fv3_ccpp_mom6_cice_cmeps_2d_decomp +++ b/tests/tests/cpld_fv3_ccpp_mom6_cice_cmeps_2d_decomp @@ -55,7 +55,7 @@ export LIST_FILES="phyf048.tile1.nc \ RESTART/MOM.res_2.nc \ RESTART/MOM.res_3.nc \ RESTART/iced.2016-10-05-00000.nc \ - ufs.s2s.cpl.r.2016-10-05-00000.nc" + RESTART/ufs.s2s.cpl.r.2016-10-05-00000.nc" export_fv3 export_cpl diff --git a/tests/tests/cpld_fv3_ccpp_mom6_cice_cmeps_3d_atm_flux b/tests/tests/cpld_fv3_ccpp_mom6_cice_cmeps_3d_atm_flux index 29738ac6..edf3e9fb 100644 --- a/tests/tests/cpld_fv3_ccpp_mom6_cice_cmeps_3d_atm_flux +++ b/tests/tests/cpld_fv3_ccpp_mom6_cice_cmeps_3d_atm_flux @@ -55,7 +55,7 @@ export LIST_FILES="phyf072.tile1.nc \ RESTART/MOM.res_2.nc \ RESTART/MOM.res_3.nc \ RESTART/iced.2016-10-06-00000.nc \ - ufs.s2s.cpl.r.2016-10-06-00000.nc" + RESTART/ufs.s2s.cpl.r.2016-10-06-00000.nc" export_fv3 export_cpl diff --git a/tests/tests/cpld_fv3_ccpp_mom6_cice_cmeps_6h_debug b/tests/tests/cpld_fv3_ccpp_mom6_cice_cmeps_6h_debug index 6b3dddcf..f3c63040 100644 --- a/tests/tests/cpld_fv3_ccpp_mom6_cice_cmeps_6h_debug +++ b/tests/tests/cpld_fv3_ccpp_mom6_cice_cmeps_6h_debug @@ -55,7 +55,7 @@ export LIST_FILES="phyf006.tile1.nc \ RESTART/MOM.res_2.nc \ RESTART/MOM.res_3.nc \ RESTART/iced.2016-10-03-21600.nc \ - ufs.s2s.cpl.r.2016-10-03-21600.nc" + RESTART/ufs.s2s.cpl.r.2016-10-03-21600.nc" export_fv3 export_cpl diff --git a/tests/tests/cpld_fv3_ccpp_mom6_cice_cmeps_restart b/tests/tests/cpld_fv3_ccpp_mom6_cice_cmeps_restart index 9429f9eb..d8ca2eb0 100644 --- a/tests/tests/cpld_fv3_ccpp_mom6_cice_cmeps_restart +++ b/tests/tests/cpld_fv3_ccpp_mom6_cice_cmeps_restart @@ -55,7 +55,7 @@ export LIST_FILES="phyf072.tile1.nc \ RESTART/MOM.res_2.nc \ RESTART/MOM.res_3.nc \ RESTART/iced.2016-10-06-00000.nc \ - ufs.s2s.cpl.r.2016-10-06-00000.nc" + RESTART/ufs.s2s.cpl.r.2016-10-06-00000.nc" export_fv3 export_cpl From c0d9e646ae51811eb18004d4299fa094cea2a7ad Mon Sep 17 00:00:00 2001 From: MinsukJi-NOAA Date: Fri, 2 Oct 2020 16:42:04 -0500 Subject: [PATCH 4/4] Clean up rt.sh --- .../cpld_fv3_mom6_cice_atm_flux_run.IN | 3 +- tests/rt.sh | 179 +++++------------- 2 files changed, 50 insertions(+), 132 deletions(-) diff --git a/tests/fv3_conf/cpld_fv3_mom6_cice_atm_flux_run.IN b/tests/fv3_conf/cpld_fv3_mom6_cice_atm_flux_run.IN index 10b1a82e..77fd653d 100644 --- a/tests/fv3_conf/cpld_fv3_mom6_cice_atm_flux_run.IN +++ b/tests/fv3_conf/cpld_fv3_mom6_cice_atm_flux_run.IN @@ -62,8 +62,7 @@ else cp ../${DEP_RUN}/RESTART/MOM*.nc ./INPUT # CMEPS restart and pointer files - #cp ${DEP_RUN}/RESTART/ufs.s2s.cpl.r.2016-10-05-00000.nc . - cp ../${DEP_RUN}/ufs.s2s.cpl.r.2016-10-05-00000.nc . + cp ../${DEP_RUN}/RESTART/ufs.s2s.cpl.r.2016-10-05-00000.nc . RFILE="ufs.s2s.cpl.r.2016-10-05-00000.nc" ls -1 ${RFILE}>rpointer.cpl diff --git a/tests/rt.sh b/tests/rt.sh index e4295903..f103e18f 100755 --- a/tests/rt.sh +++ b/tests/rt.sh @@ -9,17 +9,18 @@ die() { echo "$@" >&2; exit 1; } usage() { set +x echo - echo "Usage: $0 -c | -f | -s | -l | -m | -k | -r | -e | -h" + echo "Usage: $0 -c | -e | -f | -h | -k | -l | -m | -n | -r | -s" echo - echo " -c create new baseline results for " + echo " -c create new baseline results" + echo " -e use ecFlow workflow manager" echo " -f run full suite of regression tests" - echo " -s run standard suite of regression tests" + echo " -h display this help" + echo " -k keep run directory" echo " -l runs test specified in " echo " -m compare against new baseline results" - echo " -k keep run directory" + echo " -n run single test " echo " -r use Rocoto workflow manager" - echo " -e use ecFlow workflow manager" - echo " -h display this help" + echo " -s run standard suite of regression tests" echo set -x exit 1 @@ -64,13 +65,13 @@ rt_35d() { sed -i -e "s/\(export SYEAR\)/\1=\"$sy\"/" $new_test_name sed -i -e "s/\(export SMONTH\)/\1=\"$sm\"/" $new_test_name - DEP_RUN=${DEP_RUN}_${DATE_35D} TEST_NAME=${new_test_name#tests/} } rt_trap() { [[ ${ROCOTO:-false} == true ]] && rocoto_kill + [[ ${ECFLOW:-false} == true ]] && ecflow_kill cleanup } @@ -84,7 +85,7 @@ cleanup() { trap '{ echo "rt.sh interrupted"; rt_trap ; }' INT trap '{ echo "rt.sh quit"; rt_trap ; }' QUIT trap '{ echo "rt.sh terminated"; rt_trap ; }' TERM -trap '{ echo "rt.sh error on line $LINENO"; rt_trap ; }' ERR +trap '{ echo "rt.sh error on line $LINENO"; cleanup ; }' ERR trap '{ echo "rt.sh finished"; cleanup ; }' EXIT # PATHRT - Path to regression tests directory @@ -124,6 +125,7 @@ if [[ $MACHINE_ID = wcoss_cray ]]; then ROCOTORUN=$(which rocotorun) ROCOTOSTAT=$(which rocotostat) ROCOTOCOMPLETE=$(which rocotocomplete) + ROCOTO_SCHEDULER=lsfcray module load ecflow/intel/4.7.1 ECFLOW_START=${ECF_ROOT}/intel/bin/ecflow_start.sh @@ -158,6 +160,7 @@ elif [[ $MACHINE_ID = wcoss_dell_p3 ]]; then ROCOTORUN=$(which rocotorun) ROCOTOSTAT=$(which rocotostat) ROCOTOCOMPLETE=$(which rocotocomplete) + ROCOTO_SCHEDULER=lsf module load ips/18.0.1.163 module load ecflow/4.7.1 @@ -165,7 +168,7 @@ elif [[ $MACHINE_ID = wcoss_dell_p3 ]]; then ECF_PORT=$(grep $USER /usrx/local/sys/ecflow/assigned_ports.txt | awk '{print $2}') DISKNM=/gpfs/dell2/emc/modeling/noscrub/emc.nemspara/RT - QUEUE=dev + QUEUE=debug PARTITION= ACCNR=GFS-DEV STMP=/gpfs/dell2/stmp @@ -192,25 +195,22 @@ elif [[ $MACHINE_ID = gaea.* ]]; then STMP=/lustre/f2/scratch PTMP=/lustre/f2/scratch - # default scheduler on Gaea SCHEDULER=slurm cp fv3_conf/fv3_slurm.IN_gaea fv3_conf/fv3_slurm.IN elif [[ $MACHINE_ID = hera.* ]]; then - export NCEPLIBS=/scratch1/NCEPDEV/global/gwv/l819/lib source $PATHTR/NEMS/src/conf/module-setup.sh.inc module use $PATHTR/modulefiles/${MACHINE_ID} module load fv3 - # Re-instantiate COMPILER in case it gets deleted by module purge - COMPILER=${NEMS_COMPILER:-intel} - module load rocoto ROCOTORUN=$(which rocotorun) ROCOTOSTAT=$(which rocotostat) ROCOTOCOMPLETE=$(which rocotocomplete) + ROCOTO_SCHEDULER=slurm + export PATH=/scratch2/NCEPDEV/fv3-cam/Dusan.Jovic/ecflow/bin:$PATH export PYTHONPATH=/scratch2/NCEPDEV/fv3-cam/Dusan.Jovic/ecflow/lib/python2.7/site-packages ECFLOW_START=/scratch2/NCEPDEV/fv3-cam/Dusan.Jovic/ecflow/bin/ecflow_start.sh @@ -234,10 +234,7 @@ elif [[ $MACHINE_ID = orion.* ]]; then module load fv3 module load gcc/8.3.0 - # Re-instantiate COMPILER in case it gets deleted by module purge - COMPILER=${NEMS_COMPILER:-intel} - - module load rocoto/1.3.1 + module load contrib rocoto/1.3.1 ROCOTORUN=$(which rocotorun) ROCOTOSTAT=$(which rocotostat) ROCOTOCOMPLETE=$(which rocotocomplete) @@ -263,43 +260,41 @@ elif [[ $MACHINE_ID = jet.* ]]; then module use $PATHTR/modulefiles/${MACHINE_ID} module load fv3 - # Re-instantiate COMPILER in case it gets deleted by module purge - COMPILER=${NEMS_COMPILER:-intel} - - module load rocoto/1.3.1 + module load rocoto/1.3.2 ROCOTORUN=$(which rocotorun) ROCOTOSTAT=$(which rocotostat) ROCOTOCOMPLETE=$(which rocotocomplete) + ROCOTO_SCHEDULER=slurm - export PATH=/mnt/lfs3/projects/hfv3gfs/Dusan.Jovic/ecflow/bin:$PATH - export PYTHONPATH=/mnt/lfs3/projects/hfv3gfs/Dusan.Jovic/ecflow/lib/python2.7/site-packages - ECFLOW_START=/mnt/lfs3/projects/hfv3gfs/Dusan.Jovic/ecflow/bin/ecflow_start.sh + export PATH=/lfs4/HFIP/hfv3gfs/software/ecFlow-5.3.1/bin:$PATH + export PYTHONPATH=/lfs4/HFIP/hfv3gfs/software/ecFlow-5.3.1/lib/python2.7/site-packages + ECFLOW_START=/lfs4/HFIP/hfv3gfs/software/ecFlow-5.3.1/bin/ecflow_start.sh ECF_PORT=$(( $(id -u) + 1500 )) - QUEUE=debug + QUEUE=batch ACCNR=hfv3gfs PARTITION=xjet - DISKNM=/lfs3/projects/hfv3gfs/GMTB/RT - dprefix=/lfs3/projects/hfv3gfs/$USER + DISKNM=/lfs4/HFIP/hfv3gfs/RT + dprefix=/lfs4/HFIP/hfv3gfs/$USER STMP=$dprefix/RT_BASELINE PTMP=$dprefix/RT_RUNDIRS - # default scheduler on Jet SCHEDULER=slurm cp fv3_conf/fv3_slurm.IN_jet fv3_conf/fv3_slurm.IN elif [[ $MACHINE_ID = cheyenne.* ]]; then source $PATHTR/NEMS/src/conf/module-setup.sh.inc - # Re-instantiate COMPILER in case it gets deleted by module purge - COMPILER=${NEMS_COMPILER:-intel} - export PYTHONPATH= - ECFLOW_START= - QUEUE=premium + module load python/2.7.16 + export PATH=/glade/p/ral/jntp/tools/ecFlow-5.3.1/bin:$PATH + export PYTHONPATH=/glade/p/ral/jntp/tools/ecFlow-5.3.1/lib/python2.7/site-packages + ECFLOW_START=/glade/p/ral/jntp/tools/ecFlow-5.3.1/bin/ecflow_start.sh + ECF_PORT=$(( $(id -u) + 1500 )) + QUEUE=regular PARTITION= dprefix=/glade/scratch - DISKNM=/glade/p/ral/jntp/GMTB/NEMSfv3gfs/RT - STMP=$dprefix + DISKNM=/glade/p/ral/jntp/GMTB/ufs-weather-model/RT + STMP=/glade/work PTMP=$dprefix SCHEDULER=pbs cp fv3_conf/fv3_qsub.IN_cheyenne fv3_conf/fv3_qsub.IN @@ -307,21 +302,19 @@ elif [[ $MACHINE_ID = cheyenne.* ]]; then elif [[ $MACHINE_ID = stampede.* ]]; then source $PATHTR/NEMS/src/conf/module-setup.sh.inc - # Re-instantiate COMPILER in case it gets deleted by module purge - COMPILER=${NEMS_COMPILER:-intel} export PYTHONPATH= ECFLOW_START= QUEUE=skx-dev PARTITION= - dprefix=$WORK/NEMSfv3gfs/run - DISKNM=$WORK/NEMSfv3gfs/RT - STMP=$dprefix/stmp4 - PTMP=$dprefix/stmp3 - SCHEDULER=sbatch + dprefix=$WORK/ufs-s2s-model/run + DISKNM=$WORK/ufs-s2s-model/RT + STMP=$dprefix + PTMP=$dprefix + SCHEDULER=slurm MPIEXEC=ibrun MPIEXECOPTS= - cp fv3_conf/fv3_qsub.IN_stampede fv3_conf/fv3_qsub.IN + cp fv3_conf/fv3_slurm.IN_stampede fv3_conf/fv3_slurm.IN else die "Unknown machine ID, please edit detect_machine.sh file" @@ -331,7 +324,6 @@ mkdir -p ${STMP}/${USER} # Different own baseline directories for different compilers NEW_BASELINE=${STMP}/${USER}/S2S_RT/REGRESSION_TEST -#if [[ $MACHINE_ID = cheyenne.* ]] || [[ $MACHINE_ID = jet.* ]] || [[ $MACHINE_ID = gaea.* ]]; then if [[ $MACHINE_ID = hera.* ]] || [[ $MACHINE_ID = orion.* ]] || [[ $MACHINE_ID = cheyenne.* ]]; then NEW_BASELINE=${NEW_BASELINE}_${COMPILER^^} fi @@ -348,11 +340,6 @@ SINGLE_NAME='' TEST_35D=false TESTS_FILE='rt.conf' -# Switch to special regression test config on wcoss_cray: -# don't run the IPD and CCPP tests in REPRO mode. -if [[ $MACHINE_ID = wcoss_cray ]]; then - TESTS_FILE='rt_wcoss_cray.conf' -fi SET_ID='standard' while getopts ":cfsl:mn:kreh" opt; do @@ -429,7 +416,7 @@ if [[ $CREATE_BASELINE == true ]]; then # rm -rf "${NEW_BASELINE}" mkdir -p "${NEW_BASELINE}" - echo "copy baseline inputs form: ${RTPWD}" + echo "copy baseline inputs from: ${RTPWD}" echo " to: ${NEW_BASELINE}" rsync -a "${RTPWD}"/FV3_* "${NEW_BASELINE}"/ @@ -438,25 +425,6 @@ if [[ $CREATE_BASELINE == true ]]; then rsync -a "${RTPWD}"/CPL_* "${NEW_BASELINE}"/ rsync -a "${RTPWD}"/WW3_* "${NEW_BASELINE}"/ rsync -a "${RTPWD}"/BM_* "${NEW_BASELINE}"/ - - # FIXME: move these namelist files to parm directory - #rsync -a "${RTPWD}"/fv3_regional_control/input.nml "${NEW_BASELINE}"/fv3_regional_control/ - #rsync -a "${RTPWD}"/fv3_regional_quilt/input.nml "${NEW_BASELINE}"/fv3_regional_quilt/ - #rsync -a "${RTPWD}"/fv3_regional_c768/input.nml "${NEW_BASELINE}"/fv3_regional_c768/ - #rsync -a "${RTPWD}"/fv3_regional_restart/input.nml "${NEW_BASELINE}"/fv3_regional_restart/ - - #rsync -a "${RTPWD}"/fv3_regional_control/model_configure "${NEW_BASELINE}"/fv3_regional_control/ - #rsync -a "${RTPWD}"/fv3_regional_quilt/model_configure "${NEW_BASELINE}"/fv3_regional_quilt/ - #rsync -a "${RTPWD}"/fv3_regional_c768/model_configure "${NEW_BASELINE}"/fv3_regional_c768/ - #rsync -a "${RTPWD}"/fv3_regional_restart/model_configure "${NEW_BASELINE}"/fv3_regional_restart/ - - #rsync -a "${RTPWD}"/fv3_regional_control/INPUT "${NEW_BASELINE}"/fv3_regional_control/ - #rsync -a "${RTPWD}"/fv3_regional_quilt/INPUT "${NEW_BASELINE}"/fv3_regional_quilt/ - #rsync -a "${RTPWD}"/fv3_regional_c768/INPUT "${NEW_BASELINE}"/fv3_regional_c768/ - #rsync -a "${RTPWD}"/fv3_regional_restart/INPUT "${NEW_BASELINE}"/fv3_regional_restart/ - #rsync -a "${RTPWD}"/fv3_stretched/INPUT "${NEW_BASELINE}"/fv3_stretched/ - #rsync -a "${RTPWD}"/fv3_stretched_nest/INPUT "${NEW_BASELINE}"/fv3_stretched_nest/ - #rsync -a "${RTPWD}"/fv3_stretched_nest_quilt/INPUT "${NEW_BASELINE}"/fv3_stretched_nest_quilt/ fi COMPILE_LOG=${PATHRT}/Compile_$MACHINE_ID.log @@ -477,8 +445,6 @@ LOG_DIR=${PATHRT}/log_$MACHINE_ID rm -rf ${LOG_DIR} mkdir ${LOG_DIR} -rm -f ../fv3.exe - if [[ $ROCOTO == true ]]; then ROCOTO_XML=${PATHRT}/rocoto_workflow.xml @@ -546,7 +512,8 @@ suite ${ECFLOW_SUITE} edit ECF_INCLUDE '${ECFLOW_RUN}' edit ECF_KILL_CMD kill -15 %ECF_RID% > %ECF_JOB%.kill 2>&1 edit ECF_TRIES 1 - label rundir_root '${RUNDIR_ROOT}' + label src_dir '${PATHTR}' + label run_dir '${RUNDIR_ROOT}' limit max_builds 1 limit max_jobs 30 EOF @@ -563,6 +530,8 @@ EOF QUEUE=batch elif [[ $MACHINE_ID = jet.* ]]; then QUEUE=batch + elif [[ $MACHINE_ID = cheyenne.* ]]; then + QUEUE=regular else die "ecFlow is not supported on this machine $MACHINE_ID" fi @@ -587,7 +556,6 @@ while read -r line; do if [[ $line == COMPILE* ]] ; then - APP='' NEMS_VER=$(echo $line | cut -d'|' -f2 | sed -e 's/^ *//' -e 's/ *$//') SET=$( echo $line | cut -d'|' -f3) MACHINES=$(echo $line | cut -d'|' -f4 | sed -e 's/^ *//' -e 's/ *$//') @@ -609,61 +577,12 @@ while read -r line; do echo " bash Compile is done" fi - # Set RT_SUFFIX (regression test run directories and log files) and BL_SUFFIX - # (regression test baseline directories) for REPRO (IPD, CCPP) or PROD (CCPP) runs - #if [[ ${NEMS_VER^^} =~ "REPRO=Y" ]]; then - # RT_SUFFIX="_repro" - # BL_SUFFIX="_repro" - #elif [[ ${NEMS_VER^^} =~ "CCPP=Y" ]]; then - # RT_SUFFIX="_prod" - # BL_SUFFIX="_ccpp" - #fi - if [[ ${NEMS_VER^^} =~ "WW3=Y" ]]; then COMPILE_PREV_WW3_NR=${COMPILE_NR} fi continue - elif [[ $line == APPBUILD* ]] ; then - - APP=$( echo $line | cut -d'|' -f2 | sed -e 's/^ *//' -e 's/ *$//') - SET=$( echo $line | cut -d'|' -f3) - MACHINES=$(echo $line | cut -d'|' -f4 | sed -e 's/^ *//' -e 's/ *$//') - CB=$( echo $line | cut -d'|' -f5) - - [[ $SET_ID != ' ' && $SET != *${SET_ID}* ]] && continue - [[ $MACHINES != ' ' && $MACHINES != "${MACHINE_ID}" ]] && continue - [[ $CREATE_BASELINE == true && $CB != *fv3* ]] && continue - [[ ${ROCOTO} == true || ${ECFLOW} == true ]] && continue - - (( COMPILE_NR += 1 )) - - if [[ $ROCOTO == true ]]; then - rocoto_create_compile_task - elif [[ $ECFLOW == true ]]; then - ecflow_create_compile_task - else - echo test > "${LOG_DIR}/compile_${COMPILE_NR}.log" 2>&1 - test -s ./appbuild.sh - test -x ./appbuild.sh - MACHINE_ID=${MACHINE_ID} ./appbuild.sh "$PATHTR/FV3" "$APP" "$COMPILE_NR" > ${LOG_DIR}/compile_${COMPILE_NR}.log 2>&1 - echo " bash NEMSAppBuilder is done" - fi - - # Set RT_SUFFIX (regression test run directories and log files) and BL_SUFFIX - # (regression test baseline directories) for REPRO (IPD, CCPP) or PROD (CCPP) runs - if [[ ${NEMS_VER^^} =~ "REPRO=Y" ]]; then - RT_SUFFIX="_repro" - BL_SUFFIX="_repro" - elif [[ ${NEMS_VER^^} =~ "CCPP=Y" ]]; then - RT_SUFFIX="_prod" - BL_SUFFIX="_ccpp" - fi - - unset APP - continue - elif [[ $line == RUN* ]] ; then TEST_NAME=$(echo $line | cut -d'|' -f2 | sed -e 's/^ *//' -e 's/ *$//') @@ -692,13 +611,6 @@ while read -r line; do RT_SUFFIX=${RT_SUFFIX:-""} BL_SUFFIX=${BL_SUFFIX:-""} - if [[ $MACHINE_ID = wcoss_cray ]]; then - if [[ $RT_SUFFIX != "" || $BL_SUFFIX != "" ]]; then - # skip all REPRO and/or CCPP runs on wcoss_cray. FIXME - continue - fi - fi - if [[ $ROCOTO == true && $new_compile == true ]]; then new_compile=false in_metatask=true @@ -712,6 +624,11 @@ EOF ( source ${PATHRT}/tests/$TEST_NAME + NODES=$(( TASKS / TPN )) + if (( NODES * TPN < TASKS )); then + NODES=$(( NODES + 1 )) + fi + cat << EOF > ${RUNDIR_ROOT}/run_test_${TEST_NR}.env export MACHINE_ID=${MACHINE_ID} export RTPWD=${RTPWD} @@ -726,6 +643,8 @@ EOF export QUEUE=${QUEUE} export PARTITION=${PARTITION} export ROCOTO=${ROCOTO} + export ECFLOW=${ECFLOW} + export REGRESSIONTEST_LOG=${REGRESSIONTEST_LOG} export LOG_DIR=${LOG_DIR} export DEP_RUN=${DEP_RUN} EOF