From d11184e1b09bb017141728bc55d1df053980b357 Mon Sep 17 00:00:00 2001 From: "Jeffrey.S.Whitaker" Date: Wed, 23 Oct 2024 13:30:45 -0400 Subject: [PATCH 01/25] changes to enable DA cycling on gaea --- env/GAEA.env | 225 +++++++++++++++++++++++++++++++++++--- ush/load_ufsda_modules.sh | 4 +- workflow/hosts/gaea.yaml | 14 +-- 3 files changed, 219 insertions(+), 24 deletions(-) diff --git a/env/GAEA.env b/env/GAEA.env index 7736e0f1ea..02c05278b4 100755 --- a/env/GAEA.env +++ b/env/GAEA.env @@ -26,22 +26,127 @@ if [[ -n "${ntasks:-}" && -n "${max_tasks_per_node:-}" && -n "${tasks_per_node:- NTHREADS1=${threads_per_task:-1} [[ ${NTHREADSmax} -gt ${max_threads_per_task} ]] && NTHREADSmax=${max_threads_per_task} [[ ${NTHREADS1} -gt ${max_threads_per_task} ]] && NTHREADS1=${max_threads_per_task} - # This may be useful when Gaea is fully ported, so ignore SC warning - # shellcheck disable=SC2034 APRUN_default="${launcher} -n ${ntasks}" else echo "ERROR config.resources must be sourced before sourcing GAEA.env" exit 2 fi -if [[ "${step}" = "prep" ]]; then +case ${step} in + "prep" | "prepbufr") export POE="NO" - export BACK="NO" + export BACK=${BACK:-"YES"} export sys_tp="GAEA" export launcher_PREP="srun" + ;; + "prepsnowobs") -elif [[ "${step}" = "anal" ]] || [[ "${step}" = "analcalc" ]]; then + export APRUN_CALCFIMS="${APRUN_default}" + ;; + "prep_emissions") + + export APRUN="${APRUN_default}" + ;; + "waveinit" | "waveprep" | "wavepostsbs" | "wavepostbndpnt" | "wavepostpnt" | "wavepostbndpntbll") + + export CFP_MP="YES" + [[ "${step}" = "waveprep" ]] && export MP_PULSE=0 + export wavempexec=${launcher} + export wave_mpmd=${mpmd_opt} + + ;; + "atmanlvar") + + export NTHREADS_ATMANLVAR=${NTHREADSmax} + export APRUN_ATMANLVAR="${APRUN_default} --cpus-per-task=${NTHREADS_ATMANLVAR}" + ;; + "atmanlfv3inc") + + export NTHREADS_ATMANLFV3INC=${NTHREADSmax} + export APRUN_ATMANLFV3INC="${APRUN_default} --cpus-per-task=${NTHREADS_ATMANLFV3INC}" + ;; + "atmensanlobs") + + export 
NTHREADS_ATMENSANLOBS=${NTHREADSmax} + export APRUN_ATMENSANLOBS="${APRUN_default} --cpus-per-task=${NTHREADS_ATMENSANLOBS}" + ;; + "atmensanlsol") + + export NTHREADS_ATMENSANLSOL=${NTHREADSmax} + export APRUN_ATMENSANLSOL="${APRUN_default} --cpus-per-task=${NTHREADS_ATMENSANLSOL}" + ;; + "atmensanlletkf") + + export NTHREADS_ATMENSANLLETKF=${NTHREADSmax} + export APRUN_ATMENSANLLETKF="${APRUN_default} --cpus-per-task=${NTHREADS_ATMENSANLLETKF}" + ;; + "atmensanlfv3inc") + + export NTHREADS_ATMENSANLFV3INC=${NTHREADSmax} + export APRUN_ATMENSANLFV3INC="${APRUN_default} --cpus-per-task=${NTHREADS_ATMENSANLFV3INC}" + ;; + "aeroanlvar") + + export APRUNCFP="${launcher} -n \$ncmd ${mpmd_opt}" + + export NTHREADS_AEROANL=${NTHREADSmax} + export APRUN_AEROANL="${APRUN_default} --cpus-per-task=${NTHREADS_AEROANL}" + ;; + "aeroanlgenb") + + export NTHREADS_AEROANLGENB=${NTHREADSmax} + export APRUN_AEROANLGENB="${APRUN_default} --cpus-per-task=${NTHREADS_AEROANLGENB}" + ;; + "prepobsaero") + + export NTHREADS_PREPOBSAERO=${NTHREADS1} + export APRUN_PREPOBSAERO="${APRUN_default} --cpus-per-task=${NTHREADS_PREPOBSAERO}" +;; + "snowanl") + + export NTHREADS_SNOWANL=${NTHREADSmax} + export APRUN_SNOWANL="${APRUN_default} --cpus-per-task=${NTHREADS_SNOWANL}" + + export APRUN_APPLY_INCR="${launcher} -n 6" + ;; + "esnowrecen") + + export NTHREADS_ESNOWRECEN=${NTHREADSmax} + export APRUN_ESNOWRECEN="${APRUN_default} --cpus-per-task=${NTHREADS_ESNOWRECEN}" + + export APRUN_APPLY_INCR="${launcher} -n 6" + ;; + + "marinebmat") + + export APRUNCFP="${launcher} -n \$ncmd ${mpmd_opt}" + export APRUN_MARINEBMAT="${APRUN_default}" + ;; + "marineanlvar") + + export APRUNCFP="${launcher} -n \$ncmd ${mpmd_opt}" + export NTHREADS_MARINEANLVAR=${threads_per_task_anal:-${max_threads_per_task}} + export APRUN_MARINEANLVAR="${APRUN_default} --cpus-per-task=${NTHREADS_MARINEANLVAR}" + ;; +"ocnanalecen") + + export APRUNCFP="${launcher} -n \$ncmd ${mpmd_opt}" + + 
max_threads_per_task=$((max_tasks_per_node / tasks_per_node_ocnanalecen)) + + export NTHREADS_OCNANALECEN=${threads_per_task_ocnanalecen:-${max_threads_per_task}} + [[ ${NTHREADS_OCNANALECEN} -gt ${max_threads_per_task} ]] && export NTHREADS_OCNANALECEN=${max_threads_per_task} + export APRUN_OCNANALECEN="${launcher} -n ${ntasks_ocnanalecen} --cpus-per-task=${NTHREADS_OCNANALECEN}" +;; + "marineanlchkpt") + + export APRUNCFP="${launcher} -n \$ncmd ${mpmd_opt}" + + export NTHREADS_OCNANAL=${NTHREADSmax} + export APRUN_MARINEANLCHKPT="${APRUN_default} --cpus-per-task=${NTHREADS_OCNANAL}" + ;; + "anal" | "analcalc") export MKL_NUM_THREADS=4 export MKL_CBWR=AUTO @@ -50,7 +155,8 @@ elif [[ "${step}" = "anal" ]] || [[ "${step}" = "analcalc" ]]; then export USE_CFP=${USE_CFP:-"YES"} export APRUNCFP="${launcher} -n \$ncmd ${mpmd_opt}" - export NTHREADS_GSI=${NTHREADSmax} + + export NTHREADS_GSI=${threads_per_task_anal:-${max_threads_per_task}} export APRUN_GSI="${APRUN_default} --cpus-per-task=${NTHREADS_GSI}" export NTHREADS_CALCINC=${threads_per_task_calcinc:-1} @@ -65,37 +171,126 @@ elif [[ "${step}" = "anal" ]] || [[ "${step}" = "analcalc" ]]; then export NTHREADS_GAUSFCANL=1 ntasks_gausfcanl=${ntasks_gausfcanl:-1} export APRUN_GAUSFCANL="${launcher} -n ${ntasks_gausfcanl} --cpus-per-task=${NTHREADS_GAUSFCANL}" - -elif [[ "${step}" = "sfcanl" ]]; then + ;; + "sfcanl") export NTHREADS_CYCLE=${threads_per_task:-14} + [[ ${NTHREADS_CYCLE} -gt ${max_tasks_per_node} ]] && export NTHREADS_CYCLE=${max_tasks_per_node} export APRUN_CYCLE="${APRUN_default} --cpus-per-task=${NTHREADS_CYCLE}" + ;; + "eobs") -elif [[ "${step}" = "fcst" ]]; then + export MKL_NUM_THREADS=4 + export MKL_CBWR=AUTO + + export CFP_MP=${CFP_MP:-"YES"} + export USE_CFP=${USE_CFP:-"YES"} + export APRUNCFP="${launcher} -n \$ncmd ${mpmd_opt}" + + + export NTHREADS_GSI=${NTHREADSmax} + [[ ${NTHREADS_GSI} -gt ${max_threads_per_task} ]] && export NTHREADS_GSI=${max_threads_per_task} + export 
APRUN_GSI="${APRUN_default} --cpus-per-task=${NTHREADS_GSI}" + ;; + "eupd") + + export CFP_MP=${CFP_MP:-"YES"} + export USE_CFP=${USE_CFP:-"YES"} + export APRUNCFP="${launcher} -n \$ncmd ${mpmd_opt}" + + + export NTHREADS_ENKF=${NTHREADSmax} + export APRUN_ENKF="${launcher} -n ${ntasks_enkf:-${ntasks}} --cpus-per-task=${NTHREADS_ENKF}" + ;; + "fcst" | "efcs") + + export OMP_STACKSIZE=512M (( nnodes = (ntasks+tasks_per_node-1)/tasks_per_node )) (( ufs_ntasks = nnodes*tasks_per_node )) # With ESMF threading, the model wants to use the full node export APRUN_UFS="${launcher} -n ${ufs_ntasks}" unset nnodes ufs_ntasks + ;; -elif [[ "${step}" = "upp" ]]; then + "upp") export NTHREADS_UPP=${NTHREADS1} export APRUN_UPP="${APRUN_default} --cpus-per-task=${NTHREADS_UPP}" + ;; -elif [[ "${step}" = "atmos_products" ]]; then + "atmos_products") - export USE_CFP="YES" # Use MPMD for downstream product generation on Gaea + export USE_CFP="YES" # Use MPMD for downstream product generation + ;; -elif [[ "${step}" = "oceanice_products" ]]; then +"oceanice_products") export NTHREADS_OCNICEPOST=${NTHREADS1} export APRUN_OCNICEPOST="${launcher} -n 1 --cpus-per-task=${NTHREADS_OCNICEPOST}" +;; + + "ecen") + + export NTHREADS_ECEN=${NTHREADSmax} + export APRUN_ECEN="${APRUN_default} --cpus-per-task=${NTHREADS_ECEN}" + + export NTHREADS_CHGRES=${threads_per_task_chgres:-12} + [[ ${NTHREADS_CHGRES} -gt ${max_tasks_per_node} ]] && export NTHREADS_CHGRES=${max_tasks_per_node} + export APRUN_CHGRES="time" + + export NTHREADS_CALCINC=${threads_per_task_calcinc:-1} + [[ ${NTHREADS_CALCINC} -gt ${max_threads_per_task} ]] && export NTHREADS_CALCINC=${max_threads_per_task} + export APRUN_CALCINC="${APRUN_default} --cpus-per-task=${NTHREADS_CALCINC}" + + ;; + "esfc") + + export NTHREADS_ESFC=${NTHREADSmax} + export APRUN_ESFC="${APRUN_default} --cpus-per-task=${NTHREADS_ESFC}" + + export NTHREADS_CYCLE=${threads_per_task_cycle:-14} + [[ ${NTHREADS_CYCLE} -gt ${max_tasks_per_node} ]] && export 
NTHREADS_CYCLE=${max_tasks_per_node} + export APRUN_CYCLE="${APRUN_default} --cpus-per-task=${NTHREADS_CYCLE}" + + ;; + "epos") + + export NTHREADS_EPOS=${NTHREADSmax} + export APRUN_EPOS="${APRUN_default} --cpus-per-task=${NTHREADS_EPOS}" -elif [[ "${step}" = "fit2obs" ]]; then + ;; + "postsnd") + + export CFP_MP="YES" + + export NTHREADS_POSTSND=${NTHREADS1} + export APRUN_POSTSND="${APRUN_default} --cpus-per-task=${NTHREADS_POSTSND}" + + export NTHREADS_POSTSNDCFP=${threads_per_task_postsndcfp:-1} + [[ ${NTHREADS_POSTSNDCFP} -gt ${max_threads_per_task} ]] && export NTHREADS_POSTSNDCFP=${max_threads_per_task} + export APRUN_POSTSNDCFP="${launcher} -n ${ntasks_postsndcfp} ${mpmd_opt}" + + ;; + "awips") + + export NTHREADS_AWIPS=${NTHREADS1} + export APRUN_AWIPSCFP="${APRUN_default} ${mpmd_opt}" + + ;; + "gempak") + + echo "WARNING: ${step} is not enabled on ${machine}!" + + ;; + "fit2obs") export NTHREADS_FIT2OBS=${NTHREADS1} export MPIRUN="${APRUN_default} --cpus-per-task=${NTHREADS_FIT2OBS}" -fi + ;; + *) + # Some other job not yet defined here + echo "WARNING: The job step ${step} does not specify Hercules-specific resources" + ;; +esac diff --git a/ush/load_ufsda_modules.sh b/ush/load_ufsda_modules.sh index 8117d3f359..42bcfe75ad 100755 --- a/ush/load_ufsda_modules.sh +++ b/ush/load_ufsda_modules.sh @@ -34,13 +34,13 @@ source "${HOMEgfs}/ush/module-setup.sh" module use "${HOMEgfs}/sorc/gdas.cd/modulefiles" case "${MACHINE_ID}" in - ("hera" | "orion" | "hercules" | "wcoss2") + ("hera" | "orion" | "gaea" | "hercules" | "wcoss2") module load "${MODS}/${MACHINE_ID}" ncdump=$( command -v ncdump ) NETCDF=$( echo "${ncdump}" | cut -d " " -f 3 ) export NETCDF ;; - ("jet" | "gaea" | "s4" | "acorn") + ("jet" | "s4" | "acorn") echo WARNING: UFSDA NOT SUPPORTED ON THIS PLATFORM ;; *) diff --git a/workflow/hosts/gaea.yaml b/workflow/hosts/gaea.yaml index 5a37b5dabf..a3dc54340d 100644 --- a/workflow/hosts/gaea.yaml +++ b/workflow/hosts/gaea.yaml @@ -5,18 +5,18 @@ BASE_IC: 
'/gpfs/f5/ufs-ard/world-shared/global/glopara/data/ICSDIR' PACKAGEROOT: '/gpfs/f5/ufs-ard/world-shared/global/glopara/data/nwpara' COMROOT: '/gpfs/f5/ufs-ard/world-shared/global/glopara/data/com' COMINsyn: '${COMROOT}/gfs/prod/syndat' -HOMEDIR: '/gpfs/f5/ufs-ard/scratch/${USER}' -STMP: '/gpfs/f5/ufs-ard/scratch/${USER}' -PTMP: '/gpfs/f5/ufs-ard/scratch/${USER}' -NOSCRUB: $HOMEDIR ACCOUNT: ufs-ard +HOMEDIR: '/gpfs/f5/${ACCOUNT}/scratch/${USER}' +STMP: '/gpfs/f5/${ACCOUNT}/scratch/${USER}' +PTMP: '/gpfs/f5/${ACCOUNT}/scratch/${USER}' +NOSCRUB: $HOMEDIR SCHEDULER: slurm QUEUE: normal -QUEUE_SERVICE: normal +QUEUE_SERVICE: hpss PARTITION_BATCH: batch -PARTITION_SERVICE: batch +PARTITION_SERVICE: dtn_f5_f6 RESERVATION: '' -CLUSTERS: 'c5' +CLUSTERS: 'c5,es' CHGRP_RSTPROD: 'NO' CHGRP_CMD: 'chgrp rstprod' HPSSARCH: 'NO' From cbc8953726488be6ef4a8ea2cba931be61631c53 Mon Sep 17 00:00:00 2001 From: jswhit2 Date: Wed, 30 Oct 2024 18:06:28 +0000 Subject: [PATCH 02/25] fix machine name --- env/GAEA.env | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/env/GAEA.env b/env/GAEA.env index 02c05278b4..0e17758e8c 100755 --- a/env/GAEA.env +++ b/env/GAEA.env @@ -291,6 +291,6 @@ case ${step} in ;; *) # Some other job not yet defined here - echo "WARNING: The job step ${step} does not specify Hercules-specific resources" + echo "WARNING: The job step ${step} does not specify Gaea-specific resources" ;; esac From 7129d0c3271d2373e718374a968208c7b6bbd2c2 Mon Sep 17 00:00:00 2001 From: "Jeffrey.S.Whitaker" Date: Thu, 14 Nov 2024 10:59:25 -0500 Subject: [PATCH 03/25] updates for c5/c6 --- env/GAEA.env | 9 +++------ ush/detect_machine.sh | 16 +++++++++++----- ush/load_ufsda_modules.sh | 2 +- ush/module-setup.sh | 2 +- workflow/hosts.py | 2 +- 5 files changed, 17 insertions(+), 14 deletions(-) diff --git a/env/GAEA.env b/env/GAEA.env index 0e17758e8c..f34ddcd1d5 100755 --- a/env/GAEA.env +++ b/env/GAEA.env @@ -40,10 +40,6 @@ case ${step} in export sys_tp="GAEA" export 
launcher_PREP="srun" ;; - "prepsnowobs") - - export APRUN_CALCFIMS="${APRUN_default}" - ;; "prep_emissions") export APRUN="${APRUN_default}" @@ -105,6 +101,8 @@ case ${step} in ;; "snowanl") + export APRUN_CALCFIMS="${launcher} -n 1" + export NTHREADS_SNOWANL=${NTHREADSmax} export APRUN_SNOWANL="${APRUN_default} --cpus-per-task=${NTHREADS_SNOWANL}" @@ -126,8 +124,7 @@ case ${step} in "marineanlvar") export APRUNCFP="${launcher} -n \$ncmd ${mpmd_opt}" - export NTHREADS_MARINEANLVAR=${threads_per_task_anal:-${max_threads_per_task}} - export APRUN_MARINEANLVAR="${APRUN_default} --cpus-per-task=${NTHREADS_MARINEANLVAR}" + export APRUN_MARINEANLVAR="${APRUN_default}" ;; "ocnanalecen") diff --git a/ush/detect_machine.sh b/ush/detect_machine.sh index 8ad217140a..c8b8081931 100755 --- a/ush/detect_machine.sh +++ b/ush/detect_machine.sh @@ -21,8 +21,11 @@ case $(hostname -f) in dlogin0[1-9].dogwood.wcoss2.ncep.noaa.gov) MACHINE_ID=wcoss2 ;; ### dogwood01-9 dlogin10.dogwood.wcoss2.ncep.noaa.gov) MACHINE_ID=wcoss2 ;; ### dogwood10 - gaea5[1-8]) MACHINE_ID=gaea ;; ### gaea51-58 - gaea5[1-8].ncrc.gov) MACHINE_ID=gaea ;; ### gaea51-58 + gaea5[1-8]) MACHINE_ID=gaeac5 ;; ### gaea51-58 + gaea5[1-8].ncrc.gov) MACHINE_ID=gaeac5 ;; ### gaea51-58 + + gaea6[1-8]) MACHINE_ID=gaeac6 ;; ### gaea61-68 + gaea6[1-8].ncrc.gov) MACHINE_ID=gaeac6 ;; ### gaea61-68 hfe0[1-9]) MACHINE_ID=hera ;; ### hera01-09 hfe1[0-2]) MACHINE_ID=hera ;; ### hera10-12 @@ -81,9 +84,12 @@ elif [[ -d /work ]]; then else MACHINE_ID=orion fi -elif [[ -d /gpfs && -d /ncrc ]]; then - # We are on GAEA. - MACHINE_ID=gaea +elif [[ -d /gpfs/f5 && -d /ncrc ]]; then + # We are on GAEAC5. + MACHINE_ID=gaeac5 +elif [[ -d /gpfs/f6 && -d /ncrc ]]; then + # We are on GAEAC6. 
+ MACHINE_ID=gaeac6 elif [[ -d /data/prod ]]; then # We are on SSEC's S4 MACHINE_ID=s4 diff --git a/ush/load_ufsda_modules.sh b/ush/load_ufsda_modules.sh index 42bcfe75ad..ab0e4c18fc 100755 --- a/ush/load_ufsda_modules.sh +++ b/ush/load_ufsda_modules.sh @@ -34,7 +34,7 @@ source "${HOMEgfs}/ush/module-setup.sh" module use "${HOMEgfs}/sorc/gdas.cd/modulefiles" case "${MACHINE_ID}" in - ("hera" | "orion" | "gaea" | "hercules" | "wcoss2") + ("hera" | "orion" | "hercules" | "gaea" | "gaeac6" | "wcoss2") module load "${MODS}/${MACHINE_ID}" ncdump=$( command -v ncdump ) NETCDF=$( echo "${ncdump}" | cut -d " " -f 3 ) diff --git a/ush/module-setup.sh b/ush/module-setup.sh index 398562652d..3a85540d3c 100755 --- a/ush/module-setup.sh +++ b/ush/module-setup.sh @@ -68,7 +68,7 @@ elif [[ ${MACHINE_ID} = stampede* ]] ; then module purge elif [[ ${MACHINE_ID} = gaea* ]] ; then - # We are on GAEA. + # We are on GAEA (C5 or C6). if ( ! eval module help > /dev/null 2>&1 ) ; then source /usr/share/lmod/lmod/init/bash source /etc/profile diff --git a/workflow/hosts.py b/workflow/hosts.py index 7bde58f95f..67e548d2a4 100644 --- a/workflow/hosts.py +++ b/workflow/hosts.py @@ -48,7 +48,7 @@ def detect(cls): machine = 'WCOSS2' elif os.path.exists('/data/prod'): machine = 'S4' - elif os.path.exists('/gpfs/f5'): + elif os.path.exists('/gpfs/f5') or os.path.exists('/gpfs/f6'): machine = 'GAEA' elif container is not None: machine = 'CONTAINER' From 7ed5e8dddffc96fe4b72ae6b3f026710cc816002 Mon Sep 17 00:00:00 2001 From: "Jeffrey.S.Whitaker" Date: Thu, 14 Nov 2024 11:01:58 -0500 Subject: [PATCH 04/25] new files for c5/c6 --- modulefiles/module_base.gaeac5.lua | 48 +++++++++++++++++++++++++++ modulefiles/module_base.gaeac6.lua | 48 +++++++++++++++++++++++++++ modulefiles/module_gwsetup.gaeac5.lua | 20 +++++++++++ modulefiles/module_gwsetup.gaeac6.lua | 20 +++++++++++ workflow/hosts/gaeac5.yaml | 30 +++++++++++++++++ workflow/hosts/gaeac6.yaml | 29 ++++++++++++++++ 6 files changed, 195 
insertions(+) create mode 100644 modulefiles/module_base.gaeac5.lua create mode 100644 modulefiles/module_base.gaeac6.lua create mode 100644 modulefiles/module_gwsetup.gaeac5.lua create mode 100644 modulefiles/module_gwsetup.gaeac6.lua create mode 100644 workflow/hosts/gaeac5.yaml create mode 100644 workflow/hosts/gaeac6.yaml diff --git a/modulefiles/module_base.gaeac5.lua b/modulefiles/module_base.gaeac5.lua new file mode 100644 index 0000000000..f379225380 --- /dev/null +++ b/modulefiles/module_base.gaeac5.lua @@ -0,0 +1,48 @@ +help([[ +Load environment to run GFS on Gaea +]]) + +local spack_mod_path=(os.getenv("spack_mod_path") or "None") +prepend_path("MODULEPATH", spack_mod_path) + +load(pathJoin("stack-intel", (os.getenv("stack_intel_ver") or "None"))) +load(pathJoin("stack-cray-mpich", (os.getenv("stack_cray_mpich_ver") or "None"))) +load(pathJoin("python", (os.getenv("python_ver") or "None"))) + +load(pathJoin("jasper", (os.getenv("jasper_ver") or "None"))) +load(pathJoin("libpng", (os.getenv("libpng_ver") or "None"))) +load(pathJoin("cdo", (os.getenv("cdo_ver") or "None"))) +load(pathJoin("hdf5", (os.getenv("hdf5_ver") or "None"))) +load(pathJoin("netcdf-c", (os.getenv("netcdf_c_ver") or "None"))) +load(pathJoin("netcdf-fortran", (os.getenv("netcdf_fortran_ver") or "None"))) +load(pathJoin("perlbrew", (os.getenv("perl_ver") or "None"))) + +load(pathJoin("nco", (os.getenv("nco_ver") or "None"))) +load(pathJoin("prod_util", (os.getenv("prod_util_ver") or "None"))) +load(pathJoin("grib-util", (os.getenv("grib_util_ver") or "None"))) +load(pathJoin("g2tmpl", (os.getenv("g2tmpl_ver") or "None"))) +load(pathJoin("gsi-ncdiag", (os.getenv("gsi_ncdiag_ver") or "None"))) +load(pathJoin("crtm", (os.getenv("crtm_ver") or "None"))) +load(pathJoin("bufr", (os.getenv("bufr_ver") or "None"))) +load(pathJoin("wgrib2", (os.getenv("wgrib2_ver") or "None"))) +load(pathJoin("py-netcdf4", (os.getenv("py_netcdf4_ver") or "None"))) +load(pathJoin("py-f90nml", 
(os.getenv("py_f90nml_ver") or "None"))) +load(pathJoin("py-pyyaml", (os.getenv("py_pyyaml_ver") or "None"))) +load(pathJoin("py-jinja2", (os.getenv("py_jinja2_ver") or "None"))) +load(pathJoin("py-pandas", (os.getenv("py_pandas_ver") or "None"))) +load(pathJoin("py-python-dateutil", (os.getenv("py_python_dateutil_ver") or "None"))) +load(pathJoin("met", (os.getenv("met_ver") or "None"))) +load(pathJoin("metplus", (os.getenv("metplus_ver") or "None"))) +load(pathJoin("py-xarray", (os.getenv("py_xarray_ver") or "None"))) + +setenv("WGRIB2","wgrib2") +setenv("UTILROOT",(os.getenv("prod_util_ROOT") or "None")) + +prepend_path("MODULEPATH", pathJoin("/gpfs/f5/ufs-ard/world-shared/global/glopara/data/git/prepobs/v" .. (os.getenv("prepobs_run_ver") or "None"), "modulefiles")) +load(pathJoin("prepobs", (os.getenv("prepobs_run_ver") or "None"))) + +prepend_path("MODULEPATH", pathJoin("/gpfs/f5/ufs-ard/world-shared/global/glopara/data/git/Fit2Obs/v" .. (os.getenv("fit2obs_ver") or "None"), "modulefiles")) +load(pathJoin("fit2obs", (os.getenv("fit2obs_ver") or "None"))) + + +whatis("Description: GFS run setup environment") diff --git a/modulefiles/module_base.gaeac6.lua b/modulefiles/module_base.gaeac6.lua new file mode 100644 index 0000000000..f379225380 --- /dev/null +++ b/modulefiles/module_base.gaeac6.lua @@ -0,0 +1,48 @@ +help([[ +Load environment to run GFS on Gaea +]]) + +local spack_mod_path=(os.getenv("spack_mod_path") or "None") +prepend_path("MODULEPATH", spack_mod_path) + +load(pathJoin("stack-intel", (os.getenv("stack_intel_ver") or "None"))) +load(pathJoin("stack-cray-mpich", (os.getenv("stack_cray_mpich_ver") or "None"))) +load(pathJoin("python", (os.getenv("python_ver") or "None"))) + +load(pathJoin("jasper", (os.getenv("jasper_ver") or "None"))) +load(pathJoin("libpng", (os.getenv("libpng_ver") or "None"))) +load(pathJoin("cdo", (os.getenv("cdo_ver") or "None"))) +load(pathJoin("hdf5", (os.getenv("hdf5_ver") or "None"))) +load(pathJoin("netcdf-c", 
(os.getenv("netcdf_c_ver") or "None"))) +load(pathJoin("netcdf-fortran", (os.getenv("netcdf_fortran_ver") or "None"))) +load(pathJoin("perlbrew", (os.getenv("perl_ver") or "None"))) + +load(pathJoin("nco", (os.getenv("nco_ver") or "None"))) +load(pathJoin("prod_util", (os.getenv("prod_util_ver") or "None"))) +load(pathJoin("grib-util", (os.getenv("grib_util_ver") or "None"))) +load(pathJoin("g2tmpl", (os.getenv("g2tmpl_ver") or "None"))) +load(pathJoin("gsi-ncdiag", (os.getenv("gsi_ncdiag_ver") or "None"))) +load(pathJoin("crtm", (os.getenv("crtm_ver") or "None"))) +load(pathJoin("bufr", (os.getenv("bufr_ver") or "None"))) +load(pathJoin("wgrib2", (os.getenv("wgrib2_ver") or "None"))) +load(pathJoin("py-netcdf4", (os.getenv("py_netcdf4_ver") or "None"))) +load(pathJoin("py-f90nml", (os.getenv("py_f90nml_ver") or "None"))) +load(pathJoin("py-pyyaml", (os.getenv("py_pyyaml_ver") or "None"))) +load(pathJoin("py-jinja2", (os.getenv("py_jinja2_ver") or "None"))) +load(pathJoin("py-pandas", (os.getenv("py_pandas_ver") or "None"))) +load(pathJoin("py-python-dateutil", (os.getenv("py_python_dateutil_ver") or "None"))) +load(pathJoin("met", (os.getenv("met_ver") or "None"))) +load(pathJoin("metplus", (os.getenv("metplus_ver") or "None"))) +load(pathJoin("py-xarray", (os.getenv("py_xarray_ver") or "None"))) + +setenv("WGRIB2","wgrib2") +setenv("UTILROOT",(os.getenv("prod_util_ROOT") or "None")) + +prepend_path("MODULEPATH", pathJoin("/gpfs/f5/ufs-ard/world-shared/global/glopara/data/git/prepobs/v" .. (os.getenv("prepobs_run_ver") or "None"), "modulefiles")) +load(pathJoin("prepobs", (os.getenv("prepobs_run_ver") or "None"))) + +prepend_path("MODULEPATH", pathJoin("/gpfs/f5/ufs-ard/world-shared/global/glopara/data/git/Fit2Obs/v" .. 
(os.getenv("fit2obs_ver") or "None"), "modulefiles")) +load(pathJoin("fit2obs", (os.getenv("fit2obs_ver") or "None"))) + + +whatis("Description: GFS run setup environment") diff --git a/modulefiles/module_gwsetup.gaeac5.lua b/modulefiles/module_gwsetup.gaeac5.lua new file mode 100644 index 0000000000..0bcc689bad --- /dev/null +++ b/modulefiles/module_gwsetup.gaeac5.lua @@ -0,0 +1,20 @@ +help([[ +Load environment to run GFS workflow setup scripts on Gaea +]]) + +prepend_path("MODULEPATH", "/ncrc/proj/epic/rocoto/modulefiles") +load(pathJoin("rocoto")) + +prepend_path("MODULEPATH", "/ncrc/proj/epic/spack-stack/spack-stack-1.6.0/envs/unified-env/install/modulefiles/Core") + +local stack_intel_ver=os.getenv("stack_intel_ver") or "2023.1.0" +local python_ver=os.getenv("python_ver") or "3.10.13" + +load(pathJoin("stack-intel", stack_intel_ver)) +load(pathJoin("python", python_ver)) +load("py-jinja2") +load("py-pyyaml") +load("py-numpy") +load("git-lfs") + +whatis("Description: GFS run setup environment") diff --git a/modulefiles/module_gwsetup.gaeac6.lua b/modulefiles/module_gwsetup.gaeac6.lua new file mode 100644 index 0000000000..7893d7414c --- /dev/null +++ b/modulefiles/module_gwsetup.gaeac6.lua @@ -0,0 +1,20 @@ +help([[ +Load environment to run GFS workflow setup scripts on Gaea +]]) + +prepend_path("MODULEPATH", "/ncrc/proj/epic/rocoto/modulefiles") +load(pathJoin("rocoto")) + +prepend_path("MODULEPATH", "/ncrc/proj/epic/spack-stack/c6/spack-stack-1.6.0/envs/unified-env/install/modulefiles/Core") + +local stack_intel_ver=os.getenv("stack_intel_ver") or "2023.2.0" +local python_ver=os.getenv("python_ver") or "3.10.13" + +load(pathJoin("stack-intel", stack_intel_ver)) +load(pathJoin("python", python_ver)) +load("py-jinja2") +load("py-pyyaml") +load("py-numpy") +load("git-lfs") + +whatis("Description: GFS run setup environment") diff --git a/workflow/hosts/gaeac5.yaml b/workflow/hosts/gaeac5.yaml new file mode 100644 index 0000000000..cb2d070a05 --- /dev/null +++ 
b/workflow/hosts/gaeac5.yaml @@ -0,0 +1,30 @@ +BASE_GIT: '/gpfs/f5/ufs-ard/world-shared/global/glopara/data/git' +DMPDIR: '/gpfs/f5/ufs-ard/world-shared/global/glopara/data/dump' +BASE_DATA: '/gpfs/f5/ufs-ard/world-shared/global/glopara/data' +BASE_IC: '/gpfs/f5/ufs-ard/world-shared/global/glopara/data/ICSDIR' +PACKAGEROOT: '/gpfs/f5/ufs-ard/world-shared/global/glopara/data/nwpara' +COMROOT: '/gpfs/f5/ufs-ard/world-shared/global/glopara/data/com' +COMINsyn: '${COMROOT}/gfs/prod/syndat' +NOSCRUB: $HOMEDIR +ACCOUNT: ufs-ard +HOMEDIR: '/gpfs/f5/${ACCOUNT}/scratch/${USER}' +STMP: '/gpfs/f5/${ACCOUNT}/scratch/${USER}' +PTMP: '/gpfs/f5/${ACCOUNT}/scratch/${USER}' +NOSCRUB: $HOMEDIR +SCHEDULER: slurm +QUEUE: normal +QUEUE_SERVICE: hpss +PARTITION_BATCH: batch +PARTITION_SERVICE: dtn_f5_f6 +RESERVATION: '' +CLUSTERS: 'c5,es' +CHGRP_RSTPROD: 'NO' +CHGRP_CMD: 'chgrp rstprod' +HPSSARCH: 'NO' +HPSS_PROJECT: emc-global +LOCALARCH: 'NO' +ATARDIR: '${NOSCRUB}/archive_rotdir/${PSLOT}' +MAKE_NSSTBUFR: 'NO' +MAKE_ACFTBUFR: 'NO' +SUPPORTED_RESOLUTIONS: ['C1152', 'C768', 'C384', 'C192', 'C96', 'C48'] +AERO_INPUTS_DIR: /gpfs/f5/epic/proj-shared/global/glopara/data/gocart_emissions diff --git a/workflow/hosts/gaeac6.yaml b/workflow/hosts/gaeac6.yaml new file mode 100644 index 0000000000..ef10d1b14e --- /dev/null +++ b/workflow/hosts/gaeac6.yaml @@ -0,0 +1,29 @@ +BASE_GIT: '/gpfs/f6/ufs-ard/world-shared/global/glopara/data/git' +DMPDIR: '/gpfs/f6/ufs-ard/world-shared/global/glopara/data/dump' +BASE_DATA: '/gpfs/f6/ufs-ard/world-shared/global/glopara/data' +BASE_IC: '/gpfs/f6/ufs-ard/world-shared/global/glopara/data/ICSDIR' +PACKAGEROOT: '/gpfs/f6/ufs-ard/world-shared/global/glopara/data/nwpara' +COMROOT: '/gpfs/f6/ufs-ard/world-shared/global/glopara/data/com' +COMINsyn: '${COMROOT}/gfs/prod/syndat' +NOSCRUB: $HOMEDIR +ACCOUNT: ufs-ard +HOMEDIR: '/gpfs/f6/${ACCOUNT}/scratch/${USER}' +STMP: '/gpfs/f6/${ACCOUNT}/scratch/${USER}' +PTMP: '/gpfs/f6/${ACCOUNT}/scratch/${USER}' +NOSCRUB: 
$HOMEDIR +SCHEDULER: slurm +QUEUE: normal +QUEUE_SERVICE: hpss +PARTITION_BATCH: batch +PARTITION_SERVICE: dtn_f5_f6 +RESERVATION: '' +CLUSTERS: 'c6,es' +CHGRP_RSTPROD: 'NO' +CHGRP_CMD: 'chgrp rstprod' +HPSSARCH: 'NO' +HPSS_PROJECT: emc-global +LOCALARCH: 'NO' +ATARDIR: '${NOSCRUB}/archive_rotdir/${PSLOT}' +MAKE_NSSTBUFR: 'NO' +MAKE_ACFTBUFR: 'NO' +SUPPORTED_RESOLUTIONS: ['C1152', 'C768', 'C384', 'C192', 'C96', 'C48'] From f2a4679388e3e8d4b347a6fa3c9addd392072af5 Mon Sep 17 00:00:00 2001 From: "Jeffrey.S.Whitaker" Date: Thu, 14 Nov 2024 11:22:33 -0500 Subject: [PATCH 05/25] gaea -> gaeac5 --- ush/load_ufsda_modules.sh | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/ush/load_ufsda_modules.sh b/ush/load_ufsda_modules.sh index ab0e4c18fc..017ff31d68 100755 --- a/ush/load_ufsda_modules.sh +++ b/ush/load_ufsda_modules.sh @@ -34,7 +34,7 @@ source "${HOMEgfs}/ush/module-setup.sh" module use "${HOMEgfs}/sorc/gdas.cd/modulefiles" case "${MACHINE_ID}" in - ("hera" | "orion" | "hercules" | "gaea" | "gaeac6" | "wcoss2") + ("hera" | "orion" | "hercules" | "gaeac5" | "gaeac6" | "wcoss2") module load "${MODS}/${MACHINE_ID}" ncdump=$( command -v ncdump ) NETCDF=$( echo "${ncdump}" | cut -d " " -f 3 ) From a0ce4ec43b5ed6e86c9e1d8a213af2b46a2366f4 Mon Sep 17 00:00:00 2001 From: "Jeffrey.S.Whitaker" Date: Thu, 14 Nov 2024 11:26:00 -0500 Subject: [PATCH 06/25] no longer needed --- workflow/hosts/gaea.yaml | 29 ----------------------------- 1 file changed, 29 deletions(-) delete mode 100644 workflow/hosts/gaea.yaml diff --git a/workflow/hosts/gaea.yaml b/workflow/hosts/gaea.yaml deleted file mode 100644 index ef31f49806..0000000000 --- a/workflow/hosts/gaea.yaml +++ /dev/null @@ -1,29 +0,0 @@ -BASE_GIT: '/gpfs/f5/ufs-ard/world-shared/global/glopara/data/git' -DMPDIR: '/gpfs/f5/ufs-ard/world-shared/global/glopara/data/dump' -BASE_DATA: '/gpfs/f5/ufs-ard/world-shared/global/glopara/data' -BASE_IC: '/gpfs/f5/ufs-ard/world-shared/global/glopara/data/ICSDIR' -PACKAGEROOT: 
'/gpfs/f5/ufs-ard/world-shared/global/glopara/data/nwpara' -COMROOT: '/gpfs/f5/ufs-ard/world-shared/global/glopara/data/com' -COMINsyn: '${COMROOT}/gfs/prod/syndat' -ACCOUNT: ufs-ard -HOMEDIR: '/gpfs/f5/${ACCOUNT}/scratch/${USER}' -STMP: '/gpfs/f5/${ACCOUNT}/scratch/${USER}' -PTMP: '/gpfs/f5/${ACCOUNT}/scratch/${USER}' -NOSCRUB: $HOMEDIR -SCHEDULER: slurm -QUEUE: normal -QUEUE_SERVICE: hpss -PARTITION_BATCH: batch -PARTITION_SERVICE: dtn_f5_f6 -RESERVATION: '' -CLUSTERS: 'c5,es' -CHGRP_RSTPROD: 'NO' -CHGRP_CMD: 'chgrp rstprod' -HPSSARCH: 'NO' -HPSS_PROJECT: emc-global -LOCALARCH: 'NO' -ATARDIR: '${NOSCRUB}/archive_rotdir/${PSLOT}' -MAKE_NSSTBUFR: 'NO' -MAKE_ACFTBUFR: 'NO' -SUPPORTED_RESOLUTIONS: ['C1152', 'C768', 'C384', 'C192', 'C96', 'C48'] -AERO_INPUTS_DIR: /gpfs/f5/epic/proj-shared/global/glopara/data/gocart_emissions From 779b3138a8d65d34c8359a4e77657ec49e97d026 Mon Sep 17 00:00:00 2001 From: "Jeffrey.S.Whitaker" Date: Fri, 15 Nov 2024 11:00:19 -0500 Subject: [PATCH 07/25] update dir names --- modulefiles/module_base.gaeac6.lua | 6 +++--- parm/config/gfs/config.resources | 7 ++++++- sorc/link_workflow.sh | 3 ++- ush/load_fv3gfs_modules.sh | 3 ++- 4 files changed, 13 insertions(+), 6 deletions(-) diff --git a/modulefiles/module_base.gaeac6.lua b/modulefiles/module_base.gaeac6.lua index f379225380..a63f42e55f 100644 --- a/modulefiles/module_base.gaeac6.lua +++ b/modulefiles/module_base.gaeac6.lua @@ -38,10 +38,10 @@ load(pathJoin("py-xarray", (os.getenv("py_xarray_ver") or "None"))) setenv("WGRIB2","wgrib2") setenv("UTILROOT",(os.getenv("prod_util_ROOT") or "None")) -prepend_path("MODULEPATH", pathJoin("/gpfs/f5/ufs-ard/world-shared/global/glopara/data/git/prepobs/v" .. (os.getenv("prepobs_run_ver") or "None"), "modulefiles")) +prepend_path("MODULEPATH", pathJoin("/gpfs/f6/ira-da/world-shared/global/glopara/data/git/prepobs/v" .. 
(os.getenv("prepobs_run_ver") or "None"), "modulefiles")) load(pathJoin("prepobs", (os.getenv("prepobs_run_ver") or "None"))) - -prepend_path("MODULEPATH", pathJoin("/gpfs/f5/ufs-ard/world-shared/global/glopara/data/git/Fit2Obs/v" .. (os.getenv("fit2obs_ver") or "None"), "modulefiles")) + +prepend_path("MODULEPATH", pathJoin("/gpfs/f6/ira-da/world-shared/global/glopara/data/git/Fit2Obs/v" .. (os.getenv("fit2obs_ver") or "None"), "modulefiles")) load(pathJoin("fit2obs", (os.getenv("fit2obs_ver") or "None"))) diff --git a/parm/config/gfs/config.resources b/parm/config/gfs/config.resources index cddd1643fd..2a06eba3b7 100644 --- a/parm/config/gfs/config.resources +++ b/parm/config/gfs/config.resources @@ -47,7 +47,12 @@ case ${machine} in # shellcheck disable=SC2034 mem_node_max="96GB" ;; - "GAEA") + "GAEAC5") + max_tasks_per_node=128 + # shellcheck disable=SC2034 + mem_node_max="251GB" + ;; + "GAEAC6") max_tasks_per_node=128 # shellcheck disable=SC2034 mem_node_max="251GB" diff --git a/sorc/link_workflow.sh b/sorc/link_workflow.sh index b35b7ff35a..9153eff0d2 100755 --- a/sorc/link_workflow.sh +++ b/sorc/link_workflow.sh @@ -75,7 +75,8 @@ case "${machine}" in "hercules") FIX_DIR="/work/noaa/global/glopara/fix" ;; "jet") FIX_DIR="/lfs5/HFIP/hfv3gfs/glopara/FIX/fix" ;; "s4") FIX_DIR="/data/prod/glopara/fix" ;; - "gaea") FIX_DIR="/gpfs/f5/ufs-ard/world-shared/global/glopara/data/fix" ;; + "gaeac5") FIX_DIR="/gpfs/f5/ufs-ard/world-shared/global/glopara/data/fix" ;; + "gaeac6") FIX_DIR="/gpfs/f6/ira-da/world-shared/global/glopara/data/fix" ;; "noaacloud") FIX_DIR="/contrib/global-workflow-shared-data/fix" ;; *) echo "FATAL: Unknown target machine ${machine}, couldn't set FIX_DIR" diff --git a/ush/load_fv3gfs_modules.sh b/ush/load_fv3gfs_modules.sh index ff6f64cece..f5f63bd7ff 100755 --- a/ush/load_fv3gfs_modules.sh +++ b/ush/load_fv3gfs_modules.sh @@ -18,9 +18,10 @@ source "${HOMEgfs}/versions/run.ver" # Load our modules: module use "${HOMEgfs}/modulefiles" +echo 
"${HOMEgfs}/modulefiles" case "${MACHINE_ID}" in - "wcoss2" | "hera" | "orion" | "hercules" | "gaea" | "jet" | "s4" | "noaacloud") + "wcoss2" | "hera" | "orion" | "hercules" | "gaeac5" | "gaeac6" | "jet" | "s4" | "noaacloud") module load "module_base.${MACHINE_ID}" ;; *) From f266c4910da708053c7b46ef8ead84122e53fcdd Mon Sep 17 00:00:00 2001 From: "Jeffrey.S.Whitaker" Date: Fri, 15 Nov 2024 11:00:59 -0500 Subject: [PATCH 08/25] separate files for c5, c6 --- env/{GAEA.env => GAEAC5.ENV} | 0 env/GAEAC6.env | 293 +++++++++++++++++++++++++++++++++++ 2 files changed, 293 insertions(+) rename env/{GAEA.env => GAEAC5.ENV} (100%) create mode 100755 env/GAEAC6.env diff --git a/env/GAEA.env b/env/GAEAC5.ENV similarity index 100% rename from env/GAEA.env rename to env/GAEAC5.ENV diff --git a/env/GAEAC6.env b/env/GAEAC6.env new file mode 100755 index 0000000000..f34ddcd1d5 --- /dev/null +++ b/env/GAEAC6.env @@ -0,0 +1,293 @@ +#! /usr/bin/env bash + +if [[ $# -ne 1 ]]; then + + echo "Must specify an input argument to set runtime environment variables!" 
+ exit 1 + +fi + +step=$1 + +export launcher="srun -l --export=ALL" +export mpmd_opt="--multi-prog --output=mpmd.%j.%t.out" + +export OMP_STACKSIZE=2048000 +export NTHSTACK=1024000000 + +ulimit -s unlimited +ulimit -a + +# Calculate common variables +# Check first if the dependent variables are set +if [[ -n "${ntasks:-}" && -n "${max_tasks_per_node:-}" && -n "${tasks_per_node:-}" ]]; then + max_threads_per_task=$((max_tasks_per_node / tasks_per_node)) + NTHREADSmax=${threads_per_task:-${max_threads_per_task}} + NTHREADS1=${threads_per_task:-1} + [[ ${NTHREADSmax} -gt ${max_threads_per_task} ]] && NTHREADSmax=${max_threads_per_task} + [[ ${NTHREADS1} -gt ${max_threads_per_task} ]] && NTHREADS1=${max_threads_per_task} + APRUN_default="${launcher} -n ${ntasks}" +else + echo "ERROR config.resources must be sourced before sourcing GAEA.env" + exit 2 +fi + +case ${step} in + "prep" | "prepbufr") + + export POE="NO" + export BACK=${BACK:-"YES"} + export sys_tp="GAEA" + export launcher_PREP="srun" + ;; + "prep_emissions") + + export APRUN="${APRUN_default}" + ;; + "waveinit" | "waveprep" | "wavepostsbs" | "wavepostbndpnt" | "wavepostpnt" | "wavepostbndpntbll") + + export CFP_MP="YES" + [[ "${step}" = "waveprep" ]] && export MP_PULSE=0 + export wavempexec=${launcher} + export wave_mpmd=${mpmd_opt} + + ;; + "atmanlvar") + + export NTHREADS_ATMANLVAR=${NTHREADSmax} + export APRUN_ATMANLVAR="${APRUN_default} --cpus-per-task=${NTHREADS_ATMANLVAR}" + ;; + "atmanlfv3inc") + + export NTHREADS_ATMANLFV3INC=${NTHREADSmax} + export APRUN_ATMANLFV3INC="${APRUN_default} --cpus-per-task=${NTHREADS_ATMANLFV3INC}" + ;; + "atmensanlobs") + + export NTHREADS_ATMENSANLOBS=${NTHREADSmax} + export APRUN_ATMENSANLOBS="${APRUN_default} --cpus-per-task=${NTHREADS_ATMENSANLOBS}" + ;; + "atmensanlsol") + + export NTHREADS_ATMENSANLSOL=${NTHREADSmax} + export APRUN_ATMENSANLSOL="${APRUN_default} --cpus-per-task=${NTHREADS_ATMENSANLSOL}" + ;; + "atmensanlletkf") + + export 
NTHREADS_ATMENSANLLETKF=${NTHREADSmax} + export APRUN_ATMENSANLLETKF="${APRUN_default} --cpus-per-task=${NTHREADS_ATMENSANLLETKF}" + ;; + "atmensanlfv3inc") + + export NTHREADS_ATMENSANLFV3INC=${NTHREADSmax} + export APRUN_ATMENSANLFV3INC="${APRUN_default} --cpus-per-task=${NTHREADS_ATMENSANLFV3INC}" + ;; + "aeroanlvar") + + export APRUNCFP="${launcher} -n \$ncmd ${mpmd_opt}" + + export NTHREADS_AEROANL=${NTHREADSmax} + export APRUN_AEROANL="${APRUN_default} --cpus-per-task=${NTHREADS_AEROANL}" + ;; + "aeroanlgenb") + + export NTHREADS_AEROANLGENB=${NTHREADSmax} + export APRUN_AEROANLGENB="${APRUN_default} --cpus-per-task=${NTHREADS_AEROANLGENB}" + ;; + "prepobsaero") + + export NTHREADS_PREPOBSAERO=${NTHREADS1} + export APRUN_PREPOBSAERO="${APRUN_default} --cpus-per-task=${NTHREADS_PREPOBSAERO}" +;; + "snowanl") + + export APRUN_CALCFIMS="${launcher} -n 1" + + export NTHREADS_SNOWANL=${NTHREADSmax} + export APRUN_SNOWANL="${APRUN_default} --cpus-per-task=${NTHREADS_SNOWANL}" + + export APRUN_APPLY_INCR="${launcher} -n 6" + ;; + "esnowrecen") + + export NTHREADS_ESNOWRECEN=${NTHREADSmax} + export APRUN_ESNOWRECEN="${APRUN_default} --cpus-per-task=${NTHREADS_ESNOWRECEN}" + + export APRUN_APPLY_INCR="${launcher} -n 6" + ;; + + "marinebmat") + + export APRUNCFP="${launcher} -n \$ncmd ${mpmd_opt}" + export APRUN_MARINEBMAT="${APRUN_default}" + ;; + "marineanlvar") + + export APRUNCFP="${launcher} -n \$ncmd ${mpmd_opt}" + export APRUN_MARINEANLVAR="${APRUN_default}" + ;; +"ocnanalecen") + + export APRUNCFP="${launcher} -n \$ncmd ${mpmd_opt}" + + max_threads_per_task=$((max_tasks_per_node / tasks_per_node_ocnanalecen)) + + export NTHREADS_OCNANALECEN=${threads_per_task_ocnanalecen:-${max_threads_per_task}} + [[ ${NTHREADS_OCNANALECEN} -gt ${max_threads_per_task} ]] && export NTHREADS_OCNANALECEN=${max_threads_per_task} + export APRUN_OCNANALECEN="${launcher} -n ${ntasks_ocnanalecen} --cpus-per-task=${NTHREADS_OCNANALECEN}" +;; + "marineanlchkpt") + + export 
APRUNCFP="${launcher} -n \$ncmd ${mpmd_opt}" + + export NTHREADS_OCNANAL=${NTHREADSmax} + export APRUN_MARINEANLCHKPT="${APRUN_default} --cpus-per-task=${NTHREADS_OCNANAL}" + ;; + "anal" | "analcalc") + + export MKL_NUM_THREADS=4 + export MKL_CBWR=AUTO + + export CFP_MP=${CFP_MP:-"YES"} + export USE_CFP=${USE_CFP:-"YES"} + export APRUNCFP="${launcher} -n \$ncmd ${mpmd_opt}" + + + export NTHREADS_GSI=${threads_per_task_anal:-${max_threads_per_task}} + export APRUN_GSI="${APRUN_default} --cpus-per-task=${NTHREADS_GSI}" + + export NTHREADS_CALCINC=${threads_per_task_calcinc:-1} + [[ ${NTHREADS_CALCINC} -gt ${max_threads_per_task} ]] && export NTHREADS_CALCINC=${max_threads_per_task} + export APRUN_CALCINC="${launcher} \$ncmd --cpus-per-task=${NTHREADS_CALCINC}" + + export NTHREADS_CYCLE=${threads_per_task_cycle:-12} + [[ ${NTHREADS_CYCLE} -gt ${max_tasks_per_node} ]] && export NTHREADS_CYCLE=${max_tasks_per_node} + ntasks_cycle=${ntiles:-6} + export APRUN_CYCLE="${launcher} -n ${ntasks_cycle} --cpus-per-task=${NTHREADS_CYCLE}" + + export NTHREADS_GAUSFCANL=1 + ntasks_gausfcanl=${ntasks_gausfcanl:-1} + export APRUN_GAUSFCANL="${launcher} -n ${ntasks_gausfcanl} --cpus-per-task=${NTHREADS_GAUSFCANL}" + ;; + "sfcanl") + + export NTHREADS_CYCLE=${threads_per_task:-14} + [[ ${NTHREADS_CYCLE} -gt ${max_tasks_per_node} ]] && export NTHREADS_CYCLE=${max_tasks_per_node} + export APRUN_CYCLE="${APRUN_default} --cpus-per-task=${NTHREADS_CYCLE}" + ;; + "eobs") + + export MKL_NUM_THREADS=4 + export MKL_CBWR=AUTO + + export CFP_MP=${CFP_MP:-"YES"} + export USE_CFP=${USE_CFP:-"YES"} + export APRUNCFP="${launcher} -n \$ncmd ${mpmd_opt}" + + + export NTHREADS_GSI=${NTHREADSmax} + [[ ${NTHREADS_GSI} -gt ${max_threads_per_task} ]] && export NTHREADS_GSI=${max_threads_per_task} + export APRUN_GSI="${APRUN_default} --cpus-per-task=${NTHREADS_GSI}" + ;; + "eupd") + + export CFP_MP=${CFP_MP:-"YES"} + export USE_CFP=${USE_CFP:-"YES"} + export APRUNCFP="${launcher} -n \$ncmd ${mpmd_opt}" + + + 
export NTHREADS_ENKF=${NTHREADSmax} + export APRUN_ENKF="${launcher} -n ${ntasks_enkf:-${ntasks}} --cpus-per-task=${NTHREADS_ENKF}" + ;; + "fcst" | "efcs") + + export OMP_STACKSIZE=512M + + (( nnodes = (ntasks+tasks_per_node-1)/tasks_per_node )) + (( ufs_ntasks = nnodes*tasks_per_node )) + # With ESMF threading, the model wants to use the full node + export APRUN_UFS="${launcher} -n ${ufs_ntasks}" + unset nnodes ufs_ntasks + ;; + + "upp") + + export NTHREADS_UPP=${NTHREADS1} + export APRUN_UPP="${APRUN_default} --cpus-per-task=${NTHREADS_UPP}" + ;; + + "atmos_products") + + export USE_CFP="YES" # Use MPMD for downstream product generation + ;; + +"oceanice_products") + + export NTHREADS_OCNICEPOST=${NTHREADS1} + export APRUN_OCNICEPOST="${launcher} -n 1 --cpus-per-task=${NTHREADS_OCNICEPOST}" +;; + + "ecen") + + export NTHREADS_ECEN=${NTHREADSmax} + export APRUN_ECEN="${APRUN_default} --cpus-per-task=${NTHREADS_ECEN}" + + export NTHREADS_CHGRES=${threads_per_task_chgres:-12} + [[ ${NTHREADS_CHGRES} -gt ${max_tasks_per_node} ]] && export NTHREADS_CHGRES=${max_tasks_per_node} + export APRUN_CHGRES="time" + + export NTHREADS_CALCINC=${threads_per_task_calcinc:-1} + [[ ${NTHREADS_CALCINC} -gt ${max_threads_per_task} ]] && export NTHREADS_CALCINC=${max_threads_per_task} + export APRUN_CALCINC="${APRUN_default} --cpus-per-task=${NTHREADS_CALCINC}" + + ;; + "esfc") + + export NTHREADS_ESFC=${NTHREADSmax} + export APRUN_ESFC="${APRUN_default} --cpus-per-task=${NTHREADS_ESFC}" + + export NTHREADS_CYCLE=${threads_per_task_cycle:-14} + [[ ${NTHREADS_CYCLE} -gt ${max_tasks_per_node} ]] && export NTHREADS_CYCLE=${max_tasks_per_node} + export APRUN_CYCLE="${APRUN_default} --cpus-per-task=${NTHREADS_CYCLE}" + + ;; + "epos") + + export NTHREADS_EPOS=${NTHREADSmax} + export APRUN_EPOS="${APRUN_default} --cpus-per-task=${NTHREADS_EPOS}" + + ;; + "postsnd") + + export CFP_MP="YES" + + export NTHREADS_POSTSND=${NTHREADS1} + export APRUN_POSTSND="${APRUN_default} 
--cpus-per-task=${NTHREADS_POSTSND}" + + export NTHREADS_POSTSNDCFP=${threads_per_task_postsndcfp:-1} + [[ ${NTHREADS_POSTSNDCFP} -gt ${max_threads_per_task} ]] && export NTHREADS_POSTSNDCFP=${max_threads_per_task} + export APRUN_POSTSNDCFP="${launcher} -n ${ntasks_postsndcfp} ${mpmd_opt}" + + ;; + "awips") + + export NTHREADS_AWIPS=${NTHREADS1} + export APRUN_AWIPSCFP="${APRUN_default} ${mpmd_opt}" + + ;; + "gempak") + + echo "WARNING: ${step} is not enabled on ${machine}!" + + ;; + "fit2obs") + + export NTHREADS_FIT2OBS=${NTHREADS1} + export MPIRUN="${APRUN_default} --cpus-per-task=${NTHREADS_FIT2OBS}" + + ;; + *) + # Some other job not yet defined here + echo "WARNING: The job step ${step} does not specify Gaea-specific resources" + ;; +esac From 69d4edac835008857f6fd9089a5d27cddefcd55c Mon Sep 17 00:00:00 2001 From: "Jeffrey.S.Whitaker" Date: Fri, 15 Nov 2024 11:02:23 -0500 Subject: [PATCH 09/25] separate files for c5, c6 --- parm/config/gfs/config.resources.GAEA | 33 ----------------------- parm/config/gfs/config.resources.GAEAC5 | 1 + parm/config/gfs/config.resources.GAEAC6 | 1 + versions/{run.gaea.ver => run.gaeac5.ver} | 0 versions/run.gaeac6.ver | 8 ++++++ 5 files changed, 10 insertions(+), 33 deletions(-) delete mode 100644 parm/config/gfs/config.resources.GAEA create mode 120000 parm/config/gfs/config.resources.GAEAC5 create mode 120000 parm/config/gfs/config.resources.GAEAC6 rename versions/{run.gaea.ver => run.gaeac5.ver} (100%) create mode 100644 versions/run.gaeac6.ver diff --git a/parm/config/gfs/config.resources.GAEA b/parm/config/gfs/config.resources.GAEA deleted file mode 100644 index c50601da00..0000000000 --- a/parm/config/gfs/config.resources.GAEA +++ /dev/null @@ -1,33 +0,0 @@ -#! 
/usr/bin/env bash - -# Gaea-specific job resources - -case ${step} in - "prep") - # Run on two nodes (requires ~400GB total) - tasks_per_node=7 - ;; - - "eobs") - # The number of tasks and cores used must be the same for eobs - # See https://github.com/NOAA-EMC/global-workflow/issues/2092 for details - case ${CASE} in - "C768" | "C384") - export tasks_per_node=50 - ;; - *) - export tasks_per_node=40 - ;; - esac - ;; - - *) - ;; - -esac - -unset memory -# shellcheck disable=SC2312 -for mem_var in $(env | grep '^memory_' | cut -d= -f1); do - unset "${mem_var}" -done diff --git a/parm/config/gfs/config.resources.GAEAC5 b/parm/config/gfs/config.resources.GAEAC5 new file mode 120000 index 0000000000..4cc203ee40 --- /dev/null +++ b/parm/config/gfs/config.resources.GAEAC5 @@ -0,0 +1 @@ +config.resources.GAEA \ No newline at end of file diff --git a/parm/config/gfs/config.resources.GAEAC6 b/parm/config/gfs/config.resources.GAEAC6 new file mode 120000 index 0000000000..4cc203ee40 --- /dev/null +++ b/parm/config/gfs/config.resources.GAEAC6 @@ -0,0 +1 @@ +config.resources.GAEA \ No newline at end of file diff --git a/versions/run.gaea.ver b/versions/run.gaeac5.ver similarity index 100% rename from versions/run.gaea.ver rename to versions/run.gaeac5.ver diff --git a/versions/run.gaeac6.ver b/versions/run.gaeac6.ver new file mode 100644 index 0000000000..4162cc63e3 --- /dev/null +++ b/versions/run.gaeac6.ver @@ -0,0 +1,8 @@ +export stack_intel_ver=2023.2.0 +export stack_cray_mpich_ver=8.1.29 +export spack_env=gsi-addon + +export perl_ver=5.38.2 + +source "${HOMEgfs:-}/versions/spack.ver" +export spack_mod_path="/ncrc/proj/epic/spack-stack/c6/spack-stack-${spack_stack_ver}/envs/${spack_env}/install/modulefiles/Core" From a448782da549be56a7fa4cb69f304b2b1dc55609 Mon Sep 17 00:00:00 2001 From: "Jeffrey.S.Whitaker" Date: Fri, 15 Nov 2024 11:03:01 -0500 Subject: [PATCH 10/25] update --- workflow/hosts.py | 8 +++++--- 1 file changed, 5 insertions(+), 3 deletions(-) diff --git 
a/workflow/hosts.py b/workflow/hosts.py index 67e548d2a4..5058b30ca9 100644 --- a/workflow/hosts.py +++ b/workflow/hosts.py @@ -16,7 +16,7 @@ class Host: """ SUPPORTED_HOSTS = ['HERA', 'ORION', 'JET', 'HERCULES', - 'WCOSS2', 'S4', 'CONTAINER', 'GAEA', + 'WCOSS2', 'S4', 'CONTAINER', 'GAEAC5', 'GAEAC6', 'AWSPW', 'AZUREPW', 'GOOGLEPW'] def __init__(self, host=None): @@ -48,8 +48,10 @@ def detect(cls): machine = 'WCOSS2' elif os.path.exists('/data/prod'): machine = 'S4' - elif os.path.exists('/gpfs/f5') or os.path.exists('/gpfs/f6'): - machine = 'GAEA' + elif os.path.exists('/gpfs/f5'): + machine = 'GAEAC5' + elif os.path.exists('/gpfs/f6'): + machine = 'GAEAC6' elif container is not None: machine = 'CONTAINER' elif pw_csp is not None: From 6d74c5f7a11e8dc847a874a175b5ad6ff2e1a144 Mon Sep 17 00:00:00 2001 From: "Jeffrey.S.Whitaker" Date: Fri, 15 Nov 2024 11:03:45 -0500 Subject: [PATCH 11/25] update --- workflow/hosts/gaeac6.yaml | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/workflow/hosts/gaeac6.yaml b/workflow/hosts/gaeac6.yaml index ef10d1b14e..b1d4f02b9b 100644 --- a/workflow/hosts/gaeac6.yaml +++ b/workflow/hosts/gaeac6.yaml @@ -1,12 +1,12 @@ BASE_GIT: '/gpfs/f6/ufs-ard/world-shared/global/glopara/data/git' -DMPDIR: '/gpfs/f6/ufs-ard/world-shared/global/glopara/data/dump' +#DMPDIR: '/gpfs/f6/ufs-ard/world-shared/global/glopara/data/dump' BASE_DATA: '/gpfs/f6/ufs-ard/world-shared/global/glopara/data' BASE_IC: '/gpfs/f6/ufs-ard/world-shared/global/glopara/data/ICSDIR' PACKAGEROOT: '/gpfs/f6/ufs-ard/world-shared/global/glopara/data/nwpara' -COMROOT: '/gpfs/f6/ufs-ard/world-shared/global/glopara/data/com' +#COMROOT: '/gpfs/f6/ufs-ard/world-shared/global/glopara/data/com' COMINsyn: '${COMROOT}/gfs/prod/syndat' NOSCRUB: $HOMEDIR -ACCOUNT: ufs-ard +ACCOUNT: ira-da HOMEDIR: '/gpfs/f6/${ACCOUNT}/scratch/${USER}' STMP: '/gpfs/f6/${ACCOUNT}/scratch/${USER}' PTMP: '/gpfs/f6/${ACCOUNT}/scratch/${USER}' @@ -17,7 +17,7 @@ QUEUE_SERVICE: hpss 
PARTITION_BATCH: batch PARTITION_SERVICE: dtn_f5_f6 RESERVATION: '' -CLUSTERS: 'c6,es' +CLUSTERS: 'c6' CHGRP_RSTPROD: 'NO' CHGRP_CMD: 'chgrp rstprod' HPSSARCH: 'NO' From 3e87509d9b470876223eb571e540752d4de43942 Mon Sep 17 00:00:00 2001 From: "Jeffrey.S.Whitaker" Date: Fri, 15 Nov 2024 11:48:39 -0500 Subject: [PATCH 12/25] update c5 modulefile --- modulefiles/module_base.gaea.lua | 48 --------------------------- modulefiles/module_gwsetup.gaeac5.lua | 2 +- 2 files changed, 1 insertion(+), 49 deletions(-) delete mode 100644 modulefiles/module_base.gaea.lua diff --git a/modulefiles/module_base.gaea.lua b/modulefiles/module_base.gaea.lua deleted file mode 100644 index f379225380..0000000000 --- a/modulefiles/module_base.gaea.lua +++ /dev/null @@ -1,48 +0,0 @@ -help([[ -Load environment to run GFS on Gaea -]]) - -local spack_mod_path=(os.getenv("spack_mod_path") or "None") -prepend_path("MODULEPATH", spack_mod_path) - -load(pathJoin("stack-intel", (os.getenv("stack_intel_ver") or "None"))) -load(pathJoin("stack-cray-mpich", (os.getenv("stack_cray_mpich_ver") or "None"))) -load(pathJoin("python", (os.getenv("python_ver") or "None"))) - -load(pathJoin("jasper", (os.getenv("jasper_ver") or "None"))) -load(pathJoin("libpng", (os.getenv("libpng_ver") or "None"))) -load(pathJoin("cdo", (os.getenv("cdo_ver") or "None"))) -load(pathJoin("hdf5", (os.getenv("hdf5_ver") or "None"))) -load(pathJoin("netcdf-c", (os.getenv("netcdf_c_ver") or "None"))) -load(pathJoin("netcdf-fortran", (os.getenv("netcdf_fortran_ver") or "None"))) -load(pathJoin("perlbrew", (os.getenv("perl_ver") or "None"))) - -load(pathJoin("nco", (os.getenv("nco_ver") or "None"))) -load(pathJoin("prod_util", (os.getenv("prod_util_ver") or "None"))) -load(pathJoin("grib-util", (os.getenv("grib_util_ver") or "None"))) -load(pathJoin("g2tmpl", (os.getenv("g2tmpl_ver") or "None"))) -load(pathJoin("gsi-ncdiag", (os.getenv("gsi_ncdiag_ver") or "None"))) -load(pathJoin("crtm", (os.getenv("crtm_ver") or "None"))) 
-load(pathJoin("bufr", (os.getenv("bufr_ver") or "None"))) -load(pathJoin("wgrib2", (os.getenv("wgrib2_ver") or "None"))) -load(pathJoin("py-netcdf4", (os.getenv("py_netcdf4_ver") or "None"))) -load(pathJoin("py-f90nml", (os.getenv("py_f90nml_ver") or "None"))) -load(pathJoin("py-pyyaml", (os.getenv("py_pyyaml_ver") or "None"))) -load(pathJoin("py-jinja2", (os.getenv("py_jinja2_ver") or "None"))) -load(pathJoin("py-pandas", (os.getenv("py_pandas_ver") or "None"))) -load(pathJoin("py-python-dateutil", (os.getenv("py_python_dateutil_ver") or "None"))) -load(pathJoin("met", (os.getenv("met_ver") or "None"))) -load(pathJoin("metplus", (os.getenv("metplus_ver") or "None"))) -load(pathJoin("py-xarray", (os.getenv("py_xarray_ver") or "None"))) - -setenv("WGRIB2","wgrib2") -setenv("UTILROOT",(os.getenv("prod_util_ROOT") or "None")) - -prepend_path("MODULEPATH", pathJoin("/gpfs/f5/ufs-ard/world-shared/global/glopara/data/git/prepobs/v" .. (os.getenv("prepobs_run_ver") or "None"), "modulefiles")) -load(pathJoin("prepobs", (os.getenv("prepobs_run_ver") or "None"))) - -prepend_path("MODULEPATH", pathJoin("/gpfs/f5/ufs-ard/world-shared/global/glopara/data/git/Fit2Obs/v" .. 
(os.getenv("fit2obs_ver") or "None"), "modulefiles")) -load(pathJoin("fit2obs", (os.getenv("fit2obs_ver") or "None"))) - - -whatis("Description: GFS run setup environment") diff --git a/modulefiles/module_gwsetup.gaeac5.lua b/modulefiles/module_gwsetup.gaeac5.lua index 0bcc689bad..24aa2f75e6 100644 --- a/modulefiles/module_gwsetup.gaeac5.lua +++ b/modulefiles/module_gwsetup.gaeac5.lua @@ -7,7 +7,7 @@ load(pathJoin("rocoto")) prepend_path("MODULEPATH", "/ncrc/proj/epic/spack-stack/spack-stack-1.6.0/envs/unified-env/install/modulefiles/Core") -local stack_intel_ver=os.getenv("stack_intel_ver") or "2023.1.0" +local stack_intel_ver=os.getenv("stack_intel_ver") or "2023.2.0" local python_ver=os.getenv("python_ver") or "3.10.13" load(pathJoin("stack-intel", stack_intel_ver)) From d4888c4b55cb17448fb2f236da900da1724bd659 Mon Sep 17 00:00:00 2001 From: "Jeffrey.S.Whitaker" Date: Fri, 15 Nov 2024 14:29:47 -0500 Subject: [PATCH 13/25] update max_tasks_per_node for c6 --- parm/config/gfs/config.resources | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/parm/config/gfs/config.resources b/parm/config/gfs/config.resources index 2a06eba3b7..9639192519 100644 --- a/parm/config/gfs/config.resources +++ b/parm/config/gfs/config.resources @@ -53,7 +53,7 @@ case ${machine} in mem_node_max="251GB" ;; "GAEAC6") - max_tasks_per_node=128 + max_tasks_per_node=192 # shellcheck disable=SC2034 mem_node_max="251GB" ;; From 936c0574fc44f00a2ed6f3b0e6bab3aa0cf55678 Mon Sep 17 00:00:00 2001 From: "Jeffrey.S.Whitaker" Date: Fri, 15 Nov 2024 14:34:39 -0500 Subject: [PATCH 14/25] update --- env/{GAEAC5.ENV => GAEAC5.env} | 2 +- env/GAEAC6.env | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) rename env/{GAEAC5.ENV => GAEAC5.env} (99%) diff --git a/env/GAEAC5.ENV b/env/GAEAC5.env similarity index 99% rename from env/GAEAC5.ENV rename to env/GAEAC5.env index f34ddcd1d5..1ca1dbe844 100755 --- a/env/GAEAC5.ENV +++ b/env/GAEAC5.env @@ -288,6 +288,6 @@ case ${step} in ;; *) # Some 
other job not yet defined here - echo "WARNING: The job step ${step} does not specify Gaea-specific resources" + echo "WARNING: The job step ${step} does not specify GaeaC5-specific resources" ;; esac diff --git a/env/GAEAC6.env b/env/GAEAC6.env index f34ddcd1d5..264d31ce2a 100755 --- a/env/GAEAC6.env +++ b/env/GAEAC6.env @@ -288,6 +288,6 @@ case ${step} in ;; *) # Some other job not yet defined here - echo "WARNING: The job step ${step} does not specify Gaea-specific resources" + echo "WARNING: The job step ${step} does not specify GaeaC6-specific resources" ;; esac From bed74a0bc69c28f0093cbb100117fa06a29ae69d Mon Sep 17 00:00:00 2001 From: "Jeffrey.S.Whitaker" Date: Fri, 15 Nov 2024 14:57:13 -0500 Subject: [PATCH 15/25] fix for build gdas app on c5,c6 --- sorc/build_all.sh | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/sorc/build_all.sh b/sorc/build_all.sh index e75c853c39..e8a3fe482d 100755 --- a/sorc/build_all.sh +++ b/sorc/build_all.sh @@ -149,7 +149,7 @@ build_opts["ww3prepost"]="${_wave_opt} ${_verbose_opt} ${_build_ufs_opt} ${_buil # Optional DA builds if [[ "${_build_ufsda}" == "YES" ]]; then - if [[ "${MACHINE_ID}" != "orion" && "${MACHINE_ID}" != "hera" && "${MACHINE_ID}" != "hercules" && "${MACHINE_ID}" != "wcoss2" && "${MACHINE_ID}" != "noaacloud" && "${MACHINE_ID}" != "gaea" ]]; then + if [[ "${MACHINE_ID}" != "orion" && "${MACHINE_ID}" != "hera" && "${MACHINE_ID}" != "hercules" && "${MACHINE_ID}" != "wcoss2" && "${MACHINE_ID}" != "noaacloud" && "${MACHINE_ID}" != "gaeac5" && "${MACHINE_ID}" != "gaeac6" ]]; then echo "NOTE: The GDAS App is not supported on ${MACHINE_ID}. Disabling build." 
else build_jobs["gdas"]=8 From f384bc28c76130ef4f5270a71aa29ce13761a812 Mon Sep 17 00:00:00 2001 From: "Jeffrey.S.Whitaker" Date: Fri, 15 Nov 2024 15:14:44 -0500 Subject: [PATCH 16/25] update --- workflow/hosts/gaeac5.yaml | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/workflow/hosts/gaeac5.yaml b/workflow/hosts/gaeac5.yaml index cb2d070a05..dd6744dd26 100644 --- a/workflow/hosts/gaeac5.yaml +++ b/workflow/hosts/gaeac5.yaml @@ -1,12 +1,12 @@ BASE_GIT: '/gpfs/f5/ufs-ard/world-shared/global/glopara/data/git' -DMPDIR: '/gpfs/f5/ufs-ard/world-shared/global/glopara/data/dump' +#DMPDIR: '/gpfs/f5/ufs-ard/world-shared/global/glopara/data/dump' BASE_DATA: '/gpfs/f5/ufs-ard/world-shared/global/glopara/data' BASE_IC: '/gpfs/f5/ufs-ard/world-shared/global/glopara/data/ICSDIR' PACKAGEROOT: '/gpfs/f5/ufs-ard/world-shared/global/glopara/data/nwpara' -COMROOT: '/gpfs/f5/ufs-ard/world-shared/global/glopara/data/com' +#COMROOT: '/gpfs/f5/ufs-ard/world-shared/global/glopara/data/com' COMINsyn: '${COMROOT}/gfs/prod/syndat' NOSCRUB: $HOMEDIR -ACCOUNT: ufs-ard +ACCOUNT: nggps_psd HOMEDIR: '/gpfs/f5/${ACCOUNT}/scratch/${USER}' STMP: '/gpfs/f5/${ACCOUNT}/scratch/${USER}' PTMP: '/gpfs/f5/${ACCOUNT}/scratch/${USER}' @@ -17,7 +17,7 @@ QUEUE_SERVICE: hpss PARTITION_BATCH: batch PARTITION_SERVICE: dtn_f5_f6 RESERVATION: '' -CLUSTERS: 'c5,es' +CLUSTERS: 'c5' CHGRP_RSTPROD: 'NO' CHGRP_CMD: 'chgrp rstprod' HPSSARCH: 'NO' From 07e7cd50ec58311c2894e8db443477a99f24f093 Mon Sep 17 00:00:00 2001 From: "Jeffrey.S.Whitaker" Date: Fri, 15 Nov 2024 15:17:45 -0500 Subject: [PATCH 17/25] update links for fix dirs --- sorc/link_workflow.sh | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/sorc/link_workflow.sh b/sorc/link_workflow.sh index 9153eff0d2..58d6ac5b81 100755 --- a/sorc/link_workflow.sh +++ b/sorc/link_workflow.sh @@ -75,8 +75,8 @@ case "${machine}" in "hercules") FIX_DIR="/work/noaa/global/glopara/fix" ;; "jet") 
FIX_DIR="/lfs5/HFIP/hfv3gfs/glopara/FIX/fix" ;; "s4") FIX_DIR="/data/prod/glopara/fix" ;; - "gaeac5") FIX_DIR="/gpfs/f5/ufs-ard/world-shared/global/glopara/data/fix" ;; - "gaeac6") FIX_DIR="/gpfs/f6/ira-da/world-shared/global/glopara/data/fix" ;; + "gaeac5") FIX_DIR="/gpfs/f5/ufs-ard/world-shared/global/glopara/fix" ;; + "gaeac6") FIX_DIR="/gpfs/f6/bil-fire8/world-shared/global/glopara/fix" ;; "noaacloud") FIX_DIR="/contrib/global-workflow-shared-data/fix" ;; *) echo "FATAL: Unknown target machine ${machine}, couldn't set FIX_DIR" From 775d04deabfd000326378081d833bde15d7496bb Mon Sep 17 00:00:00 2001 From: "Jeffrey.S.Whitaker" Date: Fri, 15 Nov 2024 21:57:09 -0500 Subject: [PATCH 18/25] update --- parm/config/gfs/config.resources.GAEAC5 | 36 ++++++++++++++++++++++++- parm/config/gfs/config.resources.GAEAC6 | 36 ++++++++++++++++++++++++- 2 files changed, 70 insertions(+), 2 deletions(-) mode change 120000 => 100644 parm/config/gfs/config.resources.GAEAC5 mode change 120000 => 100644 parm/config/gfs/config.resources.GAEAC6 diff --git a/parm/config/gfs/config.resources.GAEAC5 b/parm/config/gfs/config.resources.GAEAC5 deleted file mode 120000 index 4cc203ee40..0000000000 --- a/parm/config/gfs/config.resources.GAEAC5 +++ /dev/null @@ -1 +0,0 @@ -config.resources.GAEA \ No newline at end of file diff --git a/parm/config/gfs/config.resources.GAEAC5 b/parm/config/gfs/config.resources.GAEAC5 new file mode 100644 index 0000000000..fe3bc96153 --- /dev/null +++ b/parm/config/gfs/config.resources.GAEAC5 @@ -0,0 +1,35 @@ +#! 
/usr/bin/env bash + +# GaeaC5-specific job resources + +case ${step} in + "prep") + # Run on two nodes (requires ~400GB total) + tasks_per_node=7 + ;; + + "eobs") + # The number of tasks and cores used must be the same for eobs + # See https://github.com/NOAA-EMC/global-workflow/issues/2092 for details + case ${CASE} in + "C768" | "C384") + export tasks_per_node=50 + ;; + *) + export tasks_per_node=40 + ;; + esac + ;; + + *) + ;; + +esac + +export FI_VERBS_PREFER_XRC=0 + +unset memory +# shellcheck disable=SC2312 +for mem_var in $(env | grep '^memory_' | cut -d= -f1); do + unset "${mem_var}" +done diff --git a/parm/config/gfs/config.resources.GAEAC6 b/parm/config/gfs/config.resources.GAEAC6 deleted file mode 120000 index 4cc203ee40..0000000000 --- a/parm/config/gfs/config.resources.GAEAC6 +++ /dev/null @@ -1 +0,0 @@ -config.resources.GAEA \ No newline at end of file diff --git a/parm/config/gfs/config.resources.GAEAC6 b/parm/config/gfs/config.resources.GAEAC6 new file mode 100644 index 0000000000..fe3bc96153 --- /dev/null +++ b/parm/config/gfs/config.resources.GAEAC6 @@ -0,0 +1,35 @@ +#! 
/usr/bin/env bash + +# GaeaC6-specific job resources + +case ${step} in + "prep") + # Run on two nodes (requires ~400GB total) + tasks_per_node=7 + ;; + + "eobs") + # The number of tasks and cores used must be the same for eobs + # See https://github.com/NOAA-EMC/global-workflow/issues/2092 for details + case ${CASE} in + "C768" | "C384") + export tasks_per_node=50 + ;; + *) + export tasks_per_node=40 + ;; + esac + ;; + + *) + ;; + +esac + +export FI_VERBS_PREFER_XRC=0 + +unset memory +# shellcheck disable=SC2312 +for mem_var in $(env | grep '^memory_' | cut -d= -f1); do + unset "${mem_var}" +done From e30007c74c685bfaf42de7edcb4eefd2bc65351e Mon Sep 17 00:00:00 2001 From: "Jeffrey.S.Whitaker" Date: Fri, 15 Nov 2024 21:59:23 -0500 Subject: [PATCH 19/25] not needed --- modulefiles/module_gwsetup.gaea.lua | 20 -------------------- 1 file changed, 20 deletions(-) delete mode 100644 modulefiles/module_gwsetup.gaea.lua diff --git a/modulefiles/module_gwsetup.gaea.lua b/modulefiles/module_gwsetup.gaea.lua deleted file mode 100644 index 0bcc689bad..0000000000 --- a/modulefiles/module_gwsetup.gaea.lua +++ /dev/null @@ -1,20 +0,0 @@ -help([[ -Load environment to run GFS workflow setup scripts on Gaea -]]) - -prepend_path("MODULEPATH", "/ncrc/proj/epic/rocoto/modulefiles") -load(pathJoin("rocoto")) - -prepend_path("MODULEPATH", "/ncrc/proj/epic/spack-stack/spack-stack-1.6.0/envs/unified-env/install/modulefiles/Core") - -local stack_intel_ver=os.getenv("stack_intel_ver") or "2023.1.0" -local python_ver=os.getenv("python_ver") or "3.10.13" - -load(pathJoin("stack-intel", stack_intel_ver)) -load(pathJoin("python", python_ver)) -load("py-jinja2") -load("py-pyyaml") -load("py-numpy") -load("git-lfs") - -whatis("Description: GFS run setup environment") From 38a1060cc071d3c9ab0eae6772717bd7eeabc0fb Mon Sep 17 00:00:00 2001 From: "Jeffrey.S.Whitaker" Date: Fri, 15 Nov 2024 22:14:11 -0500 Subject: [PATCH 20/25] update --- versions/build.gaeac5.ver | 5 +++++ versions/run.gaeac5.ver |
4 ++-- 2 files changed, 7 insertions(+), 2 deletions(-) create mode 100644 versions/build.gaeac5.ver diff --git a/versions/build.gaeac5.ver b/versions/build.gaeac5.ver new file mode 100644 index 0000000000..1b473aa297 --- /dev/null +++ b/versions/build.gaeac5.ver @@ -0,0 +1,5 @@ +export stack_intel_ver=2023.2.0 +export stack_cray_mpich_ver=8.1.28 +export spack_env=gsi-addon-dev +source "${HOMEgfs:-}/versions/spack.ver" +export spack_mod_path="/ncrc/proj/epic/spack-stack/spack-stack-${spack_stack_ver}/envs/${spack_env}/install/modulefiles/Core" diff --git a/versions/run.gaeac5.ver b/versions/run.gaeac5.ver index 81aa70df57..9d4075f334 100644 --- a/versions/run.gaeac5.ver +++ b/versions/run.gaeac5.ver @@ -1,5 +1,5 @@ -export stack_intel_ver=2023.1.0 -export stack_cray_mpich_ver=8.1.25 +export stack_intel_ver=2023.2.0 +export stack_cray_mpich_ver=8.1.28 export spack_env=gsi-addon-dev export perl_ver=5.38.2 From c2c87d426c8bdce7349b6bd70fa1d0a3f5a1499c Mon Sep 17 00:00:00 2001 From: "Jeffrey.S.Whitaker" Date: Wed, 20 Nov 2024 18:20:07 -0500 Subject: [PATCH 21/25] fix output frequency for cice history files (to every 3h, not every dt) --- ush/parsing_namelists_CICE.sh | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/ush/parsing_namelists_CICE.sh b/ush/parsing_namelists_CICE.sh index 3822094c97..51b5143bc3 100755 --- a/ush/parsing_namelists_CICE.sh +++ b/ush/parsing_namelists_CICE.sh @@ -70,7 +70,7 @@ local CICE_RESTART_FORMAT="pnetcdf2" local CICE_DUMPFREQ="y" # "h","d","m" or "y" for restarts at intervals of "hours", "days", "months" or "years" local CICE_DUMPFREQ_N=10000 # Set this to a really large value, as cice, mom6 and cmeps restart interval is controlled by ufs.configure local CICE_DIAGFREQ=$(( 86400 / DT_CICE )) # frequency of diagnostic output in timesteps, recommended for 1x per day -local CICE_HISTFREQ_N="0, 0, ${FHOUT_ICE}, 1, 1" +local CICE_HISTFREQ_N="0, 0, ${FHOUT_ICE}, 0, 1" if [[ "${RUN}" =~ "gdas" ]]; then local 
CICE_HIST_AVG=".false., .false., .false., .false., .false." # DA needs instantaneous else From 594cd5293e99b7979f35cb0340c0e85581214c82 Mon Sep 17 00:00:00 2001 From: "Jeffrey.S.Whitaker" Date: Wed, 20 Nov 2024 18:21:46 -0500 Subject: [PATCH 22/25] update --- workflow/hosts/gaeac6.yaml | 12 ++++++------ 1 file changed, 6 insertions(+), 6 deletions(-) diff --git a/workflow/hosts/gaeac6.yaml b/workflow/hosts/gaeac6.yaml index b1d4f02b9b..2d9a2838ed 100644 --- a/workflow/hosts/gaeac6.yaml +++ b/workflow/hosts/gaeac6.yaml @@ -1,9 +1,9 @@ -BASE_GIT: '/gpfs/f6/ufs-ard/world-shared/global/glopara/data/git' -#DMPDIR: '/gpfs/f6/ufs-ard/world-shared/global/glopara/data/dump' -BASE_DATA: '/gpfs/f6/ufs-ard/world-shared/global/glopara/data' -BASE_IC: '/gpfs/f6/ufs-ard/world-shared/global/glopara/data/ICSDIR' -PACKAGEROOT: '/gpfs/f6/ufs-ard/world-shared/global/glopara/data/nwpara' -#COMROOT: '/gpfs/f6/ufs-ard/world-shared/global/glopara/data/com' +BASE_GIT: '/gpfs/f6/bil-fire8/world-shared/global/glopara/data/git' +#DMPDIR: '/gpfs/f6/bil-fire8/world-shared/global/glopara/data/dump' +BASE_DATA: '/gpfs/f6/bil-fire8/world-shared/global/glopara/data' +BASE_IC: '/gpfs/f6/bil-fire8/world-shared/global/glopara/data/ICSDIR' +PACKAGEROOT: '/gpfs/f6/bil-fire8/world-shared/global/glopara/data/nwpara' +#COMROOT: '/gpfs/f6/bil-fire8/world-shared/global/glopara/data/com' COMINsyn: '${COMROOT}/gfs/prod/syndat' NOSCRUB: $HOMEDIR ACCOUNT: ira-da From 70db434be00297989ec231f693a5a1cf51e5a9fa Mon Sep 17 00:00:00 2001 From: "Jeffrey.S.Whitaker" Date: Wed, 11 Dec 2024 11:06:37 -0500 Subject: [PATCH 23/25] fix service queue --- workflow/hosts/gaeac6.yaml | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/workflow/hosts/gaeac6.yaml b/workflow/hosts/gaeac6.yaml index 2d9a2838ed..a8368354ec 100644 --- a/workflow/hosts/gaeac6.yaml +++ b/workflow/hosts/gaeac6.yaml @@ -13,9 +13,9 @@ PTMP: '/gpfs/f6/${ACCOUNT}/scratch/${USER}' NOSCRUB: $HOMEDIR SCHEDULER: slurm QUEUE: normal 
-QUEUE_SERVICE: hpss +QUEUE_SERVICE: normal PARTITION_BATCH: batch -PARTITION_SERVICE: dtn_f5_f6 +PARTITION_SERVICE: batch RESERVATION: '' CLUSTERS: 'c6' CHGRP_RSTPROD: 'NO' From 5ae5fff3f59192cfab4efda6a76ee2fe1a90462d Mon Sep 17 00:00:00 2001 From: "Jeffrey.S.Whitaker" Date: Wed, 11 Dec 2024 11:22:54 -0500 Subject: [PATCH 24/25] remove --- .github/workflows/hercules.yaml | 81 --------------------------------- 1 file changed, 81 deletions(-) delete mode 100644 .github/workflows/hercules.yaml diff --git a/.github/workflows/hercules.yaml b/.github/workflows/hercules.yaml deleted file mode 100644 index a08ec867b6..0000000000 --- a/.github/workflows/hercules.yaml +++ /dev/null @@ -1,81 +0,0 @@ -name: Hercules - -on: - pull_request_target: - branches: - - develop - types: [closed] - -jobs: - - getlabels: - runs-on: ubuntu-22.04 - outputs: - labels: ${{ steps.id.outputs.labels }} - steps: - - name: Get Label Steps - id: id - env: - GITHUB_TOKEN: ${{ secrets.GITHUB_TOKEN }} - OWNER: ${{ github.repository_owner }} - REPO_NAME: ${{ github.event.repository.name }} - PULL_REQUEST_NUMBER: ${{ github.event.pull_request.number }} - run: | - LABELS1="$(gh api repos/$OWNER/$REPO_NAME/pulls/$PULL_REQUEST_NUMBER --jq '.labels.[].name')" - LABELS=$(echo "$LABELS1" | tr '\n' ' ') - echo "labels=$LABELS" >> $GITHUB_OUTPUT - - passed: - if: contains( needs.getlabels.outputs.labels, 'CI-Hercules-Passed') && github.event.pull_request.merged - runs-on: ubuntu-22.04 - needs: - - getlabels - - steps: - - name: Passed - uses: schneegans/dynamic-badges-action@v1.6.0 - with: - forceUpdate: true - auth: ${{ secrets.CLI_DYNAMIC_BADGES }} - gistID: e35aa2904a54deae6bbb1fdc2d960c71 - filename: hercules.json - label: hercules - message: passing - color: green - - failed: - if: contains( needs.getlabels.outputs.labels, 'CI-Hercules-Failed') && github.event.pull_request.merged - runs-on: ubuntu-latest - needs: - - getlabels - - steps: - - name: Failed - uses: schneegans/dynamic-badges-action@v1.6.0 - 
with: - forceUpdate: true - auth: ${{ secrets.CLI_DYNAMIC_BADGES }} - gistID: e35aa2904a54deae6bbb1fdc2d960c71 - filename: hercules.json - label: hercules - message: failing - color: red - - - pending: - if: "!contains( needs.getlabels.outputs.labels, 'CI-Hercules-Passed') && !contains( needs.getlabels.outputs.labels, 'CI-Hercules-Failed')" - runs-on: ubuntu-latest - needs: - - getlabels - - steps: - - name: Pending - uses: schneegans/dynamic-badges-action@v1.6.0 - with: - forceUpdate: true - auth: ${{ secrets.CLI_DYNAMIC_BADGES }} - gistID: e35aa2904a54deae6bbb1fdc2d960c71 - filename: hercules.json - label: hercules - message: pending - color: orange From 8a73d8587ec595f92607cea5ae5ab3e74cf7633b Mon Sep 17 00:00:00 2001 From: "Jeffrey.S.Whitaker" Date: Wed, 11 Dec 2024 11:24:12 -0500 Subject: [PATCH 25/25] remove --- .github/workflows/wcoss2.yaml | 81 ----------------------------------- 1 file changed, 81 deletions(-) delete mode 100644 .github/workflows/wcoss2.yaml diff --git a/.github/workflows/wcoss2.yaml b/.github/workflows/wcoss2.yaml deleted file mode 100644 index 489ae58406..0000000000 --- a/.github/workflows/wcoss2.yaml +++ /dev/null @@ -1,81 +0,0 @@ -name: WCOSS2 - -on: - pull_request_target: - branches: - - develop - types: [closed] - -jobs: - - getlabels: - runs-on: ubuntu-22.04 - outputs: - labels: ${{ steps.id.outputs.labels }} - steps: - - name: Get Label Steps - id: id - env: - GITHUB_TOKEN: ${{ secrets.GITHUB_TOKEN }} - OWNER: ${{ github.repository_owner }} - REPO_NAME: ${{ github.event.repository.name }} - PULL_REQUEST_NUMBER: ${{ github.event.pull_request.number }} - run: | - LABELS1="$(gh api repos/$OWNER/$REPO_NAME/pulls/$PULL_REQUEST_NUMBER --jq '.labels.[].name')" - LABELS=$(echo "$LABELS1" | tr '\n' ' ') - echo "labels=$LABELS" >> $GITHUB_OUTPUT - - passed: - if: contains( needs.getlabels.outputs.labels, 'CI-Wcoss2-Passed') && github.event.pull_request.merged - runs-on: ubuntu-latest - needs: - - getlabels - - steps: - - name: Passed - uses: 
schneegans/dynamic-badges-action@v1.6.0 - with: - forceUpdate: true - auth: ${{ secrets.CLI_DYNAMIC_BADGES }} - gistID: e35aa2904a54deae6bbb1fdc2d960c71 - filename: wcoss2.json - label: wcoss2 - message: passing - color: green - - failed: - if: contains( needs.getlabels.outputs.labels, 'CI-Wcoss2-Failed') && github.event.pull_request.merged - runs-on: ubuntu-latest - needs: - - getlabels - - steps: - - name: Failed - uses: schneegans/dynamic-badges-action@v1.6.0 - with: - forceUpdate: true - auth: ${{ secrets.CLI_DYNAMIC_BADGES }} - gistID: e35aa2904a54deae6bbb1fdc2d960c71 - filename: wcoss2.json - label: wcoss2 - message: failing - color: red - - - pending: - if: "!contains( needs.getlabels.outputs.labels, 'CI-Wcoss2-Passed') && !contains( needs.getlabels.outputs.labels, 'CI-Wcoss2-Failed')" - runs-on: ubuntu-latest - needs: - - getlabels - - steps: - - name: Pending - uses: schneegans/dynamic-badges-action@v1.6.0 - with: - forceUpdate: true - auth: ${{ secrets.CLI_DYNAMIC_BADGES }} - gistID: e35aa2904a54deae6bbb1fdc2d960c71 - filename: wcoss2.json - label: wcoss2 - message: pending - color: orange