Skip to content

Commit 950a043

Browse files
authored
Leverage Intel Containers for NOAA RDHPC Cross-Platform Reproducibility (NOAA-GFDL#109)
* Added scripts for running the containerized MOM6SIS2 on Gaea.
* update run_container.sh
* rename stdout for container test
* add ref and rename stdouts
* activate container ci
* fix typo
* mount ncrc in container
* fix mount name
* run build process using slurm
* forget link dataset
1 parent a82a961 commit 950a043

File tree

9 files changed

+515
-72
lines changed

9 files changed

+515
-72
lines changed
Lines changed: 21 additions & 18 deletions
Original file line numberDiff line numberDiff line change
@@ -1,7 +1,7 @@
11
name: CEFI-MOM6-ci
22

33
on:
4-
# Triggers this workflow on pull request event with "CEFI_MOM6_RT_gaea_c5" label
4+
# Triggers this workflow on pull request event with "CEFI_MOM6_RT_container" label
55
pull_request:
66
branches: [ "main" ]
77
types: [ labeled ]
@@ -14,8 +14,8 @@ env:
1414
#
1515
jobs:
1616
checkout-build:
17-
if: ${{ github.event.label.name == 'CEFI_MOM6_RT_gaea_c5' }}
18-
runs-on: self-hosted
17+
if: ${{ github.event.label.name == 'CEFI_MOM6_RT_container' }}
18+
runs-on: [self-hosted, container]
1919
timeout-minutes: 600
2020
strategy:
2121
max-parallel: 1
@@ -28,9 +28,12 @@ jobs:
2828
fetch-depth: 1
2929
submodules: recursive
3030

31-
- name: Build MOM6SIS2
31+
- name: Build MOM6SIS2 using container
3232
run: |
3333
cd ${{ env.TEST_DIR }}/CEFI_MOM6_CHECK/builds
34+
pwd
35+
#img=/gpfs/f6/ira-cefi/world-shared/container/ubuntu22.04-intel-ufs-env-v1.8.0-llvm.img
36+
#apptainer exec -B /gpfs -B /ncrc/home2/Yi-cheng.Teng:/ncrc/home2/Yi-cheng.Teng $img bash linux-build.bash -m docker -p linux-intel -t repro -f mom6sis2
3437
jobid=$(sbatch --parsable ci_build_driver.sh | awk -F';' '{print $1}' | cut -f1)
3538
#
3639
sleep 1
@@ -45,8 +48,8 @@ jobs:
4548
fi
4649
sleep 60 # Adjust the sleep duration as needed
4750
done
48-
#
49-
check_file="${{ env.TEST_DIR }}/CEFI_MOM6_CHECK/builds/build/gaea-ncrc5.intel23/ocean_ice/repro/MOM6SIS2"
51+
52+
check_file="${{ env.TEST_DIR }}/CEFI_MOM6_CHECK/builds/build/docker-linux-intel/ocean_ice/repro/MOM6SIS2"
5053
if [ -f "$check_file" ]; then
5154
echo "PASSED: $check_file"
5255
else
@@ -56,16 +59,16 @@ jobs:
5659
5760
run-CEFI_MOM6-ci:
5861
needs: checkout-build
59-
runs-on: self-hosted
62+
runs-on: [self-hosted, container]
6063
strategy:
61-
max-parallel: 2
64+
max-parallel: 1
6265
matrix:
63-
case: ["NWA12.COBALT", "NEP10.COBALT"]
66+
case: ["NEP10.COBALT"]
6467
steps:
6568
- name: Run Experiment ${{ matrix.case }}
6669
run: |
6770
cd ${{ env.TEST_DIR }}/CEFI_MOM6_CHECK/exps/${{ matrix.case }}
68-
jobid=$(sbatch --parsable driver.sh | awk -F';' '{print $1}' | cut -f1)
71+
jobid=$(sbatch --parsable run_container.sh | awk -F';' '{print $1}' | cut -f1)
6972
#
7073
sleep 1
7174
while :; do
@@ -80,7 +83,7 @@ jobs:
8083
sleep 60 # Adjust the sleep duration as needed
8184
done
8285
#
83-
expected_string="All restart files are identical, PASS"
86+
expected_string="ocean.stats is identical to ref, PASS"
8487
check_file="${{ env.TEST_DIR }}/CEFI_MOM6_CHECK/exps/${{ matrix.case }}/${{ matrix.case }}_o.$jobid"
8588
if [ -f "$check_file" ]; then
8689
if grep -qF "$expected_string" $check_file; then
@@ -96,22 +99,22 @@ jobs:
9699
97100
add-pass-label:
98101
needs: run-CEFI_MOM6-ci
99-
runs-on: self-hosted
102+
runs-on: [self-hosted, container]
100103
if: ${{ needs.run-CEFI_MOM6-ci.result == 'success' }}
101104
steps:
102-
- name: Add "pass_CEFI_MOM6_RT" label on success
105+
- name: Add "pass_CEFI_MOM6_RT_container" label on success
103106
run: |
104107
TOKEN=${{ secrets.GITHUB_TOKEN }}
105-
RT_TEST_LABEL="CEFI_MOM6_RT_gaea_c5"
106-
PASS_LABEL="pass_CEFI_MOM6_RT"
108+
RT_TEST_LABEL="CEFI_MOM6_RT_container"
109+
PASS_LABEL="pass_CEFI_MOM6_RT_container"
107110
108-
# Remove the "CEFI_MOM6_RT_gaea_c5" label
111+
# Remove the "CEFI_MOM6_RT_container" label
109112
curl -X DELETE \
110113
-H "Authorization: Bearer $TOKEN" \
111114
-H "Accept: application/vnd.github.v3+json" \
112115
"https://api.github.com/repos/$GITHUB_REPOSITORY/issues/${{ env.PR_NUMBER }}/labels/$RT_TEST_LABEL"
113116
114-
# Add the "pass_CEFI_MOM6_RT" label
117+
# Add the "pass_CEFI_MOM6_RT_container" label
115118
curl -X POST \
116119
-H "Authorization: Bearer $TOKEN" \
117120
-H "Accept: application/vnd.github.v3+json" \
@@ -120,7 +123,7 @@ jobs:
120123
121124
clean-up:
122125
needs: add-pass-label
123-
runs-on: self-hosted
126+
runs-on: [self-hosted, container]
124127
strategy:
125128
max-parallel: 1
126129
steps:

builds/ci_build_driver.sh

Lines changed: 11 additions & 53 deletions
Original file line numberDiff line numberDiff line change
@@ -1,65 +1,23 @@
11
#!/bin/bash
22
#SBATCH --nodes=1
3-
#SBATCH --time=60
4-
#SBATCH --job-name="MOM6SIS2_ci_build"
5-
#SBATCH --output=MOM6SIS2_ci_build_o.%j
6-
#SBATCH --error=MOM6SIS2_ci_build_e.%j
7-
#SBATCH --qos=debug
3+
#SBATCH --time=360
4+
#SBATCH --job-name="MOM6SIS2_container_build"
5+
#SBATCH --output=MOM6SIS2_container_build_o.%j
6+
#SBATCH --error=MOM6SIS2_container_build_e.%j
7+
#SBATCH --qos=normal
88
#SBATCH --partition=batch
9-
#SBATCH --clusters=c5
10-
#SBATCH --account=cefi
9+
#SBATCH --clusters=c6
10+
#SBATCH --account=ira-cefi
1111

1212
#
1313
[ -d "build" ] && rm -rf build
1414

1515
#
16-
echo "Build MOM6SIS2-COBALT for CI testing started: " `date`
16+
echo "Build MOM6SIS2-COBALT using container started: " `date`
1717

1818
#
19-
machine_name="gaea"
20-
platform="ncrc5.intel23"
21-
target="repro"
22-
flavor="fms1_mom6sis2"
23-
24-
FMSlib_PATH="/gpfs/f5/cefi/scratch/Yi-cheng.Teng/github/FMS/2024.02_FMS1"
25-
rootdir=$(pwd)
26-
abs_rootdir=$rootdir
27-
28-
echo $abs_rootdir
29-
30-
#load modules
31-
source $MODULESHOME/init/bash
32-
source $rootdir/$machine_name/$platform.env
33-
. $rootdir/$machine_name/$platform.env
34-
35-
makeflags="NETCDF=3"
36-
37-
if [[ $target =~ "repro" ]] ; then
38-
makeflags="$makeflags REPRO=1"
39-
fi
40-
41-
srcdir=$abs_rootdir/../src
42-
43-
#
44-
sed -i 's/static pid_t gettid(void)/pid_t gettid(void)/g' $srcdir/FMS/affinity/affinity.c
45-
46-
#
47-
if [[ $flavor == "fms1_mom6sis2" ]] ; then
48-
echo "build mom6sis2 with FMS1 cap"
49-
50-
mkdir -p build/$machine_name-$platform/ocean_ice/$target
51-
pushd build/$machine_name-$platform/ocean_ice/$target
52-
rm -f path_names
53-
$srcdir/mkmf/bin/list_paths $srcdir/MOM6/{config_src/infra/FMS1,config_src/memory/dynamic_symmetric,config_src/drivers/FMS_cap,config_src/external/ODA_hooks,config_src/external/database_comms,config_src/external/drifters,config_src/external/stochastic_physics,pkg/GSW-Fortran/{modules,toolbox}/,src/{*,*/*}/} $srcdir/SIS2/{config_src/dynamic_symmetric,config_src/external/Icepack_interfaces,src} $srcdir/icebergs/src $srcdir/FMS/{coupler,include}/ $srcdir/{ocean_BGC/generic_tracers,ocean_BGC/mocsy/src}/ $srcdir/{atmos_null,ice_param,land_null,coupler/shared/,coupler/full/}/
54-
55-
compiler_options='-DINTERNAL_FILE_NML -DMAX_FIELDS_=600 -DNOT_SET_AFFINITY -Duse_deprecated_io -D_USE_MOM6_DIAG -D_USE_GENERIC_TRACER -DUSE_PRECISION=2 -D_USE_LEGACY_LAND_ -Duse_AM3_physics'
56-
57-
$srcdir/mkmf/bin/mkmf -t $abs_rootdir/$machine_name/$platform.mk -o "-I${FMSlib_PATH}/shared/$target" -p MOM6SIS2 -l "-L${FMSlib_PATH}/shared/$target -lfms" -c "$compiler_options" path_names
58-
59-
make $makeflags MOM6SIS2
60-
61-
fi
62-
19+
export img=/gpfs/f6/ira-cefi/world-shared/container/ubuntu22.04-intel-ufs-env-v1.8.0-llvm.img
20+
apptainer exec -B /gpfs -B /ncrc/home2/Yi-cheng.Teng:/ncrc/home2/Yi-cheng.Teng $img bash linux-build.bash -m docker -p linux-intel -t repro -f mom6sis2
6321

6422
#
65-
echo "Build MOM6SIS2-COBALT for CI testing ended: " `date`
23+
echo "Build MOM6SIS2-COBALT using container ended: " `date`
Lines changed: 110 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,110 @@
#!/bin/bash
#
# Create wrapper scripts in an external directory for executables given on
# the command line.
#
# The two templates under /opt/container-scripts are copied into the target
# directory (-e) and their placeholders (IMAGE, BINDDIRS, LDLIB_PATH,
# LIB_PATH, FI_PATH) are filled in from the environment that results from
# sourcing the env file (-p). For every executable argument a copy of the
# "run" template is then created under the executable's base name, with
# EXEC_PATH and ESMF_MK filled in as well.
#
# NOTE(review): what the generated wrappers do at run time is defined by the
# templates, which are not part of this script -- confirm against them.

TEMPLATE_DIR=/opt/container-scripts

################################################################################
# Help                                                                         #
################################################################################
Help()
{
   # Display Help
   echo "Create wrapper scripts for container executables in an external folder."
   echo
   echo "Syntax: $(basename "$0") [-h] -e EXEC_DIR -p ENV_FILE [EXECUTABLE ...]"
   echo "options:"
   echo "-h     Print this Help."
   echo "-e     Create the external executable in the user-specified folder"
   echo "-p     env file that contains the necessary modules within the container"
   echo
}

# Print an error message on stderr and abort.
die()
{
   echo "ERROR: $*" >&2
   exit 1
}

# Escape the characters that are special on the replacement side of
# sed's s|...|...| command: backslash, ampersand and the '|' delimiter.
sed_escape()
{
   printf '%s' "$1" | sed -e 's/[\\&|]/\\&/g'
}

# Replace every occurrence of placeholder $1 by the literal value $2 in the
# files given as the remaining arguments.
fill_placeholder()
{
   local placeholder=$1
   local value
   value=$(sed_escape "$2")
   shift 2
   sed -i "s|${placeholder}|${value}|g" "$@"
}

################################################################################
# Main program                                                                 #
################################################################################

# Process the input options. The leading ':' selects getopts' silent mode, so
# missing arguments and unknown options are reported by the branches below.
while getopts ":he:p:" option; do
   case $option in
      h) # display Help
         Help
         exit
         ;;
      e) # external directory to hold externalized executables
         exec_dir=$OPTARG
         echo "Will create external executable in $exec_dir"
         ;;
      p) # env file that contains the necessary modules inside the container
         env_file=$OPTARG
         echo "Will load modules in $env_file"
         ;;
      :) # -e or -p given without a value
         Help >&2
         die "option -$OPTARG requires an argument"
         ;;
      \?) # unknown options were tolerated before; keep tolerating them, but say so
         echo "WARNING: ignoring unknown option -$OPTARG" >&2
         ;;
   esac
done

# Shift past the processed options; what remains is the list of executables.
shift $((OPTIND - 1))
files=("$@")

# Both options are mandatory. With an empty exec_dir the globs further down
# would expand to "/*", i.e. sed -i and chmod +x would hit the filesystem root.
if [[ -z ${exec_dir:-} ]]; then
   Help >&2
   die "-e EXEC_DIR is required"
fi
if [[ -z ${env_file:-} ]]; then
   Help >&2
   die "-p ENV_FILE is required"
fi
[[ -r $env_file ]] || die "cannot read env file: $env_file"

# shellcheck disable=SC1090
source "$env_file"

mkdir -p -- "$exec_dir" || die "cannot create directory: $exec_dir"

run_template=$exec_dir/run_container_executable.sh
build_template=$exec_dir/build_container_executable.sh
cp -- "$TEMPLATE_DIR/run_container_executable.sh" "$run_template" \
   || die "cannot copy run_container_executable.sh to $exec_dir"
cp -- "$TEMPLATE_DIR/build_container_executable.sh" "$build_template" \
   || die "cannot copy build_container_executable.sh to $exec_dir"
# Only the two freshly copied templates are edited, not every file in
# exec_dir whose name happens to end in _executable.sh.
templates=("$run_template" "$build_template")

if [[ -z ${SINGULARITY_CONTAINER:-} ]]; then
   echo "WARNING: SINGULARITY_CONTAINER is not set; IMAGE will be replaced by an empty string" >&2
fi

# Turn the comma-separated SINGULARITY_BIND list into " -B dir1 -B dir2 ...".
bindstring=" "
IFS=',' read -r -a bind_dirs <<< "${SINGULARITY_BIND:-}"
for binddir in "${bind_dirs[@]}"; do
   # skip empty entries produced by stray commas
   [[ -n $binddir ]] || continue
   bindstring+=" -B ${binddir}"
done
printf '%s\n' "$bindstring"

# Replace the paths in the templates.
fill_placeholder IMAGE "${SINGULARITY_CONTAINER:-}" "${templates[@]}"
fill_placeholder BINDDIRS "$bindstring" "${templates[@]}"
# LDLIB_PATH has to be handled before LIB_PATH: "LIB_PATH" is a substring of
# "LDLIB_PATH", so the reverse order would corrupt the longer placeholder.
fill_placeholder LDLIB_PATH "${LD_LIBRARY_PATH:-}" "${templates[@]}"
fill_placeholder LIB_PATH "${LIBRARY_PATH:-}" "${templates[@]}"
fill_placeholder FI_PATH "${FI_PROVIDER_PATH:-}" "${templates[@]}"

# One wrapper per requested executable, named after the executable itself.
for file in "${files[@]}"; do
   fullfile=$(readlink -m -- "$file")
   basefile=$(basename -- "$fullfile")
   pathdir=$(dirname -- "$fullfile")
   wrapper=$exec_dir/$basefile

   cp -- "$run_template" "$wrapper" || die "cannot create wrapper: $wrapper"
   echo "fullfile is $fullfile"
   echo "$pathdir"

   # The executable's own directory goes first on the wrapper's search path.
   fill_placeholder EXEC_PATH "$pathdir:$PATH" "$wrapper"
   fill_placeholder ESMF_MK "${ESMFMKFILE:-}" "$wrapper"
done

# Wrappers for build tools are currently disabled (empty list). To enable
# them again, list the tool names here, e.g.: make cmake ecbuild python python3
build_tools=()
for tool in "${build_tools[@]}"; do
   if ! fullfile=$(type -P "$tool"); then
      echo "WARNING: $tool not found in PATH, skipping" >&2
      continue
   fi
   basefile=$(basename -- "$fullfile")
   pathdir=$(dirname -- "$fullfile")
   wrapper=$exec_dir/$basefile

   cp -- "$build_template" "$wrapper" || die "cannot create wrapper: $wrapper"

   fill_placeholder EXEC_PATH "$pathdir:$PATH" "$wrapper"
   fill_placeholder CMAKE_PREPATH "${CMAKE_PREFIX_PATH:-}" "$wrapper"
   fill_placeholder ESMF_MK "${ESMFMKFILE:-}" "$wrapper"
done

chmod +x "$exec_dir"/*

builds/docker/linux-intel.env

Lines changed: 7 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,7 @@
# Module setup for the docker / linux-intel build target.
# NOTE(review): presumably meant to be sourced by the build script rather
# than executed on its own -- confirm against linux-build.bash.

# Load the Lmod shell initialisation, then start from an empty module set.
source /usr/lmod/lmod/init/bash
module purge

# Add the spack-stack 1.8.0 unified-env "Core" module tree to the search path.
module use /opt/spack-stack/spack-stack-1.8.0/envs/unified-env/install/modulefiles/Core

# Load the three stack modules one at a time, in this exact order.
for _stack_module in stack-oneapi stack-intel-oneapi-mpi jedi-ufs-env; do
    module load "$_stack_module"
done
unset _stack_module

# Remove this FMS version from the environment again.
# NOTE(review): presumably it is pulled in by jedi-ufs-env and the build
# supplies its own FMS -- confirm.
module unload fms/2024.02

0 commit comments

Comments
 (0)