Skip to content

Commit

Permalink
tests/e2e: Wait for no restarts after deployment
Browse files Browse the repository at this point in the history
Especially on Azure workers, we are seeing several pod restarts right
after CoCo deployment. Let's wait for 3x21s, which should be enough to
detect instabilities, as the liveness probe is 15+20s.

Signed-off-by: Lukáš Doktor <[email protected]>
  • Loading branch information
ldoktor committed Dec 11, 2023
1 parent 002882b commit 2ac5ce3
Showing 1 changed file with 39 additions and 0 deletions.
39 changes: 39 additions & 0 deletions tests/e2e/operator.sh
Original file line number Diff line number Diff line change
Expand Up @@ -258,6 +258,40 @@ uninstall_operator() {
fi
}

# Wait until no pod in the confidential-containers-system namespace is
# created or restarted for several consecutive polls.
#
# The pods are polled every 21s (20s is the liveness probe). Every poll that
# shows a new pod/container or a changed restart count restarts the
# observation window, so only consecutive quiet polls count as stable.
#
# Globals:   none modified
# Arguments: none
# Outputs:   DEBUG/INFO/ERROR lines on stdout, WARNING on stderr
# Returns:   0 once more than 3 consecutive polls showed no change,
#            1 when that did not happen within the retry budget
#
wait_for_stabilization() {
	local -A restart_counts=()
	local -a fields=() statuses=()
	local stable_polls=0 count=0 change
	local pod_info pod status container restart_count key

	while true; do
		change=0
		# One line per pod: "<pod> <container>=<restarts> ...". Keeping all
		# containers of a pod on the pod's own line lets every container be
		# attributed to the right pod, including pods with several
		# containers or with no container status yet.
		if ! pod_info=$(kubectl get pods -n confidential-containers-system -o=jsonpath='{range .items[*]}{.metadata.name}{range .status.containerStatuses[*]}{" "}{.name}{"="}{.restartCount}{end}{"\n"}{end}'); then
			# A failed query says nothing about stability; never count it
			# as a quiet poll.
			echo "WARNING: Unable to list pods, treating this poll as unstable" >&2
			pod_info=""
			change=1
		fi

		while read -r -a fields; do
			# The here-string always yields at least one (possibly empty) line.
			[ "${#fields[@]}" -gt 0 ] || continue
			pod=${fields[0]}
			statuses=("${fields[@]:1}")
			# Track pods that report no containers yet under an empty
			# container name so that their appearance counts as a change.
			[ "${#statuses[@]}" -gt 0 ] || statuses=("=")

			for status in "${statuses[@]}"; do
				container=${status%%=*}
				restart_count=${status#*=}
				key="$pod-$container"
				# "-1" stands for "never seen before".
				if [ "${restart_counts[$key]--1}" != "$restart_count" ]; then
					echo "DEBUG: Pod: $pod, Container: $container, Restart count: $restart_count"
					restart_counts["$key"]=$restart_count
					change=1
				fi
			done
		done <<< "$pod_info"

		if [ "$change" -eq 0 ]; then
			stable_polls=$((stable_polls + 1))
		else
			# Something changed: the quiet period has to start over.
			stable_polls=0
		fi

		if [ "$stable_polls" -gt 3 ]; then
			echo "INFO: No new restarts in 3x21s, proceeding..."
			return 0
		elif [ "$count" -gt 20 ]; then
			echo "ERROR: Pods are still restarting after 20x21s, bailing out!"
			return 1
		fi

		count=$((count + 1))
		sleep 21
	done
}


usage() {
cat <<-EOF
Utility to build/install/uninstall the operator.
Expand All @@ -267,6 +301,7 @@ usage() {
command : optional command (build and install by default). Can be:
"build": build only,
"install": install only,
"wait_for_stabilization": wait for CoCo pods to be stable
"uninstall": uninstall the operator.
EOF
}
Expand All @@ -281,6 +316,7 @@ main() {
install_operator
build_pre_install_img
install_ccruntime
wait_for_stabilization
else
case $1 in
-h|--help) usage && exit 0;;
Expand All @@ -296,6 +332,9 @@ main() {
uninstall_ccruntime
uninstall_operator
;;
wait_for_stabilization)
wait_for_stabilization
;;
*)
echo "Unknown command '$1'"
usage && exit 1
Expand Down

0 comments on commit 2ac5ce3

Please sign in to comment.