From 1f42aff93301973724e1b7c3c600ae064ff2b9bd Mon Sep 17 00:00:00 2001 From: Ed Santiago Date: Sun, 5 Nov 2023 10:59:52 -0700 Subject: [PATCH] systests: debug systemd failures When a systemd-related system test fails, we usually get: systemctl start foo FAILED exit status 1, try 'systemctl --status' or 'journalctl -xe' That makes it impossible to debug flakes. Solution: new systemctl_start() [note underscore], to be used instead of systemctl start. On failure, will run log commands. Signed-off-by: Ed Santiago --- test/system/250-systemd.bats | 10 +++----- test/system/252-quadlet.bats | 7 ++---- test/system/255-auto-update.bats | 7 +++--- test/system/270-socket-activation.bats | 2 +- test/system/helpers.systemd.bash | 35 ++++++++++++++++++++++++++ 5 files changed, 45 insertions(+), 16 deletions(-) diff --git a/test/system/250-systemd.bats b/test/system/250-systemd.bats index 87c3cfe3cf..5cf6a0978c 100644 --- a/test/system/250-systemd.bats +++ b/test/system/250-systemd.bats @@ -46,8 +46,7 @@ function service_setup() { run systemctl enable "$SERVICE_NAME" assert $status -eq 0 "Error enabling systemd unit $SERVICE_NAME: $output" - run systemctl start "$SERVICE_NAME" - assert $status -eq 0 "Error starting systemd unit $SERVICE_NAME: $output" + systemctl_start "$SERVICE_NAME" run systemctl status "$SERVICE_NAME" assert $status -eq 0 "systemctl status $SERVICE_NAME: $output" @@ -230,8 +229,7 @@ LISTEN_FDNAMES=listen_fdnames" | sort) systemctl daemon-reload INSTANCE="$SERVICE_NAME@1.service" - run systemctl start "$INSTANCE" - assert $status -eq 0 "Error starting systemd unit $INSTANCE: $output" + systemctl_start "$INSTANCE" run systemctl status "$INSTANCE" assert $status -eq 0 "systemctl status $INSTANCE: $output" @@ -401,7 +399,7 @@ EOF # Dispatch the YAML file service_name="podman-kube@$(systemd-escape $yaml_source).service" - systemctl start $service_name + systemctl_start $service_name systemctl is-active $service_name # Make sure that Podman is the service's MainPID @@ -456,7 +454,7 @@ $name stderr" "logs work with passthrough" # Now stop and start the service again. systemctl stop $service_name - systemctl start $service_name + systemctl_start $service_name systemctl is-active $service_name run_podman container inspect $service_container --format "{{.State.Running}}" is "$output" "true" diff --git a/test/system/252-quadlet.bats b/test/system/252-quadlet.bats index 6780af194f..912b6be08c 100644 --- a/test/system/252-quadlet.bats +++ b/test/system/252-quadlet.bats @@ -91,10 +91,7 @@ function service_setup() { local activestate="inactive" fi - echo "$_LOG_PROMPT systemctl $startargs start $service" - run systemctl $startargs start "$service" - echo "$output" - assert $status -eq 0 "Error starting systemd unit $service" + systemctl_start $startargs "$service" # FIXME FIXME FIXME: this is racy with short-lived containers! echo "$_LOG_PROMPT systemctl status $service" @@ -798,7 +795,7 @@ ExecStart=/bin/bash -c "echo %T >$percent_t_file" Type=oneshot EOF systemctl daemon-reload - systemctl --wait start $service + systemctl_start --wait $service percent_t=$(< $percent_t_file) # Clean up. Don't bother to systemctl-reload, service_setup does that below. rm -f $unitfile diff --git a/test/system/255-auto-update.bats b/test/system/255-auto-update.bats index 75e0a7fca8..6540ea1e06 100644 --- a/test/system/255-auto-update.bats +++ b/test/system/255-auto-update.bats @@ -92,7 +92,7 @@ function generate_service() { run_podman rm -t 0 -f $cname systemctl daemon-reload - systemctl start container-$cname + systemctl_start container-$cname systemctl status container-$cname # Original image ID. @@ -530,7 +530,7 @@ EOF # Dispatch the YAML file service_name="podman-kube@$(systemd-escape $yaml_source).service" - systemctl start $service_name + systemctl_start $service_name systemctl is-active $service_name # Make sure the containers are properly configured @@ -588,8 +588,7 @@ EOF systemctl daemon-reload - run systemctl start pod-$podname.service - assert $status -eq 0 "Error starting pod systemd unit: $output" + systemctl_start pod-$podname.service _wait_service_ready container-$ctrname.service run_podman pod inspect --format "{{.State}}" $podname diff --git a/test/system/270-socket-activation.bats b/test/system/270-socket-activation.bats index a8ae234331..0073000b08 100644 --- a/test/system/270-socket-activation.bats +++ b/test/system/270-socket-activation.bats @@ -59,7 +59,7 @@ EOF rm -f $pause_pid_file fi fi - systemctl start "$SERVICE_NAME.socket" + systemctl_start "$SERVICE_NAME.socket" } function teardown() { diff --git a/test/system/helpers.systemd.bash b/test/system/helpers.systemd.bash index 52581bfc6e..bda601c153 100644 --- a/test/system/helpers.systemd.bash +++ b/test/system/helpers.systemd.bash @@ -33,6 +33,41 @@ systemd-run() { timeout --foreground -v --kill=10 $PODMAN_TIMEOUT systemd-run $_DASHUSER "$@"; } +# "systemctl start" is special: when it fails, it doesn't give any useful info. +# This helper fixes that. +systemctl_start() { + # Arg processing. First arg might be "--wait"... + local wait= + if [[ "$1" = "--wait" ]]; then + wait="$1" + shift + fi + # ...but beyond that, only one arg is allowed + local unit="$1" + shift + assert "$*" = "" "systemctl_start invoked with spurious args" + + echo "$_LOG_PROMPT systemctl $wait start $unit" + run systemctl $wait start "$unit" + echo "$output" + if [[ $status -eq 0 ]]; then + return + fi + + # Failed. This is our value added. + echo + echo "***** systemctl start $unit -- FAILED!" + echo + echo "$_LOG_PROMPT systemctl status $unit" + run systemctl status "$unit" + echo "$output" + echo + echo "$_LOG_PROMPT journalctl -xeu $unit" + run journalctl -xeu "$unit" + echo "$output" + false +} + install_kube_template() { # If running from a podman source directory, build and use the source # version of the play-kube-@ unit file