From 1f42aff93301973724e1b7c3c600ae064ff2b9bd Mon Sep 17 00:00:00 2001
From: Ed Santiago <santiago@redhat.com>
Date: Sun, 5 Nov 2023 10:59:52 -0700
Subject: [PATCH] systests: debug systemd failures

When a systemd-related system test fails, we usually get:

    systemctl start foo
    FAILED exit status 1, try 'systemctl --status' or 'journalctl -xe'

That makes it impossible to debug flakes.

Solution: new systemctl_start() [note underscore], to be used
instead of systemctl <SPACE> start. On failure, will run log
commands.

Signed-off-by: Ed Santiago <santiago@redhat.com>
---
 test/system/250-systemd.bats           | 10 +++-----
 test/system/252-quadlet.bats           |  7 ++----
 test/system/255-auto-update.bats       |  7 +++---
 test/system/270-socket-activation.bats |  2 +-
 test/system/helpers.systemd.bash       | 35 ++++++++++++++++++++++++++
 5 files changed, 45 insertions(+), 16 deletions(-)
diff --git a/test/system/250-systemd.bats b/test/system/250-systemd.bats
index 87c3cfe3cf..5cf6a0978c 100644
--- a/test/system/250-systemd.bats
+++ b/test/system/250-systemd.bats
@@ -46,8 +46,7 @@ function service_setup() {
     run systemctl enable "$SERVICE_NAME"
     assert $status -eq 0 "Error enabling systemd unit $SERVICE_NAME: $output"
 
-    run systemctl start "$SERVICE_NAME"
-    assert $status -eq 0 "Error starting systemd unit $SERVICE_NAME: $output"
+    systemctl_start "$SERVICE_NAME"
 
     run systemctl status "$SERVICE_NAME"
     assert $status -eq 0 "systemctl status $SERVICE_NAME: $output"
@@ -230,8 +229,7 @@ LISTEN_FDNAMES=listen_fdnames" | sort)
     systemctl daemon-reload
 
     INSTANCE="$SERVICE_NAME@1.service"
-    run systemctl start "$INSTANCE"
-    assert $status -eq 0 "Error starting systemd unit $INSTANCE: $output"
+    systemctl_start "$INSTANCE"
 
     run systemctl status "$INSTANCE"
     assert $status -eq 0 "systemctl status $INSTANCE: $output"
@@ -401,7 +399,7 @@ EOF
 
     # Dispatch the YAML file
     service_name="podman-kube@$(systemd-escape $yaml_source).service"
-    systemctl start $service_name
+    systemctl_start $service_name
     systemctl is-active $service_name
 
     # Make sure that Podman is the service's MainPID
@@ -456,7 +454,7 @@ $name stderr" "logs work with passthrough"
 
     # Now stop and start the service again.
     systemctl stop $service_name
-    systemctl start $service_name
+    systemctl_start $service_name
     systemctl is-active $service_name
     run_podman container inspect $service_container --format "{{.State.Running}}"
     is "$output" "true"
diff --git a/test/system/252-quadlet.bats b/test/system/252-quadlet.bats
index 6780af194f..912b6be08c 100644
--- a/test/system/252-quadlet.bats
+++ b/test/system/252-quadlet.bats
@@ -91,10 +91,7 @@ function service_setup() {
         local activestate="inactive"
     fi
 
-    echo "$_LOG_PROMPT systemctl $startargs start $service"
-    run systemctl $startargs start "$service"
-    echo "$output"
-    assert $status -eq 0 "Error starting systemd unit $service"
+    systemctl_start $startargs "$service"
 
     # FIXME FIXME FIXME: this is racy with short-lived containers!
     echo "$_LOG_PROMPT systemctl status $service"
@@ -798,7 +795,7 @@ ExecStart=/bin/bash -c "echo %T >$percent_t_file"
 Type=oneshot
 EOF
     systemctl daemon-reload
-    systemctl --wait start $service
+    systemctl_start --wait $service
     percent_t=$(< $percent_t_file)
     # Clean up. Don't bother to systemctl-reload, service_setup does that below.
     rm -f $unitfile
diff --git a/test/system/255-auto-update.bats b/test/system/255-auto-update.bats
index 75e0a7fca8..6540ea1e06 100644
--- a/test/system/255-auto-update.bats
+++ b/test/system/255-auto-update.bats
@@ -92,7 +92,7 @@ function generate_service() {
     run_podman rm -t 0 -f $cname
 
     systemctl daemon-reload
-    systemctl start container-$cname
+    systemctl_start container-$cname
     systemctl status container-$cname
 
     # Original image ID.
@@ -530,7 +530,7 @@ EOF
 
     # Dispatch the YAML file
     service_name="podman-kube@$(systemd-escape $yaml_source).service"
-    systemctl start $service_name
+    systemctl_start $service_name
     systemctl is-active $service_name
 
     # Make sure the containers are properly configured
@@ -588,8 +588,7 @@ EOF
 
     systemctl daemon-reload
 
-    run systemctl start pod-$podname.service
-    assert $status -eq 0 "Error starting pod systemd unit: $output"
+    systemctl_start pod-$podname.service
     _wait_service_ready container-$ctrname.service
 
     run_podman pod inspect --format "{{.State}}" $podname
diff --git a/test/system/270-socket-activation.bats b/test/system/270-socket-activation.bats
index a8ae234331..0073000b08 100644
--- a/test/system/270-socket-activation.bats
+++ b/test/system/270-socket-activation.bats
@@ -59,7 +59,7 @@ EOF
             rm -f $pause_pid_file
         fi
     fi
-    systemctl start "$SERVICE_NAME.socket"
+    systemctl_start "$SERVICE_NAME.socket"
 }
 
 function teardown() {
diff --git a/test/system/helpers.systemd.bash b/test/system/helpers.systemd.bash
index 52581bfc6e..bda601c153 100644
--- a/test/system/helpers.systemd.bash
+++ b/test/system/helpers.systemd.bash
@@ -33,6 +33,41 @@ systemd-run() {
     timeout --foreground -v --kill=10 $PODMAN_TIMEOUT systemd-run $_DASHUSER "$@";
 }
 
+# "systemctl start" is special: when it fails, it doesn't give any useful info.
+# This helper fixes that.
+systemctl_start() {
+    # Arg processing. First arg might be "--wait"...
+    local wait=
+    if [[ "$1" = "--wait" ]]; then
+        wait="$1"
+        shift
+    fi
+    # ...but beyond that, only one arg is allowed
+    local unit="$1"
+    shift
+    assert "$*" = "" "systemctl_start invoked with spurious args"
+
+    echo "$_LOG_PROMPT systemctl $wait start $unit"
+    run systemctl $wait start "$unit"
+    echo "$output"
+    if [[ $status -eq 0 ]]; then
+        return
+    fi
+
+    # Failed. This is our value added.
+    echo
+    echo "***** systemctl start $unit -- FAILED!"
+    echo
+    echo "$_LOG_PROMPT systemctl status $unit"
+    run systemctl status "$unit"
+    echo "$output"
+    echo
+    echo "$_LOG_PROMPT journalctl -xeu $unit"
+    run journalctl -xeu "$unit"
+    echo "$output"
+    false
+}
+
 install_kube_template() {
     # If running from a podman source directory, build and use the source
     # version of the play-kube-@ unit file