Skip to content

Commit

Permalink
systests: debug systemd failures
Browse files Browse the repository at this point in the history
When a systemd-related system test fails, we usually get:

    systemctl start foo
    FAILED exit status 1, try 'systemctl --status' or 'journalctl -xe'

That makes it impossible to debug flakes.

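Solution: new systemctl_start() [note underscore], to be used
instead of systemctl <SPACE> start. On failure, it runs
'systemctl status' and 'journalctl -xeu' on the unit and shows
their output, so the cause of the failure ends up in the test log.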

Signed-off-by: Ed Santiago <[email protected]>
  • Loading branch information
edsantiago committed Dec 4, 2023
1 parent cbb3e4d commit 1f42aff
Show file tree
Hide file tree
Showing 5 changed files with 45 additions and 16 deletions.
10 changes: 4 additions & 6 deletions test/system/250-systemd.bats
Original file line number Diff line number Diff line change
Expand Up @@ -46,8 +46,7 @@ function service_setup() {
run systemctl enable "$SERVICE_NAME"
assert $status -eq 0 "Error enabling systemd unit $SERVICE_NAME: $output"

run systemctl start "$SERVICE_NAME"
assert $status -eq 0 "Error starting systemd unit $SERVICE_NAME: $output"
systemctl_start "$SERVICE_NAME"

run systemctl status "$SERVICE_NAME"
assert $status -eq 0 "systemctl status $SERVICE_NAME: $output"
Expand Down Expand Up @@ -230,8 +229,7 @@ LISTEN_FDNAMES=listen_fdnames" | sort)
systemctl daemon-reload

INSTANCE="$SERVICE_NAME@1.service"
run systemctl start "$INSTANCE"
assert $status -eq 0 "Error starting systemd unit $INSTANCE: $output"
systemctl_start "$INSTANCE"

run systemctl status "$INSTANCE"
assert $status -eq 0 "systemctl status $INSTANCE: $output"
Expand Down Expand Up @@ -401,7 +399,7 @@ EOF

# Dispatch the YAML file
service_name="podman-kube@$(systemd-escape $yaml_source).service"
systemctl start $service_name
systemctl_start $service_name
systemctl is-active $service_name

# Make sure that Podman is the service's MainPID
Expand Down Expand Up @@ -456,7 +454,7 @@ $name stderr" "logs work with passthrough"

# Now stop and start the service again.
systemctl stop $service_name
systemctl start $service_name
systemctl_start $service_name
systemctl is-active $service_name
run_podman container inspect $service_container --format "{{.State.Running}}"
is "$output" "true"
Expand Down
7 changes: 2 additions & 5 deletions test/system/252-quadlet.bats
Original file line number Diff line number Diff line change
Expand Up @@ -91,10 +91,7 @@ function service_setup() {
local activestate="inactive"
fi

echo "$_LOG_PROMPT systemctl $startargs start $service"
run systemctl $startargs start "$service"
echo "$output"
assert $status -eq 0 "Error starting systemd unit $service"
systemctl_start $startargs "$service"

# FIXME FIXME FIXME: this is racy with short-lived containers!
echo "$_LOG_PROMPT systemctl status $service"
Expand Down Expand Up @@ -798,7 +795,7 @@ ExecStart=/bin/bash -c "echo %T >$percent_t_file"
Type=oneshot
EOF
systemctl daemon-reload
systemctl --wait start $service
systemctl_start --wait $service
percent_t=$(< $percent_t_file)
# Clean up. Don't bother to systemctl-reload, service_setup does that below.
rm -f $unitfile
Expand Down
7 changes: 3 additions & 4 deletions test/system/255-auto-update.bats
Original file line number Diff line number Diff line change
Expand Up @@ -92,7 +92,7 @@ function generate_service() {
run_podman rm -t 0 -f $cname

systemctl daemon-reload
systemctl start container-$cname
systemctl_start container-$cname
systemctl status container-$cname

# Original image ID.
Expand Down Expand Up @@ -530,7 +530,7 @@ EOF

# Dispatch the YAML file
service_name="podman-kube@$(systemd-escape $yaml_source).service"
systemctl start $service_name
systemctl_start $service_name
systemctl is-active $service_name

# Make sure the containers are properly configured
Expand Down Expand Up @@ -588,8 +588,7 @@ EOF

systemctl daemon-reload

run systemctl start pod-$podname.service
assert $status -eq 0 "Error starting pod systemd unit: $output"
systemctl_start pod-$podname.service
_wait_service_ready container-$ctrname.service

run_podman pod inspect --format "{{.State}}" $podname
Expand Down
2 changes: 1 addition & 1 deletion test/system/270-socket-activation.bats
Original file line number Diff line number Diff line change
Expand Up @@ -59,7 +59,7 @@ EOF
rm -f $pause_pid_file
fi
fi
systemctl start "$SERVICE_NAME.socket"
systemctl_start "$SERVICE_NAME.socket"
}

function teardown() {
Expand Down
35 changes: 35 additions & 0 deletions test/system/helpers.systemd.bash
Original file line number Diff line number Diff line change
Expand Up @@ -33,6 +33,41 @@ systemd-run() {
timeout --foreground -v --kill=10 $PODMAN_TIMEOUT systemd-run $_DASHUSER "$@";
}

# "systemctl start" is special: when it fails, it doesn't give any useful info.
# This helper fixes that.
systemctl_start() {
    # Start a systemd unit; if that fails, show diagnostics, then fail.
    #
    # Usage:   systemctl_start [--wait] UNIT
    # Reads:   $_LOG_PROMPT, used as a prefix when echoing each command line
    # Uses:    the 'run' and 'assert' helpers (defined elsewhere); 'run' is
    #          relied upon to leave the command's result in $status/$output
    # Outputs: the command line and the output of every command it runs;
    #          usage errors go to stderr
    # Returns: 0 if 'systemctl start' succeeded, nonzero otherwise

    # Arg processing. First arg might be "--wait"...
    local wait=
    if [[ "${1:-}" = "--wait" ]]; then
        wait="$1"
        shift
    fi

    # ...which must be followed by a unit name. Check this explicitly:
    # otherwise the 'shift' below fails without any explanation (no args),
    # or we end up running 'systemctl start ""' (empty arg).
    if [[ -z "${1:-}" ]]; then
        echo "systemctl_start invoked without a unit name" >&2
        return 1
    fi

    # ...but beyond that, only one arg is allowed
    local unit="$1"
    shift
    assert "$*" = "" "systemctl_start invoked with spurious args"

    echo "$_LOG_PROMPT systemctl $wait start $unit"
    # $wait is deliberately unquoted: when empty it must vanish entirely
    # instead of being passed to systemctl as an empty argument.
    run systemctl $wait start "$unit"
    echo "$output"
    if [[ $status -eq 0 ]]; then
        return
    fi

    # Failed. This is our value added.
    echo
    echo "***** systemctl start $unit -- FAILED!"
    echo
    echo "$_LOG_PROMPT systemctl status $unit"
    run systemctl status "$unit"
    echo "$output"
    echo
    echo "$_LOG_PROMPT journalctl -xeu $unit"
    run journalctl -xeu "$unit"
    echo "$output"

    # Report failure to the caller now that the logs have been shown
    false
}

install_kube_template() {
# If running from a podman source directory, build and use the source
# version of the play-kube-@ unit file
Expand Down

0 comments on commit 1f42aff

Please sign in to comment.