Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

feat: pass EXTERNAL_HOSTNAME to autoheal container for better container names in logs #121

Open
wants to merge 2 commits into
base: main
Choose a base branch
from
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
1 change: 1 addition & 0 deletions Dockerfile
Original file line number Diff line number Diff line change
Expand Up @@ -12,6 +12,7 @@ ENV AUTOHEAL_CONTAINER_LABEL=autoheal \
AUTOHEAL_DEFAULT_STOP_TIMEOUT=10 \
DOCKER_SOCK=/var/run/docker.sock \
CURL_TIMEOUT=30 \
EXTERNAL_HOSTNAME="" \
WEBHOOK_URL="" \
WEBHOOK_JSON_KEY="content" \
APPRISE_URL="" \
Expand Down
1 change: 1 addition & 0 deletions README.md
Original file line number Diff line number Diff line change
Expand Up @@ -61,6 +61,7 @@ AUTOHEAL_DEFAULT_STOP_TIMEOUT=10 # Docker waits max 10 seconds (the Docker def
DOCKER_SOCK=/var/run/docker.sock # Unix socket for curl requests to Docker API
CURL_TIMEOUT=30 # --max-time seconds for curl requests to Docker API
WEBHOOK_URL="" # post message to the webhook if a container was restarted (or restart failed)
EXTERNAL_HOSTNAME="" # optional host name prefixed to container names in log, webhook and post-restart messages; useful when running autoheal on multiple Docker hosts
```

### Optional Container Labels
Expand Down
25 changes: 18 additions & 7 deletions docker-entrypoint
Original file line number Diff line number Diff line change
Expand Up @@ -132,29 +132,40 @@ then
do
STOP_TIMEOUT=".Labels[\"autoheal.stop.timeout\"] // $AUTOHEAL_DEFAULT_STOP_TIMEOUT"
get_container_info | \
jq -r ".[] | select(.Labels[\"autoheal\"] != \"False\") | foreach . as \$CONTAINER([];[]; \$CONTAINER | .Id, .Names[0], .State, ${STOP_TIMEOUT})" | \
while read -r CONTAINER_ID && read -r CONTAINER_NAME && read -r CONTAINER_STATE && read -r TIMEOUT
jq -r ".[] | select(.Labels[\"autoheal\"] != \"False\") | foreach . as \$CONTAINER([];[]; \$CONTAINER | .Id, .Names[0], .State, ${STOP_TIMEOUT}, .Labels[\"com.docker.compose.project\"])" | \
while read -r CONTAINER_ID && read -r CONTAINER_NAME && read -r CONTAINER_STATE && read -r TIMEOUT && read -r COMPOSE_PROJECT
do
# shellcheck disable=2039
CONTAINER_SHORT_ID=${CONTAINER_ID:0:12}
DATE=$(date +%d-%m-%Y" "%H:%M:%S)

# jq -r prints the literal string "null" when the compose project label is
# absent; treat that as "no project" so nothing is prefixed to the name.
if [ "${COMPOSE_PROJECT}" = "null" ]
then
    COMPOSE_PROJECT=""
fi

# Wrap the host name in slashes so it reads as a path-like prefix in front of
# the container name. These statements run once per container read by the
# enclosing loop, so a value that is already wrapped is left alone; without
# this guard every further container handled in the same pass would gain
# another pair of slashes ("//host//", "///host///", ...).
if [ -n "$EXTERNAL_HOSTNAME" ]
then
    case "$EXTERNAL_HOSTNAME" in
        /*/) ;;
        *) EXTERNAL_HOSTNAME="/${EXTERNAL_HOSTNAME}/" ;;
    esac
fi


if [ "$CONTAINER_NAME" = "null" ]
then
echo "$DATE Container name of (${CONTAINER_SHORT_ID}) is null, which implies container does not exist - don't restart" >&2
elif [ "$CONTAINER_STATE" = "restarting" ]
then
echo "$DATE Container $CONTAINER_NAME (${CONTAINER_SHORT_ID}) found to be restarting - don't restart"
echo "$DATE Container ${EXTERNAL_HOSTNAME}${COMPOSE_PROJECT}$CONTAINER_NAME (${CONTAINER_SHORT_ID}) found to be restarting - don't restart"
else
echo "$DATE Container $CONTAINER_NAME (${CONTAINER_SHORT_ID}) found to be unhealthy - Restarting container now with ${TIMEOUT}s timeout"
echo "$DATE Container ${EXTERNAL_HOSTNAME}${COMPOSE_PROJECT}$CONTAINER_NAME (${CONTAINER_SHORT_ID}) found to be unhealthy - Restarting container now with ${TIMEOUT}s timeout"
if ! restart_container "$CONTAINER_ID" "$TIMEOUT"
then
echo "$DATE Restarting container $CONTAINER_SHORT_ID failed" >&2
notify_webhook "Container ${CONTAINER_NAME:1} (${CONTAINER_SHORT_ID}) found to be unhealthy. Failed to restart the container!" &
notify_webhook "Container ${EXTERNAL_HOSTNAME}${COMPOSE_PROJECT}${CONTAINER_NAME:1} (${CONTAINER_SHORT_ID}) found to be unhealthy. Failed to restart the container!" &
else
notify_webhook "Container ${CONTAINER_NAME:1} (${CONTAINER_SHORT_ID}) found to be unhealthy. Successfully restarted the container!" &
notify_webhook "Container ${EXTERNAL_HOSTNAME}${COMPOSE_PROJECT}${CONTAINER_NAME:1} (${CONTAINER_SHORT_ID}) found to be unhealthy. Successfully restarted the container!" &
fi
notify_post_restart_script "$CONTAINER_NAME" "$CONTAINER_SHORT_ID" "$CONTAINER_STATE" "$TIMEOUT" &
notify_post_restart_script "${EXTERNAL_HOSTNAME}${COMPOSE_PROJECT}$CONTAINER_NAME" "$CONTAINER_SHORT_ID" "$CONTAINER_STATE" "$TIMEOUT" &
fi
done
sleep "$AUTOHEAL_INTERVAL" &
Expand Down
1 change: 1 addition & 0 deletions tests/docker-compose.yml
Original file line number Diff line number Diff line change
Expand Up @@ -52,6 +52,7 @@ services:
environment:
AUTOHEAL_CONTAINER_LABEL: "${AUTOHEAL_CONTAINER_LABEL:-all}"
AUTOHEAL_INTERVAL: "10"
EXTERNAL_HOSTNAME: "${EXTERNAL_HOSTNAME:-}"
volumes:
- "/var/run/docker.sock:/var/run/docker.sock"
network_mode: none
9 changes: 5 additions & 4 deletions tests/tests.sh
Original file line number Diff line number Diff line change
Expand Up @@ -3,17 +3,18 @@ set -euxo pipefail

COMPOSE_PROJECT_NAME=${1:-autoheal-test}
export COMPOSE_PROJECT_NAME
export EXTERNAL_HOSTNAME=${HOSTNAME}

# Exit handler (installed via `trap cleanup EXIT`): tears down the compose
# stacks, then exits with the status the script was already exiting with.
function cleanup()
{
# capture $? first, before any later command overwrites it
exit_status=$?
echo "exit was $exit_status"
# stop autoheal first, to stop it restarting the test containers while we try to stop them
docker-compose stop autoheal
# `|| true`: with `set -e` in effect a failing `down` would otherwise abort
# the handler before the final `exit "$exit_status"` is reached
docker-compose -f docker-compose.autoheal.yml -f docker-compose.yml down || true
docker compose stop autoheal
docker compose -f docker-compose.autoheal.yml -f docker-compose.yml down || true
# re-raise the saved status so the caller sees the real result
exit "$exit_status"
}
trap cleanup EXIT
docker-compose up --build -d
docker-compose -f docker-compose.autoheal.yml up --build --exit-code-from watch-autoheal watch-autoheal
docker compose up --build -d
docker compose -f docker-compose.autoheal.yml up --build --exit-code-from watch-autoheal watch-autoheal