production-heartbeat #4051
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
name: production-heartbeat
on:
  schedule:
    # Cron fields are "minute[0-59] hour[0-23] day-of-month[1-31] month[1-12] day-of-week[0-6]". A '*' matches any
    # value, and a comma-separated list matches each listed value. All times are UTC.
    # This expression fires at 13:45, 17:45, and 21:45 UTC (1:45, 5:45, and 9:45 PM UTC), Monday through Friday.
    # The hours were chosen so that the jobs run only close to business hours of Central Time.
    - cron: '45 13,17,21 * * 1,2,3,4,5'
  # Also allow the workflow to be started manually. As per documentation, the colon is necessary even though no
  # config is required.
  workflow_dispatch:
jobs:
  # Installs the sfdx CLI and the scanner plugin on every OS in the matrix, runs the smoke tests against that install,
  # uploads the results, and sends a PagerDuty event if the run failed or was cancelled.
  production-heartbeat:
    strategy:
      # By default, if any job in a matrix fails, all other jobs are immediately cancelled. This makes the jobs run
      # to completion instead.
      fail-fast: false
      matrix:
        # `vm` is the runner image the job runs on; `exe` is the file extension of the smoke-test script to invoke
        # on that image.
        # NOTE(review): confirm that `windows-2019` is still an available hosted runner image. It is left unchanged
        # here because its name is also part of the PagerDuty dedup key built in the final step.
        os:
          - vm: ubuntu-latest
            exe: '.sh'
          - vm: windows-2019
            exe: '.cmd'
        node: ['lts/*']
    runs-on: ${{ matrix.os.vm }}
    timeout-minutes: 60
    steps:
      # === Setup. We need to get the code, set up nodejs, and create the results directory. ===
      - uses: actions/checkout@v4
        with:
          ref: 'release'
      - uses: actions/setup-node@v4
        with:
          node-version: ${{ matrix.node }}
      - run: mkdir smoke-test-results
      # === Set our environment variables, either using default values or the repo's secrets ===
      - name: Set environment variables
        id: env_var_setup
        # We'll want to use bash for this, to avoid any cross-platform shenanigans
        shell: bash
        env:
          # The secrets are handed to the script as environment variables instead of being expanded into the script
          # text, so their contents are always treated as data and never parsed as shell syntax.
          CLI_VERSION_SECRET: ${{ secrets.CLI_VERSION }}
          SCANNER_VERSION_SECRET: ${{ secrets.SCANNER_VERSION }}
          FAIL_SMOKE_TESTS_SECRET: ${{ secrets.FAIL_SMOKE_TESTS }}
        run: |
          # In the following script, the `echo "name=value" >> "$GITHUB_ENV"` structure is used to set/update
          # environment variables. Such updates are visible to all subsequent steps.
          #
          # If the CLI_VERSION repo secret is set, we want to install that version of sfdx-cli, so we set an environment
          # variable. Otherwise, we leave the environment variable unset, so it implicitly defaults to `latest`.
          # Note: This can be used to intentionally fail the GHA by providing an invalid version number.
          if [[ -n "${CLI_VERSION_SECRET}" ]]; then
            echo "CLI_VERSION=@${CLI_VERSION_SECRET}" >> "$GITHUB_ENV"
          fi
          # If the SCANNER_VERSION repo secret is set, we want to install that version of sfdx-scanner, so we set an
          # environment variable. Otherwise, we leave the environment variable unset, so it implicitly defaults to
          # `latest`.
          # Note: This can be used to intentionally fail the GHA by providing an invalid version number.
          if [[ -n "${SCANNER_VERSION_SECRET}" ]]; then
            echo "SCANNER_VERSION=@${SCANNER_VERSION_SECRET}" >> "$GITHUB_ENV"
          fi
          # If the FAIL_SMOKE_TESTS repo secret is set to ANY value, we should respond by deleting the `test/test-jars`
          # folder. The smoke tests expect this folder's contents to exist, so an invocation of `scanner:rule:add`
          # should fail, thereby failing the smoke tests as a whole.
          # Note: This serves no purpose aside from providing a way to simulate a smoke test failure.
          if [[ -n "${FAIL_SMOKE_TESTS_SECRET}" ]]; then
            rm -rf ./test/test-jars
          fi
      # === Make three attempts to install sfdx through npm ===
      - name: Install SFDX
        id: sfdx_install
        # bash is used on every OS so that the `$GITHUB_OUTPUT` redirection below has a single syntax.
        shell: bash
        # If the first attempt fails, wait a minute and try again. After a second failure, wait 5 minutes then try
        # again. Then give up.
        # Set an output parameter, `retry_count`, indicating the number of retry attempts that were made. It is
        # rewritten before every attempt; the last value written is the one later steps read.
        run: |
          # Seconds to wait before attempt 0, 1, and 2 respectively.
          retry_delays=(0 60 300)
          for retry_count in "${!retry_delays[@]}"; do
            echo "retry_count=${retry_count}" >> "$GITHUB_OUTPUT"
            sleep "${retry_delays[retry_count]}"
            # CLI_VERSION is either unset (install `latest`) or of the form `@<version>`.
            if npm install -g "sfdx-cli${CLI_VERSION:-}"; then
              exit 0
            fi
          done
          exit 1
      # === Make three attempts to install the scanner plugin through sfdx ===
      - name: Install Scanner Plugin
        id: scanner_install
        # bash is used on every OS so that the `$GITHUB_OUTPUT` redirection below has a single syntax.
        shell: bash
        # If the first attempt fails, wait a minute and try again. After a second failure, wait 5 minutes then try
        # again. Then give up.
        # Set an output parameter, `retry_count`, indicating the number of retry attempts that were made. It is
        # rewritten before every attempt; the last value written is the one later steps read.
        run: |
          # Seconds to wait before attempt 0, 1, and 2 respectively.
          retry_delays=(0 60 300)
          for retry_count in "${!retry_delays[@]}"; do
            echo "retry_count=${retry_count}" >> "$GITHUB_OUTPUT"
            sleep "${retry_delays[retry_count]}"
            # SCANNER_VERSION is either unset (install `latest`) or of the form `@<version>`.
            if sfdx plugins:install "@salesforce/sfdx-scanner${SCANNER_VERSION:-}"; then
              exit 0
            fi
          done
          exit 1
      # === Log the installed plugins for easier debugging ===
      - name: Log plugins
        run: sfdx plugins
      # === Attempt to execute the smoke tests ===
      - name: Run smoke tests
        id: smoke_tests
        run: smoke-tests/smoke-test${{ matrix.os.exe }} sfdx
      # === Upload the smoke-test-results folder as an artifact ===
      - name: Upload smoke-test-results folder as artifact
        # Upload even when an earlier step failed, so the results are available for debugging.
        if: ${{ always() }}
        uses: actions/upload-artifact@v4
        with:
          # The runner OS is part of the name, so each matrix job uploads a distinctly named artifact.
          name: smoke-test-results-${{ runner.os }}
          path: smoke-test-results
      # === Report any problems ===
      - name: Report problems
        # This step only runs when an earlier step failed or the run was cancelled.
        if: ${{ failure() || cancelled() }}
        shell: bash
        env:
          # The alert is critical when any step outcome collected here is `failure` or `skipped`; otherwise it is sent
          # with `info` severity.
          # We can't use the `failure()` or `cancelled()` convenience methods outside of the `if` condition, hence the
          # `contains()` calls.
          # NOTE(review): `steps.*` presumably only covers the steps above that declare an `id` - confirm.
          IS_CRITICAL: ${{ contains(join(steps.*.outcome), 'failure') || contains(join(steps.*.outcome), 'skipped') }}
          # Build the status strings for each step as environment variables to save space later. Null retry_count
          # values will be replaced with `n/a` to maintain readability in the alert.
          CLI_INSTALL_STATUS: ${{ steps.sfdx_install.outcome }} after ${{ steps.sfdx_install.outputs.retry_count || 'n/a' }} retries
          SCANNER_INSTALL_STATUS: ${{ steps.scanner_install.outcome }} after ${{ steps.scanner_install.outputs.retry_count || 'n/a' }} retries
          SMOKE_TESTS_STATUS: ${{ steps.smoke_tests.outcome }}
          # A link to this run, so the PagerDuty assignee can quickly get here.
          RUN_LINK: https://github.com/${{ github.repository }}/actions/runs/${{ github.run_id }}
          # One dedup key per matrix combination.
          DEDUP_KEY: GH-HB-${{ matrix.os.vm }}-${{ matrix.node }}
          # Passed through the environment so the key is never expanded into the script text.
          PAGERDUTY_ROUTING_KEY: ${{ secrets.PAGERDUTY_HEARTBEAT_KEY }}
        run: |
          # GHA env-vars don't have robust conditional logic, so we'll use this if-else branch to define some bash
          # variables.
          if [[ "${IS_CRITICAL}" == "true" ]]; then
            ALERT_SEV="critical"
            ALERT_SUMMARY="Production heartbeat script failed on ${{ runner.os }}"
          else
            ALERT_SEV="info"
            ALERT_SUMMARY="Production heartbeat script succeeded with retries on ${{ runner.os }}"
          fi
          # Define a helper function to create our POST request's data, to sidestep issues with nested quotations.
          generate_post_data() {
            # This is known as a HereDoc, and it lets us declare multi-line input ending when the specified limit
            # string, in this case EOF, is encountered. The limit string is unquoted, so the shell variables in the
            # body are expanded. The closing EOF must stay at the script's left margin to be recognized.
            cat <<EOF
          {"payload": {
              "summary": "${ALERT_SUMMARY}",
              "source": "Github Actions",
              "severity": "${ALERT_SEV}",
              "custom_details": "SFDX install: ${CLI_INSTALL_STATUS}. Scanner install: ${SCANNER_INSTALL_STATUS}. Smoke tests: ${SMOKE_TESTS_STATUS}."
            },
            "links": [{
              "href": "${RUN_LINK}",
              "text": "Link to action execution"
            }],
            "event_action": "trigger",
            "dedup_key": "${DEDUP_KEY}",
            "routing_key": "${PAGERDUTY_ROUTING_KEY}"
          }
          EOF
          }
          # Make our POST request. The body is JSON, so say so explicitly.
          curl --request POST \
            --header 'Content-Type: application/json' \
            --data "$(generate_post_data)" \
            https://events.pagerduty.com/v2/enqueue