From 78be80f2cd89f152c5c82339e9959a7594fab4bb Mon Sep 17 00:00:00 2001 From: Paul Holzinger Date: Thu, 29 Aug 2024 14:13:26 +0200 Subject: [PATCH 1/7] cirrus: fix default timeouts Two weeks ago we had a weird hang in the macos build job on the persitent worker. The task just hang for an hour wasting time. Most tests are fast so we do not need/want such high timeouts. Therefore drop the default timeout to 20 minutes. The integration task also should take under 20m so we can remove the longer timeout there as well. Some system tests need a bit over 30m currently, timeout is set to 35m. For machine tests we use 30m on linux, 45m on windows and 35m on macos to have some extra room there as machine tests have a much higher run to run variance. Signed-off-by: Paul Holzinger --- .cirrus.yml | 17 +++++++++-------- 1 file changed, 9 insertions(+), 8 deletions(-) diff --git a/.cirrus.yml b/.cirrus.yml index f47fd4b5a1..9d28a7dec0 100644 --- a/.cirrus.yml +++ b/.cirrus.yml @@ -71,7 +71,7 @@ env: # Default timeout for each task -timeout_in: 60m +timeout_in: 20m gcp_credentials: ENCRYPTED[a28959877b2c9c36f151781b0a05407218cda646c7d047fc556e42f55e097e897ab63ee78369dae141dcf0b46a9d0cdd] @@ -559,9 +559,6 @@ apiv2_test_task: (changesInclude('**/*.go', '**/*.c', '**/*.h') && !changesIncludeOnly('test/**', 'pkg/machine/e2e/**')) depends_on: *build gce_instance: *standardvm - # Test is normally pretty quick, about 10-minutes. If it hangs, - # don't make developers wait the full 1-hour timeout. - timeout_in: 20m env: <<: *stdenvars TEST_FLAVOR: apiv2 @@ -628,7 +625,6 @@ local_integration_test_task: &local_integration_test_task gce_instance: &fastvm <<: *standardvm cpu: 4 - timeout_in: 30m env: TEST_FLAVOR: int clone_script: *get_gosrc @@ -674,7 +670,6 @@ container_integration_test_task: CTR_FQIN: ${PRIOR_FEDORA_CONTAINER_FQIN} CI_DESIRED_DATABASE: boltdb gce_instance: *fastvm - timeout_in: 30m env: TEST_FLAVOR: int TEST_ENVIRON: container @@ -693,7 +688,6 @@ rootless_integration_test_task: depends_on: *build matrix: *platform_axis gce_instance: *fastvm - timeout_in: 30m env: TEST_FLAVOR: int PRIV_NAME: rootless @@ -719,6 +713,7 @@ podman_machine_task: image: "${VM_IMAGE_NAME}" type: "${EC2_INST_TYPE}" region: us-east-1 + timeout_in: 30m env: EC2_INST_TYPE: "m5zn.metal" # Bare-metal instance is required TEST_FLAVOR: "machine-linux" @@ -738,6 +733,7 @@ podman_machine_aarch64_task: depends_on: *build ec2_instance: <<: *standard_build_ec2_aarch64 + timeout_in: 30m env: TEST_FLAVOR: "machine-linux" EC2_INST_TYPE: c6g.metal @@ -772,6 +768,7 @@ podman_machine_windows_task: <<: *windows type: m5zn.metal platform: windows + timeout_in: 45m env: *winenv matrix: - env: @@ -794,6 +791,7 @@ podman_machine_mac_task: skip: *skip_rhel_release depends_on: *build persistent_worker: *mac_pw + timeout_in: 35m env: <<: *mac_env DISTRO_NV: "darwin" @@ -850,6 +848,7 @@ local_system_test_task: &local_system_test_task depends_on: *build matrix: *platform_axis gce_instance: *standardvm + timeout_in: 35m env: TEST_FLAVOR: sys clone_script: *get_gosrc @@ -864,8 +863,8 @@ local_system_test_aarch64_task: &local_system_test_task_aarch64 # Docs: ./contrib/cirrus/CIModes.md only_if: *only_if_system_test depends_on: *build - persistent_worker: *mac_pw ec2_instance: *standard_build_ec2_aarch64 + timeout_in: 35m env: <<: *stdenvars_aarch64 TEST_FLAVOR: sys @@ -903,6 +902,7 @@ rootless_remote_system_test_task: <<: *local_system_test_task alias: rootless_remote_system_test gce_instance: *standardvm + timeout_in: 35m env: TEST_FLAVOR: sys PODBIN_NAME: remote @@ -917,6 +917,7 @@ rootless_system_test_task: depends_on: *build matrix: *platform_axis gce_instance: *standardvm + timeout_in: 35m env: TEST_FLAVOR: sys PRIV_NAME: rootless From 94431c29b4a76f60ed468db2e951bba4f8ceaa8d Mon Sep 17 00:00:00 2001 From: Paul Holzinger Date: Thu, 29 Aug 2024 14:21:53 +0200 Subject: [PATCH 2/7] cirrus: remove ginkgo-e2e.json artifact It is not used by anybody so we do not have to store these and can safe some time by not having to generate it even if it is just ~500ms. Signed-off-by: Paul Holzinger --- .cirrus.yml | 3 --- Makefile | 4 +--- 2 files changed, 1 insertion(+), 6 deletions(-) diff --git a/.cirrus.yml b/.cirrus.yml index 9d28a7dec0..f48af47dfc 100644 --- a/.cirrus.yml +++ b/.cirrus.yml @@ -635,9 +635,6 @@ local_integration_test_task: &local_integration_test_task ginkgo_node_logs_artifacts: path: ./test/e2e/ginkgo-node-*.log type: text/plain - ginkgo_json_artifacts: - path: ./ginkgo-e2e.json - type: application/json # Nearly identical to `local_integration_test` except all operations diff --git a/Makefile b/Makefile index 65b5615bc8..db73caae6e 100644 --- a/Makefile +++ b/Makefile @@ -136,8 +136,6 @@ GINKGOTIMEOUT ?= -timeout=90m GINKGOWHAT ?= test/e2e/. GINKGO_PARALLEL=y GINKGO ?= ./test/tools/build/ginkgo -# ginkgo json output is only useful in CI, not on developer runs -GINKGO_JSON ?= $(if $(CI),--json-report ginkgo-e2e.json,) # Allow control over some Ginkgo parameters GINKGO_FLAKE_ATTEMPTS ?= 0 @@ -652,7 +650,7 @@ ginkgo-run: .install.ginkgo $(GINKGO) version $(GINKGO) -vv $(TESTFLAGS) --tags "$(TAGS) remote" $(GINKGOTIMEOUT) --flake-attempts $(GINKGO_FLAKE_ATTEMPTS) \ --trace $(if $(findstring y,$(GINKGO_NO_COLOR)),--no-color,) \ - $(GINKGO_JSON) $(if $(findstring y,$(GINKGO_PARALLEL)),-p,) $(if $(FOCUS),--focus "$(FOCUS)",) \ + $(if $(findstring y,$(GINKGO_PARALLEL)),-p,) $(if $(FOCUS),--focus "$(FOCUS)",) \ $(if $(FOCUS_FILE),--focus-file "$(FOCUS_FILE)",) $(GINKGOWHAT) .PHONY: ginkgo From 186f50ad7d893c2d6f29e2c1a383aba7cd5efd05 Mon Sep 17 00:00:00 2001 From: Paul Holzinger Date: Thu, 29 Aug 2024 14:42:06 +0200 Subject: [PATCH 3/7] cirrus: do not upload alt arch cross artifacts They do not add much value to users, first of it compiles podman with cgo disabled which means the included the podman binary is unusable either way. The only goal of the build job is to ensure we can compile on all arches, i.e. go build tags adn types work correctly. The upload if these artifacts alone take over 90s so let's get rid of them to speed up the total CI time. Signed-off-by: Paul Holzinger --- .cirrus.yml | 40 +++++----------------------------------- DOWNLOADS.md | 10 ---------- 2 files changed, 5 insertions(+), 45 deletions(-) diff --git a/.cirrus.yml b/.cirrus.yml index f48af47dfc..37805bc69c 100644 --- a/.cirrus.yml +++ b/.cirrus.yml @@ -240,15 +240,15 @@ alt_build_task: # binary and archive installation zip file. ALT_NAME: 'Windows Cross' # N/B: Referenced by URLencoded strings elsewhere - env: - ALT_NAME: 'Alt Arch. x86 Cross' # N/B: Referenced by URLencoded strings elsewhere + ALT_NAME: 'Alt Arch. x86 Cross' - env: - ALT_NAME: 'Alt Arch. ARM Cross' # N/B: Referenced by URLencoded strings elsewhere + ALT_NAME: 'Alt Arch. ARM Cross' - env: - ALT_NAME: 'Alt Arch. MIPS Cross' # N/B: Referenced by URLencoded strings elsewhere + ALT_NAME: 'Alt Arch. MIPS Cross' - env: - ALT_NAME: 'Alt Arch. MIPS64 Cross' # N/B: Referenced by URLencoded strings elsewhere + ALT_NAME: 'Alt Arch. MIPS64 Cross' - env: - ALT_NAME: 'Alt Arch. Other Cross' # N/B: Referenced by URLencoded strings elsewhere + ALT_NAME: 'Alt Arch. Other Cross' # This task cannot make use of the shared repo.tbz artifact. clone_script: *full_clone setup_script: *setup @@ -1116,36 +1116,6 @@ artifacts_task: - $ARTCURL/Build%20for%20${FEDORA_NAME}/repo/repo.tbz - tar xjf repo.tbz - cp ./bin/* $CIRRUS_WORKING_DIR/ - alt_binaries_intel_script: - - mkdir -p /tmp/alt - - cd /tmp/alt - - $ARTCURL/Alt%20Arch.%20x86%20Cross/repo/repo.tbz - - tar xjf repo.tbz - - mv ./*.tar.gz $CIRRUS_WORKING_DIR/ - alt_binaries_arm_script: - - mkdir -p /tmp/alt - - cd /tmp/alt - - $ARTCURL/Alt%20Arch.%20ARM%20Cross/repo/repo.tbz - - tar xjf repo.tbz - - mv ./*.tar.gz $CIRRUS_WORKING_DIR/ - alt_binaries_mips_script: - - mkdir -p /tmp/alt - - cd /tmp/alt - - $ARTCURL/Alt%20Arch.%20MIPS%20Cross/repo/repo.tbz - - tar xjf repo.tbz - - mv ./*.tar.gz $CIRRUS_WORKING_DIR/ - alt_binaries_mips64_script: - - mkdir -p /tmp/alt - - cd /tmp/alt - - $ARTCURL/Alt%20Arch.%20MIPS64%20Cross/repo/repo.tbz - - tar xjf repo.tbz - - mv ./*.tar.gz $CIRRUS_WORKING_DIR/ - alt_binaries_other_script: - - mkdir -p /tmp/alt - - cd /tmp/alt - - $ARTCURL/Alt%20Arch.%20Other%20Cross/repo/repo.tbz - - tar xjf repo.tbz - - mv ./*.tar.gz $CIRRUS_WORKING_DIR/ win_binaries_script: - mkdir -p /tmp/win - cd /tmp/win diff --git a/DOWNLOADS.md b/DOWNLOADS.md index def26314cc..c5e25ab8fd 100644 --- a/DOWNLOADS.md +++ b/DOWNLOADS.md @@ -52,13 +52,3 @@ matches corresponding changes in the artifacts task. and [darwin_arm64](https://api.cirrus-ci.com/v1/artifact/github/containers/podman/Artifacts/binary/podman-remote-release-darwin_arm64.zip). * Windows [podman-remote](https://api.cirrus-ci.com/v1/artifact/github/containers/podman/Artifacts/binary/podman-remote-release-windows_amd64.zip) for x86_64 only. -* Other podman-remote release builds (includes configuration files & documentation): - * [podman-release-386.tar.gz](https://api.cirrus-ci.com/v1/artifact/github/containers/podman/Artifacts/binary/podman-release-386.tar.gz) - * [podman-release-arm.tar.gz](https://api.cirrus-ci.com/v1/artifact/github/containers/podman/Artifacts/binary/podman-release-arm.tar.gz) - * [podman-release-arm64.tar.gz](https://api.cirrus-ci.com/v1/artifact/github/containers/podman/Artifacts/binary/podman-release-arm64.tar.gz) - * [podman-release-mips.tar.gz](https://api.cirrus-ci.com/v1/artifact/github/containers/podman/Artifacts/binary/podman-release-mips.tar.gz) - * [podman-release-mips64.tar.gz](https://api.cirrus-ci.com/v1/artifact/github/containers/podman/Artifacts/binary/podman-release-mips64.tar.gz) - * [podman-release-mips64le.tar.gz](https://api.cirrus-ci.com/v1/artifact/github/containers/podman/Artifacts/binary/podman-release-mips64le.tar.gz) - * [podman-release-mipsle.tar.gz](https://api.cirrus-ci.com/v1/artifact/github/containers/podman/Artifacts/binary/podman-release-mipsle.tar.gz) - * [podman-release-ppc64le.tar.gz](https://api.cirrus-ci.com/v1/artifact/github/containers/podman/Artifacts/binary/podman-release-ppc64le.tar.gz) - * [podman-release-s390x.tar.gz](https://api.cirrus-ci.com/v1/artifact/github/containers/podman/Artifacts/binary/podman-release-s390x.tar.gz) From 34a7d8dd10decab95bdf0bf804262aca8620bce6 Mon Sep 17 00:00:00 2001 From: Paul Holzinger Date: Thu, 29 Aug 2024 14:51:39 +0200 Subject: [PATCH 4/7] cirrus: remove cross jobs for aarch64 and x86_64 We do build and test aarch64 and x86_64 natively so the cross job doesn't seem to add value. Signed-off-by: Paul Holzinger --- contrib/cirrus/runner.sh | 10 ++-------- 1 file changed, 2 insertions(+), 8 deletions(-) diff --git a/contrib/cirrus/runner.sh b/contrib/cirrus/runner.sh index 5c9209221b..11823c3970 100755 --- a/contrib/cirrus/runner.sh +++ b/contrib/cirrus/runner.sh @@ -275,16 +275,10 @@ function _run_altbuild() { showrun make package ;; Alt*x86*Cross) - arches=(\ - amd64 - 386) - _build_altbuild_archs "${arches[@]}" + _build_altbuild_archs "386" ;; Alt*ARM*Cross) - arches=(\ - arm - arm64) - _build_altbuild_archs "${arches[@]}" + _build_altbuild_archs "arm" ;; Alt*Other*Cross) arches=(\ From ac18b1a0af286934f111c740052950da794abade Mon Sep 17 00:00:00 2001 From: Paul Holzinger Date: Thu, 29 Aug 2024 15:00:48 +0200 Subject: [PATCH 5/7] cirrus: remove 3rd party connectivity check This doesn't help us at all, first the list is outdated. AFAICT we no longer connect to docker.io, registry.fedoraproject.org or podman.cachix.org (seems to be a cache site for nix.dev?) anywhere in our tests. Second a simple port check is not helpful, in the most cases the CDN's and or load balancer accept connections but return internal server errors when the registy goes down. This is very similar to commit 5b6de98ee8. Signed-off-by: Paul Holzinger --- contrib/cirrus/prebuild.sh | 16 ---------------- contrib/cirrus/required_host_ports.txt | 5 ----- 2 files changed, 21 deletions(-) delete mode 100644 contrib/cirrus/required_host_ports.txt diff --git a/contrib/cirrus/prebuild.sh b/contrib/cirrus/prebuild.sh index 65c254f556..22b101cc4f 100755 --- a/contrib/cirrus/prebuild.sh +++ b/contrib/cirrus/prebuild.sh @@ -81,19 +81,3 @@ if [[ "${DISTRO_NV}" == "$PRIOR_FEDORA_NAME" ]]; then showrun bash ${CIRRUS_WORKING_DIR}/.github/actions/check_cirrus_cron/test.sh fi fi - -msg "Checking 3rd party network service connectivity" -# shellcheck disable=SC2154 -cat ${CIRRUS_WORKING_DIR}/${SCRIPT_BASE}/required_host_ports.txt | \ - while read host port - do - if [[ "$port" -eq "443" ]] - then - echo "SSL/TLS to $host:$port" - echo -n '' | \ - err_retry 9 1000 "" openssl s_client -quiet -no_ign_eof -connect $host:$port - else - echo "Connect to $host:$port" - err_retry 9 1000 1 nc -zv -w 13 $host $port - fi - done diff --git a/contrib/cirrus/required_host_ports.txt b/contrib/cirrus/required_host_ports.txt deleted file mode 100644 index 5f066e059c..0000000000 --- a/contrib/cirrus/required_host_ports.txt +++ /dev/null @@ -1,5 +0,0 @@ -github.com 22 -docker.io 443 -quay.io 443 -registry.fedoraproject.org 443 -podman.cachix.org 443 From d5c5261e6f144a3cf88b091f7eddea6f39502b5c Mon Sep 17 00:00:00 2001 From: Paul Holzinger Date: Thu, 29 Aug 2024 16:06:26 +0200 Subject: [PATCH 6/7] cirrus: move renovate check into validate The renovate config is used for the renovate bot, validating this in the prior fedora prebuild setp is just confusing and hidden. The problem is this image is very big so it is slow to download/extract. To speed things up given it is only a single file we check the diff if we even changed it. Now one could argue this should be part of the validate Makefile target but I given the size I do not want this run by default and I am not sure if we should do the diff check in the Makefile. Lastly remove -it, these is meant for interactive use and throws a warning here because we have no actual tty attached. Signed-off-by: Paul Holzinger --- contrib/cirrus/prebuild.sh | 6 ------ contrib/cirrus/runner.sh | 20 ++++++++++++++++++++ 2 files changed, 20 insertions(+), 6 deletions(-) diff --git a/contrib/cirrus/prebuild.sh b/contrib/cirrus/prebuild.sh index 22b101cc4f..e475d93442 100755 --- a/contrib/cirrus/prebuild.sh +++ b/contrib/cirrus/prebuild.sh @@ -65,12 +65,6 @@ if [[ "${DISTRO_NV}" == "$PRIOR_FEDORA_NAME" ]]; then # Tests for pr-should-link-jira showrun ${SCRIPT_BASE}/pr-should-link-jira.t - msg "Checking renovate config." - showrun podman run -it \ - -v ./.github/renovate.json5:/usr/src/app/renovate.json5:z \ - ghcr.io/renovatebot/renovate:latest \ - renovate-config-validator - # Run this during daily cron job to prevent a GraphQL API change/breakage # from impacting every PR. Down-side being if it does fail, a maintainer # will need to do some archaeology to find it. diff --git a/contrib/cirrus/runner.sh b/contrib/cirrus/runner.sh index 11823c3970..0e14ee9b3b 100755 --- a/contrib/cirrus/runner.sh +++ b/contrib/cirrus/runner.sh @@ -22,6 +22,10 @@ source $(dirname $0)/lib.sh showrun echo "starting" function _run_validate-source() { + # This target is only meant to be run on PRs. + # We need the following env vars set for the git diff check below. + req_env_vars CIRRUS_CHANGE_IN_REPO PR_BASE_SHA + showrun make validate-source # make sure PRs have tests @@ -29,6 +33,22 @@ function _run_validate-source() { # make sure PRs have jira links (if needed for branch) showrun make test-jira-links-included + + # shellcheck disable=SC2154 + head=$CIRRUS_CHANGE_IN_REPO + # shellcheck disable=SC2154 + base=$PR_BASE_SHA + echo "_run_validate-source: head=$head base=$base" + diffs=$(git diff --name-only $base $head) + + # If PR touches renovate config validate it, as the image is very big only do so when needed + if grep -E -q "^.github/renovate.json5" <<<"$diffs"; then + msg "Checking renovate config." + showrun podman run \ + -v ./.github/renovate.json5:/usr/src/app/renovate.json5:z \ + ghcr.io/renovatebot/renovate:latest \ + renovate-config-validator + fi } function _run_unit() { From 6a0ab6f7bc79717ee8f6589d5aa1b1d081d090ca Mon Sep 17 00:00:00 2001 From: Paul Holzinger Date: Thu, 29 Aug 2024 16:15:30 +0200 Subject: [PATCH 7/7] cirrus: remove _bail_if_test_can_be_skipped Since commit 55ad0d6e0e we do the conditions in the cirrus.yml directly so there is no longer any need for this. Signed-off-by: Paul Holzinger --- contrib/cirrus/runner.sh | 71 ++-------------------------------------- 1 file changed, 2 insertions(+), 69 deletions(-) diff --git a/contrib/cirrus/runner.sh b/contrib/cirrus/runner.sh index 0e14ee9b3b..ea6235eba4 100755 --- a/contrib/cirrus/runner.sh +++ b/contrib/cirrus/runner.sh @@ -52,8 +52,6 @@ function _run_validate-source() { } function _run_unit() { - _bail_if_test_can_be_skipped test/goecho test/version - # shellcheck disable=SC2154 if [[ "$PODBIN_NAME" != "podman" ]]; then # shellcheck disable=SC2154 @@ -63,8 +61,6 @@ function _run_unit() { } function _run_apiv2() { - _bail_if_test_can_be_skipped test/apiv2 - ( showrun make localapiv2-bash source .venv/requests/bin/activate @@ -73,8 +69,6 @@ function _run_apiv2() { } function _run_compose_v2() { - _bail_if_test_can_be_skipped test/compose - showrun ./test/compose/test-compose |& logformatter } @@ -87,8 +81,6 @@ function _run_sys() { } function _run_upgrade_test() { - _bail_if_test_can_be_skipped test/system test/upgrade - showrun bats test/upgrade |& logformatter } @@ -121,7 +113,6 @@ function _run_endpoint() { } function _run_farm() { - _bail_if_test_can_be_skipped test/farm test/system msg "Testing podman farm." showrun bats test/farm |& logformatter } @@ -245,16 +236,11 @@ function _run_build() { } function _run_altbuild() { - # Subsequent windows-based tasks require a build. Var. defined in .cirrus.yml - # shellcheck disable=SC2154 - if [[ ! "$ALT_NAME" =~ Windows ]]; then - # We can skip all these steps for test-only PRs, but not doc-only ones - _bail_if_test_can_be_skipped docs - fi - local -a arches local arch req_env_vars ALT_NAME + # Var. defined in .cirrus.yml + # shellcheck disable=SC2154 msg "Performing alternate build: $ALT_NAME" msg "************************************************************" set -x @@ -446,59 +432,6 @@ _run_machine-linux() { showrun make localmachine |& logformatter } -# Optimization: will exit if the only PR diffs are under docs/ or tests/ -# with the exception of any given arguments. E.g., don't run e2e or unit -# or bud tests if the only PR changes are in test/system. -function _bail_if_test_can_be_skipped() { - local head base diffs - - # Cirrus sets these for PRs but not branches or cron. In cron and branches, - #we never want to skip. - for v in CIRRUS_CHANGE_IN_REPO CIRRUS_PR DEST_BRANCH; do - if [[ -z "${!v}" ]]; then - msg "[ _cannot do selective skip: \$$v is undefined ]" - return 0 - fi - done - # And if this one *is* defined, it means we're not in PR-land; don't skip. - if [[ -n "$CIRRUS_TAG" ]]; then - msg "[ _cannot do selective skip: \$CIRRUS_TAG is defined ]" - return 0 - fi - - # Defined by Cirrus-CI for all tasks - # shellcheck disable=SC2154 - head=$CIRRUS_CHANGE_IN_REPO - # shellcheck disable=SC2154 - base=$PR_BASE_SHA - echo "_bail_if_test_can_be_skipped: head=$head base=$base" - diffs=$(git diff --name-only $base $head) - - # If PR touches any files in an argument directory, we cannot skip - for subdir in "$@"; do - if grep -E -q "^$subdir/" <<<"$diffs"; then - return 0 - fi - done - - # PR does not touch any files under our input directories. Now see - # if the PR touches files outside of the following directories, by - # filtering these out from the diff results. - for subdir in docs test; do - # || true needed because we're running with set -e - diffs=$(grep -E -v "^$subdir/" <<<"$diffs" || true) - done - - # If we still have diffs, they indicate files outside of docs & test. - # It is not safe to skip. - if [[ -n "$diffs" ]]; then - return 0 - fi - - msg "SKIPPING: This is a doc- and/or test-only PR with no changes under $*" - exit 0 -} - # Nearly every task in .cirrus.yml makes use of this shell script # wrapped by /usr/bin/time to collect runtime statistics. Because the # --output option is used to log stats to a file, every child-process