From 89ed04cb3c31d5d0b23d3a4491f90e2d527822dd Mon Sep 17 00:00:00 2001 From: Adam Cmiel Date: Thu, 11 Jul 2024 14:27:50 +0200 Subject: [PATCH] source-build: read base image from SBOM STONEBLD-2607 When the BASE_IMAGES param is empty, fall back to reading the base image from the SBOM of the BINARY_IMAGE. This removes the source-build task's dependency on the BASE_IMAGES_DIGESTS result, finally allowing us to stop returning this result from build tasks. Signed-off-by: Adam Cmiel --- task/source-build-oci-ta/0.1/README.md | 2 +- .../0.1/source-build-oci-ta.yaml | 75 ++++++++++++++--- task/source-build/0.1/README.md | 2 +- task/source-build/0.1/source-build.yaml | 81 ++++++++++++++++--- 4 files changed, 138 insertions(+), 22 deletions(-) diff --git a/task/source-build-oci-ta/0.1/README.md b/task/source-build-oci-ta/0.1/README.md index 43bf1b2d43..5f00e17afa 100644 --- a/task/source-build-oci-ta/0.1/README.md +++ b/task/source-build-oci-ta/0.1/README.md @@ -5,7 +5,7 @@ Source image build. ## Parameters |name|description|default value|required| |---|---|---|---| -|BASE_IMAGES|Base images used to build the binary image. Each image per line in the same order of FROM instructions specified in a multistage Dockerfile. Default to an empty string, which means to skip handling a base image.|""|false| +|BASE_IMAGES|By default, the task inspects the SBOM of the binary image to find the base image. With this parameter, you can override that behavior and pass the base image directly. 
The value should be a newline-separated list of images, in the same order as the FROM instructions specified in a multistage Dockerfile.|""|false| |BINARY_IMAGE|Binary image name from which to generate the source image name.||true| |CACHI2_ARTIFACT|The Trusted Artifact URI pointing to the artifact with the prefetched dependencies.|""|false| |SOURCE_ARTIFACT|The Trusted Artifact URI pointing to the artifact with the application source code.||true| diff --git a/task/source-build-oci-ta/0.1/source-build-oci-ta.yaml b/task/source-build-oci-ta/0.1/source-build-oci-ta.yaml index 8eab41005a..716ef4bef8 100644 --- a/task/source-build-oci-ta/0.1/source-build-oci-ta.yaml +++ b/task/source-build-oci-ta/0.1/source-build-oci-ta.yaml @@ -12,10 +12,11 @@ spec: description: Source image build. params: - name: BASE_IMAGES - description: Base images used to build the binary image. Each image - per line in the same order of FROM instructions specified in a multistage - Dockerfile. Default to an empty string, which means to skip handling - a base image. + description: By default, the task inspects the SBOM of the binary image + to find the base image. With this parameter, you can override that + behavior and pass the base image directly. The value should be a newline-separated + list of images, in the same order as the FROM instructions specified + in a multistage Dockerfile. 
type: string default: "" - name: BINARY_IMAGE @@ -42,6 +43,11 @@ spec: - name: workdir emptyDir: {} stepTemplate: + env: + - name: BASE_IMAGES_FILE + value: /var/workdir/base-images.txt + - name: BINARY_IMAGE + value: $(params.BINARY_IMAGE) volumeMounts: - mountPath: /var/workdir name: workdir @@ -52,16 +58,65 @@ spec: - use - $(params.SOURCE_ARTIFACT)=/var/workdir/source - $(params.CACHI2_ARTIFACT)=/var/workdir/cachi2 + - name: get-base-images + image: quay.io/konflux-ci/appstudio-utils:ab6b0b8e40e440158e7288c73aff1cf83a2cc8a9@sha256:24179f0efd06c65d16868c2d7eb82573cce8e43533de6cea14fec3b7446e0b14 + env: + - name: BASE_IMAGES + value: $(params.BASE_IMAGES) + script: | + #!/usr/bin/env bash + set -euo pipefail + + if [[ -n "$BASE_IMAGES" ]]; then + echo "BASE_IMAGES param received:" + printf "%s" "$BASE_IMAGES" | tee "$BASE_IMAGES_FILE" + exit + fi + + echo "BASE_IMAGES param is empty, inspecting the SBOM instead" + + raw_inspect=$(skopeo inspect --raw "docker://$BINARY_IMAGE") + if manifest_digest=$(jq -e -r '.manifests[0].digest' <<<"$raw_inspect"); then + # The BINARY_IMAGE is an index image, each manifest in the list has its own SBOM. + # We're gonna assume the base images are the same or similar enough in all the SBOMs. + echo "BINARY_IMAGE ($BINARY_IMAGE) is a manifest list, picking an arbitrary image from the list" + image_without_digest=${BINARY_IMAGE%@*} + repo_leaf=${image_without_digest##*/} # a tag can only follow the last "/"; an earlier ":" is a registry port + image=${image_without_digest%"$repo_leaf"}${repo_leaf%%:*}@${manifest_digest} + else + # The image is a single manifest + image=$BINARY_IMAGE + fi + + for i in {1..5}; do + echo "Downloading SBOM for $image (attempt $i)" + sbom=$(cosign download sbom "$image") && break + [[ "$i" -lt 5 ]] && sleep 1 + done + + if [[ -z "$sbom" ]]; then + echo "Failed to download SBOM after 5 attempts. Proceeding anyway." + echo "WARNING: the source image will not include sources for the base image." 
+ exit 0 + fi + + echo -n "Looking for base image in SBOM" + echo " (.formulation[].components[] with 'konflux:container:is_base_image' property)" + # Note: the SBOM should contain at most one image with the is_base_image property - the + # base image for the last FROM instruction. That is the only base image we care about. + jq -r ' + .formulation[]? + | .components[]? + | select(any(.properties[]?; .name == "konflux:container:is_base_image")) + | (.purl | capture("^pkg:oci/.*?@(?<digest>.*?:[a-f0-9]*)")) as $matched + | .name + "@" + $matched.digest + ' <<<"$sbom" | tee "$BASE_IMAGES_FILE" - name: build image: quay.io/konflux-ci/source-container-build:9ad131acf5154d2f280b7b46a1abc543952d325c@sha256:94271c32e4578208ac90308695d2b625d4e932d65f0cdd116b200c39228f5ece workingDir: /var/workdir env: - - name: BINARY_IMAGE - value: $(params.BINARY_IMAGE) - name: SOURCE_DIR value: /var/workdir/source - - name: BASE_IMAGES - value: $(params.BASE_IMAGES) - name: RESULT_FILE value: $(results.BUILD_RESULT.path) - name: CACHI2_ARTIFACTS_DIR @@ -88,11 +143,13 @@ spec: ## git config --global --add safe.directory $SOURCE_DIR + base_images=$(if [[ -f "$BASE_IMAGES_FILE" ]]; then cat "$BASE_IMAGES_FILE"; fi) + ${app_dir}/appenv/bin/python3 ${app_dir}/source_build.py \ --output-binary-image "$BINARY_IMAGE" \ --workspace /var/workdir \ --source-dir "$SOURCE_DIR" \ - --base-images "$BASE_IMAGES" \ + --base-images "$base_images" \ --write-result-to "$RESULT_FILE" \ --cachi2-artifacts-dir "$CACHI2_ARTIFACTS_DIR" \ --registry-allowlist="$registry_allowlist" diff --git a/task/source-build/0.1/README.md b/task/source-build/0.1/README.md index e1f45d9299..f81fac13aa 100644 --- a/task/source-build/0.1/README.md +++ b/task/source-build/0.1/README.md @@ -6,7 +6,7 @@ Source image build. |name|description|default value|required| |---|---|---|---| |BINARY_IMAGE|Binary image name from which to generate the source image name.||true| -|BASE_IMAGES|Base images used to build the binary image. 
Each image per line in the same order of FROM instructions specified in a multistage Dockerfile. Default to an empty string, which means to skip handling a base image.|""|false| +|BASE_IMAGES|By default, the task inspects the SBOM of the binary image to find the base image. With this parameter, you can override that behavior and pass the base image directly. The value should be a newline-separated list of images, in the same order as the FROM instructions specified in a multistage Dockerfile.|""|false| ## Results |name|description| diff --git a/task/source-build/0.1/source-build.yaml b/task/source-build/0.1/source-build.yaml index 41798aea59..e24ad9a0ac 100644 --- a/task/source-build/0.1/source-build.yaml +++ b/task/source-build/0.1/source-build.yaml @@ -16,9 +16,10 @@ spec: type: string - name: BASE_IMAGES description: >- - Base images used to build the binary image. Each image per line in the same order of FROM - instructions specified in a multistage Dockerfile. Default to an empty string, which means - to skip handling a base image. + By default, the task inspects the SBOM of the binary image to find the base image. + With this parameter, you can override that behavior and pass the base image directly. + The value should be a newline-separated list of images, in the same order as the FROM + instructions specified in a multistage Dockerfile. 
type: string default: "" results: @@ -34,7 +35,70 @@ spec: volumes: - name: source-build-work-place emptyDir: {} + stepTemplate: + env: + - name: BINARY_IMAGE + value: "$(params.BINARY_IMAGE)" + - name: BASE_IMAGES_FILE + value: /var/source-build/base-images.txt + volumeMounts: + - name: source-build-work-place + mountPath: /var/source-build steps: + - name: get-base-images + image: quay.io/konflux-ci/appstudio-utils:ab6b0b8e40e440158e7288c73aff1cf83a2cc8a9@sha256:24179f0efd06c65d16868c2d7eb82573cce8e43533de6cea14fec3b7446e0b14 + env: + - name: BASE_IMAGES + value: "$(params.BASE_IMAGES)" + script: | + #!/usr/bin/env bash + set -euo pipefail + + if [[ -n "$BASE_IMAGES" ]]; then + echo "BASE_IMAGES param received:" + printf "%s" "$BASE_IMAGES" | tee "$BASE_IMAGES_FILE" + exit + fi + + echo "BASE_IMAGES param is empty, inspecting the SBOM instead" + + raw_inspect=$(skopeo inspect --raw "docker://$BINARY_IMAGE") + if manifest_digest=$(jq -e -r '.manifests[0].digest' <<< "$raw_inspect"); then + # The BINARY_IMAGE is an index image, each manifest in the list has its own SBOM. + # We're gonna assume the base images are the same or similar enough in all the SBOMs. + echo "BINARY_IMAGE ($BINARY_IMAGE) is a manifest list, picking an arbitrary image from the list" + image_without_digest=${BINARY_IMAGE%@*} + repo_leaf=${image_without_digest##*/} # a tag can only follow the last "/"; an earlier ":" is a registry port + image=${image_without_digest%"$repo_leaf"}${repo_leaf%%:*}@${manifest_digest} + else + # The image is a single manifest + image=$BINARY_IMAGE + fi + + for i in {1..5}; do + echo "Downloading SBOM for $image (attempt $i)" + sbom=$(cosign download sbom "$image") && break + [[ "$i" -lt 5 ]] && sleep 1 + done + + if [[ -z "$sbom" ]]; then + echo "Failed to download SBOM after 5 attempts. Proceeding anyway." + echo "WARNING: the source image will not include sources for the base image." 
+ exit 0 + fi + + echo -n "Looking for base image in SBOM" + echo " (.formulation[].components[] with 'konflux:container:is_base_image' property)" + # Note: the SBOM should contain at most one image with the is_base_image property - the + # base image for the last FROM instruction. That is the only base image we care about. + jq -r ' + .formulation[]? + | .components[]? + | select(any(.properties[]?; .name == "konflux:container:is_base_image")) + | (.purl | capture("^pkg:oci/.*?@(?<digest>.*?:[a-f0-9]*)")) as $matched + | .name + "@" + $matched.digest + ' <<< "$sbom" | tee "$BASE_IMAGES_FILE" + - name: build image: quay.io/konflux-ci/source-container-build:9ad131acf5154d2f280b7b46a1abc543952d325c@sha256:94271c32e4578208ac90308695d2b625d4e932d65f0cdd116b200c39228f5ece # per https://kubernetes.io/docs/concepts/containers/images/#imagepullpolicy-defaulting @@ -51,16 +115,9 @@ spec: capabilities: add: - SETFCAP - volumeMounts: - - name: source-build-work-place - mountPath: /var/source-build env: - - name: BINARY_IMAGE - value: "$(params.BINARY_IMAGE)" - name: SOURCE_DIR value: "$(workspaces.workspace.path)/source" - - name: BASE_IMAGES - value: "$(params.BASE_IMAGES)" - name: RESULT_FILE value: "$(results.BUILD_RESULT.path)" - name: CACHI2_ARTIFACTS_DIR @@ -87,11 +144,13 @@ spec: ## git config --global --add safe.directory $SOURCE_DIR + base_images=$(if [[ -f "$BASE_IMAGES_FILE" ]]; then cat "$BASE_IMAGES_FILE"; fi) + ${app_dir}/appenv/bin/python3 ${app_dir}/source_build.py \ --output-binary-image "$BINARY_IMAGE" \ --workspace /var/source-build \ --source-dir "$SOURCE_DIR" \ - --base-images "$BASE_IMAGES" \ + --base-images "$base_images" \ --write-result-to "$RESULT_FILE" \ --cachi2-artifacts-dir "$CACHI2_ARTIFACTS_DIR" \ --registry-allowlist="$registry_allowlist"