From 14b83ec1226b6d883fdbafff2039653e7a6db85b Mon Sep 17 00:00:00 2001 From: Wai Cheang Date: Tue, 3 Dec 2024 23:24:39 -0500 Subject: [PATCH] feat(ISV-5447): add multi-arch support to component sbom update Signed-off-by: Wai Cheang --- sbom/test_update_component_sbom.py | 44 ++++++++++++++++++++---------- sbom/update_component_sbom.py | 35 ++++++++++++++++++++++-- utils/get-image-architectures | 9 +++--- 3 files changed, 68 insertions(+), 20 deletions(-) diff --git a/sbom/test_update_component_sbom.py b/sbom/test_update_component_sbom.py index 8b95c50..2409437 100644 --- a/sbom/test_update_component_sbom.py +++ b/sbom/test_update_component_sbom.py @@ -11,7 +11,7 @@ class TestUpdateComponentSBOM(unittest.TestCase): - def test_get_component_to_purls_map(self) -> None: + def test_get_component_to_purls_map_single_arch(self) -> None: release_note_images = [ {"component": "comp1", "purl": "purl1"}, {"component": "comp1", "purl": "purl2"}, @@ -24,6 +24,26 @@ def test_get_component_to_purls_map(self) -> None: "comp2": ["purl3"], } + def test_get_component_to_purls_map_multi_arch(self) -> None: + release_note_images = [ + { + "component": "comp1", + "purl": "pkg:oci/bar@sha256%3Aabcde?arch=amd64&repository_url=registry.io/foo", + "multiarch": True, + "arch": "amd64", + "imageSha": "foosha1", + }, + ] + + result = get_component_to_purls_map(release_note_images) + assert result == { + "comp1": ["pkg:oci/bar@sha256%3Afoosha1?repository_url=registry.io/foo"], + "comp1_amd64": [ + "pkg:oci/bar@sha256%3Afoosha1?arch=amd64&repository_url=registry.io/foo", + "pkg:oci/bar@sha256%3Aabcde?repository_url=registry.io/foo", + ], + } + def test_update_cyclonedx_sbom(self) -> None: sbom = { "metadata": { @@ -98,11 +118,6 @@ def test_update_spdx_sbom(self) -> None: { "name": "comp1", "externalRefs": [ - { - "referenceCategory": "PACKAGE-MANAGER", - "referenceType": "purl", - "referenceLocator": "pkg:oci/package@sha256:123", - }, { "referenceCategory": "PACKAGE-MANAGER", "referenceType": "purl", @@ -120,11 +135,6 @@ def test_update_spdx_sbom(self) -> None: { "name": "comp2", "externalRefs": [ - { - "referenceCategory": "PACKAGE-MANAGER", - "referenceType": "purl", - "referenceLocator": "pkg:oci/package@sha256:456", - }, { "referenceCategory": "PACKAGE-MANAGER", "referenceType": "purl", @@ -155,7 +165,10 @@ def test_update_sboms_with_cyclonedex_format( ) -> None: # combining the content of data.json and sbom, since there can only be one read_data # defined in the mock_open - test_cyclonedx_sbom = {"bomFormat": "CycloneDX", "releaseNotes": {"images": "foo"}} + test_cyclonedx_sbom = { + "bomFormat": "CycloneDX", + "releaseNotes": {"content": {"images": "foo"}}, + } with patch( "builtins.open", mock_open(read_data=json.dumps(test_cyclonedx_sbom)) @@ -182,7 +195,7 @@ def test_update_sboms_with_spdx_format( ) -> None: # combining the content of data.json and sbom, since there can only be one read_data # defined in the mock_open - test_spdx_sbom = {"spdxVersion": "2.3", "releaseNotes": {"images": "foo"}} + test_spdx_sbom = {"spdxVersion": "2.3", "releaseNotes": {"content": {"images": "foo"}}} with patch( "builtins.open", mock_open(read_data=json.dumps(test_spdx_sbom)) @@ -207,7 +220,10 @@ def test_update_sboms_with_wrong_format( ) -> None: # combining the content of data.json and sbom, since there can only be one read_data # defined in the mock_open - test_spdx_sbom = {"notSbom": "NoSbomVersion", "releaseNotes": {"images": "foo"}} + test_spdx_sbom = { + "notSbom": "NoSbomVersion", + "releaseNotes": {"content": {"images": "foo"}}, + } with patch( "builtins.open", mock_open(read_data=json.dumps(test_spdx_sbom)) diff --git a/sbom/update_component_sbom.py b/sbom/update_component_sbom.py index 4b920f8..71d507c 100755 --- a/sbom/update_component_sbom.py +++ b/sbom/update_component_sbom.py @@ -9,6 +9,7 @@ import os from collections import defaultdict from typing import DefaultDict, Dict, List +import re from packageurl import PackageURL @@ -19,6 +20,12 @@ def get_component_to_purls_map(images: List[Dict]) -> Dict[str, List[str]]: """ Get dictionary mapping component names to list of image purls. + If the image is single arch, just use the existing purls. + If the image is multi-arch, the purl formats are as follows (SPDX only): + - The index package has one purl with the index sha, and no arch info. + - The child packages have one purl with the index sha and arch info, and one purl with + the child image sha and no arch info. + Args: images: List of image metadata from the given data.json. @@ -30,7 +37,25 @@ def get_component_to_purls_map(images: List[Dict]) -> Dict[str, List[str]]: for image in images: component = image["component"] purl = image["purl"] - component_purls[component].append(purl) + arch = image.get("arch") + multiarch = image.get("multiarch", False) + + if multiarch and arch: + # replace sha for index purl + index_sha = image.get("imageSha") + if index_sha: + index_purl = re.sub("sha256%3A.*\\?", f"sha256%3A{index_sha}?", purl) + + # the index purl needs no arch info + component_purls[component] = [re.sub("arch=.*&|&arch=.*$", "", index_purl)] + + component_purls[f"{component}_{arch}"].append(index_purl) + # remove arch from child image digest, since it's already in index purl + component_purls[f"{component}_{arch}"].append( + re.sub("arch=.*&|&arch=.*$", "", purl) + ) + else: + component_purls[component].append(purl) LOG.debug("Component to purl mapping: %s", component_purls) return dict(component_purls) @@ -84,6 +109,10 @@ def update_spdx_sbom(sbom: Dict, component_to_purls_map: Dict[str, List[str]]) - LOG.info("Updating SPDX sbom") for package in sbom["packages"]: if package["name"] in component_to_purls_map: + # Remove existing purls that contain internal repo info + package["externalRefs"] = list( + filter(lambda n: n.get("referenceType") != "purl", package["externalRefs"]) + ) purls = component_to_purls_map[package["name"]] purl_external_refs = [ { @@ -111,7 +140,9 @@ def update_sboms(data_path: str, input_path: str, output_path: str) -> None: with open(data_path, "r") as data_file: data = json.load(data_file) - component_to_purls_map = get_component_to_purls_map(data["releaseNotes"]["images"]) + component_to_purls_map = get_component_to_purls_map( + data["releaseNotes"]["content"].get("images", []) + ) # get all json files in input dir input_jsons = glob.glob(os.path.join(input_path, "*.json")) # loop through files diff --git a/utils/get-image-architectures b/utils/get-image-architectures index 0eb35a8..fc63f73 100755 --- a/utils/get-image-architectures +++ b/utils/get-image-architectures @@ -37,7 +37,7 @@ if [ "$ARTIFACT_TYPE" != "null" ] ; then # Just report that the image is for linux/amd64, which is not exactly true - but, # downstream release-service-catalog tasks expect to find something. Use this as a default. jq -cr -n --arg digest "$digest" \ - '{"platform": {"architecture": "amd64", "os": "linux"}, "digest": $ARGS.named["digest"]}' + '{"platform": {"architecture": "amd64", "os": "linux"}, "digest": $ARGS.named["digest"], "multiarch": false}' elif [ $(jq -r '.mediaType' <<< $RAW_OUTPUT) == "application/vnd.oci.image.manifest.v1+json" ] ; then # Single arch, so need to run skopeo inspect again without --raw RAW_OUTPUT=$(skopeo inspect --no-tags docker://${IMAGE}) @@ -46,7 +46,7 @@ elif [ $(jq -r '.mediaType' <<< $RAW_OUTPUT) == "application/vnd.oci.image.manif digest=$(jq -r '.Digest' <<< $RAW_OUTPUT) jq -cr -n --arg architecture "$architecture" --arg os "$os" --arg digest "$digest" \ - '{"platform": {"architecture": $ARGS.named["architecture"], "os": $ARGS.named["os"]}, "digest": $ARGS.named["digest"]}' + '{"platform": {"architecture": $ARGS.named["architecture"], "os": $ARGS.named["os"]}, "digest": $ARGS.named["digest"], "multiarch": false}' elif [ $(jq -r '.mediaType' <<< $RAW_OUTPUT) == "application/vnd.docker.distribution.manifest.v2+json" ] ; then RAW_OUTPUT=$(skopeo inspect --no-tags docker://${IMAGE}) architecture=$(jq -r '.Architecture // ""' <<< $RAW_OUTPUT) @@ -56,8 +56,9 @@ elif [ $(jq -r '.mediaType' <<< $RAW_OUTPUT) == "application/vnd.docker.distribu # tekton bundles produced by tkn do not set the architecture or OS so # default to linux/amd64 jq -cr -n --arg architecture "${architecture:-amd64}" --arg os "${os:-linux}" --arg digest "$digest" \ - '{"platform": {"architecture": $ARGS.named["architecture"], "os": $ARGS.named["os"]}, "digest": $ARGS.named["digest"]}' + '{"platform": {"architecture": $ARGS.named["architecture"], "os": $ARGS.named["os"]}, "digest": $ARGS.named["digest"], "multiarch": false}' else # Multi arch - jq -cr '.manifests[]' <<< $RAW_OUTPUT + manifests=$(jq '.manifests[] += {multiarch: true}' <<< "$RAW_OUTPUT") + jq -cr '.manifests[]' <<< "$manifests" fi