Skip to content

Commit

Permalink
fix(RELEASE-1345): avoid failing on invalid purl string
Browse files Browse the repository at this point in the history
SBOMs can contain invalid non-rpm purl strings, and
parsing them with the packageurl module would make
us fail.

Avoid these failures by parsing the purl type in our
own function and only passing the purl to packageurl
if it's an rpm purl. Otherwise skip it.

Signed-off-by: Martin Malina <[email protected]>
  • Loading branch information
mmalina committed Dec 13, 2024
1 parent 597145c commit e53ce2e
Show file tree
Hide file tree
Showing 4 changed files with 108 additions and 4 deletions.
23 changes: 23 additions & 0 deletions pyxis/test_upload_rpm_data.py
Original file line number Diff line number Diff line change
Expand Up @@ -10,6 +10,7 @@
update_container_content_sets,
load_sbom_packages,
construct_rpm_items_and_content_sets,
get_purl_type,
)

GRAPHQL_API = "myapiurl"
Expand Down Expand Up @@ -458,3 +459,25 @@ def test_construct_rpm_items_and_content_sets__no_packages_result_in_empty_list(

assert rpms == []
assert content_sets == []


def test_get_purl_type__rpm():
    """A well-formed rpm purl with qualifiers yields the "rpm" type."""
    purl = (
        "pkg:rpm/rhel/acl@2.3.1-4.el9?arch=x86_64&upstream=acl-2.3.1-4.el9.src.rpm"
        "&distro=rhel-9.4&repository_id=myrepo3"
    )

    # Named purl_type rather than type so the builtin is not shadowed.
    purl_type = get_purl_type(purl)

    assert purl_type == "rpm"


def test_get_purl_type__invalid_docker():
    """The type is still extracted from a purl that is otherwise invalid.

    packageurl.PackageURL.from_string() would fail on this purl, but only the
    leading type component is needed here.
    """
    invalid_purl = "pkg:github/docker:/#docker.mirror.hashicorp.services/rhysd/actionlint:latest"

    assert get_purl_type(invalid_purl) == "github"
23 changes: 23 additions & 0 deletions pyxis/test_upload_rpm_data_cyclonedx.py
Original file line number Diff line number Diff line change
Expand Up @@ -11,6 +11,7 @@
load_sbom_components,
check_bom_ref_duplicates,
construct_rpm_items_and_content_sets,
get_purl_type,
)

GRAPHQL_API = "myapiurl"
Expand Down Expand Up @@ -435,3 +436,25 @@ def test_construct_rpm_items_and_content_sets__no_components_result_in_empty_lis

assert rpms == []
assert content_sets == []


def test_get_purl_type__rpm():
    """A well-formed rpm purl with qualifiers yields the "rpm" type."""
    purl = (
        "pkg:rpm/rhel/acl@2.3.1-4.el9?arch=x86_64&upstream=acl-2.3.1-4.el9.src.rpm"
        "&distro=rhel-9.4&repository_id=myrepo3"
    )

    # Named purl_type rather than type so the builtin is not shadowed.
    purl_type = get_purl_type(purl)

    assert purl_type == "rpm"


def test_get_purl_type__invalid_docker():
    """The type is still extracted from a purl that is otherwise invalid.

    packageurl.PackageURL.from_string() would fail on this purl, but only the
    leading type component is needed here.
    """
    invalid_purl = "pkg:github/docker:/#docker.mirror.hashicorp.services/rhysd/actionlint:latest"

    assert get_purl_type(invalid_purl) == "github"
33 changes: 31 additions & 2 deletions pyxis/upload_rpm_data.py
Original file line number Diff line number Diff line change
Expand Up @@ -235,9 +235,10 @@ def construct_rpm_items_and_content_sets(
for externalRef in package.get("externalRefs", []):
if externalRef.get("referenceType") != "purl":
continue
purl_dict = PackageURL.from_string(externalRef["referenceLocator"]).to_dict()
if purl_dict["type"] != "rpm":
type = get_purl_type(externalRef["referenceLocator"])
if type != "rpm":
continue
purl_dict = PackageURL.from_string(externalRef["referenceLocator"]).to_dict()
if purl_dict["name"] in IGNORED_PACKAGES:
continue
rpm_item = {
Expand Down Expand Up @@ -271,6 +272,34 @@ def construct_rpm_items_and_content_sets(
return rpms_items, sorted(content_sets)


def get_purl_type(purl: str) -> str:
    """
    Return the lower-cased purl type parsed from a purl string.

    Copied and adapted from the packageurl package. The reason we need this
    function and cannot simply use the type component of
    packageurl.PackageURL.from_string(purl) is that there can be invalid
    non-rpm purls generated by syft. By getting just the type first and
    skipping those purls, we avoid failing on those invalid purls.

    Only the scheme and the type component are inspected; the remainder of
    the purl is not validated here.

    Raise ValueError if purl is not a string, or if the "pkg" scheme or the
    type component is missing.
    """
    # Without this guard a non-string value (e.g. None) would surface as an
    # AttributeError from .partition() instead of the documented ValueError.
    if not isinstance(purl, str):
        raise ValueError(f"purl must be a string: {repr(purl)}.")

    scheme, sep, remainder = purl.partition(":")
    if not sep or scheme != "pkg":
        raise ValueError(f'purl is missing the required "pkg" scheme component: {repr(purl)}.')

    # Drop the optional slashes after the scheme so that "pkg:/", "pkg://"
    # and "pkg:///" are all accepted.
    remainder = remainder.strip().lstrip("/")

    # The type must be followed by "/"; a bare "pkg:rpm" is rejected.
    purl_type, sep, _ = remainder.partition("/")
    if not purl_type or not sep:
        raise ValueError(f"purl is missing the required type component: {repr(purl)}.")

    return purl_type.lower()


def main(): # pragma: no cover
"""Main func"""
args = parse_arguments()
Expand Down
33 changes: 31 additions & 2 deletions pyxis/upload_rpm_data_cyclonedx.py
Original file line number Diff line number Diff line change
Expand Up @@ -257,8 +257,9 @@ def construct_rpm_items_and_content_sets(
content_sets = set()
for component in components:
if "purl" in component:
purl_dict = PackageURL.from_string(component["purl"]).to_dict()
if purl_dict["type"] == "rpm":
type = get_purl_type(component["purl"])
if type == "rpm":
purl_dict = PackageURL.from_string(component["purl"]).to_dict()
if purl_dict["name"] in IGNORED_PACKAGES:
continue
rpm_item = {
Expand Down Expand Up @@ -292,6 +293,34 @@ def construct_rpm_items_and_content_sets(
return rpms_items, sorted(content_sets)


def get_purl_type(purl: str) -> str:
    """
    Return the lower-cased purl type parsed from a purl string.

    Copied and adapted from the packageurl package. The reason we need this
    function and cannot simply use the type component of
    packageurl.PackageURL.from_string(purl) is that there can be invalid
    non-rpm purls generated by syft. By getting just the type first and
    skipping those purls, we avoid failing on those invalid purls.

    Only the scheme and the type component are inspected; the remainder of
    the purl is not validated here.

    Raise ValueError if purl is not a string, or if the "pkg" scheme or the
    type component is missing.
    """
    # Without this guard a non-string value (e.g. None) would surface as an
    # AttributeError from .partition() instead of the documented ValueError.
    if not isinstance(purl, str):
        raise ValueError(f"purl must be a string: {repr(purl)}.")

    scheme, sep, remainder = purl.partition(":")
    if not sep or scheme != "pkg":
        raise ValueError(f'purl is missing the required "pkg" scheme component: {repr(purl)}.')

    # Drop the optional slashes after the scheme so that "pkg:/", "pkg://"
    # and "pkg:///" are all accepted.
    remainder = remainder.strip().lstrip("/")

    # The type must be followed by "/"; a bare "pkg:rpm" is rejected.
    purl_type, sep, _ = remainder.partition("/")
    if not purl_type or not sep:
        raise ValueError(f"purl is missing the required type component: {repr(purl)}.")

    return purl_type.lower()


def main(): # pragma: no cover
"""Main func"""
args = parse_arguments()
Expand Down

0 comments on commit e53ce2e

Please sign in to comment.