From 758fd7a619cc2de3df625e7dd35502fa465955d6 Mon Sep 17 00:00:00 2001 From: Sebastian Schuberth Date: Tue, 9 Jan 2024 09:36:48 +0100 Subject: [PATCH] feat(model): Add functions to en-/decode provenance into PURL extras In order to identify the source code origin for the package a PURL points to, additional qualifiers are required that describe the provenance. Support creating such PURLs so that e.g. source artifact locations can also be encoded as PURLs. Signed-off-by: Sebastian Schuberth --- model/src/main/kotlin/utils/PurlExtensions.kt | 77 +++++++++++++++++ model/src/main/kotlin/utils/PurlUtils.kt | 17 ++++ .../test/kotlin/utils/PurlExtensionsTest.kt | 82 +++++++++++++++++++ 3 files changed, 176 insertions(+) create mode 100644 model/src/test/kotlin/utils/PurlExtensionsTest.kt diff --git a/model/src/main/kotlin/utils/PurlExtensions.kt b/model/src/main/kotlin/utils/PurlExtensions.kt index 8770ad6b81dbc..63f0b30f8d616 100644 --- a/model/src/main/kotlin/utils/PurlExtensions.kt +++ b/model/src/main/kotlin/utils/PurlExtensions.kt @@ -19,8 +19,19 @@ package org.ossreviewtoolkit.model.utils +import java.net.URLDecoder + +import org.ossreviewtoolkit.model.ArtifactProvenance +import org.ossreviewtoolkit.model.Hash import org.ossreviewtoolkit.model.Identifier +import org.ossreviewtoolkit.model.KnownProvenance import org.ossreviewtoolkit.model.Package +import org.ossreviewtoolkit.model.Provenance +import org.ossreviewtoolkit.model.RemoteArtifact +import org.ossreviewtoolkit.model.RepositoryProvenance +import org.ossreviewtoolkit.model.UnknownProvenance +import org.ossreviewtoolkit.model.VcsInfo +import org.ossreviewtoolkit.model.VcsType /** * Map a [Package]'s type to the string representation of the respective [PurlType], or fall back to [PurlType.GENERIC] @@ -62,3 +73,69 @@ fun Identifier.getPurlType() = @JvmOverloads fun Identifier.toPurl(qualifiers: Map = emptyMap(), subpath: String = "") = if (this == Identifier.EMPTY) "" else createPurl(getPurlType(), namespace, 
name, version, qualifiers, subpath)
+
+/**
+ * Encode a [KnownProvenance] to extra qualifying data / a subpath of a PURL.
+ */
+internal fun KnownProvenance.toPurlExtras(): PurlExtras =
+    when (this) {
+        is ArtifactProvenance -> with(sourceArtifact) {
+            val checksum = "${hash.algorithm.name.lowercase()}:${hash.value}"
+            PurlExtras(
+                "download_url" to url,
+                "checksum" to checksum
+            )
+        }
+
+        is RepositoryProvenance -> with(vcsInfo) {
+            PurlExtras(
+                "vcs_type" to type.toString(),
+                "vcs_url" to url,
+                "vcs_revision" to revision,
+                "resolved_revision" to resolvedRevision,
+                subpath = vcsInfo.path
+            )
+        }
+    }
+
+/**
+ * Decode [Provenance] from the percent-encoded qualifiers / the subpath of the PURL represented by this [String].
+ * Return [UnknownProvenance] if neither a "download_url" nor a "vcs_url" qualifier is present.
+ */
+internal fun String.toProvenance(): Provenance {
+    // Split off the subpath first so that it does not leak into the value of the last qualifier.
+    val subpath = substringAfter('#', "")
+    val qualifiers = substringBefore('#').substringAfter('?', "").split('&').filter { '=' in it }.associate {
+        it.substringBefore('=') to URLDecoder.decode(it.substringAfter('='), "UTF-8")
+    }
+
+    fun getQualifierValue(name: String) = qualifiers[name].orEmpty()
+
+    return when {
+        "download_url" in qualifiers -> {
+            // The ':' separator is already percent-decoded. NOTE(review): a missing checksum still throws here.
+            val (algorithm, value) = getQualifierValue("checksum").split(':', limit = 2)
+
+            ArtifactProvenance(
+                sourceArtifact = RemoteArtifact(
+                    url = getQualifierValue("download_url"),
+                    hash = Hash.create(value, algorithm)
+                )
+            )
+        }
+
+        "vcs_url" in qualifiers -> {
+            RepositoryProvenance(
+                vcsInfo = VcsInfo(
+                    type = VcsType.forName(getQualifierValue("vcs_type")),
+                    url = getQualifierValue("vcs_url"),
+                    revision = getQualifierValue("vcs_revision"),
+                    path = URLDecoder.decode(subpath, "UTF-8")
+                ),
+                resolvedRevision = getQualifierValue("resolved_revision")
+            )
+        }
+
+        else -> UnknownProvenance
+    }
+}
diff --git a/model/src/main/kotlin/utils/PurlUtils.kt b/model/src/main/kotlin/utils/PurlUtils.kt
index 961280d9f50fc..ff40c45241324 100644
--- a/model/src/main/kotlin/utils/PurlUtils.kt
+++ 
b/model/src/main/kotlin/utils/PurlUtils.kt
@@ -56,6 +56,23 @@ enum class PurlType(private val value: String) {
     override fun toString() = value
 }
 
+/**
+ * Extra data that can be appended to a "clean" PURL via qualifiers or a subpath.
+ */
+data class PurlExtras(
+    /**
+     * Extra qualifying data as key / value pairs. Needs to be percent-encoded when used in a query string.
+     */
+    val qualifiers: Map<String, String>,
+
+    /**
+     * A subpath relative to the root of the package.
+     */
+    val subpath: String
+) {
+    constructor(vararg qualifiers: Pair<String, String>, subpath: String = "") : this(qualifiers.toMap(), subpath)
+}
+
 /**
  * Create the canonical [package URL](https://github.com/package-url/purl-spec) ("purl") based on given properties:
  * [type] (which must be a String representation of a [PurlType] instance, [namespace], [name] and [version].
diff --git a/model/src/test/kotlin/utils/PurlExtensionsTest.kt b/model/src/test/kotlin/utils/PurlExtensionsTest.kt
new file mode 100644
index 0000000000000..c6729b76c91bb
--- /dev/null
+++ b/model/src/test/kotlin/utils/PurlExtensionsTest.kt
@@ -0,0 +1,82 @@
+/*
+ * Copyright (C) 2024 The ORT Project Authors (see <https://github.com/oss-review-toolkit/ort/blob/main/NOTICE>)
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *     https://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ *
+ * SPDX-License-Identifier: Apache-2.0
+ * License-Filename: LICENSE
+ */
+
+package org.ossreviewtoolkit.model.utils
+
+import io.kotest.core.spec.style.StringSpec
+import io.kotest.matchers.shouldBe
+
+import org.ossreviewtoolkit.model.ArtifactProvenance
+import org.ossreviewtoolkit.model.Hash
+import org.ossreviewtoolkit.model.HashAlgorithm
+import org.ossreviewtoolkit.model.Identifier
+import org.ossreviewtoolkit.model.RemoteArtifact
+import org.ossreviewtoolkit.model.RepositoryProvenance
+import org.ossreviewtoolkit.model.UnknownProvenance
+import org.ossreviewtoolkit.model.VcsInfo
+import org.ossreviewtoolkit.model.VcsType
+
+class PurlExtensionsTest : StringSpec({
+    "Artifact provenance can be converted to PURL extras and back" {
+        val identifier = Identifier("Maven:com.example:sources:1.2.3")
+        val artifactProvenance = ArtifactProvenance(
+            sourceArtifact = RemoteArtifact(
+                url = "http://example.com/sources.zip",
+                hash = Hash(value = "ddce269a1e3d054cae349621c198dd52", algorithm = HashAlgorithm.MD5)
+            )
+        )
+
+        // Encode the provenance into the qualifiers / subpath of the package's PURL.
+        val (qualifiers, subpath) = artifactProvenance.toPurlExtras()
+        val purl = identifier.toPurl(qualifiers, subpath)
+
+        // The download URL and the "<algorithm>:<value>" checksum end up percent-encoded in the query string.
+        purl shouldBe "pkg:maven/com.example/sources@1.2.3?" +
+            "download_url=http%3A%2F%2Fexample.com%2Fsources.zip&" +
+            "checksum=md5%3Addce269a1e3d054cae349621c198dd52"
+
+        purl.toProvenance() shouldBe artifactProvenance
+    }
+
+    "Repository provenance can be converted to PURL extras and back" {
+        val identifier = Identifier("Maven:com.example:sources:1.2.3")
+        val repositoryProvenance = RepositoryProvenance(
+            vcsInfo = VcsInfo(
+                type = VcsType.GIT,
+                url = "https://github.com/apache/commons-text.git",
+                revision = "7643b12421100d29fd2b78053e77bcb04a251b2e"
+            ),
+            resolvedRevision = "7643b12421100d29fd2b78053e77bcb04a251b2e"
+        )
+
+        val (qualifiers, subpath) = repositoryProvenance.toPurlExtras()
+        val purl = identifier.toPurl(qualifiers, subpath)
+
+        purl shouldBe "pkg:maven/com.example/sources@1.2.3?" +
+            "vcs_type=Git&" +
+            "vcs_url=https%3A%2F%2Fgithub.com%2Fapache%2Fcommons-text.git&" +
+            "vcs_revision=7643b12421100d29fd2b78053e77bcb04a251b2e&" +
+            "resolved_revision=7643b12421100d29fd2b78053e77bcb04a251b2e"
+        purl.toProvenance() shouldBe repositoryProvenance
+    }
+
+    "A clean PURL has unknown provenance" {
+        "pkg:npm/mime-db@1.33.0".toProvenance() shouldBe UnknownProvenance
+    }
+})