Skip to content

Commit

Permalink
feat(fossid-webapp): Retain snippet choice state in FossID
Browse files Browse the repository at this point in the history
When a file has been marked as identified in the current run or a previous
one, it is not pending anymore, thus its snippets won't be listed.
Consequently, proper license findings cannot be created because the license
and the line range information are neither stored nor returned by FossID.
It is also important to retain which snippet was chosen, as this will allow
handling the deletion of snippet choices (which will be added in a future
commit).
This commit solves the issue by storing the snippet artifact and version in
an identification bound to the scan. Similarly, the license and source
location are stored as JSON in a comment attached to the scan.
Then, when the ORT FossID scanner runs, this information is loaded to
create a proper license finding out of it.

Signed-off-by: Nicolas Nobelis <[email protected]>
  • Loading branch information
nnobelis committed Mar 18, 2024
1 parent 69d3f91 commit 5d74994
Show file tree
Hide file tree
Showing 5 changed files with 277 additions and 17 deletions.
87 changes: 83 additions & 4 deletions plugins/scanners/fossid/src/main/kotlin/FossId.kt
Original file line number Diff line number Diff line change
Expand Up @@ -38,6 +38,8 @@ import kotlinx.coroutines.withTimeoutOrNull
import org.apache.logging.log4j.kotlin.logger

import org.ossreviewtoolkit.clients.fossid.FossIdRestService
import org.ossreviewtoolkit.clients.fossid.addComponentIdentification
import org.ossreviewtoolkit.clients.fossid.addFileComment
import org.ossreviewtoolkit.clients.fossid.checkDownloadStatus
import org.ossreviewtoolkit.clients.fossid.checkResponse
import org.ossreviewtoolkit.clients.fossid.createIgnoreRule
Expand Down Expand Up @@ -79,6 +81,7 @@ import org.ossreviewtoolkit.model.config.ScannerConfiguration
import org.ossreviewtoolkit.model.config.snippet.SnippetChoice
import org.ossreviewtoolkit.model.config.snippet.SnippetChoiceReason
import org.ossreviewtoolkit.model.createAndLogIssue
import org.ossreviewtoolkit.model.jsonMapper
import org.ossreviewtoolkit.scanner.PackageScannerWrapper
import org.ossreviewtoolkit.scanner.ProvenanceScannerWrapper
import org.ossreviewtoolkit.scanner.ScanContext
Expand Down Expand Up @@ -116,6 +119,12 @@ class FossId internal constructor(
@JvmStatic
private val GIT_FETCH_DONE_REGEX = Regex("-> FETCH_HEAD(?: Already up to date.)*$")

/**
 * A regular expression to extract the artifact and version from a PURL returned by FossID.
 * The named group `artifact` captures the run of non-'@' characters between a '/' and the '@' separator, and the
 * named group `version` captures everything after that '@'. For example, `pkg:github/owner/repo@1.0` yields the
 * artifact `repo` and the version `1.0`.
 * Note that the `version` group is not limited to the version itself: any text following it in the input (e.g. PURL
 * qualifiers or a subpath) is captured as well.
 */
@JvmStatic
private val SNIPPET_PURL_REGEX = Regex("^.*/(?<artifact>[^@]+)@(?<version>.+)")

@JvmStatic
private val WAIT_DELAY = 10.seconds

Expand Down Expand Up @@ -905,7 +914,13 @@ class FossId internal constructor(

val snippetLicenseFindings = mutableSetOf<LicenseFinding>()
val snippetFindings = mapSnippetFindings(rawResults, issues, snippetChoices, snippetLicenseFindings)
markFilesWithChosenSnippetsAsIdentified(scanCode, snippetChoices, snippetFindings, rawResults.listPendingFiles)
markFilesWithChosenSnippetsAsIdentified(
scanCode,
snippetChoices,
snippetFindings,
rawResults.listPendingFiles,
snippetLicenseFindings
)

val ignoredFiles = rawResults.listIgnoredFiles.associateBy { it.path }

Expand All @@ -931,15 +946,20 @@ class FossId internal constructor(
}

/**
* Mark all the files having a snippet choice as identified, only if they have no non-chosen source location
* remaining.
* Mark all the files in [snippetChoices] as identified, only after searching in [snippetFindings] that they have no
* non-chosen source location remaining. Only files in [listPendingFiles] are marked.
* Files marked as identified have a license identification and a source location (stored in a comment), using
* [licenseFindings] as reference.
*/
private fun markFilesWithChosenSnippetsAsIdentified(
scanCode: String,
snippetChoices: List<SnippetChoice> = emptyList(),
snippetFindings: Set<SnippetFinding>,
pendingFiles: List<String>
pendingFiles: List<String>,
licenseFindings: Set<LicenseFinding>
) {
val licenseFindingsByPath = licenseFindings.groupBy { it.location.path }

runBlocking(Dispatchers.IO) {
val candidatePathsToMark = snippetChoices.groupBy({ it.given.sourceLocation.path }) {
it.choice.reason
Expand All @@ -962,6 +982,65 @@ class FossId internal constructor(
requests += async {
service.markAsIdentified(config.user, config.apiKey, scanCode, path, false)
}

val filteredSnippetChoicesByPath = snippetChoices.filter {
it.given.sourceLocation.path == path
}

val relevantSnippetChoices = filteredSnippetChoicesByPath.filter {
it.choice.reason == SnippetChoiceReason.ORIGINAL_FINDING
}

relevantSnippetChoices.forEach { filteredSnippetChoice ->
val match = SNIPPET_PURL_REGEX.matchEntire(filteredSnippetChoice.choice.purl.orEmpty())
match?.also {
val artifact = match.groups["artifact"]?.value.orEmpty()
val version = match.groups["version"]?.value.orEmpty()
val location = filteredSnippetChoice.given.sourceLocation

requests += async {
logger.info {
"Adding component identification '$artifact/$version' to '$path' " +
"at ${location.startLine}-${location.endLine}."
}

service.addComponentIdentification(
config.user,
config.apiKey,
scanCode,
path,
artifact,
version,
false
)
}
}
}

// The source location lines of the chosen snippet can be stored neither in the scan nor in the file, so
// they are stored in a comment attached to the identified file instead.
val licenseFindingsByLicense = licenseFindingsByPath[path]?.groupBy({ it.license.toString() }) {
it.location
}.orEmpty()

val relevantChoicesCount = relevantSnippetChoices.size
val notRelevantChoicesCount = filteredSnippetChoicesByPath.count {
it.choice.reason == SnippetChoiceReason.NO_RELEVANT_FINDING
}
val payload = OrtCommentPayload(
licenseFindingsByLicense,
relevantChoicesCount,
notRelevantChoicesCount
)
val comment = OrtComment(payload)
val jsonComment = jsonMapper.writeValueAsString(comment)
requests += async {
logger.info {
"Adding file comment to '$path' with relevant count $relevantChoicesCount and not " +
"relevant count $notRelevantChoicesCount."
}
service.addFileComment(config.user, config.apiKey, scanCode, path, jsonComment)
}
}
}
}
Expand Down
50 changes: 41 additions & 9 deletions plugins/scanners/fossid/src/main/kotlin/FossIdScanResults.kt
Original file line number Diff line number Diff line change
Expand Up @@ -44,13 +44,15 @@ import org.ossreviewtoolkit.model.TextLocation
import org.ossreviewtoolkit.model.config.snippet.SnippetChoice
import org.ossreviewtoolkit.model.config.snippet.SnippetChoiceReason
import org.ossreviewtoolkit.model.createAndLogIssue
import org.ossreviewtoolkit.model.jsonMapper
import org.ossreviewtoolkit.model.mapLicense
import org.ossreviewtoolkit.model.utils.PurlType
import org.ossreviewtoolkit.utils.common.alsoIfNull
import org.ossreviewtoolkit.utils.common.collapseToRanges
import org.ossreviewtoolkit.utils.common.collectMessages
import org.ossreviewtoolkit.utils.common.prettyPrintRanges
import org.ossreviewtoolkit.utils.ort.DeclaredLicenseProcessor
import org.ossreviewtoolkit.utils.ort.ORT_NAME
import org.ossreviewtoolkit.utils.spdx.SpdxConstants
import org.ossreviewtoolkit.utils.spdx.toSpdx

Expand Down Expand Up @@ -90,6 +92,20 @@ internal fun <T : Summarizable> List<T>.mapSummary(
val files = filterNot { it.getFileName() in ignoredFiles }
files.forEach { summarizable ->
val summary = summarizable.toSummary()
var fileComment: OrtComment? = null

if (summarizable is MarkedAsIdentifiedFile) {
summarizable.comments.values.firstOrNull {
it.comment.contains(ORT_NAME)
}?.also {
runCatching {
fileComment = jsonMapper.readValue(it.comment, OrtComment::class.java)
}.onFailure {
logger.error { "Cannot deserialize comment for ${summary.path}: ${it.message}." }
}
}
}

val defaultLocation = TextLocation(summary.path, TextLocation.UNKNOWN_LINE, TextLocation.UNKNOWN_LINE)

summary.licences.forEach { licenseAddedInTheUI ->
Expand All @@ -98,6 +114,14 @@ internal fun <T : Summarizable> List<T>.mapSummary(
}
}

fileComment?.ort?.licenses?.forEach { (licenseInORTComment, locations) ->
locations.forEach { location ->
mapLicense(licenseInORTComment, location, issues, detectedLicenseMapping)?.let {
licenseFindings += it
}
}
}

summarizable.getCopyright().let {
if (it.isNotEmpty()) {
copyrightFindings += CopyrightFinding(it, defaultLocation)
Expand Down Expand Up @@ -262,15 +286,23 @@ internal fun mapSnippetFindings(
findings.map { SnippetFinding(it.key, it.value) }
}.toSet().also {
remainingSnippetChoices.forEach { snippetChoice ->
val message = "The configuration contains a snippet choice for the snippet ${snippetChoice.choice.purl} " +
"at ${snippetChoice.given.sourceLocation.prettyPrint()}, but the FossID result contains no such " +
"snippet."
logger.warn(message)
issues += Issue(
source = "FossId",
message = message,
severity = Severity.WARNING
)
// The issue is created only if the chosen snippet does not correspond to a file marked by a previous run.
val isNotOldMarkedAsIdentifiedFile = rawResults.markedAsIdentifiedFiles.none { markedFile ->
markedFile.file.path == snippetChoice.given.sourceLocation.path
}

if (isNotOldMarkedAsIdentifiedFile) {
val message =
"The configuration contains a snippet choice for the snippet ${snippetChoice.choice.purl} at " +
"${snippetChoice.given.sourceLocation.prettyPrint()}, but the FossID result contains no such " +
"snippet."
logger.warn(message)
issues += Issue(
source = "FossId",
message = message,
severity = Severity.WARNING
)
}
}
}
}
Expand Down
48 changes: 48 additions & 0 deletions plugins/scanners/fossid/src/main/kotlin/OrtComment.kt
Original file line number Diff line number Diff line change
@@ -0,0 +1,48 @@
/*
* Copyright (C) 2023 The ORT Project Authors (see <https://github.com/oss-review-toolkit/ort/blob/main/NOTICE>)
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* https://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*
* SPDX-License-Identifier: Apache-2.0
* License-Filename: LICENSE
*/

package org.ossreviewtoolkit.plugins.scanners.fossid

import org.ossreviewtoolkit.model.TextLocation

/**
 * A comment generated by ORT and attached to a file marked as identified in FossID.
 * It wraps an [OrtCommentPayload], which holds the mapping between license identifiers and their corresponding
 * source locations as well as counters for the snippet choices of the file.
 * The comment is serialized as JSON into the comment of the marked as identified file, hence the single property
 * being named [OrtComment.ort]: the payload is expected to appear under an `ort` key in the JSON, which allows
 * telling ORT comments apart from other comments (assuming the JSON mapper keeps the property name as is).
 */
data class OrtComment(val ort: OrtCommentPayload)

/**
 * The payload of an [OrtComment], describing the snippet choices made for a single file.
 */
data class OrtCommentPayload(
/**
 * The licenses of the chosen snippets, keyed by the license as a string and mapped to the source locations the
 * license applies to.
 */
val licenses: Map<String, List<TextLocation>>,
/**
 * The number of chosen snippets for this file, i.e. the snippet choices with the reason `ORIGINAL_FINDING`.
 */
val snippetChoicesCount: Int,
/**
 * The number of not relevant snippets for this file, i.e. the snippet choices with the reason
 * `NO_RELEVANT_FINDING`.
 */
val notRelevantSnippetsCount: Int
)
Loading

0 comments on commit 5d74994

Please sign in to comment.