Skip to content

Commit

Permalink
feat(scancode): Add an option to prefer file- over line-level findings
Browse files Browse the repository at this point in the history
See [1] for a discussion of the `detected_license_expression_spdx` field, in
particular that it "is not merely the accumulation of the underlying
matches".

Optionally making use of this file-level license aligns ORT's behavior
with that of the Double Open Scanner (DOS), see [2], which is useful for
comparison of results.

[1]: aboutcode-org/scancode-toolkit#3458
[2]: https://github.com/doubleopen-project/dos/blob/616c582/apps/api/src/helpers/db_operations.ts#L55-L78

Signed-off-by: Sebastian Schuberth <[email protected]>
  • Loading branch information
sschuberth committed Jan 24, 2024
1 parent 0b858cb commit bc02de5
Show file tree
Hide file tree
Showing 4 changed files with 57 additions and 14 deletions.
18 changes: 16 additions & 2 deletions plugins/scanners/scancode/src/main/kotlin/ScanCode.kt
Original file line number Diff line number Diff line change
Expand Up @@ -55,6 +55,13 @@ import org.semver4j.Semver
* looking up results from the [ScanResultsStorage]. Defaults to [DEFAULT_CONFIGURATION_OPTIONS].
* * **"commandLineNonConfig":** Command line options that do not modify the result and should therefore not be
* considered in [configuration], like "--processes". Defaults to [DEFAULT_NON_CONFIGURATION_OPTIONS].
* * **"preferFileLicense":** A flag to indicate whether the "high-level" per-file license reported by ScanCode starting
* with version 32 should be used instead of the individual "low-level" per-line license findings. The per-file
* license may be different from the conjunction of per-line licenses and is supposed to contain fewer
* false positives. However, no exact line numbers can be associated with the per-file license anymore. If enabled, the
* start line of the per-file license finding is set to the minimum of all start lines for per-line findings in that
* file, the end line is set to the maximum of all end lines for per-line findings in that file, and the score is set
* to the arithmetic average of the scores of all per-line findings in that file. Defaults to `false`.
*/
class ScanCode internal constructor(
name: String,
Expand Down Expand Up @@ -90,7 +97,14 @@ class ScanCode internal constructor(
}
}

override val configuration by lazy { config.commandLine.joinToString(" ") }
// The configuration string is computed lazily: all entries of `config.commandLine`, followed by a
// "--prefer-file-license" marker when `config.preferFileLicense` is set, joined by single spaces. Including the
// marker makes the string differ depending on whether the flag is enabled.
override val configuration by lazy {
buildList {
addAll(config.commandLine)

// Add this in the style of a fake command line option for consistency with the above.
if (config.preferFileLicense) add("--prefer-file-license")
}.joinToString(" ")
}

override val matcher by lazy { ScannerMatcher.create(details, wrapperConfig.matcherConfig) }

Expand Down Expand Up @@ -140,7 +154,7 @@ class ScanCode internal constructor(
}

override fun createSummary(result: String, startTime: Instant, endTime: Instant): ScanSummary =
parseResult(result).toScanSummary()
parseResult(result).toScanSummary(config.preferFileLicense)

/**
* Execute ScanCode with the configured arguments to scan the given [path] and produce [resultFile].
Expand Down
6 changes: 4 additions & 2 deletions plugins/scanners/scancode/src/main/kotlin/ScanCodeConfig.kt
Original file line number Diff line number Diff line change
Expand Up @@ -27,7 +27,8 @@ import org.ossreviewtoolkit.utils.common.splitOnWhitespace

data class ScanCodeConfig(
val commandLine: List<String>,
val commandLineNonConfig: List<String>
val commandLineNonConfig: List<String>,
val preferFileLicense: Boolean
) {
companion object {
/**
Expand Down Expand Up @@ -60,7 +61,8 @@ data class ScanCodeConfig(
ScanCodeConfig(
options["commandLine"]?.splitOnWhitespace() ?: DEFAULT_COMMAND_LINE_OPTIONS,
options["commandLineNonConfig"]?.splitOnWhitespace()
?: DEFAULT_COMMAND_LINE_NON_CONFIG_OPTIONS
?: DEFAULT_COMMAND_LINE_NON_CONFIG_OPTIONS,
options["preferFileLicense"].toBoolean()
)
}
}
Original file line number Diff line number Diff line change
Expand Up @@ -58,7 +58,7 @@ private data class LicenseMatch(
val score: Float
)

fun ScanCodeResult.toScanSummary(): ScanSummary {
fun ScanCodeResult.toScanSummary(preferFileLicense: Boolean = false): ScanSummary {
val licenseFindings = mutableSetOf<LicenseFinding>()
val copyrightFindings = mutableSetOf<CopyrightFinding>()
val issues = mutableListOf<Issue>()
Expand Down Expand Up @@ -91,19 +91,31 @@ fun ScanCodeResult.toScanSummary(): ScanSummary {
it.value.first()
}

licenses.mapTo(licenseFindings) { license ->
// ScanCode uses its own license keys as identifiers in license expressions.
val spdxLicenseExpression = license.licenseExpression.mapLicense(scanCodeKeyToSpdxIdMappings)

LicenseFinding(
license = spdxLicenseExpression,
if (preferFileLicense && file is FileEntry.Version3 && file.detectedLicenseExpressionSpdx != null) {
licenseFindings += LicenseFinding(
license = file.detectedLicenseExpressionSpdx,
location = TextLocation(
path = file.path,
startLine = license.startLine,
endLine = license.endLine
startLine = licenses.minOf { it.startLine },
endLine = licenses.maxOf { it.endLine }
),
score = license.score
score = licenses.map { it.score }.average().toFloat()
)
} else {
licenses.mapTo(licenseFindings) { license ->
// ScanCode uses its own license keys as identifiers in license expressions.
val spdxLicenseExpression = license.licenseExpression.mapLicense(scanCodeKeyToSpdxIdMappings)

LicenseFinding(
license = spdxLicenseExpression,
location = TextLocation(
path = file.path,
startLine = license.startLine,
endLine = license.endLine
),
score = license.score
)
}
}

file.copyrights.mapTo(copyrightFindings) { copyright ->
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -23,6 +23,7 @@ import io.kotest.core.spec.style.FreeSpec
import io.kotest.matchers.Matcher
import io.kotest.matchers.collections.beEmpty
import io.kotest.matchers.collections.containExactlyInAnyOrder
import io.kotest.matchers.collections.shouldContainExactlyInAnyOrder
import io.kotest.matchers.collections.shouldHaveSingleElement
import io.kotest.matchers.collections.shouldHaveSize
import io.kotest.matchers.should
Expand Down Expand Up @@ -86,6 +87,20 @@ class ScanCodeResultParserTest : FreeSpec({
?.license.toString() shouldBe "GPL-2.0-only WITH GCC-exception-2.0"
}
}

// Parses a ScanCode 32.0.8 result asset and converts it with `preferFileLicense = true`, then checks that the
// resulting license findings are exactly the five expressions listed below, in any order.
"get file-level findings with the 'preferFileLicense' option" {
val resultFile = getAssetFile("scancode-32.0.8_spdx-expression-parse_no-license-references.json")

val summary = parseResult(resultFile).toScanSummary(preferFileLicense = true)

// "MIT" is expected twice, so the findings are compared as a list of strings rather than as a set.
summary.licenseFindings.map { it.license.toString() }.shouldContainExactlyInAnyOrder(
"LicenseRef-scancode-generic-cla AND MIT",
"MIT",
"MIT",
"GPL-2.0-only WITH GCC-exception-2.0 AND JSON AND BSD-2-Clause AND CC-BY-3.0 AND MIT",
"GPL-2.0-only WITH GCC-exception-2.0 AND BSD-3-Clause"
)
}
}

for (version in 1..MAX_SUPPORTED_OUTPUT_FORMAT_MAJOR_VERSION) {
Expand Down

0 comments on commit bc02de5

Please sign in to comment.