From d6900603103005e975eead77dc89042d90bc7070 Mon Sep 17 00:00:00 2001 From: Sebastian Schuberth Date: Thu, 28 Mar 2024 16:05:34 +0100 Subject: [PATCH] fix(scancode): Filter out non-originary findings that are just references License findings that are references to license findings in other files are now ignored, because they already appear as findings for those other files. Fixes #8190. Signed-off-by: Sebastian Schuberth --- .../scancode/src/main/kotlin/ScanCodeResultModel.kt | 3 ++- .../src/main/kotlin/ScanCodeResultModelMapper.kt | 10 +++++++++- .../src/test/kotlin/ScanCodeResultParserTest.kt | 5 ----- 3 files changed, 11 insertions(+), 7 deletions(-) diff --git a/plugins/scanners/scancode/src/main/kotlin/ScanCodeResultModel.kt b/plugins/scanners/scancode/src/main/kotlin/ScanCodeResultModel.kt index a926a066fad6c..988c7c131112c 100644 --- a/plugins/scanners/scancode/src/main/kotlin/ScanCodeResultModel.kt +++ b/plugins/scanners/scancode/src/main/kotlin/ScanCodeResultModel.kt @@ -157,7 +157,8 @@ sealed interface LicenseEntry { override val score: Float, override val startLine: Int, override val endLine: Int, - override val licenseExpression: String + override val licenseExpression: String, + val fromFile: String? = null // This might be missing in JSON. ) : LicenseEntry } diff --git a/plugins/scanners/scancode/src/main/kotlin/ScanCodeResultModelMapper.kt b/plugins/scanners/scancode/src/main/kotlin/ScanCodeResultModelMapper.kt index d4206e35eb3fe..42643baa41fbb 100644 --- a/plugins/scanners/scancode/src/main/kotlin/ScanCodeResultModelMapper.kt +++ b/plugins/scanners/scancode/src/main/kotlin/ScanCodeResultModelMapper.kt @@ -64,6 +64,7 @@ fun ScanCodeResult.toScanSummary(preferFileLicense: Boolean = false): ScanSummar val issues = mutableListOf() val header = headers.single() + val inputName = header.options.input.first().substringAfterLast('/') val outputFormatVersion = header.outputFormatVersion?.let { Semver(it) } if (outputFormatVersion != null && outputFormatVersion.major > MAX_SUPPORTED_OUTPUT_FORMAT_MAJOR_VERSION) { @@ -82,9 +83,16 @@ fun ScanCodeResult.toScanSummary(preferFileLicense: Boolean = false): ScanSummar ?: files.flatMap { it.scanCodeKeyToSpdxIdMappings }.toMap() filesOfTypeFile.forEach { file -> + val licensesWithoutReferences = file.licenses.filter { + // Note that "fromFile" contains the name of the input directory, see + // https://github.com/nexB/scancode-toolkit/issues/3712. + it !is LicenseEntry.Version3 || it.fromFile == null || it.fromFile == "$inputName/${file.path}" + || it.fromFile == inputName // Input is a single file. + } + // ScanCode creates separate license entries for each license in an expression. Deduplicate these by grouping by // the same expression. - val licenses = file.licenses.groupBy { + val licenses = licensesWithoutReferences.groupBy { LicenseMatch(it.licenseExpression, it.startLine, it.endLine, it.score) }.map { // Arbitrarily take the first of the duplicate license entries. diff --git a/plugins/scanners/scancode/src/test/kotlin/ScanCodeResultParserTest.kt b/plugins/scanners/scancode/src/test/kotlin/ScanCodeResultParserTest.kt index 4ac64602d16ee..34f9f0b798128 100644 --- a/plugins/scanners/scancode/src/test/kotlin/ScanCodeResultParserTest.kt +++ b/plugins/scanners/scancode/src/test/kotlin/ScanCodeResultParserTest.kt @@ -175,11 +175,6 @@ class ScanCodeResultParserTest : FreeSpec({ location = TextLocation("COPYING", 59), score = 100.0f ), - LicenseFinding( - license = "LGPL-2.1-only", - location = TextLocation("COPYING", 1, 502), - score = 100.0f - ), LicenseFinding( license = "LGPL-2.1-only", location = TextLocation("COPYING.LGPLv2.1", 1, 502),