From 20001812c7adc402f7da8538ae8d947a8a205cae Mon Sep 17 00:00:00 2001 From: Sebastian Schuberth Date: Thu, 16 Nov 2023 17:41:10 +0100 Subject: [PATCH] fix: Remove any YAML front matter from ScanCode license files ScanCode 32.0.0 started to prepend its `*.LICENSE` files with YAML-encoded metadata, see [1]. This is a hot fix to remove this header, if present, from the license files. A better solution will be implemented later as part of a larger refactoring of license providers. Different ScanCode versions also differ in whether license files come with a final newline or not. Align on not having a final newline to make tests pass either way. [1]: https://github.com/nexB/scancode-toolkit/pull/3100 Signed-off-by: Sebastian Schuberth --- ...ocument-reporter-expected-output.spdx.json | 2 +- ...document-reporter-expected-output.spdx.yml | 2 +- utils/spdx/src/main/kotlin/Utils.kt | 18 ++++++- utils/spdx/src/test/kotlin/UtilsTest.kt | 54 ++++++++++++++++++- 4 files changed, 71 insertions(+), 5 deletions(-) diff --git a/plugins/reporters/spdx/src/funTest/assets/spdx-document-reporter-expected-output.spdx.json b/plugins/reporters/spdx/src/funTest/assets/spdx-document-reporter-expected-output.spdx.json index 9ddc9622e09ef..a7002a2047470 100644 --- a/plugins/reporters/spdx/src/funTest/assets/spdx-document-reporter-expected-output.spdx.json +++ b/plugins/reporters/spdx/src/funTest/assets/spdx-document-reporter-expected-output.spdx.json @@ -11,7 +11,7 @@ "dataLicense" : "CC0-1.0", "comment" : "some document comment", "hasExtractedLicensingInfos" : [ { - "extractedText" : "ASMUS License\n\nDisclaimer and legal rights\n---------------------------\n\nThis file contains bugs. 
All representations to the contrary are void.\n\nSource code in this file and the accompanying headers and included \nfiles may be distributed free of charge by anyone, as long as full \ncredit is given and any and all liabilities are assumed by the \nrecipient.\n", + "extractedText" : "ASMUS License\n\nDisclaimer and legal rights\n---------------------------\n\nThis file contains bugs. All representations to the contrary are void.\n\nSource code in this file and the accompanying headers and included \nfiles may be distributed free of charge by anyone, as long as full \ncredit is given and any and all liabilities are assumed by the \nrecipient.", "licenseId" : "LicenseRef-scancode-asmus" }, { "extractedText" : "To anyone who acknowledges that the file \"sRGB Color Space Profile.icm\" \nis provided \"AS IS\" WITH NO EXPRESS OR IMPLIED WARRANTY:\npermission to use, copy and distribute this file for any purpose is hereby \ngranted without fee, provided that the file is not changed including the HP \ncopyright notice tag, and that the name of Hewlett-Packard Company not be \nused in advertising or publicity pertaining to distribution of the software \nwithout specific, written prior permission. Hewlett-Packard Company makes \nno representations about the suitability of this software for any purpose.", diff --git a/plugins/reporters/spdx/src/funTest/assets/spdx-document-reporter-expected-output.spdx.yml b/plugins/reporters/spdx/src/funTest/assets/spdx-document-reporter-expected-output.spdx.yml index 9eefa75bb61a2..d3a2abeeec921 100644 --- a/plugins/reporters/spdx/src/funTest/assets/spdx-document-reporter-expected-output.spdx.yml +++ b/plugins/reporters/spdx/src/funTest/assets/spdx-document-reporter-expected-output.spdx.yml @@ -15,7 +15,7 @@ hasExtractedLicensingInfos: \nThis file contains bugs. 
All representations to the contrary are void.\n\nSource\ \ code in this file and the accompanying headers and included \nfiles may be distributed\ \ free of charge by anyone, as long as full \ncredit is given and any and all\ - \ liabilities are assumed by the \nrecipient.\n" + \ liabilities are assumed by the \nrecipient." licenseId: "LicenseRef-scancode-asmus" - extractedText: "To anyone who acknowledges that the file \"sRGB Color Space Profile.icm\"\ \ \nis provided \"AS IS\" WITH NO EXPRESS OR IMPLIED WARRANTY:\npermission to\ diff --git a/utils/spdx/src/main/kotlin/Utils.kt b/utils/spdx/src/main/kotlin/Utils.kt index af7cc287a8886..3c3a45b9dbc08 100644 --- a/utils/spdx/src/main/kotlin/Utils.kt +++ b/utils/spdx/src/main/kotlin/Utils.kt @@ -130,8 +130,12 @@ fun getLicenseTextReader( ): (() -> String)? { return if (id.startsWith(LICENSE_REF_PREFIX)) { getLicenseTextResource(id)?.let { { it.readText() } } - ?: addScanCodeLicenseTextsDir(licenseTextDirectories).firstNotNullOfOrNull { - getLicenseTextFile(id, it)?.let { file -> { file.readText() } } + ?: addScanCodeLicenseTextsDir(licenseTextDirectories).firstNotNullOfOrNull { dir -> + getLicenseTextFile(id, dir)?.let { file -> + { + file.readText().removeYamlFrontMatter() + } + } } } else { SpdxLicense.forId(id.removeSuffix("+"))?.let { { it.text } } @@ -159,5 +163,15 @@ private fun getLicenseTextFile(id: String, dir: File): File? = } } +internal fun String.removeYamlFrontMatter(): String { + val lines = lines() + + // Remove any YAML front matter enclosed by "---" from ScanCode license files. 
+ val licenseLines = lines.takeUnless { it.first() == "---" } + ?: lines.drop(1).dropWhile { it != "---" }.drop(1) + + return licenseLines.dropWhile { it.isEmpty() }.joinToString("\n").trimEnd() +} + private fun addScanCodeLicenseTextsDir(licenseTextDirectories: List<File>): List<File> = (listOfNotNull(scanCodeLicenseTextDir) + licenseTextDirectories).distinct() diff --git a/utils/spdx/src/test/kotlin/UtilsTest.kt b/utils/spdx/src/test/kotlin/UtilsTest.kt index 522e8176c5792..351abe5491bc8 100644 --- a/utils/spdx/src/test/kotlin/UtilsTest.kt +++ b/utils/spdx/src/test/kotlin/UtilsTest.kt @@ -184,7 +184,7 @@ class UtilsTest : WordSpec() { "getLicenseText provided a custom dir" should { "return the custom license text for a license ID not known by ort but in custom dir" { val id = "LicenseRef-ort-abc" - val text = "a\nb\nc\n" + val text = "a\nb\nc" setupTempFile(id, text) @@ -197,5 +197,57 @@ class UtilsTest : WordSpec() { getLicenseText("LicenseRef-not-present", handleExceptions = true, listOf(tempDir)) should beNull() } } + + "removeYamlFrontMatter" should { + "remove a YAML front matter" { + val text = """ + --- + key: alasir + short_name: Alasir Licence + name: The Alasir Licence + category: Proprietary Free + owner: Alasir + homepage_url: http://alasir.com/licence/TAL.txt + spdx_license_key: LicenseRef-scancode-alasir + --- + + The Alasir Licence + + This is a free software. It's provided as-is and carries absolutely no + warranty or responsibility by the author and the contributors, neither in + general nor in particular. No matter if this software is able or unable to + cause any damage to your or third party's computer hardware, software, or any + other asset available, neither the author nor a separate contributor may be + found liable for any harm or its consequences resulting from either proper or + improper use of the software, even if advised of the possibility of certain + injury as such and so forth. 
+ """.trimIndent() + + text.removeYamlFrontMatter() shouldBe """ + The Alasir Licence + + This is a free software. It's provided as-is and carries absolutely no + warranty or responsibility by the author and the contributors, neither in + general nor in particular. No matter if this software is able or unable to + cause any damage to your or third party's computer hardware, software, or any + other asset available, neither the author nor a separate contributor may be + found liable for any harm or its consequences resulting from either proper or + improper use of the software, even if advised of the possibility of certain + injury as such and so forth. + """.trimIndent() + } + + "remove trailing whitespace" { + "last sentence\n".removeYamlFrontMatter() shouldBe "last sentence" + } + + "remove leading empty lines" { + "\nfirst sentence".removeYamlFrontMatter() shouldBe "first sentence" + } + + "keep leading whitespace" { + " indented title".removeYamlFrontMatter() shouldBe " indented title" + } + } } }