From 654b14265412241bb90e3445229cbf5834990784 Mon Sep 17 00:00:00 2001 From: Martin Nonnenmacher Date: Wed, 31 Jan 2024 00:59:55 +0100 Subject: [PATCH] feat(spdx-utils): Add a new SPDX expression parser implementation Add a handwritten parser for SPDX expressions which will replace the parser generated by ANTLR in a later commit. Having a parser that is implemented in pure Kotlin will make it possible to make `spdx-utils` a multiplatform module. Performance tests with various SPDX expressions have shown that the handwritten parser is on average seven times faster than the one generated by ANTLR. Signed-off-by: Martin Nonnenmacher --- .../spdx/src/main/kotlin/parser/Exceptions.kt | 52 +++ .../main/kotlin/parser/SpdxExpressionLexer.kt | 103 ++++++ .../kotlin/parser/SpdxExpressionParser.kt | 196 ++++++++++++ utils/spdx/src/main/kotlin/parser/Token.kt | 36 +++ .../kotlin/parser/SpdxExpressionLexerTest.kt | 255 +++++++++++++++ .../kotlin/parser/SpdxExpressionParserTest.kt | 297 ++++++++++++++++++ 6 files changed, 939 insertions(+) create mode 100644 utils/spdx/src/main/kotlin/parser/Exceptions.kt create mode 100644 utils/spdx/src/main/kotlin/parser/SpdxExpressionLexer.kt create mode 100644 utils/spdx/src/main/kotlin/parser/SpdxExpressionParser.kt create mode 100644 utils/spdx/src/main/kotlin/parser/Token.kt create mode 100644 utils/spdx/src/test/kotlin/parser/SpdxExpressionLexerTest.kt create mode 100644 utils/spdx/src/test/kotlin/parser/SpdxExpressionParserTest.kt diff --git a/utils/spdx/src/main/kotlin/parser/Exceptions.kt b/utils/spdx/src/main/kotlin/parser/Exceptions.kt new file mode 100644 index 0000000000000..b5e4258c030ef --- /dev/null +++ b/utils/spdx/src/main/kotlin/parser/Exceptions.kt @@ -0,0 +1,52 @@ +/* + * Copyright (C) 2024 The ORT Project Authors (see ) + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. 
+ * You may obtain a copy of the License at
+ *
+ *     https://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ *
+ * SPDX-License-Identifier: Apache-2.0
+ * License-Filename: LICENSE
+ */
+
+package org.ossreviewtoolkit.utils.spdx.parser
+
+import kotlin.reflect.KClass
+
+import org.ossreviewtoolkit.utils.spdx.SpdxException
+
+/**
+ * Indicates that the [SpdxExpressionLexer] found the unexpected character [char] at the 1-based [position].
+ */
+class SpdxExpressionLexerException(val char: Char, val position: Int) :
+    SpdxException("Unexpected character '$char' at position $position.")
+
+/**
+ * An exception to indicate that an [SpdxExpressionParser] error occurred. [token] is the unexpected token that caused
+ * the exception, if it is `null` that means the end of the input was reached unexpectedly. [expectedTokenTypes] are the
+ * expected token types, if available.
+ */
+class SpdxExpressionParserException(
+    val token: Token?,
+    vararg val expectedTokenTypes: KClass<out Token> = emptyArray()
+) : SpdxException(
+    buildString {
+        append("Unexpected token '$token'")
+
+        if (expectedTokenTypes.size == 1) {
+            append(", expected ${expectedTokenTypes.first().simpleName}")
+        } else if (expectedTokenTypes.size > 1) {
+            append(", expected one of ${expectedTokenTypes.joinToString { it.simpleName.orEmpty() }}")
+        }
+
+        append(".")
+    }
+)
diff --git a/utils/spdx/src/main/kotlin/parser/SpdxExpressionLexer.kt b/utils/spdx/src/main/kotlin/parser/SpdxExpressionLexer.kt
new file mode 100644
index 0000000000000..cb22937f9cdc0
--- /dev/null
+++ b/utils/spdx/src/main/kotlin/parser/SpdxExpressionLexer.kt
@@ -0,0 +1,119 @@
+/*
+ * Copyright (C) 2024 The ORT Project Authors (see )
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *     https://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ *
+ * SPDX-License-Identifier: Apache-2.0
+ * License-Filename: LICENSE
+ */
+
+package org.ossreviewtoolkit.utils.spdx.parser
+
+import org.ossreviewtoolkit.utils.common.nextOrNull
+
+/**
+ * A lexer for SPDX expressions. It consumes a sequence of characters and produces a sequence of [Token]s. For details
+ * on the grammar see [SpdxExpressionParser].
+ */
+class SpdxExpressionLexer(input: Sequence<Char>) {
+    constructor(input: String) : this(input.asSequence())
+
+    companion object {
+        /** The uppercase characters allowed by the SPDX specification. */
+        private val UPPERCASE = 'A'..'Z'
+        /** The lowercase characters allowed by the SPDX specification. */
+        private val LOWERCASE = 'a'..'z'
+        /** The digits allowed by the SPDX specification. */
+        private val DIGITS = '0'..'9'
+
+        /** Return true if the character is an uppercase character allowed by the SPDX specification. */
+        private fun Char.isUpper() = this in UPPERCASE
+        /** Return true if the character is a lowercase character allowed by the SPDX specification. */
+        private fun Char.isLower() = this in LOWERCASE
+        /** Return true if the character is a digit allowed by the SPDX specification. */
+        private fun Char.isDigit() = this in DIGITS
+        /** Return true if the character is a valid identifier character allowed by the SPDX specification. */
+        private fun Char.isIdentifier() = isUpper() || isLower() || isDigit() || this == '.' || this == '-'
+    }
+
+    /** The iterator over the input characters. */
+    private val iterator = input.iterator()
+    /** The one-character lookahead: the next character that was not consumed yet, or `null` at the end of the input. */
+    private var next = iterator.nextOrNull()
+    /** The 1-based position of the most recently consumed character, or 0 if no character was consumed yet. */
+    private var position = 0
+
+    /**
+     * Return the [Token]s of the input as a sequence that is evaluated while it is iterated. Iterating the sequence
+     * throws an [SpdxExpressionLexerException] when a character is reached that cannot be part of any token.
+     */
+    fun tokens(): Sequence<Token> = generateSequence { nextToken() }
+
+    /**
+     * Skip any spaces and return the next [Token], or `null` if the end of the input is reached. Throw an
+     * [SpdxExpressionLexerException] if a character is found that is neither a space nor part of a token.
+     */
+    private fun nextToken(): Token? {
+        var cur = consumeChar()
+
+        while (cur != null) {
+            if (cur == ' ') {
+                cur = consumeChar()
+                continue
+            }
+
+            if (cur == '(') return Token.OPEN(position)
+            if (cur == ')') return Token.CLOSE(position)
+            if (cur == '+') return Token.PLUS(position)
+            if (cur == ':') return Token.COLON(position)
+
+            if (cur.isIdentifier()) {
+                // As [cur] was just consumed, [position] is the position of the first character of the identifier.
+                val start = position
+                val value = buildString {
+                    append(cur)
+
+                    while (next?.isIdentifier() == true) {
+                        cur = consumeChar()
+                        append(cur)
+                    }
+                }
+
+                return when (value.uppercase()) {
+                    "AND" -> Token.AND(start)
+                    "OR" -> Token.OR(start)
+                    "WITH" -> Token.WITH(start)
+                    else -> when {
+                        value.startsWith("DocumentRef-") -> Token.DOCUMENTREF(start, position, value)
+                        value.startsWith("LicenseRef-") -> Token.LICENSEREF(start, position, value)
+                        else -> Token.IDENTIFIER(start, position, value)
+                    }
+                }
+            }
+
+            // Any other character, including tabs and line breaks, cannot be part of a token.
+            cur?.let { throw SpdxExpressionLexerException(it, position) }
+        }
+
+        return null
+    }
+
+    /**
+     * Return the [next] character and advance the lookahead and [position], or return `null` at the end of the input.
+     */
+    private fun consumeChar(): Char? =
+        next?.also {
+            position++
+            next = iterator.nextOrNull()
+        }
+}
diff --git a/utils/spdx/src/main/kotlin/parser/SpdxExpressionParser.kt b/utils/spdx/src/main/kotlin/parser/SpdxExpressionParser.kt
new file mode 100644
index 0000000000000..bab5d8a5c3d4b
--- /dev/null
+++ b/utils/spdx/src/main/kotlin/parser/SpdxExpressionParser.kt
@@ -0,0 +1,196 @@
+/*
+ * Copyright (C) 2024 The ORT Project Authors (see )
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *     https://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ *
+ * SPDX-License-Identifier: Apache-2.0
+ * License-Filename: LICENSE
+ */
+
+package org.ossreviewtoolkit.utils.spdx.parser
+
+import org.ossreviewtoolkit.utils.common.nextOrNull
+import org.ossreviewtoolkit.utils.spdx.SpdxCompoundExpression
+import org.ossreviewtoolkit.utils.spdx.SpdxExpression
+import org.ossreviewtoolkit.utils.spdx.SpdxLicenseIdExpression
+import org.ossreviewtoolkit.utils.spdx.SpdxLicenseReferenceExpression
+import org.ossreviewtoolkit.utils.spdx.SpdxLicenseWithExceptionExpression
+import org.ossreviewtoolkit.utils.spdx.SpdxOperator
+
+/**
+ * A parser for SPDX expressions. It consumes a sequence of [Token]s and produces an [SpdxExpression].
+ *
+ * This parser implements the grammar defined in the
+ * [SPDX specification](https://spdx.github.io/spdx-spec/v2.2.2/SPDX-license-expressions/):
+ *
+ * ```
+ * license-expression   -> simple-expression | compound-expression
+ * compound-expression  -> simple-expression |
+ *                         simple-expression "WITH" license-exception-id |
+ *                         compound-expression "AND" compound-expression |
+ *                         compound-expression "OR" compound-expression |
+ *                         "(" compound-expression ")" )
+ * simple-expression    -> license-id | license-id"+" | license-ref
+ * license-ref          -> ["DocumentRef-" idstring ":"] "LicenseRef-" idstring
+ * license-exception-id -> <short form license exception identifier in Annex A.2>
+ * license-id           -> <short form license identifier in Annex A.1>
+ * idstring             -> 1*(ALPHA / DIGIT / "-" / "." )
+ * ```
+ *
+ * For more efficient parsing, this is transformed into the following form which implements the operator precedence as
+ * part of the grammar:
+ *
+ * ```
+ * license-expression -> or-expression
+ * or-expression      -> and-expression ( "OR" and-expression ) *
+ * and-expression     -> primary ( "AND" primary ) *
+ * primary            -> "(" license-expression ")" | simple-expression
+ * simple-expression  -> ( IDENTIFIER [ "+" ] | license-ref ) [ "WITH" ( IDENTIFIER | license-ref ) ]
+ * license-ref        -> [ DOCUMENTREF ":" ] LICENSEREF
+ * ```
+ *
+ * This allows implementing a
+ * [recursive descent parser](https://en.wikipedia.org/wiki/Recursive_descent_parser) with
+ * [Pratt parsing](https://en.wikipedia.org/wiki/Operator-precedence_parser#Pratt_parsing). The implementation is
+ * loosely based on this
+ * [example](https://journal.stuffwithstuff.com/2011/03/19/pratt-parsers-expression-parsing-made-easy/) but with many
+ * simplifications as the SPDX grammar has only one operator per level of precedence and the parser does not need to be
+ * extensible.
+ *
+ * Also, the rules for `license-id` and `license-exception-id` are changed to allow any valid `idstring` as the
+ * [strictness] decides if only the SPDX identifiers are allowed for license and exception ids and therefore these rules
+ * cannot be part of the grammar.
+ *
+ * For backward compatibility with the previously used SPDX expression parser, operators are case-insensitive. This is
+ * also planned for future SPDX versions, see https://github.com/spdx/spdx-spec/pull/876.
+ */
+class SpdxExpressionParser(
+    tokens: Sequence<Token>,
+    private val strictness: SpdxExpression.Strictness = SpdxExpression.Strictness.ALLOW_ANY
+) {
+    constructor(
+        input: String,
+        strictness: SpdxExpression.Strictness = SpdxExpression.Strictness.ALLOW_ANY
+    ) : this(SpdxExpressionLexer(input).tokens(), strictness)
+
+    private val iterator = tokens.iterator()
+    private var next = iterator.nextOrNull()
+
+    /**
+     * Parse all tokens into a single [SpdxExpression], throwing an [SpdxExpressionParserException] on invalid input.
+     */
+    fun parse(): SpdxExpression {
+        val result = parseOrExpression()
+        if (next != null) throw SpdxExpressionParserException(next)
+        return result
+    }
+
+    /**
+     * Parse an OR expression of the form `or-expression -> and-expression ( "OR" and-expression ) *`.
+     */
+    private fun parseOrExpression(): SpdxExpression {
+        var left = parseAndExpression()
+        while (next is Token.OR) {
+            consume<Token.OR>()
+            val right = parseAndExpression()
+            left = SpdxCompoundExpression(left, SpdxOperator.OR, right)
+        }
+        return left
+    }
+
+    /**
+     * Parse an AND expression of the form `and-expression -> primary ( "AND" primary ) *`.
+     */
+    private fun parseAndExpression(): SpdxExpression {
+        var left = parsePrimary()
+        while (next is Token.AND) {
+            consume<Token.AND>()
+            val right = parsePrimary()
+            left = SpdxCompoundExpression(left, SpdxOperator.AND, right)
+        }
+        return left
+    }
+
+    /**
+     * Parse a primary of the form `primary -> "(" license-expression ")" | simple-expression`.
+     */
+    private fun parsePrimary(): SpdxExpression {
+        if (next is Token.OPEN) {
+            consume<Token.OPEN>()
+            val expression = parseOrExpression()
+            consume<Token.CLOSE>()
+            return expression
+        }
+
+        return parseSimpleExpression()
+    }
+
+    /**
+     * Parse a simple expression of the form
+     * `simple-expression -> ( IDENTIFIER [ "+" ] | license-ref ) [ "WITH" ( IDENTIFIER | license-ref ) ]`.
+     */
+    private fun parseSimpleExpression(): SpdxExpression {
+        val left = when (next) {
+            is Token.IDENTIFIER -> {
+                val identifier = consume<Token.IDENTIFIER>()
+
+                val orLaterVersion = next is Token.PLUS || identifier.value.endsWith("-or-later")
+                if (next is Token.PLUS) consume<Token.PLUS>()
+
+                SpdxLicenseIdExpression(identifier.value, orLaterVersion).apply { validate(strictness) }
+            }
+
+            is Token.DOCUMENTREF, is Token.LICENSEREF -> {
+                SpdxLicenseReferenceExpression(consumeLicenseRef()).apply { validate(strictness) }
+            }
+
+            else -> throw SpdxExpressionParserException(next)
+        }
+
+        if (next !is Token.WITH) return left
+
+        consume<Token.WITH>()
+        val exception = when (next) {
+            is Token.IDENTIFIER -> consume<Token.IDENTIFIER>().value
+            is Token.DOCUMENTREF, is Token.LICENSEREF -> consumeLicenseRef()
+            else -> throw SpdxExpressionParserException(
+                next,
+                Token.IDENTIFIER::class,
+                Token.LICENSEREF::class,
+                Token.DOCUMENTREF::class
+            )
+        }
+
+        return SpdxLicenseWithExceptionExpression(left, exception).apply { validate(strictness) }
+    }
+
+    /** Consume a license reference of the form `[ DOCUMENTREF ":" ] LICENSEREF` and return it as a string. */
+    private fun consumeLicenseRef(): String {
+        if (next !is Token.DOCUMENTREF) return consume<Token.LICENSEREF>().value
+
+        val documentRef = consume<Token.DOCUMENTREF>()
+        consume<Token.COLON>() // The lexer emits the ":" separator as its own token, so it has to be skipped.
+        return "${documentRef.value}:${consume<Token.LICENSEREF>().value}"
+    }
+
+    /**
+     * Consume the [next] token and return it if it is of the expected type [T], otherwise throw an
+     * [SpdxExpressionParserException].
+     */
+    private inline fun <reified T : Token> consume(): T {
+        val token = next
+        if (token !is T) throw SpdxExpressionParserException(token, T::class)
+        next = iterator.nextOrNull()
+        return token
+    }
+}
diff --git a/utils/spdx/src/main/kotlin/parser/Token.kt b/utils/spdx/src/main/kotlin/parser/Token.kt
new file mode 100644
index 0000000000000..1b7fec3703d01
--- /dev/null
+++ b/utils/spdx/src/main/kotlin/parser/Token.kt
@@ -0,0 +1,36 @@
+/*
+ * Copyright (C) 2024 The ORT Project Authors (see )
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *     https://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ *
+ * SPDX-License-Identifier: Apache-2.0
+ * License-Filename: LICENSE
+ */
+
+package org.ossreviewtoolkit.utils.spdx.parser
+
+/**
+ * The tokens created by the [SpdxExpressionLexer] and consumed by the [SpdxExpressionParser].
+ */
+sealed class Token { // All positions are 1-based character positions in the input; [start] and [end] are inclusive.
+    data class OPEN(val position: Int) : Token() // The opening parenthesis "(".
+    data class CLOSE(val position: Int) : Token() // The closing parenthesis ")".
+    data class PLUS(val position: Int) : Token() // The "+" suffix of a license identifier.
+    data class COLON(val position: Int) : Token() // The ":" between a document and a license reference.
+    data class AND(val position: Int) : Token() // The "AND" operator, matched case-insensitively by the lexer.
+    data class OR(val position: Int) : Token() // The "OR" operator, matched case-insensitively by the lexer.
+    data class WITH(val position: Int) : Token() // The "WITH" operator, matched case-insensitively by the lexer.
+    data class IDENTIFIER(val start: Int, val end: Int, val value: String) : Token() // Any other idstring.
+    data class LICENSEREF(val start: Int, val end: Int, val value: String) : Token() // Starts with "LicenseRef-".
+    data class DOCUMENTREF(val start: Int, val end: Int, val value: String) : Token() // Starts with "DocumentRef-".
+}
diff --git a/utils/spdx/src/test/kotlin/parser/SpdxExpressionLexerTest.kt b/utils/spdx/src/test/kotlin/parser/SpdxExpressionLexerTest.kt
new file mode 100644
index 0000000000000..9f8d6ca09299a
--- /dev/null
+++ b/utils/spdx/src/test/kotlin/parser/SpdxExpressionLexerTest.kt
@@ -0,0 +1,255 @@
+/*
+ * Copyright (C) 2024 The ORT Project Authors (see )
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *     https://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ *
+ * SPDX-License-Identifier: Apache-2.0
+ * License-Filename: LICENSE
+ */
+
+package org.ossreviewtoolkit.utils.spdx.parser
+
+import io.kotest.assertions.throwables.shouldThrow
+import io.kotest.core.spec.style.FunSpec
+import io.kotest.core.spec.style.scopes.FunSpecContainerScope
+import io.kotest.datatest.withData
+import io.kotest.matchers.sequences.shouldContainExactly
+import io.kotest.matchers.shouldBe
+
+class SpdxExpressionLexerTest : FunSpec({
+    context("identifiers") {
+        verifyTokens(
+            "a" to sequenceOf(
+                Token.IDENTIFIER(1, 1, "a")
+            ),
+            "a+" to sequenceOf(
+                Token.IDENTIFIER(1, 1, "a"),
+                Token.PLUS(2)
+            ),
+            "Apache-2.0" to sequenceOf(
+                Token.IDENTIFIER(1, 10, "Apache-2.0")
+            ),
+            "abcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZ0123456789.-" to sequenceOf(
+                Token.IDENTIFIER(1, 64, "abcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZ0123456789.-")
+            )
+        )
+    }
+
+    context("license references") {
+        verifyTokens(
+            "LicenseRef-a" to sequenceOf(
+                Token.LICENSEREF(1, 12, "LicenseRef-a")
+            ),
+            "LicenseRef-ort-license" to sequenceOf(
+                Token.LICENSEREF(1, 22, "LicenseRef-ort-license")
+            ),
+            "DocumentRef-a:LicenseRef-b" to sequenceOf(
+                Token.DOCUMENTREF(1, 13, "DocumentRef-a"),
+                Token.COLON(14),
+                Token.LICENSEREF(15, 26, "LicenseRef-b")
+            )
+        )
+    }
+
+    context("WITH expressions") {
+        verifyTokens(
+            "a WITH b" to sequenceOf(
+                Token.IDENTIFIER(1, 1, "a"),
+                Token.WITH(3),
+                Token.IDENTIFIER(8, 8, "b")
+            ),
+            "GPL-3.0-or-later WITH GPL-3.0-linking-exception" to sequenceOf(
+                Token.IDENTIFIER(1, 16, "GPL-3.0-or-later"),
+                Token.WITH(18),
+                Token.IDENTIFIER(23, 47, "GPL-3.0-linking-exception")
+            )
+        )
+    }
+
+    context("AND expressions") {
+        verifyTokens(
+            "AND" to sequenceOf(
+                Token.AND(1)
+            ),
+            "AND AND" to sequenceOf(
+                Token.AND(1),
+                Token.AND(5)
+            ),
+            "a AND b" to sequenceOf(
+                Token.IDENTIFIER(1, 1, "a"),
+                Token.AND(3),
+                Token.IDENTIFIER(7, 7, "b")
+            ),
+            "a AND b AND c" to sequenceOf(
+                Token.IDENTIFIER(1, 1, "a"),
+                Token.AND(3),
+                Token.IDENTIFIER(7, 7, "b"),
+                Token.AND(9),
+                Token.IDENTIFIER(13, 13, "c")
+            ),
+            "a AND b AND c AND d" to sequenceOf(
+                Token.IDENTIFIER(1, 1, "a"),
+                Token.AND(3),
+                Token.IDENTIFIER(7, 7, "b"),
+                Token.AND(9),
+                Token.IDENTIFIER(13, 13, "c"),
+                Token.AND(15),
+                Token.IDENTIFIER(19, 19, "d")
+            )
+        )
+    }
+
+    context("OR expressions") {
+        verifyTokens(
+            "OR" to sequenceOf(
+                Token.OR(1)
+            ),
+            "OR OR" to sequenceOf(
+                Token.OR(1),
+                Token.OR(4)
+            ),
+            "a OR b" to sequenceOf(
+                Token.IDENTIFIER(1, 1, "a"),
+                Token.OR(3),
+                Token.IDENTIFIER(6, 6, "b")
+            ),
+            "a OR b OR c" to sequenceOf(
+                Token.IDENTIFIER(1, 1, "a"),
+                Token.OR(3),
+                Token.IDENTIFIER(6, 6, "b"),
+                Token.OR(8),
+                Token.IDENTIFIER(11, 11, "c")
+            ),
+            "a OR b OR c OR d" to sequenceOf(
+                Token.IDENTIFIER(1, 1, "a"),
+                Token.OR(3),
+                Token.IDENTIFIER(6, 6, "b"),
+                Token.OR(8),
+                Token.IDENTIFIER(11, 11, "c"),
+                Token.OR(13),
+                Token.IDENTIFIER(16, 16, "d")
+            )
+        )
+    }
+
+    context("lowercase operators") {
+        verifyTokens(
+            "and" to sequenceOf(Token.AND(1)),
+            "or" to sequenceOf(Token.OR(1)),
+            "with" to sequenceOf(Token.WITH(1))
+        )
+    }
+
+    context("mixed-case operators") {
+        verifyTokens(
+            "And" to sequenceOf(Token.AND(1)),
+            "aND" to sequenceOf(Token.AND(1)),
+            "Or" to sequenceOf(Token.OR(1)),
+            "oR" to sequenceOf(Token.OR(1)),
+            "With" to sequenceOf(Token.WITH(1)),
+            "wITH" to sequenceOf(Token.WITH(1))
+        )
+    }
+
+    context("compound expressions") {
+        verifyTokens(
+            "()" to sequenceOf(
+                Token.OPEN(1),
+                Token.CLOSE(2)
+            ),
+            "(a)" to sequenceOf(
+                Token.OPEN(1),
+                Token.IDENTIFIER(2, 2, "a"),
+                Token.CLOSE(3)
+            ),
+            "(a AND b)" to sequenceOf(
+                Token.OPEN(1),
+                Token.IDENTIFIER(2, 2, "a"),
+                Token.AND(4),
+                Token.IDENTIFIER(8, 8, "b"),
+                Token.CLOSE(9)
+            ),
+            "a AND (b OR c)" to sequenceOf(
+                Token.IDENTIFIER(1, 1, "a"),
+                Token.AND(3),
+                Token.OPEN(7),
+                Token.IDENTIFIER(8, 8, "b"),
+                Token.OR(10),
+                Token.IDENTIFIER(13, 13, "c"),
+                Token.CLOSE(14)
+            ),
+            "(a AND b) OR c" to sequenceOf(
+                Token.OPEN(1),
+                Token.IDENTIFIER(2, 2, "a"),
+                Token.AND(4),
+                Token.IDENTIFIER(8, 8, "b"),
+                Token.CLOSE(9),
+                Token.OR(11),
+                Token.IDENTIFIER(14, 14, "c")
+            ),
+            "(a OR b) AND (c OR d)" to sequenceOf(
+                Token.OPEN(1),
+                Token.IDENTIFIER(2, 2, "a"),
+                Token.OR(4),
+                Token.IDENTIFIER(7, 7, "b"),
+                Token.CLOSE(8),
+                Token.AND(10),
+                Token.OPEN(14),
+                Token.IDENTIFIER(15, 15, "c"),
+                Token.OR(17),
+                Token.IDENTIFIER(20, 20, "d"),
+                Token.CLOSE(21)
+            )
+        )
+    }
+
+    context("invalid expressions") {
+        verifyExceptions(
+            "_" to SpdxExpressionLexerException('_', 1),
+            "a_" to SpdxExpressionLexerException('_', 2),
+            "a AND {b OR c}" to SpdxExpressionLexerException('{', 7),
+            "a\nb" to SpdxExpressionLexerException('\n', 2),
+            "a\tb" to SpdxExpressionLexerException('\t', 2),
+            "LicenseRef-ort-lißense" to SpdxExpressionLexerException('ß', 18)
+        )
+    }
+})
+
+/**
+ * Verify that the [SpdxExpressionLexer] produces the expected tokens for the given input.
+ */
+private suspend fun FunSpecContainerScope.verifyTokens(vararg input: Pair<String, Sequence<Token>>) {
+    withData(
+        nameFn = { it.first },
+        input.asSequence()
+    ) { (expression, expectedTokens) ->
+        SpdxExpressionLexer(expression).tokens() shouldContainExactly expectedTokens
+    }
+}
+
+/**
+ * Verify that the [SpdxExpressionLexer] produces the expected [SpdxExpressionLexerException] for the given input.
+ */
+private suspend fun FunSpecContainerScope.verifyExceptions(vararg input: Pair<String, SpdxExpressionLexerException>) {
+    withData(
+        nameFn = { it.first },
+        input.asSequence()
+    ) { (expression, expectedException) ->
+        shouldThrow<SpdxExpressionLexerException> {
+            SpdxExpressionLexer(expression).tokens().toList()
+        }.apply {
+            char shouldBe expectedException.char
+            position shouldBe expectedException.position
+        }
+    }
+}
diff --git a/utils/spdx/src/test/kotlin/parser/SpdxExpressionParserTest.kt b/utils/spdx/src/test/kotlin/parser/SpdxExpressionParserTest.kt
new file mode 100644
index 0000000000000..1e64f7b024110
--- /dev/null
+++ b/utils/spdx/src/test/kotlin/parser/SpdxExpressionParserTest.kt
@@ -0,0 +1,297 @@
+/*
+ * Copyright (C) 2024 The ORT Project Authors (see )
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *     https://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ *
+ * SPDX-License-Identifier: Apache-2.0
+ * License-Filename: LICENSE
+ */
+
+package org.ossreviewtoolkit.utils.spdx.parser
+
+import io.kotest.assertions.throwables.shouldThrow
+import io.kotest.core.spec.style.FunSpec
+import io.kotest.core.spec.style.scopes.FunSpecContainerScope
+import io.kotest.datatest.withData
+import io.kotest.matchers.shouldBe
+
+import org.ossreviewtoolkit.utils.spdx.SpdxCompoundExpression
+import org.ossreviewtoolkit.utils.spdx.SpdxExpression
+import org.ossreviewtoolkit.utils.spdx.SpdxLicenseIdExpression
+import org.ossreviewtoolkit.utils.spdx.SpdxLicenseReferenceExpression
+import org.ossreviewtoolkit.utils.spdx.SpdxLicenseWithExceptionExpression
+import org.ossreviewtoolkit.utils.spdx.SpdxOperator
+
+class SpdxExpressionParserTest : FunSpec({
+    context("identifiers") {
+        verifyExpressions(
+            "a" to SpdxLicenseIdExpression("a"),
+            "a+" to SpdxLicenseIdExpression("a", true),
+            "Apache-2.0" to SpdxLicenseIdExpression("Apache-2.0"),
+            "abcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZ0123456789.-" to SpdxLicenseIdExpression(
+                "abcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZ0123456789.-"
+            )
+        )
+    }
+
+    context("license references") {
+        verifyExpressions(
+            "LicenseRef-a" to SpdxLicenseReferenceExpression("LicenseRef-a"),
+            "LicenseRef-ort-license" to SpdxLicenseReferenceExpression("LicenseRef-ort-license"),
+            "DocumentRef-a:LicenseRef-b" to SpdxLicenseReferenceExpression("DocumentRef-a:LicenseRef-b")
+        )
+    }
+
+    context("WITH expressions") {
+        verifyExpressions(
+            "a WITH b" to SpdxLicenseWithExceptionExpression(
+                SpdxLicenseIdExpression("a"),
+                "b"
+            ),
+            "GPL-3.0-or-later WITH GPL-3.0-linking-exception" to SpdxLicenseWithExceptionExpression(
+                SpdxLicenseIdExpression("GPL-3.0-or-later", orLaterVersion = true),
+                "GPL-3.0-linking-exception"
+            )
+        )
+    }
+
+    context("AND expressions") {
+        verifyExpressions(
+            "a AND b" to SpdxCompoundExpression(
+                SpdxLicenseIdExpression("a"),
+                SpdxOperator.AND,
+                SpdxLicenseIdExpression("b")
+            ),
+            "a AND b AND c" to SpdxCompoundExpression(
+                SpdxCompoundExpression(
+                    SpdxLicenseIdExpression("a"),
+                    SpdxOperator.AND,
+                    SpdxLicenseIdExpression("b")
+                ),
+                SpdxOperator.AND,
+                SpdxLicenseIdExpression("c")
+            ),
+            "a AND b AND c AND d" to SpdxCompoundExpression(
+                SpdxCompoundExpression(
+                    SpdxCompoundExpression(
+                        SpdxLicenseIdExpression("a"),
+                        SpdxOperator.AND,
+                        SpdxLicenseIdExpression("b")
+                    ),
+                    SpdxOperator.AND,
+                    SpdxLicenseIdExpression("c")
+                ),
+                SpdxOperator.AND,
+                SpdxLicenseIdExpression("d")
+            )
+        )
+    }
+
+    context("OR expressions") {
+        verifyExpressions(
+            "a OR b" to SpdxCompoundExpression(
+                SpdxLicenseIdExpression("a"),
+                SpdxOperator.OR,
+                SpdxLicenseIdExpression("b")
+            ),
+            "a OR b OR c" to SpdxCompoundExpression(
+                SpdxCompoundExpression(
+                    SpdxLicenseIdExpression("a"),
+                    SpdxOperator.OR,
+                    SpdxLicenseIdExpression("b")
+                ),
+                SpdxOperator.OR,
+                SpdxLicenseIdExpression("c")
+            ),
+            "a OR b OR c OR d" to SpdxCompoundExpression(
+                SpdxCompoundExpression(
+                    SpdxCompoundExpression(
+                        SpdxLicenseIdExpression("a"),
+                        SpdxOperator.OR,
+                        SpdxLicenseIdExpression("b")
+                    ),
+                    SpdxOperator.OR,
+                    SpdxLicenseIdExpression("c")
+                ),
+                SpdxOperator.OR,
+                SpdxLicenseIdExpression("d")
+            )
+        )
+    }
+
+    context("compound expressions") {
+        verifyExpressions(
+            "(a)" to SpdxLicenseIdExpression("a"),
+            "(a AND b)" to SpdxCompoundExpression(
+                SpdxLicenseIdExpression("a"),
+                SpdxOperator.AND,
+                SpdxLicenseIdExpression("b")
+            ),
+            "a AND (b OR c)" to SpdxCompoundExpression(
+                SpdxLicenseIdExpression("a"),
+                SpdxOperator.AND,
+                SpdxCompoundExpression(
+                    SpdxLicenseIdExpression("b"),
+                    SpdxOperator.OR,
+                    SpdxLicenseIdExpression("c")
+                )
+            ),
+            "(a AND b) OR c" to SpdxCompoundExpression(
+                SpdxCompoundExpression(
+                    SpdxLicenseIdExpression("a"),
+                    SpdxOperator.AND,
+                    SpdxLicenseIdExpression("b")
+                ),
+                SpdxOperator.OR,
+                SpdxLicenseIdExpression("c")
+            ),
+            "(a OR b) AND (c OR d)" to SpdxCompoundExpression(
+                SpdxCompoundExpression(
+                    SpdxLicenseIdExpression("a"),
+                    SpdxOperator.OR,
+                    SpdxLicenseIdExpression("b")
+                ),
+                SpdxOperator.AND,
+                SpdxCompoundExpression(
+                    SpdxLicenseIdExpression("c"),
+                    SpdxOperator.OR,
+                    SpdxLicenseIdExpression("d")
+                )
+            )
+        )
+    }
+
+    context("operator precedence") {
+        verifyExpressions(
+            "a AND b OR c" to SpdxCompoundExpression(
+                SpdxCompoundExpression(
+                    SpdxLicenseIdExpression("a"),
+                    SpdxOperator.AND,
+                    SpdxLicenseIdExpression("b")
+                ),
+                SpdxOperator.OR,
+                SpdxLicenseIdExpression("c")
+            ),
+            "a OR b AND c" to SpdxCompoundExpression(
+                SpdxLicenseIdExpression("a"),
+                SpdxOperator.OR,
+                SpdxCompoundExpression(
+                    SpdxLicenseIdExpression("b"),
+                    SpdxOperator.AND,
+                    SpdxLicenseIdExpression("c")
+                )
+            ),
+            "a OR b AND c OR d" to SpdxCompoundExpression(
+                SpdxCompoundExpression(
+                    SpdxLicenseIdExpression("a"),
+                    SpdxOperator.OR,
+                    SpdxCompoundExpression(
+                        SpdxLicenseIdExpression("b"),
+                        SpdxOperator.AND,
+                        SpdxLicenseIdExpression("c")
+                    )
+                ),
+                SpdxOperator.OR,
+                SpdxLicenseIdExpression("d")
+            ),
+            "a AND b OR c AND d" to SpdxCompoundExpression(
+                SpdxCompoundExpression(
+                    SpdxLicenseIdExpression("a"),
+                    SpdxOperator.AND,
+                    SpdxLicenseIdExpression("b")
+                ),
+                SpdxOperator.OR,
+                SpdxCompoundExpression(
+                    SpdxLicenseIdExpression("c"),
+                    SpdxOperator.AND,
+                    SpdxLicenseIdExpression("d")
+                )
+            ),
+            "a WITH b AND c OR d" to SpdxCompoundExpression(
+                SpdxCompoundExpression(
+                    SpdxLicenseWithExceptionExpression(
+                        SpdxLicenseIdExpression("a"),
+                        "b"
+                    ),
+                    SpdxOperator.AND,
+                    SpdxLicenseIdExpression("c")
+                ),
+                SpdxOperator.OR,
+                SpdxLicenseIdExpression("d")
+            ),
+            "a OR b AND c WITH d" to SpdxCompoundExpression(
+                SpdxLicenseIdExpression("a"),
+                SpdxOperator.OR,
+                SpdxCompoundExpression(
+                    SpdxLicenseIdExpression("b"),
+                    SpdxOperator.AND,
+                    SpdxLicenseWithExceptionExpression(
+                        SpdxLicenseIdExpression("c"),
+                        "d"
+                    )
+                )
+            )
+        )
+    }
+
+    context("invalid expressions") {
+        verifyErrors(
+            "a a" to SpdxExpressionParserException(Token.IDENTIFIER(3, 3, "a")),
+            "a AND" to SpdxExpressionParserException(null),
+            "AND a" to SpdxExpressionParserException(Token.AND(1)),
+            "a OR" to SpdxExpressionParserException(null),
+            "OR a" to SpdxExpressionParserException(Token.OR(1)),
+            "a ( b" to SpdxExpressionParserException(Token.OPEN(3)),
+            "a ) b" to SpdxExpressionParserException(Token.CLOSE(3)),
+            "a WITH b+" to SpdxExpressionParserException(Token.PLUS(9)),
+            "a WITH WITH b" to SpdxExpressionParserException(
+                Token.WITH(8),
+                Token.IDENTIFIER::class,
+                Token.LICENSEREF::class,
+                Token.DOCUMENTREF::class
+            ),
+            "LicenseRef-a+ WITH b" to SpdxExpressionParserException(Token.PLUS(13)),
+            "a:b" to SpdxExpressionParserException(Token.COLON(2)),
+            "((a AND b) OR c" to SpdxExpressionParserException(null, Token.CLOSE::class)
+        )
+    }
+})
+
+/**
+ * Verify that the [SpdxExpressionParser] produces the expected [SpdxExpression] for the given input.
+ */
+private suspend fun FunSpecContainerScope.verifyExpressions(vararg input: Pair<String, SpdxExpression>) {
+    withData(
+        nameFn = { it.first },
+        input.asList()
+    ) { (expression, expectedExpression) ->
+        SpdxExpressionParser(expression, SpdxExpression.Strictness.ALLOW_ANY).parse() shouldBe expectedExpression
+    }
+}
+
+/**
+ * Verify that the [SpdxExpressionParser] produces the expected [SpdxExpressionParserException] for the given input.
+ */
+private suspend fun FunSpecContainerScope.verifyErrors(vararg input: Pair<String, SpdxExpressionParserException>) {
+    withData(
+        nameFn = { it.first },
+        input.asList()
+    ) { (expression, expectedException) ->
+        shouldThrow<SpdxExpressionParserException> {
+            SpdxExpressionParser(expression, SpdxExpression.Strictness.ALLOW_ANY).parse()
+        }.apply {
+            token shouldBe expectedException.token
+            expectedTokenTypes shouldBe expectedException.expectedTokenTypes
+        }
+    }
+}