Skip to content

Commit

Permalink
fix: parsing modulemaps with comments (#1105)
Browse files Browse the repository at this point in the history
Fixes the tokenizer and modulemap parser to support C99-style
comments. This implementation is probably not ideal, but we'll be able
to move off this soon: bazelbuild/rules_swift#1212
  • Loading branch information
luispadron authored Jun 12, 2024
1 parent b89c552 commit 701d755
Show file tree
Hide file tree
Showing 3 changed files with 66 additions and 1 deletion.
2 changes: 1 addition & 1 deletion swiftpkg/internal/modulemap_parser/parser.bzl
Original file line number Diff line number Diff line change
Expand Up @@ -26,7 +26,7 @@ def _parse(text):
collect_result = None
err = None

if token.type == tts.newline:
if token.type == tts.newline or token.type == tts.comment:
pass

elif token.type == tts.reserved:
Expand Down
43 changes: 43 additions & 0 deletions swiftpkg/internal/modulemap_parser/tokenizer.bzl
Original file line number Diff line number Diff line change
Expand Up @@ -36,6 +36,41 @@ def _error(char, msg):
msg = msg,
)

def _collect_single_line_comment(chars):
    """Collects a `//` comment running to the end of the current line.

    Args:
        chars: A sequence of characters starting at the comment's first `/`.

    Returns:
        A collection result whose `chars` hold every character up to, but not
        including, the terminating newline (or all remaining characters when
        no newline follows).
    """

    # Locate the newline that ends the comment, if any.
    newline_at = -1
    for pos, cur in enumerate(chars):
        if cur == "\n":
            newline_at = pos
            break

    if newline_at < 0:
        comment_chars = [c for c in chars]
    else:
        comment_chars = [c for c in chars[:newline_at]]

    return _collection_result(
        chars = comment_chars,
    )

def _collect_multi_line_comment(chars):
    """Collects a C-style `/* ... */` comment, delimiters included.

    Args:
        chars: A sequence of characters whose first two elements are the
            opening `/*` delimiter.

    Returns:
        A collection result whose `chars` hold the full comment text. When
        the closing `*/` is never found, the result also carries an
        "Unclosed multi-line comment" error.
    """

    # The caller already matched the opening delimiter; seed the output with
    # it and scan the remainder for the closing `*/`.
    body = chars[2:]
    collected = ["/", "*"]
    closed = False
    last = len(body) - 1

    for pos in range(len(body)):
        collected.append(body[pos])

        # A `*` immediately followed by `/` closes the comment; consume the
        # `/` as well so the token spans the entire comment.
        if body[pos] == "*" and pos < last and body[pos + 1] == "/":
            collected.append("/")
            closed = True
            break

    if closed:
        return _collection_result(
            chars = collected,
        )

    return _collection_result(
        chars = collected,
        errors = [_error("".join(collected), "Unclosed multi-line comment")],
    )

def _collect_chars_in_set(chars, target_set):
collected_chars = []
for char in chars:
Expand Down Expand Up @@ -114,6 +149,14 @@ def _tokenize(text):
elif sets.contains(character_sets.whitespaces, char):
pass

elif char == "/":
if idx + 1 < chars_len and chars[idx + 1] == "*":
collect_result = _collect_multi_line_comment(chars[idx:])
else:
collect_result = _collect_single_line_comment(chars[idx:])

collected_tokens.append(tokens.comment(collect_result.value))

elif sets.contains(character_sets.newlines, char):
collect_result = _collect_chars_in_set(chars[idx:], character_sets.newlines)
collected_tokens.append(tokens.newline())
Expand Down
22 changes: 22 additions & 0 deletions swiftpkg/tests/modulemap_parser/tokenizer_tests.bzl
Original file line number Diff line number Diff line change
Expand Up @@ -82,6 +82,28 @@ def _tokenize_test(ctx):
result = tokenizer.tokenize(text)
asserts.equals(env, expected, result, "consume string literals")

text = "// single line comment 1\n/* single line comment 2 */"
expected = tokenizer.result(
tokens = [
tokens.comment("// single line comment 1"),
tokens.newline(),
tokens.comment("/* single line comment 2 */"),
],
)
result = tokenizer.tokenize(text)
asserts.equals(env, expected, result, "consume single line comments")

text = "/*\nmulti line comment\nline 1\n // line 2\n*/\n// single line comment"
expected = tokenizer.result(
tokens = [
tokens.comment("/*\nmulti line comment\nline 1\n // line 2\n*/"),
tokens.newline(),
tokens.comment("// single line comment"),
],
)
result = tokenizer.tokenize(text)
asserts.equals(env, expected, result, "consume multi line comments")

return unittest.end(env)

tokenize_test = unittest.make(_tokenize_test)
Expand Down

0 comments on commit 701d755

Please sign in to comment.