From eb3a46344870e3e184179a84059a040caaec4bef Mon Sep 17 00:00:00 2001 From: Jay Bosamiya Date: Tue, 20 Aug 2024 13:48:11 -0700 Subject: [PATCH 1/2] Support space between `verus !` for macro While _technically_ this is not necessary, it does make it _much_ easier to deal with `creduce`d files, which otherwise easily go and hit this particular low-hanging issue, rather than actually interesting issues. Fwiw, `rustfmt` automatically collapses any `macro !` to `macro!`, so we would not be wrong in handling this specific extra amount of whitespace during parsing. --- src/verus-minimal.pest | 4 ++-- src/verus.pest | 4 ++-- 2 files changed, 4 insertions(+), 4 deletions(-) diff --git a/src/verus-minimal.pest b/src/verus-minimal.pest index 2a28443..7c3bfdd 100644 --- a/src/verus-minimal.pest +++ b/src/verus-minimal.pest @@ -59,12 +59,12 @@ file = { /// Region of code that doesn't contain any Verus macro use whatsoever non_verus = @{ - (!("verus!" ~ WHITESPACE* ~ "{") ~ (string | raw_string | byte_string | raw_byte_string | ANY))* + (!("verus" ~ WHITESPACE* ~ "!" ~ WHITESPACE* ~ "{") ~ (string | raw_string | byte_string | raw_byte_string | ANY))* } /// An actual use of the `verus! { ... }` macro verus_macro_use = ${ - "verus!" ~ WHITESPACE* ~ "{" ~ (WHITESPACE | COMMENT)* ~ verus_macro_body ~ (WHITESPACE | COMMENT)* ~ "}" ~ WHITESPACE* ~ ("//" ~ WHITESPACE* ~ "verus!" ~ WHITESPACE*)? + "verus" ~ WHITESPACE* ~ "!" ~ WHITESPACE* ~ "{" ~ (WHITESPACE | COMMENT)* ~ verus_macro_body ~ (WHITESPACE | COMMENT)* ~ "}" ~ WHITESPACE* ~ ("//" ~ WHITESPACE* ~ "verus" ~ WHITESPACE* ~ "!" ~ WHITESPACE*)? } /// Anything inside the `verus! { ... }` macro diff --git a/src/verus.pest b/src/verus.pest index 04223d9..04e3284 100644 --- a/src/verus.pest +++ b/src/verus.pest @@ -82,12 +82,12 @@ file = { /// happens, then we need to make sure that we aren't accidentally starting /// parsing in the middle of the macro_rules macro body itself. Similarly, strings. non_verus = @{ - (!("verus!" ~ WHITESPACE* ~ "{") ~ (macro_rules | string | raw_string | byte_string | raw_byte_string | ANY))* + (!("verus" ~ WHITESPACE* ~ "!" ~ WHITESPACE* ~ "{") ~ (macro_rules | string | raw_string | byte_string | raw_byte_string | ANY))* } /// An actual use of the `verus! { ... }` macro verus_macro_use = ${ - "verus!" ~ WHITESPACE* ~ "{" ~ (WHITESPACE | COMMENT)* ~ verus_macro_body ~ (WHITESPACE | COMMENT)* ~ "}" ~ WHITESPACE* ~ ("//" ~ WHITESPACE* ~ "verus!")? + "verus" ~ WHITESPACE* ~ "!" ~ WHITESPACE* ~ "{" ~ (WHITESPACE | COMMENT)* ~ verus_macro_body ~ (WHITESPACE | COMMENT)* ~ "}" ~ WHITESPACE* ~ ("//" ~ WHITESPACE* ~ "verus" ~ WHITESPACE* ~ "!")? } /// Anything inside the `verus! { ... }` macro From 5e101e3b802bbcde90a086941b7923701b4dff4c Mon Sep 17 00:00:00 2001 From: Jay Bosamiya Date: Tue, 20 Aug 2024 14:01:23 -0700 Subject: [PATCH 2/2] Min-parse strings/chars/... with parens/... inside Otherwise, the following code would parse just fine on `--verus-only` formatting, but would fail when attempting to do the rustfmt part: ``` let x = '(' == '('; ``` --- src/verus-minimal.pest | 8 ++++++-- 1 file changed, 6 insertions(+), 2 deletions(-) diff --git a/src/verus-minimal.pest b/src/verus-minimal.pest index 7c3bfdd..1f77655 100644 --- a/src/verus-minimal.pest +++ b/src/verus-minimal.pest @@ -45,6 +45,10 @@ raw_byte_string = @{ "b" ~ raw_string } +char = @{ + "'" ~ ("\\'" | !"'" ~ ANY) ~ "'" +} + multiline_comment = @{ "/*" ~ (multiline_comment | (!"*/" ~ ANY))* ~ "*/" } @@ -59,7 +63,7 @@ file = { /// Region of code that doesn't contain any Verus macro use whatsoever non_verus = @{ - (!("verus" ~ WHITESPACE* ~ "!" ~ WHITESPACE* ~ "{") ~ (string | raw_string | byte_string | raw_byte_string | ANY))* + (!("verus" ~ WHITESPACE* ~ "!" ~ WHITESPACE* ~ "{") ~ (string | raw_string | byte_string | raw_byte_string | char | ANY))* } /// An actual use of the `verus! { ... }` macro @@ -90,5 +94,5 @@ paren_tree = { /// _Technically_ not a Rust token, but we're trying to do a minimal parser _anyways_ token = { - !("{" | "}" | "[" | "]" | "(" | ")") ~ ANY + !("{" | "}" | "[" | "]" | "(" | ")") ~ (string | raw_string | byte_string | raw_byte_string | char | ANY) }