Fileset grammar: widen bare identifiers and split off a strict identifier.

Summary of what this patch does (text before the first "diff --git" line is
commentary and is not part of any hunk):

* lib/src/fileset.pest
  - `identifier` becomes a single run of XID_CONTINUE characters plus
    "+", "-", ".", "@", "_", "/" and "\", replacing the old
    `identifier_part` based rule.
  - New `strict_identifier_part` / `strict_identifier` rules (ASCII
    alphanumerics and "_", parts joined by "-") are introduced.
  - `string_pattern` now takes a `strict_identifier` on the left-hand side
    of the pattern-kind operator; the right-hand side is unchanged.
* lib/src/fileset.rs
  - The test-local `parse` closure calls `parse(text, &ctx)` instead of
    `FilePattern::parse(&ctx, input).map(FilesetExpression::pattern)`.
* lib/src/fileset_parser.rs
  - The `impl Rule` match drops `Rule::identifier_part` and adds arms for
    the two new rules.
  - `parse_primary_node` asserts `Rule::strict_identifier` for the
    left-hand side of a string pattern.
  - Identifier tests are added for "@", non-ASCII characters and "\".

NOTE(review): leading whitespace inside the hunks was reconstructed from the
hunk line counts and the usual formatting of these files; confirm against
the target tree before applying.

diff --git a/lib/src/fileset.pest b/lib/src/fileset.pest
index 83e17780cc..c06a2ce73f 100644
--- a/lib/src/fileset.pest
+++ b/lib/src/fileset.pest
@@ -14,10 +14,17 @@
 
 whitespace = _{ " " | "\t" | "\r" | "\n" | "\x0c" }
 
-// TODO: adjust identifier rule for file names
-identifier_part = @{ (ASCII_ALPHANUMERIC | "_" | "/")+ }
+// XID_CONTINUE: https://www.unicode.org/reports/tr31/#Default_Identifier_Syntax
+// +, -, ., @, _: commonly used in file name including "." and ".."
+// /: path separator
+// \: path separator (Windows)
+// TODO: accept glob characters as identifier?
 identifier = @{
-  identifier_part ~ (("." | "-" | "+") ~ identifier_part)*
+  (XID_CONTINUE | "+" | "-" | "." | "@" | "_" | "/" | "\\")+
+}
+strict_identifier_part = @{ (ASCII_ALPHANUMERIC | "_")+ }
+strict_identifier = @{
+  strict_identifier_part ~ ("-" ~ strict_identifier_part)*
 }
 
 string_escape = @{ "\\" ~ ("t" | "r" | "n" | "0" | "\"" | "\\") }
@@ -42,7 +49,7 @@ function_arguments = {
 }
 
 // TODO: change rhs to string_literal to require quoting? #2101
-string_pattern = { identifier ~ pattern_kind_op ~ (identifier | string_literal) }
+string_pattern = { strict_identifier ~ pattern_kind_op ~ (identifier | string_literal) }
 
 primary = {
   "(" ~ whitespace* ~ expression ~ whitespace* ~ ")"
diff --git a/lib/src/fileset.rs b/lib/src/fileset.rs
index 1cbcab2f68..31368ae28d 100644
--- a/lib/src/fileset.rs
+++ b/lib/src/fileset.rs
@@ -396,8 +396,7 @@ mod tests {
             cwd: Path::new("/ws/cur"),
             workspace_root: Path::new("/ws"),
         };
-        // TODO: adjust identifier rule and test the expression parser instead
-        let parse = |input| FilePattern::parse(&ctx, input).map(FilesetExpression::pattern);
+        let parse = |text| parse(text, &ctx);
 
         // cwd-relative patterns
         assert_eq!(
diff --git a/lib/src/fileset_parser.rs b/lib/src/fileset_parser.rs
index 10a072cf05..ec41f4db80 100644
--- a/lib/src/fileset_parser.rs
+++ b/lib/src/fileset_parser.rs
@@ -40,8 +40,9 @@ impl Rule {
         match self {
             Rule::EOI => None,
             Rule::whitespace => None,
-            Rule::identifier_part => None,
             Rule::identifier => None,
+            Rule::strict_identifier_part => None,
+            Rule::strict_identifier => None,
             Rule::string_escape => None,
             Rule::string_content_char => None,
             Rule::string_content => None,
@@ -237,7 +238,7 @@ fn parse_primary_node(pair: Pair) -> FilesetParseResult {
         }
         Rule::string_pattern => {
             let (lhs, op, rhs) = first.into_inner().collect_tuple().unwrap();
-            assert_eq!(lhs.as_rule(), Rule::identifier);
+            assert_eq!(lhs.as_rule(), Rule::strict_identifier);
             assert_eq!(op.as_rule(), Rule::pattern_kind_op);
             let kind = lhs.as_str();
             let value = match rhs.as_rule() {
@@ -399,6 +400,18 @@ mod tests {
             parse_into_kind("dir/foo-bar_0.baz"),
             Ok(ExpressionKind::Identifier("dir/foo-bar_0.baz"))
         );
+        assert_eq!(
+            parse_into_kind("cli-reference@.md.snap"),
+            Ok(ExpressionKind::Identifier("cli-reference@.md.snap"))
+        );
+        assert_eq!(
+            parse_into_kind("柔術.jj"),
+            Ok(ExpressionKind::Identifier("柔術.jj"))
+        );
+        assert_eq!(
+            parse_into_kind(r#"Windows\Path"#),
+            Ok(ExpressionKind::Identifier(r#"Windows\Path"#))
+        );
     }
 
     #[test]