From a8a6104149011bf1b7f3cc4104bb7a14f1fcbb1f Mon Sep 17 00:00:00 2001 From: Yuchao Liang Date: Wed, 25 Dec 2024 02:55:42 +0900 Subject: [PATCH] fix(formatjs): Failed to parse unicode Closes #372, closes #367 --- .../src/parser.rs | 22 ++++++++-- .../tests/fixtures/unicode_1 | 43 +++++++++++++++++++ .../tests/run_parser_e2e.rs | 1 + 3 files changed, 63 insertions(+), 3 deletions(-) create mode 100644 crates/swc_icu_messageformat_parser/tests/fixtures/unicode_1 diff --git a/crates/swc_icu_messageformat_parser/src/parser.rs b/crates/swc_icu_messageformat_parser/src/parser.rs index c7d3feec..2f35211a 100644 --- a/crates/swc_icu_messageformat_parser/src/parser.rs +++ b/crates/swc_icu_messageformat_parser/src/parser.rs @@ -832,7 +832,16 @@ impl<'s> Parser<'s> { self.bump(); } - &self.message[start_offset..self.offset()] + let length = self.offset() - start_offset; + #[cfg(feature = "utf16")] + let (start_offset, length) = ( + self.message_utf16[..start_offset].to_string().len(), + self.message_utf16[start_offset..start_offset + length] + .to_string() + .len(), + ); + + &self.message[start_offset..start_offset + length] } fn parse_literal(&self, nesting_level: usize, parent_arg_type: &str) -> Result { @@ -1735,9 +1744,16 @@ impl<'s> Parser<'s> { if self.is_eof() { return None; } - self.message[self.offset() + self.char().len_utf8()..] + + #[cfg(feature = "utf16")] + return self.message_utf16[self.offset() + self.char().len_utf16()..] .chars() - .next() + .next(); + + #[cfg(not(feature = "utf16"))] + return self.message[self.offset() + self.char().len_utf8()..] + .chars() + .next(); } /// Returns true if the next call to `bump` would return false. 
diff --git a/crates/swc_icu_messageformat_parser/tests/fixtures/unicode_1 b/crates/swc_icu_messageformat_parser/tests/fixtures/unicode_1 new file mode 100644 index 00000000..0d4408eb --- /dev/null +++ b/crates/swc_icu_messageformat_parser/tests/fixtures/unicode_1 @@ -0,0 +1,43 @@ +<a>ö🚀</a> +--- +{} +--- +{ + "err": null, + "val": [ + { + "children": [ + { + "location": { + "end": { + "column": 6, + "line": 1, + "offset": 6 + }, + "start": { + "column": 4, + "line": 1, + "offset": 3 + } + }, + "type": 0, + "value": "ö🚀" + } + ], + "location": { + "end": { + "column": 10, + "line": 1, + "offset": 10 + }, + "start": { + "column": 1, + "line": 1, + "offset": 0 + } + }, + "type": 8, + "value": "a" + } + ] +} \ No newline at end of file diff --git a/crates/swc_icu_messageformat_parser/tests/run_parser_e2e.rs b/crates/swc_icu_messageformat_parser/tests/run_parser_e2e.rs index ac3e0fef..2845569f 100644 --- a/crates/swc_icu_messageformat_parser/tests/run_parser_e2e.rs +++ b/crates/swc_icu_messageformat_parser/tests/run_parser_e2e.rs @@ -38,6 +38,7 @@ fn read_sections(file: PathBuf) -> TestFixtureSections { fixture("tests/fixtures/treat_unicode_nbsp_as_whitespace") )] #[cfg_attr(feature = "utf16", fixture("tests/fixtures/trivial_2"))] +#[cfg_attr(feature = "utf16", fixture("tests/fixtures/unicode_1"))] #[fixture("tests/fixtures/uppercase_tag_1")] #[fixture("tests/fixtures/expect_number_arg_skeleton_token_1")] #[fixture("tests/fixtures/self_closing_tag_1")]