From 025596a6555e8afd2caba1eb5d5ed07e6a8a79da Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?M=C3=A1t=C3=A9=20Tokodi?= Date: Mon, 1 Jul 2024 11:27:20 +0200 Subject: [PATCH] Raise parser error on invalid unicode escape sequence MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit JerryScript-DCO-1.0-Signed-off-by: Máté Tokodi mate.tokodi@szteszoftver.hu --- jerry-core/parser/js/js-lexer.c | 42 +++++++++++++++---- jerry-core/parser/js/js-parser-internal.h | 2 +- .../jerry/fail/regression-test-issue-5134.js | 15 +++++++ 3 files changed, 50 insertions(+), 9 deletions(-) create mode 100644 tests/jerry/fail/regression-test-issue-5134.js diff --git a/jerry-core/parser/js/js-lexer.c b/jerry-core/parser/js/js-lexer.c index e9429f9f58..cae55a6dd6 100644 --- a/jerry-core/parser/js/js-lexer.c +++ b/jerry-core/parser/js/js-lexer.c @@ -147,7 +147,7 @@ lexer_hex_in_braces_to_code_point (const uint8_t *source_p, /**< current source /** * Parse hexadecimal character sequence * - * @return character value + * @return character value (-1 if the escape sequence is invalid) */ static lit_code_point_t lexer_unchecked_hex_to_character (const uint8_t **source_p) /**< [in, out] current source position */ @@ -174,13 +174,19 @@ lexer_unchecked_hex_to_character (const uint8_t **source_p) /**< [in, out] curre } else { - JERRY_ASSERT ((byte >= LIT_CHAR_LOWERCASE_A && byte <= LIT_CHAR_LOWERCASE_F) - || (byte >= LIT_CHAR_UPPERCASE_A && byte <= LIT_CHAR_UPPERCASE_F)); + if (!((byte >= LIT_CHAR_LOWERCASE_A && byte <= LIT_CHAR_LOWERCASE_F) + || (byte >= LIT_CHAR_UPPERCASE_A && byte <= LIT_CHAR_UPPERCASE_F))) + { + return (lit_code_point_t) -1; + } result += LEXER_TO_ASCII_LOWERCASE (byte) - (LIT_CHAR_LOWERCASE_A - 10); } - JERRY_ASSERT (result <= LIT_UNICODE_CODE_POINT_MAX); + if (result > LIT_UNICODE_CODE_POINT_MAX) + { + return (lit_code_point_t) -1; + } if (length == 0) { @@ -2068,8 +2074,10 @@ lexer_scan_private_identifier (parser_context_t *context_p) /**< context */ /** * Convert an ident with escapes to a utf8 string. + * + * @return false if source contains invalid unicode escape sequence, true otherwise */ -void +bool lexer_convert_ident_to_cesu8 (uint8_t *destination_p, /**< destination string */ const uint8_t *source_p, /**< source string */ prop_length_t length) /**< length of destination string */ @@ -2083,7 +2091,12 @@ lexer_convert_ident_to_cesu8 (uint8_t *destination_p, /**< destination string */ if (*source_p == LIT_CHAR_BACKSLASH) { source_p += 2; - destination_p += lit_code_point_to_cesu8_bytes (destination_p, lexer_unchecked_hex_to_character (&source_p)); + lit_code_point_t code_point = lexer_unchecked_hex_to_character (&source_p); + if (code_point == (lit_code_point_t) -1) + { + return false; + } + destination_p += lit_code_point_to_cesu8_bytes (destination_p, code_point); continue; } @@ -2098,6 +2111,7 @@ lexer_convert_ident_to_cesu8 (uint8_t *destination_p, /**< destination string */ *destination_p++ = *source_p++; } while (destination_p < destination_end_p); + return true; } /* lexer_convert_ident_to_cesu8 */ /** @@ -2130,7 +2144,10 @@ lexer_convert_literal_to_chars (parser_context_t *context_p, /**< context */ if (literal_p->type == LEXER_IDENT_LITERAL) { - lexer_convert_ident_to_cesu8 (destination_start_p, literal_p->char_p, literal_p->length); + if (!lexer_convert_ident_to_cesu8 (destination_start_p, literal_p->char_p, literal_p->length)) + { + parser_raise_error (context_p, PARSER_ERR_INVALID_UNICODE_ESCAPE_SEQUENCE); + } return destination_start_p; } @@ -2229,7 +2246,12 @@ lexer_convert_literal_to_chars (parser_context_t *context_p, /**< context */ if (*source_p == LIT_CHAR_LOWERCASE_X || *source_p == LIT_CHAR_LOWERCASE_U) { source_p++; - destination_p += lit_code_point_to_cesu8_bytes (destination_p, lexer_unchecked_hex_to_character (&source_p)); + lit_code_point_t code_point = lexer_unchecked_hex_to_character (&source_p); + if (code_point == (lit_code_point_t) -1) + { + parser_raise_error (context_p, PARSER_ERR_INVALID_UNICODE_ESCAPE_SEQUENCE); + } + destination_p += lit_code_point_to_cesu8_bytes (destination_p, code_point); continue; } @@ -3308,6 +3330,10 @@ lexer_compare_identifier_to_chars (const uint8_t *left_p, /**< left identifier * { left_p += 2; lit_code_point_t code_point = lexer_unchecked_hex_to_character (&left_p); + if (code_point == (lit_code_point_t) -1) + { + return false; + } escape_size = lit_code_point_to_cesu8_bytes (utf8_buf, code_point); } diff --git a/jerry-core/parser/js/js-parser-internal.h b/jerry-core/parser/js/js-parser-internal.h index 4e46ea7cdd..3bcf23f1a6 100644 --- a/jerry-core/parser/js/js-parser-internal.h +++ b/jerry-core/parser/js/js-parser-internal.h @@ -748,7 +748,7 @@ void lexer_parse_string (parser_context_t *context_p, lexer_string_options_t opt void lexer_expect_identifier (parser_context_t *context_p, uint8_t literal_type); bool lexer_scan_identifier (parser_context_t *context_p, lexer_parse_options_t opts); void lexer_check_property_modifier (parser_context_t *context_p); -void lexer_convert_ident_to_cesu8 (uint8_t *destination_p, const uint8_t *source_p, prop_length_t length); +bool lexer_convert_ident_to_cesu8 (uint8_t *destination_p, const uint8_t *source_p, prop_length_t length); const uint8_t *lexer_convert_literal_to_chars (parser_context_t *context_p, const lexer_lit_location_t *literal_p, diff --git a/tests/jerry/fail/regression-test-issue-5134.js b/tests/jerry/fail/regression-test-issue-5134.js new file mode 100644 index 0000000000..589eedcb17 --- /dev/null +++ b/tests/jerry/fail/regression-test-issue-5134.js @@ -0,0 +1,15 @@ +// Copyright JS Foundation and other contributors, http://js.foundation +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +import{a as "\{{12,34}"