From 5d9b04897107b3fb0a639acc233b8861a73f287f Mon Sep 17 00:00:00 2001 From: Kevin Newton Date: Wed, 30 Aug 2023 17:21:50 -0400 Subject: [PATCH] Add a value to numbered references --- config.yml | 3 ++ include/yarp.h | 1 + src/yarp.c | 35 ++++++++++++++++++- .../unparser/corpus/literal/dstr.txt | 2 +- .../unparser/corpus/literal/variables.txt | 2 +- test/yarp/snapshots/whitequark/nth_ref.txt | 2 +- 6 files changed, 41 insertions(+), 4 deletions(-) diff --git a/config.yml b/config.yml index 32a605c61d9..8ebfb772bf4 100644 --- a/config.yml +++ b/config.yml @@ -1703,6 +1703,9 @@ nodes: ^^^^^ end - name: NumberedReferenceReadNode + fields: + - name: number + type: uint32 comment: | Represents reading a numbered reference to a capture in the previous match. diff --git a/include/yarp.h b/include/yarp.h index 6488d01adcb..378efe0c937 100644 --- a/include/yarp.h +++ b/include/yarp.h @@ -16,6 +16,7 @@ #include "yarp/version.h" #include +#include #include #include #include diff --git a/src/yarp.c b/src/yarp.c index 5381eabe006..7df7ca0653c 100644 --- a/src/yarp.c +++ b/src/yarp.c @@ -613,6 +613,38 @@ yp_scope_node_init(yp_node_t *node, yp_scope_node_t *scope) { /* Node creation functions */ /******************************************************************************/ +// Parse the decimal number represented by the range of bytes. Returns +// UINT32_MAX if the number fails to parse. This function assumes that the range +// of bytes has already been validated to contain only decimal digits. 
+static uint32_t +parse_decimal_number(yp_parser_t *parser, const uint8_t *start, const uint8_t *end) { + ptrdiff_t diff = end - start; + assert(diff > 0 && ((unsigned long) diff < SIZE_MAX)); + size_t length = (size_t) diff; + + char *digits = calloc(length + 1, sizeof(char)); + memcpy(digits, start, length); + digits[length] = '\0'; + + char *endptr; + errno = 0; + unsigned long value = strtoul(digits, &endptr, 10); + + if ((digits == endptr) || (*endptr != '\0') || (errno == ERANGE)) { + yp_diagnostic_list_append(&parser->error_list, start, end, "invalid decimal number"); + value = UINT32_MAX; + } + + free(digits); + + if (value > UINT32_MAX) { + yp_diagnostic_list_append(&parser->error_list, start, end, "invalid decimal number"); + value = UINT32_MAX; + } + + return (uint32_t) value; +} + // Parse out the options for a regular expression. static inline yp_node_flags_t yp_regular_expression_flags_create(const yp_token_t *closing) { @@ -3289,7 +3321,8 @@ yp_numbered_reference_read_node_create(yp_parser_t *parser, const yp_token_t *na { .type = YP_NODE_NUMBERED_REFERENCE_READ_NODE, .location = YP_LOCATION_TOKEN_VALUE(name), - } + }, + .number = parse_decimal_number(parser, name->start + 1, name->end) }; return node; diff --git a/test/yarp/snapshots/unparser/corpus/literal/dstr.txt b/test/yarp/snapshots/unparser/corpus/literal/dstr.txt index facf0171e79..396ebcd3daf 100644 --- a/test/yarp/snapshots/unparser/corpus/literal/dstr.txt +++ b/test/yarp/snapshots/unparser/corpus/literal/dstr.txt @@ -57,7 +57,7 @@ ProgramNode(0...299)( [StringNode(146...147)(nil, (146...147), nil, "a"), EmbeddedVariableNode(147...150)( (147...148), - NumberedReferenceReadNode(148...150)() + NumberedReferenceReadNode(148...150)(1) )], (150...151) ), diff --git a/test/yarp/snapshots/unparser/corpus/literal/variables.txt b/test/yarp/snapshots/unparser/corpus/literal/variables.txt index dfb85ec0137..fe98a8cd57f 100644 --- a/test/yarp/snapshots/unparser/corpus/literal/variables.txt +++ 
b/test/yarp/snapshots/unparser/corpus/literal/variables.txt @@ -5,7 +5,7 @@ ProgramNode(0...66)( InstanceVariableReadNode(2...4)(:@a), ClassVariableReadNode(5...8)(:@@a), GlobalVariableReadNode(9...11)(), - NumberedReferenceReadNode(12...14)(), + NumberedReferenceReadNode(12...14)(1), BackReferenceReadNode(15...17)(), ConstantReadNode(18...23)(), ConstantPathNode(24...37)( diff --git a/test/yarp/snapshots/whitequark/nth_ref.txt b/test/yarp/snapshots/whitequark/nth_ref.txt index 3bfe4c9a5c3..ca131a208ce 100644 --- a/test/yarp/snapshots/whitequark/nth_ref.txt +++ b/test/yarp/snapshots/whitequark/nth_ref.txt @@ -1,4 +1,4 @@ ProgramNode(0...3)( [], - StatementsNode(0...3)([NumberedReferenceReadNode(0...3)()]) + StatementsNode(0...3)([NumberedReferenceReadNode(0...3)(10)]) )