Skip to content

Commit

Permalink
Reconfigure rationals
Browse files Browse the repository at this point in the history
This eliminates the subnode on RationalNode and replaces it with two
integer fields, which represent the ratio for the rational. It also
reduces those two integers if they both fit into 32 bits.

Importantly, this PR does not implement bignum reduction. That's something
I'd like to consider for the future, but for now it's simplest to leave
bignums unreduced; even so, the node is more useful than it used to be.
  • Loading branch information
kddnewton committed May 21, 2024
1 parent b2dc7e9 commit 2ba5735
Show file tree
Hide file tree
Showing 16 changed files with 262 additions and 238 deletions.
17 changes: 15 additions & 2 deletions config.yml
Original file line number Diff line number Diff line change
Expand Up @@ -3229,8 +3229,21 @@ nodes:
^^^^^^^^^^^^^^^^^^^^^^^^^^^^
- name: RationalNode
fields:
- name: numeric
type: node
- name: flags
type: flags
kind: IntegerBaseFlags
- name: numerator
type: integer
comment: |
The numerator of the rational number.
1.5r # numerator 3
- name: denominator
type: integer
comment: |
The denominator of the rational number.
1.5r # denominator 2
comment: |
Represents a rational number literal.
Expand Down
12 changes: 12 additions & 0 deletions include/prism/util/pm_integer.h
Original file line number Diff line number Diff line change
Expand Up @@ -100,6 +100,18 @@ size_t pm_integer_memsize(const pm_integer_t *integer);
*/
int pm_integer_compare(const pm_integer_t *left, const pm_integer_t *right);

/**
* Reduce a ratio of integers to its simplest form.
*
* If either the numerator or denominator does not fit into a 32-bit integer, then
* this function is a no-op. In the future, we may consider reducing even the
* larger numbers, but for now we're going to keep it simple.
*
* @param numerator The numerator of the ratio.
* @param denominator The denominator of the ratio.
*/
void pm_integers_reduce(pm_integer_t *numerator, pm_integer_t *denominator);

/**
* Convert an integer to a decimal string.
*
Expand Down
2 changes: 1 addition & 1 deletion lib/prism/node_ext.rb
Original file line number Diff line number Diff line change
Expand Up @@ -103,7 +103,7 @@ def value
class RationalNode < Node
# Returns the value of the node as a Ruby Rational.
def value
Rational(numeric.is_a?(IntegerNode) ? numeric.value : slice.chomp("r"))
Rational(numerator, denominator)
end
end

Expand Down
24 changes: 5 additions & 19 deletions lib/prism/translation/parser/compiler.rb
Original file line number Diff line number Diff line change
Expand Up @@ -881,7 +881,7 @@ def visit_if_node(node)
# 1i
# ^^
def visit_imaginary_node(node)
visit_numeric(node, builder.complex([imaginary_value(node), srange(node.location)]))
visit_numeric(node, builder.complex([Complex(0, node.numeric.value), srange(node.location)]))
end

# { foo: }
Expand Down Expand Up @@ -1514,7 +1514,7 @@ def visit_range_node(node)
# 1r
# ^^
def visit_rational_node(node)
visit_numeric(node, builder.rational([rational_value(node), srange(node.location)]))
visit_numeric(node, builder.rational([node.value, srange(node.location)]))
end

# redo
Expand Down Expand Up @@ -1940,12 +1940,6 @@ def find_forwarding(node)
forwarding
end

# Builds the Complex value for an imaginary node. The AST has been mutated to
# allow newlines in the middle of a rational, so a rational imaginary part is
# computed through rational_value; any other numeric part supplies its own
# value.
def imaginary_value(node)
  inner = node.numeric
  imaginary_part = inner.is_a?(RationalNode) ? rational_value(inner) : inner.value
  Complex(0, imaginary_part)
end

# Negate the value of a numeric node. This is a special case where you
# have a negative sign on one line and then a number on the next line.
# In normal Ruby, this will always be a method call. The parser gem,
Expand All @@ -1955,7 +1949,9 @@ def numeric_negate(message_loc, receiver)
case receiver.type
when :integer_node, :float_node
receiver.copy(value: -receiver.value, location: message_loc.join(receiver.location))
when :rational_node, :imaginary_node
when :rational_node
receiver.copy(numerator: -receiver.numerator, location: message_loc.join(receiver.location))
when :imaginary_node
receiver.copy(numeric: numeric_negate(message_loc, receiver.numeric), location: message_loc.join(receiver.location))
end
end
Expand All @@ -1974,16 +1970,6 @@ def procarg0?(parameters)
parameters.block.nil?
end

# Computes the Rational value for a rational node. The AST has been mutated to
# allow newlines in the middle of a rational, so when the wrapped numeric is
# not an integer the value is rebuilt from the node's source slice with all
# whitespace and the trailing "r" removed.
def rational_value(node)
  numeric = node.numeric
  return Rational(numeric.value) if numeric.is_a?(IntegerNode)

  Rational(node.slice.gsub(/\s/, "").chomp("r"))
end

# Locations in the parser gem AST are generated using this class. We
# store a reference to its constant to make it slightly faster to look
# up.
Expand Down
67 changes: 50 additions & 17 deletions src/prism.c
Original file line number Diff line number Diff line change
Expand Up @@ -4290,7 +4290,7 @@ pm_float_node_imaginary_create(pm_parser_t *parser, const pm_token_t *token) {
}

/**
* Allocate and initialize a new FloatNode node from a FLOAT_RATIONAL token.
* Allocate and initialize a new RationalNode node from a FLOAT_RATIONAL token.
*/
static pm_rational_node_t *
pm_float_node_rational_create(pm_parser_t *parser, const pm_token_t *token) {
Expand All @@ -4300,16 +4300,39 @@ pm_float_node_rational_create(pm_parser_t *parser, const pm_token_t *token) {
*node = (pm_rational_node_t) {
{
.type = PM_RATIONAL_NODE,
.flags = PM_NODE_FLAG_STATIC_LITERAL,
.flags = PM_INTEGER_BASE_FLAGS_DECIMAL | PM_NODE_FLAG_STATIC_LITERAL,
.location = PM_LOCATION_TOKEN_VALUE(token)
},
.numeric = (pm_node_t *) pm_float_node_create(parser, &((pm_token_t) {
.type = PM_TOKEN_FLOAT,
.start = token->start,
.end = token->end - 1
}))
.numerator = { 0 },
.denominator = { 0 }
};

const uint8_t *start = token->start;
const uint8_t *end = token->end - 1; // r

while (start < end && *start == '0') start++; // 0.1 -> .1
while (end > start && end[-1] == '0') end--; // 1.0 -> 1.

size_t length = (size_t) (end - start);
const uint8_t *point = memchr(start, '.', length);
assert(point && "should have a decimal point");

uint8_t *digits = malloc(length - 1);
if (digits == NULL) {
fputs("[pm_float_node_rational_create] Failed to allocate memory", stderr);
abort();
}

memcpy(digits, start, (unsigned long) (point - start));
memcpy(digits + (point - start), point + 1, (unsigned long) (end - point - 1));
pm_integer_parse(&node->numerator, PM_INTEGER_BASE_DECIMAL, digits, digits + length - 1);

digits[0] = '1';
memset(digits + 1, '0', (size_t) (end - point - 1));
pm_integer_parse(&node->denominator, PM_INTEGER_BASE_DECIMAL, digits, digits + (end - point));
free(digits);

pm_integers_reduce(&node->numerator, &node->denominator);
return node;
}

Expand Down Expand Up @@ -4943,7 +4966,7 @@ pm_integer_node_imaginary_create(pm_parser_t *parser, pm_node_flags_t base, cons
}

/**
* Allocate and initialize a new IntegerNode node from an INTEGER_RATIONAL
* Allocate and initialize a new RationalNode node from an INTEGER_RATIONAL
* token.
*/
static pm_rational_node_t *
Expand All @@ -4954,16 +4977,24 @@ pm_integer_node_rational_create(pm_parser_t *parser, pm_node_flags_t base, const
*node = (pm_rational_node_t) {
{
.type = PM_RATIONAL_NODE,
.flags = PM_NODE_FLAG_STATIC_LITERAL,
.flags = base | PM_NODE_FLAG_STATIC_LITERAL,
.location = PM_LOCATION_TOKEN_VALUE(token)
},
.numeric = (pm_node_t *) pm_integer_node_create(parser, base, &((pm_token_t) {
.type = PM_TOKEN_INTEGER,
.start = token->start,
.end = token->end - 1
}))
.numerator = { 0 },
.denominator = { .value = 1, 0 }
};

pm_integer_base_t integer_base = PM_INTEGER_BASE_DECIMAL;
switch (base) {
case PM_INTEGER_BASE_FLAGS_BINARY: integer_base = PM_INTEGER_BASE_BINARY; break;
case PM_INTEGER_BASE_FLAGS_OCTAL: integer_base = PM_INTEGER_BASE_OCTAL; break;
case PM_INTEGER_BASE_FLAGS_DECIMAL: break;
case PM_INTEGER_BASE_FLAGS_HEXADECIMAL: integer_base = PM_INTEGER_BASE_HEXADECIMAL; break;
default: assert(false && "unreachable"); break;
}

pm_integer_parse(&node->numerator, integer_base, token->start, token->end - 1);

return node;
}

Expand Down Expand Up @@ -16857,10 +16888,12 @@ parse_negative_numeric(pm_node_t *node) {
cast->value = -cast->value;
break;
}
case PM_RATIONAL_NODE:
node->location.start--;
parse_negative_numeric(((pm_rational_node_t *) node)->numeric);
case PM_RATIONAL_NODE: {
pm_rational_node_t *cast = (pm_rational_node_t *) node;
cast->base.location.start--;
cast->numerator.negative = true;
break;
}
case PM_IMAGINARY_NODE:
node->location.start--;
parse_negative_numeric(((pm_imaginary_node_t *) node)->numeric);
Expand Down
118 changes: 43 additions & 75 deletions src/static_literals.c
Original file line number Diff line number Diff line change
Expand Up @@ -58,6 +58,25 @@ murmur_hash(const uint8_t *key, size_t length) {
return hash;
}

/**
 * Compute a 32-bit hash for the given integer. When the integer carries a
 * `values` array, all `length` 32-bit words of that array are hashed;
 * otherwise only the single `value` word is hashed. If the integer is
 * negative, the result is XORed with murmur_scramble(1) to mix in the sign.
 */
static uint32_t
integer_hash(const pm_integer_t *integer) {
    const uint8_t *bytes;
    size_t size;

    if (integer->values) {
        bytes = (const uint8_t *) integer->values;
        size = sizeof(uint32_t) * integer->length;
    } else {
        bytes = (const uint8_t *) &integer->value;
        size = sizeof(uint32_t);
    }

    uint32_t hash = murmur_hash(bytes, size);
    return integer->negative ? (hash ^ murmur_scramble((uint32_t) 1)) : hash;
}

/**
* Return the hash of the given node. It is important that nodes that have
* equivalent static literal values have the same hash. This is because we use
Expand All @@ -68,19 +87,8 @@ node_hash(const pm_static_literals_metadata_t *metadata, const pm_node_t *node)
switch (PM_NODE_TYPE(node)) {
case PM_INTEGER_NODE: {
// Integers hash their value.
const pm_integer_t *integer = &((const pm_integer_node_t *) node)->value;
uint32_t hash;
if (integer->values) {
hash = murmur_hash((const uint8_t *) integer->values, sizeof(uint32_t) * integer->length);
} else {
hash = murmur_hash((const uint8_t *) &integer->value, sizeof(uint32_t));
}

if (integer->negative) {
hash ^= murmur_scramble((uint32_t) 1);
}

return hash;
const pm_integer_node_t *cast = (const pm_integer_node_t *) node;
return integer_hash(&cast->value);
}
case PM_SOURCE_LINE_NODE: {
// Source lines hash their line number.
Expand All @@ -94,11 +102,9 @@ node_hash(const pm_static_literals_metadata_t *metadata, const pm_node_t *node)
return murmur_hash((const uint8_t *) value, sizeof(double));
}
case PM_RATIONAL_NODE: {
// Rationals hash their numeric value. Because their numeric value
// is stored as a subnode, we hash that node and then mix in the
// fact that this is a rational node.
const pm_node_t *numeric = ((const pm_rational_node_t *) node)->numeric;
return node_hash(metadata, numeric) ^ murmur_scramble((uint32_t) node->type);
// Rationals hash their numerator and denominator.
const pm_rational_node_t *cast = (const pm_rational_node_t *) node;
return integer_hash(&cast->numerator) ^ integer_hash(&cast->denominator) ^ murmur_scramble((uint32_t) cast->base.type);
}
case PM_IMAGINARY_NODE: {
// Imaginaries hash their numeric value. Because their numeric value
Expand Down Expand Up @@ -275,8 +281,15 @@ pm_compare_number_nodes(const pm_static_literals_metadata_t *metadata, const pm_
switch (PM_NODE_TYPE(left)) {
case PM_IMAGINARY_NODE:
return pm_compare_number_nodes(metadata, ((const pm_imaginary_node_t *) left)->numeric, ((const pm_imaginary_node_t *) right)->numeric);
case PM_RATIONAL_NODE:
return pm_compare_number_nodes(metadata, ((const pm_rational_node_t *) left)->numeric, ((const pm_rational_node_t *) right)->numeric);
case PM_RATIONAL_NODE: {
const pm_rational_node_t *left_rational = (const pm_rational_node_t *) left;
const pm_rational_node_t *right_rational = (const pm_rational_node_t *) right;

int result = pm_integer_compare(&left_rational->denominator, &right_rational->denominator);
if (result != 0) return result;

return pm_integer_compare(&left_rational->numerator, &right_rational->numerator);
}
case PM_INTEGER_NODE:
return pm_compare_integer_nodes(metadata, left, right);
case PM_FLOAT_NODE:
Expand Down Expand Up @@ -456,7 +469,7 @@ pm_static_literal_positive_p(const pm_node_t *node) {
case PM_INTEGER_NODE:
return !((const pm_integer_node_t *) node)->value.negative;
case PM_RATIONAL_NODE:
return pm_static_literal_positive_p(((const pm_rational_node_t *) node)->numeric);
return !((const pm_rational_node_t *) node)->numerator.negative;
case PM_IMAGINARY_NODE:
return pm_static_literal_positive_p(((const pm_imaginary_node_t *) node)->numeric);
default:
Expand All @@ -465,43 +478,6 @@ pm_static_literal_positive_p(const pm_node_t *node) {
}
}

/**
* Inspect a rational node that wraps a float node. This is going to be a
* poor-man's version of the Ruby `Rational#to_s` method, because we're not
* going to try to reduce the rational by finding the GCD. We'll leave that for
* a future improvement.
*
* The output appended to the buffer has the form "(<digits>/1<zeros>)", where
* <digits> is the source text with the decimal point removed (after trimming
* leading and trailing zeros) and <zeros> is one '0' per digit that followed
* the decimal point.
*
* @param buffer The buffer to append the representation to.
* @param node The rational node whose source location is read.
*/
static void
pm_rational_inspect(pm_buffer_t *buffer, pm_rational_node_t *node) {
// Work directly on the source bytes of the literal, excluding the trailing
// "r" suffix.
const uint8_t *start = node->base.location.start;
const uint8_t *end = node->base.location.end - 1; // r

// Trim zeros from both ends so they do not contribute digits to the
// numerator or zeros to the denominator.
while (start < end && *start == '0') start++; // 0.1 -> .1
while (end > start && end[-1] == '0') end--; // 1.0 -> 1.
size_t length = (size_t) (end - start);

const uint8_t *point = memchr(start, '.', length);
assert(point && "should have a decimal point");

// One byte fewer than the trimmed text, because the decimal point itself is
// not copied. On allocation failure nothing is appended to the buffer.
uint8_t *digits = malloc(length - 1);
if (digits == NULL) return;

// Copy the digits before the point, then the digits after it, so that
// `digits` holds the numerator with the decimal point removed.
memcpy(digits, start, (unsigned long) (point - start));
memcpy(digits + (point - start), point + 1, (unsigned long) (end - point - 1));

pm_integer_t numerator = { 0 };
pm_integer_parse(&numerator, PM_INTEGER_BASE_DECIMAL, digits, digits + length - 1);

// Emit "(numerator/1" followed by one zero for every digit that came after
// the decimal point, then the closing parenthesis.
pm_buffer_append_byte(buffer, '(');
pm_integer_string(buffer, &numerator);
pm_buffer_append_string(buffer, "/1", 2);
for (size_t index = 0; index < (size_t) (end - point - 1); index++) pm_buffer_append_byte(buffer, '0');
pm_buffer_append_byte(buffer, ')');

pm_integer_free(&numerator);
free(digits);
}

/**
* Create a string-based representation of the given static literal.
*/
Expand Down Expand Up @@ -544,7 +520,9 @@ pm_static_literal_inspect_node(pm_buffer_t *buffer, const pm_static_literals_met
pm_buffer_append_string(buffer, "(0", 2);
if (pm_static_literal_positive_p(numeric)) pm_buffer_append_byte(buffer, '+');
pm_static_literal_inspect_node(buffer, metadata, numeric);
if (PM_NODE_TYPE_P(numeric, PM_RATIONAL_NODE)) pm_buffer_append_byte(buffer, '*');
if (PM_NODE_TYPE_P(numeric, PM_RATIONAL_NODE)) {
pm_buffer_append_byte(buffer, '*');
}
pm_buffer_append_string(buffer, "i)", 2);
break;
}
Expand All @@ -555,22 +533,12 @@ pm_static_literal_inspect_node(pm_buffer_t *buffer, const pm_static_literals_met
pm_buffer_append_string(buffer, "nil", 3);
break;
case PM_RATIONAL_NODE: {
const pm_node_t *numeric = ((const pm_rational_node_t *) node)->numeric;

switch (PM_NODE_TYPE(numeric)) {
case PM_INTEGER_NODE:
pm_buffer_append_byte(buffer, '(');
pm_static_literal_inspect_node(buffer, metadata, numeric);
pm_buffer_append_string(buffer, "/1)", 3);
break;
case PM_FLOAT_NODE:
pm_rational_inspect(buffer, (pm_rational_node_t *) node);
break;
default:
assert(false && "unreachable");
break;
}

const pm_rational_node_t *rational = (const pm_rational_node_t *) node;
pm_buffer_append_byte(buffer, '(');
pm_integer_string(buffer, &rational->numerator);
pm_buffer_append_byte(buffer, '/');
pm_integer_string(buffer, &rational->denominator);
pm_buffer_append_byte(buffer, ')');
break;
}
case PM_REGULAR_EXPRESSION_NODE: {
Expand Down
Loading

0 comments on commit 2ba5735

Please sign in to comment.