Skip to content

Commit

Permalink
Add filepath to yp_parse_serialize metadata
Browse files Browse the repository at this point in the history
This adds the ability to pass the filepath through the
yp_parse_serialize function in the metadata. It also adds
documentation about this new API and fixes a couple of places that
were calling it with the old signature. Finally, it adds a specific
test to ensure yp_parse_serialize doesn't break.
  • Loading branch information
kddnewton committed Aug 7, 2023
1 parent c0f9223 commit d5c9ff2
Show file tree
Hide file tree
Showing 7 changed files with 140 additions and 48 deletions.
28 changes: 26 additions & 2 deletions docs/serialization.md
Original file line number Diff line number Diff line change
Expand Up @@ -75,7 +75,7 @@ void yp_buffer_free(yp_buffer_t *);

// Parse and serialize the AST represented by the given source to the given
// buffer.
void yp_parse_serialize(const char *, size_t, yp_buffer_t *);
void yp_parse_serialize(const char *, size_t, yp_buffer_t *, const char *);
```
Typically you would use a stack-allocated `yp_buffer_t` and call `yp_parse_serialize`, as in:
Expand All @@ -86,9 +86,33 @@ serialize(const char *source, size_t length) {
yp_buffer_t buffer;
if (!yp_buffer_init(&buffer)) return;
yp_parse_serialize(source, length, &buffer);
yp_parse_serialize(source, length, &buffer, NULL);
// Do something with the serialized string.
yp_buffer_free(&buffer);
}
```

The final argument to `yp_parse_serialize` controls the metadata of the source. This includes the filepath that the source is associated with, and any nested local variable scopes that are necessary to properly parse the file (in the case of parsing an `eval`). The metadata is a serialized format itself, and is structured as follows:

| # bytes | field |
| --- | --- |
| `4` | the size of the filepath string |
| | the filepath string |
| `4` | the number of local variable scopes |

Then, each local variable scope is encoded as:

| # bytes | field |
| --- | --- |
| `4` | the number of local variables in the scope |
| | the local variables |

Each local variable within each scope is encoded as:

| # bytes | field |
| --- | --- |
| `4` | the size of the local variable name |
| | the local variable name |

The metadata can be `NULL` (as seen in the example above). If it is not null, then a minimal metadata string would be `"\0\0\0\0\0\0\0\0"` which would use 4 bytes to indicate an empty filepath string and 4 bytes to indicate that there were no local variable scopes.
55 changes: 37 additions & 18 deletions ext/yarp/extension.c
Original file line number Diff line number Diff line change
Expand Up @@ -17,23 +17,23 @@ VALUE rb_cYARPParseResult;
/* IO of Ruby code */
/******************************************************************************/

// Check if the given filepath is a string. If it's nil, then return NULL. If
// it's not a string, then raise a type error. Otherwise return the filepath as
// a C string.
// Check if the given VALUE is a string. If it's nil, then return NULL. If it's
// not a string, then raise a type error. Otherwise return the VALUE as a C
// string.
static const char *
check_filepath(VALUE filepath) {
// If the filepath is nil, then we don't need to do anything.
if (NIL_P(filepath)) {
check_string(VALUE value) {
// If the value is nil, then we don't need to do anything.
if (NIL_P(value)) {
return NULL;
}

// Check if the filepath is a string. If it's not, then raise a type error.
if (!RB_TYPE_P(filepath, T_STRING)) {
rb_raise(rb_eTypeError, "wrong argument type %"PRIsVALUE" (expected String)", rb_obj_class(filepath));
// Check if the value is a string. If it's not, then raise a type error.
if (!RB_TYPE_P(value, T_STRING)) {
rb_raise(rb_eTypeError, "wrong argument type %"PRIsVALUE" (expected String)", rb_obj_class(value));
}

// Otherwise, return the filepath as a C string.
return StringValueCStr(filepath);
// Otherwise, return the value as a C string.
return RSTRING_PTR(value);
}

// Load the contents and size of the given string into the given yp_string_t.
Expand Down Expand Up @@ -82,15 +82,15 @@ dump(int argc, VALUE *argv, VALUE self) {

yp_string_t input;
input_load_string(&input, string);
return dump_input(&input, check_filepath(filepath));
return dump_input(&input, check_string(filepath));
}

// Dump the AST corresponding to the given file to a string.
static VALUE
dump_file(VALUE self, VALUE filepath) {
yp_string_t input;

const char *checked = check_filepath(filepath);
const char *checked = check_string(filepath);
if (!yp_string_mapped_init(&input, checked)) return Qnil;

VALUE value = dump_input(&input, checked);
Expand Down Expand Up @@ -280,15 +280,15 @@ lex(int argc, VALUE *argv, VALUE self) {

yp_string_t input;
input_load_string(&input, string);
return lex_input(&input, check_filepath(filepath));
return lex_input(&input, check_string(filepath));
}

// Return an array of tokens corresponding to the given file.
static VALUE
lex_file(VALUE self, VALUE filepath) {
yp_string_t input;

const char *checked = check_filepath(filepath);
const char *checked = check_string(filepath);
if (!yp_string_mapped_init(&input, checked)) return Qnil;

VALUE value = lex_input(&input, checked);
Expand Down Expand Up @@ -344,7 +344,7 @@ parse(int argc, VALUE *argv, VALUE self) {
yp_string_constant_init(&input, dup, length);
#endif

VALUE value = parse_input(&input, check_filepath(filepath));
VALUE value = parse_input(&input, check_string(filepath));

#ifdef YARP_DEBUG_MODE_BUILD
free(dup);
Expand All @@ -358,7 +358,7 @@ static VALUE
parse_file(VALUE self, VALUE filepath) {
yp_string_t input;

const char *checked = check_filepath(filepath);
const char *checked = check_string(filepath);
if (!yp_string_mapped_init(&input, checked)) return Qnil;

VALUE value = parse_input(&input, checked);
Expand Down Expand Up @@ -457,7 +457,7 @@ static VALUE
profile_file(VALUE self, VALUE filepath) {
yp_string_t input;

const char *checked = check_filepath(filepath);
const char *checked = check_string(filepath);
if (!yp_string_mapped_init(&input, checked)) return Qnil;

yp_parser_t parser;
Expand All @@ -470,6 +470,24 @@ profile_file(VALUE self, VALUE filepath) {
return Qnil;
}

// Parse the file at the given filepath and serialize the result into a Ruby
// string, handing the given metadata string through to yp_parse_serialize.
// This is mostly used to test this path since it is used by client libraries.
//
// Both filepath and metadata are passed through check_string, so a non-nil,
// non-String argument raises a TypeError. Returns nil if the output buffer
// cannot be initialized or if the file cannot be mapped.
static VALUE
parse_serialize_file_metadata(VALUE self, VALUE filepath, VALUE metadata) {
    // Validate both arguments before acquiring any resources. check_string can
    // raise, and raising once the buffer or the mapping exists would leak them.
    const char *checked = check_string(filepath);
    const char *checked_metadata = check_string(metadata);

    yp_buffer_t buffer;
    if (!yp_buffer_init(&buffer)) return Qnil;

    yp_string_t input;
    if (!yp_string_mapped_init(&input, checked)) {
        // The buffer was already initialized, so release it on this early exit.
        yp_buffer_free(&buffer);
        return Qnil;
    }

    yp_parse_serialize(yp_string_source(&input), yp_string_length(&input), &buffer, checked_metadata);
    VALUE result = rb_str_new(buffer.value, buffer.length);

    // NOTE(review): the mapped input is never released in this function. If
    // the yp_string API provides a matching free for mapped strings, it should
    // be called here -- confirm against the other *_file functions.
    yp_buffer_free(&buffer);
    return result;
}

/******************************************************************************/
/* Initialization of the extension */
/******************************************************************************/
Expand Down Expand Up @@ -519,6 +537,7 @@ Init_yarp(void) {
rb_define_singleton_method(rb_cYARPDebug, "unescape_all", unescape_all, 1);
rb_define_singleton_method(rb_cYARPDebug, "memsize", memsize, 1);
rb_define_singleton_method(rb_cYARPDebug, "profile_file", profile_file, 1);
rb_define_singleton_method(rb_cYARPDebug, "parse_serialize_file_metadata", parse_serialize_file_metadata, 2);

// Next, initialize the pack API.
Init_yarp_pack();
Expand Down
11 changes: 5 additions & 6 deletions fuzz/parse.c
Original file line number Diff line number Diff line change
@@ -1,10 +1,9 @@
#include <yarp.h>

void
harness (const char *input, size_t size) {
yp_buffer_t *buffer = malloc (sizeof (yp_buffer_t));
yp_buffer_init (buffer);
yp_parse_serialize ((const char *)input, size, buffer);
yp_buffer_free (buffer);
free (buffer);
harness(const char *input, size_t size) {
    // Fuzzing entry point: parse and serialize the given input with no
    // metadata (NULL), discarding the serialized output.
    yp_buffer_t buffer;

    // Only parse when the buffer was set up successfully; otherwise the parser
    // would be serializing into an unusable buffer.
    if (yp_buffer_init(&buffer)) {
        yp_parse_serialize(input, size, &buffer, NULL);
        yp_buffer_free(&buffer);
    }
}
4 changes: 4 additions & 0 deletions lib/yarp.rb
Original file line number Diff line number Diff line change
Expand Up @@ -309,6 +309,10 @@ module Debug
def self.newlines(source)
YARP.parse(source).source.offsets
end

# Parse the file at the given filepath through the native
# parse_serialize_file_metadata method and return its result. The metadata
# string built here carries only the filepath: its bytesize as a 32-bit
# integer, its bytes, and a 32-bit zero for the number of local variable
# scopes.
def self.parse_serialize_file(filepath)
  fields = [filepath.bytesize, filepath.b, 0]
  parse_serialize_file_metadata(filepath, fields.pack("LA*L"))
end
end

# Marking this as private so that consumers don't see it. It makes it a little
Expand Down
59 changes: 37 additions & 22 deletions src/yarp.c
Original file line number Diff line number Diff line change
Expand Up @@ -12945,8 +12945,11 @@ parse_program(yp_parser_t *parser) {
return (yp_node_t *) yp_program_node_create(parser, &locals, statements);
}

// Read a 32-bit unsigned integer from a pointer. This function is used to read
// the metadata that is passed into the parser from the Ruby implementation. It
// handles aligned and unaligned reads.
static uint32_t
yp_read_u32(const char *ptr) {
yp_metadata_read_u32(const char *ptr) {
if (((uintptr_t) ptr) % sizeof(uint32_t) == 0) {
return *((uint32_t *) ptr);
} else {
Expand All @@ -12956,45 +12959,57 @@ yp_read_u32(const char *ptr) {
}
}

// Process any additional metadata being passed into a parse. Since the source
// of these calls will be from Ruby implementation internals we assume it is from
// a trusted source.
// Process any additional metadata being passed into a call to the parser via
// the yp_parse_serialize function. Since the source of these calls will be from
// Ruby implementation internals we assume it is from a trusted source.
//
// Currently, this passes in the filepath of the source and the variable
// scoping surrounding an eval, but eventually it will be extended to hold any
// additional metadata. This data is serialized to reduce the calling
// complexity for a foreign function call vs a foreign runtime making a
// bindable in-memory version of a C structure.
//
// *Format*
//
// No metadata should just be NULL. For variable scopes it should be:
// metadata is assumed to be a valid pointer pointing to well-formed data. The
// format is described below:
//
// ```text
// [number_of_variable_scopes: uint32_t,
// [number_of_variables: uint32_t,
// [data_length: uint32_t, data: char*]*
// [
// filepath_size: uint32_t,
// filepath: char*,
// scopes_count: uint32_t,
// [
// locals_count: uint32_t,
// [local_size: uint32_t, local: char*]*
// ]*
// ]
// ```
static void
yp_parser_metadata(yp_parser_t *parser, const char *metadata) {
const char *p = metadata;
uint32_t number_of_scopes = yp_read_u32(p);
p += 4;
uint32_t filepath_size = yp_metadata_read_u32(metadata);
metadata += 4;

for (size_t scope_index = 0; scope_index < number_of_scopes; scope_index++) {
uint32_t number_of_variables = yp_read_u32(p);
p += 4;
if (filepath_size) {
yp_string_t filepath_string;
yp_string_constant_init(&filepath_string, metadata, filepath_size);

yp_parser_scope_push(parser, scope_index == 0);
parser->filepath_string = filepath_string;
metadata += filepath_size;
}

for (size_t variable_index = 0; variable_index < number_of_variables; variable_index++) {
uint32_t length = yp_read_u32(p);
p += 4;
uint32_t scopes_count = yp_metadata_read_u32(metadata);
metadata += 4;

for (size_t scope_index = 0; scope_index < scopes_count; scope_index++) {
uint32_t locals_count = yp_metadata_read_u32(metadata);
metadata += 4;

yp_parser_scope_push(parser, scope_index == 0);

yp_parser_local_add_location(parser, p, p + length);
for (size_t local_index = 0; local_index < locals_count; local_index++) {
uint32_t local_size = yp_metadata_read_u32(metadata);
metadata += 4;

p += length;
yp_parser_local_add_location(parser, metadata, metadata + local_size);
metadata += local_size;
}
}
}
Expand Down
5 changes: 5 additions & 0 deletions test/newline_test.rb
Original file line number Diff line number Diff line change
Expand Up @@ -18,7 +18,12 @@ def visit(node)
end

root = File.dirname(__dir__)

Dir["{lib,test}/**/*.rb", base: root].each do |relative|
# Our newlines are not exact, so for now skip a couple of files that are
# marked as incorrect.
next if relative == "test/parse_serialize_test.rb"

filepath = File.join(root, relative)

define_method "test_newline_flags_#{relative}" do
Expand Down
26 changes: 26 additions & 0 deletions test/parse_serialize_test.rb
Original file line number Diff line number Diff line change
@@ -0,0 +1,26 @@
# frozen_string_literal: true

require "yarp_test_helper"

# Exercises the parse-and-serialize path with filepath metadata by parsing
# this very file and loading the serialized result back into a tree.
class ParseSerializeTest < Test::Unit::TestCase
  def test_parse_serialize
    debug = YARP.const_get(:Debug)
    serialized = debug.parse_serialize_file(__FILE__)
    source = File.read(__FILE__)
    program = YARP.load(source, serialized)

    assert_kind_of YARP::ProgramNode, program, "Expected the root node to be a ProgramNode"
    assert_equal __FILE__, find_file_node(program)&.filepath, "Expected the filepath to be set correctly"
  end

  private

  # Walk the tree breadth-first from the given node and return the first
  # YARP::SourceFileNode encountered, or nil when there is none.
  def find_file_node(program)
    pending = [program]

    loop do
      current = pending.shift
      break unless current

      return current if current.is_a?(YARP::SourceFileNode)
      pending.concat(current.child_nodes.compact)
    end

    nil
  end
end

0 comments on commit d5c9ff2

Please sign in to comment.