diff --git a/docs/ruby_api.md b/docs/ruby_api.md index 59c9c310b63..8daba218b15 100644 --- a/docs/ruby_api.md +++ b/docs/ruby_api.md @@ -23,3 +23,5 @@ The full API is documented below. * `Prism.parse_lex(source)` - parse the syntax tree corresponding to the given source string and return it within a parse result, along with the tokens * `Prism.parse_lex_file(filepath)` - parse the syntax tree corresponding to the given source file and return it within a parse result, along with the tokens * `Prism.load(source, serialized)` - load the serialized syntax tree using the source as a reference into a syntax tree +* `Prism.parse_inline_comments(source)` - parse the inline comments corresponding to the given source string and return them +* `Prism.parse_file_inline_comments(filepath)` - parse the inline comments corresponding to the given source file and return them diff --git a/include/prism.h b/include/prism.h index 99a6a7e2eb7..227b233ea1d 100644 --- a/include/prism.h +++ b/include/prism.h @@ -30,6 +30,10 @@ void pm_serialize_content(pm_parser_t *parser, pm_node_t *node, pm_buffer_t *buffer); +void pm_serialize_encoding(pm_encoding_t *encoding, pm_buffer_t *buffer); + +void pm_serialize_comment_list(pm_parser_t *parser, pm_list_t *list, pm_buffer_t *buffer); + void pm_parser_metadata(pm_parser_t *parser, const char *metadata); // The prism version and the serialization format. @@ -61,6 +65,10 @@ PRISM_EXPORTED_FUNCTION void pm_serialize(pm_parser_t *parser, pm_node_t *node, // Parse the given source to the AST and serialize the AST to the given buffer. PRISM_EXPORTED_FUNCTION void pm_parse_serialize(const uint8_t *source, size_t size, pm_buffer_t *buffer, const char *metadata); +// Parse and serialize the inline comments in the given source to the given +// buffer. +PRISM_EXPORTED_FUNCTION void pm_parse_serialize_inline_comments(const uint8_t *source, size_t size, pm_buffer_t *buffer, const char *metadata); + // Lex the given source and serialize to the given buffer. 
PRISM_EXPORTED_FUNCTION void pm_lex_serialize(const uint8_t *source, size_t size, const char *filepath, pm_buffer_t *buffer); diff --git a/lib/prism/ffi.rb b/lib/prism/ffi.rb index cc7d94fb3f0..ae13474c317 100644 --- a/lib/prism/ffi.rb +++ b/lib/prism/ffi.rb @@ -70,6 +70,7 @@ def self.load_exported_functions_from(header, *functions) "prism.h", "pm_version", "pm_parse_serialize", + "pm_parse_serialize_inline_comments", "pm_lex_serialize", "pm_parse_lex_serialize" ) @@ -224,6 +225,30 @@ def self.parse_file(filepath) end end + # Mirror the Prism.parse_inline_comments API by using the serialization API. + def self.parse_inline_comments(code, filepath = nil) + LibRubyParser::PrismBuffer.with do |buffer| + metadata = [filepath.bytesize, filepath.b, 0].pack("LA*L") if filepath + LibRubyParser.pm_parse_serialize_inline_comments(code, code.bytesize, buffer.pointer, metadata) + + source = Source.new(code) + loader = Serialize::Loader.new(source, buffer.read) + + loader.load_header + loader.load_force_encoding + loader.load_comments + end + end + + # Mirror the Prism.parse_file_inline_comments API by using the serialization + # API. This uses native strings instead of Ruby strings because it allows us + # to use mmap when it is available. + def self.parse_file_inline_comments(filepath) + LibRubyParser::PrismString.with(filepath) do |string| + parse_inline_comments(string.read, filepath) + end + end + # Mirror the Prism.parse_lex API by using the serialization API. 
def self.parse_lex(code, filepath = nil) LibRubyParser::PrismBuffer.with do |buffer| diff --git a/src/prism.c b/src/prism.c index 2cbf664b5da..9d40d28d015 100644 --- a/src/prism.c +++ b/src/prism.c @@ -15689,14 +15689,18 @@ pm_parse(pm_parser_t *parser) { return parse_program(parser); } -PRISM_EXPORTED_FUNCTION void -pm_serialize(pm_parser_t *parser, pm_node_t *node, pm_buffer_t *buffer) { +static inline void +pm_serialize_header(pm_buffer_t *buffer) { pm_buffer_append_string(buffer, "PRISM", 5); pm_buffer_append_byte(buffer, PRISM_VERSION_MAJOR); pm_buffer_append_byte(buffer, PRISM_VERSION_MINOR); pm_buffer_append_byte(buffer, PRISM_VERSION_PATCH); pm_buffer_append_byte(buffer, PRISM_SERIALIZE_ONLY_SEMANTICS_FIELDS ? 1 : 0); +} +PRISM_EXPORTED_FUNCTION void +pm_serialize(pm_parser_t *parser, pm_node_t *node, pm_buffer_t *buffer) { + pm_serialize_header(buffer); pm_serialize_content(parser, node, buffer); pm_buffer_append_string(buffer, "\0", 1); } @@ -15710,7 +15714,27 @@ pm_parse_serialize(const uint8_t *source, size_t size, pm_buffer_t *buffer, cons if (metadata) pm_parser_metadata(&parser, metadata); pm_node_t *node = pm_parse(&parser); - pm_serialize(&parser, node, buffer); + + pm_serialize_header(buffer); + pm_serialize_content(&parser, node, buffer); + pm_buffer_append_byte(buffer, '\0'); + + pm_node_destroy(&parser, node); + pm_parser_free(&parser); +} + +// Parse and serialize the inline comments in the given source to the given +// buffer. 
+PRISM_EXPORTED_FUNCTION void +pm_parse_serialize_inline_comments(const uint8_t *source, size_t size, pm_buffer_t *buffer, const char *metadata) { + pm_parser_t parser; + pm_parser_init(&parser, source, size, NULL); + if (metadata) pm_parser_metadata(&parser, metadata); + + pm_node_t *node = pm_parse(&parser); + pm_serialize_header(buffer); + pm_serialize_encoding(&parser.encoding, buffer); + pm_serialize_comment_list(&parser, &parser.comment_list, buffer); pm_node_destroy(&parser, node); pm_parser_free(&parser); diff --git a/templates/lib/prism/serialize.rb.erb b/templates/lib/prism/serialize.rb.erb index 01588c6dae3..7f8b61ddfe8 100644 --- a/templates/lib/prism/serialize.rb.erb +++ b/templates/lib/prism/serialize.rb.erb @@ -50,12 +50,30 @@ module Prism define_load_node_lambdas unless RUBY_ENGINE == 'ruby' end + def load_header + raise "Invalid serialization" if io.read(5) != "PRISM" + raise "Invalid serialization" if io.read(3).unpack("C3") != [MAJOR_VERSION, MINOR_VERSION, PATCH_VERSION] + only_semantic_fields = io.read(1).unpack1("C") + unless only_semantic_fields == 0 + raise "Invalid serialization (location fields must be included but are not)" + end + end + def load_encoding Encoding.find(io.read(load_varint)) end + def load_force_encoding + @encoding = load_encoding + @input = input.force_encoding(@encoding).freeze + end + + def load_comments + load_varint.times.map { Comment.new(Comment::TYPES.fetch(load_varint), load_location) } + end + def load_metadata - comments = load_varint.times.map { Comment.new(Comment::TYPES.fetch(load_varint), load_location) } + comments = load_comments magic_comments = load_varint.times.map { MagicComment.new(load_location, load_location) } errors = load_varint.times.map { ParseError.new(load_embedded_string, load_location) } warnings = load_varint.times.map { ParseWarning.new(load_embedded_string, load_location) } @@ -89,15 +107,8 @@ module Prism end def load_nodes - raise "Invalid serialization" if io.read(5) != "PRISM" - 
raise "Invalid serialization" if io.read(3).unpack("C3") != [MAJOR_VERSION, MINOR_VERSION, PATCH_VERSION] - only_semantic_fields = io.read(1).unpack1("C") - unless only_semantic_fields == 0 - raise "Invalid serialization (location fields must be included but are not)" - end - - @encoding = load_encoding - @input = input.force_encoding(@encoding).freeze + load_header + load_force_encoding comments, magic_comments, errors, warnings = load_metadata diff --git a/templates/src/serialize.c.erb b/templates/src/serialize.c.erb index 69d6d4094fe..d46284d3b20 100644 --- a/templates/src/serialize.c.erb +++ b/templates/src/serialize.c.erb @@ -136,7 +136,7 @@ pm_serialize_comment(pm_parser_t *parser, pm_comment_t *comment, pm_buffer_t *bu pm_buffer_append_varint(buffer, pm_ptrdifft_to_u32(comment->end - comment->start)); } -static void +void pm_serialize_comment_list(pm_parser_t *parser, pm_list_t *list, pm_buffer_t *buffer) { pm_buffer_append_varint(buffer, pm_sizet_to_u32(pm_list_size(list))); @@ -189,7 +189,7 @@ pm_serialize_diagnostic_list(pm_parser_t *parser, pm_list_t *list, pm_buffer_t * } } -static void +void pm_serialize_encoding(pm_encoding_t *encoding, pm_buffer_t *buffer) { size_t encoding_length = strlen(encoding->name); pm_buffer_append_varint(buffer, pm_sizet_to_u32(encoding_length)); diff --git a/test/prism/parse_inline_comments_test.rb b/test/prism/parse_inline_comments_test.rb index 0087e1e9af0..d90d0abf883 100644 --- a/test/prism/parse_inline_comments_test.rb +++ b/test/prism/parse_inline_comments_test.rb @@ -2,8 +2,6 @@ require_relative "test_helper" -return if Prism::BACKEND == :FFI - module Prism class ParseInlineCommentsTest < TestCase def test_parse_inline_comments