From 44090d9f262d54fe4bdd494db173113eb59db12f Mon Sep 17 00:00:00 2001 From: Kevin Newton Date: Fri, 27 Oct 2023 13:34:25 -0400 Subject: [PATCH 1/2] Parse inline comments --- ext/prism/extension.c | 60 ++++++++++++++++++++++++ test/prism/parse_inline_comments_test.rb | 23 +++++++++ 2 files changed, 83 insertions(+) create mode 100644 test/prism/parse_inline_comments_test.rb diff --git a/ext/prism/extension.c b/ext/prism/extension.c index 786adb2c0f9..27dbc5c8f71 100644 --- a/ext/prism/extension.c +++ b/ext/prism/extension.c @@ -396,6 +396,37 @@ parse_input(pm_string_t *input, const char *filepath) { return result; } +// Parse the given input and return an array of Comment objects. +static VALUE +parse_input_inline_comments(pm_string_t *input, const char *filepath) { + pm_parser_t parser; + pm_parser_init(&parser, pm_string_source(input), pm_string_length(input), filepath); + + pm_node_t *node = pm_parse(&parser); + rb_encoding *encoding = rb_enc_find(parser.encoding.name); + + VALUE source = pm_source_new(&parser, encoding); + VALUE comments = rb_ary_new(); + + for (pm_comment_t *comment = (pm_comment_t *) parser.comment_list.head; comment != NULL; comment = (pm_comment_t *) comment->node.next) { + if (comment->type != PM_COMMENT_INLINE) continue; + + VALUE location_argv[] = { + source, + LONG2FIX(comment->start - parser.start), + LONG2FIX(comment->end - comment->start) + }; + + VALUE comment_argv[] = { ID2SYM(rb_intern("inline")), rb_class_new_instance(3, location_argv, rb_cPrismLocation) }; + rb_ary_push(comments, rb_class_new_instance(2, comment_argv, rb_cPrismComment)); + } + + pm_node_destroy(&parser, node); + pm_parser_free(&parser); + + return comments; +} + // Parse the given string and return a ParseResult instance. static VALUE parse(int argc, VALUE *argv, VALUE self) { @@ -436,6 +467,33 @@ parse_file(VALUE self, VALUE filepath) { return value; } +// Parse the given string and return an array of Comment objects. 
+static VALUE +parse_inline_comments(int argc, VALUE *argv, VALUE self) { + VALUE string; + VALUE filepath; + rb_scan_args(argc, argv, "11", &string, &filepath); + + pm_string_t input; + input_load_string(&input, string); + + return parse_input_inline_comments(&input, check_string(filepath)); +} + +// Parse the given file and return an array of Comment objects. +static VALUE +parse_file_inline_comments(VALUE self, VALUE filepath) { + pm_string_t input; + + const char *checked = check_string(filepath); + if (!pm_string_mapped_init(&input, checked)) return Qnil; + + VALUE value = parse_input_inline_comments(&input, checked); + pm_string_free(&input); + + return value; +} + // Parse the given string and return a ParseResult instance. static VALUE parse_lex(int argc, VALUE *argv, VALUE self) { @@ -621,6 +679,8 @@ Init_prism(void) { rb_define_singleton_method(rb_cPrism, "lex_file", lex_file, 1); rb_define_singleton_method(rb_cPrism, "parse", parse, -1); rb_define_singleton_method(rb_cPrism, "parse_file", parse_file, 1); + rb_define_singleton_method(rb_cPrism, "parse_inline_comments", parse_inline_comments, -1); + rb_define_singleton_method(rb_cPrism, "parse_file_inline_comments", parse_file_inline_comments, 1); rb_define_singleton_method(rb_cPrism, "parse_lex", parse_lex, -1); rb_define_singleton_method(rb_cPrism, "parse_lex_file", parse_lex_file, 1); diff --git a/test/prism/parse_inline_comments_test.rb b/test/prism/parse_inline_comments_test.rb new file mode 100644 index 00000000000..0087e1e9af0 --- /dev/null +++ b/test/prism/parse_inline_comments_test.rb @@ -0,0 +1,23 @@ +# frozen_string_literal: true + +require_relative "test_helper" + +return if Prism::BACKEND == :FFI + +module Prism + class ParseInlineCommentsTest < TestCase + def test_parse_inline_comments + comments = Prism.parse_inline_comments("# foo") + + assert_kind_of Array, comments + assert_equal 1, comments.length + end + + def test_parse_file_inline_comments + comments = 
Prism.parse_file_inline_comments(__FILE__) + + assert_kind_of Array, comments + assert_equal 1, comments.length + end + end +end From 5b72f8448070c9c7bef517a9fe9ae7836378ec31 Mon Sep 17 00:00:00 2001 From: Kevin Newton Date: Fri, 27 Oct 2023 13:55:48 -0400 Subject: [PATCH 2/2] Prism.parse_inline_comments --- docs/ruby_api.md | 2 ++ include/prism.h | 8 ++++++ lib/prism/ffi.rb | 25 +++++++++++++++++++ src/prism.c | 30 ++++++++++++++++++++--- templates/lib/prism/serialize.rb.erb | 31 ++++++++++++++++-------- templates/src/serialize.c.erb | 4 +-- test/prism/parse_inline_comments_test.rb | 2 -- 7 files changed, 85 insertions(+), 17 deletions(-) diff --git a/docs/ruby_api.md b/docs/ruby_api.md index 59c9c310b63..8daba218b15 100644 --- a/docs/ruby_api.md +++ b/docs/ruby_api.md @@ -23,3 +23,5 @@ The full API is documented below. * `Prism.parse_lex(source)` - parse the syntax tree corresponding to the given source string and return it within a parse result, along with the tokens * `Prism.parse_lex_file(filepath)` - parse the syntax tree corresponding to the given source file and return it within a parse result, along with the tokens * `Prism.load(source, serialized)` - load the serialized syntax tree using the source as a reference into a syntax tree +* `Prism.parse_inline_comments(source)` - parse the inline comments corresponding to the given source string and return them +* `Prism.parse_file_inline_comments(filepath)` - parse the inline comments corresponding to the given source file and return them diff --git a/include/prism.h b/include/prism.h index 99a6a7e2eb7..227b233ea1d 100644 --- a/include/prism.h +++ b/include/prism.h @@ -30,6 +30,10 @@ void pm_serialize_content(pm_parser_t *parser, pm_node_t *node, pm_buffer_t *buffer); +void pm_serialize_encoding(pm_encoding_t *encoding, pm_buffer_t *buffer); + +void pm_serialize_comment_list(pm_parser_t *parser, pm_list_t *list, pm_buffer_t *buffer); + void pm_parser_metadata(pm_parser_t *parser, const char *metadata); // The 
prism version and the serialization format. @@ -61,6 +65,10 @@ PRISM_EXPORTED_FUNCTION void pm_serialize(pm_parser_t *parser, pm_node_t *node, // Parse the given source to the AST and serialize the AST to the given buffer. PRISM_EXPORTED_FUNCTION void pm_parse_serialize(const uint8_t *source, size_t size, pm_buffer_t *buffer, const char *metadata); +// Parse and serialize the inline comments in the given source to the given +// buffer. +PRISM_EXPORTED_FUNCTION void pm_parse_serialize_inline_comments(const uint8_t *source, size_t size, pm_buffer_t *buffer, const char *metadata); + // Lex the given source and serialize to the given buffer. PRISM_EXPORTED_FUNCTION void pm_lex_serialize(const uint8_t *source, size_t size, const char *filepath, pm_buffer_t *buffer); diff --git a/lib/prism/ffi.rb b/lib/prism/ffi.rb index cc7d94fb3f0..ae13474c317 100644 --- a/lib/prism/ffi.rb +++ b/lib/prism/ffi.rb @@ -70,6 +70,7 @@ def self.load_exported_functions_from(header, *functions) "prism.h", "pm_version", "pm_parse_serialize", + "pm_parse_serialize_inline_comments", "pm_lex_serialize", "pm_parse_lex_serialize" ) @@ -224,6 +225,30 @@ def self.parse_file(filepath) end end + # Mirror the Prism.parse_inline_comments API by using the serialization API. + def self.parse_inline_comments(code, filepath = nil) + LibRubyParser::PrismBuffer.with do |buffer| + metadata = [filepath.bytesize, filepath.b, 0].pack("LA*L") if filepath + LibRubyParser.pm_parse_serialize_inline_comments(code, code.bytesize, buffer.pointer, metadata) + + source = Source.new(code) + loader = Serialize::Loader.new(source, buffer.read) + + loader.load_header + loader.load_force_encoding + loader.load_comments + end + end + + # Mirror the Prism.parse_file_inline_comments API by using the serialization + # API. This uses native strings instead of Ruby strings because it allows us + # to use mmap when it is available. 
+ def self.parse_file_inline_comments(filepath) + LibRubyParser::PrismString.with(filepath) do |string| + parse_inline_comments(string.read, filepath) + end + end + # Mirror the Prism.parse_lex API by using the serialization API. def self.parse_lex(code, filepath = nil) LibRubyParser::PrismBuffer.with do |buffer| diff --git a/src/prism.c b/src/prism.c index 2cbf664b5da..9d40d28d015 100644 --- a/src/prism.c +++ b/src/prism.c @@ -15689,14 +15689,18 @@ pm_parse(pm_parser_t *parser) { return parse_program(parser); } -PRISM_EXPORTED_FUNCTION void -pm_serialize(pm_parser_t *parser, pm_node_t *node, pm_buffer_t *buffer) { +static inline void +pm_serialize_header(pm_buffer_t *buffer) { pm_buffer_append_string(buffer, "PRISM", 5); pm_buffer_append_byte(buffer, PRISM_VERSION_MAJOR); pm_buffer_append_byte(buffer, PRISM_VERSION_MINOR); pm_buffer_append_byte(buffer, PRISM_VERSION_PATCH); pm_buffer_append_byte(buffer, PRISM_SERIALIZE_ONLY_SEMANTICS_FIELDS ? 1 : 0); +} +PRISM_EXPORTED_FUNCTION void +pm_serialize(pm_parser_t *parser, pm_node_t *node, pm_buffer_t *buffer) { + pm_serialize_header(buffer); pm_serialize_content(parser, node, buffer); pm_buffer_append_string(buffer, "\0", 1); } @@ -15710,7 +15714,27 @@ pm_parse_serialize(const uint8_t *source, size_t size, pm_buffer_t *buffer, cons if (metadata) pm_parser_metadata(&parser, metadata); pm_node_t *node = pm_parse(&parser); - pm_serialize(&parser, node, buffer); + + pm_serialize_header(buffer); + pm_serialize_content(&parser, node, buffer); + pm_buffer_append_byte(buffer, '\0'); + + pm_node_destroy(&parser, node); + pm_parser_free(&parser); +} + +// Parse and serialize the inline comments in the given source to the given +// buffer. 
+PRISM_EXPORTED_FUNCTION void +pm_parse_serialize_inline_comments(const uint8_t *source, size_t size, pm_buffer_t *buffer, const char *metadata) { + pm_parser_t parser; + pm_parser_init(&parser, source, size, NULL); + if (metadata) pm_parser_metadata(&parser, metadata); + + pm_node_t *node = pm_parse(&parser); + pm_serialize_header(buffer); + pm_serialize_encoding(&parser.encoding, buffer); + pm_serialize_comment_list(&parser, &parser.comment_list, buffer); pm_node_destroy(&parser, node); pm_parser_free(&parser); diff --git a/templates/lib/prism/serialize.rb.erb b/templates/lib/prism/serialize.rb.erb index 01588c6dae3..7f8b61ddfe8 100644 --- a/templates/lib/prism/serialize.rb.erb +++ b/templates/lib/prism/serialize.rb.erb @@ -50,12 +50,30 @@ module Prism define_load_node_lambdas unless RUBY_ENGINE == 'ruby' end + def load_header + raise "Invalid serialization" if io.read(5) != "PRISM" + raise "Invalid serialization" if io.read(3).unpack("C3") != [MAJOR_VERSION, MINOR_VERSION, PATCH_VERSION] + only_semantic_fields = io.read(1).unpack1("C") + unless only_semantic_fields == 0 + raise "Invalid serialization (location fields must be included but are not)" + end + end + def load_encoding Encoding.find(io.read(load_varint)) end + def load_force_encoding + @encoding = load_encoding + @input = input.force_encoding(@encoding).freeze + end + + def load_comments + load_varint.times.map { Comment.new(Comment::TYPES.fetch(load_varint), load_location) } + end + def load_metadata - comments = load_varint.times.map { Comment.new(Comment::TYPES.fetch(load_varint), load_location) } + comments = load_comments magic_comments = load_varint.times.map { MagicComment.new(load_location, load_location) } errors = load_varint.times.map { ParseError.new(load_embedded_string, load_location) } warnings = load_varint.times.map { ParseWarning.new(load_embedded_string, load_location) } @@ -89,15 +107,8 @@ module Prism end def load_nodes - raise "Invalid serialization" if io.read(5) != "PRISM" - 
raise "Invalid serialization" if io.read(3).unpack("C3") != [MAJOR_VERSION, MINOR_VERSION, PATCH_VERSION] - only_semantic_fields = io.read(1).unpack1("C") - unless only_semantic_fields == 0 - raise "Invalid serialization (location fields must be included but are not)" - end - - @encoding = load_encoding - @input = input.force_encoding(@encoding).freeze + load_header + load_force_encoding comments, magic_comments, errors, warnings = load_metadata diff --git a/templates/src/serialize.c.erb b/templates/src/serialize.c.erb index 69d6d4094fe..d46284d3b20 100644 --- a/templates/src/serialize.c.erb +++ b/templates/src/serialize.c.erb @@ -136,7 +136,7 @@ pm_serialize_comment(pm_parser_t *parser, pm_comment_t *comment, pm_buffer_t *bu pm_buffer_append_varint(buffer, pm_ptrdifft_to_u32(comment->end - comment->start)); } -static void +void pm_serialize_comment_list(pm_parser_t *parser, pm_list_t *list, pm_buffer_t *buffer) { pm_buffer_append_varint(buffer, pm_sizet_to_u32(pm_list_size(list))); @@ -189,7 +189,7 @@ pm_serialize_diagnostic_list(pm_parser_t *parser, pm_list_t *list, pm_buffer_t * } } -static void +void pm_serialize_encoding(pm_encoding_t *encoding, pm_buffer_t *buffer) { size_t encoding_length = strlen(encoding->name); pm_buffer_append_varint(buffer, pm_sizet_to_u32(encoding_length)); diff --git a/test/prism/parse_inline_comments_test.rb b/test/prism/parse_inline_comments_test.rb index 0087e1e9af0..d90d0abf883 100644 --- a/test/prism/parse_inline_comments_test.rb +++ b/test/prism/parse_inline_comments_test.rb @@ -2,8 +2,6 @@ require_relative "test_helper" -return if Prism::BACKEND == :FFI - module Prism class ParseInlineCommentsTest < TestCase def test_parse_inline_comments