Skip to content

Commit

Permalink
Merge pull request #1745 from ruby/inline-comments
Browse files Browse the repository at this point in the history
Inline comments
  • Loading branch information
kddnewton authored Oct 27, 2023
2 parents d85e576 + 5b72f84 commit 4ff8fe2
Show file tree
Hide file tree
Showing 8 changed files with 166 additions and 15 deletions.
2 changes: 2 additions & 0 deletions docs/ruby_api.md
Original file line number Diff line number Diff line change
Expand Up @@ -23,3 +23,5 @@ The full API is documented below.
* `Prism.parse_lex(source)` - parse the syntax tree corresponding to the given source string and return it within a parse result, along with the tokens
* `Prism.parse_lex_file(filepath)` - parse the syntax tree corresponding to the given source file and return it within a parse result, along with the tokens
* `Prism.load(source, serialized)` - load the serialized syntax tree using the source as a reference into a syntax tree
* `Prism.parse_inline_comments(source)` - parse the inline comments corresponding to the given source string and return them
* `Prism.parse_file_inline_comments(filepath)` - parse the inline comments corresponding to the given source file and return them
60 changes: 60 additions & 0 deletions ext/prism/extension.c
Original file line number Diff line number Diff line change
Expand Up @@ -396,6 +396,37 @@ parse_input(pm_string_t *input, const char *filepath) {
return result;
}

// Run the parser over the given input and collect every comment whose type is
// PM_COMMENT_INLINE into a Ruby array of Comment objects. Each Comment is built
// with the :inline symbol and a Location made from the shared source object,
// the comment's offset from parser.start, and its length (end - start).
static VALUE
parse_input_inline_comments(pm_string_t *input, const char *filepath) {
    pm_parser_t parser;
    pm_parser_init(&parser, pm_string_source(input), pm_string_length(input), filepath);

    pm_node_t *root = pm_parse(&parser);
    rb_encoding *encoding = rb_enc_find(parser.encoding.name);

    VALUE source = pm_source_new(&parser, encoding);
    VALUE result = rb_ary_new();

    pm_comment_t *current = (pm_comment_t *) parser.comment_list.head;
    while (current != NULL) {
        // Comments of any other type are skipped entirely.
        if (current->type == PM_COMMENT_INLINE) {
            VALUE location_argv[3];
            location_argv[0] = source;
            location_argv[1] = LONG2FIX(current->start - parser.start);
            location_argv[2] = LONG2FIX(current->end - current->start);

            VALUE comment_argv[2];
            comment_argv[0] = ID2SYM(rb_intern("inline"));
            comment_argv[1] = rb_class_new_instance(3, location_argv, rb_cPrismLocation);

            rb_ary_push(result, rb_class_new_instance(2, comment_argv, rb_cPrismComment));
        }

        current = (pm_comment_t *) current->node.next;
    }

    // The tree itself is not returned, so release it along with the parser.
    pm_node_destroy(&parser, root);
    pm_parser_free(&parser);

    return result;
}

// Parse the given string and return a ParseResult instance.
static VALUE
parse(int argc, VALUE *argv, VALUE self) {
Expand Down Expand Up @@ -436,6 +467,33 @@ parse_file(VALUE self, VALUE filepath) {
return value;
}

// Entry point for Prism.parse_inline_comments. Accepts one required argument
// (the source string) and one optional argument (the filepath), loads the
// string into a pm_string_t, and returns the array of Comment objects produced
// by parse_input_inline_comments.
static VALUE
parse_inline_comments(int argc, VALUE *argv, VALUE self) {
    VALUE source_string;
    VALUE source_path;
    rb_scan_args(argc, argv, "11", &source_string, &source_path);

    pm_string_t input;
    input_load_string(&input, source_string);

    const char *checked = check_string(source_path);
    return parse_input_inline_comments(&input, checked);
}

// Entry point for Prism.parse_file_inline_comments. Initializes a pm_string_t
// from the file at the given path via pm_string_mapped_init, parses it, and
// returns the array of Comment objects. Returns nil when the string could not
// be initialized from the path.
static VALUE
parse_file_inline_comments(VALUE self, VALUE filepath) {
    const char *path = check_string(filepath);

    pm_string_t contents;
    if (!pm_string_mapped_init(&contents, path)) {
        return Qnil;
    }

    VALUE comments = parse_input_inline_comments(&contents, path);

    // The string was initialized here, so it is released here once parsing
    // has finished.
    pm_string_free(&contents);

    return comments;
}

// Parse the given string and return a ParseResult instance.
static VALUE
parse_lex(int argc, VALUE *argv, VALUE self) {
Expand Down Expand Up @@ -621,6 +679,8 @@ Init_prism(void) {
rb_define_singleton_method(rb_cPrism, "lex_file", lex_file, 1);
rb_define_singleton_method(rb_cPrism, "parse", parse, -1);
rb_define_singleton_method(rb_cPrism, "parse_file", parse_file, 1);
rb_define_singleton_method(rb_cPrism, "parse_inline_comments", parse_inline_comments, -1);
rb_define_singleton_method(rb_cPrism, "parse_file_inline_comments", parse_file_inline_comments, 1);
rb_define_singleton_method(rb_cPrism, "parse_lex", parse_lex, -1);
rb_define_singleton_method(rb_cPrism, "parse_lex_file", parse_lex_file, 1);

Expand Down
8 changes: 8 additions & 0 deletions include/prism.h
Original file line number Diff line number Diff line change
Expand Up @@ -30,6 +30,10 @@

void pm_serialize_content(pm_parser_t *parser, pm_node_t *node, pm_buffer_t *buffer);

void pm_serialize_encoding(pm_encoding_t *encoding, pm_buffer_t *buffer);

void pm_serialize_comment_list(pm_parser_t *parser, pm_list_t *list, pm_buffer_t *buffer);

void pm_parser_metadata(pm_parser_t *parser, const char *metadata);

// The prism version and the serialization format.
Expand Down Expand Up @@ -61,6 +65,10 @@ PRISM_EXPORTED_FUNCTION void pm_serialize(pm_parser_t *parser, pm_node_t *node,
// Parse the given source to the AST and serialize the AST to the given buffer.
PRISM_EXPORTED_FUNCTION void pm_parse_serialize(const uint8_t *source, size_t size, pm_buffer_t *buffer, const char *metadata);

// Parse and serialize the inline comments in the given source to the given
// buffer.
PRISM_EXPORTED_FUNCTION void pm_parse_serialize_inline_comments(const uint8_t *source, size_t size, pm_buffer_t *buffer, const char *metadata);

// Lex the given source and serialize to the given buffer.
PRISM_EXPORTED_FUNCTION void pm_lex_serialize(const uint8_t *source, size_t size, const char *filepath, pm_buffer_t *buffer);

Expand Down
25 changes: 25 additions & 0 deletions lib/prism/ffi.rb
Original file line number Diff line number Diff line change
Expand Up @@ -70,6 +70,7 @@ def self.load_exported_functions_from(header, *functions)
"prism.h",
"pm_version",
"pm_parse_serialize",
"pm_parse_serialize_inline_comments",
"pm_lex_serialize",
"pm_parse_lex_serialize"
)
Expand Down Expand Up @@ -224,6 +225,30 @@ def self.parse_file(filepath)
end
end

# Mirror the Prism.parse_inline_comments API by using the serialization API.
# The serialized buffer is read back in order (header, encoding, comment
# list) and the loaded comment list is the return value.
#
# NOTE(review): the native extension keeps only PM_COMMENT_INLINE comments;
# confirm the list loaded here is restricted in the same way.
def self.parse_inline_comments(code, filepath = nil)
  LibRubyParser::PrismBuffer.with do |buffer|
    # The metadata blob is only built when a filepath was supplied.
    metadata = filepath ? [filepath.bytesize, filepath.b, 0].pack("LA*L") : nil
    LibRubyParser.pm_parse_serialize_inline_comments(code, code.bytesize, buffer.pointer, metadata)

    loader = Serialize::Loader.new(Source.new(code), buffer.read)

    loader.load_header
    loader.load_force_encoding
    loader.load_comments
  end
end

# Mirror the Prism.parse_file_inline_comments API by using the serialization
# API. The file is read through a native string rather than a Ruby string
# because that allows mmap to be used when it is available.
def self.parse_file_inline_comments(filepath)
  LibRubyParser::PrismString.with(filepath) { |contents| parse_inline_comments(contents.read, filepath) }
end

# Mirror the Prism.parse_lex API by using the serialization API.
def self.parse_lex(code, filepath = nil)
LibRubyParser::PrismBuffer.with do |buffer|
Expand Down
30 changes: 27 additions & 3 deletions src/prism.c
Original file line number Diff line number Diff line change
Expand Up @@ -15689,14 +15689,18 @@ pm_parse(pm_parser_t *parser) {
return parse_program(parser);
}

PRISM_EXPORTED_FUNCTION void
pm_serialize(pm_parser_t *parser, pm_node_t *node, pm_buffer_t *buffer) {
// Write the fixed serialization header to the buffer: the 5-byte "PRISM"
// marker, the major/minor/patch version bytes, and one flag byte that is 1
// when PRISM_SERIALIZE_ONLY_SEMANTICS_FIELDS is truthy and 0 otherwise.
static inline void
pm_serialize_header(pm_buffer_t *buffer) {
    const uint8_t version[] = {
        PRISM_VERSION_MAJOR,
        PRISM_VERSION_MINOR,
        PRISM_VERSION_PATCH
    };
    const uint8_t semantics_only = PRISM_SERIALIZE_ONLY_SEMANTICS_FIELDS ? 1 : 0;

    pm_buffer_append_string(buffer, "PRISM", 5);

    for (size_t index = 0; index < sizeof(version) / sizeof(version[0]); index++) {
        pm_buffer_append_byte(buffer, version[index]);
    }

    pm_buffer_append_byte(buffer, semantics_only);
}

// Serialize the tree rooted at node into the buffer: the header first, then
// the content, then a single terminating NUL byte.
PRISM_EXPORTED_FUNCTION void
pm_serialize(pm_parser_t *parser, pm_node_t *node, pm_buffer_t *buffer) {
    pm_serialize_header(buffer);
    pm_serialize_content(parser, node, buffer);
    pm_buffer_append_byte(buffer, '\0');
}
Expand All @@ -15710,7 +15714,27 @@ pm_parse_serialize(const uint8_t *source, size_t size, pm_buffer_t *buffer, cons
if (metadata) pm_parser_metadata(&parser, metadata);

pm_node_t *node = pm_parse(&parser);
pm_serialize(&parser, node, buffer);

pm_serialize_header(buffer);
pm_serialize_content(&parser, node, buffer);
pm_buffer_append_byte(buffer, '\0');

pm_node_destroy(&parser, node);
pm_parser_free(&parser);
}

// Parse and serialize the inline comments in the given source to the given
// buffer.
PRISM_EXPORTED_FUNCTION void
pm_parse_serialize_inline_comments(const uint8_t *source, size_t size, pm_buffer_t *buffer, const char *metadata) {
pm_parser_t parser;
pm_parser_init(&parser, source, size, NULL);
if (metadata) pm_parser_metadata(&parser, metadata);

pm_node_t *node = pm_parse(&parser);
pm_serialize_header(buffer);
pm_serialize_encoding(&parser.encoding, buffer);
pm_serialize_comment_list(&parser, &parser.comment_list, buffer);

pm_node_destroy(&parser, node);
pm_parser_free(&parser);
Expand Down
31 changes: 21 additions & 10 deletions templates/lib/prism/serialize.rb.erb
Original file line number Diff line number Diff line change
Expand Up @@ -50,12 +50,30 @@ module Prism
define_load_node_lambdas unless RUBY_ENGINE == 'ruby'
end

# Consume and validate the serialization header: the 5-byte "PRISM" marker,
# three version bytes that must equal MAJOR_VERSION, MINOR_VERSION and
# PATCH_VERSION, and one flag byte that must be 0 (location fields present).
# Raises on any mismatch.
def load_header
  marker = io.read(5)
  raise "Invalid serialization" unless marker == "PRISM"

  version = io.read(3).unpack("C3")
  raise "Invalid serialization" unless version == [MAJOR_VERSION, MINOR_VERSION, PATCH_VERSION]

  semantics_only_flag = io.read(1).unpack1("C")
  return if semantics_only_flag == 0

  raise "Invalid serialization (location fields must be included but are not)"
end

# Read a varint length, then that many bytes as the encoding name, and look
# the name up with Encoding.find.
def load_encoding
  encoding_name = io.read(load_varint)
  Encoding.find(encoding_name)
end

# Load the encoding from the stream, remember it in @encoding, and store the
# input re-tagged with that encoding and frozen in @input.
def load_force_encoding
  @encoding = load_encoding
  retagged = input.force_encoding(@encoding)
  @input = retagged.freeze
end

# Read a varint count followed by that many comments. Each comment is a
# varint index into Comment::TYPES followed by a location.
def load_comments
  load_varint.times.map do
    comment_type = Comment::TYPES.fetch(load_varint)
    Comment.new(comment_type, load_location)
  end
end

def load_metadata
comments = load_varint.times.map { Comment.new(Comment::TYPES.fetch(load_varint), load_location) }
comments = load_comments
magic_comments = load_varint.times.map { MagicComment.new(load_location, load_location) }
errors = load_varint.times.map { ParseError.new(load_embedded_string, load_location) }
warnings = load_varint.times.map { ParseWarning.new(load_embedded_string, load_location) }
Expand Down Expand Up @@ -89,15 +107,8 @@ module Prism
end

def load_nodes
raise "Invalid serialization" if io.read(5) != "PRISM"
raise "Invalid serialization" if io.read(3).unpack("C3") != [MAJOR_VERSION, MINOR_VERSION, PATCH_VERSION]
only_semantic_fields = io.read(1).unpack1("C")
unless only_semantic_fields == 0
raise "Invalid serialization (location fields must be included but are not)"
end

@encoding = load_encoding
@input = input.force_encoding(@encoding).freeze
load_header
load_force_encoding

comments, magic_comments, errors, warnings = load_metadata

Expand Down
4 changes: 2 additions & 2 deletions templates/src/serialize.c.erb
Original file line number Diff line number Diff line change
Expand Up @@ -136,7 +136,7 @@ pm_serialize_comment(pm_parser_t *parser, pm_comment_t *comment, pm_buffer_t *bu
pm_buffer_append_varint(buffer, pm_ptrdifft_to_u32(comment->end - comment->start));
}

static void
void
pm_serialize_comment_list(pm_parser_t *parser, pm_list_t *list, pm_buffer_t *buffer) {
pm_buffer_append_varint(buffer, pm_sizet_to_u32(pm_list_size(list)));

Expand Down Expand Up @@ -189,7 +189,7 @@ pm_serialize_diagnostic_list(pm_parser_t *parser, pm_list_t *list, pm_buffer_t *
}
}

static void
void
pm_serialize_encoding(pm_encoding_t *encoding, pm_buffer_t *buffer) {
size_t encoding_length = strlen(encoding->name);
pm_buffer_append_varint(buffer, pm_sizet_to_u32(encoding_length));
Expand Down
21 changes: 21 additions & 0 deletions test/prism/parse_inline_comments_test.rb
Original file line number Diff line number Diff line change
@@ -0,0 +1,21 @@
# frozen_string_literal: true

require_relative "test_helper"

module Prism
  class ParseInlineCommentsTest < TestCase
    def test_parse_inline_comments
      assert_single_comment(Prism.parse_inline_comments("# foo"))
    end

    def test_parse_file_inline_comments
      assert_single_comment(Prism.parse_file_inline_comments(__FILE__))
    end

    private

    def assert_single_comment(comments)
      assert_kind_of Array, comments
      assert_equal 1, comments.length
    end
  end
end

0 comments on commit 4ff8fe2

Please sign in to comment.