From 8f85dd504364847ea027980df522b5e35720bfb5 Mon Sep 17 00:00:00 2001 From: Kevin Newton Date: Tue, 20 Jun 2023 15:02:23 -0400 Subject: [PATCH] Serialize the encoding --- docs/serialization.md | 2 ++ templates/java/org/yarp/Loader.java.erb | 6 ++++++ templates/lib/yarp/serialize.rb.erb | 11 ++++++++--- templates/src/serialize.c.erb | 5 +++++ 4 files changed, 21 insertions(+), 3 deletions(-) diff --git a/docs/serialization.md b/docs/serialization.md index c2bc849891f..474d52cdd9e 100644 --- a/docs/serialization.md +++ b/docs/serialization.md @@ -14,6 +14,8 @@ The header is structured like the following table: | `1` | major version number | | `1` | minor version number | | `1` | patch version number | +| varint | the length of the encoding name | +| | the encoding name | | `4` | content pool offset | | varint | content pool size | diff --git a/templates/java/org/yarp/Loader.java.erb b/templates/java/org/yarp/Loader.java.erb index 8245bac833d..e0bbbdfc4be 100644 --- a/templates/java/org/yarp/Loader.java.erb +++ b/templates/java/org/yarp/Loader.java.erb @@ -57,6 +57,12 @@ public class Loader { expect((byte) 4); expect((byte) 0); + // This loads the name of the encoding. We don't actually do anything + // with it just yet. + int encodingLength = loadVarInt(); + byte[] encodingName = new byte[encodingLength]; + buffer.get(encodingName); + int constantPoolBufferOffset = buffer.getInt(); int constantPoolLength = loadVarInt(); this.constantPool = new ConstantPool(source, constantPoolBufferOffset, constantPoolLength); diff --git a/templates/lib/yarp/serialize.rb.erb b/templates/lib/yarp/serialize.rb.erb index 0ee0023617b..96150c1e6b2 100644 --- a/templates/lib/yarp/serialize.rb.erb +++ b/templates/lib/yarp/serialize.rb.erb @@ -14,18 +14,23 @@ module YARP attr_reader :constant_pool_offset, :constant_pool def initialize(source, serialized, io) - # TODO: This is wrong, we should be dumping and loading the encoding. - # For now, we assume that the source is UTF-8. @encoding = Encoding::UTF_8 - @source = source.dup.force_encoding(@encoding).freeze + + @source = source.dup @serialized = serialized @io = io + + @constant_pool_offset = nil + @constant_pool = nil end def load io.read(4) => "YARP" io.read(3).unpack("C3") => [0, 4, 0] + @encoding = Encoding.find(io.read(load_varint)) + @source = source.force_encoding(@encoding).freeze + @constant_pool_offset = io.read(4).unpack1("L") @constant_pool = Array.new(load_varint, nil) diff --git a/templates/src/serialize.c.erb b/templates/src/serialize.c.erb index 37f6a8d247f..7bbf40b601e 100644 --- a/templates/src/serialize.c.erb +++ b/templates/src/serialize.c.erb @@ -105,6 +105,11 @@ yp_serialize_node(yp_parser_t *parser, yp_node_t *node, yp_buffer_t *buffer) { void yp_serialize_content(yp_parser_t *parser, yp_node_t *node, yp_buffer_t *buffer) { + // First, serialize the encoding of the parser. + size_t encoding_length = strlen(parser->encoding.name); + yp_buffer_append_u32(buffer, yp_ulong_to_u32(encoding_length)); + yp_buffer_append_str(buffer, parser->encoding.name, encoding_length); + // Here we're going to leave space for the offset of the constant pool in // the buffer. size_t offset = buffer->length;