From 990d28965fd04cffdbe31dfa5c6b169e423d8663 Mon Sep 17 00:00:00 2001 From: Lucas Bajolet Date: Fri, 6 May 2016 11:42:39 -0400 Subject: [PATCH 1/6] lib/core: Added surrogate detection methods to `Char` Signed-off-by: Lucas Bajolet --- lib/core/text/abstract_text.nit | 12 ++++++++++++ 1 file changed, 12 insertions(+) diff --git a/lib/core/text/abstract_text.nit b/lib/core/text/abstract_text.nit index c15901ecae..6c2acd4fd7 100644 --- a/lib/core/text/abstract_text.nit +++ b/lib/core/text/abstract_text.nit @@ -1755,6 +1755,18 @@ redef class Char return cp >= 0xD800 and cp <= 0xDFFF end + # Is `self` a UTF-16 high surrogate (code point from 0xD800 to 0xDBFF, inclusive) ? + fun is_hi_surrogate: Bool do + var cp = code_point + return cp >= 0xD800 and cp <= 0xDBFF + end + + # Is `self` a UTF-16 low surrogate (code point from 0xDC00 to 0xDFFF, inclusive) ? + fun is_lo_surrogate: Bool do + var cp = code_point + return cp >= 0xDC00 and cp <= 0xDFFF + end + # Length of `self` in a UTF-8 String fun u8char_len: Int do var c = self.code_point From ed5b1d72b1c59e6790d1d236490225ac2eaea1b1 Mon Sep 17 00:00:00 2001 From: Lucas Bajolet Date: Fri, 6 May 2016 11:43:09 -0400 Subject: [PATCH 2/6] lib/json: Implemented `pretty_json_visit` on JsonParseError Signed-off-by: Lucas Bajolet --- lib/json/static.nit | 5 +++++ 1 file changed, 5 insertions(+) diff --git a/lib/json/static.nit b/lib/json/static.nit index 7c0e2cf4a5..565da1f2ea 100644 --- a/lib/json/static.nit +++ b/lib/json/static.nit @@ -439,6 +439,11 @@ redef class JsonParseError "\"position\":{position.to_json}," + "\"message\":{message.to_json}\}" end + + redef fun pretty_json_visit(buf, indents) do + buf.clear # NOTE(review): presumably empties `buf` first, so anything written earlier is lost - confirm this is intended + buf.append(to_json) + end end redef class Position From 163d604dc5389fe5b93ccce30bb4c88ad260b165 Mon Sep 17 00:00:00 2001 From: Lucas Bajolet Date: Fri, 6 May 2016 16:32:04 -0400 Subject: [PATCH 3/6] lib/json: Faster `parse_json_string` implementation Signed-off-by: Lucas Bajolet --- lib/json/string_parser.nit | 91 +++++++++++++++++++++++++++++++++----- 1 file changed, 81 insertions(+), 10 deletions(-) diff
--git a/lib/json/string_parser.nit b/lib/json/string_parser.nit index d429697c63..51d302e93e 100644 --- a/lib/json/string_parser.nit +++ b/lib/json/string_parser.nit @@ -231,32 +231,103 @@ class JSONStringParser return val end + private var parse_str_buf = new FlatBuffer # scratch buffer shared by every `parse_json_string` call + # Parses and returns a Nit string from a JSON String fun parse_json_string: Jsonable do + var src = src var ln = src.length var p = pos p += 1 - if p > ln then return make_parse_error("Malformed JSON String") + if p >= ln then return make_parse_error("Malformed JSON String") var c = src[p] - var st = p + var ret = parse_str_buf + var chunk_st = p while c != '"' do - if c == '\\' then - if p + 1 >= ln then return make_parse_error("Malformed Escape sequence in JSON string") + if c != '\\' then p += 1 + if p >= ln then return make_parse_error("Malformed JSON string") c = src[p] - if c == 'u' then + continue + end + ret.append_substring_impl(src, chunk_st, p - chunk_st) # flush the unescaped run that precedes the backslash + p += 1 + if p >= ln then return make_parse_error("Malformed Escape sequence in JSON string") + c = src[p] + if c == 'r' then + ret.add '\r' + p += 1 + else if c == 'n' then + ret.add '\n' + p += 1 + else if c == 't' then + ret.add '\t' + p += 1 + else if c == 'u' then + var cp = 0 + p += 1 + for i in [0 .. 4[ do + cp <<= 4 + if p >= ln then return make_parse_error("Malformed \uXXXX Escape sequence in JSON string") + c = src[p] + if c >= '0' and c <= '9' then + cp += c.code_point - '0'.code_point + else if c >= 'a' and c <= 'f' then + cp += c.code_point - 'a'.code_point + 10 + else if c >= 'A' and c <= 'F' then + cp += c.code_point - 'A'.code_point + 10 + else + return make_parse_error("Malformed \uXXXX Escape sequence in JSON string") + end p += 1 - if p + 3 >= ln then return make_parse_error("Bad Unicode escape sequence in string") - for i in [0 .. 
4[ do if not src[p + i].is_hexdigit then return make_parse_error("Bad Unicode escape sequence in string") - p += 3 end + c = cp.code_point + if cp >= 0xD800 and cp <= 0xDBFF then + # High surrogate: the low half must follow immediately as a second \uXXXX escape + if p + 1 >= ln or src[p] != '\\' then return make_parse_error("Malformed \uXXXX Escape sequence in JSON string") + p += 1 + c = src[p] + if c != 'u' then return make_parse_error("Malformed \uXXXX Escape sequence in JSON string") + var locp = 0 + p += 1 + for i in [0 .. 4[ do + locp <<= 4 + if p >= ln then return make_parse_error("Malformed \uXXXX Escape sequence in JSON string") + c = src[p] + if c >= '0' and c <= '9' then + locp += c.code_point - '0'.code_point + else if c >= 'a' and c <= 'f' then + locp += c.code_point - 'a'.code_point + 10 + else if c >= 'A' and c <= 'F' then + locp += c.code_point - 'A'.code_point + 10 + else + return make_parse_error("Malformed \uXXXX Escape sequence in JSON string") + end + p += 1 + end + c = (((locp & 0x3FF) | ((cp & 0x3FF) << 10)) + 0x10000).code_point # NOTE(review): locp is not checked to lie in 0xDC00..0xDFFF + end + ret.add c + else if c == 'b' then + ret.add 8.code_point + p += 1 + else if c == 'f' then + ret.add '\f' + p += 1 + else + p += 1 + ret.add c end - p += 1 if p >= ln then return make_parse_error("Malformed JSON String") + chunk_st = p c = src[p] end pos = p + 1 - return src.substring(st, p - st).unescape_json + if ret.is_empty then return src.substring(chunk_st, p - chunk_st) # nothing was buffered: return the raw slice directly + ret.append_substring_impl(src, chunk_st, p - chunk_st) + var rets = ret.to_s + ret.clear + return rets end # Ignores any character until a JSON separator is encountered From 2bed81ce94323bdb79401bcc0e10bc60890d774f Mon Sep 17 00:00:00 2001 From: Lucas Bajolet Date: Fri, 6 May 2016 16:32:45 -0400 Subject: [PATCH 4/6] lib/core: Added new `append_substring` service to avoid creating ephemeral instances Signed-off-by: Lucas Bajolet --- lib/core/text/abstract_text.nit | 36 +++++++++++++++++++++++++++++++++ lib/core/text/flat.nit | 17 +++++++++++++++- 2 files changed, 52
insertions(+), 1 deletion(-) diff --git a/lib/core/text/abstract_text.nit b/lib/core/text/abstract_text.nit index 6c2acd4fd7..c9d127909b 100644 --- a/lib/core/text/abstract_text.nit +++ b/lib/core/text/abstract_text.nit @@ -1514,6 +1514,42 @@ abstract class Buffer # In Buffers, the internal sequence of character is mutable # Thus, `chars` can be used to modify the buffer. redef fun chars: Sequence[Char] is abstract + + # Appends `length` chars from `s` starting at index `from` + # + # ~~~nit + # var b = new Buffer + # b.append_substring("abcde", 1, 2) + # assert b == "bc" + # b.append_substring("vwxyz", 2, 3) + # assert b == "bcxyz" + # b.append_substring("ABCDE", 4, 300) + # assert b == "bcxyzE" + # b.append_substring("VWXYZ", 400, 1) + # assert b == "bcxyzE" + # ~~~ + fun append_substring(s: Text, from, length: Int) do + if from < 0 then + length += from + from = 0 + end + var ln = s.length + if (length + from) > ln then length = ln - from + if length <= 0 then return + append_substring_impl(s, from, length) + end + + # Unsafe version of `append_substring` for performance + # + # NOTE: Use only if sure about `from` and `length`, no checks + # or bound recalculation is done + fun append_substring_impl(s: Text, from, length: Int) do + var pos = from + for i in [0 .. 
length[ do + self.add s[pos] + pos += 1 + end + end end # View for chars on Buffer objects, extends Sequence diff --git a/lib/core/text/flat.nit b/lib/core/text/flat.nit index 45ad144d4a..83cae37da5 100644 --- a/lib/core/text/flat.nit +++ b/lib/core/text/flat.nit @@ -411,7 +411,7 @@ abstract class FlatString if from < 0 then count += from - if count < 0 then return "" + if count <= 0 then return "" from = 0 end @@ -1054,6 +1054,21 @@ class FlatBuffer return new FlatBuffer.with_infos(r_items, byte_length, byte_length, count) end + redef fun append_substring_impl(s, from, length) do + if length <= 0 then return + if not s isa FlatText then # not a flat text: defer to the inherited implementation + super + return + end + var bytest = s.char_to_byte_index(from) + var bytend = s.char_to_byte_index(from + length - 1) + var btln = bytend - bytest + s[from + length - 1].u8char_len # add the full UTF-8 width of the last char, not 1; assumes char_to_byte_index yields a char's first byte + enlarge(btln + _bytelen) + s._items.copy_to(_items, btln, bytest, _bytelen) + _bytelen += btln + _length += length + end + redef fun reverse do written = false From a267d693163d1771974a201a9082750ef28d636b Mon Sep 17 00:00:00 2001 From: Lucas Bajolet Date: Fri, 6 May 2016 16:33:07 -0400 Subject: [PATCH 5/6] lib/core: Replaced FFI version of set_char_at by pure Nit Signed-off-by: Lucas Bajolet --- lib/core/text/flat.nit | 45 ++++++++++++++++-------------------------- 1 file changed, 17 insertions(+), 28 deletions(-) diff --git a/lib/core/text/flat.nit b/lib/core/text/flat.nit index 83cae37da5..67ffacf57e 100644 --- a/lib/core/text/flat.nit +++ b/lib/core/text/flat.nit @@ -1371,37 +1371,26 @@ redef class NativeString # # Very unsafe, make sure to have room for this char prior to calling this function.
private fun set_char_at(pos: Int, c: Char) do - if c.code_point < 128 then - self[pos] = c.code_point.to_b + var cp = c.code_point # read the code point once; every branch below reuses it + if cp < 128 then # below 128: stored as a single byte, unchanged + self[pos] = cp.to_b return end var ln = c.u8char_len - native_set_char(pos, c, ln) - end - - private fun native_set_char(pos: Int, c: Char, ln: Int) `{ - char* dst = self + pos; - switch(ln){ - case 1: - dst[0] = c; - break; - case 2: - dst[0] = 0xC0 | ((c & 0x7C0) >> 6); - dst[1] = 0x80 | (c & 0x3F); - break; - case 3: - dst[0] = 0xE0 | ((c & 0xF000) >> 12); - dst[1] = 0x80 | ((c & 0xFC0) >> 6); - dst[2] = 0x80 | (c & 0x3F); - break; - case 4: - dst[0] = 0xF0 | ((c & 0x1C0000) >> 18); - dst[1] = 0x80 | ((c & 0x3F000) >> 12); - dst[2] = 0x80 | ((c & 0xFC0) >> 6); - dst[3] = 0x80 | (c & 0x3F); - break; - } - `} + if ln == 2 then # 2 bytes: 110xxxxx 10xxxxxx + self[pos] = (0xC0 | ((cp & 0x7C0) >> 6)).to_b + self[pos + 1] = (0x80 | (cp & 0x3F)).to_b + else if ln == 3 then # 3 bytes: 1110xxxx 10xxxxxx 10xxxxxx + self[pos] = (0xE0 | ((cp & 0xF000) >> 12)).to_b + self[pos + 1] = (0x80 | ((cp & 0xFC0) >> 6)).to_b + self[pos + 2] = (0x80 | (cp & 0x3F)).to_b + else if ln == 4 then # 4 bytes: 11110xxx 10xxxxxx 10xxxxxx 10xxxxxx + self[pos] = (0xF0 | ((cp & 0x1C0000) >> 18)).to_b + self[pos + 1] = (0x80 | ((cp & 0x3F000) >> 12)).to_b + self[pos + 2] = (0x80 | ((cp & 0xFC0) >> 6)).to_b + self[pos + 3] = (0x80 | (cp & 0x3F)).to_b + end # any other `ln` writes nothing + end end redef class Int From 5c54927281840db0aac721ee83b6c692d37c239f Mon Sep 17 00:00:00 2001 From: Lucas Bajolet Date: Mon, 9 May 2016 15:11:27 -0400 Subject: [PATCH 6/6] lib/core: Perfize reset in `FlatBuffer` Signed-off-by: Lucas Bajolet --- lib/core/text/flat.nit | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) diff --git a/lib/core/text/flat.nit b/lib/core/text/flat.nit index 67ffacf57e..09dc9ad52d 100644 --- a/lib/core/text/flat.nit +++ b/lib/core/text/flat.nit @@ -924,7 +924,10 @@ class FlatBuffer is_dirty = true _bytelen = 0 _length = 0 - if written then reset + if written then + _capacity = 16 # NOTE(review): presumably shrinks the capacity so `reset` allocates a small array - confirm against `reset` + reset + end end redef fun empty do return new Buffer