diff --git a/lib/core/text/abstract_text.nit b/lib/core/text/abstract_text.nit
index c15901ecae..c9d127909b 100644
--- a/lib/core/text/abstract_text.nit
+++ b/lib/core/text/abstract_text.nit
@@ -1514,6 +1514,42 @@ abstract class Buffer
 	# In Buffers, the internal sequence of character is mutable
 	# Thus, `chars` can be used to modify the buffer.
 	redef fun chars: Sequence[Char] is abstract
+
+	# Appends to `self` the `length` chars of `s` that start at index `from`
+	#
+	# The requested range is clipped to the bounds of `s`; nothing is
+	# appended when no char remains after clipping.
+	#
+	# ~~~nit
+	# var b = new Buffer
+	# b.append_substring("abcde", 1, 2)
+	# assert b == "bc"
+	# b.append_substring("vwxyz", 2, 3)
+	# assert b == "bcxyz"
+	# b.append_substring("ABCDE", 4, 300)
+	# assert b == "bcxyzE"
+	# b.append_substring("VWXYZ", 400, 1)
+	# assert b == "bcxyzE"
+	# ~~~
+	fun append_substring(s: Text, from, length: Int) do
+		# A negative start shortens the range by the same amount
+		if from < 0 then
+			length += from
+			from = 0
+		end
+		# Cut whatever runs past the last char of `s`
+		var src_len = s.length
+		if (from + length) > src_len then length = src_len - from
+		if length > 0 then append_substring_impl(s, from, length)
+	end
+
+	# Appends `length` chars of `s` from index `from` without any check
+	#
+	# NOTE: performance-oriented; `from` and `length` must already be valid
+	fun append_substring_impl(s: Text, from, length: Int) do
+		var stop = from + length
+		for i in [from .. stop[ do add(s[i])
+	end
 end
 
 # View for chars on Buffer objects, extends Sequence
@@ -1755,6 +1791,18 @@ redef class Char
 		return cp >= 0xD800 and cp <= 0xDFFF
 	end
 
+	# Is `self` the leading (high) half of a UTF-16 surrogate pair ?
+	fun is_hi_surrogate: Bool do
+		var point = code_point
+		return point >= 0xD800 and point <= 0xDBFF
+	end
+
+	# Is `self` a UTF-16 low surrogate ?
+	fun is_lo_surrogate: Bool do
+		var cp = code_point
+		return cp >= 0xDC00 and cp <= 0xDFFF
+	end
+
 	# Length of `self` in a UTF-8 String
 	fun u8char_len: Int do
 		var c = self.code_point
diff --git a/lib/core/text/flat.nit b/lib/core/text/flat.nit
index 9f5659ad68..b7e8e61763 100644
--- a/lib/core/text/flat.nit
+++ b/lib/core/text/flat.nit
@@ -411,7 +411,7 @@ abstract class FlatString
 
 		if from < 0 then
 			count += from
-			if count < 0 then return ""
+			if count <= 0 then return ""
 			from = 0
 		end
 
@@ -924,7 +924,10 @@ class FlatBuffer
 		is_dirty = true
 		_bytelen = 0
 		_length = 0
-		if written then reset
+		if written then
+			_capacity = 16
+			reset
+		end
 	end
 
 	redef fun empty do return new Buffer
@@ -1049,6 +1052,27 @@ class FlatBuffer
 		return new FlatBuffer.with_infos(r_items, byte_length, byte_length, count)
 	end
 
+	# Fast path for flat texts: the UTF-8 bytes of the requested chars are
+	# copied in one block instead of being added char by char.
+	#
+	# Any other kind of `Text` is handled by the generic implementation.
+	redef fun append_substring_impl(s, from, length) do
+		if length <= 0 then return
+		if not s isa FlatText then
+			super
+			return
+		end
+		var bytest = s.char_to_byte_index(from)
+		var bytend = s.char_to_byte_index(from + length - 1)
+		# `bytend` is where the last char starts: count all of its bytes,
+		# not just the first one, or a multi-byte last char is truncated.
+		var btln = bytend - bytest + s[from + length - 1].u8char_len
+		enlarge(btln + _bytelen)
+		s._items.copy_to(_items, btln, bytest, _bytelen)
+		_bytelen += btln
+		_length += length
+	end
+
 	redef fun reverse
 	do
 		written = false
@@ -1351,37 +1375,26 @@ redef class NativeString
 	#
 	# Very unsafe, make sure to have room for this char prior to calling this function.
private fun set_char_at(pos: Int, c: Char) do
-		if c.code_point < 128 then
-			self[pos] = c.code_point.to_b
+		var cp = c.code_point
+		if cp < 128 then # code points below 128 are stored as one byte
+			self[pos] = cp.to_b
 			return
 		end
 		var ln = c.u8char_len
-		native_set_char(pos, c, ln)
-	end
-
-	private fun native_set_char(pos: Int, c: Char, ln: Int) `{
-		char* dst = self + pos;
-		switch(ln){
-			case 1:
-				dst[0] = c;
-				break;
-			case 2:
-				dst[0] = 0xC0 | ((c & 0x7C0) >> 6);
-				dst[1] = 0x80 | (c & 0x3F);
-				break;
-			case 3:
-				dst[0] = 0xE0 | ((c & 0xF000) >> 12);
-				dst[1] = 0x80 | ((c & 0xFC0) >> 6);
-				dst[2] = 0x80 | (c & 0x3F);
-				break;
-			case 4:
-				dst[0] = 0xF0 | ((c & 0x1C0000) >> 18);
-				dst[1] = 0x80 | ((c & 0x3F000) >> 12);
-				dst[2] = 0x80 | ((c & 0xFC0) >> 6);
-				dst[3] = 0x80 | (c & 0x3F);
-				break;
-		}
-	`}
+		if ln == 2 then # byte layout: 110xxxxx 10xxxxxx
+			self[pos] = (0xC0 | ((cp & 0x7C0) >> 6)).to_b
+			self[pos + 1] = (0x80 | (cp & 0x3F)).to_b
+		else if ln == 3 then # byte layout: 1110xxxx 10xxxxxx 10xxxxxx
+			self[pos] = (0xE0 | ((cp & 0xF000) >> 12)).to_b
+			self[pos + 1] = (0x80 | ((cp & 0xFC0) >> 6)).to_b
+			self[pos + 2] = (0x80 | (cp & 0x3F)).to_b
+		else if ln == 4 then # byte layout: 11110xxx 10xxxxxx 10xxxxxx 10xxxxxx
+			self[pos] = (0xF0 | ((cp & 0x1C0000) >> 18)).to_b
+			self[pos + 1] = (0x80 | ((cp & 0x3F000) >> 12)).to_b
+			self[pos + 2] = (0x80 | ((cp & 0xFC0) >> 6)).to_b
+			self[pos + 3] = (0x80 | (cp & 0x3F)).to_b
+		end # any other `ln` writes nothing
+	end
 end
 
 redef class Int
diff --git a/lib/json/static.nit b/lib/json/static.nit
index 7c0e2cf4a5..565da1f2ea 100644
--- a/lib/json/static.nit
+++ b/lib/json/static.nit
@@ -439,6 +439,11 @@ redef class JsonParseError
 			"\"position\":{position.to_json}," +
 			"\"message\":{message.to_json}\}"
 	end
+
+	redef fun pretty_json_visit(buf, indents) do
+		buf.clear # whatever was already written to `buf` is discarded
+		buf.append(to_json) # `indents` is not used: the compact form is emitted
+	end
 end
 
 redef class Position
diff --git a/lib/json/string_parser.nit b/lib/json/string_parser.nit
index d429697c63..51d302e93e 100644
--- a/lib/json/string_parser.nit
+++ b/lib/json/string_parser.nit
@@ -231,32 +231,103 @@ class JSONStringParser
 		return val
 	end
 
+	private var parse_str_buf = new FlatBuffer # buffer shared by all calls to `parse_json_string`
+
 	# Parses and returns a Nit string
from a JSON String
 	fun parse_json_string: Jsonable do
+		var src = src
 		var ln = src.length
 		var p = pos
 		p += 1
-		if p > ln then return make_parse_error("Malformed JSON String")
+		if p >= ln then return make_parse_error("Malformed JSON String")
 		var c = src[p]
-		var st = p
+		var ret = parse_str_buf
+		ret.clear # drop leftovers of a previous call that returned on an error
+		var chunk_st = p
 		while c != '"' do
-			if c == '\\' then
-				if p + 1 >= ln then return make_parse_error("Malformed Escape sequence in JSON string")
+			if c != '\\' then
 				p += 1
+				if p >= ln then return make_parse_error("Malformed JSON string")
 				c = src[p]
-				if c == 'u' then
+				continue
+			end
+			ret.append_substring_impl(src, chunk_st, p - chunk_st) # flush the raw chars seen since the last escape
+			p += 1
+			if p >= ln then return make_parse_error("Malformed Escape sequence in JSON string")
+			c = src[p]
+			if c == 'r' then
+				ret.add '\r'
+				p += 1
+			else if c == 'n' then
+				ret.add '\n'
+				p += 1
+			else if c == 't' then
+				ret.add '\t'
+				p += 1
+			else if c == 'u' then
+				var cp = 0
+				p += 1
+				for i in [0 .. 4[ do
+					cp <<= 4
+					if p >= ln then return make_parse_error("Malformed \uXXXX Escape sequence in JSON string")
+					c = src[p]
+					if c >= '0' and c <= '9' then
+						cp += c.code_point - '0'.code_point
+					else if c >= 'a' and c <= 'f' then
+						cp += c.code_point - 'a'.code_point + 10
+					else if c >= 'A' and c <= 'F' then
+						cp += c.code_point - 'A'.code_point + 10
+					else
+						return make_parse_error("Malformed \uXXXX Escape sequence in JSON string")
+					end
 					p += 1
-					if p + 3 >= ln then return make_parse_error("Bad Unicode escape sequence in string")
-					for i in [0 .. 4[ do if not src[p + i].is_hexdigit then return make_parse_error("Bad Unicode escape sequence in string")
-					p += 3
 				end
+				c = cp.code_point
+				if cp >= 0xD800 and cp <= 0xDBFF then
+					# High surrogate: a second `\uXXXX` escape must follow.
+					# Both lookahead chars are bounds-checked up front so that
+					# `src[p + 1]` is never read past the end of the input.
+					if p + 1 >= ln or src[p] != '\\' or src[p + 1] != 'u' then return make_parse_error("Malformed \uXXXX Escape sequence in JSON string")
+					var locp = 0
+					p += 2
+					for i in [0 .. 4[ do
+						locp <<= 4
+						if p >= ln then return make_parse_error("Malformed \uXXXX Escape sequence in JSON string")
+						c = src[p]
+						if c >= '0' and c <= '9' then
+							locp += c.code_point - '0'.code_point
+						else if c >= 'a' and c <= 'f' then
+							locp += c.code_point - 'a'.code_point + 10
+						else if c >= 'A' and c <= 'F' then
+							locp += c.code_point - 'A'.code_point + 10
+						else
+							return make_parse_error("Malformed \uXXXX Escape sequence in JSON string")
+						end
+						p += 1
+					end
+					c = (((locp & 0x3FF) | ((cp & 0x3FF) << 10)) + 0x10000).code_point
+				end
+				ret.add c
+			else if c == 'b' then
+				ret.add 8.code_point
+				p += 1
+			else if c == 'f' then
+				ret.add '\f'
+				p += 1
+			else
+				p += 1
+				ret.add c
 			end
-			p += 1
-			if p >= ln then return make_parse_error("Malformed JSON String")
+			chunk_st = p
+			if p >= ln then return make_parse_error("Malformed JSON string")
 			c = src[p]
 		end
 		pos = p + 1
-		return src.substring(st, p - st).unescape_json
+		if ret.is_empty then return src.substring(chunk_st, p - chunk_st) # no escape met: plain slice of the input
+		ret.append_substring_impl(src, chunk_st, p - chunk_st)
+		var rets = ret.to_s
+		ret.clear
+		return rets
 	end
 
 	# Ignores any character until a JSON separator is encountered