diff --git a/script.module.ijson/CHANGELOG.md b/script.module.ijson/CHANGELOG.md deleted file mode 100644 index 8a92c9c67..000000000 --- a/script.module.ijson/CHANGELOG.md +++ /dev/null @@ -1,30 +0,0 @@ -# Changelog - -## [2.5] - -* Default backend changed (#5). - Instead of using the python backend, - now the fastest available backend is selected by default. -* Added support for new `map_type` option (#7). -* Fixed bug in `multiple_values` support in C backend (#8). -* Added support for ``multiple_values`` flag in python backend (#9). -* Forwarding `**kwargs` from `ijson.items` to `ijson.parse` and - `ijson.basic_parse` (#10). -* Fixing support for yajl versions < 1.0.12. -* Improving `common.number` implementation. -* Documenting how events and the prefix work (#4). - -## [2.4] - -- New `ijson.backends.yajl2_c` backend written in C - and based on the yajl2 library. - It performs ~10x faster than cffi backend. -- Adding more builds to Travis matrix. -- Preventing memory leaks in `ijson.items` -- Parse numbers consistent with stdlib json -- Correct JSON string parsing in python backend -- Publishing package version in __init__.py -- Various small fixes in cffi backend - -[2.4]: https://github.com/ICRAR/ijson/releases/tag/2.4 -[2.5]: https://github.com/ICRAR/ijson/releases/tag/v2.5 diff --git a/script.module.ijson/LICENSE.txt b/script.module.ijson/LICENSE.txt index 89d119c8b..2d0d278a8 100644 --- a/script.module.ijson/LICENSE.txt +++ b/script.module.ijson/LICENSE.txt @@ -1,3 +1,6 @@ +ijson +===== + Copyright (c) 2010, Ivan Sagalaev All rights reserved. Redistribution and use in source and binary forms, with or without @@ -22,3 +25,21 @@ LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + + +yajl +==== + +Copyright (c) 2007-2014, Lloyd Hilaiel + +Permission to use, copy, modify, and/or distribute this software for any +purpose with or without fee is hereby granted, provided that the above +copyright notice and this permission notice appear in all copies. + +THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES +WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF +MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR +ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES +WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN +ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF +OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE. diff --git a/script.module.ijson/README.rst b/script.module.ijson/README.rst deleted file mode 100644 index aba0493a6..000000000 --- a/script.module.ijson/README.rst +++ /dev/null @@ -1,156 +0,0 @@ -.. image:: https://travis-ci.org/ICRAR/ijson.svg?branch=master - :target: https://travis-ci.org/ICRAR/ijson - -.. image:: https://coveralls.io/repos/github/ICRAR/ijson/badge.svg?branch=master - :target: https://coveralls.io/github/ICRAR/ijson?branch=master - -===== -ijson -===== - -Ijson is an iterative JSON parser with a standard Python iterator interface. - - -Usage -===== - -All usage example will be using a JSON document describing geographical -objects:: - - { - "earth": { - "europe": [ - {"name": "Paris", "type": "city", "info": { ... }}, - {"name": "Thames", "type": "river", "info": { ... }}, - // ... - ], - "america": [ - {"name": "Texas", "type": "state", "info": { ... }}, - // ... - ] - } - } - -Most common usage is having ijson yield native Python objects out of a JSON -stream located under a prefix. Here's how to process all European cities:: - - import ijson - - f = urlopen('http://.../') - objects = ijson.items(f, 'earth.europe.item') - cities = (o for o in objects if o['type'] == 'city') - for city in cities: - do_something_with(city) - -For how to build a prefix see the Prefix section below. - -Sometimes when dealing with a particularly large JSON payload it may worth to -not even construct individual Python objects and react on individual events -immediately producing some result:: - - import ijson - - parser = ijson.parse(urlopen('http://.../')) - stream.write('') - for prefix, event, value in parser: - if (prefix, event) == ('earth', 'map_key'): - stream.write('<%s>' % value) - continent = value - elif prefix.endswith('.name'): - stream.write('' % value) - elif (prefix, event) == ('earth.%s' % continent, 'end_map'): - stream.write('' % continent) - stream.write('') - - -Events -====== - -When using the lower-level ``ijson.parse`` function, -three-element tuples are generated -containing a prefix, an event name, and a value. -Events will be one of the following: - -- ``start_map`` and ``end_map`` indicate - the beginning and end of a JSON object, respectively. - They carry a ``None`` as their value. -- ``start_array`` and ``end_array`` indicate - the beginning and end of a JSON array, respectively. - They also carry a ``None`` as their value. -- ``map_key`` indicates the name of a field in a JSON object. - Its associated value is the name itself. -- ``null``, ``boolean``, ``integer``, ``double``, ``number`` and ``string`` - all indicate actual content, which is stored in the associated value. - - -Prefix -====== - -A prefix represents the context within a JSON document -where an event originates at. -It works as follows: - -- It starts as an empty string. -- A ```` part is appended when the parser starts parsing the contents - of a JSON object member called ``name``, - and removed once the content finishes. -- A literal ``item`` part is appended when the parser is parsing - elements of a JSON array, - and removed when the array ends. -- Parts are separated by ``.``. - -When using the ``ijson.items`` function, -the prefix works as the selection -for which objects should be automatically built and returned by ijson. - - -Backends -======== - -Ijson provides several implementations of the actual parsing in the form of -backends located in ijson/backends: - -- ``yajl2_c``: a C extension using `YAJL `_ 2.x. - This is the fastest, but *might* require a compiler and the YAJL development files - to be present when installing this package. - Binary wheel distributions exist for major platforms/architectures to spare users - from having to compile the package. -- ``yajl2_cffi``: wrapper around `YAJL `_ 2.x - using CFFI. -- ``yajl2``: wrapper around YAJL 2.x using ctypes, for when you can't use CFFI - for some reason. -- ``yajl``: deprecated YAJL 1.x + ctypes wrapper, for even older systems. -- ``python``: pure Python parser, good to use with PyPy - -You can import a specific backend and use it in the same way as the top level -library:: - - import ijson.backends.yajl2_cffi as ijson - - for item in ijson.items(...): - # ... - -Importing the top level library as ``import ijson`` -uses the first available backend in the same order of the list above. - - -Acknowledgements -================ - -ijson was originally developed and actively maintained until 2016 -by `Ivan Sagalaev `_. -In 2019 he -`handed over `_ -the maintenance of the project and the PyPI ownership. - -Python parser in ijson is relatively simple thanks to `Douglas Crockford -`_ who invented a strict, easy to parse syntax. - -The `YAJL `_ library by `Lloyd Hilaiel -`_ is the most popular and efficient way to parse JSON in an -iterative fashion. - -Ijson was inspired by `yajl-py `_ wrapper by -`Hatem Nassrat `_. Though ijson borrows almost nothing -from the actual yajl-py code it was used as an example of integration with yajl -using ctypes. diff --git a/script.module.ijson/addon.xml b/script.module.ijson/addon.xml index e1c241d4f..63d21cb3a 100644 --- a/script.module.ijson/addon.xml +++ b/script.module.ijson/addon.xml @@ -1,23 +1,18 @@ - - - - - - - Iterative JSON parser with a standard Python iterator interface - Packed for KODI from https://github.com/isagalaev/ijson - BSD-3-Clause - https://pypi.org/project/ijson/ - https://github.com/ICRAR/ijson - - icon.png - - + + + + + + + Iterative JSON parser with standard Python iterator interfaces + Iterative JSON parser with standard Python iterator interfaces + BSD-3-Clause + all + https://pypi.org/project/ijson/ + https://github.com/ICRAR/ijson + + resources/icon.png + + diff --git a/script.module.ijson/lib/ijson/__init__.py b/script.module.ijson/lib/ijson/__init__.py index cf8053b49..0f3ae03e4 100644 --- a/script.module.ijson/lib/ijson/__init__.py +++ b/script.module.ijson/lib/ijson/__init__.py @@ -13,15 +13,23 @@ also two other backends using the C library yajl in ``ijson.backends`` that have the same API and are faster under CPython. ''' -from ijson.common import JSONError, IncompleteJSONError, ObjectBuilder +from ijson.common import JSONError, IncompleteJSONError, ObjectBuilder, compat +from ijson.utils import coroutine, sendable_list from .version import __version__ -def _default_backend(): +def get_backend(backend): + """Import the backend named ``backend``""" import importlib - for backend in ('yajl2_c', 'yajl2_cffi', 'yajl2', 'python'): + return importlib.import_module('ijson.backends.' + backend) + +def _default_backend(): + import os + if 'IJSON_BACKEND' in os.environ: + return get_backend(os.environ['IJSON_BACKEND']) + for backend in ('yajl2_c', 'yajl2_cffi', 'yajl2', 'yajl', 'python'): try: - return importlib.import_module('ijson.backends.' + backend) + return get_backend(backend) except ImportError: continue raise ImportError('no backends available') @@ -29,6 +37,16 @@ def _default_backend(): del _default_backend basic_parse = backend.basic_parse +basic_parse_coro = backend.basic_parse_coro parse = backend.parse +parse_coro = backend.parse_coro items = backend.items -del backend \ No newline at end of file +items_coro = backend.items_coro +kvitems = backend.kvitems +kvitems_coro = backend.kvitems_coro +if compat.IS_PY35: + basic_parse_async = backend.basic_parse_async + parse_async = backend.parse_async + items_async = backend.items_async + kvitems_async = backend.kvitems_async +backend = backend.backend \ No newline at end of file diff --git a/script.module.ijson/lib/ijson/backends/__init__.py b/script.module.ijson/lib/ijson/backends/__init__.py index de0e936dc..f1b13d800 100644 --- a/script.module.ijson/lib/ijson/backends/__init__.py +++ b/script.module.ijson/lib/ijson/backends/__init__.py @@ -1,3 +1,4 @@ +import os import warnings class YAJLImportError(ImportError): @@ -31,7 +32,7 @@ def find_yajl_ctypes(required): # Example of such environment is Google App Engine (GAE). from ctypes import util, cdll - so_name = util.find_library('yajl') + so_name = os.getenv('YAJL_DLL') or util.find_library('yajl') if so_name is None: raise YAJLImportError('YAJL shared object not found.') try: @@ -47,7 +48,7 @@ def find_yajl_cffi(ffi, required): version (1, 2, ...) using cffi. ''' try: - yajl = ffi.dlopen('yajl') + yajl = ffi.dlopen(os.getenv('YAJL_DLL') or 'yajl') except OSError: raise YAJLImportError('Unable to load YAJL.') require_version(get_yajl_version(yajl), required) diff --git a/script.module.ijson/lib/ijson/backends/_yajl2_ctypes_common.py b/script.module.ijson/lib/ijson/backends/_yajl2_ctypes_common.py new file mode 100644 index 000000000..ae90b1d8d --- /dev/null +++ b/script.module.ijson/lib/ijson/backends/_yajl2_ctypes_common.py @@ -0,0 +1,73 @@ +''' +Common ctypes routines for yajl library handling +''' + +from ctypes import Structure, c_uint, c_char, c_ubyte, c_int, c_long, c_longlong, c_double,\ + c_void_p, c_char_p, CFUNCTYPE, POINTER, string_at, cast + +from ijson import common, backends +from ijson.compat import b2s + + +C_EMPTY = CFUNCTYPE(c_int, c_void_p) +C_INT = CFUNCTYPE(c_int, c_void_p, c_int) +C_LONG = CFUNCTYPE(c_int, c_void_p, c_long) +C_LONGLONG = CFUNCTYPE(c_int, c_void_p, c_longlong) +C_DOUBLE = CFUNCTYPE(c_int, c_void_p, c_double) +C_STR = CFUNCTYPE(c_int, c_void_p, POINTER(c_ubyte), c_uint) + + +def _get_callback_data(yajl_version): + return [ + # Mapping of JSON parser events to callback C types and value converters. + # Used to define the Callbacks structure and actual callback functions + # inside the parse function. + ('null', 'null', C_EMPTY, lambda: None), + ('boolean', 'boolean', C_INT, lambda v: bool(v)), + ('integer', 'number', C_LONG if yajl_version == 1 else C_LONGLONG, lambda v: int(v)), + ('double', 'number', C_DOUBLE, lambda v: v), + ('number', 'number', C_STR, lambda v, l: common.integer_or_decimal(b2s(string_at(v, l)))), + ('string', 'string', C_STR, lambda v, l: string_at(v, l).decode('utf-8')), + ('start_map', 'start_map', C_EMPTY, lambda: None), + ('map_key', 'map_key', C_STR, lambda v, l: string_at(v, l).decode('utf-8')), + ('end_map', 'end_map', C_EMPTY, lambda: None), + ('start_array', 'start_array', C_EMPTY, lambda: None), + ('end_array', 'end_array', C_EMPTY, lambda: None), + ] + + +YAJL_OK = 0 +YAJL_CANCELLED = 1 +YAJL_INSUFFICIENT_DATA = 2 +YAJL_ERROR = 3 + + +def get_yajl(version): + yajl = backends.find_yajl_ctypes(version) + yajl.yajl_alloc.restype = POINTER(c_char) + yajl.yajl_get_error.restype = POINTER(c_char) + return yajl + +def _callback(send, use_float, field, event, func_type, func): + if use_float and field == 'number': + return func_type() + def c_callback(_context, *args): + send((event, func(*args))) + return 1 + return func_type(c_callback) + +def make_callbaks(send, use_float, yajl_version): + callback_data = _get_callback_data(yajl_version) + class Callbacks(Structure): + _fields_ = [(name, type) for name, _, type, _ in callback_data] + return Callbacks(*[_callback(send, use_float, *data) for data in callback_data]) + +def yajl_get_error(yajl, handle, buffer): + perror = yajl.yajl_get_error(handle, 1, buffer, len(buffer)) + error = cast(perror, c_char_p).value + try: + error = error.decode('utf-8') + except UnicodeDecodeError: + pass + yajl.yajl_free_error(handle, perror) + return error diff --git a/script.module.ijson/lib/ijson/backends/python.py b/script.module.ijson/lib/ijson/backends/python.py index ba7ab242a..8efda7955 100644 --- a/script.module.ijson/lib/ijson/backends/python.py +++ b/script.module.ijson/lib/ijson/backends/python.py @@ -1,17 +1,16 @@ ''' Pure-python parsing backend. ''' -from __future__ import unicode_literals -import re -from codecs import getreader from json.decoder import scanstring +import re -from ijson import common -from ijson.compat import bytetype +from ijson import common, utils +import codecs -BUFSIZE = 16 * 1024 LEXEME_RE = re.compile(r'[a-z0-9eE\.\+-]+|\S') +UNARY_LEXEMES = set('[]{},') +EOF = -1, None class UnexpectedSymbol(common.JSONError): @@ -21,12 +20,48 @@ def __init__(self, symbol, pos): ) -def Lexer(f, buf_size=BUFSIZE): - if type(f.read(0)) == bytetype: - f = getreader('utf-8')(f) - buf = f.read(buf_size) +@utils.coroutine +def utf8_encoder(target): + decoder = codecs.getincrementaldecoder('utf-8')() + decode = decoder.decode + send = target.send + while True: + try: + final = False + bdata = (yield) + except GeneratorExit: + final = True + bdata = b'' + try: + sdata = decode(bdata, final) + except UnicodeDecodeError as e: + try: + target.close() + except: + pass + raise common.IncompleteJSONError(e) + if sdata: + send(sdata) + elif not bdata: + target.close() + break + +@utils.coroutine +def Lexer(target): + """ + Parses lexemes out of the incoming content, and sends them to parse_value. + A special EOF result is sent when the data source has been exhausted to + give parse_value the possibility of raising custom exceptions due to missing + content. + """ + try: + data = (yield) + except GeneratorExit: + data = '' + buf = data pos = 0 discarded = 0 + send = target.send while True: match = LEXEME_RE.search(buf, pos) if match: @@ -45,107 +80,199 @@ def Lexer(f, buf_size=BUFSIZE): else: break except ValueError: - data = f.read(buf_size) + try: + data = (yield) + except GeneratorExit: + data = '' if not data: raise common.IncompleteJSONError('Incomplete string lexeme') buf += data - yield discarded + pos, buf[pos:end + 1] + send((discarded + pos, buf[pos:end + 1])) pos = end + 1 else: - while match.end() == len(buf): - data = f.read(buf_size) + while lexeme not in UNARY_LEXEMES and match.end() == len(buf): + try: + data = (yield) + except GeneratorExit: + data = '' if not data: break buf += data match = LEXEME_RE.search(buf, pos) lexeme = match.group() - yield discarded + match.start(), lexeme + send((discarded + match.start(), lexeme)) pos = match.end() else: - data = f.read(buf_size) + # Don't ask data from an already exhausted source + if data: + try: + data = (yield) + except GeneratorExit: + data = '' if not data: + # Normally should raise StopIteration, but can raise + # IncompleteJSONError too, which is the point of sending EOF + try: + target.send(EOF) + except StopIteration: + pass break discarded += len(buf) buf = data pos = 0 -def parse_value(lexer, symbol=None, pos=0): - try: - if symbol is None: - pos, symbol = next(lexer) - if symbol == 'null': - yield ('null', None) - elif symbol == 'true': - yield ('boolean', True) - elif symbol == 'false': - yield ('boolean', False) - elif symbol == '[': - for event in parse_array(lexer): - yield event - elif symbol == '{': - for event in parse_object(lexer): - yield event - elif symbol[0] == '"': - yield ('string', parse_string(symbol)) - else: - try: - yield ('number', common.number(symbol)) - except: - raise UnexpectedSymbol(symbol, pos) - except StopIteration: - raise common.IncompleteJSONError('Incomplete JSON data') +# Parsing states +_PARSE_VALUE = 0 +_PARSE_ARRAY_ELEMENT_END = 1 +_PARSE_OBJECT_KEY = 2 +_PARSE_OBJECT_END = 3 +# infinity singleton for overflow checks +inf = float("inf") -def parse_string(symbol): - return scanstring(symbol, 1)[0] +@utils.coroutine +def parse_value(target, multivalue, use_float): + """ + Parses results coming out of the Lexer into ijson events, which are sent to + `target`. A stack keeps track of the type of object being parsed at the time + (a value, and object or array -- the last two being values themselves). + A special EOF result coming from the Lexer indicates that no more content is + expected. This is used to check for incomplete content and raise the + appropriate exception, which wouldn't be possible if the Lexer simply closed + this co-routine (either explicitly via .close(), or implicitly by itself + finishing and decreasing the only reference to the co-routine) since that + causes a GeneratorExit exception that cannot be replaced with a custom one. + """ -def parse_array(lexer): - yield ('start_array', None) - try: - pos, symbol = next(lexer) - if symbol != ']': - while True: - for event in parse_value(lexer, symbol, pos): - yield event - pos, symbol = next(lexer) - if symbol == ']': - break - if symbol != ',': - raise UnexpectedSymbol(symbol, pos) - pos, symbol = next(lexer) - yield ('end_array', None) - except StopIteration: - raise common.IncompleteJSONError('Incomplete JSON data') + state_stack = [_PARSE_VALUE] + pop = state_stack.pop + push = state_stack.append + send = target.send + prev_pos, prev_symbol = None, None + to_number = common.integer_or_float if use_float else common.integer_or_decimal + while True: + if prev_pos is None: + pos, symbol = (yield) + if (pos, symbol) == EOF: + if state_stack: + raise common.IncompleteJSONError('Incomplete JSON content') + break + else: + pos, symbol = prev_pos, prev_symbol + prev_pos, prev_symbol = None, None + try: + state = state_stack[-1] + except IndexError: + if multivalue: + state = _PARSE_VALUE + push(state) + else: + raise common.JSONError('Additional data found') + assert state_stack -def parse_object(lexer): - yield ('start_map', None) - try: - pos, symbol = next(lexer) - if symbol != '}': - while True: - if symbol[0] != '"': - raise UnexpectedSymbol(symbol, pos) - yield ('map_key', parse_string(symbol)) - pos, symbol = next(lexer) - if symbol != ':': - raise UnexpectedSymbol(symbol, pos) - for event in parse_value(lexer, None, pos): - yield event - pos, symbol = next(lexer) + if state == _PARSE_VALUE: + # Simple, common cases + if symbol == 'null': + send(('null', None)) + pop() + elif symbol == 'true': + send(('boolean', True)) + pop() + elif symbol == 'false': + send(('boolean', False)) + pop() + elif symbol[0] == '"': + send(('string', parse_string(symbol))) + pop() + # Array start + elif symbol == '[': + send(('start_array', None)) + pos, symbol = (yield) + if (pos, symbol) == EOF: + raise common.IncompleteJSONError('Incomplete JSON content') + if symbol == ']': + send(('end_array', None)) + pop() + else: + prev_pos, prev_symbol = pos, symbol + push(_PARSE_ARRAY_ELEMENT_END) + push(_PARSE_VALUE) + # Object start + elif symbol == '{': + send(('start_map', None)) + pos, symbol = (yield) + if (pos, symbol) == EOF: + raise common.IncompleteJSONError('Incomplete JSON content') if symbol == '}': - break - if symbol != ',': + send(('end_map', None)) + pop() + else: + prev_pos, prev_symbol = pos, symbol + push(_PARSE_OBJECT_KEY) + # A number + else: + # JSON numbers can't contain leading zeros + if ((len(symbol) > 1 and symbol[0] == '0' and symbol[1] not in ('e', 'E', '.')) or + (len(symbol) > 2 and symbol[0:2] == '-0' and symbol[2] not in ('e', 'E', '.'))): + raise common.JSONError('Invalid JSON number: %s' % (symbol,)) + # Fractions need a leading digit and must be followed by a digit + if symbol[0] == '.' or symbol[-1] == '.': + raise common.JSONError('Invalid JSON number: %s' % (symbol,)) + try: + number = to_number(symbol) + if number == inf: + raise common.JSONError("float overflow: %s" % (symbol,)) + except: + if 'true'.startswith(symbol) or 'false'.startswith(symbol) or 'null'.startswith(symbol): + raise common.IncompleteJSONError('Incomplete JSON content') raise UnexpectedSymbol(symbol, pos) - pos, symbol = next(lexer) - yield ('end_map', None) - except StopIteration: - raise common.IncompleteJSONError('Incomplete JSON data') + else: + send(('number', number)) + pop() + + elif state == _PARSE_OBJECT_KEY: + if symbol[0] != '"': + raise UnexpectedSymbol(symbol, pos) + send(('map_key', parse_string(symbol))) + pos, symbol = (yield) + if (pos, symbol) == EOF: + raise common.IncompleteJSONError('Incomplete JSON content') + if symbol != ':': + raise UnexpectedSymbol(symbol, pos) + state_stack[-1] = _PARSE_OBJECT_END + push(_PARSE_VALUE) + + elif state == _PARSE_OBJECT_END: + if symbol == ',': + state_stack[-1] = _PARSE_OBJECT_KEY + elif symbol != '}': + raise UnexpectedSymbol(symbol, pos) + else: + send(('end_map', None)) + pop() + pop() + elif state == _PARSE_ARRAY_ELEMENT_END: + if symbol == ',': + state_stack[-1] = _PARSE_ARRAY_ELEMENT_END + push(_PARSE_VALUE) + elif symbol != ']': + raise UnexpectedSymbol(symbol, pos) + else: + send(('end_array', None)) + pop() + pop() -def basic_parse(file, buf_size=BUFSIZE, multiple_values=False): + +def parse_string(symbol): + return scanstring(symbol, 1)[0] + + +def basic_parse_basecoro(target, multiple_values=False, allow_comments=False, + use_float=False): ''' Iterator yielding unprefixed events. @@ -153,30 +280,9 @@ def basic_parse(file, buf_size=BUFSIZE, multiple_values=False): - file: a readable file-like object with JSON input ''' - lexer = iter(Lexer(file, buf_size)) - symbol = None - pos = 0 - while True: - for value in parse_value(lexer, symbol, pos): - yield value - try: - pos, symbol = next(lexer) - except StopIteration: - break - else: - if not multiple_values: - raise common.JSONError('Additional data') + if allow_comments: + raise ValueError("Comments are not supported by the python backend") + return utf8_encoder(Lexer(parse_value(target, multiple_values, use_float))) -def parse(file, **kwargs): - ''' - Backend-specific wrapper for ijson.common.parse. - ''' - return common.parse(basic_parse(file, **kwargs)) - - -def items(file, prefix, map_type=None, **kwargs): - ''' - Backend-specific wrapper for ijson.common.items. - ''' - return common.items(parse(file, **kwargs), prefix, map_type=map_type) +common.enrich_backend(globals()) diff --git a/script.module.ijson/lib/ijson/backends/yajl.py b/script.module.ijson/lib/ijson/backends/yajl.py index eb923a365..050e1aab3 100644 --- a/script.module.ijson/lib/ijson/backends/yajl.py +++ b/script.module.ijson/lib/ijson/backends/yajl.py @@ -2,46 +2,13 @@ Wrapper for YAJL C library version 1.x. ''' -from ctypes import Structure, c_uint, c_ubyte, c_int, c_long, c_double, c_char, \ - c_void_p, c_char_p, CFUNCTYPE, POINTER, byref, string_at, cast +from ctypes import Structure, c_uint, byref -from ijson import common, backends -from ijson.compat import b2s +from ijson import common, utils +from ijson.backends import _yajl2_ctypes_common -yajl = backends.find_yajl_ctypes(1) - -yajl.yajl_alloc.restype = POINTER(c_char) -yajl.yajl_get_error.restype = POINTER(c_char) - -C_EMPTY = CFUNCTYPE(c_int, c_void_p) -C_INT = CFUNCTYPE(c_int, c_void_p, c_int) -C_LONG = CFUNCTYPE(c_int, c_void_p, c_long) -C_DOUBLE = CFUNCTYPE(c_int, c_void_p, c_double) -C_STR = CFUNCTYPE(c_int, c_void_p, POINTER(c_ubyte), c_uint) - - -_callback_data = [ - # Mapping of JSON parser events to callback C types and value converters. - # Used to define the Callbacks structure and actual callback functions - # inside the parse function. - ('null', C_EMPTY, lambda: None), - ('boolean', C_INT, lambda v: bool(v)), - # "integer" and "double" aren't actually yielded by yajl since "number" - # takes precedence if defined - ('integer', C_LONG, lambda v, l: int(string_at(v, l))), - ('double', C_DOUBLE, lambda v, l: float(string_at(v, l))), - ('number', C_STR, lambda v, l: common.number(b2s(string_at(v, l)))), - ('string', C_STR, lambda v, l: string_at(v, l).decode('utf-8')), - ('start_map', C_EMPTY, lambda: None), - ('map_key', C_STR, lambda v, l: b2s(string_at(v, l))), - ('end_map', C_EMPTY, lambda: None), - ('start_array', C_EMPTY, lambda: None), - ('end_array', C_EMPTY, lambda: None), -] - -class Callbacks(Structure): - _fields_ = [(name, type) for name, type, func in _callback_data] +yajl = _yajl2_ctypes_common.get_yajl(1) class Config(Structure): _fields_ = [ @@ -49,13 +16,10 @@ class Config(Structure): ("checkUTF8", c_uint) ] -YAJL_OK = 0 -YAJL_CANCELLED = 1 -YAJL_INSUFFICIENT_DATA = 2 -YAJL_ERROR = 3 - -def basic_parse(f, allow_comments=False, check_utf8=False, buf_size=64 * 1024): +@utils.coroutine +def basic_parse_basecoro(target, allow_comments=False, multiple_values=False, + use_float=False): ''' Iterator yielding unprefixed events. @@ -66,49 +30,30 @@ def basic_parse(f, allow_comments=False, check_utf8=False, buf_size=64 * 1024): - check_utf8: if True, parser will cause an error if input is invalid utf-8 - buf_size: a size of an input buffer ''' - events = [] - - def callback(event, func_type, func): - def c_callback(context, *args): - events.append((event, func(*args))) - return 1 - return func_type(c_callback) - - callbacks = Callbacks(*[callback(*data) for data in _callback_data]) - config = Config(allow_comments, check_utf8) + if multiple_values: + raise ValueError("yajl backend doesn't support multiple_values") + callbacks = _yajl2_ctypes_common.make_callbaks(target.send, use_float, 1) + config = Config(allow_comments, True) handle = yajl.yajl_alloc(byref(callbacks), byref(config), None, None) try: while True: - buffer = f.read(buf_size) + try: + buffer = (yield) + except GeneratorExit: + buffer = b'' if buffer: result = yajl.yajl_parse(handle, buffer, len(buffer)) else: result = yajl.yajl_parse_complete(handle) - if result == YAJL_ERROR: - perror = yajl.yajl_get_error(handle, 1, buffer, len(buffer)) - error = cast(perror, c_char_p).value - yajl.yajl_free_error(handle, perror) - exception = common.IncompleteJSONError if result == YAJL_INSUFFICIENT_DATA else common.JSONError + if result == _yajl2_ctypes_common.YAJL_ERROR: + error = _yajl2_ctypes_common.yajl_get_error(yajl, handle, buffer) raise common.JSONError(error) - if not buffer and not events: - if result == YAJL_INSUFFICIENT_DATA: + elif not buffer: + if result == _yajl2_ctypes_common.YAJL_INSUFFICIENT_DATA: raise common.IncompleteJSONError('Incomplete JSON data') break - - for event in events: - yield event - events = [] finally: yajl.yajl_free(handle) -def parse(file, **kwargs): - ''' - Backend-specific wrapper for ijson.common.parse. - ''' - return common.parse(basic_parse(file, **kwargs)) -def items(file, prefix, map_type=None, **kwargs): - ''' - Backend-specific wrapper for ijson.common.items. - ''' - return common.items(parse(file, **kwargs), prefix, map_type=map_type) +common.enrich_backend(globals()) diff --git a/script.module.ijson/lib/ijson/backends/yajl2.py b/script.module.ijson/lib/ijson/backends/yajl2.py index 7bede7c3c..8f19bf106 100644 --- a/script.module.ijson/lib/ijson/backends/yajl2.py +++ b/script.module.ijson/lib/ijson/backends/yajl2.py @@ -2,59 +2,22 @@ Wrapper for YAJL C library version 2.x. ''' -from ctypes import Structure, c_uint, c_ubyte, c_int, c_long, c_double, c_char, \ - c_void_p, c_char_p, CFUNCTYPE, POINTER, byref, string_at, cast +from ctypes import byref -from ijson import common, backends -from ijson.compat import b2s +from ijson import common, utils +from ijson.backends import _yajl2_ctypes_common -yajl = backends.find_yajl_ctypes(2) - -yajl.yajl_alloc.restype = POINTER(c_char) -yajl.yajl_get_error.restype = POINTER(c_char) - -C_EMPTY = CFUNCTYPE(c_int, c_void_p) -C_INT = CFUNCTYPE(c_int, c_void_p, c_int) -C_LONG = CFUNCTYPE(c_int, c_void_p, c_long) -C_DOUBLE = CFUNCTYPE(c_int, c_void_p, c_double) -C_STR = CFUNCTYPE(c_int, c_void_p, POINTER(c_ubyte), c_uint) - - -_callback_data = [ - # Mapping of JSON parser events to callback C types and value converters. - # Used to define the Callbacks structure and actual callback functions - # inside the parse function. - ('null', C_EMPTY, lambda: None), - ('boolean', C_INT, lambda v: bool(v)), - # "integer" and "double" aren't actually yielded by yajl since "number" - # takes precedence if defined - ('integer', C_LONG, lambda v, l: int(string_at(v, l))), - ('double', C_DOUBLE, lambda v, l: float(string_at(v, l))), - ('number', C_STR, lambda v, l: common.number(b2s(string_at(v, l)))), - ('string', C_STR, lambda v, l: string_at(v, l).decode('utf-8')), - ('start_map', C_EMPTY, lambda: None), - ('map_key', C_STR, lambda v, l: b2s(string_at(v, l))), - ('end_map', C_EMPTY, lambda: None), - ('start_array', C_EMPTY, lambda: None), - ('end_array', C_EMPTY, lambda: None), -] - -class Callbacks(Structure): - _fields_ = [(name, type) for name, type, func in _callback_data] - -YAJL_OK = 0 -YAJL_CANCELLED = 1 -YAJL_INSUFFICIENT_DATA = 2 -YAJL_ERROR = 3 +yajl = _yajl2_ctypes_common.get_yajl(2) # constants defined in yajl_parse.h YAJL_ALLOW_COMMENTS = 1 YAJL_MULTIPLE_VALUES = 8 -def basic_parse(f, allow_comments=False, buf_size=64 * 1024, - multiple_values=False): +@utils.coroutine +def basic_parse_basecoro(target, allow_comments=False, multiple_values=False, + use_float=False): ''' Iterator yielding unprefixed events. @@ -65,15 +28,7 @@ def basic_parse(f, allow_comments=False, buf_size=64 * 1024, - buf_size: a size of an input buffer - multiple_values: allows the parser to parse multiple JSON objects ''' - events = [] - - def callback(event, func_type, func): - def c_callback(context, *args): - events.append((event, func(*args))) - return 1 - return func_type(c_callback) - - callbacks = Callbacks(*[callback(*data) for data in _callback_data]) + callbacks = _yajl2_ctypes_common.make_callbaks(target.send, use_float, 2) handle = yajl.yajl_alloc(byref(callbacks), None, None) if allow_comments: yajl.yajl_config(handle, YAJL_ALLOW_COMMENTS, 1) @@ -81,34 +36,22 @@ def c_callback(context, *args): yajl.yajl_config(handle, YAJL_MULTIPLE_VALUES, 1) try: while True: - buffer = f.read(buf_size) + try: + buffer = (yield) + except GeneratorExit: + buffer = b'' if buffer: result = yajl.yajl_parse(handle, buffer, len(buffer)) else: result = yajl.yajl_complete_parse(handle) - if result != YAJL_OK: - perror = yajl.yajl_get_error(handle, 1, buffer, len(buffer)) - error = cast(perror, c_char_p).value - yajl.yajl_free_error(handle, perror) - exception = common.IncompleteJSONError if result == YAJL_INSUFFICIENT_DATA else common.JSONError - raise exception(error.decode('utf-8')) - if not buffer and not events: + if result != _yajl2_ctypes_common.YAJL_OK: + error = _yajl2_ctypes_common.yajl_get_error(yajl, handle, buffer) + exception = common.IncompleteJSONError if result == _yajl2_ctypes_common.YAJL_INSUFFICIENT_DATA else common.JSONError + raise exception(error) + if not buffer: break - - for event in events: - yield event - events = [] finally: yajl.yajl_free(handle) -def parse(file, **kwargs): - ''' - Backend-specific wrapper for ijson.common.parse. - ''' - return common.parse(basic_parse(file, **kwargs)) -def items(file, prefix, map_type=None, **kwargs): - ''' - Backend-specific wrapper for ijson.common.items. - ''' - return common.items(parse(file, **kwargs), prefix, map_type=map_type) +common.enrich_backend(globals()) diff --git a/script.module.ijson/lib/ijson/backends/yajl2_c.py b/script.module.ijson/lib/ijson/backends/yajl2_c.py index 1ae9029cf..6511870d9 100644 --- a/script.module.ijson/lib/ijson/backends/yajl2_c.py +++ b/script.module.ijson/lib/ijson/backends/yajl2_c.py @@ -8,16 +8,63 @@ ''' Wrapper for _yajl2 C extension module ''' -import decimal -from ijson import common -from . import _yajl2 # @UnresolvedImport +from ijson import common, compat, utils +from . import _yajl2 -def basic_parse(file, **kwargs): - return _yajl2.basic_parse(file.read, decimal.Decimal, common.JSONError, common.IncompleteJSONError, **kwargs) -def parse(file, **kwargs): - return _yajl2.parse(file.read, decimal.Decimal, common.JSONError, common.IncompleteJSONError, **kwargs) +_get_buf_size = lambda kwargs: kwargs.pop('buf_size', 64 * 1024) -def items(file, prefix, map_type=None, **kwargs): - return _yajl2.items(prefix, file.read, decimal.Decimal, common.JSONError, common.IncompleteJSONError, map_type, **kwargs) +@utils.coroutine +def basic_parse_basecoro(target, **kwargs): + return _yajl2.basic_parse_basecoro(target.send, **kwargs) + +def basic_parse_gen(file, **kwargs): + f = compat.bytes_reader(file) + buf_size = _get_buf_size(kwargs) + return _yajl2.basic_parse(f, buf_size, **kwargs) + +def basic_parse_async(file, **kwargs): + buf_size = _get_buf_size(kwargs) + return _yajl2.basic_parse_async(file, buf_size, **kwargs) + +@utils.coroutine +def parse_basecoro(target, **kwargs): + return _yajl2.parse_basecoro(target.send, **kwargs) + +def parse_gen(file, **kwargs): + f = compat.bytes_reader(file) + buf_size = _get_buf_size(kwargs) + return _yajl2.parse(f, buf_size, **kwargs) + +def parse_async(file, **kwargs): + buf_size = _get_buf_size(kwargs) + return _yajl2.parse_async(file, buf_size, **kwargs) + +@utils.coroutine +def kvitems_basecoro(target, prefix, map_type=None, **kwargs): + return _yajl2.kvitems_basecoro(target.send, prefix, map_type, **kwargs) + +def kvitems_gen(file, prefix, map_type=None, **kwargs): + f = compat.bytes_reader(file) + buf_size = _get_buf_size(kwargs) + return _yajl2.kvitems(f, buf_size, prefix, map_type, **kwargs) + +def kvitems_async(file, prefix, map_type=None, **kwargs): + buf_size = _get_buf_size(kwargs) + return _yajl2.kvitems_async(file, buf_size, prefix, map_type, **kwargs) + +@utils.coroutine +def items_basecoro(target, prefix, map_type=None, **kwargs): + return _yajl2.items_basecoro(target.send, prefix, map_type, **kwargs) + +def items_gen(file, prefix, map_type=None, **kwargs): + f = compat.bytes_reader(file) + buf_size = _get_buf_size(kwargs) + return _yajl2.items(f, buf_size, prefix, map_type, **kwargs) + +def items_async(file, prefix, map_type=None, **kwargs): + buf_size = _get_buf_size(kwargs) + return _yajl2.items_async(file, buf_size, prefix, map_type, **kwargs) + +common.enrich_backend(globals()) diff --git a/script.module.ijson/lib/ijson/backends/yajl2_cffi.py b/script.module.ijson/lib/ijson/backends/yajl2_cffi.py index 8a03b4266..6b8689bc1 100644 --- a/script.module.ijson/lib/ijson/backends/yajl2_cffi.py +++ b/script.module.ijson/lib/ijson/backends/yajl2_cffi.py @@ -4,9 +4,8 @@ from cffi import FFI import functools -import sys -from ijson import common, backends +from ijson import common, backends, utils from ijson.compat import b2s @@ -79,8 +78,8 @@ def wrapper(func): @functools.wraps(func) def wrapped(ctx, *args, **kwargs): value = func(*args, **kwargs) - ctx = ffi.from_handle(ctx) - ctx.append((event, value)) + send = ffi.from_handle(ctx) + send((event, value)) return 1 return wrapped return wrapper @@ -99,21 +98,21 @@ def boolean(val): @ffi.callback('int(void *ctx, long long integerVal)') -@append_event_to_ctx('integer') +@append_event_to_ctx('number') def integer(val): return int(val) -@ffi.callback('int(void *ctx, double doubleVal)') -@append_event_to_ctx('double') +@ffi.callback('int(void * ctx, double doubleVal)') +@append_event_to_ctx('number') def double(val): - return float(val) + return val @ffi.callback('int(void *ctx, const char *numberVal, size_t numberLen)') @append_event_to_ctx('number') def number(val, length): - return common.number(b2s(ffi.string(val, maxlen=length))) + return common.integer_or_decimal(b2s(ffi.string(val, maxlen=length))) @ffi.callback('int(void *ctx, const unsigned char *stringVal, size_t stringLen)') @@ -131,7 +130,7 @@ def start_map(): @ffi.callback('int(void *ctx, const unsigned char *key, size_t stringLen)') @append_event_to_ctx('map_key') def map_key(key, length): - return b2s(ffi.string(key, maxlen=length)) + return ffi.string(key, maxlen=length).decode('utf-8') @ffi.callback('int(void *ctx)') @@ -152,18 +151,23 @@ def end_array(): return None -_callback_data = ( - # For more information about callbacks, - # take a look at the ctypes backend - null, boolean, integer, double, number, string, +_decimal_callback_data = ( + null, boolean, ffi.NULL, ffi.NULL, number, string, start_map, map_key, end_map, start_array, end_array ) +_float_callback_data = ( + null, boolean, integer, double, ffi.NULL, string, + start_map, map_key, end_map, start_array, end_array +) -_asd = list() -def yajl_init(scope, events, allow_comments=False, multiple_values=False): - scope.ctx = ffi.new_handle(events) - scope.callbacks = ffi.new('yajl_callbacks*', _callback_data) + +def yajl_init(scope, send, allow_comments=False, multiple_values=False, use_float=False): + scope.ctx = ffi.new_handle(send) + if use_float: + scope.callbacks = ffi.new('yajl_callbacks*', _float_callback_data) + else: + scope.callbacks = ffi.new('yajl_callbacks*', _decimal_callback_data) handle = yajl.yajl_alloc(scope.callbacks, ffi.NULL, scope.ctx) if allow_comments: @@ -182,7 +186,11 @@ def yajl_parse(handle, buffer): if result != YAJL_OK: perror = yajl.yajl_get_error(handle, 1, buffer, len(buffer)) - error = b2s(ffi.string(perror)) + error = ffi.string(perror) + try: + error = error.decode('utf8') + except UnicodeDecodeError: + pass yajl.yajl_free_error(handle, perror) exception = common.IncompleteJSONError if result == YAJL_INSUFFICIENT_DATA else common.JSONError raise exception(error) @@ -192,52 +200,33 @@ class Container(object): pass -def basic_parse(f, buf_size=64*1024, **config): +@utils.coroutine +def basic_parse_basecoro(target, **config): ''' - Iterator yielding unprefixed events. + Coroutine dispatching unprefixed events. Parameters: - - f: a readable file-like object with JSON input - allow_comments: tells parser to allow comments in JSON input - - buf_size: a size of an input buffer - multiple_values: allows the parser to parse multiple JSON objects ''' # the scope objects makes sure the C objects allocated in _yajl.init # are kept alive until this function is done scope = Container() - events = [] - handle = yajl_init(scope, events, **config) + handle = yajl_init(scope, target.send, **config) try: while True: - buffer = f.read(buf_size) - # this calls the callbacks which will - # fill the events list + try: + buffer = (yield) + except GeneratorExit: + buffer = b'' yajl_parse(handle, buffer) - - if not buffer and not events: + if not buffer: break - - for event in events: - yield event - - # clear all events, but don't replace the - # the events list instance - del events[:] finally: yajl.yajl_free(handle) -def parse(file, **kwargs): - ''' - Backend-specific wrapper for ijson.common.parse. - ''' - return common.parse(basic_parse(file, **kwargs)) - -def items(file, prefix, map_type=None, **kwargs): - ''' - Backend-specific wrapper for ijson.common.items. - ''' - return common.items(parse(file, **kwargs), prefix, map_type=map_type) +common.enrich_backend(globals()) diff --git a/script.module.ijson/lib/ijson/common.py b/script.module.ijson/lib/ijson/common.py index 13e753509..620c9568b 100644 --- a/script.module.ijson/lib/ijson/common.py +++ b/script.module.ijson/lib/ijson/common.py @@ -2,6 +2,10 @@ Backend independent higher level interfaces, common exceptions. ''' import decimal +import inspect +import warnings + +from ijson import compat, utils class JSONError(Exception): @@ -18,10 +22,12 @@ class IncompleteJSONError(JSONError): pass -def parse(basic_events): +@utils.coroutine +def parse_basecoro(target): ''' - An iterator returning parsing events with the information about their location - with the JSON object tree. Events are tuples ``(prefix, type, value)``. + A coroutine dispatching parsing events with the information about their + location with the JSON object tree. Events are tuples + ``(prefix, type, value)``. Available types and values are: @@ -62,7 +68,8 @@ def parse(basic_events): ''' path = [] - for event, value in basic_events: + while True: + event, value = yield if event == 'map_key': prefix = '.'.join(path[:-1]) path[-1] = value @@ -80,8 +87,7 @@ def parse(basic_events): prefix = '.'.join(path) else: # any scalar value prefix = '.'.join(path) - - yield prefix, event, value + target.send((prefix, event, value)) class ObjectBuilder(object): @@ -93,15 +99,16 @@ class ObjectBuilder(object): Example:: - from StringIO import StringIO - from ijson.parse import basic_parse - from ijson.utils import ObjectBuilder + >>> from ijson import basic_parse + >>> from ijson.common import ObjectBuilder + >>> from ijson.compat import BytesIO - builder = ObjectBuilder() - f = StringIO('{"key": "value"}) - for event, value in basic_parse(f): - builder.event(event, value) - print builder.value + >>> builder = ObjectBuilder() + >>> f = BytesIO(b'{"key": "value"}') + >>> for event, value in basic_parse(f): + ... builder.event(event, value) + >>> builder.value == {'key': 'value'} + True ''' def __init__(self, map_type=None): @@ -128,31 +135,62 @@ def setter(value): else: self.containers[-1](value) -def items(prefixed_events, prefix, map_type=None): + +@utils.coroutine +def items_basecoro(target, prefix, map_type=None): ''' - An iterator returning native Python objects constructed from the events + An couroutine dispatching native Python objects constructed from the events under a given prefix. ''' - prefixed_events = iter(prefixed_events) - try: - while True: - current, event, value = next(prefixed_events) - if current == prefix: - if event in ('start_map', 'start_array'): - builder = ObjectBuilder(map_type=map_type) - end_event = event.replace('start', 'end') - while (current, event) != (prefix, end_event): - builder.event(event, value) - current, event, value = next(prefixed_events) - del builder.containers[:] - yield builder.value - else: - yield value - except StopIteration: - pass + while True: + current, event, value = (yield) + if current == prefix: + if event in ('start_map', 'start_array'): + object_depth = 1 + builder = ObjectBuilder(map_type=map_type) + while object_depth: + builder.event(event, value) + current, event, value = (yield) + if event in ('start_map', 'start_array'): + object_depth += 1 + elif event in ('end_map', 'end_array'): + object_depth -= 1 + del builder.containers[:] + target.send(builder.value) + else: + target.send(value) -def number(str_value): +@utils.coroutine +def kvitems_basecoro(target, prefix, map_type=None): + ''' + An coroutine dispatching (key, value) pairs constructed from the events + under a given prefix. The prefix should point to JSON objects + ''' + builder = None + while True: + path, event, value = (yield) + while path == prefix and event == 'map_key': + object_depth = 0 + key = value + builder = ObjectBuilder(map_type=map_type) + path, event, value = (yield) + if event == 'start_map': + object_depth += 1 + while ( + (event != 'map_key' or object_depth != 0) and + (event != 'end_map' or object_depth != -1)): + builder.event(event, value) + path, event, value = (yield) + if event == 'start_map': + object_depth += 1 + elif event == 'end_map': + object_depth -= 1 + del builder.containers[:] + target.send((key, builder.value)) + + +def integer_or_decimal(str_value): ''' Converts string with a numeric value into an int or a Decimal. Used in different backends for consistent number representation. @@ -160,3 +198,292 @@ def number(str_value): if not ('.' in str_value or 'e' in str_value or 'E' in str_value): return int(str_value) return decimal.Decimal(str_value) + +def integer_or_float(str_value): + ''' + Converts string with a numeric value into an int or a float. + Used in different backends for consistent number representation. + ''' + if not ('.' in str_value or 'e' in str_value or 'E' in str_value): + return int(str_value) + return float(str_value) + +def number(str_value): + warnings.warn("number() function will be removed in a later release", DeprecationWarning) + return integer_or_decimal(str_value) + +def file_source(f, buf_size=64*1024): + '''A generator that yields data from a file-like object''' + f = compat.bytes_reader(f) + while True: + data = f.read(buf_size) + yield data + if not data: + break + + +def _basic_parse_pipeline(backend, config): + return ( + (backend['basic_parse_basecoro'], [], config), + ) + + +def _parse_pipeline(backend, config): + return ( + (backend['parse_basecoro'], [], {}), + (backend['basic_parse_basecoro'], [], config) + ) + + +def _items_pipeline(backend, prefix, map_type, config): + return ( + (backend['items_basecoro'], (prefix,), {'map_type': map_type}), + (backend['parse_basecoro'], [], {}), + (backend['basic_parse_basecoro'], [], config) + ) + + +def _kvitems_pipeline(backend, prefix, map_type, config): + return ( + (backend['kvitems_basecoro'], (prefix,), {'map_type': map_type}), + (backend['parse_basecoro'], [], {}), + (backend['basic_parse_basecoro'], [], config) + ) + + +def _make_basic_parse_coro(backend): + def basic_parse_coro(target, **config): + return utils.chain( + target, + *_basic_parse_pipeline(backend, config) + ) + return basic_parse_coro + + +def _make_parse_coro(backend): + def parse_coro(target, **config): + return utils.chain( + target, + *_parse_pipeline(backend, config) + ) + return parse_coro + + +def _make_items_coro(backend): + def items_coro(target, prefix, map_type=None, **config): + return utils.chain( + target, + *_items_pipeline(backend, prefix, map_type, config) + ) + return items_coro + + +def _make_kvitems_coro(backend): + def kvitems_coro(target, prefix, map_type=None, **config): + return utils.chain( + target, + *_kvitems_pipeline(backend, prefix, map_type, config) + ) + return kvitems_coro + + +def is_awaitablefunction(func): + """True if `func` is an awaitable function""" + return ( + inspect.iscoroutinefunction(func) or ( + inspect.isgeneratorfunction(func) and + (func.__code__.co_flags & inspect.CO_ITERABLE_COROUTINE) + ) + ) + +def is_async_file(f): + """True if `f` has an asynchronous `read` method""" + return ( + compat.IS_PY35 and hasattr(f, 'read') and + is_awaitablefunction(f.read) + ) + +def is_file(x): + """True if x has a `read` method""" + return hasattr(x, 'read') + + +def is_iterable(x): + """True if x can be iterated over""" + return hasattr(x, '__iter__') + + +def _get_source(source): + if isinstance(source, compat.bytetype): + return compat.BytesIO(source) + elif isinstance(source, compat.texttype): + return compat.StringIO(source) + return source + + +def _make_basic_parse_gen(backend): + def basic_parse_gen(file_obj, buf_size=64*1024, **config): + return utils.coros2gen( + file_source(file_obj, buf_size=buf_size), + *_basic_parse_pipeline(backend, config) + ) + return basic_parse_gen + + +def _make_parse_gen(backend): + def parse_gen(file_obj, buf_size=64*1024, **config): + return utils.coros2gen( + file_source(file_obj, buf_size=buf_size), + *_parse_pipeline(backend, config) + ) + return parse_gen + + +def _make_items_gen(backend): + def items_gen(file_obj, prefix, map_type=None, buf_size=64*1024, **config): + return utils.coros2gen( + file_source(file_obj, buf_size=buf_size), + *_items_pipeline(backend, prefix, map_type, config) + ) + return items_gen + + +def _make_kvitems_gen(backend): + def kvitems_gen(file_obj, prefix, map_type=None, buf_size=64*1024, **config): + return utils.coros2gen( + file_source(file_obj, buf_size=buf_size), + *_kvitems_pipeline(backend, prefix, map_type, config) + ) + return kvitems_gen + + +def _make_basic_parse(backend): + def basic_parse(source, buf_size=64*1024, **config): + source = _get_source(source) + if is_async_file(source): + return backend['basic_parse_async']( + source, buf_size=buf_size, **config + ) + elif is_file(source): + return backend['basic_parse_gen']( + source, buf_size=buf_size, **config + ) + raise ValueError("Unknown source type: %r" % type(source)) + return basic_parse + + +def _make_parse(backend): + def parse(source, buf_size=64*1024, **config): + source = _get_source(source) + if is_async_file(source): + return backend['parse_async']( + source, buf_size=buf_size, **config + ) + elif is_file(source): + return backend['parse_gen']( + source, buf_size=buf_size, **config + ) + elif is_iterable(source): + return utils.coros2gen(source, + (parse_basecoro, (), {}) + ) + raise ValueError("Unknown source type: %r" % type(source)) + return parse + + +def _make_items(backend): + def items(source, prefix, map_type=None, buf_size=64*1024, **config): + source = _get_source(source) + if is_async_file(source): + return backend['items_async']( + source, prefix, map_type=map_type, buf_size=buf_size, **config + ) + elif is_file(source): + return backend['items_gen']( + source, prefix, map_type=map_type, buf_size=buf_size, **config + ) + elif is_iterable(source): + return utils.coros2gen(source, + (backend['items_basecoro'], (prefix,), {'map_type': map_type}) + ) + raise ValueError("Unknown source type: %r" % type(source)) + return items + + +def _make_kvitems(backend): + def kvitems(source, prefix, map_type=None, buf_size=64*1024, **config): + source = _get_source(source) + if is_async_file(source): + return backend['kvitems_async']( + source, prefix, map_type=map_type, buf_size=buf_size, **config + ) + elif is_file(source): + return backend['kvitems_gen']( + source, prefix, map_type=map_type, buf_size=buf_size, **config + ) + elif is_iterable(source): + return utils.coros2gen(source, + (backend['kvitems_basecoro'], (prefix,), {'map_type': map_type}) + ) + raise ValueError("Unknown source type: %r" % type(source)) + return kvitems + + +_common_functions_warn = ''' +Don't use the ijson.common.* functions; instead go directly with the ijson.* ones. +See the documentation for more information. +''' + +def parse(events): + """Like ijson.parse, but takes events generated via ijson.basic_parse instead + of a file""" + warnings.warn(_common_functions_warn, DeprecationWarning) + return utils.coros2gen(events, + (parse_basecoro, (), {}) + ) + + +def kvitems(events, prefix, map_type=None): + """Like ijson.kvitems, but takes events generated via ijson.parse instead of + a file""" + warnings.warn(_common_functions_warn, DeprecationWarning) + return utils.coros2gen(events, + (kvitems_basecoro, (prefix,), {'map_type': map_type}) + ) + + +def items(events, prefix, map_type=None): + """Like ijson.items, but takes events generated via ijson.parse instead of + a file""" + warnings.warn(_common_functions_warn, DeprecationWarning) + return utils.coros2gen(events, + (items_basecoro, (prefix,), {'map_type': map_type}) + ) + + +def enrich_backend(backend): + ''' + Provides a backend with any missing coroutines/generators/async-iterables + it might be missing by using the generic ones written in python. + ''' + backend['backend'] = backend['__name__'].split('.')[-1] + for name in ('basic_parse', 'parse', 'items', 'kvitems'): + basecoro_name = name + '_basecoro' + if basecoro_name not in backend: + backend[basecoro_name] = globals()[basecoro_name] + coro_name = name + '_coro' + if coro_name not in backend: + factory = globals()['_make_' + coro_name] + backend[coro_name] = factory(backend) + gen_name = name + '_gen' + if gen_name not in backend: + factory = globals()['_make_' + gen_name] + backend[gen_name] = factory(backend) + if compat.IS_PY35: + from . import utils35 + async_name = name + '_async' + if async_name not in backend: + factory = getattr(utils35, '_make_' + async_name) + backend[async_name] = factory(backend) + factory = globals()['_make_' + name] + backend[name] = factory(backend) \ No newline at end of file diff --git a/script.module.ijson/lib/ijson/compat.py b/script.module.ijson/lib/ijson/compat.py index 50963a99d..4204224a2 100644 --- a/script.module.ijson/lib/ijson/compat.py +++ b/script.module.ijson/lib/ijson/compat.py @@ -3,14 +3,53 @@ ''' import sys +import warnings IS_PY2 = sys.version_info[0] < 3 +IS_PY35 = sys.version_info[0:2] >= (3, 5) if IS_PY2: b2s = lambda s: s bytetype = str + texttype = unicode + from StringIO import StringIO + BytesIO = StringIO else: b2s = lambda b: b.decode('utf-8') bytetype = bytes + texttype = str + from io import BytesIO, StringIO + +class utf8reader(object): + """Takes a utf8-encoded string reader and reads bytes out of it""" + + def __init__(self, str_reader): + self.str_reader = str_reader + + def read(self, n): + return self.str_reader.read(n).encode('utf-8') + +_str_vs_bytes_warning = ''' +ijson works by reading bytes, but a string reader has been given instead. This +probably, but not necessarily, means a file-like object has been opened in text +mode ('t') rather than binary mode ('b'). + +An automatic conversion is being performed on the fly to continue, but on the +other hand this creates unnecessary encoding/decoding operations that decrease +the efficiency of the system. In the future this automatic conversion will be +removed, and users will receive errors instead of this warning. To avoid this +problem make sure file-like objects are opened in binary mode instead of text +mode. +''' + +def _warn_and_return(o): + warnings.warn(_str_vs_bytes_warning, DeprecationWarning) + return o + +def bytes_reader(f): + """Returns a file-like object that reads bytes""" + if type(f.read(0)) == bytetype: + return f + return _warn_and_return(utf8reader(f)) \ No newline at end of file diff --git a/script.module.ijson/lib/ijson/dump.py b/script.module.ijson/lib/ijson/dump.py new file mode 100644 index 000000000..f450301fd --- /dev/null +++ b/script.module.ijson/lib/ijson/dump.py @@ -0,0 +1,57 @@ +'''Dumping command-line utility''' + +import argparse +import sys + +import ijson +from . import compat + + +HEADERS = { + 'basic_parse': 'name, value', + 'parse': 'path, name, value', + 'kvitems': 'key, value', + 'items': 'value', +} + +def to_string(o): + if isinstance(o, compat.texttype) and compat.IS_PY2: + o = o.encode('utf8') + if isinstance(o, compat.bytetype): + return compat.b2s(o) + return str(o) + +def dump(): + parser = argparse.ArgumentParser(description='Dump ijson events') + parser.add_argument('-m', '--method', choices=['basic_parse', 'parse', 'kvitems', 'items'], + help='The method to use for dumping', default='basic_parse') + parser.add_argument('-p', '--prefix', help='Prefix (used with -M items|kvitems)', default='') + parser.add_argument('-M', '--multiple-values', help='Allow multiple values', action='store_true') + args = parser.parse_args() + + method = getattr(ijson, args.method) + method_args = () + method_kwargs = {} + if args.method in ('items', 'kvitems'): + method_args = args.prefix, + if args.multiple_values: + method_kwargs['multiple_values'] = True + header = '#: ' + HEADERS[args.method] + print(header) + print('-' * len(header)) + + # Use the raw bytes stream in stdin if possible + stdin = sys.stdin + if hasattr(stdin, 'buffer'): + stdin = stdin.buffer + + enumerated_results = enumerate(method(stdin, *method_args, **method_kwargs)) + if args.method == 'items': + for i, result in enumerated_results: + print('%i: %s' % (i, result)) + else: + for i, result in enumerated_results: + print('%i: %s' % (i, ', '.join(to_string(bit) for bit in result))) + +if __name__ == '__main__': + dump() \ No newline at end of file diff --git a/script.module.ijson/lib/ijson/utils.py b/script.module.ijson/lib/ijson/utils.py index 75092b500..d54a9f3e4 100644 --- a/script.module.ijson/lib/ijson/utils.py +++ b/script.module.ijson/lib/ijson/utils.py @@ -4,7 +4,7 @@ def coroutine(func): ''' - Wraps a generator which intended to be used as a pure coroutine by + Wraps a generator which is intended to be used as a pure coroutine by .send()ing it values. The only thing that the wrapper does is calling .next() for the first time which is required by Python generator protocol. ''' @@ -15,41 +15,53 @@ def wrapper(*args, **kwargs): return g return wrapper -@coroutine -def foreach(coroutine_func): - ''' - Dispatches each JSON array item to a handler coroutine. A coroutine is - created anew for each item by calling `coroutine_func` callable. The - resulting coroutine should accept value in the form of tuple of values - generated by rich JSON parser: (prefix, event, value). - - First event received by foreach should be a "start_array" event. - ''' - g = None - base, event, value = yield - if event != 'start_array': - raise Exception('foreach requires "start_array" as the first event, got %s' % repr((base, event, value))) - START_EVENTS = set(['start_map', 'start_array', 'null', 'boolean', 'number', 'string']) - itemprefix = base + '.item' if base else 'item' - while True: - prefix, event, value = yield - if prefix == itemprefix and event in START_EVENTS: - g = coroutine_func() - if (prefix, event) != (base, 'end_array'): - g.send((prefix, event, value)) - -@coroutine -def dispatcher(targets): - ''' - Dispatches JSON parser events into several handlers depending on event - prefixes. - - Accepts a list of tuples (base_prefix, coroutine). A coroutine then - receives all the events with prefixes starting with its base_prefix. - ''' - while True: - prefix, event, value = yield - for base, target in targets: - if prefix.startswith(base): - target.send((prefix, event, value)) - break + +def chain(sink, *coro_pipeline): + ''' + Chains together a sink and a number of coroutines to form a coroutine + pipeline. The pipeline works by calling send() on the coroutine created with + the information in `coro_pipeline[-1]`, which sends its results to the + coroutine created from `coro_pipeline[-2]`, and so on, until the final + result is sent to `sink`. + ''' + f = sink + for coro_func, coro_args, coro_kwargs in coro_pipeline: + f = coro_func(f, *coro_args, **coro_kwargs) + return f + + +class sendable_list(list): + ''' + A list that mimics a coroutine receiving values. + + Coroutine are sent values via their send() method. This class defines such a + method so that values sent into it are appended into the list, which can be + inspected later. As such, this type can be used as an "accumulating sink" in + a pipeline consisting on many coroutines. + ''' + send = list.append + + +def coros2gen(source, *coro_pipeline): + ''' + A utility function that returns a generator yielding values dispatched by a + coroutine pipeline after *it* has received values coming from `source`. + ''' + events = sendable_list() + f = chain(events, *coro_pipeline) + try: + for value in source: + try: + f.send(value) + except StopIteration: + for event in events: + yield event + return + for event in events: + yield event + del events[:] + except GeneratorExit: + try: + f.close() + except: + pass \ No newline at end of file diff --git a/script.module.ijson/lib/ijson/utils35.py b/script.module.ijson/lib/ijson/utils35.py new file mode 100644 index 000000000..811f49453 --- /dev/null +++ b/script.module.ijson/lib/ijson/utils35.py @@ -0,0 +1,92 @@ +''' +Python3.5+ specific utilities +''' +import collections + +from ijson import utils, common, compat + + +class utf8reader_async(compat.utf8reader): + """ + Takes a utf8-encoded string asynchronous reader and asynchronously reads + bytes out of it + """ + async def read(self, n): + data = await self.str_reader.read(n) + return data.encode('utf-8') + +async def _get_read(f): + """Returns an awaitable read function that reads the requested type""" + if type(await f.read(0)) == compat.bytetype: + return f.read + return compat._warn_and_return(utf8reader_async(f).read) + +class sendable_deque(collections.deque): + '''Like utils.sendable_list, but for deque objects''' + send = collections.deque.append + +class async_iterable(object): + ''' + A utility class that implements an async iterator returning values + dispatched by a coroutine pipeline after *it* has received values coming + from an async file-like object. + ''' + + def __init__(self, f, buf_size, *coro_pipeline): + self.events = sendable_deque() + self.coro = utils.chain(self.events, *coro_pipeline) + self.coro_finished = False + self.f = f + self.buf_size = buf_size + self.read = None + + def __aiter__(self): + return self + + async def __anext__(self): + if not self.read: + self.read = await _get_read(self.f) + if self.events: + return self.events.popleft() + if self.coro_finished: + raise StopAsyncIteration + while True: + data = await self.read(self.buf_size) + try: + self.coro.send(data) + if self.events: + return self.events.popleft() + except StopIteration: + self.coro_finished = True + if self.events: + return self.events.popleft() + raise StopAsyncIteration + + +def _make_basic_parse_async(backend): + def basic_parse_async(f, buf_size=64*1024, **config): + return async_iterable(f, buf_size, + *common._basic_parse_pipeline(backend, config) + ) + return basic_parse_async + +def _make_parse_async(backend): + def parse_async(f, buf_size=64*1024, **config): + return async_iterable(f, buf_size, + *common._parse_pipeline(backend, config) + ) + return parse_async + +def _make_items_async(backend): + def items_async(f, prefix, map_type=None, buf_size=64*1024, **config): + return async_iterable(f, buf_size, + *common._items_pipeline(backend, prefix, map_type, config) + ) + return items_async + +def _make_kvitems_async(backend): + def kvitems_async(f, prefix, map_type=None, buf_size=64*1024, **config): + return async_iterable(f, buf_size, + *common._kvitems_pipeline(backend, prefix, map_type, config) + ) + return kvitems_async \ No newline at end of file diff --git a/script.module.ijson/lib/ijson/version.py b/script.module.ijson/lib/ijson/version.py index 52d6ff394..29e4a9413 100644 --- a/script.module.ijson/lib/ijson/version.py +++ b/script.module.ijson/lib/ijson/version.py @@ -1 +1 @@ -__version__ = '2.5' \ No newline at end of file +__version__ = '3.2.2' diff --git a/script.module.ijson/icon.png b/script.module.ijson/resources/icon.png similarity index 100% rename from script.module.ijson/icon.png rename to script.module.ijson/resources/icon.png