From 8df4f8ed584bbfe9d3e4a5dd9088013f58bbc235 Mon Sep 17 00:00:00 2001 From: Roxie Linden Date: Wed, 28 Jun 2023 17:56:37 +0000 Subject: [PATCH 01/43] SRV-439 - performance optimizations for string handling in xml formatting A number of small perf optimizations: * use 'translate' to translate all xml characters at once instead of doing multiple string translation passes. * construct strings inline using writelines instead of doing it through a function, in order to save function call cost --- llsd/serde_xml.py | 348 ++++++++++++++++++++++++++++++++++------------ tests/bench.py | 69 +++++++++ 2 files changed, 325 insertions(+), 92 deletions(-) diff --git a/llsd/serde_xml.py b/llsd/serde_xml.py index 7dfeaa2..3c57678 100644 --- a/llsd/serde_xml.py +++ b/llsd/serde_xml.py @@ -1,11 +1,13 @@ import base64 +import binascii +from collections import deque import io import re -import types +import uuid from llsd.base import (_LLSD, ALL_CHARS, LLSDBaseParser, LLSDBaseFormatter, XML_HEADER, LLSDParseError, LLSDSerializationError, UnicodeType, - _format_datestr, _str_to_bytes, _to_python, is_unicode) + _format_datestr, _str_to_bytes, is_unicode, PY2, uri, binary, _parse_datestr) from llsd.fastest_elementtree import ElementTreeError, fromstring, parse as _parse INVALID_XML_BYTES = b'\x00\x01\x02\x03\x04\x05\x06\x07\x08\x0b\x0c'\ @@ -14,7 +16,22 @@ INVALID_XML_RE = re.compile(r'[\x00-\x08\x0b\x0c\x0e-\x1f]') +XML_ESC_TRANS = {} +if not PY2: + XML_ESC_TRANS = str.maketrans({'&': '&', + '<':'<', + '>':'>', + u'\uffff':None, # cannot be parsed + u'\ufffe':None}) # cannot be parsed + + for x in INVALID_XML_BYTES: + XML_ESC_TRANS[x] = None + + def remove_invalid_xml_bytes(b): + """ + Remove characters that aren't allowed in xml. + """ try: # Dropping chars that cannot be parsed later on. The # translate() function was benchmarked to be the fastest way @@ -25,6 +42,24 @@ def remove_invalid_xml_bytes(b): # unit tests) return INVALID_XML_RE.sub('', b) +# only python2, which is not covered by coverage tests +def xml_esc(v): # pragma: no cover + "Escape string or unicode object v for xml output" + + # Use is_unicode() instead of is_string() because in python 2, str is + # bytes, not unicode, and should not be "encode()"d. Attempts to + # encode("utf-8") a bytes type will result in an implicit + # decode("ascii") that will throw a UnicodeDecodeError if the string + # contains non-ascii characters. + if is_unicode(v): + # we need to drop these invalid characters because they + # cannot be parsed (and encode() doesn't drop them for us) + v = v.replace(u'\uffff', u'') + v = v.replace(u'\ufffe', u'') + v = v.encode('utf-8') + v = remove_invalid_xml_bytes(v) + return v.replace(b'&',b'&').replace(b'<',b'<').replace(b'>',b'>') + class LLSDXMLFormatter(LLSDBaseFormatter): """ @@ -37,75 +72,78 @@ class LLSDXMLFormatter(LLSDBaseFormatter): this class since the module level format_xml() is the most convenient interface to this functionality. """ - def _elt(self, name, contents=None): - """ - Serialize a single element. - If 'contents' is omitted, write . - If 'contents' is bytes, write contents. - If 'contents' is str, write contents.encode('utf8'). - """ - if not contents: - self.stream.writelines([b"<", name, b" />"]) - else: - self.stream.writelines([b"<", name, b">", - _str_to_bytes(contents), - b""]) - - def xml_esc(self, v): - "Escape string or unicode object v for xml output" - - # Use is_unicode() instead of is_string() because in python 2, str is - # bytes, not unicode, and should not be "encode()"d. Attempts to - # encode("utf-8") a bytes type will result in an implicit - # decode("ascii") that will throw a UnicodeDecodeError if the string - # contains non-ascii characters. - if is_unicode(v): - # we need to drop these invalid characters because they - # cannot be parsed (and encode() doesn't drop them for us) - v = v.replace(u'\uffff', u'') - v = v.replace(u'\ufffe', u'') - v = v.encode('utf-8') - v = remove_invalid_xml_bytes(v) - return v.replace(b'&',b'&').replace(b'<',b'<').replace(b'>',b'>') + def __init__(self, indent_atom = None): + "Construct a serializer." + # Call the super class constructor so that we have the type map + super(LLSDXMLFormatter, self).__init__() + self._indent_atom = b'' + self._eol = b'' + self._indent_level = 0 + + def _indent(self): + pass def _LLSD(self, v): return self._generate(v.thing) def _UNDEF(self, _v): - return self._elt(b'undef') + self.stream.writelines([b'', self._eol]) def _BOOLEAN(self, v): if v: - return self._elt(b'boolean', b'true') - else: - return self._elt(b'boolean', b'false') + return self.stream.writelines([b'true', self._eol]) + self.stream.writelines([b'false', self._eol]) def _INTEGER(self, v): - return self._elt(b'integer', str(v)) + self.stream.writelines([b'', str(v).encode('utf-8'), b'', self._eol]) def _REAL(self, v): - return self._elt(b'real', repr(v)) + self.stream.writelines([b'', str(v).encode('utf-8'), b'', self._eol]) def _UUID(self, v): if v.int == 0: - return self._elt(b'uuid') - else: - return self._elt(b'uuid', str(v)) + return self.stream.writelines([b'', self._eol]) + self.stream.writelines([b'', str(v).encode('utf-8'), b'', self._eol]) def _BINARY(self, v): - return self._elt(b'binary', base64.b64encode(v).strip()) + self.stream.writelines([b'', base64.b64encode(v).strip(), b'', self._eol]) def _STRING(self, v): - return self._elt(b'string', self.xml_esc(v)) + # We don't simply have a function that encapsulates the PY2 vs PY3 calls, + # as that results in another function call and is slightly less performant + if PY2: # pragma: no cover + return self.stream.writelines([b'', _str_to_bytes(xml_esc(v)), b'', self._eol]) + self.stream.writelines([b'', v.translate(XML_ESC_TRANS).encode('utf-8'), b'', self._eol]) def _URI(self, v): - return self._elt(b'uri', self.xml_esc(str(v))) + # We don't simply have a function that encapsulates the PY2 vs PY3 calls, + # as that results in another function call and is slightly less performant + if PY2: # pragma: no cover + return self.stream.writelines([b'', _str_to_bytes(xml_esc(v)), b'', self._eol]) + self.stream.writelines([b'', str(v).translate(XML_ESC_TRANS).encode('utf-8'), b'', self._eol]) def _DATE(self, v): - return self._elt(b'date', _format_datestr(v)) + self.stream.writelines([b'', _format_datestr(v), b'', self._eol]) def _ARRAY(self, v): - self.stream.write(b'') + self.stream.writelines([b'', self._eol]) + self._indent_level = self._indent_level + 1 for item in v: + self._indent() self._generate(item) - self.stream.write(b'') + self._indent_level = self._indent_level - 1 + self.stream.writelines([b'', self._eol]) def _MAP(self, v): - self.stream.write(b'') + self.stream.writelines([b'', self._eol]) + self._indent_level = self._indent_level + 1 for key, value in v.items(): - self._elt(b'key', self.xml_esc(UnicodeType(key))) + self._indent() + if PY2: # pragma: no cover + self.stream.writelines([b'', + xml_esc(UnicodeType(key)), + b'', + self._eol]) + else: + self.stream.writelines([b'', + UnicodeType(key).translate(XML_ESC_TRANS).encode('utf-8'), + b'', + self._eol]) + self._indent() self._generate(value) - self.stream.write(b'') + self._indent_level = self._indent_level - 1 + self._indent() + self.stream.writelines([b'', self._eol]) def _generate(self, something): "Generate xml from a single python object." @@ -121,11 +159,10 @@ def _generate(self, something): def _write(self, something): """ Serialize a python object to self.stream as application/llsd+xml. - :param something: A python object (typically a dict) to be serialized. """ - self.stream.write(b'' - b'') + self.stream.writelines([b'', self._eol, + b'', self._eol]) self._generate(something) self.stream.write(b'') @@ -154,49 +191,12 @@ def __init__(self, indent_atom = None): self._indent_atom = b' ' else: self._indent_atom = indent_atom + self._eol = b'\n' def _indent(self): "Write an indentation based on the atom and indentation level." self.stream.writelines([self._indent_atom] * self._indent_level) - def _ARRAY(self, v): - "Recursively format an array with pretty turned on." - self.stream.write(b'\n') - self._indent_level += 1 - for item in v: - self._indent() - self._generate(item) - self.stream.write(b'\n') - self._indent_level -= 1 - self._indent() - self.stream.write(b'') - - def _MAP(self, v): - "Recursively format a map with pretty turned on." - self.stream.write(b'\n') - self._indent_level += 1 - # sorted list of keys - for key in sorted(v): - self._indent() - self._elt(b'key', UnicodeType(key)) - self.stream.write(b'\n') - self._indent() - self._generate(v[key]) - self.stream.write(b'\n') - self._indent_level -= 1 - self._indent() - self.stream.write(b'') - - def _write(self, something): - """ - Serialize a python object to self.stream as 'pretty' application/llsd+xml. - - :param something: a python object (typically a dict) to be serialized. - """ - self.stream.write(b'\n') - self._generate(something) - self.stream.write(b'\n') - def format_pretty_xml(something): """ @@ -237,6 +237,168 @@ def write_pretty_xml(stream, something): return LLSDXMLPrettyFormatter().write(stream, something) +class LLSDXMLParser: + def __init__(self): + "Construct an xml node parser." + + self.NODE_HANDLERS = { + "undef": lambda x: None, + "boolean": self._bool_to_python, + "integer": self._int_to_python, + "real": self._real_to_python, + "uuid": self._uuid_to_python, + "string": self._str_to_python, + "binary": self._bin_to_python, + "date": self._date_to_python, + "uri": self._uri_to_python, + "map": self._map_to_python, + "array": self._array_to_python, + } + + self.parse_stack = deque([]) + + def _bool_to_python(self, node): + "Convert boolean node to a python object." + val = node.text or '' + try: + # string value, accept 'true' or 'True' or whatever + return (val.lower() in ('true', '1', '1.0')) + except AttributeError: + # not a string (no lower() method), use normal Python rules + return bool(val) + + def _int_to_python(self, node): + "Convert integer node to a python object." + val = node.text or '' + if not val.strip(): + return 0 + return int(val) + + def _real_to_python(self, node): + "Convert floating point node to a python object." + val = node.text or '' + if not val.strip(): + return 0.0 + return float(val) + + def _uuid_to_python(self, node): + "Convert uuid node to a python object." + if node.text: + return uuid.UUID(hex=node.text) + return uuid.UUID(int=0) + + def _str_to_python(self, node): + "Convert string node to a python object." + return node.text or '' + + def _bin_to_python(self, node): + base = node.get('encoding') or 'base64' + try: + if base == 'base16': + # parse base16 encoded data + return binary(base64.b16decode(node.text or '')) + if base == 'base64': + # parse base64 encoded data + return binary(base64.b64decode(node.text or '')) + raise LLSDParseError("Parser doesn't support %s encoding" % base) + + except binascii.Error as exc: + # convert exception class so it's more catchable + raise LLSDParseError("Encoded binary data: " + str(exc)) + except TypeError as exc: + # convert exception class so it's more catchable + raise LLSDParseError("Bad binary data: " + str(exc)) + + def _date_to_python(self, node): + "Convert date node to a python object." + val = node.text or '' + if not val: + val = "1970-01-01T00:00:00Z" + return _parse_datestr(val) + + def _uri_to_python(self, node): + "Convert uri node to a python object." + val = node.text or '' + return uri(val) + + def _map_to_python(self, node): + "Convert map node to a python object." + new_result = {} + new_stack_entry = [iter(node), node, new_result] + self.parse_stack.appendleft(new_stack_entry) + return new_result + + def _array_to_python(self, node): + "Convert array node to a python object." + new_result = [] + new_stack_entry = [iter(node), node, new_result] + self.parse_stack.appendleft(new_stack_entry) + return new_result + + def parse_node(self, something): + """ + Parse an ElementTree tree + This parser is iterative instead of recursive. It uses + Each element in parse_stack is an iterator into either the list + or the dict in the tree. This limits depth by size of free memory + instead of size of the function call stack, allowing us to render + deeper trees than a recursive model. + :param something: The xml node to parse. + :returns: Returns a python object. + """ + + # if the passed in element is not a map or array, simply return + # its value. Otherwise, create a dict or array to receive + # child/leaf elements. + if something.tag == "map": + cur_result = {} + elif something.tag == "array": + cur_result = [] + else: + if something.tag not in self.NODE_HANDLERS: + raise LLSDParseError("Unknown value type %s" % something.tag) + return self.NODE_HANDLERS[something.tag](something) + + # start by pushing the current element iterator data onto + # the stack + # 0 - iterator indicating the current position in the given level of the tree + # this can be either a list iterator position, or an iterator of + # keys for the dict. + # 1 - the actual element object. + # 2 - the result for this level in the tree, onto which + # children or leafs will be appended/set + self.parse_stack.appendleft([iter(something), something, cur_result]) + while True: + node_iter, iterable, cur_result = self.parse_stack[0] + try: + value = next(node_iter) + + except StopIteration: + node_iter, iterable, cur_result = self.parse_stack.popleft() + if len(self.parse_stack) == 0: + break + else: + if iterable.tag == "map": + if value.tag != "key": + raise LLSDParseError("Expected 'key', got %s" % value.tag) + key = value.text + if key is None: + key = '' + try: + value = next(node_iter) + except StopIteration: + raise LLSDParseError("No value for map item %s" % key) + try: + cur_result[key] = self.NODE_HANDLERS[value.tag](value) + except KeyError as err: + raise LLSDParseError("Unknown value type: " + str(err)) + elif iterable.tag == "array": + try: + cur_result.append(self.NODE_HANDLERS[value.tag](value)) + except KeyError as err: + raise LLSDParseError("Unknown value type: " + str(err)) + return cur_result + def parse_xml(something): """ This is the basic public interface for parsing llsd+xml. @@ -252,6 +414,8 @@ def parse_xml(something): return parse_xml_nohdr(parser) + + def parse_xml_nohdr(baseparser): """ Parse llsd+xml known to be without an header. May still @@ -280,7 +444,7 @@ def parse_xml_nohdr(baseparser): if element.tag != 'llsd': raise LLSDParseError("Invalid XML Declaration") # Extract its contents. - return _to_python(element[0]) + return LLSDXMLParser().parse_node(element[0]) def format_xml(something): diff --git a/tests/bench.py b/tests/bench.py index f907997..707107e 100644 --- a/tests/bench.py +++ b/tests/bench.py @@ -45,6 +45,9 @@ """ _bench_data = llsd.parse_xml(BENCH_DATA_XML) + + + BENCH_DATA_BINARY = llsd.format_binary(_bench_data) BENCH_DATA_NOTATION = llsd.format_notation(_bench_data) @@ -78,6 +81,40 @@ def binary_stream(): f.seek(0) yield f +def build_deep_xml(): + deep_data = {} + curr_data = deep_data + for i in range(250): + curr_data["curr_data"] = {} + curr_data["integer"] = 7 + curr_data["string"] = "string" + curr_data["map"] = { "item1": 2.345, "item2": [1,2,3], "item3": {"item4": llsd.uri("http://foo.bar.com")}} + curr_data = curr_data["curr_data"] + + return deep_data +_deep_bench_data = build_deep_xml() + +def build_wide_xml(): + + wide_xml = b""" +wide_array" +""" + wide_data = {} + for i in range(100000): + wide_data["item"+str(i)] = {"item1":2.345, "item2": [1,2,3], "item3": "string", "item4":{"subitem": llsd.uri("http://foo.bar.com")}} + return wide_data +_wide_bench_data = build_wide_xml() + +def build_wide_array_xml(): + + wide_xml = b""" +wide_array" +""" + wide_data = [] + for i in range(100000): + wide_data.append([2.345,[1,2,3], "string", [llsd.uri("http://foo.bar.com")]]) + return wide_data +_wide_array_bench_data = build_wide_array_xml() def bench_stream(parse, stream): ret = parse(stream) @@ -125,3 +162,35 @@ def test_format_notation(benchmark): def test_format_binary(benchmark): benchmark(llsd.format_binary, _bench_data) + +def test_format_xml_deep(benchmark): + benchmark(llsd.format_xml, _deep_bench_data) + +def test_format_xml_wide(benchmark): + benchmark(llsd.format_xml, _wide_bench_data) + +def test_format_notation_deep(benchmark): + benchmark(llsd.format_notation, _deep_bench_data) + +def test_format_notation_wide(benchmark): + benchmark(llsd.format_notation, _wide_bench_data) + +def test_format_notation_wide_array(benchmark): + benchmark(llsd.format_notation, _wide_array_bench_data) + +def test_format_binary_deep(benchmark): + benchmark(llsd.format_binary, _deep_bench_data) + +def test_format_binary_wide(benchmark): + benchmark(llsd.format_binary, _wide_bench_data) + +def test_format_binary_wide_array(benchmark): + benchmark(llsd.format_binary, _wide_array_bench_data) + +def test_parse_xml_deep(benchmark): + deep_data = llsd.format_xml(_deep_bench_data) + benchmark(llsd.parse_xml, deep_data) + +def test_parse_binary_deep(benchmark): + deep_data = llsd.format_binary(_deep_bench_data) + benchmark(llsd.parse_binary, deep_data) From 7f1b5ae99feea49e24181e63a3807b17d3de559d Mon Sep 17 00:00:00 2001 From: Roxie Linden Date: Wed, 28 Jun 2023 18:36:00 +0000 Subject: [PATCH 02/43] Build in python containers, so we can get 2.7 --- .github/workflows/ci.yaml | 1 + 1 file changed, 1 insertion(+) diff --git a/.github/workflows/ci.yaml b/.github/workflows/ci.yaml index 8ad9b57..0835c54 100644 --- a/.github/workflows/ci.yaml +++ b/.github/workflows/ci.yaml @@ -13,6 +13,7 @@ jobs: matrix: python-version: ['2.7', '3.7', '3.8', '3.10'] runs-on: [ubuntu-latest] + container: python:${{ matrix.python-version }} env: PYTHON: ${{ matrix.python-version }} steps: From e44367bd1f15beb123e42536f8f98667b5f4ccac Mon Sep 17 00:00:00 2001 From: Roxie Linden Date: Wed, 28 Jun 2023 18:37:41 +0000 Subject: [PATCH 03/43] fix syntax error --- .github/workflows/ci.yaml | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/.github/workflows/ci.yaml b/.github/workflows/ci.yaml index 0835c54..c450a52 100644 --- a/.github/workflows/ci.yaml +++ b/.github/workflows/ci.yaml @@ -13,7 +13,8 @@ jobs: matrix: python-version: ['2.7', '3.7', '3.8', '3.10'] runs-on: [ubuntu-latest] - container: python:${{ matrix.python-version }} + container: + image: 'python:${{ matrix.python-version }}' env: PYTHON: ${{ matrix.python-version }} steps: From 437db6b78acba7088919bbbdf8888cf16630f6c0 Mon Sep 17 00:00:00 2001 From: Roxie Linden Date: Wed, 28 Jun 2023 18:42:26 +0000 Subject: [PATCH 04/43] try a different form for containers --- .github/workflows/ci.yaml | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/.github/workflows/ci.yaml b/.github/workflows/ci.yaml index c450a52..18ccddf 100644 --- a/.github/workflows/ci.yaml +++ b/.github/workflows/ci.yaml @@ -12,9 +12,10 @@ jobs: strategy: matrix: python-version: ['2.7', '3.7', '3.8', '3.10'] + container: ["python:2.7", "python:3.7", "python:3.8", "python:3.10"] runs-on: [ubuntu-latest] container: - image: 'python:${{ matrix.python-version }}' + image: ${{ matrix.containert }} env: PYTHON: ${{ matrix.python-version }} steps: From c6ca424f491c65642aff4793679271b32740e909 Mon Sep 17 00:00:00 2001 From: Roxie Linden Date: Wed, 28 Jun 2023 18:43:16 +0000 Subject: [PATCH 05/43] test --- .github/workflows/ci.yaml | 2 -- 1 file changed, 2 deletions(-) diff --git a/.github/workflows/ci.yaml b/.github/workflows/ci.yaml index 18ccddf..017d224 100644 --- a/.github/workflows/ci.yaml +++ b/.github/workflows/ci.yaml @@ -14,8 +14,6 @@ jobs: python-version: ['2.7', '3.7', '3.8', '3.10'] container: ["python:2.7", "python:3.7", "python:3.8", "python:3.10"] runs-on: [ubuntu-latest] - container: - image: ${{ matrix.containert }} env: PYTHON: ${{ matrix.python-version }} steps: From 18dfa775559326d7b9980b35a26192a579d5ae9d Mon Sep 17 00:00:00 2001 From: Roxie Linden Date: Wed, 28 Jun 2023 18:44:44 +0000 Subject: [PATCH 06/43] Spacing change --- .github/workflows/ci.yaml | 2 ++ 1 file changed, 2 insertions(+) diff --git a/.github/workflows/ci.yaml b/.github/workflows/ci.yaml index 017d224..39a376f 100644 --- a/.github/workflows/ci.yaml +++ b/.github/workflows/ci.yaml @@ -14,6 +14,8 @@ jobs: python-version: ['2.7', '3.7', '3.8', '3.10'] container: ["python:2.7", "python:3.7", "python:3.8", "python:3.10"] runs-on: [ubuntu-latest] + container: + image: ${{ matrix.containert }} env: PYTHON: ${{ matrix.python-version }} steps: From 2059a06f54fe361a385d0c6939f680583eb5a808 Mon Sep 17 00:00:00 2001 From: Roxie Linden Date: Wed, 28 Jun 2023 18:45:54 +0000 Subject: [PATCH 07/43] Another fix --- .github/workflows/ci.yaml | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/.github/workflows/ci.yaml b/.github/workflows/ci.yaml index 39a376f..d7d697f 100644 --- a/.github/workflows/ci.yaml +++ b/.github/workflows/ci.yaml @@ -12,10 +12,9 @@ jobs: strategy: matrix: python-version: ['2.7', '3.7', '3.8', '3.10'] - container: ["python:2.7", "python:3.7", "python:3.8", "python:3.10"] runs-on: [ubuntu-latest] container: - image: ${{ matrix.containert }} + image: "python:${{ matrix.python-version }}" env: PYTHON: ${{ matrix.python-version }} steps: From a7a9ca3043147fc87cc10d8f5af23120ca0d1a5c Mon Sep 17 00:00:00 2001 From: Roxie Linden Date: Wed, 28 Jun 2023 18:50:49 +0000 Subject: [PATCH 08/43] Sudo pip installs (in container) --- .github/workflows/ci.yaml | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/.github/workflows/ci.yaml b/.github/workflows/ci.yaml index d7d697f..4935fcb 100644 --- a/.github/workflows/ci.yaml +++ b/.github/workflows/ci.yaml @@ -28,8 +28,8 @@ jobs: - name: Install python dependencies run: | - pip install wheel build tox - pip install .[dev] + sudo -H pip install wheel build tox + sudo -H pip install .[dev] - name: Determine pyenv id: pyenv From 58ee3480e2ad0131c0f6d0564a488995f8301502 Mon Sep 17 00:00:00 2001 From: Roxie Linden Date: Wed, 28 Jun 2023 18:56:52 +0000 Subject: [PATCH 09/43] upgrade pip --- .github/workflows/ci.yaml | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/.github/workflows/ci.yaml b/.github/workflows/ci.yaml index 4935fcb..9cc4cc6 100644 --- a/.github/workflows/ci.yaml +++ b/.github/workflows/ci.yaml @@ -28,8 +28,9 @@ jobs: - name: Install python dependencies run: | - sudo -H pip install wheel build tox - sudo -H pip install .[dev] + pip install --upgrade pip + pip install wheel build tox + pip install .[dev] - name: Determine pyenv id: pyenv From a844d052cfe163170c3150d3f80ebd14541c27d6 Mon Sep 17 00:00:00 2001 From: Roxie Linden Date: Wed, 28 Jun 2023 19:04:02 +0000 Subject: [PATCH 10/43] Update scm version settings --- setup.py | 3 +++ 1 file changed, 3 insertions(+) diff --git a/setup.py b/setup.py index db8beb0..2864c11 100644 --- a/setup.py +++ b/setup.py @@ -20,6 +20,9 @@ setup_requires=["setuptools_scm<6"], use_scm_version={ 'local_scheme': 'no-local-version', # disable local-version to allow uploads to test.pypi.org + 'version_scheme': 'post-release', + 'relative_to': __file__, + 'root': '..', }, extras_require={ "dev": ["pytest", "pytest-benchmark", "pytest-cov<3"], From 5c6f3024b640ed19c0a5d4ee617b85b1e62e24bc Mon Sep 17 00:00:00 2001 From: Roxie Linden Date: Wed, 28 Jun 2023 19:08:01 +0000 Subject: [PATCH 11/43] Try alpine container --- .github/workflows/ci.yaml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/.github/workflows/ci.yaml b/.github/workflows/ci.yaml index 9cc4cc6..c624081 100644 --- a/.github/workflows/ci.yaml +++ b/.github/workflows/ci.yaml @@ -14,7 +14,7 @@ jobs: python-version: ['2.7', '3.7', '3.8', '3.10'] runs-on: [ubuntu-latest] container: - image: "python:${{ matrix.python-version }}" + image: "python:${{ matrix.python-version }}-alpine" env: PYTHON: ${{ matrix.python-version }} steps: From 30808b438124d6dcdd79acf0d92775031213559c Mon Sep 17 00:00:00 2001 From: Roxie Linden Date: Wed, 28 Jun 2023 19:10:15 +0000 Subject: [PATCH 12/43] Don't upgrade pip --- .github/workflows/ci.yaml | 1 - 1 file changed, 1 deletion(-) diff --git a/.github/workflows/ci.yaml b/.github/workflows/ci.yaml index c624081..1b9b27e 100644 --- a/.github/workflows/ci.yaml +++ b/.github/workflows/ci.yaml @@ -28,7 +28,6 @@ jobs: - name: Install python dependencies run: | - pip install --upgrade pip pip install wheel build tox pip install .[dev] From 21b1945f4557e82cef44950ce9b8779f5b0a0ce4 Mon Sep 17 00:00:00 2001 From: Roxie Linden Date: Wed, 28 Jun 2023 19:16:18 +0000 Subject: [PATCH 13/43] Don't use alpine --- .github/workflows/ci.yaml | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/.github/workflows/ci.yaml b/.github/workflows/ci.yaml index 1b9b27e..9cc4cc6 100644 --- a/.github/workflows/ci.yaml +++ b/.github/workflows/ci.yaml @@ -14,7 +14,7 @@ jobs: python-version: ['2.7', '3.7', '3.8', '3.10'] runs-on: [ubuntu-latest] container: - image: "python:${{ matrix.python-version }}-alpine" + image: "python:${{ matrix.python-version }}" env: PYTHON: ${{ matrix.python-version }} steps: @@ -28,6 +28,7 @@ jobs: - name: Install python dependencies run: | + pip install --upgrade pip pip install wheel build tox pip install .[dev] From f0e00824e24e74d70e91c89431995878d1167f08 Mon Sep 17 00:00:00 2001 From: Roxie Linden Date: Wed, 28 Jun 2023 19:19:43 +0000 Subject: [PATCH 14/43] Show tags --- .github/workflows/ci.yaml | 1 + 1 file changed, 1 insertion(+) diff --git a/.github/workflows/ci.yaml b/.github/workflows/ci.yaml index 9cc4cc6..05eef29 100644 --- a/.github/workflows/ci.yaml +++ b/.github/workflows/ci.yaml @@ -28,6 +28,7 @@ jobs: - name: Install python dependencies run: | + git tag pip install --upgrade pip pip install wheel build tox pip install .[dev] From 67712a26500c97d3bc046cae5be6cc59015dfe12 Mon Sep 17 00:00:00 2001 From: Roxie Linden Date: Wed, 28 Jun 2023 19:23:11 +0000 Subject: [PATCH 15/43] what user are we running under? --- .github/workflows/ci.yaml | 1 + 1 file changed, 1 insertion(+) diff --git a/.github/workflows/ci.yaml b/.github/workflows/ci.yaml index 05eef29..d03c635 100644 --- a/.github/workflows/ci.yaml +++ b/.github/workflows/ci.yaml @@ -28,6 +28,7 @@ jobs: - name: Install python dependencies run: | + set git tag pip install --upgrade pip pip install wheel build tox From 4c2cfafd97413a7cd64e71596a73933661f36b3c Mon Sep 17 00:00:00 2001 From: Roxie Linden Date: Wed, 28 Jun 2023 19:28:54 +0000 Subject: [PATCH 16/43] Try sudo --- .github/workflows/ci.yaml | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/.github/workflows/ci.yaml b/.github/workflows/ci.yaml index d03c635..0d3cfad 100644 --- a/.github/workflows/ci.yaml +++ b/.github/workflows/ci.yaml @@ -28,11 +28,11 @@ jobs: - name: Install python dependencies run: | - set - git tag + apt-get update + apt-get -y install sudo pip install --upgrade pip - pip install wheel build tox - pip install .[dev] + sudo -H pip install wheel build tox + sudo -H pip install .[dev] - name: Determine pyenv id: pyenv From ab0c5f67402e8e637f77ad021e8d5d4c502499ce Mon Sep 17 00:00:00 2001 From: Roxie Linden Date: Wed, 28 Jun 2023 19:31:12 +0000 Subject: [PATCH 17/43] Do we have a .git --- .github/workflows/ci.yaml | 1 + 1 file changed, 1 insertion(+) diff --git a/.github/workflows/ci.yaml b/.github/workflows/ci.yaml index 0d3cfad..b71ff21 100644 --- a/.github/workflows/ci.yaml +++ b/.github/workflows/ci.yaml @@ -32,6 +32,7 @@ jobs: apt-get -y install sudo pip install --upgrade pip sudo -H pip install wheel build tox + ls -la sudo -H pip install .[dev] - name: Determine pyenv From 0f8cb0ed5ba2f65afbe5fc4801320e9ad36e40a9 Mon Sep 17 00:00:00 2001 From: Roxie Linden Date: Wed, 28 Jun 2023 19:32:50 +0000 Subject: [PATCH 18/43] Possibly don't need action to install python --- .github/workflows/ci.yaml | 4 ---- 1 file changed, 4 deletions(-) diff --git a/.github/workflows/ci.yaml b/.github/workflows/ci.yaml index b71ff21..afe34dd 100644 --- a/.github/workflows/ci.yaml +++ b/.github/workflows/ci.yaml @@ -22,10 +22,6 @@ jobs: with: fetch-depth: 0 # fetch all history for setuptools_scm to be able to read tags - - uses: actions/setup-python@v4 - with: - python-version: ${{ matrix.python-version }} - - name: Install python dependencies run: | apt-get update From 466bd570c5a230d2886b1836396fc690230685f0 Mon Sep 17 00:00:00 2001 From: Roxie Linden Date: Wed, 28 Jun 2023 19:34:10 +0000 Subject: [PATCH 19/43] show tags --- .github/workflows/ci.yaml | 1 + 1 file changed, 1 insertion(+) diff --git a/.github/workflows/ci.yaml b/.github/workflows/ci.yaml index afe34dd..0798d13 100644 --- a/.github/workflows/ci.yaml +++ b/.github/workflows/ci.yaml @@ -27,6 +27,7 @@ jobs: apt-get update apt-get -y install sudo pip install --upgrade pip + sudo git tag sudo -H pip install wheel build tox ls -la sudo -H pip install .[dev] From 95671ab31f6523b4a44fa32b5da51d90f9b77348 Mon Sep 17 00:00:00 2001 From: Roxie Linden Date: Wed, 28 Jun 2023 19:36:24 +0000 Subject: [PATCH 20/43] mark directory as safe --- .github/workflows/ci.yaml | 1 + 1 file changed, 1 insertion(+) diff --git a/.github/workflows/ci.yaml b/.github/workflows/ci.yaml index 0798d13..cf9f345 100644 --- a/.github/workflows/ci.yaml +++ b/.github/workflows/ci.yaml @@ -27,6 +27,7 @@ jobs: apt-get update apt-get -y install sudo pip install --upgrade pip + git config --global --add safe.directory . sudo git tag sudo -H pip install wheel build tox ls -la From e201ac4fdc71efb57c246980ec1118250e3931de Mon Sep 17 00:00:00 2001 From: Roxie Linden Date: Wed, 28 Jun 2023 19:38:02 +0000 Subject: [PATCH 21/43] try direct safe directory --- .github/workflows/ci.yaml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/.github/workflows/ci.yaml b/.github/workflows/ci.yaml index cf9f345..7d96700 100644 --- a/.github/workflows/ci.yaml +++ b/.github/workflows/ci.yaml @@ -26,8 +26,8 @@ jobs: run: | apt-get update apt-get -y install sudo + git config --global --add safe.directory /__w/python-llsd/python-llsd pip install --upgrade pip - git config --global --add safe.directory . sudo git tag sudo -H pip install wheel build tox ls -la From b573bf8b91dd6c5aa870675666c1f434048c9440 Mon Sep 17 00:00:00 2001 From: Roxie Linden Date: Wed, 28 Jun 2023 19:42:50 +0000 Subject: [PATCH 22/43] Make all directories safe --- .github/workflows/ci.yaml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/.github/workflows/ci.yaml b/.github/workflows/ci.yaml index 7d96700..f5d953c 100644 --- a/.github/workflows/ci.yaml +++ b/.github/workflows/ci.yaml @@ -26,7 +26,7 @@ jobs: run: | apt-get update apt-get -y install sudo - git config --global --add safe.directory /__w/python-llsd/python-llsd + git config --global --add safe.directory '*' pip install --upgrade pip sudo git tag sudo -H pip install wheel build tox From fc959dcd2ddf3212437b77cde2f64c97533fa2ba Mon Sep 17 00:00:00 2001 From: Roxie Linden Date: Wed, 28 Jun 2023 19:50:30 +0000 Subject: [PATCH 23/43] more diagnostics --- .github/workflows/ci.yaml | 3 +++ 1 file changed, 3 insertions(+) diff --git a/.github/workflows/ci.yaml b/.github/workflows/ci.yaml index f5d953c..3788113 100644 --- a/.github/workflows/ci.yaml +++ b/.github/workflows/ci.yaml @@ -26,7 +26,10 @@ jobs: run: | apt-get update apt-get -y install sudo + echo "MARKING DIRECTORIES AS SAFE" git config --global --add safe.directory '*' + echo "DONE MARKING DIRECTORIES AS SAFE" + ls -la /home pip install --upgrade pip sudo git tag sudo -H pip install wheel build tox From f288dc615071e78f5d00766d6aebe9d11f10755c Mon Sep 17 00:00:00 2001 From: Roxie Linden Date: Wed, 28 Jun 2023 19:54:06 +0000 Subject: [PATCH 24/43] more diagnostics --- .github/workflows/ci.yaml | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/.github/workflows/ci.yaml b/.github/workflows/ci.yaml index 3788113..93c26eb 100644 --- a/.github/workflows/ci.yaml +++ b/.github/workflows/ci.yaml @@ -30,8 +30,9 @@ jobs: git config --global --add safe.directory '*' echo "DONE MARKING DIRECTORIES AS SAFE" ls -la /home + echo $USER pip install --upgrade pip - sudo git tag + sudo -H git tag sudo -H pip install wheel build tox ls -la sudo -H pip install .[dev] From d01b6adfb50c64dc046fab9d06e3361aec4776d3 Mon Sep 17 00:00:00 2001 From: Roxie Linden Date: Wed, 28 Jun 2023 19:56:05 +0000 Subject: [PATCH 25/43] show current user --- .github/workflows/ci.yaml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/.github/workflows/ci.yaml b/.github/workflows/ci.yaml index 93c26eb..ba5b389 100644 --- a/.github/workflows/ci.yaml +++ b/.github/workflows/ci.yaml @@ -30,7 +30,7 @@ jobs: git config --global --add safe.directory '*' echo "DONE MARKING DIRECTORIES AS SAFE" ls -la /home - echo $USER + who pip install --upgrade pip sudo -H git tag sudo -H pip install wheel build tox From 308045d4bcb46350c6a6d47422adf5636bf631ed Mon Sep 17 00:00:00 2001 From: Roxie Linden Date: Wed, 28 Jun 2023 20:02:45 +0000 Subject: [PATCH 26/43] Try buster --- .github/workflows/ci.yaml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/.github/workflows/ci.yaml b/.github/workflows/ci.yaml index ba5b389..389d520 100644 --- a/.github/workflows/ci.yaml +++ b/.github/workflows/ci.yaml @@ -14,7 +14,7 @@ jobs: python-version: ['2.7', '3.7', '3.8', '3.10'] runs-on: [ubuntu-latest] container: - image: "python:${{ matrix.python-version }}" + image: "python:${{ matrix.python-version }}-buster" env: PYTHON: ${{ matrix.python-version }} steps: From 8bf587e6a8872b2773a5f6f6a989cf44c3835ce5 Mon Sep 17 00:00:00 2001 From: Roxie Linden Date: Wed, 28 Jun 2023 20:10:10 +0000 Subject: [PATCH 27/43] try venv --- .github/workflows/ci.yaml | 2 ++ 1 file changed, 2 insertions(+) diff --git a/.github/workflows/ci.yaml b/.github/workflows/ci.yaml index 389d520..e0e267a 100644 --- a/.github/workflows/ci.yaml +++ b/.github/workflows/ci.yaml @@ -26,6 +26,8 @@ jobs: run: | apt-get update apt-get -y install sudo + python -m venv /tmp/venv + source /tmp/venv/bin/activate echo "MARKING DIRECTORIES AS SAFE" git config --global --add safe.directory '*' echo "DONE MARKING DIRECTORIES AS SAFE" From b8745bbd58425bf5e1dc837994af5f0ce037d839 Mon Sep 17 00:00:00 2001 From: Roxie Linden Date: Wed, 28 Jun 2023 20:11:52 +0000 Subject: [PATCH 28/43] be more assertive --- .github/workflows/ci.yaml | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/.github/workflows/ci.yaml b/.github/workflows/ci.yaml index e0e267a..5575e81 100644 --- a/.github/workflows/ci.yaml +++ b/.github/workflows/ci.yaml @@ -26,8 +26,8 @@ jobs: run: | apt-get update apt-get -y install sudo - python -m venv /tmp/venv - source /tmp/venv/bin/activate + sudo python -m venv /tmp/venv + sudo source /tmp/venv/bin/activate echo "MARKING DIRECTORIES AS SAFE" git config --global --add safe.directory '*' echo "DONE MARKING DIRECTORIES AS SAFE" From 91431ae6f3fe1a7b7382136e035d4993a5506330 Mon Sep 17 00:00:00 2001 From: Roxie Linden Date: Wed, 28 Jun 2023 20:14:45 +0000 Subject: [PATCH 29/43] Try again --- .github/workflows/ci.yaml | 10 ++++------ 1 file changed, 4 insertions(+), 6 deletions(-) diff --git a/.github/workflows/ci.yaml b/.github/workflows/ci.yaml index 5575e81..c9eff45 100644 --- a/.github/workflows/ci.yaml +++ b/.github/workflows/ci.yaml @@ -26,13 +26,11 @@ jobs: run: | apt-get update apt-get -y install sudo - sudo python -m venv /tmp/venv - sudo source /tmp/venv/bin/activate - echo "MARKING DIRECTORIES AS SAFE" - git config --global --add safe.directory '*' - echo "DONE MARKING DIRECTORIES AS SAFE" + python -m venv /tmp/venv + source /tmp/venv/bin/activate + echo "VIRTUAL ENV: $VIRTUAL_ENV" ls -la /home - who + set pip install --upgrade pip sudo -H git tag sudo -H pip install wheel build tox From 5be2d148f63a0ef8dd7d667c12786b99e9dbc540 Mon Sep 17 00:00:00 2001 From: Roxie Linden Date: Wed, 28 Jun 2023 20:16:57 +0000 Subject: [PATCH 30/43] venv didn't work --- .github/workflows/ci.yaml | 3 --- 1 file changed, 3 deletions(-) diff --git a/.github/workflows/ci.yaml b/.github/workflows/ci.yaml index c9eff45..92cfd71 100644 --- a/.github/workflows/ci.yaml +++ b/.github/workflows/ci.yaml @@ -26,9 +26,6 @@ jobs: run: | apt-get update apt-get -y install sudo - python -m venv /tmp/venv - source /tmp/venv/bin/activate - echo "VIRTUAL ENV: $VIRTUAL_ENV" ls -la /home set pip install --upgrade pip From f15fa8ecd82c339dc8905281d9e9c0df1cd35454 Mon Sep 17 00:00:00 2001 From: Roxie Linden Date: Wed, 28 Jun 2023 20:20:54 +0000 Subject: [PATCH 31/43] show directory --- .github/workflows/ci.yaml | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/.github/workflows/ci.yaml b/.github/workflows/ci.yaml index 92cfd71..9f41e5d 100644 --- a/.github/workflows/ci.yaml +++ b/.github/workflows/ci.yaml @@ -26,10 +26,10 @@ jobs: run: | apt-get update apt-get -y install sudo - ls -la /home + ls -la set pip install --upgrade pip - sudo -H git tag + git tag sudo -H pip install wheel build tox ls -la sudo -H pip install .[dev] From 289459c8381bf514770d6c4d06fe09b2064bdbf0 Mon Sep 17 00:00:00 2001 From: Roxie Linden Date: Wed, 28 Jun 2023 20:25:56 +0000 Subject: [PATCH 32/43] make root of git user root --- .github/workflows/ci.yaml | 1 + 1 file changed, 1 insertion(+) diff --git a/.github/workflows/ci.yaml b/.github/workflows/ci.yaml index 9f41e5d..79a0691 100644 --- a/.github/workflows/ci.yaml +++ b/.github/workflows/ci.yaml @@ -29,6 +29,7 @@ jobs: ls -la set pip install --upgrade pip + sudo chown . root git tag sudo -H pip install wheel build tox ls -la From c5ea1f38f9400f815fde09c1ba224ce795472279 Mon Sep 17 00:00:00 2001 From: Roxie Linden Date: Wed, 28 Jun 2023 20:27:22 +0000 Subject: [PATCH 33/43] oopse --- .github/workflows/ci.yaml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/.github/workflows/ci.yaml b/.github/workflows/ci.yaml index 79a0691..dcc997a 100644 --- a/.github/workflows/ci.yaml +++ b/.github/workflows/ci.yaml @@ -29,7 +29,7 @@ jobs: ls -la set pip install --upgrade pip - sudo chown . root + sudo chown root . git tag sudo -H pip install wheel build tox ls -la From d3c22a3e342fe60d1a0d0def9a0f604621ddb5cb Mon Sep 17 00:00:00 2001 From: Roxie Linden Date: Wed, 28 Jun 2023 20:29:21 +0000 Subject: [PATCH 34/43] more diagnostics --- .github/workflows/ci.yaml | 1 + 1 file changed, 1 insertion(+) diff --git a/.github/workflows/ci.yaml b/.github/workflows/ci.yaml index dcc997a..8f8a7a0 100644 --- a/.github/workflows/ci.yaml +++ b/.github/workflows/ci.yaml @@ -31,6 +31,7 @@ jobs: pip install --upgrade pip sudo chown root . git tag + git status sudo -H pip install wheel build tox ls -la sudo -H pip install .[dev] From c6e1e8cb0009ec5656859127e26197f7c5745ade Mon Sep 17 00:00:00 2001 From: Roxie Linden Date: Wed, 28 Jun 2023 20:49:25 +0000 Subject: [PATCH 35/43] fixup setup --- setup.py | 3 --- 1 file changed, 3 deletions(-) diff --git a/setup.py b/setup.py index 2864c11..db8beb0 100644 --- a/setup.py +++ b/setup.py @@ -20,9 +20,6 @@ setup_requires=["setuptools_scm<6"], use_scm_version={ 'local_scheme': 'no-local-version', # disable local-version to allow uploads to test.pypi.org - 'version_scheme': 'post-release', - 'relative_to': __file__, - 'root': '..', }, extras_require={ "dev": ["pytest", "pytest-benchmark", "pytest-cov<3"], From 1994a95e2e112b06b68d5c0a0ffb6ed0e1dbfb10 Mon Sep 17 00:00:00 2001 From: Roxie Linden Date: Wed, 28 Jun 2023 20:54:19 +0000 Subject: [PATCH 36/43] cleanup --- .github/workflows/ci.yaml | 5 ----- 1 file changed, 5 deletions(-) diff --git a/.github/workflows/ci.yaml b/.github/workflows/ci.yaml index 8f8a7a0..acaa97b 100644 --- a/.github/workflows/ci.yaml +++ b/.github/workflows/ci.yaml @@ -26,14 +26,9 @@ jobs: run: | apt-get update apt-get -y install sudo - ls -la - set pip install --upgrade pip sudo chown root . - git tag - git status sudo -H pip install wheel build tox - ls -la sudo -H pip install .[dev] - name: Determine pyenv From 00a4c7da9f54d7b4f4aebd1bc5430f15c20ae98c Mon Sep 17 00:00:00 2001 From: Roxie Linden Date: Wed, 28 Jun 2023 21:08:49 +0000 Subject: [PATCH 37/43] Remove iterative xml parsing --- llsd/serde_xml.py | 171 +--------------------------------------------- 1 file changed, 2 insertions(+), 169 deletions(-) diff --git a/llsd/serde_xml.py b/llsd/serde_xml.py index 3c57678..62c9fa6 100644 --- a/llsd/serde_xml.py +++ b/llsd/serde_xml.py @@ -1,13 +1,10 @@ import base64 -import binascii -from collections import deque import io import re -import uuid from llsd.base import (_LLSD, ALL_CHARS, LLSDBaseParser, LLSDBaseFormatter, XML_HEADER, LLSDParseError, LLSDSerializationError, UnicodeType, - _format_datestr, _str_to_bytes, is_unicode, PY2, uri, binary, _parse_datestr) + _format_datestr, _str_to_bytes, _to_python, is_unicode, PY2) from llsd.fastest_elementtree import ElementTreeError, fromstring, parse as _parse INVALID_XML_BYTES = b'\x00\x01\x02\x03\x04\x05\x06\x07\x08\x0b\x0c'\ @@ -237,168 +234,6 @@ def write_pretty_xml(stream, something): return LLSDXMLPrettyFormatter().write(stream, something) -class LLSDXMLParser: - def __init__(self): - "Construct an xml node parser." - - self.NODE_HANDLERS = { - "undef": lambda x: None, - "boolean": self._bool_to_python, - "integer": self._int_to_python, - "real": self._real_to_python, - "uuid": self._uuid_to_python, - "string": self._str_to_python, - "binary": self._bin_to_python, - "date": self._date_to_python, - "uri": self._uri_to_python, - "map": self._map_to_python, - "array": self._array_to_python, - } - - self.parse_stack = deque([]) - - def _bool_to_python(self, node): - "Convert boolean node to a python object." - val = node.text or '' - try: - # string value, accept 'true' or 'True' or whatever - return (val.lower() in ('true', '1', '1.0')) - except AttributeError: - # not a string (no lower() method), use normal Python rules - return bool(val) - - def _int_to_python(self, node): - "Convert integer node to a python object." - val = node.text or '' - if not val.strip(): - return 0 - return int(val) - - def _real_to_python(self, node): - "Convert floating point node to a python object." - val = node.text or '' - if not val.strip(): - return 0.0 - return float(val) - - def _uuid_to_python(self, node): - "Convert uuid node to a python object." - if node.text: - return uuid.UUID(hex=node.text) - return uuid.UUID(int=0) - - def _str_to_python(self, node): - "Convert string node to a python object." - return node.text or '' - - def _bin_to_python(self, node): - base = node.get('encoding') or 'base64' - try: - if base == 'base16': - # parse base16 encoded data - return binary(base64.b16decode(node.text or '')) - if base == 'base64': - # parse base64 encoded data - return binary(base64.b64decode(node.text or '')) - raise LLSDParseError("Parser doesn't support %s encoding" % base) - - except binascii.Error as exc: - # convert exception class so it's more catchable - raise LLSDParseError("Encoded binary data: " + str(exc)) - except TypeError as exc: - # convert exception class so it's more catchable - raise LLSDParseError("Bad binary data: " + str(exc)) - - def _date_to_python(self, node): - "Convert date node to a python object." - val = node.text or '' - if not val: - val = "1970-01-01T00:00:00Z" - return _parse_datestr(val) - - def _uri_to_python(self, node): - "Convert uri node to a python object." - val = node.text or '' - return uri(val) - - def _map_to_python(self, node): - "Convert map node to a python object." - new_result = {} - new_stack_entry = [iter(node), node, new_result] - self.parse_stack.appendleft(new_stack_entry) - return new_result - - def _array_to_python(self, node): - "Convert array node to a python object." - new_result = [] - new_stack_entry = [iter(node), node, new_result] - self.parse_stack.appendleft(new_stack_entry) - return new_result - - def parse_node(self, something): - """ - Parse an ElementTree tree - This parser is iterative instead of recursive. It uses - Each element in parse_stack is an iterator into either the list - or the dict in the tree. This limits depth by size of free memory - instead of size of the function call stack, allowing us to render - deeper trees than a recursive model. - :param something: The xml node to parse. - :returns: Returns a python object. - """ - - # if the passed in element is not a map or array, simply return - # its value. Otherwise, create a dict or array to receive - # child/leaf elements. - if something.tag == "map": - cur_result = {} - elif something.tag == "array": - cur_result = [] - else: - if something.tag not in self.NODE_HANDLERS: - raise LLSDParseError("Unknown value type %s" % something.tag) - return self.NODE_HANDLERS[something.tag](something) - - # start by pushing the current element iterator data onto - # the stack - # 0 - iterator indicating the current position in the given level of the tree - # this can be either a list iterator position, or an iterator of - # keys for the dict. - # 1 - the actual element object. - # 2 - the result for this level in the tree, onto which - # children or leafs will be appended/set - self.parse_stack.appendleft([iter(something), something, cur_result]) - while True: - node_iter, iterable, cur_result = self.parse_stack[0] - try: - value = next(node_iter) - - except StopIteration: - node_iter, iterable, cur_result = self.parse_stack.popleft() - if len(self.parse_stack) == 0: - break - else: - if iterable.tag == "map": - if value.tag != "key": - raise LLSDParseError("Expected 'key', got %s" % value.tag) - key = value.text - if key is None: - key = '' - try: - value = next(node_iter) - except StopIteration: - raise LLSDParseError("No value for map item %s" % key) - try: - cur_result[key] = self.NODE_HANDLERS[value.tag](value) - except KeyError as err: - raise LLSDParseError("Unknown value type: " + str(err)) - elif iterable.tag == "array": - try: - cur_result.append(self.NODE_HANDLERS[value.tag](value)) - except KeyError as err: - raise LLSDParseError("Unknown value type: " + str(err)) - return cur_result - def parse_xml(something): """ This is the basic public interface for parsing llsd+xml. @@ -414,8 +249,6 @@ def parse_xml(something): return parse_xml_nohdr(parser) - - def parse_xml_nohdr(baseparser): """ Parse llsd+xml known to be without an header. May still @@ -444,7 +277,7 @@ def parse_xml_nohdr(baseparser): if element.tag != 'llsd': raise LLSDParseError("Invalid XML Declaration") # Extract its contents. - return LLSDXMLParser().parse_node(element[0]) + return _to_python(element[0]) def format_xml(something): From 95584f6592691fbdfff393d76b4eff6cdbd98dde Mon Sep 17 00:00:00 2001 From: Roxie Linden Date: Thu, 29 Jun 2023 21:23:09 +0000 Subject: [PATCH 38/43] Improve test code coverage --- llsd/serde_xml.py | 7 ++----- 1 file changed, 2 insertions(+), 5 deletions(-) diff --git a/llsd/serde_xml.py b/llsd/serde_xml.py index 62c9fa6..7e4b0dc 100644 --- a/llsd/serde_xml.py +++ b/llsd/serde_xml.py @@ -177,17 +177,14 @@ class LLSDXMLPrettyFormatter(LLSDXMLFormatter): This class is not necessarily suited for serializing very large objects. It sorts on dict (llsd map) keys alphabetically to ease human reading. """ - def __init__(self, indent_atom = None): + def __init__(self, indent_atom = b' '): "Construct a pretty serializer." # Call the super class constructor so that we have the type map super(LLSDXMLPrettyFormatter, self).__init__() # Private data used for indentation. self._indent_level = 1 - if indent_atom is None: - self._indent_atom = b' ' - else: - self._indent_atom = indent_atom + self._indent_atom = indent_atom self._eol = b'\n' def _indent(self): From e00171d7bc03f043cff1da3508580119d9272bd8 Mon Sep 17 00:00:00 2001 From: Roxie Linden Date: Fri, 7 Jul 2023 17:55:04 +0000 Subject: [PATCH 39/43] SL-19707 throw an error if we exceed 200 depth in formatting or parsing --- llsd/base.py | 2 +- llsd/serde_binary.py | 23 +++++++------ llsd/serde_notation.py | 14 +++++++- llsd/serde_xml.py | 18 +++++----- tests/bench.py | 2 +- tests/llsd_test.py | 74 ++++++++++++++++++++++++++++++++++-------- 6 files changed, 97 insertions(+), 36 deletions(-) diff --git a/llsd/base.py b/llsd/base.py index cbeab54..3d0c4f8 100644 --- a/llsd/base.py +++ b/llsd/base.py @@ -31,7 +31,7 @@ ALL_CHARS = str(bytearray(range(256))) if PY2 else bytes(range(256)) - +MAX_FORMAT_DEPTH = 200 class _LLSD: __metaclass__ = abc.ABCMeta diff --git a/llsd/serde_binary.py b/llsd/serde_binary.py index 6f0d93e..54f45c8 100644 --- a/llsd/serde_binary.py +++ b/llsd/serde_binary.py @@ -5,7 +5,7 @@ import uuid from llsd.base import (_LLSD, LLSDBaseParser, LLSDSerializationError, BINARY_HEADER, - _str_to_bytes, binary, is_integer, is_string, uri) + MAX_FORMAT_DEPTH,_str_to_bytes, binary, is_integer, is_string, uri) try: @@ -15,7 +15,6 @@ # Python 3: 'range()' is already lazy pass - class LLSDBinaryParser(LLSDBaseParser): """ Parse application/llsd+binary to a python object. @@ -164,15 +163,19 @@ def format_binary(something): def write_binary(stream, something): stream.write(b'\n') - _write_binary_recurse(stream, something) + _write_binary_recurse(stream, something, 0) -def _write_binary_recurse(stream, something): +def _write_binary_recurse(stream, something, depth): "Binary formatter workhorse." + + if depth > MAX_FORMAT_DEPTH: + raise LLSDSerializationError("Cannot serialize depth of more than %d" % MAX_FORMAT_DEPTH) + if something is None: stream.write(b'!') elif isinstance(something, _LLSD): - _write_binary_recurse(stream, something.thing) + _write_binary_recurse(stream, something.thing, depth) elif isinstance(something, bool): stream.write(b'1' if something else b'0') elif is_integer(something): @@ -202,27 +205,27 @@ def _write_binary_recurse(stream, something): seconds_since_epoch = calendar.timegm(something.timetuple()) stream.writelines([b'd', struct.pack(' MAX_FORMAT_DEPTH: + raise LLSDSerializationError("Cannot serialize depth of more than %d" % MAX_FORMAT_DEPTH) + t = type(something) handler = self.type_map.get(t) if handler: diff --git a/llsd/serde_xml.py b/llsd/serde_xml.py index 7e4b0dc..63b342a 100644 --- a/llsd/serde_xml.py +++ b/llsd/serde_xml.py @@ -3,7 +3,7 @@ import re from llsd.base import (_LLSD, ALL_CHARS, LLSDBaseParser, LLSDBaseFormatter, XML_HEADER, - LLSDParseError, LLSDSerializationError, UnicodeType, + MAX_FORMAT_DEPTH, LLSDParseError, LLSDSerializationError, UnicodeType, _format_datestr, _str_to_bytes, _to_python, is_unicode, PY2) from llsd.fastest_elementtree import ElementTreeError, fromstring, parse as _parse @@ -24,7 +24,6 @@ for x in INVALID_XML_BYTES: XML_ESC_TRANS[x] = None - def remove_invalid_xml_bytes(b): """ Remove characters that aren't allowed in xml. @@ -76,7 +75,7 @@ def __init__(self, indent_atom = None): super(LLSDXMLFormatter, self).__init__() self._indent_atom = b'' self._eol = b'' - self._indent_level = 0 + self._depth = 1 def _indent(self): pass @@ -115,15 +114,15 @@ def _DATE(self, v): self.stream.writelines([b'', _format_datestr(v), b'', self._eol]) def _ARRAY(self, v): self.stream.writelines([b'', self._eol]) - self._indent_level = self._indent_level + 1 + self._depth = self._depth + 1 for item in v: self._indent() self._generate(item) - self._indent_level = self._indent_level - 1 + self._depth = self._depth - 1 self.stream.writelines([b'', self._eol]) def _MAP(self, v): self.stream.writelines([b'', self._eol]) - self._indent_level = self._indent_level + 1 + self._depth = self._depth + 1 for key, value in v.items(): self._indent() if PY2: # pragma: no cover @@ -138,12 +137,14 @@ def _MAP(self, v): self._eol]) self._indent() self._generate(value) - self._indent_level = self._indent_level - 1 + self._depth = self._depth - 1 self._indent() self.stream.writelines([b'', self._eol]) def _generate(self, something): "Generate xml from a single python object." + if self._depth - 1 > MAX_FORMAT_DEPTH: + raise LLSDSerializationError("Cannot serialize depth of more than %d" % MAX_FORMAT_DEPTH) t = type(something) if t in self.type_map: return self.type_map[t](something) @@ -183,13 +184,12 @@ def __init__(self, indent_atom = b' '): super(LLSDXMLPrettyFormatter, self).__init__() # Private data used for indentation. - self._indent_level = 1 self._indent_atom = indent_atom self._eol = b'\n' def _indent(self): "Write an indentation based on the atom and indentation level." - self.stream.writelines([self._indent_atom] * self._indent_level) + self.stream.writelines([self._indent_atom] * self._depth) def format_pretty_xml(something): diff --git a/tests/bench.py b/tests/bench.py index 707107e..722e67b 100644 --- a/tests/bench.py +++ b/tests/bench.py @@ -84,7 +84,7 @@ def binary_stream(): def build_deep_xml(): deep_data = {} curr_data = deep_data - for i in range(250): + for i in range(198): curr_data["curr_data"] = {} curr_data["integer"] = 7 curr_data["string"] = "string" diff --git a/tests/llsd_test.py b/tests/llsd_test.py index 46f64fe..3c5d364 100644 --- a/tests/llsd_test.py +++ b/tests/llsd_test.py @@ -527,6 +527,26 @@ def testParseNotationHalfTruncatedHex(self): def testParseNotationInvalidHex(self): self.assertRaises(llsd.LLSDParseError, self.llsd.parse, b"'\\xzz'") + def testDeepMap(self): + """ + Test formatting of a deeply nested map + """ + + test_map = {"foo":"bar", "depth":0} + max_depth = 199 + for depth in range(max_depth): + test_map = {"foo":"bar", "depth":depth, "next":test_map} + + # this should not throw an exception. + test_notation_out = self.llsd.as_notation(test_map) + + test_notation_parsed = self.llsd.parse(io.BytesIO(test_notation_out)) + self.assertEqual(test_map, test_notation_parsed) + + test_map = {"foo":"bar", "depth":depth, "next":test_map} + # this should throw an exception. + self.assertRaises(llsd.LLSDSerializationError, self.llsd.as_notation, test_map) + class LLSDBinaryUnitTest(unittest.TestCase): """ @@ -964,6 +984,26 @@ def testParseDelimitedString(self): self.assertEqual('\t\x07\x08\x0c\n\r\t\x0b\x0fp', llsd.parse(delimited_string)) + def testDeepMap(self): + """ + Test formatting of a deeply nested map + """ + + test_map = {"foo":"bar", "depth":0} + max_depth = 199 + for depth in range(max_depth): + test_map = {"foo":"bar", "depth":depth, "next":test_map} + + # this should not throw an exception. + test_binary_out = self.llsd.as_binary(test_map) + + test_binary_parsed = self.llsd.parse(io.BytesIO(test_binary_out)) + self.assertEqual(test_map, test_binary_parsed) + + test_map = {"foo":"bar", "depth":depth, "next":test_map} + # this should throw an exception. + self.assertRaises(llsd.LLSDSerializationError, self.llsd.as_binary, test_map) + class LLSDPythonXMLUnitTest(unittest.TestCase): @@ -1345,20 +1385,6 @@ def testMap(self): map_within_map_xml) self.assertXMLRoundtrip({}, blank_map_xml) - def testDeepMap(self): - """ - Test that formatting a deeply nested map does not cause a RecursionError - """ - - test_map = {"foo":"bar", "depth":0, "next":None} - max_depth = 200 - for depth in range(max_depth): - test_map = {"foo":"bar", "depth":depth, "next":test_map} - - # this should not throw an exception. - test_xml = self.llsd.as_xml(test_map) - - def testBinary(self): """ Test the parse and serialization of input type : binary. @@ -1493,6 +1519,26 @@ def testFormatPrettyXML(self): self.assertEqual(result[result.find(b"?>") + 2: len(result)], format_xml_result[format_xml_result.find(b"?>") + 2: len(format_xml_result)]) + def testDeepMap(self): + """ + Test formatting of a deeply nested map + """ + + test_map = {"foo":"bar", "depth":0} + max_depth = 199 + for depth in range(max_depth): + test_map = {"foo":"bar", "depth":depth, "next":test_map} + + # this should not throw an exception. + test_xml_out = self.llsd.as_xml(test_map) + + test_xml_parsed = self.llsd.parse(io.BytesIO(test_xml_out)) + self.assertEqual(test_map, test_xml_parsed) + + test_map = {"foo":"bar", "depth":depth, "next":test_map} + # this should throw an exception. + self.assertRaises(llsd.LLSDSerializationError, self.llsd.as_xml, test_map) + def testLLSDSerializationFailure(self): """ Test serialization function as_xml with an object of non-supported type. From 78601c000c515a4ccb318122cc0b4e560452152a Mon Sep 17 00:00:00 2001 From: Roxie Linden Date: Fri, 7 Jul 2023 19:00:51 +0000 Subject: [PATCH 40/43] SL-19707 - maximum parse depth is now 200 --- llsd/base.py | 37 +++++++++++++++++++++---------------- llsd/serde_binary.py | 12 ++++++++++-- llsd/serde_notation.py | 10 ++++++++-- 3 files changed, 39 insertions(+), 20 deletions(-) diff --git a/llsd/base.py b/llsd/base.py index 3d0c4f8..4288068 100644 --- a/llsd/base.py +++ b/llsd/base.py @@ -32,6 +32,8 @@ ALL_CHARS = str(bytearray(range(256))) if PY2 else bytes(range(256)) MAX_FORMAT_DEPTH = 200 +MAX_PARSE_DEPTH = 200 + class _LLSD: __metaclass__ = abc.ABCMeta @@ -209,7 +211,7 @@ def _parse_datestr(datestr): return datetime.datetime(year, month, day, hour, minute, second, usec) -def _bool_to_python(node): +def _bool_to_python(node, depth=0): "Convert boolean node to a python object." val = node.text or '' try: @@ -220,7 +222,7 @@ def _bool_to_python(node): return bool(val) -def _int_to_python(node): +def _int_to_python(node, depth=0): "Convert integer node to a python object." val = node.text or '' if not val.strip(): @@ -228,7 +230,7 @@ def _int_to_python(node): return int(val) -def _real_to_python(node): +def _real_to_python(node, depth=0): "Convert floating point node to a python object." val = node.text or '' if not val.strip(): @@ -236,19 +238,19 @@ def _real_to_python(node): return float(val) -def _uuid_to_python(node): +def _uuid_to_python(node, depth=0): "Convert uuid node to a python object." if node.text: return uuid.UUID(hex=node.text) return uuid.UUID(int=0) -def _str_to_python(node): +def _str_to_python(node, depth=0): "Convert string node to a python object." return node.text or '' -def _bin_to_python(node): +def _bin_to_python(node, depth=0): base = node.get('encoding') or 'base64' try: if base == 'base16': @@ -267,7 +269,7 @@ def _bin_to_python(node): return LLSDParseError("Bad binary data: " + str(exc)) -def _date_to_python(node): +def _date_to_python(node, depth=0): "Convert date node to a python object." val = node.text or '' if not val: @@ -275,30 +277,30 @@ def _date_to_python(node): return _parse_datestr(val) -def _uri_to_python(node): +def _uri_to_python(node, depth=0): "Convert uri node to a python object." val = node.text or '' return uri(val) -def _map_to_python(node): +def _map_to_python(node, depth=0): "Convert map node to a python object." result = {} for index in range(len(node))[::2]: if node[index].text is None: - result[''] = _to_python(node[index+1]) + result[''] = _to_python(node[index+1], depth+1) else: - result[node[index].text] = _to_python(node[index+1]) + result[node[index].text] = _to_python(node[index+1], depth+1) return result -def _array_to_python(node): +def _array_to_python(node, depth=0): "Convert array node to a python object." - return [_to_python(child) for child in node] + return [_to_python(child, depth+1) for child in node] NODE_HANDLERS = dict( - undef=lambda x: None, + undef=lambda x,y: None, boolean=_bool_to_python, integer=_int_to_python, real=_real_to_python, @@ -312,9 +314,12 @@ def _array_to_python(node): ) -def _to_python(node): +def _to_python(node, depth=0): "Convert node to a python object." - return NODE_HANDLERS[node.tag](node) + if depth > MAX_PARSE_DEPTH: + raise LLSDParseError("Cannot serialize depth of more than %d" % MAX_FORMAT_DEPTH) + + return NODE_HANDLERS[node.tag](node, depth) class LLSDBaseFormatter(object): diff --git a/llsd/serde_binary.py b/llsd/serde_binary.py index 54f45c8..9e1c076 100644 --- a/llsd/serde_binary.py +++ b/llsd/serde_binary.py @@ -5,7 +5,7 @@ import uuid from llsd.base import (_LLSD, LLSDBaseParser, LLSDSerializationError, BINARY_HEADER, - MAX_FORMAT_DEPTH,_str_to_bytes, binary, is_integer, is_string, uri) + MAX_FORMAT_DEPTH, MAX_PARSE_DEPTH, _str_to_bytes, binary, is_integer, is_string, uri) try: @@ -21,7 +21,7 @@ class LLSDBinaryParser(LLSDBaseParser): See http://wiki.secondlife.com/wiki/LLSD#Binary_Serialization """ - __slots__ = ['_dispatch', '_keep_binary'] + __slots__ = ['_dispatch', '_keep_binary', '_depth'] def __init__(self): super(LLSDBinaryParser, self).__init__() @@ -62,6 +62,7 @@ def __init__(self): # entries in _dispatch. for c, func in _dispatch_dict.items(): self._dispatch[ord(c)] = func + self._depth = 0 def parse(self, something, ignore_binary = False): """ @@ -81,6 +82,9 @@ def parse(self, something, ignore_binary = False): def _parse(self): "The actual parser which is called recursively when necessary." + if self._depth > MAX_PARSE_DEPTH: + self._error("Parse depth exceeded max.") + cc = self._getc() try: func = self._dispatch[ord(cc)] @@ -96,6 +100,7 @@ def _parse_map(self): count = 0 cc = self._getc() key = b'' + self._depth = self._depth + 1 while (cc != b'}') and (count < size): if cc == b'k': key = self._parse_string() @@ -109,16 +114,19 @@ def _parse_map(self): cc = self._getc() if cc != b'}': self._error("invalid map close token") + self._depth = self._depth - 1 return rv def _parse_array(self): "Parse a single llsd array" rv = [] + self._depth = self._depth + 1 size = struct.unpack("!i", self._getc(4))[0] for count in range(size): rv.append(self._parse()) if self._getc() != b']': self._error("invalid array close token") + self._depth = self._depth - 1 return rv def _parse_string(self): diff --git a/llsd/serde_notation.py b/llsd/serde_notation.py index 22c3422..38a847c 100644 --- a/llsd/serde_notation.py +++ b/llsd/serde_notation.py @@ -4,7 +4,7 @@ import uuid from llsd.base import (_LLSD, B, LLSDBaseFormatter, LLSDBaseParser, NOTATION_HEADER, - MAX_FORMAT_DEPTH, LLSDParseError, LLSDSerializationError, UnicodeType, + MAX_FORMAT_DEPTH, MAX_PARSE_DEPTH, LLSDParseError, LLSDSerializationError, UnicodeType, _format_datestr, _parse_datestr, _str_to_bytes, binary, uri) @@ -70,6 +70,7 @@ def __init__(self): # Then fill in specific entries based on the dict above. for c, func in _dispatch_dict.items(): self._dispatch[ord(c)] = func + self._depth = 0 def parse(self, something, ignore_binary = False): """ @@ -107,6 +108,8 @@ def _get_until(self, delim): def _parse(self, cc): "The notation parser workhorse." + if self._depth > MAX_PARSE_DEPTH: + self._error("Parse depth exceeded max.") try: func = self._dispatch[ord(cc)] except IndexError: @@ -182,6 +185,7 @@ def _parse_map(self, cc): rv = {} key = b'' found_key = False + self._depth = self._depth + 1 # skip the beginning '{' cc = self._getc() while (cc != b'}'): @@ -207,6 +211,7 @@ def _parse_map(self, cc): else: self._error("missing separator") cc = self._getc() + self._depth = self._depth - 1 return rv @@ -217,6 +222,7 @@ def _parse_array(self, cc): array: [ object, object, object ] """ rv = [] + self._depth = self._depth + 1 # skip the beginning '[' cc = self._getc() while (cc != b']'): @@ -227,7 +233,7 @@ def _parse_array(self, cc): continue rv.append(self._parse(cc)) cc = self._getc() - + self._depth = self._depth - 1 return rv def _parse_uuid(self, cc): From 65863088f9fb2238f63e0b3e933eed8a3f047a1c Mon Sep 17 00:00:00 2001 From: Roxie Linden Date: Fri, 18 Aug 2023 17:20:00 +0000 Subject: [PATCH 41/43] CR changes --- llsd/base.py | 2 +- llsd/serde_binary.py | 6 +++--- llsd/serde_notation.py | 2 +- llsd/serde_xml.py | 14 +++++--------- tests/llsd_test.py | 8 ++++---- 5 files changed, 14 insertions(+), 18 deletions(-) diff --git a/llsd/base.py b/llsd/base.py index 4288068..71040d2 100644 --- a/llsd/base.py +++ b/llsd/base.py @@ -317,7 +317,7 @@ def _array_to_python(node, depth=0): def _to_python(node, depth=0): "Convert node to a python object." if depth > MAX_PARSE_DEPTH: - raise LLSDParseError("Cannot serialize depth of more than %d" % MAX_FORMAT_DEPTH) + raise LLSDSerializationError("Cannot serialize depth of more than %d" % MAX_FORMAT_DEPTH) return NODE_HANDLERS[node.tag](node, depth) diff --git a/llsd/serde_binary.py b/llsd/serde_binary.py index 9e1c076..1a4ce48 100644 --- a/llsd/serde_binary.py +++ b/llsd/serde_binary.py @@ -83,7 +83,7 @@ def parse(self, something, ignore_binary = False): def _parse(self): "The actual parser which is called recursively when necessary." if self._depth > MAX_PARSE_DEPTH: - self._error("Parse depth exceeded max.") + self._error("Parse depth exceeded maximum depth of %d." % MAX_PARSE_DEPTH) cc = self._getc() try: @@ -100,7 +100,7 @@ def _parse_map(self): count = 0 cc = self._getc() key = b'' - self._depth = self._depth + 1 + self._depth += 1 while (cc != b'}') and (count < size): if cc == b'k': key = self._parse_string() @@ -114,7 +114,7 @@ def _parse_map(self): cc = self._getc() if cc != b'}': self._error("invalid map close token") - self._depth = self._depth - 1 + self._depth -= 1 return rv def _parse_array(self): diff --git a/llsd/serde_notation.py b/llsd/serde_notation.py index 38a847c..bf522e4 100644 --- a/llsd/serde_notation.py +++ b/llsd/serde_notation.py @@ -109,7 +109,7 @@ def _get_until(self, delim): def _parse(self, cc): "The notation parser workhorse." if self._depth > MAX_PARSE_DEPTH: - self._error("Parse depth exceeded max.") + self._error("Parse depth exceeded max of %d" % MAX_PARSE_DEPTH) try: func = self._dispatch[ord(cc)] except IndexError: diff --git a/llsd/serde_xml.py b/llsd/serde_xml.py index 63b342a..7ec45cf 100644 --- a/llsd/serde_xml.py +++ b/llsd/serde_xml.py @@ -69,12 +69,12 @@ class LLSDXMLFormatter(LLSDBaseFormatter): interface to this functionality. """ - def __init__(self, indent_atom = None): + def __init__(self, indent_atom = b'', eol = b''): "Construct a serializer." # Call the super class constructor so that we have the type map super(LLSDXMLFormatter, self).__init__() - self._indent_atom = b'' - self._eol = b'' + self._indent_atom = indent_atom + self._eol = eol self._depth = 1 def _indent(self): @@ -178,14 +178,10 @@ class LLSDXMLPrettyFormatter(LLSDXMLFormatter): This class is not necessarily suited for serializing very large objects. It sorts on dict (llsd map) keys alphabetically to ease human reading. """ - def __init__(self, indent_atom = b' '): + def __init__(self, indent_atom = b' ', eol = b'\n'): "Construct a pretty serializer." # Call the super class constructor so that we have the type map - super(LLSDXMLPrettyFormatter, self).__init__() - - # Private data used for indentation. - self._indent_atom = indent_atom - self._eol = b'\n' + super(LLSDXMLPrettyFormatter, self).__init__(indent_atom = indent_atom, eol = eol) def _indent(self): "Write an indentation based on the atom and indentation level." diff --git a/tests/llsd_test.py b/tests/llsd_test.py index 3c5d364..b274a39 100644 --- a/tests/llsd_test.py +++ b/tests/llsd_test.py @@ -16,7 +16,7 @@ import pytest import llsd -from llsd.base import PY2, is_integer, is_string, is_unicode +from llsd.base import PY2, is_integer, is_string, is_unicode, MAX_FORMAT_DEPTH, MAX_PARSE_DEPTH from llsd.serde_xml import remove_invalid_xml_bytes from tests.fuzz import LLSDFuzzer @@ -533,7 +533,7 @@ def testDeepMap(self): """ test_map = {"foo":"bar", "depth":0} - max_depth = 199 + max_depth = MAX_FORMAT_DEPTH - 1 for depth in range(max_depth): test_map = {"foo":"bar", "depth":depth, "next":test_map} @@ -990,7 +990,7 @@ def testDeepMap(self): """ test_map = {"foo":"bar", "depth":0} - max_depth = 199 + max_depth = MAX_FORMAT_DEPTH -1 for depth in range(max_depth): test_map = {"foo":"bar", "depth":depth, "next":test_map} @@ -1525,7 +1525,7 @@ def testDeepMap(self): """ test_map = {"foo":"bar", "depth":0} - max_depth = 199 + max_depth = MAX_FORMAT_DEPTH - 1 for depth in range(max_depth): test_map = {"foo":"bar", "depth":depth, "next":test_map} From c5e41c8eb96d6024545012b09e215cc6b9ce9572 Mon Sep 17 00:00:00 2001 From: Roxie Linden Date: Thu, 7 Sep 2023 19:13:17 +0000 Subject: [PATCH 42/43] CR fixes --- llsd/base.py | 2 +- llsd/serde_binary.py | 4 ++-- llsd/serde_notation.py | 16 ++++++++-------- llsd/serde_xml.py | 8 ++++---- 4 files changed, 15 insertions(+), 15 deletions(-) diff --git a/llsd/base.py b/llsd/base.py index 71040d2..2a03219 100644 --- a/llsd/base.py +++ b/llsd/base.py @@ -317,7 +317,7 @@ def _array_to_python(node, depth=0): def _to_python(node, depth=0): "Convert node to a python object." if depth > MAX_PARSE_DEPTH: - raise LLSDSerializationError("Cannot serialize depth of more than %d" % MAX_FORMAT_DEPTH) + raise LLSDParseError("Cannot parse depth of more than %d" % MAX_FORMAT_DEPTH) return NODE_HANDLERS[node.tag](node, depth) diff --git a/llsd/serde_binary.py b/llsd/serde_binary.py index 1a4ce48..e4ac7c5 100644 --- a/llsd/serde_binary.py +++ b/llsd/serde_binary.py @@ -120,13 +120,13 @@ def _parse_map(self): def _parse_array(self): "Parse a single llsd array" rv = [] - self._depth = self._depth + 1 + self._depth += 1 size = struct.unpack("!i", self._getc(4))[0] for count in range(size): rv.append(self._parse()) if self._getc() != b']': self._error("invalid array close token") - self._depth = self._depth - 1 + self._depth -= 1 return rv def _parse_string(self): diff --git a/llsd/serde_notation.py b/llsd/serde_notation.py index bf522e4..8e1d3e2 100644 --- a/llsd/serde_notation.py +++ b/llsd/serde_notation.py @@ -185,7 +185,7 @@ def _parse_map(self, cc): rv = {} key = b'' found_key = False - self._depth = self._depth + 1 + self._depth += 1 # skip the beginning '{' cc = self._getc() while (cc != b'}'): @@ -211,7 +211,7 @@ def _parse_map(self, cc): else: self._error("missing separator") cc = self._getc() - self._depth = self._depth - 1 + self._depth -= 1 return rv @@ -222,7 +222,7 @@ def _parse_array(self, cc): array: [ object, object, object ] """ rv = [] - self._depth = self._depth + 1 + self._depth += 1 # skip the beginning '[' cc = self._getc() while (cc != b']'): @@ -233,7 +233,7 @@ def _parse_array(self, cc): continue rv.append(self._parse(cc)) cc = self._getc() - self._depth = self._depth - 1 + self._depth -= 1 return rv def _parse_uuid(self, cc): @@ -454,22 +454,22 @@ def _DATE(self, v): def _ARRAY(self, v): self.stream.write(b'[') delim = b'' - self._depth = self._depth + 1 + self._depth += 1 for item in v: self.stream.write(delim) self._generate(item) delim = b',' - self._depth = self._depth - 1 + self._depth -= 1 self.stream.write(b']') def _MAP(self, v): self.stream.write(b'{') delim = b'' - self._depth = self._depth + 1 + self._depth += 1 for key, value in v.items(): self.stream.writelines([delim, b"'", self._esc(UnicodeType(key)), b"':"]) self._generate(value) delim = b',' - self._depth = self._depth - 1 + self._depth -= 1 self.stream.write(b'}') def _esc(self, data, quote=b"'"): diff --git a/llsd/serde_xml.py b/llsd/serde_xml.py index 7ec45cf..e3367b6 100644 --- a/llsd/serde_xml.py +++ b/llsd/serde_xml.py @@ -114,15 +114,15 @@ def _DATE(self, v): self.stream.writelines([b'', _format_datestr(v), b'', self._eol]) def _ARRAY(self, v): self.stream.writelines([b'', self._eol]) - self._depth = self._depth + 1 + self._depth += 1 for item in v: self._indent() self._generate(item) - self._depth = self._depth - 1 + self._depth -= 1 self.stream.writelines([b'', self._eol]) def _MAP(self, v): self.stream.writelines([b'', self._eol]) - self._depth = self._depth + 1 + self._depth += 1 for key, value in v.items(): self._indent() if PY2: # pragma: no cover @@ -137,7 +137,7 @@ def _MAP(self, v): self._eol]) self._indent() self._generate(value) - self._depth = self._depth - 1 + self._depth -= 1 self._indent() self.stream.writelines([b'', self._eol]) From 2432466eb861ac3c01347d88dffabbb67c576846 Mon Sep 17 00:00:00 2001 From: Roxie Linden Date: Thu, 7 Sep 2023 19:47:21 +0000 Subject: [PATCH 43/43] CR fixes --- llsd/base.py | 2 +- llsd/serde_xml.py | 20 ++++++++++---------- 2 files changed, 11 insertions(+), 11 deletions(-) diff --git a/llsd/base.py b/llsd/base.py index 2a03219..e7204ca 100644 --- a/llsd/base.py +++ b/llsd/base.py @@ -317,7 +317,7 @@ def _array_to_python(node, depth=0): def _to_python(node, depth=0): "Convert node to a python object." if depth > MAX_PARSE_DEPTH: - raise LLSDParseError("Cannot parse depth of more than %d" % MAX_FORMAT_DEPTH) + raise LLSDParseError("Cannot parse depth of more than %d" % MAX_PARSE_DEPTH) return NODE_HANDLERS[node.tag](node, depth) diff --git a/llsd/serde_xml.py b/llsd/serde_xml.py index e3367b6..a7da4e7 100644 --- a/llsd/serde_xml.py +++ b/llsd/serde_xml.py @@ -98,18 +98,18 @@ def _UUID(self, v): self.stream.writelines([b'', str(v).encode('utf-8'), b'', self._eol]) def _BINARY(self, v): self.stream.writelines([b'', base64.b64encode(v).strip(), b'', self._eol]) - def _STRING(self, v): - # We don't simply have a function that encapsulates the PY2 vs PY3 calls, - # as that results in another function call and is slightly less performant - if PY2: # pragma: no cover + + if PY2: + def _STRING(self, v): return self.stream.writelines([b'', _str_to_bytes(xml_esc(v)), b'', self._eol]) - self.stream.writelines([b'', v.translate(XML_ESC_TRANS).encode('utf-8'), b'', self._eol]) - def _URI(self, v): - # We don't simply have a function that encapsulates the PY2 vs PY3 calls, - # as that results in another function call and is slightly less performant - if PY2: # pragma: no cover + def _URI(self, v): return self.stream.writelines([b'', _str_to_bytes(xml_esc(v)), b'', self._eol]) - self.stream.writelines([b'', str(v).translate(XML_ESC_TRANS).encode('utf-8'), b'', self._eol]) + else: + def _STRING(self, v): + self.stream.writelines([b'', v.translate(XML_ESC_TRANS).encode('utf-8'), b'', self._eol]) + def _URI(self, v): + self.stream.writelines([b'', str(v).translate(XML_ESC_TRANS).encode('utf-8'), b'', self._eol]) + def _DATE(self, v): self.stream.writelines([b'', _format_datestr(v), b'', self._eol]) def _ARRAY(self, v):