Skip to content

Commit

Permalink
Merge pull request #15 from secondlife/SRV-439
Browse files: browse the repository at this point in the history
SRV-439 - performance optimizations for string handling in xml formatting
  • Loading branch information
roxanneskelly authored Sep 7, 2023
2 parents a63abbe + 2432466 commit b703873
Show file tree
Hide file tree
Showing 7 changed files with 295 additions and 154 deletions.
14 changes: 8 additions & 6 deletions .github/workflows/ci.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -13,21 +13,23 @@ jobs:
matrix:
python-version: ['2.7', '3.7', '3.8', '3.10']
runs-on: [ubuntu-latest]
container:
image: "python:${{ matrix.python-version }}-buster"
env:
PYTHON: ${{ matrix.python-version }}
steps:
- uses: actions/checkout@v3
with:
fetch-depth: 0 # fetch all history for setuptools_scm to be able to read tags

- uses: actions/setup-python@v4
with:
python-version: ${{ matrix.python-version }}

- name: Install python dependencies
run: |
pip install wheel build tox
pip install .[dev]
apt-get update
apt-get -y install sudo
pip install --upgrade pip
sudo chown root .
sudo -H pip install wheel build tox
sudo -H pip install .[dev]
- name: Determine pyenv
id: pyenv
Expand Down
37 changes: 21 additions & 16 deletions llsd/base.py
Original file line number Diff line number Diff line change
Expand Up @@ -31,6 +31,8 @@

ALL_CHARS = str(bytearray(range(256))) if PY2 else bytes(range(256))

MAX_FORMAT_DEPTH = 200
MAX_PARSE_DEPTH = 200

class _LLSD:
__metaclass__ = abc.ABCMeta
Expand Down Expand Up @@ -209,7 +211,7 @@ def _parse_datestr(datestr):
return datetime.datetime(year, month, day, hour, minute, second, usec)


def _bool_to_python(node):
def _bool_to_python(node, depth=0):
"Convert boolean node to a python object."
val = node.text or ''
try:
Expand All @@ -220,35 +222,35 @@ def _bool_to_python(node):
return bool(val)


def _int_to_python(node, depth=0):
    """
    Convert an integer node to a python int.

    :param node: element whose ``text`` holds the integer literal.
    :param depth: nesting depth supplied by ``_to_python``; not used here.
    :returns: 0 when the text is missing or only whitespace, else ``int(text)``.
    :raises ValueError: if the text is not a valid integer literal.
    """
    val = node.text or ''
    if not val.strip():
        return 0
    return int(val)


def _real_to_python(node, depth=0):
    """
    Convert a floating point node to a python float.

    :param node: element whose ``text`` holds the real literal.
    :param depth: nesting depth supplied by ``_to_python``; not used here.
    :returns: 0.0 when the text is missing or only whitespace, else
        ``float(text)``.
    :raises ValueError: if the text cannot be parsed by ``float``.
    """
    val = node.text or ''
    if not val.strip():
        return 0.0
    return float(val)


def _uuid_to_python(node, depth=0):
    """
    Convert a uuid node to a python ``uuid.UUID``.

    :param node: element whose ``text`` holds the uuid in hex form.
    :param depth: nesting depth supplied by ``_to_python``; not used here.
    :returns: the parsed UUID, or the all-zero UUID when the node has no text.
    :raises ValueError: if the text is not a valid hex uuid.
    """
    if node.text:
        return uuid.UUID(hex=node.text)
    return uuid.UUID(int=0)


def _str_to_python(node, depth=0):
    """
    Convert a string node to a python string.

    :param node: element whose ``text`` is the string value.
    :param depth: nesting depth supplied by ``_to_python``; not used here.
    :returns: the node text, or ``''`` when the node has no text.
    """
    return node.text or ''


def _bin_to_python(node):
def _bin_to_python(node, depth=0):
base = node.get('encoding') or 'base64'
try:
if base == 'base16':
Expand All @@ -267,38 +269,38 @@ def _bin_to_python(node):
return LLSDParseError("Bad binary data: " + str(exc))


def _date_to_python(node, depth=0):
    """
    Convert a date node to a python object via ``_parse_datestr``.

    :param node: element whose ``text`` holds the date string.
    :param depth: nesting depth supplied by ``_to_python``; not used here.
    :returns: whatever ``_parse_datestr`` returns for the text; a node with
        no text is parsed as ``1970-01-01T00:00:00Z``.
    """
    val = node.text or ''
    if not val:
        # An empty date node stands for the epoch.
        val = "1970-01-01T00:00:00Z"
    return _parse_datestr(val)


def _uri_to_python(node, depth=0):
    """
    Convert a uri node to a ``uri`` object.

    :param node: element whose ``text`` holds the uri.
    :param depth: nesting depth supplied by ``_to_python``; not used here.
    :returns: ``uri(text)``, built from ``''`` when the node has no text.
    """
    val = node.text or ''
    return uri(val)


def _map_to_python(node, depth=0):
    """
    Convert a map node to a python dict.

    The children of ``node`` are read as alternating key / value elements.

    :param node: map element to convert.
    :param depth: nesting depth of this map; each value is converted at
        ``depth + 1`` so ``_to_python`` can enforce its depth limit.
    :returns: dict mapping each key's text (``''`` when the key element has
        no text) to the converted value.
    :raises LLSDParseError: if the last key has no value element after it.
    """
    if len(node) % 2:
        # A trailing key without a value would otherwise surface as a bare
        # IndexError from node[index + 1].
        raise LLSDParseError("Map key without a matching value")
    result = {}
    for index in range(0, len(node), 2):
        key = node[index].text
        result['' if key is None else key] = _to_python(node[index + 1], depth + 1)
    return result


def _array_to_python(node, depth=0):
    """
    Convert an array node to a python list.

    :param node: array element whose children are the items.
    :param depth: nesting depth of this array; each item is converted at
        ``depth + 1`` so ``_to_python`` can enforce its depth limit.
    :returns: list of converted children, in document order.
    """
    return [_to_python(child, depth + 1) for child in node]


NODE_HANDLERS = dict(
undef=lambda x: None,
undef=lambda x,y: None,
boolean=_bool_to_python,
integer=_int_to_python,
real=_real_to_python,
Expand All @@ -312,9 +314,12 @@ def _array_to_python(node):
)


def _to_python(node, depth=0):
    """
    Convert a node to a python object by dispatching on its tag.

    :param node: element to convert; ``node.tag`` selects the handler from
        ``NODE_HANDLERS``.
    :param depth: nesting depth of ``node``; container handlers pass
        ``depth + 1`` for their children.
    :returns: the value produced by the handler for ``node.tag``.
    :raises LLSDParseError: if ``depth`` exceeds ``MAX_PARSE_DEPTH``.
    :raises KeyError: if ``node.tag`` has no entry in ``NODE_HANDLERS``.
    """
    # Check the limit before dispatching so nesting stops at
    # MAX_PARSE_DEPTH instead of recursing further.
    if depth > MAX_PARSE_DEPTH:
        raise LLSDParseError("Cannot parse depth of more than %d" % MAX_PARSE_DEPTH)

    return NODE_HANDLERS[node.tag](node, depth)


class LLSDBaseFormatter(object):
Expand Down
33 changes: 22 additions & 11 deletions llsd/serde_binary.py
Original file line number Diff line number Diff line change
Expand Up @@ -5,7 +5,7 @@
import uuid

from llsd.base import (_LLSD, LLSDBaseParser, LLSDSerializationError, BINARY_HEADER,
_str_to_bytes, binary, is_integer, is_string, uri)
MAX_FORMAT_DEPTH, MAX_PARSE_DEPTH, _str_to_bytes, binary, is_integer, is_string, uri)


try:
Expand All @@ -15,14 +15,13 @@
# Python 3: 'range()' is already lazy
pass


class LLSDBinaryParser(LLSDBaseParser):
"""
Parse application/llsd+binary to a python object.
See http://wiki.secondlife.com/wiki/LLSD#Binary_Serialization
"""
__slots__ = ['_dispatch', '_keep_binary']
__slots__ = ['_dispatch', '_keep_binary', '_depth']

def __init__(self):
super(LLSDBinaryParser, self).__init__()
Expand Down Expand Up @@ -63,6 +62,7 @@ def __init__(self):
# entries in _dispatch.
for c, func in _dispatch_dict.items():
self._dispatch[ord(c)] = func
self._depth = 0

def parse(self, something, ignore_binary = False):
"""
Expand All @@ -82,6 +82,9 @@ def parse(self, something, ignore_binary = False):

def _parse(self):
"The actual parser which is called recursively when necessary."
if self._depth > MAX_PARSE_DEPTH:
self._error("Parse depth exceeded maximum depth of %d." % MAX_PARSE_DEPTH)

cc = self._getc()
try:
func = self._dispatch[ord(cc)]
Expand All @@ -97,6 +100,7 @@ def _parse_map(self):
count = 0
cc = self._getc()
key = b''
self._depth += 1
while (cc != b'}') and (count < size):
if cc == b'k':
key = self._parse_string()
Expand All @@ -110,16 +114,19 @@ def _parse_map(self):
cc = self._getc()
if cc != b'}':
self._error("invalid map close token")
self._depth -= 1
return rv

def _parse_array(self):
    """
    Parse a single llsd array.

    Reads a 4-byte network-order element count, then that many values via
    ``self._parse()``, then expects a closing ``]``.

    :returns: list of the parsed elements.
    """
    rv = []
    self._depth += 1
    try:
        size = struct.unpack("!i", self._getc(4))[0]
        for _ in range(size):
            rv.append(self._parse())
        if self._getc() != b']':
            self._error("invalid array close token")
    finally:
        # Restore the depth even when a nested parse or the close-token
        # check fails, so a failed parse does not leave the counter raised.
        self._depth -= 1
    return rv

def _parse_string(self):
Expand Down Expand Up @@ -164,15 +171,19 @@ def format_binary(something):

def write_binary(stream, something):
    """
    Write ``something`` to ``stream`` as binary llsd.

    Emits the ``<?llsd/binary?>`` header line, then the serialized value.

    :param stream: writable binary stream.
    :param something: python object to serialize.
    """
    stream.write(b'<?llsd/binary?>\n')
    # The top-level value is at depth 0; containers add one per level.
    _write_binary_recurse(stream, something, 0)


def _write_binary_recurse(stream, something):
def _write_binary_recurse(stream, something, depth):
"Binary formatter workhorse."

if depth > MAX_FORMAT_DEPTH:
raise LLSDSerializationError("Cannot serialize depth of more than %d" % MAX_FORMAT_DEPTH)

if something is None:
stream.write(b'!')
elif isinstance(something, _LLSD):
_write_binary_recurse(stream, something.thing)
_write_binary_recurse(stream, something.thing, depth)
elif isinstance(something, bool):
stream.write(b'1' if something else b'0')
elif is_integer(something):
Expand Down Expand Up @@ -202,27 +213,27 @@ def _write_binary_recurse(stream, something):
seconds_since_epoch = calendar.timegm(something.timetuple())
stream.writelines([b'd', struct.pack('<d', seconds_since_epoch)])
elif isinstance(something, (list, tuple)):
_write_list(stream, something)
_write_list(stream, something, depth)
elif isinstance(something, dict):
stream.writelines([b'{', struct.pack('!i', len(something))])
for key, value in something.items():
key = _str_to_bytes(key)
stream.writelines([b'k', struct.pack('!i', len(key)), key])
_write_binary_recurse(stream, value)
_write_binary_recurse(stream, value, depth+1)
stream.write(b'}')
else:
try:
return _write_list(stream, list(something))
return _write_list(stream, list(something), depth)
except TypeError:
raise LLSDSerializationError(
"Cannot serialize unknown type: %s (%s)" %
(type(something), something))


def _write_list(stream, something, depth):
    """
    Write a sequence to ``stream`` as a binary llsd array.

    The output is ``[``, the 4-byte network-order length, each item in
    order, then ``]``.

    :param stream: writable binary stream.
    :param something: sized iterable of items to serialize.
    :param depth: nesting depth of this array; items are written at
        ``depth + 1``.
    """
    stream.writelines([b'[', struct.pack('!i', len(something))])
    for item in something:
        _write_binary_recurse(stream, item, depth + 1)
    stream.write(b']')


Expand Down
22 changes: 20 additions & 2 deletions llsd/serde_notation.py
Original file line number Diff line number Diff line change
Expand Up @@ -4,7 +4,7 @@
import uuid

from llsd.base import (_LLSD, B, LLSDBaseFormatter, LLSDBaseParser, NOTATION_HEADER,
LLSDParseError, LLSDSerializationError, UnicodeType,
MAX_FORMAT_DEPTH, MAX_PARSE_DEPTH, LLSDParseError, LLSDSerializationError, UnicodeType,
_format_datestr, _parse_datestr, _str_to_bytes, binary, uri)


Expand Down Expand Up @@ -70,6 +70,7 @@ def __init__(self):
# Then fill in specific entries based on the dict above.
for c, func in _dispatch_dict.items():
self._dispatch[ord(c)] = func
self._depth = 0

def parse(self, something, ignore_binary = False):
"""
Expand Down Expand Up @@ -107,6 +108,8 @@ def _get_until(self, delim):

def _parse(self, cc):
"The notation parser workhorse."
if self._depth > MAX_PARSE_DEPTH:
self._error("Parse depth exceeded max of %d" % MAX_PARSE_DEPTH)
try:
func = self._dispatch[ord(cc)]
except IndexError:
Expand Down Expand Up @@ -182,6 +185,7 @@ def _parse_map(self, cc):
rv = {}
key = b''
found_key = False
self._depth += 1
# skip the beginning '{'
cc = self._getc()
while (cc != b'}'):
Expand All @@ -207,6 +211,7 @@ def _parse_map(self, cc):
else:
self._error("missing separator")
cc = self._getc()
self._depth -= 1

return rv

Expand All @@ -217,6 +222,7 @@ def _parse_array(self, cc):
array: [ object, object, object ]
"""
rv = []
self._depth += 1
# skip the beginning '['
cc = self._getc()
while (cc != b']'):
Expand All @@ -227,7 +233,7 @@ def _parse_array(self, cc):
continue
rv.append(self._parse(cc))
cc = self._getc()

self._depth -= 1
return rv

def _parse_uuid(self, cc):
Expand Down Expand Up @@ -411,6 +417,11 @@ class LLSDNotationFormatter(LLSDBaseFormatter):
See http://wiki.secondlife.com/wiki/LLSD#Notation_Serialization
"""

def __init__(self):
    "Set up the base formatter and start the nesting depth at zero."
    super(LLSDNotationFormatter, self).__init__()
    # Raised by _ARRAY/_MAP while writing their contents and checked
    # against MAX_FORMAT_DEPTH in _generate.
    self._depth = 0

def _LLSD(self, v):
    "Serialize the object held in ``v.thing`` via ``_generate``."
    return self._generate(v.thing)
def _UNDEF(self, v):
Expand Down Expand Up @@ -443,18 +454,22 @@ def _DATE(self, v):
def _ARRAY(self, v):
    """
    Write the iterable ``v`` to ``self.stream`` as a notation array.

    Items are written through ``self._generate``, separated by commas and
    enclosed in ``[`` ... ``]``.
    """
    self.stream.write(b'[')
    delim = b''
    self._depth += 1
    try:
        for item in v:
            self.stream.write(delim)
            self._generate(item)
            delim = b','
    finally:
        # Restore the depth even if an item fails to serialize, so the
        # counter on this formatter is not left raised.
        self._depth -= 1
    self.stream.write(b']')
def _MAP(self, v):
    """
    Write the mapping ``v`` to ``self.stream`` as a notation map.

    Each entry is written as ``'key':value``, with the key passed through
    ``UnicodeType`` and ``self._esc`` and the value through
    ``self._generate``. Entries are comma-separated inside ``{`` ... ``}``.
    """
    self.stream.write(b'{')
    delim = b''
    self._depth += 1
    try:
        for key, value in v.items():
            self.stream.writelines([delim, b"'", self._esc(UnicodeType(key)), b"':"])
            self._generate(value)
            delim = b','
    finally:
        # Restore the depth even if a value fails to serialize, so the
        # counter on this formatter is not left raised.
        self._depth -= 1
    self.stream.write(b'}')

def _esc(self, data, quote=b"'"):
Expand All @@ -466,6 +481,9 @@ def _generate(self, something):
:param something: a python object (typically a dict) to be serialized.
"""
if self._depth > MAX_FORMAT_DEPTH:
raise LLSDSerializationError("Cannot serialize depth of more than %d" % MAX_FORMAT_DEPTH)

t = type(something)
handler = self.type_map.get(t)
if handler:
Expand Down
Loading

1 comment on commit b703873

@github-actions
Copy link

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

⚠️ Performance Alert ⚠️

Possible performance regression was detected for benchmark 'Python Benchmarks'.
Benchmark result of this commit is worse than the previous benchmark result exceeding threshold 1.10.

| Benchmark suite | Current: b703873 | Previous: a63abbe | Ratio |
|---|---|---|---|
| tests/bench.py::test_format_notation | 23281.178240620364 iter/sec (stddev: 0.000012682945522827392) | 26326.55225540916 iter/sec (stddev: 0.000011888050812629382) | 1.13 |

This comment was automatically generated by workflow using github-action-benchmark.

Please sign in to comment.