From c28f864acbcf943fc1cb8a03ac792bf32d6b1581 Mon Sep 17 00:00:00 2001 From: Fox-IT Security Research Team Date: Wed, 7 Oct 2020 15:42:25 +0200 Subject: [PATCH] Release version 2.0 of dissect.cstruct --- .gitignore | 11 +- MANIFEST.in | 2 + README.md | 53 +- dissect/cstruct/__init__.py | 63 +- dissect/cstruct/bitbuffer.py | 50 + dissect/cstruct/compiler.py | 408 ++++++ dissect/cstruct/cstruct.py | 1783 ++----------------------- dissect/cstruct/exceptions.py | 16 + dissect/cstruct/expression.py | 77 ++ dissect/cstruct/parser.py | 582 ++++++++ dissect/cstruct/types/__init__.py | 0 dissect/cstruct/types/base.py | 192 +++ dissect/cstruct/types/bytesinteger.py | 90 ++ dissect/cstruct/types/chartype.py | 52 + dissect/cstruct/types/enum.py | 113 ++ dissect/cstruct/types/flag.py | 105 ++ dissect/cstruct/types/instance.py | 68 + dissect/cstruct/types/packedtype.py | 53 + dissect/cstruct/types/pointer.py | 64 + dissect/cstruct/types/structure.py | 254 ++++ dissect/cstruct/types/voidtype.py | 11 + dissect/cstruct/types/wchartype.py | 55 + dissect/cstruct/utils.py | 175 +++ examples/disk.py | 36 +- examples/mirai.py | 7 +- examples/pe.py | 13 +- examples/secdesc.py | 114 +- pyproject.toml | 5 + setup.py | 6 +- tests/test_basic.py | 644 ++++++++- tests/test_compiled.py | 244 ---- tests/test_expression.py | 74 + tox.ini | 48 +- 33 files changed, 3379 insertions(+), 2089 deletions(-) create mode 100644 MANIFEST.in create mode 100644 dissect/cstruct/bitbuffer.py create mode 100644 dissect/cstruct/compiler.py create mode 100644 dissect/cstruct/exceptions.py create mode 100644 dissect/cstruct/expression.py create mode 100644 dissect/cstruct/parser.py create mode 100644 dissect/cstruct/types/__init__.py create mode 100644 dissect/cstruct/types/base.py create mode 100644 dissect/cstruct/types/bytesinteger.py create mode 100644 dissect/cstruct/types/chartype.py create mode 100644 dissect/cstruct/types/enum.py create mode 100644 dissect/cstruct/types/flag.py create mode 100644 
dissect/cstruct/types/instance.py create mode 100644 dissect/cstruct/types/packedtype.py create mode 100644 dissect/cstruct/types/pointer.py create mode 100644 dissect/cstruct/types/structure.py create mode 100644 dissect/cstruct/types/voidtype.py create mode 100644 dissect/cstruct/types/wchartype.py create mode 100644 dissect/cstruct/utils.py create mode 100644 pyproject.toml delete mode 100644 tests/test_compiled.py create mode 100644 tests/test_expression.py diff --git a/.gitignore b/.gitignore index d19657a..032b2dd 100644 --- a/.gitignore +++ b/.gitignore @@ -1,7 +1,6 @@ -*~ -.*.swp *.pyc -*.egg-info -.DS_Store -.tox -.pytest_cache +*.egg-info/ +.tox/ +.pytest_cache/ +.eggs/ +dist/ diff --git a/MANIFEST.in b/MANIFEST.in new file mode 100644 index 0000000..60e3204 --- /dev/null +++ b/MANIFEST.in @@ -0,0 +1,2 @@ +exclude .gitignore +exclude .gitlab-ci.yml diff --git a/README.md b/README.md index 09c3490..7a26f90 100644 --- a/README.md +++ b/README.md @@ -74,6 +74,55 @@ assert cs.uint24[2](b'\x01\x00\x00\x02\x00\x00') == [1, 2] # You can also parse assert cs.char[None](b'hello world!\x00') == b'hello world!' # A list index of None means null terminated ``` +### Unions and nested structures +Unions and nested structures are supported, both anonymous and named. 
+ +```python +cdef = """ +struct test_union { + char magic[4]; + union { + struct { + uint32 a; + uint32 b; + } a; + struct { + char b[8]; + } b; + } c; +}; + +struct test_anonymous { + char magic[4]; + struct { + uint32 a; + uint32 b; + }; + struct { + char c[8]; + }; +}; +""" +c = cstruct.cstruct() +c.load(cdef) + +assert len(c.test_union) == 12 + +a = c.test_union(b'ohaideadbeef') +assert a.magic == b'ohai' +assert a.c.a.a == 0x64616564 +assert a.c.a.b == 0x66656562 +assert a.c.b.b == b'deadbeef' + +assert a.dumps() == b'ohaideadbeef' + +b = c.test_anonymous(b'ohai\x39\x05\x00\x00\x28\x23\x00\x00deadbeef') +assert b.magic == b'ohai' +assert b.a == 1337 +assert b.b == 9000 +assert b.c == b'deadbeef' +``` + ### Parse bit fields Bit fields are supported as part of structures. They are properly aligned to their boundaries. @@ -109,7 +158,3 @@ You can implement your own types by subclassing `BaseType` or `RawType`, and add ### Custom definition parsers Don't like the C-like definition syntax? Write your own syntax parser! 
- -## Todo -- Nested structure definitions -- Unions diff --git a/dissect/cstruct/__init__.py b/dissect/cstruct/__init__.py index 34da3a8..ea0ea49 100644 --- a/dissect/cstruct/__init__.py +++ b/dissect/cstruct/__init__.py @@ -1,33 +1,64 @@ +from dissect.cstruct.compiler import Compiler +from dissect.cstruct.expression import Expression +from dissect.cstruct.types.base import Array, BaseType, RawType +from dissect.cstruct.types.chartype import CharType +from dissect.cstruct.types.instance import Instance +from dissect.cstruct.types.structure import Structure, Field, Union +from dissect.cstruct.types.voidtype import VoidType +from dissect.cstruct.types.wchartype import WcharType +from dissect.cstruct.types.packedtype import PackedType +from dissect.cstruct.types.flag import Flag, FlagInstance +from dissect.cstruct.types.enum import Enum, EnumInstance +from dissect.cstruct.types.bytesinteger import BytesInteger +from dissect.cstruct.types.pointer import Pointer, PointerInstance + +from dissect.cstruct.exceptions import ( + Error, + ParserError, + ResolveError, + NullPointerDereference, +) + from dissect.cstruct.cstruct import ( cstruct, ctypes, +) + +from dissect.cstruct.utils import ( dumpstruct, hexdump, - Instance, - PointerInstance, - Parser, - RawType, - BaseType, - Error, - ParserError, - CompilerError, - ResolveError, - NullPointerDereference, ) +from dissect.cstruct.bitbuffer import BitBuffer + __all__ = [ - "cstruct", - "ctypes", - "dumpstruct", - "hexdump", + "Compiler", + "Array", + "Union", + "Field", "Instance", + "Structure", + "Expression", + "PackedType", + "Pointer", "PointerInstance", - "Parser", + "VoidType", + "WcharType", "RawType", "BaseType", + "CharType", + "Enum", + "EnumInstance", + "Flag", + "FlagInstance", + "BytesInteger", + "BitBuffer", + "cstruct", + "ctypes", + "dumpstruct", + "hexdump", "Error", "ParserError", - "CompilerError", "ResolveError", "NullPointerDereference", ] diff --git a/dissect/cstruct/bitbuffer.py 
b/dissect/cstruct/bitbuffer.py new file mode 100644 index 0000000..748359a --- /dev/null +++ b/dissect/cstruct/bitbuffer.py @@ -0,0 +1,50 @@ +class BitBuffer(object): + """Implements a bit buffer that can read and write bit fields.""" + + def __init__(self, stream, endian): + self.stream = stream + self.endian = endian + + self._type = None + self._buffer = 0 + self._remaining = 0 + + def read(self, field_type, bits): + if self._remaining < 1 or self._type.size != field_type.size: + self._type = field_type + self._remaining = field_type.size * 8 + self._buffer = field_type._read(self.stream) + + if self.endian != '>': + v = self._buffer & ((1 << bits) - 1) + self._buffer >>= bits + self._remaining -= bits + else: + v = self._buffer & (((1 << (self._remaining - bits)) - 1) ^ ((1 << self._remaining) - 1)) + v >>= self._remaining - bits + self._remaining -= bits + + return v + + def write(self, field_type, data, bits): + if self._remaining == 0: + self._remaining = field_type.size * 8 + self._type = field_type + + if self.endian != '>': + self._buffer |= data << (self._type.size * 8 - self._remaining) + else: + self._buffer |= data << (self._remaining - bits) + + self._remaining -= bits + + def flush(self): + self._type._write(self.stream, self._buffer) + self._type = None + self._remaining = 0 + self._buffer = 0 + + def reset(self): + self._type = None + self._buffer = 0 + self._remaining = 0 diff --git a/dissect/cstruct/compiler.py b/dissect/cstruct/compiler.py new file mode 100644 index 0000000..8bc8a67 --- /dev/null +++ b/dissect/cstruct/compiler.py @@ -0,0 +1,408 @@ +import struct +from collections import OrderedDict +from dissect.cstruct.bitbuffer import BitBuffer +from dissect.cstruct.expression import Expression +from dissect.cstruct.types.base import Array +from dissect.cstruct.types.chartype import CharType +from dissect.cstruct.types.instance import Instance +from dissect.cstruct.types.structure import Structure, Union +from dissect.cstruct.types.wchartype 
import WcharType +from dissect.cstruct.types.packedtype import PackedType +from dissect.cstruct.types.flag import Flag, FlagInstance +from dissect.cstruct.types.enum import Enum, EnumInstance +from dissect.cstruct.types.bytesinteger import BytesInteger +from dissect.cstruct.types.pointer import Pointer, PointerInstance + + +class Compiler(object): + """Compiler for cstruct structures. Creates somewhat optimized parsing code.""" + + TYPES = ( + Structure, + Pointer, + Enum, + Flag, + Array, + PackedType, + CharType, + WcharType, + BytesInteger, + ) + + COMPILE_TEMPLATE = """ +class {name}(Structure): + def __init__(self, cstruct, structure, source=None): + self.structure = structure + self.source = source + super().__init__(cstruct, structure.name, structure.fields, anonymous=structure.anonymous) + + def _read(self, stream): + r = OrderedDict() + sizes = {{}} + bitreader = BitBuffer(stream, self.cstruct.endian) + +{read_code} + + return Instance(self, r, sizes) + + def add_field(self, name, type_, offset=None): + raise NotImplementedError("Can't add fields to a compiled structure") + + def __repr__(self): + return '' +""" + + def __init__(self, cstruct): + self.cstruct = cstruct + + def compile(self, structure): + if isinstance(structure, Union): + # TODO: Compiling unions should be supported + return structure + + structure_name = structure.name + + try: + # Generate struct class based on provided structure type + source = self.gen_struct_class(structure_name, structure) + except TypeError: + return structure + + # Create code object that can be executed later on + code_object = compile( + source, + f'', + 'exec', + ) + + env = { + 'OrderedDict': OrderedDict, + 'Structure': Structure, + 'Instance': Instance, + 'Expression': Expression, + 'EnumInstance': EnumInstance, + 'FlagInstance': FlagInstance, + 'PointerInstance': PointerInstance, + 'BytesInteger': BytesInteger, + 'BitBuffer': BitBuffer, + 'struct': struct, + 'range': range, + } + + exec(code_object, env) + 
return env[structure_name](self.cstruct, structure, source) + + def gen_struct_class(self, name, structure): + blocks = [] + classes = [] + cur_block = [] + read_size = 0 + prev_was_bits = False + + for field in structure.fields: + field_type = self.cstruct.resolve(field.type) + + if not isinstance(field_type, self.TYPES): + raise TypeError(f"Unsupported type for compiler: {field_type}") + + if isinstance(field_type, Structure) \ + or (isinstance(field_type, Array) and isinstance(field_type.type, Structure)): + + blocks.append(self.gen_read_block(read_size, cur_block)) + + struct_read = 's = stream.tell()\n' + if isinstance(field_type, Array): + num = field_type.count + + if isinstance(num, Expression): + num = 'max(0, Expression(self.cstruct, "{expr}").evaluate(r))'.format(expr=num.expression) + + struct_read += ( + 'r["{name}"] = []\n' + 'for _ in range({num}):\n' + ' r["{name}"].append(self.lookup["{name}"].type.type._read(stream))\n'.format( + name=field.name, + num=num, + ) + ) + struct_read += 'sizes["{name}"] = stream.tell() - s'.format(name=field.name) + elif isinstance(field_type, Structure) and field_type.anonymous: + struct_read += 'v = self.lookup["{name}"].type._read(stream)\n'.format(name=field.name) + struct_read += 'r.update(v._values)\n' + struct_read += 'sizes.update(v._sizes)' + else: + struct_read += 'r["{name}"] = self.lookup["{name}"].type._read(stream)\n'.format(name=field.name) + struct_read += 'sizes["{name}"] = stream.tell() - s'.format(name=field.name) + + blocks.append(struct_read) + read_size = 0 + cur_block = [] + continue + + if field.bits: + blocks.append(self.gen_read_block(read_size, cur_block)) + blocks.append( + 'r["{name}"] = bitreader.read(self.cstruct.{type_name}, {bits})'.format( + name=field.name, + type_name=field.type.name, + bits=field.bits + ) + ) + + read_size = 0 + cur_block = [] + prev_was_bits = True + continue + + if prev_was_bits: + blocks.append('bitreader.reset()') + prev_was_bits = False + + try: + count = 
len(field_type) + read_size += count + cur_block.append(field) + except TypeError: + if cur_block: + blocks.append(self.gen_read_block(read_size, cur_block)) + + blocks.append(self.gen_dynamic_block(field)) + read_size = 0 + cur_block = [] + + if len(cur_block): + blocks.append(self.gen_read_block(read_size, cur_block)) + + read_code = '\n\n'.join(blocks) + read_code = '\n'.join([' ' * 2 + line for line in read_code.split('\n')]) + + classes.append( + self.COMPILE_TEMPLATE.format( + name=name, + read_code=read_code + ) + ) + return '\n\n'.join(classes) + + def gen_read_block(self, size, block): + template = ( + 'buf = stream.read({size})\n' + 'if len(buf) != {size}: raise EOFError()\n' + 'data = struct.unpack(self.cstruct.endian + "{{}}", buf)\n' + '{{}}'.format(size=size) + ) + + read_code = [] + fmt = [] + + cur_type = None + cur_count = 0 + + buf_offset = 0 + data_offset = 0 + + for field in block: + field_type = self.cstruct.resolve(field.type) + read_type = field_type + + count = 1 + data_count = 1 + + if isinstance(read_type, (Enum, Flag)): + read_type = read_type.type + elif isinstance(read_type, Pointer): + read_type = self.cstruct.pointer + + if isinstance(field_type, Array): + count = read_type.count + data_count = count + read_type = read_type.type + + if isinstance(read_type, (Enum, Flag)): + read_type = read_type.type + elif isinstance(read_type, Pointer): + read_type = self.cstruct.pointer + + if isinstance(read_type, (CharType, WcharType, BytesInteger)): + read_slice = '{}:{}'.format( + buf_offset, buf_offset + (count * read_type.size) + ) + else: + read_slice = '{}:{}'.format(data_offset, data_offset + count) + elif isinstance(read_type, CharType): + read_slice = f'{buf_offset}:{buf_offset + 1}' + elif isinstance(read_type, (WcharType, BytesInteger)): + read_slice = '{}:{}'.format(buf_offset, buf_offset + read_type.size) + else: + read_slice = str(data_offset) + + if not cur_type: + if isinstance(read_type, PackedType): + cur_type = 
read_type.packchar + else: + cur_type = 'x' + + if isinstance(read_type, (PackedType, CharType, WcharType, BytesInteger, Enum, Flag)): + char_count = count + + if isinstance(read_type, (CharType, WcharType, BytesInteger)): + data_count = 0 + pack_char = 'x' + char_count *= read_type.size + else: + pack_char = read_type.packchar + + if cur_type != pack_char: + fmt.append('{}{}'.format(cur_count, cur_type)) + cur_count = 0 + + cur_count += char_count + cur_type = pack_char + + if isinstance(read_type, BytesInteger): + getter = 'BytesInteger.parse(buf[{slice}], {size}, {count}, {signed}, self.cstruct.endian){data_slice}' + + getter = getter.format( + slice=read_slice, + size=read_type.size, + count=count, + signed=read_type.signed, + data_slice='[0]' if count == 1 else '', + ) + elif isinstance(read_type, (CharType, WcharType)): + getter = 'buf[{}]'.format(read_slice) + + if isinstance(read_type, WcharType): + getter += ".decode('utf-16-le' if self.cstruct.endian == '<' else 'utf-16-be')" + else: + getter = 'data[{}]'.format(read_slice) + + if isinstance(field_type, (Enum, Flag)): + getter = '{enum_type}Instance(self.cstruct.{type_name}, {getter})'.format( + enum_type=field_type.__class__.__name__, + type_name=field_type.name, + getter=getter + ) + elif isinstance(field_type, Array) and isinstance(field_type.type, (Enum, Flag)): + getter = '[{enum_type}Instance(self.cstruct.{type_name}, d) for d in {getter}]'.format( + enum_type=field_type.type.__class__.__name__, + type_name=field_type.type.name, + getter=getter + ) + elif isinstance(field_type, Pointer): + getter = 'PointerInstance(self.cstruct.{type_name}, stream, {getter}, r)'.format( + type_name=field_type.type.name, + getter=getter + ) + elif isinstance(field_type, Array) and isinstance(field_type.type, Pointer): + getter = '[PointerInstance(self.cstruct.{type_name}, stream, d, r) for d in {getter}]'.format( + type_name=field_type.type.name, + getter=getter + ) + elif isinstance(field_type, Array) and 
isinstance(read_type, PackedType): + getter = 'list({})'.format(getter) + + read_code.append( + 'r["{name}"] = {getter}'.format(name=field.name, getter=getter) + ) + read_code.append( + 'sizes["{name}"] = {size}'.format(name=field.name, size=count * read_type.size) + ) + + data_offset += data_count + buf_offset += count * read_type.size + + if cur_count: + fmt.append('{}{}'.format(cur_count, cur_type)) + + return template.format(''.join(fmt), '\n'.join(read_code)) + + def gen_dynamic_block(self, field): + if not isinstance(field.type, Array): + raise TypeError(f"Only Array can be dynamic, got {field.type!r}") + + field_type = self.cstruct.resolve(field.type.type) + reader = None + + if isinstance(field_type, (Enum, Flag)): + field_type = field_type.type + + if not field.type.count: # Null terminated + if isinstance(field_type, PackedType): + reader = ( + 't = []\n' + 'while True:\n' + ' d = stream.read({size})\n' + ' if len(d) != {size}: raise EOFError()\n' + ' v = struct.unpack(self.cstruct.endian + "{packchar}", d)[0]\n' + ' if v == 0: break\n' + ' t.append(v)'.format(size=field_type.size, packchar=field_type.packchar) + ) + + elif isinstance(field_type, (CharType, WcharType)): + reader = ( + 't = []\n' + 'while True:\n' + ' c = stream.read({size})\n' + ' if len(c) != {size}: raise EOFError()\n' + ' if c == b"{null}": break\n' + ' t.append(c)\n' + 't = b"".join(t)'.format(size=field_type.size, null='\\x00' * field_type.size) + ) + + if isinstance(field_type, WcharType): + reader += ".decode('utf-16-le' if self.cstruct.endian == '<' else 'utf-16-be')" + elif isinstance(field_type, BytesInteger): + reader = ( + 't = []\n' + 'while True:\n' + ' d = stream.read({size})\n' + ' if len(d) != {size}: raise EOFError()\n' + ' v = BytesInteger.parse(d, {size}, 1, {signed}, self.cstruct.endian)\n' + ' if v == 0: break\n' + ' t.append(v)'.format(size=field_type.size, signed=field_type.signed) + ) + + if isinstance(field_type, (Enum, Flag)): + reader += '\nt = 
[{enum_type}Instance(self.cstruct.{type_name}, d) for d in t]'.format( + enum_type=field_type.__class__.__name__, + type_name=field_type.name + ) + + if not reader: + raise TypeError(f"Couldn't compile a reader for array {field!r}, {field_type!r}.") + + return 's = stream.tell()\n{reader}\nr["{name}"]' \ + ' = t\nsizes["{name}"] = stream.tell() - s'.format(reader=reader, name=field.name) + + expr = field.type.count.expression + expr_read = ( + 'dynsize = max(0, Expression(self.cstruct, "{expr}").evaluate(r))\n' + 'buf = stream.read(dynsize * {type_size})\n' + 'if len(buf) != dynsize * {type_size}: raise EOFError()\n' + 'r["{name}"] = {{reader}}\n' + 'sizes["{name}"] = dynsize * {type_size}'.format(expr=expr, name=field.name, type_size=field_type.size) + ) + + if isinstance(field_type, PackedType): + reader = 'list(struct.unpack(self.cstruct.endian + "{{:d}}{packchar}".format(dynsize), buf))'.format( + packchar=field_type.packchar, + ) + elif isinstance(field_type, (CharType, WcharType)): + reader = 'buf' + if isinstance(field_type, WcharType): + reader += ".decode('utf-16-le' if self.cstruct.endian == '<' else 'utf-16-be')" + elif isinstance(field_type, BytesInteger): + reader = 'BytesInteger.parse(buf, {size}, dynsize, {signed}, self.cstruct.endian)'.format( + size=field_type.size, + signed=field_type.signed + ) + + if isinstance(field_type, (Enum, Flag)): + reader += '[{enum_type}Instance(self.cstruct.{type_name}, d) for d in {reader}]'.format( + enum_type=field_type.__class__.__name__, + type_name=field_type.name, + reader=reader + ) + + return expr_read.format(reader=reader, size=None) diff --git a/dissect/cstruct/cstruct.py b/dissect/cstruct/cstruct.py index 3314e40..71842b4 100644 --- a/dissect/cstruct/cstruct.py +++ b/dissect/cstruct/cstruct.py @@ -1,118 +1,21 @@ -# Copyright (c) 2018 Fox-IT Security Research Team -# -# Permission is hereby granted, free of charge, to any person obtaining a copy -# of this software and associated documentation files (the 
"Software"), to deal -# in the Software without restriction, including without limitation the rights -# to use, copy, modify, merge, publish, distribute, sublicense, and/or sell -# copies of the Software, and to permit persons to whom the Software is -# furnished to do so, subject to the following conditions: -# -# The above copyright notice and this permission notice shall be included in all -# copies or substantial portions of the Software. -# -# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR -# IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, -# FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE -# AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER -# LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, -# OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE -# SOFTWARE. - # TODO: # - Rework definition parsing, maybe pycparser? # - Change expression implementation # - Lazy reading? 
from __future__ import print_function -import re -import sys -import ast -import pprint -import string -import struct import ctypes as _ctypes -from io import BytesIO -from collections import OrderedDict - -try: - from builtins import bytes as newbytes -except ImportError: - newbytes = bytes - -PY3 = sys.version_info > (3,) -if PY3: - long = int - xrange = range - -DEBUG = False - -COLOR_RED = '\033[1;31m' -COLOR_GREEN = '\033[1;32m' -COLOR_YELLOW = '\033[1;33m' -COLOR_BLUE = '\033[1;34m' -COLOR_PURPLE = '\033[1;35m' -COLOR_CYAN = '\033[1;36m' -COLOR_WHITE = '\033[1;37m' -COLOR_NORMAL = '\033[1;0m' - -COLOR_BG_RED = '\033[1;41m\033[1;37m' -COLOR_BG_GREEN = '\033[1;42m\033[1;37m' -COLOR_BG_YELLOW = '\033[1;43m\033[1;37m' -COLOR_BG_BLUE = '\033[1;44m\033[1;37m' -COLOR_BG_PURPLE = '\033[1;45m\033[1;37m' -COLOR_BG_CYAN = '\033[1;46m\033[1;37m' -COLOR_BG_WHITE = '\033[1;47m\033[1;30m' - -PRINTABLE = string.digits + string.ascii_letters + string.punctuation + " " - -COMPILE_TEMPL = """ -class {name}(Structure): - def __init__(self, cstruct, structure, source=None): - self.structure = structure - self.source = source - super({name}, self).__init__(cstruct, structure.name, structure.fields) - - def _read(self, stream): - r = OrderedDict() - sizes = {{}} - bitreader = BitBuffer(stream, self.cstruct.endian) - -{read_code} - - return Instance(self, r, sizes) - - def add_fields(self, name, type_, offset=None): - raise NotImplementedError("Can't add fields to a compiled structure") - - def __repr__(self): - return '' -""" - - -class Error(Exception): - pass - - -class ParserError(Error): - pass - - -class CompilerError(Error): - pass - - -class ResolveError(Error): - pass - - -class NullPointerDereference(Error): - pass - - -def log(line, *args, **kwargs): - if not DEBUG: - return +import sys - print(line.format(*args, **kwargs), file=sys.stderr) +from io import BytesIO +from dissect.cstruct.exceptions import ResolveError +from dissect.cstruct.types.base import Array +from 
dissect.cstruct.types.bytesinteger import BytesInteger +from dissect.cstruct.types.chartype import CharType +from dissect.cstruct.types.packedtype import PackedType +from dissect.cstruct.types.pointer import Pointer +from dissect.cstruct.types.voidtype import VoidType +from dissect.cstruct.types.wchartype import WcharType +from dissect.cstruct.parser import CStyleParser, TokenParser class cstruct(object): @@ -124,32 +27,39 @@ class cstruct(object): """ DEF_CSTYLE = 1 + DEF_LEGACY = 2 - def __init__(self, endian='<', pointer='uint64'): + def __init__(self, endian='<', pointer=None, align=None): self.endian = endian self.consts = {} self.lookups = {} self.typedefs = { - 'byte': 'int8', - 'ubyte': 'uint8', + 'BYTE': 'int8', + 'UBYTE': 'uint8', + 'UCHAR': 'uint8', 'uchar': 'uint8', + 'SHORT': 'int16', 'short': 'int16', + 'USHORT': 'uint16', 'ushort': 'uint16', + 'LONG': 'int32', 'long': 'int32', + 'ULONG': 'uint32', 'ulong': 'uint32', - 'ulong64': 'uint64', + 'ULONG64': 'uint64', 'u1': 'uint8', 'u2': 'uint16', 'u4': 'uint32', 'u8': 'uint64', - 'word': 'uint16', - 'dword': 'uint32', + 'WORD': 'uint16', + 'DWORD': 'uint32', + 'QWORD': 'uint64', - 'longlong': 'int64', - 'ulonglong': 'uint64', + 'LONGLONG': 'int64', + 'ULONGLONG': 'uint64', 'int': 'int32', 'unsigned int': 'uint32', @@ -175,26 +85,46 @@ def __init__(self, endian='<', pointer='uint64'): 'void': VoidType(), } + pointer = pointer or 'uint64' if sys.maxsize > 2 ** 32 else 'uint32' self.pointer = self.resolve(pointer) + self.align = align + self._anonymous_count = 0 + + def __getattr__(self, attr): + try: + return self.typedefs[attr] + except KeyError: + pass + + try: + return self.consts[attr] + except KeyError: + pass - def addtype(self, name, t, replace=False): + raise AttributeError("Invalid attribute: %s" % attr) + + def _next_anonymous(self): + name = 'anonymous_{:d}'.format(self._anonymous_count) + self._anonymous_count += 1 + return name + + def addtype(self, name, type_, replace=False): """Add a type or 
type reference. Args: name: Name of the type to be added. - t: The type to be added. Can be a str reference to another type + type_: The type to be added. Can be a str reference to another type or a compatible type class. Raises: ValueError: If the type already exists. """ - name = name.lower() - if not replace and name.lower() in self.typedefs: + if not replace and name in self.typedefs: raise ValueError("Duplicate type: %s" % name) - self.typedefs[name] = t + self.typedefs[name] = type_ - def load(self, s, deftype=None, **kwargs): + def load(self, definition, deftype=None, **kwargs): """Parse structures from the given definitions using the given definition type. Definitions can be parsed using different parsers. Currently, there's @@ -202,29 +132,33 @@ def load(self, s, deftype=None, **kwargs): modify this cstruct instance. Arguments can be passed to parsers using kwargs. + The CSTYLE parser was recently replaced with token based parser, + instead of a strictly regex based one. The old parser is still available + by using DEF_LEGACY. + Args: - s: The definition to parse. + definition: The definition to parse. deftype: The definition type to parse the definitions with. **kwargs: Keyword arguments for parsers. """ deftype = deftype or cstruct.DEF_CSTYLE if deftype == cstruct.DEF_CSTYLE: - parser = CStyleParser(self, **kwargs) + TokenParser(self, **kwargs).parse(definition) + elif deftype == cstruct.DEF_LEGACY: + CStyleParser(self, **kwargs).parse(definition) - parser.parse(s) - - def loadfile(self, s, deftype=None, **kwargs): + def loadfile(self, path, deftype=None, **kwargs): """Load structure definitions from a file. The given path will be read and parsed using the .load() function. Args: - s: The path to load definitions from. + path: The path to load definitions from. deftype: The definition type to parse the definitions with. **kwargs: Keyword arguments for parsers. 
""" - with open(s, 'r') as fh: + with open(path) as fh: self.load(fh.read(), deftype, **kwargs) def read(self, name, s): @@ -254,253 +188,56 @@ def resolve(self, name): Raises: ResolveError: If the type can't be resolved. """ - t = name - if not isinstance(t, str): - return t + type_name = name + if not isinstance(type_name, str): + return type_name - for i in xrange(10): - if t.lower() not in self.typedefs: + for _ in range(10): + if type_name not in self.typedefs: raise ResolveError("Unknown type %s" % name) - t = self.typedefs[t.lower()] + type_name = self.typedefs[type_name] - if not isinstance(t, str): - return t + if not isinstance(type_name, str): + return type_name raise ResolveError("Recursion limit exceeded while resolving type %s" % name) - def __getattr__(self, attr): - if attr.lower() in self.typedefs: - return self.typedefs[attr.lower()] - - if attr in self.consts: - return self.consts[attr] - - raise AttributeError("Invalid Attribute: %s" % attr) - - -class Parser(object): - """Base class for definition parsers. - - Args: - cstruct: An instance of cstruct. - """ - - def __init__(self, cstruct): - self.cstruct = cstruct - - def parse(self, data): - """This function should parse definitions to cstruct types. - - Args: - data: Data to parse definitions from, usually a string. - """ - raise NotImplementedError() - - -class CStyleParser(Parser): - """Definition parser for C-like structure syntax. - - Args: - cstruct: An instance of cstruct - compiled: Whether structs should be compiled or not. 
- """ - - def __init__(self, cstruct, compiled=True): - self.compiled = compiled - super(CStyleParser, self).__init__(cstruct) - - # TODO: Implement proper parsing - def parse(self, data): - self._constants(data) - self._enums(data) - self._structs(data) - self._lookups(data, self.cstruct.consts) - - def _constants(self, data): - r = re.finditer(r'#define\s+(?P[^\s]+)\s+(?P[^\r\n]+)\s*\n', data) - for t in r: - d = t.groupdict() - v = d['value'].rsplit('//')[0] - - try: - v = ast.literal_eval(v) - except (ValueError, SyntaxError): - pass - - self.cstruct.consts[d['name']] = v - - def _enums(self, data): - r = re.finditer( - r'enum\s+(?P[^\s:{]+)\s*(:\s*(?P[^\s]+)\s*)?\{(?P[^}]+)\}\s*;', - data, - ) - for t in r: - d = t.groupdict() - - nextval = 0 - values = {} - for line in d['values'].split('\n'): - line, sep, comment = line.partition("//") - for v in line.split(","): - key, sep, val = v.partition("=") - key = key.strip() - val = val.strip() - if not key: - continue - if not val: - val = nextval - else: - val = Expression(self.cstruct, val).evaluate({}) - - nextval = val + 1 - - values[key] = val - - if not d['type']: - d['type'] = 'uint32' - - enum = Enum( - self.cstruct, d['name'], self.cstruct.resolve(d['type']), values - ) - self.cstruct.addtype(enum.name, enum) - - def _structs(self, data): - compiler = Compiler(self.cstruct) - r = re.finditer( - r'(#(?P(?:compile))\s+)?((?Ptypedef)\s+)?(?P[^\s]+)\s+(?P[^\s]+)?(?P\s*\{[^}]+\}(?P\s+[^;\n]+)?)?\s*;', - data, - ) - for t in r: - d = t.groupdict() - - if d['name']: - name = d['name'] - elif d['defs']: - name = d['defs'].strip().split(',')[0].strip() - else: - raise ParserError("No name for struct") - - if d['type'] == 'struct': - data = self._parse_fields(d['fields'][1:-1].strip()) - st = Structure(self.cstruct, name, data) - if d['flags'] == 'compile' or self.compiled: - st = compiler.compile(st) - elif d['typedef'] == 'typedef': - st = d['type'] - else: - continue - - if d['name']: - 
self.cstruct.addtype(d['name'], st) - - if d['defs']: - for td in d['defs'].strip().split(','): - td = td.strip() - self.cstruct.addtype(td, st) - - def _parse_fields(self, s): - fields = re.finditer( - r'(?P[^\s]+)\s+(?P[^\s\[:]+)(\s*:\s*(?P\d+))?(\[(?P[^;\n]*)\])?;', - s, - ) - r = [] - for f in fields: - d = f.groupdict() - if d['type'].startswith('//'): - continue - - type_ = self.cstruct.resolve(d['type']) - - d['name'] = d['name'].replace('(', '').replace(')', '') - - # Maybe reimplement lazy type references later - # _type = TypeReference(self, d['type']) - if d['count'] is not None: - if d['count'] == '': - count = None - else: - count = Expression(self.cstruct, d['count']) - try: - count = count.evaluate() - except Exception: - pass - - type_ = Array(self.cstruct, type_, count) - - if d['name'].startswith('*'): - d['name'] = d['name'][1:] - type_ = Pointer(self.cstruct, type_) - - field = Field(d['name'], type_, int(d['bits']) if d['bits'] else None) - r.append(field) - - return r - - def _lookups(self, data, consts): - r = re.finditer(r'\$(?P[^\s]+) = ({[^}]+})\w*\n', data) - - for t in r: - d = ast.literal_eval(t.group(2)) - self.cstruct.lookups[t.group(1)] = dict( - [(self.cstruct.consts[k], v) for k, v in d.items()] - ) - class Instance(object): """Holds parsed structure data.""" + __slots__ = ('_type', '_values', '_sizes') def __init__(self, type_, values, sizes=None): object.__setattr__(self, '_type', type_) object.__setattr__(self, '_values', values) object.__setattr__(self, '_sizes', sizes) - def write(self, fh): - """Write this structure to a writable file-like object. - - Args: - fh: File-like objects that supports writing. - - Returns: - The amount of bytes written. - """ - return self.__dict__['_type'].write(fh, self) - - def dumps(self): - """Dump this structure to a byte string. - - Returns: - The raw bytes of this structure. 
- """ - s = BytesIO() - self.write(s) - return s.getvalue() - def __getattr__(self, attr): - if attr not in self.__dict__['_type'].lookup: + try: + return self._values[attr] + except KeyError: raise AttributeError("Invalid attribute: %r" % attr) - return self.__dict__['_values'][attr] - def __setattr__(self, attr, value): - if attr not in self.__dict__['_type'].lookup: + if attr not in self._type.lookup: raise AttributeError("Invalid attribute: %r" % attr) - self.__dict__['_values'][attr] = value + self._values[attr] = value def __getitem__(self, item): - return self.__dict__['_values'][item] + return self._values[item] def __contains__(self, attr): - return attr in self.__dict__['_values'] + return attr in self._values def __repr__(self): return '<%s %s>' % ( - self.__dict__['_type'].name, + self._type.name, ', '.join( [ - '%s=%s' % (k, hex(v) if isinstance(v, (int, long)) else repr(v)) - for k, v in self.__dict__['_values'].items() + '%s=%s' % (k, hex(v) if isinstance(v, (int, int)) else repr(v)) + for k, v in self._values.items() ] ), ) @@ -509,915 +246,28 @@ def __len__(self): return len(self.dumps()) def _size(self, field): - return self.__dict__['_sizes'][field] - - -class PointerInstance(object): - """Like the Instance class, but for structures referenced by a pointer.""" - - def __init__(self, t, stream, addr, ctx): - self._stream = stream - self._type = t - self._addr = addr - self._ctx = ctx - self._value = None - - def _get(self): - log("Dereferencing pointer -> 0x{:016x} [{!r}]", self._addr, self._stream) - if self._addr == 0: - raise NullPointerDereference() - - if self._value is None: - pos = self._stream.tell() - self._stream.seek(self._addr) - if isinstance(self._type, Array): - r = self._type._read(self._stream, self._ctx) - else: - r = self._type._read(self._stream) - self._stream.seek(pos) - self._value = r - - return self._value - - def __getattr__(self, attr): - return getattr(self._get(), attr) - - def __str__(self): - return str(self._get()) 
- - def __nonzero__(self): - return self._addr != 0 - - def __repr__(self): - return "".format(self._type, self._addr) - - -class Expression(object): - """Expression parser for simple calculations in definitions.""" - - operators = [ - ('+', lambda a, b: a + b), - ('-', lambda a, b: a - b), - ('*', lambda a, b: a * b), - ('/', lambda a, b: a / b), - ('&', lambda a, b: a & b), - ('|', lambda a, b: a | b), - ('>>', lambda a, b: a >> b), - ('<<', lambda a, b: a << b), - ] - - def __init__(self, cstruct, expr): - self.cstruct = cstruct - self.expr = expr - - def evaluate(self, context=None): - context = context if context else {} - level = 0 - levels = [] - buf = '' - - for i in xrange(len(self.expr)): - if self.expr[i] == '(': - level += 1 - levels.append(buf) - buf = '' - continue - - if self.expr[i] == ')': - level -= 1 - val = self.evaluate_part(buf, context) - buf = levels.pop() - buf += str(val) - continue - - buf += self.expr[i] - - return self.evaluate_part(buf, context) - - def evaluate_part(self, e, v): - e = e.strip() - - for o in self.operators: - if o[0] in e: - a, b = e.rsplit(o[0], 1) - return o[1](self.evaluate_part(a, v), self.evaluate_part(b, v)) - - if e in v: - return v[e] - - if e.startswith('0x'): - return int(e, 16) - - if e in self.cstruct.consts: - return self.cstruct.consts[e] - - return int(e) - - def __repr__(self): - return self.expr - - -class BaseType(object): - """Base class for cstruct type classes.""" - - def __init__(self, cstruct): - self.cstruct = cstruct - - def reads(self, data): - """Parse the given data according to the type that implements this class. - - Args: - data: Byte string to parse. - - Returns: - The parsed value of this type. - """ - data = BytesIO(data) - return self._read(data) - - def dumps(self, data): - """Dump the given data according to the type that implements this class. - - Args: - data: Data to dump. - - Returns: - The resulting bytes. 
- """ - out = BytesIO() - self._write(out, data) - return out.getvalue() - - def read(self, obj, *args, **kwargs): - """Parse the given data according to the type that implements this class. + return self._sizes[field] - Args: - obj: Data to parse. Can be a (byte) string or a file-like object. - - Returns: - The parsed value of this type. - """ - if isinstance(obj, (str, bytes, newbytes)): - return self.reads(obj) - - return self._read(obj) - - def write(self, stream, data): - """Write the given data to a writable file-like object according to the - type that implements this class. + def write(self, fh): + """Write this structure to a writable file-like object. Args: - stream: Writable file-like object to write to. - data: Data to write. + fh: File-like objects that supports writing. Returns: The amount of bytes written. """ - return self._write(stream, data) - - def _read(self, stream): - raise NotImplementedError() - - def _read_array(self, stream, count): - return [self._read(stream) for i in xrange(count)] - - def _read_0(self, stream): - raise NotImplementedError() - - def _write(self, stream, data): - raise NotImplementedError() - - def _write_array(self, stream, data): - num = 0 - for i in data: - num += self._write(stream, i) - return num - - def _write_0(self, stream, data): - raise NotImplementedError() - - def default(self): - """Return a default value of this type.""" - raise NotImplementedError() - - def default_array(self): - """Return a default array of this type.""" - raise NotImplementedError() - - def __getitem__(self, count): - return Array(self.cstruct, self, count) - - def __call__(self, *args, **kwargs): - if len(args) > 0: - return self.read(*args, **kwargs) - - r = self.default() - if kwargs: - for k, v in kwargs.items(): - setattr(r, k, v) - - return r - - -class RawType(BaseType): - """Base class for raw types that have a name and size.""" - - def __init__(self, cstruct, name=None, size=0): - self.name = name - self.size = size - 
super(RawType, self).__init__(cstruct) - - def __len__(self): - return self.size - - def __repr__(self): - if self.name: - return self.name - - return BaseType.__repr__(self) - - -class Structure(BaseType): - """Type class for structures.""" - - def __init__(self, cstruct, name, fields=None): - self.name = name - self.size = None - self.lookup = OrderedDict() - self.fields = fields if fields else [] - - for f in self.fields: - self.lookup[f.name] = f - - self._calc_offsets() - super(Structure, self).__init__(cstruct) - - def _calc_offsets(self): - offset = 0 - bitstype = None - bitsremaining = 0 - - for field in self.fields: - if field.bits: - if bitsremaining == 0 or field.type != bitstype: - bitstype = field.type - bitsremaining = bitstype.size * 8 - if offset is not None: - field.offset = offset - offset += bitstype.size - else: - field.offset = None - - bitsremaining -= field.bits - continue - - field.offset = offset - if offset is not None: - try: - offset += len(field.type) - except TypeError: - offset = None - - def _calc_size(self): - size = 0 - bitstype = None - bitsremaining = 0 - - for field in self.fields: - if field.bits: - if bitsremaining == 0 or field.type != bitstype: - bitstype = field.type - bitsremaining = bitstype.size * 8 - size += bitstype.size - - bitsremaining -= field.bits - continue - - fieldlen = len(field.type) - size += fieldlen - - if field.offset is not None: - size = max(size, field.offset + fieldlen) - - return size - - def _read(self, stream, *args, **kwargs): - log("[Structure::read] {} {}", self.name, self.size) - bitbuffer = BitBuffer(stream, self.cstruct.endian) - - struct_start = stream.tell() - - r = OrderedDict() - sizes = {} - for field in self.fields: - start = stream.tell() - ft = self.cstruct.resolve(field.type) - - if field.offset: - if start != struct_start + field.offset: - log( - "+ seeking to 0x{:x}+0x{:x} for {}".format( - struct_start, field.offset, field.name - ) - ) - stream.seek(struct_start + field.offset) - 
start = struct_start + field.offset - - if field.bits: - r[field.name] = bitbuffer.read(ft, field.bits) - continue - else: - bitbuffer.reset() - - if isinstance(ft, (Array, Pointer)): - v = ft._read(stream, r) - else: - v = ft._read(stream) - - sizes[field.name] = stream.tell() - start - r[field.name] = v - - return Instance(self, r, sizes) - - def _write(self, stream, data): - bitbuffer = BitBuffer(stream, self.cstruct.endian) - num = 0 - - for field in self.fields: - if field.bits: - bitbuffer.write(field.type, getattr(data, field.name), field.bits) - continue - - if bitbuffer._type: - bitbuffer.flush() - - num += field.type._write(stream, getattr(data, field.name)) - - # Flush bitbuffer - if bitbuffer._type: - bitbuffer.flush() - - return num - - def add_field(self, name, type_, offset=None): - """Add a field to this structure. - - Args: - name: The field name. - type_: The field type. - offset: The field offset. - """ - field = Field(name, type_, offset=offset) - self.fields.append(field) - self.lookup[name] = field - self.size = None - setattr(self, name, field) + return self._type.write(fh, self) - def default(self): - """Create and return an empty Instance from this structure. + def dumps(self): + """Dump this structure to a byte string. Returns: - An empty Instance from this structure. + The raw bytes of this structure. """ - r = OrderedDict() - for field in self.fields: - r[field.name] = field.type.default() - - return Instance(self, r) - - def __len__(self): - if self.size is None: - self.size = self._calc_size() - - return self.size - - def __repr__(self): - return ''.format(self.name) - - def show(self, indent=0): - """Pretty print this structure.""" - if indent == 0: - print("struct {}".format(self.name)) - - for field in self.fields: - if field.offset is None: - offset = '0x??' 
- else: - offset = '0x{:02x}'.format(field.offset) - - print("{}+{} {} {}".format(' ' * indent, offset, field.name, field.type)) - - if isinstance(field.type, Structure): - field.type.show(indent + 1) - - -class BitBuffer(object): - """Implements a bit buffer that can read and write bit fields.""" - - def __init__(self, stream, endian): - self.stream = stream - self.endian = endian - - self._type = None - self._buffer = 0 - self._remaining = 0 - - def read(self, field_type, bits): - if self._remaining < 1 or self._type != field_type: - self._type = field_type - self._remaining = field_type.size * 8 - self._buffer = field_type._read(self.stream) - - if self.endian != '>': - v = self._buffer & ((1 << bits) - 1) - self._buffer >>= bits - self._remaining -= bits - else: - v = self._buffer & ( - ((1 << (self._remaining - bits)) - 1) ^ ((1 << self._remaining) - 1) - ) - v >>= self._remaining - bits - self._remaining -= bits - - return v - - def write(self, field_type, data, bits): - if self._remaining == 0: - self._remaining = field_type.size * 8 - self._type = field_type - - if self.endian != '>': - self._buffer |= data << (self._type.size * 8 - self._remaining) - else: - self._buffer |= data << (self._remaining - bits) - - self._remaining -= bits - - def flush(self): - self._type._write(self.stream, self._buffer) - self._type = None - self._remaining = 0 - self._buffer = 0 - - def reset(self): - self._type = None - self._buffer = 0 - self._remaining = 0 - - -class Field(object): - """Holds a structure field.""" - - def __init__(self, name, type_, bits=None, offset=None): - self.name = name - self.type = type_ - self.bits = bits - self.offset = offset - - def __repr__(self): - return ''.format(self.name, self.type) - - -class Array(BaseType): - """Implements a fixed or dynamically sized array type. - - Example: - When using the default C-style parser, the following syntax is supported: - - x[3] -> 3 -> static length. - x[] -> None -> null-terminated. 
- x[expr] -> expr -> dynamic length. - """ - - def __init__(self, cstruct, type_, count): - self.type = type_ - self.count = count - self.dynamic = isinstance(self.count, Expression) or self.count is None - - super(Array, self).__init__(cstruct) - - def _read(self, stream, context=None): - if self.count is None: - return self.type._read_0(stream) - - if self.dynamic: - count = self.count.evaluate(context) - else: - count = self.count - - return self.type._read_array(stream, max(0, count)) - - def _write(self, f, data): - if self.count is None: - return self.type._write_0(f, data) - - return self.type._write_array(f, data) - - def default(self): - if self.dynamic or self.count is None: - return [] - - return [self.type.default() for i in xrange(self.count)] - - def __repr__(self): - if self.count is None: - return '{0!r}[]'.format(self.type) - - return '{0!r}[{1}]'.format(self.type, self.count) - - def __len__(self): - if self.dynamic: - raise TypeError("Dynamic size") - - return len(self.type) * self.count - - -class PackedType(RawType): - """Implements a packed type that uses Python struct packing characters.""" - - def __init__(self, cstruct, name, size, packchar): - self.packchar = packchar - super(PackedType, self).__init__(cstruct, name, size) - - def _read(self, stream): - return self._read_array(stream, 1)[0] - - def _read_array(self, stream, count): - length = self.size * count - data = stream.read(length) - fmt = self.cstruct.endian + str(count) + self.packchar - if len(data) != length: - raise EOFError("Read %d bytes, but expected %d" % (len(data), length)) - - return list(struct.unpack(fmt, data)) - - def _read_0(self, stream): - r = [] - while True: - d = stream.read(self.size) - v = struct.unpack(self.cstruct.endian + self.packchar, d)[0] - - if v == 0: - break - - r.append(v) - - return r - - def _write(self, stream, data): - return self._write_array(stream, [data]) - - def _write_array(self, stream, data): - fmt = self.cstruct.endian + str(len(data)) 
+ self.packchar - return stream.write(struct.pack(fmt, *data)) - - def _write_0(self, stream, data): - return self._write_array(stream, data + [0]) - - def default(self): - return 0 - - def default_array(self, count): - return [0] * count - - -class CharType(RawType): - """Implements a character type that can properly handle strings.""" - - def __init__(self, cstruct): - super(CharType, self).__init__(cstruct, 'char', 1) - - def _read(self, stream): - return stream.read(1) - - def _read_array(self, stream, count): - if count == 0: - return b'' - - return stream.read(count) - - def _read_0(self, stream): - r = [] - while True: - c = stream.read(1) - if c == b'': - raise EOFError() - - if c == b'\x00': - break - - r.append(c) - - return b''.join(r) - - def _write(self, stream, data): - if isinstance(data, int): - data = chr(data) - - if PY3 and isinstance(data, str): - data = data.encode('latin-1') - - return stream.write(data) - - def _write_array(self, stream, data): - return self._write(stream, data) - - def _write_0(self, stream, data): - return self._write(stream, data + b'\x00') - - def default(self): - return b'\x00' - - def default_array(self, count): - return b'\x00' * count - - -class WcharType(RawType): - """Implements a wide-character type.""" - - def __init__(self, cstruct): - super(WcharType, self).__init__(cstruct, 'wchar', 2) - - @property - def encoding(self): - if self.cstruct.endian == '<': - return 'utf-16-le' - elif self.cstruct.endian == '>': - return 'utf-16-be' - - def _read(self, stream): - return stream.read(2).decode(self.encoding) - - def _read_array(self, stream, count): - if count == 0: - return u'' - - data = stream.read(2 * count) - return data.decode(self.encoding) - - def _read_0(self, stream): - r = b'' - while True: - c = stream.read(2) - - if len(c) != 2: - raise EOFError() - - if c == b'\x00\x00': - break - - r += c - - return r.decode(self.encoding) - - def _write(self, stream, data): - return 
stream.write(data.encode(self.encoding)) - - def _write_array(self, stream, data): - return self._write(stream, data) - - def _write_0(self, stream, data): - return self._write(stream, data + u'\x00') - - def default(self): - return u'\x00' - - def default_array(self, count): - return u'\x00' * count - - -class BytesInteger(RawType): - """Implements an integer type that can span an arbitrary amount of bytes.""" - - def __init__(self, cstruct, name, size, signed): - self.signed = signed - super(BytesInteger, self).__init__(cstruct, name, size) - - @staticmethod - def parse(buf, size, count, signed, endian): - nums = [] - - for c in xrange(count): - num = 0 - data = buf[c * size:(c + 1) * size] - if endian == '<': - data = b''.join(data[i:i + 1] for i in reversed(xrange(len(data)))) - - ints = list(data) if PY3 else map(ord, data) - for i in ints: - num = (num << 8) | i - - if signed and num & 1 << (size * 8 - 1): - bias = 1 << (size * 8 - 1) - num -= bias * 2 - - nums.append(num) - - return nums - - @staticmethod - def pack(data, size, endian): - buf = [] - for i in data: - num = int(i) - if num < 0: - num += 1 << (size * 8) - - d = [b'\x00'] * size - i = size - 1 - - while i >= 0: - b = num & 255 - d[i] = bytes((b,)) if PY3 else chr(b) - num >>= 8 - i -= 1 - - if endian == '<': - d = b''.join(d[i:i + 1][0] for i in reversed(xrange(len(d)))) - else: - d = b''.join(d) - - buf.append(d) - - return b''.join(buf) - - def _read(self, stream): - return self.parse(stream.read(self.size * 1), self.size, 1, self.signed, self.cstruct.endian)[0] - - def _read_array(self, stream, count): - return self.parse(stream.read(self.size * count), self.size, count, self.signed, self.cstruct.endian) - - def _read_0(self, stream): - r = [] - while True: - v = self._read(stream) - if v == 0: - break - r.append(v) - - return r - - def _write(self, stream, data): - return stream.write(self.pack([data], self.size, self.cstruct.endian)) - - def _write_array(self, stream, data): - return 
stream.write(self.pack(data, self.size, self.cstruct.endian)) - - def _write_0(self, stream, data): - return self._write_array(stream, data + [0]) - - def default(self): - return 0 - - def default_array(self, count): - return [0] * count - - -class Enum(RawType): - """Implements an Enum type. - - Enums can be made using any type. The API for accessing enums and their - values is very similar to Python 3 native enums. - - Example: - When using the default C-style parser, the following syntax is supported: - - enum [: ] { - - }; - - For example, an enum that has A=1, B=5 and C=6 could be written like so: - - enum Test : uint16 { - A, B=5, C - }; - """ - - def __init__(self, cstruct, name, type_, values): - self.type = type_ - self.values = values - self.reverse = {} - - for k, v in values.items(): - self.reverse[v] = k - - super(Enum, self).__init__(cstruct, name, len(self.type)) - - def __call__(self, value): - return EnumInstance(self, value) - - def _read(self, stream): - v = self.type._read(stream) - return self(v) - - def _read_array(self, stream, count): - return list(map(self, self.type._read_array(stream, count))) - - def _read_0(self, stream): - return list(map(self, self.type._read_0(stream))) - - def _write(self, stream, data): - data = data.value if isinstance(data, EnumInstance) else data - return self.type._write(stream, data) - - def _write_array(self, stream, data): - data = [d.value if isinstance(d, EnumInstance) else d for d in data] - return self.type._write_array(stream, data) - - def _write_0(self, stream, data): - data = [d.value if isinstance(d, EnumInstance) else d for d in data] - return self.type._write_0(stream, data) - - def default(self): - return self(0) - - def __getitem__(self, attr): - if attr in self.values: - return self(self.values[attr]) - - raise KeyError(attr) - - def __getattr__(self, attr): - if attr in self.values: - return self(self.values[attr]) - - raise AttributeError(attr) - - def __contains__(self, attr): - return attr 
in self.values - - -class EnumInstance(object): - """Implements a value instance of an Enum""" - - def __init__(self, enum, value): - self.enum = enum - self.value = value - - @property - def name(self): - if self.value not in self.enum.reverse: - return '{}_{}'.format(self.enum.name, self.value) - return self.enum.reverse[self.value] - - def __eq__(self, value): - if isinstance(value, EnumInstance) and value.enum is not self.enum: - return False - - if hasattr(value, 'value'): - value = value.value - - return self.value == value - - def __ne__(self, value): - return self.__eq__(value) is False - - def __hash__(self): - return hash((self.enum, self.value)) - - def __str__(self): - return '{}.{}'.format(self.enum.name, self.name) - - def __repr__(self): - return '<{}.{}: {}>'.format(self.enum.name, self.name, self.value) - - -class Union(RawType): - def __init__(self, cstruct): - self.cstruct = cstruct - super(Union, self).__init__(cstruct) - - def _read(self, stream): - raise NotImplementedError() - - -class Pointer(RawType): - """Implements a pointer to some other type.""" - - def __init__(self, cstruct, target): - self.cstruct = cstruct - self.type = target - super(Pointer, self).__init__(cstruct) - - def _read(self, stream, ctx): - addr = self.cstruct.pointer(stream) - return PointerInstance(self.type, stream, addr, ctx) - - def __len__(self): - return len(self.cstruct.pointer) - - def __repr__(self): - return ''.format(self.type) - - -class VoidType(RawType): - """Implements a void type.""" - - def __init__(self): - super(VoidType, self).__init__(None, 'void', 0) - - def _read(self, stream): - return None + s = BytesIO() + self.write(s) + return s.getvalue() def ctypes(structure): @@ -1427,15 +277,15 @@ def ctypes(structure): t = ctypes_type(field.type) fields.append((field.name, t)) - tt = type(structure.name, (_ctypes.Structure, ), {"_fields_": fields}) + tt = type(structure.name, (_ctypes.Structure,), {'_fields_': fields}) return tt def ctypes_type(t): 
mapping = { - "I": _ctypes.c_ulong, - "i": _ctypes.c_long, - "b": _ctypes.c_int8, + 'I': _ctypes.c_ulong, + 'i': _ctypes.c_long, + 'b': _ctypes.c_int8, } if isinstance(t, PackedType): @@ -1453,442 +303,3 @@ def ctypes_type(t): return ctypes.POINTER(subtype) raise NotImplementedError("Type not implemented: %s" % t.__class__.__name__) - - -class Compiler(object): - """Compiler for cstruct structures. Creates somewhat optimized parsing code.""" - - def __init__(self, cstruct): - self.cstruct = cstruct - - def compile(self, structure): - source = self.gen_struct_class(structure) - c = compile(source, '', 'exec') - - env = { - 'OrderedDict': OrderedDict, - 'Structure': Structure, - 'Instance': Instance, - 'Expression': Expression, - 'EnumInstance': EnumInstance, - 'PointerInstance': PointerInstance, - 'BytesInteger': BytesInteger, - 'BitBuffer': BitBuffer, - 'struct': struct, - 'xrange': xrange, - } - - exec(c, env) - sc = env[structure.name](self.cstruct, structure, source) - - return sc - - def gen_struct_class(self, structure): - blocks = [] - classes = [] - cur_block = [] - read_size = 0 - prev_was_bits = False - - for field in structure.fields: - ft = self.cstruct.resolve(field.type) - - if not isinstance( - ft, - ( - Structure, - Pointer, - Enum, - Array, - PackedType, - CharType, - WcharType, - BytesInteger, - ), - ): - raise CompilerError("Unsupported type for compiler: {}".format(ft)) - - if isinstance(ft, Structure) or ( - isinstance(ft, Array) and isinstance(ft.type, Structure) - ): - if cur_block: - blocks.append(self.gen_read_block(read_size, cur_block)) - - struct_read = 's = stream.tell()\n' - if isinstance(ft, Array): - num = ft.count - - if isinstance(num, Expression): - num = 'max(0, Expression(self.cstruct, "{expr}").evaluate(r))'.format( - expr=num.expr - ) - - struct_read += ( - 'r["{name}"] = []\n' - 'for _ in xrange({num}):\n' - ' r["{name}"].append(self.cstruct.{struct_name}._read(stream))\n'.format( - name=field.name, num=num, 
struct_name=ft.type.name - ) - ) - else: - struct_read += 'r["{name}"] = self.cstruct.{struct_name}._read(stream)\n'.format( - name=field.name, struct_name=ft.name - ) - - struct_read += 'sizes["{name}"] = stream.tell() - s'.format( - name=field.name - ) - blocks.append(struct_read) - read_size = 0 - cur_block = [] - continue - - if field.bits: - if cur_block: - blocks.append(self.gen_read_block(read_size, cur_block)) - - blocks.append( - 'r["{name}"] = bitreader.read(self.cstruct.{type_name}, {bits})'.format( - name=field.name, type_name=field.type.name, bits=field.bits - ) - ) - read_size = 0 - cur_block = [] - prev_was_bits = True - continue - elif prev_was_bits: - blocks.append('bitreader.reset()') - prev_was_bits = False - - try: - count = len(ft) - read_size += count - cur_block.append(field) - except Exception: - if cur_block: - blocks.append(self.gen_read_block(read_size, cur_block)) - blocks.append(self.gen_dynamic_block(field)) - read_size = 0 - cur_block = [] - - if len(cur_block): - blocks.append(self.gen_read_block(read_size, cur_block)) - - read_code = '\n\n'.join(blocks) - read_code = '\n'.join([' ' * 2 + line for line in read_code.split('\n')]) - - classes.append(COMPILE_TEMPL.format(name=structure.name, read_code=read_code)) - return '\n\n'.join(classes) - - def gen_read_block(self, size, block): - templ = ( - 'buf = stream.read({size})\n' - 'if len(buf) != {size}: raise EOFError()\n' - 'data = struct.unpack(self.cstruct.endian + "{{}}", buf)\n' - '{{}}'.format(size=size) - ) - readcode = [] - fmt = [] - - curtype = None - curcount = 0 - - buf_offset = 0 - data_offset = 0 - - for field in block: - ft = self.cstruct.resolve(field.type) - t = ft - count = 1 - data_count = 1 - read_slice = '' - - if isinstance(t, Enum): - t = t.type - elif isinstance(t, Pointer): - t = self.cstruct.pointer - - if isinstance(ft, Array): - count = t.count - data_count = count - t = t.type - - if isinstance(t, Enum): - t = t.type - elif isinstance(t, Pointer): - t = 
self.cstruct.pointer - - if isinstance(t, (CharType, WcharType, BytesInteger)): - read_slice = '{}:{}'.format( - buf_offset, buf_offset + (count * t.size) - ) - else: - read_slice = '{}:{}'.format(data_offset, data_offset + count) - elif isinstance(t, CharType): - read_slice = str(buf_offset) - elif isinstance(t, (WcharType, BytesInteger)): - read_slice = '{}:{}'.format(buf_offset, buf_offset + t.size) - else: - read_slice = str(data_offset) - - if not curtype: - if isinstance(t, PackedType): - curtype = t.packchar - else: - curtype = 'x' - - if isinstance(t, (PackedType, CharType, WcharType, BytesInteger, Enum)): - charcount = count - - if isinstance(t, (CharType, WcharType, BytesInteger)): - data_count = 0 - packchar = 'x' - charcount *= t.size - else: - packchar = t.packchar - - if curtype != packchar: - fmt.append('{}{}'.format(curcount, curtype)) - curcount = 0 - - curcount += charcount - curtype = packchar - - getter = '' - if isinstance(t, BytesInteger): - getter = 'BytesInteger.parse(buf[{slice}], {size}, {count}, {signed}, self.cstruct.endian){data_slice}'.format( - slice=read_slice, - size=t.size, - count=count, - signed=t.signed, - data_slice='[0]' if count == 1 else '', - ) - elif isinstance(t, (CharType, WcharType)): - getter = 'buf[{}]'.format(read_slice) - if isinstance(t, WcharType): - getter += ".decode('utf-16-le' if self.cstruct.endian == '<' else 'utf-16-be')" - else: - getter = 'data[{}]'.format(read_slice) - - if isinstance(ft, Enum): - getter = 'EnumInstance(self.cstruct.{type_name}, {getter})'.format( - type_name=ft.name, getter=getter - ) - elif isinstance(ft, Array) and isinstance(ft.type, Enum): - getter = '[EnumInstance(self.cstruct.{type_name}, d) for d in {getter}]'.format( - type_name=ft.type.name, getter=getter - ) - elif isinstance(ft, Pointer): - getter = 'PointerInstance(self.cstruct.{type_name}, stream, {getter}, r)'.format( - type_name=ft.type.name, getter=getter - ) - elif isinstance(ft, Array) and isinstance(ft.type, Pointer): 
- getter = '[PointerInstance(self.cstruct.{type_name}, stream, d, r) for d in {getter}]'.format( - type_name=ft.type.name, getter=getter - ) - elif isinstance(ft, Array) and isinstance(t, PackedType): - getter = 'list({})'.format(getter) - - readcode.append( - 'r["{name}"] = {getter}'.format(name=field.name, getter=getter) - ) - readcode.append( - 'sizes["{name}"] = {size}'.format(name=field.name, size=count * t.size) - ) - - data_offset += data_count - buf_offset += count * t.size - - if curcount: - fmt.append('{}{}'.format(curcount, curtype)) - - return templ.format(''.join(fmt), '\n'.join(readcode)) - - def gen_dynamic_block(self, field): - if not isinstance(field.type, Array): - raise CompilerError( - "Only Array can be dynamic, got {!r}".format(field.type) - ) - - t = field.type.type - reader = None - - if not field.type.count: # Null terminated - if isinstance(t, PackedType): - reader = ( - 't = []\nwhile True:\n' - ' d = stream.read({size})\n' - ' if len(d) != {size}: raise EOFError()\n' - ' v = struct.unpack(self.cstruct.endian + "{packchar}", d)[0]\n' - ' if v == 0: break\n' - ' t.append(v)'.format(size=t.size, packchar=t.packchar) - ) - - elif isinstance(t, (CharType, WcharType)): - reader = ( - 't = []\n' - 'while True:\n' - ' c = stream.read({size})\n' - ' if len(c) != {size}: raise EOFError()\n' - ' if c == b"{null}": break\n' - ' t.append(c)\nt = b"".join(t)'.format( - size=t.size, null='\\x00' * t.size - ) - ) - - if isinstance(t, WcharType): - reader += ".decode('utf-16-le' if self.cstruct.endian == '<' else 'utf-16-be')" - elif isinstance(t, BytesInteger): - reader = ( - 't = []\n' - 'while True:\n' - ' d = stream.read({size})\n' - ' if len(d) != {size}: raise EOFError()\n' - ' v = BytesInteger.parse(d, {size}, 1, {signed}, self.cstruct.endian)\n' - ' if v == 0: break\n' - ' t.append(v)'.format(size=t.size, signed=t.signed) - ) - - return '{reader}\nr["{name}"] = t\nsizes["{name}"] = len(t)'.format( - reader=reader, name=field.name - ) - else: - 
expr = field.type.count.expr - expr_read = ( - 'dynsize = max(0, Expression(self.cstruct, "{expr}").evaluate(r))\n' - 'buf = stream.read(dynsize * {type_size})\n' - 'if len(buf) != dynsize * {type_size}: raise EOFError()\n' - 'r["{name}"] = {{reader}}\n' - 'sizes["{name}"] = dynsize * {type_size}'.format( - expr=expr, name=field.name, type_size=t.size - ) - ) - - if isinstance(t, PackedType): - reader = 'list(struct.unpack(self.cstruct.endian + "{{:d}}{packchar}".format(dynsize), buf))'.format( - packchar=t.packchar, type_size=t.size - ) - elif isinstance(t, (CharType, WcharType)): - reader = 'buf' - if isinstance(t, WcharType): - reader += ".decode('utf-16-le' if self.cstruct.endian == '<' else 'utf-16-be')" - elif isinstance(t, BytesInteger): - reader = 'BytesInteger.parse(buf, {size}, dynsize, {signed}, self.cstruct.endian)'.format( - size=t.size, signed=t.signed - ) - - return expr_read.format(reader=reader, size=None) - - -def hexdump(s, palette=None, offset=0, prefix=""): - """Hexdump some data. - - Args: - s: Bytes to hexdump. - palette: Colorize the hexdump using this color pattern. - offset: Byte offset of the hexdump. - prefix: Optional prefix. - """ - if palette: - palette = palette[::-1] - - remaining = 0 - active = None - - for i in xrange(0, len(s), 16): - vals = "" - chars = [] - for j in xrange(16): - if not active and palette: - remaining, active = palette.pop() - vals += active - elif active and j == 0: - vals += active - - if i + j >= len(s): - vals += " " - else: - c = s[i + j] - c = chr(c) if PY3 else c - p = c if c in PRINTABLE else "." 
- - if active: - vals += "{:02x}".format(ord(c)) - chars.append(active + p + COLOR_NORMAL) - else: - vals += "{:02x}".format(ord(c)) - chars.append(p) - - remaining -= 1 - if remaining == 0: - active = None - - if palette is not None: - vals += COLOR_NORMAL - - if j == 15: - if palette is not None: - vals += COLOR_NORMAL - - vals += " " - - if j == 7: - vals += " " - - chars = "".join(chars) - print("{}{:08x} {:48s} {}".format(prefix, offset + i, vals, chars)) - - -def dumpstruct(t, data=None, offset=0): - """Dump a structure or parsed structure instance. - - Prints a colorized hexdump and parsed structure output. - - Args: - t: Structure or Instance to dump. - data: Bytes to parse the Structure on, if t is not a parsed Instance. - offset: Byte offset of the hexdump. - """ - colors = [ - (COLOR_RED, COLOR_BG_RED), - (COLOR_GREEN, COLOR_BG_GREEN), - (COLOR_YELLOW, COLOR_BG_YELLOW), - (COLOR_BLUE, COLOR_BG_BLUE), - (COLOR_PURPLE, COLOR_BG_PURPLE), - (COLOR_CYAN, COLOR_BG_CYAN), - (COLOR_WHITE, COLOR_BG_WHITE), - ] - - if isinstance(t, Instance): - g = t - t = t._type - data = g.dumps() - elif isinstance(t, Structure) and data: - g = t(data) - else: - raise ValueError("Invalid arguments") - - palette = [] - ci = 0 - out = "struct {}".format(t.name) + ":\n" - for field in g._type.fields: - fg, bg = colors[ci % len(colors)] - palette.append((g._size(field.name), bg)) - ci += 1 - - v = getattr(g, field.name) - if isinstance(v, str): - v = repr(v) - elif isinstance(v, int): - v = hex(v) - elif isinstance(v, list): - v = pprint.pformat(v) - if '\n' in v: - v = v.replace('\n', '\n{}'.format(' ' * (len(field.name) + 4))) - - out += "- {}{}{}: {}\n".format(fg, field.name, COLOR_NORMAL, v) - - print() - hexdump(data, palette, offset=offset) - print() - print(out) diff --git a/dissect/cstruct/exceptions.py b/dissect/cstruct/exceptions.py new file mode 100644 index 0000000..d651205 --- /dev/null +++ b/dissect/cstruct/exceptions.py @@ -0,0 +1,16 @@ +class Error(Exception): + 
"""Base class for exceptions for this module. + It is used to recognize errors specific to this module""" + pass + + +class ParserError(Error): + pass + + +class ResolveError(Error): + pass + + +class NullPointerDereference(Error): + pass diff --git a/dissect/cstruct/expression.py b/dissect/cstruct/expression.py new file mode 100644 index 0000000..ed5293a --- /dev/null +++ b/dissect/cstruct/expression.py @@ -0,0 +1,77 @@ +class Expression(object): + """Expression parser for simple calculations in definitions.""" + + operators = [ + ('*', lambda a, b: a * b), + ('/', lambda a, b: a // b), + ('%', lambda a, b: a % b), + ('+', lambda a, b: a + b), + ('-', lambda a, b: a - b), + ('>>', lambda a, b: a >> b), + ('<<', lambda a, b: a << b), + ('&', lambda a, b: a & b), + ('^', lambda a, b: a ^ b), + ('|', lambda a, b: a | b), + ] + + def __init__(self, cstruct, expression): + self.cstruct = cstruct + self.expression = expression + + def __repr__(self): + return self.expression + + def evaluate(self, context={}): + level = 0 + levels = [] + buf = '' + + for i in range(len(self.expression)): + if self.expression[i] == '(': + level += 1 + levels.append(buf) + buf = '' + continue + + if self.expression[i] == ')': + level -= 1 + value = self.evaluate_part(buf, context) + buf = levels.pop() + buf += str(value) + continue + + buf += self.expression[i] + + return self.evaluate_part(buf, context) + + def evaluate_part(self, buf, context): + buf = buf.strip() + + # Very simple way to support an expression(part) that is a single, + # negative value. To use negative values in more complex expressions, + # they must be wrapped in brackets, e.g.: 2 * (-5). + # + # To have full support for the negation operator a proper expression + # parser must be build. 
+ if buf.startswith('-') and buf[1:].isnumeric(): + return int(buf) + + for operator in self.operators: + if operator[0] in buf: + a, b = buf.rsplit(operator[0], 1) + + return operator[1]( + self.evaluate_part(a, context), + self.evaluate_part(b, context) + ) + + if buf in context: + return context[buf] + + if buf.startswith('0x'): + return int(buf, 16) + + if buf in self.cstruct.consts: + return self.cstruct.consts[buf] + + return int(buf) diff --git a/dissect/cstruct/parser.py b/dissect/cstruct/parser.py new file mode 100644 index 0000000..f548cc0 --- /dev/null +++ b/dissect/cstruct/parser.py @@ -0,0 +1,582 @@ +import re +import ast +from dissect.cstruct.compiler import Compiler +from dissect.cstruct.exceptions import ParserError +from dissect.cstruct.expression import Expression +from dissect.cstruct.types.base import Array +from dissect.cstruct.types.structure import Structure, Field, Union +from dissect.cstruct.types.flag import Flag +from dissect.cstruct.types.enum import Enum +from dissect.cstruct.types.pointer import Pointer + + +class Parser(object): + """Base class for definition parsers. + + Args: + cs: An instance of cstruct. + """ + + def __init__(self, cs): + self.cstruct = cs + + def parse(self, data): + """This function should parse definitions to cstruct types. + + Args: + data: Data to parse definitions from, usually a string. + """ + raise NotImplementedError() + + +class TokenParser(Parser): + """ + Args: + cs: An instance of cstruct. + compiled: Whether structs should be compiled or not. 
+ """ + + def __init__(self, cs, compiled=True): + super().__init__(cs) + + self.compiler = Compiler(self.cstruct) if compiled else None + self.TOK = self._tokencollection() + + @staticmethod + def _tokencollection(): + TOK = TokenCollection() + TOK.add(r'#\[(?P[^\]]+)\](?=\s*)', 'CONFIG_FLAG') + TOK.add(r'#define\s+(?P[^\s]+)\s+(?P[^\r\n]+)\s*', 'DEFINE') + TOK.add(r'typedef(?=\s)', 'TYPEDEF') + TOK.add(r'(?:struct|union)(?=\s|{)', 'STRUCT') + TOK.add(r'(?Penum|flag)\s+(?P[^\s:{]+)\s*(:\s' + r'*(?P[^\s]+)\s*)?\{(?P[^}]+)\}\s*(?=;)', 'ENUM') + TOK.add(r'(?<=})\s*(?P(?:[a-zA-Z0-9_]+\s*,\s*)+[a-zA-Z0-9_]+)\s*(?=;)', 'DEFS') + TOK.add(r'(?P\*?[a-zA-Z0-9_]+)(?:\s*:\s*(?P\d+))?(?:\[(?P[^;\n]*)\])?\s*(?=;)', 'NAME') + TOK.add(r'[a-zA-Z_][a-zA-Z0-9_]*', 'IDENTIFIER') + TOK.add(r'[{}]', 'BLOCK') + TOK.add(r'\$(?P[^\s]+) = (?P{[^}]+})\w*[\r\n]+', 'LOOKUP') + TOK.add(r';', 'EOL') + TOK.add(r'\s+', None) + TOK.add(r'.', None) + + return TOK + + def _constant(self, tokens): + const = tokens.consume() + pattern = self.TOK.patterns[self.TOK.DEFINE] + match = pattern.match(const.value).groupdict() + + value = match['value'] + try: + value = ast.literal_eval(value) + except (ValueError, SyntaxError): + pass + + try: + value = Expression(self.cstruct, value).evaluate() + except Exception: + pass + + self.cstruct.consts[match['name']] = value + + def _enum(self, tokens): + # We cheat with enums because the entire enum is in the token + etok = tokens.consume() + + pattern = self.TOK.patterns[self.TOK.ENUM] + # Dirty trick because the regex expects a ; but we don't want it to be part of the value + # TODO: do we? 
+ d = pattern.match(etok.value + ';').groupdict() + enumtype = d['enumtype'] + + nextval = 0 + if enumtype == 'flag': + nextval = 1 + + values = {} + for line in d['values'].splitlines(): + for v in line.split(','): + key, sep, val = v.partition('=') + key = key.strip() + val = val.strip() + if not key: + continue + if not val: + val = nextval + else: + val = Expression(self.cstruct, val).evaluate() + + if enumtype == 'flag': + high_bit = val.bit_length() - 1 + nextval = 2 ** (high_bit + 1) + else: + nextval = val + 1 + + values[key] = val + + if not d['type']: + d['type'] = 'uint32' + + enumcls = Enum + if enumtype == 'flag': + enumcls = Flag + + enum = enumcls( + self.cstruct, d['name'], self.cstruct.resolve(d['type']), values + ) + self.cstruct.addtype(enum.name, enum) + + tokens.eol() + + def _typedef(self, tokens): + tokens.consume() + type_ = None + + if tokens.next == self.TOK.IDENTIFIER: + ident = tokens.consume() + type_ = self.cstruct.resolve(ident.value) + elif tokens.next == self.TOK.STRUCT: + # The register thing is a bit dirty + # Basically consumes all NAME tokens and + # registers the struct + type_ = self._struct(tokens, register=True) + + names = self._names(tokens) + for name in names: + self.cstruct.addtype(name, type_) + + def _struct(self, tokens, register=False): + stype = tokens.consume() + + names = [] + if tokens.next == self.TOK.IDENTIFIER: + ident = tokens.consume() + names.append(ident.value) + + if tokens.next == self.TOK.NAME: + if not len(names): + raise ParserError("line {:d}: unexpected anonymous struct".format( + self._lineno(tokens.next) + )) + return self.cstruct.resolve(names[0]) + + if tokens.next != self.TOK.BLOCK: + raise ParserError(f"line {self._lineno(tokens.next):d}: expected start of block '{tokens.next}'") + + fields = [] + tokens.consume() + while len(tokens): + if tokens.next == self.TOK.BLOCK and tokens.next.value == '}': + tokens.consume() + break + + field = self._parse_field(tokens) + fields.append(field) + + # 
Parsing names consumes the EOL token + names.extend(self._names(tokens)) + name = names[0] if names else None + + if stype.value.startswith('union'): + class_ = Union + else: + class_ = Structure + is_anonymous = False + if not name: + is_anonymous = True + name = self.cstruct._next_anonymous() + + st = class_(self.cstruct, name, fields, anonymous=is_anonymous) + if self.compiler and 'nocompile' not in tokens.flags: + st = self.compiler.compile(st) + + # This is pretty dirty + if register: + if not names: + raise ParserError("line {:d}: struct has no name".format( + self._lineno(stype) + )) + + for name in names: + self.cstruct.addtype(name, st) + tokens.reset_flags() + return st + + def _lookup(self, tokens): + # Just like enums, we cheat and have the entire lookup in the token + ltok = tokens.consume() + + pattern = self.TOK.patterns[self.TOK.LOOKUP] + # Dirty trick because the regex expects a ; but we don't want it to be part of the value + # TODO: do we? + m = pattern.match(ltok.value + ';') + d = ast.literal_eval(m.group(2)) + self.cstruct.lookups[m.group(1)] = dict( + [(self.cstruct.consts[k], v) for k, v in d.items()] + ) + + def _parse_field(self, tokens): + type_ = None + if tokens.next == self.TOK.IDENTIFIER: + ident = tokens.consume() + type_ = self.cstruct.resolve(ident.value) + elif tokens.next == self.TOK.STRUCT: + type_ = self._struct(tokens) + if tokens.next != self.TOK.NAME: + return Field(type_.name, type_) + + if tokens.next != self.TOK.NAME: + raise ParserError("line {:d}: expected name".format(self._lineno(tokens.next))) + nametok = tokens.consume() + + pattern = self.TOK.patterns[self.TOK.NAME] + # Dirty trick because the regex expects a ; but we don't want it to be part of the value + d = pattern.match(nametok.value + ';').groupdict() + + name = d['name'] + count = d['count'] + if count is not None: + if count == '': + count = None + else: + count = Expression(self.cstruct, count) + try: + count = count.evaluate() + except Exception: + pass + 
+ type_ = Array(self.cstruct, type_, count) + + if name.startswith('*'): + name = name[1:] + type_ = Pointer(self.cstruct, type_) + + tokens.eol() + return Field(name, type_, int(d['bits']) if d['bits'] else None) + + def _names(self, tokens): + names = [] + while True: + if tokens.next == self.TOK.EOL: + tokens.eol() + break + + if tokens.next not in (self.TOK.NAME, self.TOK.DEFS): + break + + ntoken = tokens.consume() + if ntoken == self.TOK.NAME: + names.append(ntoken.value) + elif ntoken == self.TOK.DEFS: + for name in ntoken.value.strip().split(','): + names.append(name.strip()) + + return names + + @staticmethod + def _remove_comments(string): + # https://stackoverflow.com/a/18381470 + pattern = r"(\".*?\"|\'.*?\')|(/\*.*?\*/|//[^\r\n]*$)" + # first group captures quoted strings (double or single) + # second group captures comments (//single-line or /* multi-line */) + regex = re.compile(pattern, re.MULTILINE | re.DOTALL) + + def _replacer(match): + # if the 2nd group (capturing comments) is not None, + # it means we have captured a non-quoted (real) comment string. 
+ if match.group(2) is not None: + return "" # so we will return empty to remove the comment + else: # otherwise, we will return the 1st group + return match.group(1) # captured quoted-string + + return regex.sub(_replacer, string) + + @staticmethod + def _lineno(tok): + """Quick and dirty line number calculator""" + + match = tok.match + return match.string.count('\n', 0, match.start()) + + def _config_flag(self, tokens): + flag_token = tokens.consume() + pattern = self.TOK.patterns[self.TOK.CONFIG_FLAG] + tok_dict = pattern.match(flag_token.value).groupdict() + tokens.flags.extend(tok_dict['values'].split(',')) + + def parse(self, data): + scanner = re.Scanner(self.TOK.tokens) + data = self._remove_comments(data) + tokens, remaining = scanner.scan(data) + + if len(remaining): + raise ParserError("line {:d}: invalid syntax in definition".format( + data.count('\n', 0, len(data) - len(remaining)) + )) + + tokens = TokenConsumer(tokens) + while True: + token = tokens.next + if token is None: + break + + if token == self.TOK.CONFIG_FLAG: + self._config_flag(tokens) + elif token == self.TOK.DEFINE: + self._constant(tokens) + elif token == self.TOK.TYPEDEF: + self._typedef(tokens) + elif token == self.TOK.STRUCT: + self._struct(tokens, register=True) + elif token == self.TOK.ENUM: + self._enum(tokens) + elif token == self.TOK.LOOKUP: + self._lookup(tokens) + else: + raise ParserError(f"line {self._lineno(token):d}: unexpected token {token!r}") + + +class CStyleParser(Parser): + """Definition parser for C-like structure syntax. + + Args: + cs: An instance of cstruct + compiled: Whether structs should be compiled or not. 
+ """ + + def __init__(self, cs, compiled=True): + self.compiled = compiled + super().__init__(cs) + + def _constants(self, data): + r = re.finditer(r'#define\s+(?P[^\s]+)\s+(?P[^\r\n]+)\s*\n', data) + for t in r: + d = t.groupdict() + v = d['value'].rsplit('//')[0] + + try: + v = ast.literal_eval(v) + except (ValueError, SyntaxError): + pass + + self.cstruct.consts[d['name']] = v + + def _enums(self, data): + r = re.finditer( + r'(?Penum|flag)\s+(?P[^\s:{]+)\s*(:\s*(?P[^\s]+)\s*)?\{(?P[^}]+)\}\s*;', + data, + ) + for t in r: + d = t.groupdict() + enumtype = d['enumtype'] + + nextval = 0 + if enumtype == 'flag': + nextval = 1 + + values = {} + for line in d['values'].split('\n'): + line, sep, comment = line.partition("//") + for v in line.split(","): + key, sep, val = v.partition("=") + key = key.strip() + val = val.strip() + if not key: + continue + if not val: + val = nextval + else: + val = Expression(self.cstruct, val).evaluate() + + if enumtype == 'flag': + high_bit = val.bit_length() - 1 + nextval = 2 ** (high_bit + 1) + else: + nextval = val + 1 + + values[key] = val + + if not d['type']: + d['type'] = 'uint32' + + enumcls = Enum + if enumtype == 'flag': + enumcls = Flag + + enum = enumcls( + self.cstruct, d['name'], self.cstruct.resolve(d['type']), values + ) + self.cstruct.addtype(enum.name, enum) + + def _structs(self, data): + compiler = Compiler(self.cstruct) + r = re.finditer( + r'(#(?P(?:compile))\s+)?' + r'((?Ptypedef)\s+)?' + r'(?P[^\s]+)\s+' + r'(?P[^\s]+)?' + r'(?P' + r'\s*{[^}]+\}(?P\s+[^;\n]+)?' 
+ r')?\s*;', + data, + ) + for t in r: + d = t.groupdict() + + if d['name']: + name = d['name'] + elif d['defs']: + name = d['defs'].strip().split(',')[0].strip() + else: + raise ParserError("No name for struct") + + if d['type'] == 'struct': + data = self._parse_fields(d['fields'][1:-1].strip()) + st = Structure(self.cstruct, name, data) + if d['flags'] == 'compile' or self.compiled: + st = compiler.compile(st) + elif d['typedef'] == 'typedef': + st = d['type'] + else: + continue + + if d['name']: + self.cstruct.addtype(d['name'], st) + + if d['defs']: + for td in d['defs'].strip().split(','): + td = td.strip() + self.cstruct.addtype(td, st) + + def _parse_fields(self, s): + fields = re.finditer( + r'(?P[^\s]+)\s+(?P[^\s\[:]+)(:(?P\d+))?(\[(?P[^;\n]*)\])?;', + s, + ) + + result = [] + for f in fields: + d = f.groupdict() + if d['type'].startswith('//'): + continue + + type_ = self.cstruct.resolve(d['type']) + + d['name'] = d['name'].replace('(', '').replace(')', '') + + # Maybe reimplement lazy type references later + # _type = TypeReference(self, d['type']) + if d['count'] is not None: + if d['count'] == '': + count = None + else: + count = Expression(self.cstruct, d['count']) + try: + count = count.evaluate() + except Exception: + pass + + type_ = Array(self.cstruct, type_, count) + + if d['name'].startswith('*'): + d['name'] = d['name'][1:] + type_ = Pointer(self.cstruct, type_) + + field = Field(d['name'], type_, int(d['bits']) if d['bits'] else None) + result.append(field) + + return result + + def _lookups(self, data, consts): + r = re.finditer(r'\$(?P[^\s]+) = ({[^}]+})\w*\n', data) + + for t in r: + d = ast.literal_eval(t.group(2)) + self.cstruct.lookups[t.group(1)] = dict( + [(self.cstruct.consts[k], v) for k, v in d.items()] + ) + + # TODO: Implement proper parsing + def parse(self, data): + self._constants(data) + self._enums(data) + self._structs(data) + self._lookups(data, self.cstruct.consts) + + +class Token(object): + __slots__ = ('token', 'value', 
'match') + + def __init__(self, token, value, match): + self.token = token + self.value = value + self.match = match + + def __eq__(self, other): + if isinstance(other, Token): + other = other.token + + return self.token == other + + def __ne__(self, other): + return not self == other + + def __repr__(self): + return "".format(self.token, self.value) + + +class TokenCollection(object): + def __init__(self): + self.tokens = [] + self.lookup = {} + self.patterns = {} + + def __getattr__(self, attr): + try: + return self.lookup[attr] + except AttributeError: + pass + + return object.__getattribute__(self, attr) + + def add(self, regex, name): + if name is None: + self.tokens.append((regex, None)) + else: + self.lookup[name] = name + self.patterns[name] = re.compile(regex) + self.tokens.append((regex, lambda s, t: Token(name, t, s.match))) + + +class TokenConsumer(object): + def __init__(self, tokens): + self.tokens = tokens + self.flags = [] + + def __contains__(self, token): + return token in self.tokens + + def __len__(self): + return len(self.tokens) + + def __repr__(self): + return ''.format(self.next) + + @property + def next(self): + try: + return self.tokens[0] + except IndexError: + return None + + def consume(self): + return self.tokens.pop(0) + + def reset_flags(self): + self.flags = [] + + def eol(self): + token = self.consume() + if token.token != 'EOL': + raise ParserError("line {:d}: expected EOL".format(self._lineno(token))) diff --git a/dissect/cstruct/types/__init__.py b/dissect/cstruct/types/__init__.py new file mode 100644 index 0000000..e69de29 diff --git a/dissect/cstruct/types/base.py b/dissect/cstruct/types/base.py new file mode 100644 index 0000000..e03edcb --- /dev/null +++ b/dissect/cstruct/types/base.py @@ -0,0 +1,192 @@ +from io import BytesIO +from dissect.cstruct.expression import Expression + + +class BaseType(object): + """Base class for cstruct type classes.""" + + def __init__(self, cstruct): + self.cstruct = cstruct + + def 
__getitem__(self, count): + return Array(self.cstruct, self, count) + + def __call__(self, *args, **kwargs): + if len(args) > 0: + return self.read(*args, **kwargs) + + result = self.default() + if kwargs: + for k, v in kwargs.items(): + setattr(result, k, v) + + return result + + def reads(self, data): + """Parse the given data according to the type that implements this class. + + Args: + data: Byte string to parse. + + Returns: + The parsed value of this type. + """ + + return self._read(BytesIO(data)) + + def dumps(self, data): + """Dump the given data according to the type that implements this class. + + Args: + data: Data to dump. + + Returns: + The resulting bytes. + """ + out = BytesIO() + self._write(out, data) + return out.getvalue() + + def read(self, obj, *args, **kwargs): + """Parse the given data according to the type that implements this class. + + Args: + obj: Data to parse. Can be a (byte) string or a file-like object. + + Returns: + The parsed value of this type. + """ + if isinstance(obj, (str, bytes, memoryview)): + return self.reads(obj) + + return self._read(obj) + + def write(self, stream, data): + """Write the given data to a writable file-like object according to the + type that implements this class. + + Args: + stream: Writable file-like object to write to. + data: Data to write. + + Returns: + The amount of bytes written. 
+ """ + return self._write(stream, data) + + def _read(self, stream): + raise NotImplementedError() + + def _read_array(self, stream, count): + return [self._read(stream) for _ in range(count)] + + def _read_0(self, stream): + raise NotImplementedError() + + def _write(self, stream, data): + raise NotImplementedError() + + def _write_array(self, stream, data): + num = 0 + for i in data: + num += self._write(stream, i) + + return num + + def _write_0(self, stream, data): + raise NotImplementedError() + + def default(self): + """Return a default value of this type.""" + raise NotImplementedError() + + def default_array(self, count): + """Return a default array of this type.""" + return [self.default() for _ in range(count)] + + +class Array(BaseType): + """Implements a fixed or dynamically sized array type. + + Example: + When using the default C-style parser, the following syntax is supported: + + x[3] -> 3 -> static length. + x[] -> None -> null-terminated. + x[expr] -> expr -> dynamic length. 
+ """ + + def __init__(self, cstruct, type_, count): + self.type = type_ + self.count = count + self.null_terminated = self.count is None + self.dynamic = isinstance(self.count, Expression) + super().__init__(cstruct) + + def __repr__(self): + if self.null_terminated: + return '{0!r}[]'.format(self.type) + + return '{0!r}[{1}]'.format(self.type, self.count) + + def __len__(self): + if self.dynamic or self.null_terminated: + raise TypeError("Dynamic size") + + return len(self.type) * self.count + + def _read(self, stream, context=None): + if self.null_terminated: + return self.type._read_0(stream) + + if self.dynamic: + count = self.count.evaluate(context) + else: + count = self.count + + return self.type._read_array(stream, max(0, count)) + + def _write(self, f, data): + if self.null_terminated: + return self.type._write_0(f, data) + + return self.type._write_array(f, data) + + def default(self): + if self.dynamic or self.null_terminated: + return [] + + return self.type.default_array(self.count) + + +class RawType(BaseType): + """Base class for raw types that have a name and size.""" + + def __init__(self, cstruct, name=None, size=0): + self.name = name + self.size = size + super().__init__(cstruct) + + def __len__(self): + return self.size + + def __repr__(self): + if self.name: + return self.name + + return BaseType.__repr__(self) + + def _read(self, stream): + raise NotImplementedError() + + def _read_0(self, stream): + raise NotImplementedError() + + def _write(self, stream, data): + raise NotImplementedError() + + def _write_0(self, stream, data): + raise NotImplementedError() + + def default(self): + raise NotImplementedError() diff --git a/dissect/cstruct/types/bytesinteger.py b/dissect/cstruct/types/bytesinteger.py new file mode 100644 index 0000000..7ba5c85 --- /dev/null +++ b/dissect/cstruct/types/bytesinteger.py @@ -0,0 +1,90 @@ +from dissect.cstruct.types.base import RawType + + +class BytesInteger(RawType): + """Implements an integer type that can 
span an arbitrary amount of bytes.""" + + def __init__(self, cstruct, name, size, signed): + self.signed = signed + super().__init__(cstruct, name, size) + + @staticmethod + def parse(buf, size, count, signed, endian): + nums = [] + + for c in range(count): + num = 0 + data = buf[c * size:(c + 1) * size] + if endian == '<': # little-endian (LE) + data = b''.join(data[i:i + 1] for i in reversed(range(len(data)))) + + ints = list(data) + for i in ints: + num = (num << 8) | i + + if signed and (num & (1 << (size * 8 - 1))): + bias = 1 << (size * 8 - 1) + num -= bias * 2 + + nums.append(num) + + return nums + + @staticmethod + def pack(data, size, endian): + buf = [] + for i in data: + num = int(i) + if num < 0: + num += 1 << (size * 8) + + d = [b'\x00'] * size + i = size - 1 + + while i >= 0: + b = num & 255 + d[i] = bytes((b,)) + num >>= 8 + i -= 1 + + if endian == '<': + d = b''.join(d[i:i + 1][0] for i in reversed(range(len(d)))) + else: + d = b''.join(d) + + buf.append(d) + + return b''.join(buf) + + def _read(self, stream): + return self.parse(stream.read(self.size * 1), self.size, 1, self.signed, self.cstruct.endian)[0] + + def _read_array(self, stream, count): + return self.parse(stream.read(self.size * count), self.size, count, self.signed, self.cstruct.endian) + + def _read_0(self, stream): + result = [] + + while True: + v = self._read(stream) + if v == 0: + break + + result.append(v) + + return result + + def _write(self, stream, data): + return stream.write(self.pack([data], self.size, self.cstruct.endian)) + + def _write_array(self, stream, data): + return stream.write(self.pack(data, self.size, self.cstruct.endian)) + + def _write_0(self, stream, data): + return self._write_array(stream, data + [0]) + + def default(self): + return 0 + + def default_array(self, count): + return [0] * count diff --git a/dissect/cstruct/types/chartype.py b/dissect/cstruct/types/chartype.py new file mode 100644 index 0000000..8942bb4 --- /dev/null +++ 
b/dissect/cstruct/types/chartype.py @@ -0,0 +1,52 @@ +from dissect.cstruct.types.base import RawType + + +class CharType(RawType): + """Implements a character type that can properly handle strings.""" + + def __init__(self, cstruct): + super().__init__(cstruct, 'char', 1) + + def _read(self, stream): + return stream.read(1) + + def _read_array(self, stream, count): + if count == 0: + return b'' + + return stream.read(count) + + def _read_0(self, stream): + byte_array = [] + while True: + bytes_stream = stream.read(1) + if bytes_stream == b'': + raise EOFError() + + if bytes_stream == b'\x00': + break + + byte_array.append(bytes_stream) + + return b''.join(byte_array) + + def _write(self, stream, data): + if isinstance(data, int): + data = chr(data) + + if isinstance(data, str): + data = data.encode('latin-1') + + return stream.write(data) + + def _write_array(self, stream, data): + return self._write(stream, data) + + def _write_0(self, stream, data): + return self._write(stream, data + b'\x00') + + def default(self): + return b'\x00' + + def default_array(self, count): + return b'\x00' * count diff --git a/dissect/cstruct/types/enum.py b/dissect/cstruct/types/enum.py new file mode 100644 index 0000000..07f74fa --- /dev/null +++ b/dissect/cstruct/types/enum.py @@ -0,0 +1,113 @@ +from dissect.cstruct.types.base import RawType + + +class Enum(RawType): + """Implements an Enum type. + + Enums can be made using any type. The API for accessing enums and their + values is very similar to Python 3 native enums. 
+ + Example: + When using the default C-style parser, the following syntax is supported: + + enum [: ] { + + }; + + For example, an enum that has A=1, B=5 and C=6 could be written like so: + + enum Test : uint16 { + A, B=5, C + }; + """ + + def __init__(self, cstruct, name, type_, values): + self.type = type_ + self.values = values + self.reverse = {} + + for k, v in values.items(): + self.reverse[v] = k + + super().__init__(cstruct, name, len(self.type)) + + def __call__(self, value): + if isinstance(value, int): + return EnumInstance(self, value) + return super(Enum, self).__call__(value) + + def __getitem__(self, attr): + return self(self.values[attr]) + + def __getattr__(self, attr): + try: + return self(self.values[attr]) + except KeyError: + raise AttributeError(attr) + + def __contains__(self, attr): + return attr in self.values + + def _read(self, stream): + v = self.type._read(stream, ) + return self(v) + + def _read_array(self, stream, count): + return list(map(self, self.type._read_array(stream, count))) + + def _read_0(self, stream): + return list(map(self, self.type._read_0(stream))) + + def _write(self, stream, data): + data = data.value if isinstance(data, EnumInstance) else data + return self.type._write(stream, data) + + def _write_array(self, stream, data): + data = [d.value if isinstance(d, EnumInstance) else d for d in data] + return self.type._write_array(stream, data) + + def _write_0(self, stream, data): + data = [d.value if isinstance(d, EnumInstance) else d for d in data] + return self.type._write_0(stream, data) + + def default(self): + return self(0) + + def default_array(self, count): + return [self.default() for _ in range(count)] + + +class EnumInstance(object): + """Implements a value instance of an Enum""" + + def __init__(self, enum, value): + self.enum = enum + self.value = value + + def __eq__(self, value): + if isinstance(value, EnumInstance) and value.enum is not self.enum: + return False + + if hasattr(value, 'value'): + value 
= value.value + + return self.value == value + + def __ne__(self, value): + return self.__eq__(value) is False + + def __hash__(self): + return hash((self.enum, self.value)) + + def __str__(self): + return '{}.{}'.format(self.enum.name, self.name) + + def __repr__(self): + return '<{}.{}: {}>'.format(self.enum.name, self.name, self.value) + + @property + def name(self): + if self.value not in self.enum.reverse: + return '{}_{}'.format(self.enum.name, self.value) + + return self.enum.reverse[self.value] diff --git a/dissect/cstruct/types/flag.py b/dissect/cstruct/types/flag.py new file mode 100644 index 0000000..35a68ad --- /dev/null +++ b/dissect/cstruct/types/flag.py @@ -0,0 +1,105 @@ +from dissect.cstruct.types.enum import Enum, EnumInstance + + +class Flag(Enum): + """Implements a Flag type. + + Flags can be made using any type. The API for accessing flags and their + values is very similar to Python 3 native flags. + + Example: + When using the default C-style parser, the following syntax is supported: + + flag [: ] { + + }; + + For example, a flag that has A=1, B=4 and C=8 could be written like so: + + flag Test : uint16 { + A, B=4, C + }; + """ + + def __call__(self, value): + if isinstance(value, int): + return FlagInstance(self, value) + + return super(Enum, self).__call__(value) + + +class FlagInstance(EnumInstance): + """Implements a value instance of a Flag""" + + def __bool__(self): + return bool(self.value) + + __nonzero__ = __bool__ + + def __or__(self, other): + if hasattr(other, 'value'): + other = other.value + + return self.__class__(self.enum, self.value | other) + + def __and__(self, other): + if hasattr(other, 'value'): + other = other.value + + return self.__class__(self.enum, self.value & other) + + def __xor__(self, other): + if hasattr(other, 'value'): + other = other.value + + return self.__class__(self.enum, self.value ^ other) + + __ror__ = __or__ + __rand__ = __and__ + __rxor__ = __xor__ + + def __invert__(self): + return 
self.__class__(self.enum, ~self.value) + + def __str__(self): + if self.name is not None: + return '{}.{}'.format(self.enum.name, self.name) + + members, _ = self.decompose() + return '{}.{}'.format( + self.enum.name, + '|'.join([str(name or value) for name, value in members]), + ) + + def __repr__(self): + if self.name is not None: + return '<{}.{}: {}>'.format(self.enum.name, self.name, self.value) + + members, _ = self.decompose() + return '<{}.{}: {}>'.format( + self.enum.name, + '|'.join([str(name or value) for name, value in members]), + self.value + ) + + @property + def name(self): + return self.enum.reverse.get(self.value, None) + + def decompose(self): + members = [] + not_covered = self.value + + for name, value in self.enum.values.items(): + if value and ((value & self.value) == value): + members.append((name, value)) + not_covered &= ~value + + if not members: + members.append((None, self.value)) + + members.sort(key=lambda m: m[0], reverse=True) + if len(members) > 1 and members[0][1] == self.value: + members.pop(0) + + return members, not_covered diff --git a/dissect/cstruct/types/instance.py b/dissect/cstruct/types/instance.py new file mode 100644 index 0000000..80de294 --- /dev/null +++ b/dissect/cstruct/types/instance.py @@ -0,0 +1,68 @@ +from io import BytesIO + + +class Instance(object): + """Holds parsed structure data.""" + __slots__ = ('_type', '_values', '_sizes') + + def __init__(self, type_, values, sizes=None): + # Done in this manner to check if the attr is in the lookup + object.__setattr__(self, '_type', type_) + object.__setattr__(self, '_values', values) + object.__setattr__(self, '_sizes', sizes) + + def __getattr__(self, attr): + try: + return self._values[attr] + except KeyError: + raise AttributeError("Invalid attribute: %r" % attr) + + def __setattr__(self, attr, value): + if attr not in self._type.lookup: + raise AttributeError("Invalid attribute: %r" % attr) + + self._values[attr] = value + + def __getitem__(self, item): + 
return self._values[item] + + def __contains__(self, attr): + return attr in self._values + + def __repr__(self): + return '<%s %s>' % ( + self._type.name, + ', '.join( + [ + '%s=%s' % (k, hex(v) if isinstance(v, (int, int)) else repr(v)) + for k, v in self._values.items() + ] + ), + ) + + def __len__(self): + return len(self.dumps()) + + def _size(self, field): + return self._sizes[field] + + def write(self, fh): + """Write this structure to a writable file-like object. + + Args: + fh: File-like objects that supports writing. + + Returns: + The amount of bytes written. + """ + return self._type.write(fh, self) + + def dumps(self): + """Dump this structure to a byte string. + + Returns: + The raw bytes of this structure. + """ + s = BytesIO() + self.write(s) + return s.getvalue() diff --git a/dissect/cstruct/types/packedtype.py b/dissect/cstruct/types/packedtype.py new file mode 100644 index 0000000..7686cd1 --- /dev/null +++ b/dissect/cstruct/types/packedtype.py @@ -0,0 +1,53 @@ +import struct + +from dissect.cstruct.types.base import RawType + + +class PackedType(RawType): + """Implements a packed type that uses Python struct packing characters.""" + + def __init__(self, cstruct, name, size, packchar): + super().__init__(cstruct, name, size) + self.packchar = packchar + + def _read(self, stream): + return self._read_array(stream, 1)[0] + + def _read_array(self, stream, count): + length = self.size * count + data = stream.read(length) + fmt = self.cstruct.endian + str(count) + self.packchar + + if len(data) != length: + raise EOFError("Read %d bytes, but expected %d" % (len(data), length)) + + return list(struct.unpack(fmt, data)) + + def _read_0(self, stream): + byte_array = [] + while True: + bytes_stream = stream.read(self.size) + unpacked_struct = struct.unpack(self.cstruct.endian + self.packchar, bytes_stream)[0] + + if unpacked_struct == 0: + break + + byte_array.append(unpacked_struct) + + return byte_array + + def _write(self, stream, data): + return 
class PointerInstance(object):
    """Like the Instance class, but for structures referenced by a pointer.

    The pointed-to value is read lazily from the stream the first time it is
    needed and then cached. Attribute access and ``str()`` are forwarded to
    that value; truthiness reflects whether the address is non-zero.
    """

    def __init__(self, type_name, stream, addr, ctx):
        self._stream = stream
        self._type = type_name
        self._addr = addr
        self._ctx = ctx
        self._value = None

    def __getattr__(self, attr):
        # Only reached for names not found on the pointer itself; delegate to
        # the dereferenced value.
        return getattr(self._get(), attr)

    def __str__(self):
        return str(self._get())

    def __bool__(self):
        # Python 3 consults __bool__; with only __nonzero__ defined a null
        # pointer would evaluate as truthy.
        return self._addr != 0

    # Kept so existing explicit callers of the old name keep working.
    __nonzero__ = __bool__

    def __repr__(self):
        return "<Pointer {!r} @ 0x{:x}>".format(self._type, self._addr)

    def _get(self):
        """Dereference the pointer and return the (cached) target value.

        Raises:
            NullPointerDereference: If the address is 0.
        """
        if self._addr == 0:
            raise NullPointerDereference()

        if self._value is None:
            # Remember where the stream was so that dereferencing does not
            # disturb whoever is reading sequentially from it.
            position = self._stream.tell()
            self._stream.seek(self._addr)

            try:
                # Arrays receive the parsing context; other types are read
                # from the stream alone.
                if isinstance(self._type, Array):
                    value = self._type._read(self._stream, self._ctx)
                else:
                    value = self._type._read(self._stream)
            finally:
                # Restore the position even when reading the target fails.
                self._stream.seek(position)

            self._value = value

        return self._value
def _write(self, stream, data):
    """Write the fields of ``data`` to ``stream`` in declaration order.

    Bit fields are accumulated in a BitBuffer and flushed before the next
    regular field and once more after the last field. Anonymous nested
    structures take their values directly from ``data``; every other field
    is written from the attribute of the same name.

    Args:
        stream: Writable, tell()-able file-like object.
        data: Object exposing the field values as attributes.

    Returns:
        The number of bytes the stream position advanced by.
    """
    bit_buffer = BitBuffer(stream, self.cstruct.endian)
    # Measure from the start position instead of summing per-field deltas:
    # the per-field sum skipped bit-field iterations and the flush after the
    # loop, so a structure ending in bit fields under-reported its size.
    start = stream.tell()

    for field in self.fields:
        if field.bits:
            bit_buffer.write(field.type, getattr(data, field.name), field.bits)
            continue

        # A regular field ends any pending run of bit fields.
        if bit_buffer._type:
            bit_buffer.flush()

        if isinstance(field.type, Structure) and field.type.anonymous:
            field.type._write(stream, data)
        else:
            field.type._write(stream, getattr(data, field.name))

    # Flush bit fields that were the last members of the structure.
    if bit_buffer._type:
        bit_buffer.flush()

    return stream.tell() - start
class Union(Structure):
    """Type class for unions.

    All members are parsed from the same block of bytes: the union's size is
    the size of its largest member, and each member is read from the start
    of that block (or from its explicit offset, when one is set).
    """

    def __repr__(self):
        return '<Union {}>'.format(self.name)

    def _calc_offsets(self):
        """Overridden because we don't use this for unions"""
        pass

    def _calc_size(self):
        # default=0 keeps len() usable for a union without any members.
        return max((len(field.type) for field in self.fields), default=0)

    def _read(self, stream, *args, **kwargs):
        """Read one union from ``stream`` and return it as an Instance.

        Extra positional and keyword arguments are accepted (and ignored) so
        the signature matches ``Structure._read``.
        """
        # Consume the whole union once; every member is parsed from this
        # private buffer so members cannot advance the real stream.
        buf = BytesIO(memoryview(stream.read(len(self))))
        result = OrderedDict()
        sizes = {}

        for field in self.fields:
            start = 0
            buf.seek(0)
            field_type = self.cstruct.resolve(field.type)

            if field.offset:
                buf.seek(field.offset)
                start = field.offset

            # Arrays and pointers get the values parsed so far as context.
            if isinstance(field_type, (Array, Pointer)):
                v = field_type._read(buf, result)
            else:
                v = field_type._read(buf)

            if isinstance(field_type, Structure) and field_type.anonymous:
                # Members of an anonymous structure are merged into the union.
                sizes.update(v._sizes)
                result.update(v._values)
            else:
                sizes[field.name] = buf.tell() - start
                result[field.name] = v

        return Instance(self, result, sizes)

    def _write(self, stream, data):
        """Write the union to ``stream`` using its largest member.

        Returns:
            The number of bytes written.
        """
        offset = stream.tell()

        # Nothing to write for a union without members.
        if not self.fields:
            return 0

        # Find the largest field
        field = max(self.fields, key=lambda e: len(e.type))

        # Write the value to the stream using the largest field's type
        field.type._write(stream, getattr(data, field.name))

        return stream.tell() - offset

    def show(self, indent=0):
        # TODO: Implement
        raise NotImplementedError()
b/dissect/cstruct/utils.py new file mode 100644 index 0000000..b445fd3 --- /dev/null +++ b/dissect/cstruct/utils.py @@ -0,0 +1,175 @@ +import string +import pprint + +from dissect.cstruct.types.instance import Instance +from dissect.cstruct.types.structure import Structure + +COLOR_RED = '\033[1;31m' +COLOR_GREEN = '\033[1;32m' +COLOR_YELLOW = '\033[1;33m' +COLOR_BLUE = '\033[1;34m' +COLOR_PURPLE = '\033[1;35m' +COLOR_CYAN = '\033[1;36m' +COLOR_WHITE = '\033[1;37m' +COLOR_NORMAL = '\033[1;0m' + +COLOR_BG_RED = '\033[1;41m\033[1;37m' +COLOR_BG_GREEN = '\033[1;42m\033[1;37m' +COLOR_BG_YELLOW = '\033[1;43m\033[1;37m' +COLOR_BG_BLUE = '\033[1;44m\033[1;37m' +COLOR_BG_PURPLE = '\033[1;45m\033[1;37m' +COLOR_BG_CYAN = '\033[1;46m\033[1;37m' +COLOR_BG_WHITE = '\033[1;47m\033[1;30m' + +PRINTABLE = string.digits + string.ascii_letters + string.punctuation + " " + + +def _hexdump(bytes_hex, offset=0, prefix="", palette=None): + """Hexdump some data. + + Args: + bytes_hex: Bytes to hexdump. + offset: Byte offset of the hexdump. + prefix: Optional prefix. + palette: Colorize the hexdump using this color pattern. + """ + if palette: + palette = palette[::-1] + + remaining = 0 + active = None + + for i in range(0, len(bytes_hex), 16): + values = "" + chars = [] + + for j in range(16): + if not active and palette: + remaining, active = palette.pop() + values += active + elif active and j == 0: + values += active + + if i + j >= len(bytes_hex): + values += " " + else: + char = bytes_hex[i + j] + char = chr(char) + + print_char = char if char in PRINTABLE else "." 
def hexdump(bytes_hex, palette=None, offset=0, prefix="", output='print'):
    """Hexdump some data.

    Args:
        bytes_hex: Bytes to hexdump.
        palette: Colorize the hexdump using this color pattern.
        offset: Byte offset of the hexdump.
        prefix: Optional prefix.
        output: Output format, can be 'print', 'generator' or 'string'.

    Returns:
        None for 'print', the line generator for 'generator' and the joined
        lines for 'string'.

    Raises:
        ValueError: If ``output`` is not one of the supported formats.
    """
    # Validate first and format with !s so that any invalid value, including
    # non-strings such as None, produces the intended ValueError rather than
    # a TypeError from the '{:s}' format spec.
    if output not in ('print', 'generator', 'string'):
        raise ValueError(
            "Invalid output argument: '{!s}' (should be 'print', 'generator' or 'string').".format(output)
        )

    generator = _hexdump(bytes_hex, offset=offset, prefix=prefix, palette=palette)
    if output == 'print':
        print("\n".join(generator))
        return None
    if output == 'generator':
        return generator
    return '\n'.join(generator)
def dumpstruct(obj_dump, data=None, offset=0, color=True, output='print'):
    """Dump a structure or parsed structure instance.

    Prints a colorized hexdump and parsed structure output.

    Args:
        obj_dump: Structure or Instance to dump.
        data: Bytes to parse the Structure on, if obj_dump is not a parsed Instance.
        offset: Byte offset of the hexdump.
        color: Whether to colorize the hexdump and the field names.
        output: Output format, can be 'print' or 'string'.

    Returns:
        The rendered dump for 'string', otherwise None.

    Raises:
        ValueError: If ``output`` is unsupported, or if ``obj_dump`` is
            neither an Instance nor a Structure accompanied by data.
    """
    # !s instead of :s so that a non-string value (e.g. None) still yields
    # the intended ValueError instead of a TypeError while formatting.
    if output not in ('print', 'string'):
        raise ValueError(
            "Invalid output argument: '{!s}' (should be 'print' or 'string').".format(output)
        )

    if isinstance(obj_dump, Instance):
        # Already parsed: re-serialize it to obtain the bytes to hexdump.
        return _dumpstruct(obj_dump, obj_dump._type, color, obj_dump.dumps(), output, offset)
    elif isinstance(obj_dump, Structure) and data:
        # A type plus raw bytes: parse first, then dump.
        return _dumpstruct(obj_dump(data), obj_dump, color, data, output, offset)
    else:
        raise ValueError("Invalid arguments")
self.type, self.name) + return "".format( + self.offset, + self.size, + self.type, + self.name + ) -def partitions(fh, mbr, offset): - for p in mbr.part: - part_offset = offset + p.sector_ofs * SECTOR_SIZE +def partitions(fh_part, mbr_part, offset): + for mbr_p in mbr_part.part: + part_offset = offset + mbr_p.sector_ofs * SECTOR_SIZE - if p.type == 0x00: + if mbr_p.type == 0x00: continue - if p.type == 0x05: - fh.seek(part_offset) - e_mbr = c_disk.mbr(fh) - for part in partitions(fh, e_mbr, part_offset): - yield part + if mbr_p.type == 0x05: + fh_part.seek(part_offset) + e_mbr = c_disk.mbr(fh_part) + for y_part in partitions(fh_part, e_mbr, part_offset): + yield y_part - yield Partition(fh, part_offset, p.sector_size * SECTOR_SIZE, p.type, None) + yield Partition(fh_part, part_offset, mbr_p.sector_size * SECTOR_SIZE, mbr_p.type, None) if __name__ == '__main__': @@ -120,13 +126,13 @@ def partitions(fh, mbr, offset): if mbr.bootsig != 0xaa55: sys.exit("Not a valid MBR") - cstruct.dumpstruct(mbr) + dumpstruct(mbr) for p in partitions(fh, mbr, 0): if p.type == 0xee: fh.seek(p.offset) gpt = c_disk.GPT_HEADER(fh) - cstruct.dumpstruct(gpt) + dumpstruct(gpt) fh.seek(gpt.lba_partition_array * SECTOR_SIZE) for _ in range(gpt.partition_table_count): diff --git a/examples/mirai.py b/examples/mirai.py index b382b51..679d0c3 100644 --- a/examples/mirai.py +++ b/examples/mirai.py @@ -1,9 +1,10 @@ #!/usr/bin/env python -from dissect import cstruct +from dissect.cstruct import cstruct, dumpstruct + import socket import struct -protocol = cstruct.cstruct() +protocol = cstruct() protocol.load(""" enum AttackType : uint8 { @@ -49,4 +50,4 @@ for o in record.attack_options: print('OPTION: {} - {}'.format(o.type, o.value)) - cstruct.dumpstruct(protocol.MiraiAttack, data) + dumpstruct(protocol.MiraiAttack, data) diff --git a/examples/pe.py b/examples/pe.py index 8c1c7f6..d335370 100644 --- a/examples/pe.py +++ b/examples/pe.py @@ -1,6 +1,7 @@ #!/usr/bin/env python import sys -from dissect 
import cstruct + +from dissect.cstruct import cstruct, dumpstruct PE_DEF = """ #define IMAGE_NUMBEROF_DIRECTORY_ENTRIES 16 @@ -125,7 +126,7 @@ } IMAGE_SECTION_HEADER; """ -pestruct = cstruct.cstruct() +pestruct = cstruct() pestruct.load(PE_DEF) @@ -150,10 +151,10 @@ else: optional_header = pestruct.IMAGE_OPTIONAL_HEADER(fh) - cstruct.dumpstruct(mz) - cstruct.dumpstruct(file_header) - cstruct.dumpstruct(optional_header) + dumpstruct(mz) + dumpstruct(file_header) + dumpstruct(optional_header) sections = [pestruct.IMAGE_SECTION_HEADER(fh) for _ in range(file_header.NumberOfSections)] for s in sections: - cstruct.dumpstruct(s) + dumpstruct(s) diff --git a/examples/secdesc.py b/examples/secdesc.py index b5974cb..95331d2 100644 --- a/examples/secdesc.py +++ b/examples/secdesc.py @@ -1,9 +1,10 @@ #!/usr/bin/env python # flake8: noqa -from dissect import cstruct -from io import BytesIO +import zlib import binascii +from io import BytesIO +from dissect.cstruct import cstruct cdef = """ struct SECURITY_DESCRIPTOR { @@ -56,7 +57,7 @@ LDAP_SID Sid; }; """ -c_secd = cstruct.cstruct() +c_secd = cstruct() c_secd.load(cdef, compiled=True) @@ -96,7 +97,11 @@ def __init__(self, fh=None, in_obj=None): self.ldap_sid = in_obj def __repr__(self): - return "S-{}-{}-{}".format(self.ldap_sid.Revision, bytearray(self.ldap_sid.IdentifierAuthority.Value)[5], "-".join(['{:d}'.format(v) for v in self.ldap_sid.SubAuthority])) + return "S-{}-{}-{}".format( + self.ldap_sid.Revision, + bytearray(self.ldap_sid.IdentifierAuthority.Value)[5], + "-".join(['{:d}'.format(v) for v in self.ldap_sid.SubAuthority]) + ) class ACL(object): @@ -123,18 +128,18 @@ class ACCESS_DENIED_ACE(ACCESS_ALLOWED_ACE): class ACCESS_ALLOWED_OBJECT_ACE(object): # Flag constants - ACE_OBJECT_TYPE_PRESENT = 0x01 - ACE_INHERITED_OBJECT_TYPE_PRESENT = 0x02 + ACE_OBJECT_TYPE_PRESENT = 0x01 + ACE_INHERITED_OBJECT_TYPE_PRESENT = 0x02 # ACE type specific mask constants # Note that while not documented, these also seem valid # for 
ACCESS_ALLOWED_ACE types - ADS_RIGHT_DS_CONTROL_ACCESS = 0x00000100 - ADS_RIGHT_DS_CREATE_CHILD = 0x00000001 - ADS_RIGHT_DS_DELETE_CHILD = 0x00000002 - ADS_RIGHT_DS_READ_PROP = 0x00000010 - ADS_RIGHT_DS_WRITE_PROP = 0x00000020 - ADS_RIGHT_DS_SELF = 0x00000008 + ADS_RIGHT_DS_CONTROL_ACCESS = 0x00000100 + ADS_RIGHT_DS_CREATE_CHILD = 0x00000001 + ADS_RIGHT_DS_DELETE_CHILD = 0x00000002 + ADS_RIGHT_DS_READ_PROP = 0x00000010 + ADS_RIGHT_DS_WRITE_PROP = 0x00000020 + ADS_RIGHT_DS_SELF = 0x00000008 def __init__(self, fh): self.fh = fh @@ -146,29 +151,29 @@ class ACCESS_DENIED_OBJECT_ACE(ACCESS_ALLOWED_OBJECT_ACE): pass -""" -ACCESS_MASK as described in 2.4.3 -https://msdn.microsoft.com/en-us/library/cc230294.aspx -""" class ACCESS_MASK(object): + """ + ACCESS_MASK as described in 2.4.3 + https://msdn.microsoft.com/en-us/library/cc230294.aspx + """ # Flag constants - GENERIC_READ = 0x80000000 - GENERIC_WRITE = 0x04000000 - GENERIC_EXECUTE = 0x20000000 - GENERIC_ALL = 0x10000000 - MAXIMUM_ALLOWED = 0x02000000 - ACCESS_SYSTEM_SECURITY = 0x01000000 - SYNCHRONIZE = 0x00100000 - WRITE_OWNER = 0x00080000 - WRITE_DACL = 0x00040000 - READ_CONTROL = 0x00020000 - DELETE = 0x00010000 + GENERIC_READ = 0x80000000 + GENERIC_WRITE = 0x04000000 + GENERIC_EXECUTE = 0x20000000 + GENERIC_ALL = 0x10000000 + MAXIMUM_ALLOWED = 0x02000000 + ACCESS_SYSTEM_SECURITY = 0x01000000 + SYNCHRONIZE = 0x00100000 + WRITE_OWNER = 0x00080000 + WRITE_DACL = 0x00040000 + READ_CONTROL = 0x00020000 + DELETE = 0x00010000 def __init__(self, mask): self.mask = mask def has_priv(self, priv): - return self.mask & priv == priv + return (self.mask & priv) == priv def set_priv(self, priv): self.mask |= priv @@ -178,13 +183,13 @@ def remove_priv(self, priv): class ACE(object): - CONTAINER_INHERIT_ACE = 0x01 - FAILED_ACCESS_ACE_FLAG = 0x80 - INHERIT_ONLY_ACE = 0x08 - INHERITED_ACE = 0x10 - NO_PROPAGATE_INHERIT_ACE = 0x04 - OBJECT_INHERIT_ACE = 0x01 - SUCCESSFUL_ACCESS_ACE_FLAG = 0x04 + CONTAINER_INHERIT_ACE = 0x01 + 
FAILED_ACCESS_ACE_FLAG = 0x80 + INHERIT_ONLY_ACE = 0x08 + INHERITED_ACE = 0x10 + NO_PROPAGATE_INHERIT_ACE = 0x04 + OBJECT_INHERIT_ACE = 0x01 + SUCCESSFUL_ACCESS_ACE_FLAG = 0x04 def __init__(self, fh): self.fh = fh @@ -214,8 +219,43 @@ def __repr__(self): if __name__ == '__main__': - import zlib - d = BytesIO(zlib.decompress(binascii.unhexlify("789c636410e949916060680062110606861e206661a8606002d2ec51160c0a409a1988f759f37dfe30ffa2e03620e70773fac1a555d3f63fe3bd20b8a89561158381e723464606100093c8faf693a08f85e1867809906665f080eb175050885f5a7941708202c301ff4357ceb7a1eb6705eb671505d9354365a9b7e91ff127ae3ed9200f209b135089aa8f5c7382a964ce85fd5c7659421704172400f5e518bf25cd1c0b060e68a0b94f7d5c24517d5170ed7b60f8dcb87a96587d1deecb3eb3055f145c7914122f84f429c022c949cce780c205c1e5194077e6b16a12d26700d5574fa4ff58c1fa74c06104d2b7025d1f13441fc83d6a50b50250b5b21b57baad4b8872d8fee27f57c495b04bc86a0dc06a35c00a416a83a58a56ebcb5d109c210934d7216836723a04a9c319be1075ac5c68ea30c2134d1d2c1cda766c2df79a725170dd5e4c750c0c2a0c57189919f0852948cd7f467e026a24206a90fcaf02668b40c215c54e118629404d30316ea818583f544c084c20f2b42987ce8e17323ece3b3e843c929ab7e12646da45f6bb10240d80f4bd09489abffbbeae7383e263e5eb9f184ee0d527e5010e30662c71b58bc8b454c6826a4ec1544d865c1544da25d51c76a83f184f969e7c9593efdd2c79added5652a1bb1e694b040c283119a1644ce6878988bec719dcdbe369f312e5e03affd48fa882d2bd0f5cd21531f46994da43e92c21749df05b92d313e7c407d6dc4e983e57f62cba642347de24b366f0c7d33d171b3fa763de3c75d9f08c51f4c9febcdaa59c1a72e0aeebec0d0703cade000b1fa32484c2f307d9d5d9af233eeed70df7a365cebaaad5a1db1fa26569267df42952bf149487514217d0a507d6c44fa2f834c7da568fab8484c67307d7cdf4aec8aed8065f601fc7520ba3b89d587ee4e62d319ba7dc4ea43b74f8ac87847b78f587de8f6c931cd9a15ed050c97c3f8c3053dbd281c9cc674cb817078a2c79fda4bdfc9f3365c125cc506ac9bfbdede20d63e770bd3b89ccffb3c967ff7cc08610ab021565fc029ebde3a59607e68242d3fa0b719898d0762f5a1c7430863ef931f7b2e0ab63301c365674200d1faf4a3d575679cf5ddb0f6a929a34bdc6f62fd1742a6ff88d587eece4432ed23561fba7dc496d7
e8e9b390c8f2135d5ff97383905787551c26e8dd7d3849c12d93d878a84c48c86fb292f6e9fb7de7c44959b57fc4eaab22d37fc4b6e9d1ed6b7ae595786cf505df851ed7aa228f6b8811abafa5d239ede8e17ecf4d99332a24df7b7813ab0fa3cd4d64bc7796dc5ff1ea28b07e3f8dbf7e474f67c4eac3b08fc8fa1dc33e22f5a1db37ebff8f0f130509a74f74fb88d5871e0fc4ea4377e7dcbc033a75f9594e1daa4ca2f7c4f30489b58fd8f60bbaff88d587eece1d8f938db2ff26f8b47f32d9bb6a6ee66b62ddb9872f59e3aa23b0fe3b485a3b84587de8ee3c406439889edfd1c72c88d577f9f8969af6f60d4e5b2c6c62dd675d9623142eb0be32ff35f7e809099b1ce69b6bf9bee83748c6a58f0fd4b797b26070606001777f49ea5721e923b5bf02ee2303e53678763036aebc24b8f23ffe76480699fa4aa1ee04850ac9fe1342e823a9df88641f49fd46247d24f51b91f491d62f47e823a9df2880180b5b96cb34db8637c6adfb5de0cceb6231bbf08e694941f43163d187633c83196c9810c9fae0e32702507db9f38e6d3fae7349b0d58f61c1c9e66f1cb8f4713240dc894b1f467c12a90f235e90f5218d0f4dae967ee51177d56d4f8eef97bfcbbb4cf18ff3e880c79fb0a66ba4f12b2d2654b5187ec0a316c3dde86a8534708f0540dd2ac280aa0ea3ce85aae386aa8395614f0f57d8cffabed76dc18eb91282b9772a51e316a88e11a2ee9efbb389f9370bbca7865fb3fff2f9cc0d94313a2115c8181d9e7285909a52a81a7c6387ec4c84d5f083d54830808c430f4790f85eb4714705201b5f7ec2270700bed3f0c5"))) + d = BytesIO(zlib.decompress(binascii.unhexlify( + '789c636410e949916060680062110606861e206661a8606002d2ec51160c0a409a1988f759f37dfe30' + 'ffa2e03620e70773fac1a555d3f63fe3bd20b8a89561158381e723464606100093c8faf693a08f85e1' + '867809906665f080eb175050885f5a7941708202c301ff4357ceb7a1eb6705eb671505d9354365a9b7' + 'e91ff127ae3ed9200f209b135089aa8f5c7382a964ce85fd5c7659421704172400f5e518bf25cd1c0b' + '060e68a0b94f7d5c24517d5170ed7b60f8dcb87a96587d1deecb3eb3055f145c7914122f84f429c022' + 'c949cce780c205c1e5194077e6b16a12d26700d5574fa4ff58c1fa74c06104d2b7025d1f13441fc83d' + '6a50b50250b5b21b57baad4b8872d8fee27f57c495b04bc86a0dc06a35c00a416a83a58a56ebcb5d10' + '9c210934d7216836723a04a9c319be1075ac5c68ea30c2134d1d2c1cda766c2df79a725170dd5e4c75' + 
'0c0c2a0c57189919f0852948cd7f467e026a24206a90fcaf02668b40c215c54e118629404d30316ea8' + '18583f544c084c20f2b42987ce8e17323ece3b3e843c929ab7e12646da45f6bb10240d80f4bd09489a' + 'bffbbeae7383e263e5eb9f184ee0d527e5010e30662c71b58bc8b454c6826a4ec1544d865c1544da25' + 'd51c76a83f184f969e7c9593efdd2c79added5652a1bb1e694b040c283119a1644ce6878988bec719d' + 'cdbe369f312e5e03affd48fa882d2bd0f5cd21531f46994da43e92c21749df05b92d313e7c407d6dc4' + 'e983e57f62cba642347de24b366f0c7d33d171b3fa763de3c75d9f08c51f4c9febcdaa59c1a72e0aee' + 'bec0d0703cade000b1fa32484c2f307d9d5d9af233eeed70df7a365cebaaad5a1db1fa26569267df42' + '952bf149487514217d0a507d6c44fa2f834c7da568fab8484c67307d7cdf4aec8aed8065f601fc7520' + 'ba3b89d587ee4e62d319ba7dc4ea43b74f8ac87847b78f587de8f6c931cd9a15ed050c97c3f8c3053d' + 'bd281c9cc674cb817078a2c79fda4bdfc9f3365c125cc506ac9bfbdede20d63e770bd3b89ccffb3c96' + '7ff7cc08610ab021565fc029ebde3a59607e68242d3fa0b719898d0762f5a1c7430863ef931f7b2e0a' + 'b63301c365674200d1faf4a3d575679cf5ddb0f6a929a34bdc6f62fd1742a6ff88d587eece4432ed23' + '561fba7dc496d7e8e9b390c8f2135d5ff97383905787551c26e8dd7d3849c12d93d878a84c48c86fb2' + '92f6e9fb7de7c44959b57fc4eaab22d37fc4b6e9d1ed6b7ae595786cf505df851ed7aa228f6b8811ab' + 'afa5d239ede8e17ecf4d99332a24df7b7813ab0fa3cd4d64bc7796dc5ff1ea28b07e3f8dbf7e474f67' + 'c4eac3b08fc8fa1dc33e22f5a1db37ebff8f0f130509a74f74fb88d5871e0fc4ea4377e7dcbc033a75' + 'f9594e1daa4ca2f7c4f30489b58fd8f60bbaff88d587eece1d8f938db2ff26f8b47f32d9bb6a6ee66b' + '62ddb9872f59e3aa23b0fe3b485a3b84587de8ee3c406439889edfd1c72c88d577f9f8969af6f60d4e' + '5b2c6c62dd675d9623142eb0be32ff35f7e809099b1ce69b6bf9bee83748c6a58f0fd4b797b2607060' + '6001777f49ea5721e923b5bf02ee2303e53678763036aebc24b8f23ffe76480699fa4aa1ee04850ac9' + 'fe1342e823a9df88641f49fd46247d24f51b91f491d62f47e823a9df2880180b5b96cb34db8637c6ad' + 'fb5de0cceb6231bbf08e694941f43163d187633c83196c9810c9fae0e32702507db9f38e6d3fae7349' + 'b0d58f61c1c9e66f1cb8f4713240dc894b1f467c12a90f235e90f5218d0f4dae967ee51177d56d4f8e' 
+ 'ef97bfcbbb4cf18ff3e880c79fb0a66ba4f12b2d2654b5187ec0a316c3dde86a8534708f0540dd2ac2' + '80aa0ea3ce85aae386aa8395614f0f57d8cffabed76dc18eb91282b9772a51e316a88e11a2ee9efbb3' + '89f9370bbca7865fb3fff2f9cc0d94313a2115c8181d9e7285909a52a81a7c6387ec4c84d5f083d548' + '30808c430f4790f85eb4714705201b5f7ec2270700bed3f0c5'))) + sc = SecurityDescriptor(d) # print sc.descriptor diff --git a/pyproject.toml b/pyproject.toml new file mode 100644 index 0000000..7cbdf87 --- /dev/null +++ b/pyproject.toml @@ -0,0 +1,5 @@ +[build-system] +requires = ["setuptools>=43.0.0", "wheel", "setuptools_scm[toml]>=3.4.1"] +build-backend = "setuptools.build_meta" + +[tool.setuptools_scm] diff --git a/setup.py b/setup.py index 9b8e106..9d8f25a 100644 --- a/setup.py +++ b/setup.py @@ -1,11 +1,10 @@ from setuptools import setup -with open("README.md", "r") as fh: +with open("README.md") as fh: long_description = fh.read() setup( name="dissect.cstruct", - version="1.0.0", author="Fox-IT", description="Structure parsing in Python made easy.", long_description=long_description, @@ -14,11 +13,10 @@ keywords="cstruct struct dissect structure binary pack packer unpack unpacker parser parsing", url="https://github.com/fox-it/dissect.cstruct", namespace_packages=['dissect'], - packages=['dissect.cstruct'], + packages=['dissect.cstruct', 'dissect.cstruct.types'], classifiers=[ "Development Status :: 5 - Production/Stable", "Topic :: Software Development :: Libraries :: Python Modules", - "Programming Language :: Python :: 2", "Programming Language :: Python :: 3", "License :: OSI Approved :: MIT License" ] diff --git a/tests/test_basic.py b/tests/test_basic.py index 5bb5d17..c647d04 100644 --- a/tests/test_basic.py +++ b/tests/test_basic.py @@ -1,8 +1,10 @@ import os import pytest from io import BytesIO + from dissect import cstruct -from dissect.cstruct.cstruct import BytesInteger +from dissect.cstruct.types.bytesinteger import BytesInteger +from dissect.cstruct.utils import dumpstruct, hexdump def 
@pytest.mark.parametrize('compiled', [True, False])
def test_bytes_integer_struct_signed(compiled):
    """Signed 24-bit integers inside a structure: scalars, fixed and dynamic arrays."""
    definition = """
    struct test {
        int24 a;
        int24 b[2];
        int24 len;
        int24 dync[len];
        int24 c;
        int24 d[3];
    };
    """
    parser = cstruct.cstruct()
    parser.load(definition, compiled=compiled)

    raw = b'AAABBBCCC\x02\x00\x00DDDEEE\xff\xff\xff\x01\xff\xff\x02\xff\xff\x03\xff\xff'
    parsed = parser.test(raw)

    # Positive values and the length-driven array.
    assert parsed.a == 0x414141
    assert parsed.b == [0x424242, 0x434343]
    assert parsed.len == 0x02
    assert parsed.dync == [0x444444, 0x454545]
    # Values with the sign bit set must come back negative.
    assert parsed.c == -1
    assert parsed.d == [-255, -254, -253]
@pytest.mark.parametrize('compiled', [True, False])
def test_bytes_integer_struct_signed_be(compiled):
    """Signed 24-bit integers inside a structure, parsed as big-endian."""
    definition = """
    struct test {
        int24 a;
        int24 b[2];
        int24 len;
        int24 dync[len];
        int24 c;
        int24 d[3];
    };
    """
    parser = cstruct.cstruct()
    parser.load(definition, compiled=compiled)
    # Endianness is switched after the definition has been loaded.
    parser.endian = '>'

    raw = b'AAABBBCCC\x00\x00\x02DDDEEE\xff\xff\xff\xff\xff\x01\xff\xff\x02\xff\xff\x03'
    parsed = parser.test(raw)

    assert parsed.a == 0x414141
    assert parsed.b == [0x424242, 0x434343]
    assert parsed.len == 0x02
    assert parsed.dync == [0x444444, 0x454545]
    # Values with the sign bit set must come back negative.
    assert parsed.c == -1
    assert parsed.d == [-255, -254, -253]
== [c.Test16.A, c.Test16.B] + assert c.test_expr(size=1, expr=[c.Test16.A, c.Test16.B]).dumps() == b'\x01\x00\x01\x00\x02\x00' + x = { c.Test16.A: 'Test16.A', c.Test16.B: 'Test16.B', @@ -230,7 +337,8 @@ def test_enum(): x[c.Test32.A] -def test_enum_comments(): +@pytest.mark.parametrize('compiled', [True, False]) +def test_enum_comments(compiled): d = """ enum Inline { hello=7, world, foo, bar }; // inline enum @@ -250,7 +358,7 @@ def test_enum_comments(): """ c = cstruct.cstruct() - c.load(d, compiled=False) + c.load(d, compiled=compiled) assert c.Inline.hello == 7 assert c.Inline.world == 8 @@ -274,7 +382,110 @@ def test_enum_comments(): assert c.Test.a != c.Test.b -def test_bitfield(): +@pytest.mark.parametrize('compiled', [True, False]) +def test_flag(compiled): + d = """ + flag Test { + a, + b, + c, + d + }; + + flag Odd { + a = 2, + b, + c, + d = 32, e, f, + g + }; + """ + + c = cstruct.cstruct() + c.load(d, compiled=compiled) + + assert c.Test.a == 1 + assert c.Test.b == 2 + assert c.Test.c == 4 + assert c.Test.d == 8 + + assert c.Odd.a == 2 + assert c.Odd.b == 4 + assert c.Odd.c == 8 + assert c.Odd.d == 32 + assert c.Odd.e == 64 + assert c.Odd.f == 128 + assert c.Odd.g == 256 + + assert c.Test.a == c.Test.a + assert c.Test.a != c.Test.b + assert bool(c.Test(0)) is False + assert bool(c.Test(1)) is True + + assert c.Test.a | c.Test.b == 3 + assert str(c.Test.c | c.Test.d) == 'Test.d|c' + assert repr(c.Test.a | c.Test.b) == '' + assert c.Test(2) == c.Test.b + assert c.Test(3) == c.Test.a | c.Test.b + assert c.Test.c & 12 == c.Test.c + assert c.Test.b & 12 == 0 + assert c.Test.b ^ c.Test.a == c.Test.a | c.Test.b + + assert ~c.Test.a == -2 + assert str(~c.Test.a) == 'Test.d|c|b' + + +@pytest.mark.parametrize('compiled', [True, False]) +def test_flag_read(compiled): + d = """ + flag Test16 : uint16 { + A = 0x1, + B = 0x2 + }; + + flag Test24 : uint24 { + A = 0x1, + B = 0x2 + }; + + flag Test32 : uint32 { + A = 0x1, + B = 0x2 + }; + + struct test { + Test16 a16; + 
Test16 b16; + Test24 a24; + Test24 b24; + Test32 a32; + Test32 b32; + Test16 l[2]; + Test16 c16; + }; + """ + c = cstruct.cstruct() + c.load(d, compiled=compiled) + + a = c.test(b'\x01\x00\x02\x00\x01\x00\x00\x02\x00\x00\x01\x00\x00\x00\x02\x00\x00\x00\x01\x00\x02\x00\x03\x00') + assert a.a16.enum == c.Test16 and a.a16.value == c.Test16.A + assert a.b16.enum == c.Test16 and a.b16.value == c.Test16.B + assert a.a24.enum == c.Test24 and a.a24.value == c.Test24.A + assert a.b24.enum == c.Test24 and a.b24.value == c.Test24.B + assert a.a32.enum == c.Test32 and a.a32.value == c.Test32.A + assert a.b32.enum == c.Test32 and a.b32.value == c.Test32.B + + assert len(a.l) == 2 + assert a.l[0].enum == c.Test16 and a.l[0].value == c.Test16.A + assert a.l[1].enum == c.Test16 and a.l[1].value == c.Test16.B + + assert a.c16 == c.Test16.A | c.Test16.B + assert a.c16 & c.Test16.A + assert str(a.c16) == 'Test16.B|A' + + +@pytest.mark.parametrize('compiled', [True, False]) +def test_bitfield(compiled): d = """ struct test { uint16 a:4; @@ -288,7 +499,7 @@ def test_bitfield(): }; """ c = cstruct.cstruct() - c.load(d, compiled=False) + c.load(d, compiled=compiled) d = b'\x12\x34\xff\x00\x00\x00\x1f\x00\x01\x00\x00\x00' a = c.test(d) @@ -304,7 +515,8 @@ def test_bitfield(): assert a.dumps() == d -def test_bitfield_be(): +@pytest.mark.parametrize('compiled', [True, False]) +def test_bitfield_be(compiled): d = """ struct test { uint16 a:4; @@ -319,7 +531,7 @@ def test_bitfield_be(): }; """ c = cstruct.cstruct(endian='>') - c.load(d, compiled=False) + c.load(d, compiled=compiled) d = b'\x12\x34\x00\x00\x00\xff\x1f\x00\x00\x00\x00\x01' a = c.test(d) @@ -366,7 +578,8 @@ def test_write_be(): assert c.wchar.dumps('lala') == b'\x00l\x00a\x00l\x00a' -def test_write_struct(): +@pytest.mark.parametrize('compiled', [True, False]) +def test_write_struct(compiled): d = """ struct test { char magic[4]; @@ -379,7 +592,7 @@ def test_write_struct(): }; """ c = cstruct.cstruct() - c.load(d, 
compiled=False) + c.load(d, compiled=compiled) d = b'testt\x00e\x00s\x00t\x00\x01\x02\x03\x04\x05\x06\x07lalala\x00t\x00e\x00s\x00t\x00\x00\x00' a = c.test() @@ -395,10 +608,12 @@ def test_write_struct(): a.nope = 1 assert a.dumps() == d - assert c.test(magic=b'test', wmagic=u'test', a=0x01, b=0x0302, c=0x07060504, string=b'lalala', wstring=u'test').dumps() == d + assert c.test(magic=b'test', wmagic=u'test', a=0x01, b=0x0302, c=0x07060504, string=b'lalala', + wstring=u'test').dumps() == d -def test_write_struct_be(): +@pytest.mark.parametrize('compiled', [True, False]) +def test_write_struct_be(compiled): d = """ struct test { char magic[4]; @@ -411,7 +626,7 @@ def test_write_struct_be(): }; """ c = cstruct.cstruct(endian='>') - c.load(d, compiled=False) + c.load(d, compiled=compiled) a = c.test() a.magic = 'test' @@ -425,7 +640,8 @@ def test_write_struct_be(): assert a.dumps() == b'test\x00t\x00e\x00s\x00t\x01\x02\x03\x04\x05\x06\x07lalala\x00\x00t\x00e\x00s\x00t\x00\x00' -def test_write_bitfield(): +@pytest.mark.parametrize('compiled', [True, False]) +def test_write_bitfield(compiled): d = """ struct test { uint16 a:1; @@ -448,7 +664,8 @@ def test_write_bitfield(): assert a.dumps() == b'\x03\x00\xff\x00\x00\x00\x1f\x00' -def test_write_bitfield_be(): +@pytest.mark.parametrize('compiled', [True, False]) +def test_write_bitfield_be(compiled): d = """ struct test { uint16 a:1; @@ -459,7 +676,7 @@ def test_write_bitfield_be(): }; """ c = cstruct.cstruct(endian='>') - c.load(d, compiled=False) + c.load(d, compiled=compiled) a = c.test() a.a = 0b1 @@ -471,7 +688,8 @@ def test_write_bitfield_be(): assert a.dumps() == b'\xc0\x00\x00\x00\x00\xff\xf8\x00' -def test_write_enum(): +@pytest.mark.parametrize('compiled', [True, False]) +def test_write_enum(compiled): d = """ enum Test16 : uint16 { A = 0x1, @@ -499,7 +717,7 @@ def test_write_enum(): }; """ c = cstruct.cstruct() - c.load(d, compiled=False) + c.load(d, compiled=compiled) a = c.test() a.a16 = c.Test16.A @@ -513,7 
+731,8 @@ def test_write_enum(): assert a.dumps() == b'\x01\x00\x02\x00\x01\x00\x00\x02\x00\x00\x01\x00\x00\x00\x02\x00\x00\x00\x01\x00\x02\x00' -def test_enum_name(): +@pytest.mark.parametrize('compiled', [True, False]) +def test_enum_name(compiled): d = """ enum Color: uint16 { RED = 1, @@ -528,7 +747,7 @@ def test_enum_name(): }; """ c = cstruct.cstruct() - c.load(d, compiled=False) + c.load(d, compiled=compiled) Color = c.Color Pixel = c.Pixel @@ -547,7 +766,8 @@ def test_enum_name(): assert pixel.color.value == 0xFF -def test_pointers(): +@pytest.mark.parametrize('compiled', [True, False]) +def test_pointers(compiled): d = """ struct test { char magic[4]; @@ -564,7 +784,7 @@ def test_pointers(): }; """ c = cstruct.cstruct(pointer='uint16') - c.load(d, compiled=False) + c.load(d, compiled=compiled) d = b'\x02\x00testt\x00e\x00s\x00t\x00\x01\x02\x03\x04\x05\x06\x07lalala\x00t\x00e\x00s\x00t\x00\x00\x00' p = c.ptrtest(d) @@ -585,24 +805,26 @@ def test_pointers(): c.ptrtest(b'\x00\x00').ptr.magic -def test_duplicate_type(): +@pytest.mark.parametrize('compiled', [True, False]) +def test_duplicate_type(compiled): d = """ struct test { uint32 a; }; """ c = cstruct.cstruct() - c.load(d, compiled=False) + c.load(d, compiled=compiled) with pytest.raises(ValueError): c.load(d) -def test_load_file(): +@pytest.mark.parametrize('compiled', [True, False]) +def test_load_file(compiled): path = os.path.join(os.path.dirname(__file__), 'data/testdef.txt') c = cstruct.cstruct() - c.loadfile(path, compiled=False) + c.loadfile(path, compiled=compiled) assert 'test' in c.typedefs @@ -614,7 +836,7 @@ def test_read_type_name(): def test_type_resolve(): c = cstruct.cstruct() - assert c.resolve('byte') == c.int8 + assert c.resolve('BYTE') == c.int8 with pytest.raises(cstruct.ResolveError) as excinfo: c.resolve('fake') @@ -634,6 +856,7 @@ def test_constants(): #define a 1 #define b 0x2 #define c "test" + #define d 1 << 1 """ c = cstruct.cstruct() c.load(d) @@ -641,16 +864,11 @@ def 
test_constants(): assert c.a == 1 assert c.b == 2 assert c.c == "test" + assert c.d == 2 - with pytest.raises(AttributeError): - c.d - - c.load("""#define d = 1 << 1""") # Expressions in constants are currently not supported - with pytest.raises(AttributeError): - c.d - -def test_struct_definitions(): +@pytest.mark.parametrize('compiled', [True, False]) +def test_struct_definitions(compiled): c = cstruct.cstruct() c.load(""" struct _test { @@ -658,7 +876,7 @@ def test_struct_definitions(): // uint32 comment uint32 b; } test, test1; - """, compiled=False) + """, compiled=compiled) assert c._test == c.test == c.test1 assert c.test.name == '_test' @@ -679,21 +897,23 @@ def test_typedef(): c = cstruct.cstruct() c.load("""typedef uint32 test;""") - assert c.test == 'uint32' + assert c.test == c.uint32 assert c.resolve('test') == c.uint32 -def test_lookups(): +@pytest.mark.parametrize('compiled', [True, False]) +def test_lookups(compiled): c = cstruct.cstruct() c.load(""" #define test_1 1 #define test_2 2 $a = {'test_1': 3, 'test_2': 4} - """, compiled=False) + """, compiled=compiled) assert c.lookups['a'] == {1: 3, 2: 4} -def test_expressions(): +@pytest.mark.parametrize('compiled', [True, False]) +def test_expressions(compiled): c = cstruct.cstruct() c.load(""" #define const 1 @@ -703,7 +923,7 @@ def test_expressions(): uint8 data_2[flag & (1 << 2)]; uint8 data_3[const]; }; - """, compiled=False) + """, compiled=compiled) a = c.test(b'\x01\x00\x01\x02\x03\xff') assert a.flag == 1 @@ -718,7 +938,8 @@ def test_expressions(): assert a.data_3 == [255] -def test_struct_sizes(): +@pytest.mark.parametrize('compiled', [True, False]) +def test_struct_sizes(compiled): c = cstruct.cstruct() c.load(""" struct static { @@ -728,45 +949,340 @@ def test_struct_sizes(): struct dynamic { uint32 test[]; }; - """, compiled=False) + """, compiled=compiled) assert len(c.static) == 4 - c.static.add_field("another", c.uint32) - assert len(c.static) == 8 - c.static.add_field("atoffset", 
c.uint32, 12) - assert len(c.static) == 16 - a = c.static(b'\x01\x00\x00\x00\x02\x00\x00\x00\x00\x00\x00\x00\x03\x00\x00\x00') - assert a.test == 1 - assert a.another == 2 - assert a.atoffset == 3 + if not compiled: + c.static.add_field("another", c.uint32) + assert len(c.static) == 8 + c.static.add_field("atoffset", c.uint32, 12) + assert len(c.static) == 16 + + a = c.static(b'\x01\x00\x00\x00\x02\x00\x00\x00\x00\x00\x00\x00\x03\x00\x00\x00') + assert a.test == 1 + assert a.another == 2 + assert a.atoffset == 3 + + with pytest.raises(TypeError) as excinfo: + len(c.dynamic) + assert str(excinfo.value) == "Dynamic size" + else: + with pytest.raises(NotImplementedError) as excinfo: + c.static.add_field("another", c.uint32) + assert str(excinfo.value) == "Can't add fields to a compiled structure" + + +@pytest.mark.parametrize('compiled', [True, False]) +def test_default_constructors(compiled): + c = cstruct.cstruct() + c.load(""" + enum Enum { + a = 0, + b = 1 + }; + + flag Flag { + a = 0, + b = 1 + }; + + struct test { + uint32 t_int; + uint32 t_int_array[2]; + uint24 t_bytesint; + uint24 t_bytesint_array[2]; + char t_char; + char t_char_array[2]; + wchar t_wchar; + wchar t_wchar_array[2]; + Enum t_enum; + Enum t_enum_array[2]; + Flag t_flag; + Flag t_flag_array[2]; + }; + """, compiled=compiled) + + testobj = c.test() + assert testobj.t_int == 0 + assert testobj.t_int_array == [0, 0] + assert testobj.t_bytesint == 0 + assert testobj.t_bytesint_array == [0, 0] + assert testobj.t_char == b'\x00' + assert testobj.t_char_array == b'\x00\x00' + assert testobj.t_wchar == u'\x00' + assert testobj.t_wchar_array == u'\x00\x00' + assert testobj.t_enum == c.Enum(0) + assert testobj.t_enum_array == [c.Enum(0), c.Enum(0)] + assert testobj.t_flag == c.Flag(0) + assert testobj.t_flag_array == [c.Flag(0), c.Flag(0)] + + assert testobj.dumps() == b'\x00' * 54 + + +@pytest.mark.parametrize('compiled', [True, False]) +def test_union(compiled): + d = """ + union test { + uint32 a; + 
char b[8]; + }; + """ + c = cstruct.cstruct() + c.load(d, compiled=compiled) + + assert len(c.test) == 8 + + a = c.test(b'zomgbeef') + assert a.a == 0x676d6f7a + assert a.b == b'zomgbeef' + + assert a.dumps() == b'zomgbeef' + assert c.test().dumps() == b'\x00\x00\x00\x00\x00\x00\x00\x00' + + +@pytest.mark.parametrize('compiled', [True, False]) +def test_nested_struct(compiled): + c = cstruct.cstruct() + c.load(""" + struct test_named { + char magic[4]; + struct { + uint32 a; + uint32 b; + } a; + struct { + char c[8]; + } b; + }; + + struct test_anonymous { + char magic[4]; + struct { + uint32 a; + uint32 b; + }; + struct { + char c[8]; + }; + }; + """, compiled=compiled) + + assert len(c.test_named) == len(c.test_anonymous) == 20 + + a = c.test_named(b'zomg\x39\x05\x00\x00\x28\x23\x00\x00deadbeef') + assert a.magic == b'zomg' + assert a.a.a == 1337 + assert a.a.b == 9000 + assert a.b.c == b'deadbeef' + + b = c.test_anonymous(b'zomg\x39\x05\x00\x00\x28\x23\x00\x00deadbeef') + assert b.magic == b'zomg' + assert b.a == 1337 + assert b.b == 9000 + assert b.c == b'deadbeef' + + +@pytest.mark.parametrize('compiled', [True, False]) +def test_nested_union(compiled): + d = """ + struct test { + char magic[4]; + union { + struct { + uint32 a; + uint32 b; + } a; + struct { + char b[8]; + } b; + } c; + }; + """ + c = cstruct.cstruct() + c.load(d, compiled=False) + + assert len(c.test) == 12 + + a = c.test(b'zomgholybeef') + assert a.magic == b'zomg' + assert a.c.a.a == 0x796c6f68 + assert a.c.a.b == 0x66656562 + assert a.c.b.b == b'holybeef' + + assert a.dumps() == b'zomgholybeef' + + +@pytest.mark.parametrize('compiled', [True, False]) +def test_anonymous_union_struct(compiled): + d = """ + typedef struct test + { + union + { + uint32 a; + struct + { + char b[3]; + char c; + }; + }; + uint32 d; + } + """ + c = cstruct.cstruct() + c.load(d, compiled=compiled) + + b = b'\x01\x01\x02\x02\x03\x03\x04\x04' + a = c.test(b) + + assert a.a == 0x02020101 + assert a.b == 
b'\x01\x01\x02' + assert a.c == b'\x02' + assert a.d == 0x04040303 - with pytest.raises(TypeError) as excinfo: - len(c.dynamic) - assert str(excinfo.value) == "Dynamic size" + assert a.dumps() == b + + +@pytest.mark.parametrize('compiled', [True, False]) +def test_config_flag_nocompile(compiled): + d = """ + struct compiled_global + { + uint32 a; + }; + + #[nocompile] + struct never_compiled + { + uint32 a; + }; + """ + c = cstruct.cstruct() + c.load(d, compiled=compiled) + + if compiled: + assert '+compiled' in repr(c.compiled_global) + + assert '+compiled' not in repr(c.never_compiled) def test_hexdump(capsys): - cstruct.hexdump(b'\x00' * 16) + hexdump(b'\x00' * 16) captured = capsys.readouterr() assert captured.out == "00000000 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 ................\n" + out = hexdump(b'\x00' * 16, output='string') + assert out == "00000000 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 ................" + + out = hexdump(b'\x00' * 16, output='generator') + assert next(out) == "00000000 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 ................" -def test_dumpstruct(capsys): + with pytest.raises(ValueError) as excinfo: + hexdump('b\x00', output='str') + assert str(excinfo.value) == "Invalid output argument: 'str' (should be 'print', 'generator' or 'string')." 
+ + +@pytest.mark.parametrize('compiled', [True, False]) +def test_dumpstruct(capsys, compiled): c = cstruct.cstruct() c.load(""" struct test { uint32 testval; }; - """, compiled=False) + """, compiled=compiled) data = b'\x39\x05\x00\x00' a = c.test(data) - cstruct.dumpstruct(c.test, data) + dumpstruct(c.test, data) captured_1 = capsys.readouterr() - cstruct.dumpstruct(a) + dumpstruct(a) captured_2 = capsys.readouterr() assert captured_1.out == captured_2.out + + out_1 = dumpstruct(c.test, data, output='string') + out_2 = dumpstruct(a, output='string') + + assert out_1 == out_2 + + with pytest.raises(ValueError) as excinfo: + dumpstruct(a, output='generator') + assert str(excinfo.value) == "Invalid output argument: 'generator' (should be 'print' or 'string')." + + +@pytest.mark.parametrize('compiled', [True, False]) +def test_compiler_slicing_multiple(compiled): + c = cstruct.cstruct() + c.load(""" + struct compile_slicing { + char single; + char multiple[2]; + }; + """, compiled=compiled) + a = c.compile_slicing(b'\x01\x02\x03') + assert a.single == b'\x01' + assert a.multiple == b'\x02\x03' + + +@pytest.mark.parametrize('compiled', [True, False]) +def test_underscores_attribute(compiled): + c = cstruct.cstruct() + c.load(""" + struct __test { + uint32 test_val; + }; + """, compiled=compiled) + + data = b'\x39\x05\x00\x00' + a = c.__test(data) + assert a.test_val == 1337 + + +def test_half_compiled_struct(): + from dissect.cstruct import RawType + + class OffByOne(RawType): + def __init__(self, cstruct_obj): + self._t = cstruct_obj.uint64 + super().__init__(cstruct_obj, 'OffByOne', 8) + + def _read(self, stream): + return self._t._read(stream) + 1 + + def _write(self, stream, data): + return self._t._write(stream, data - 1) + + c = cstruct.cstruct() + # Add an unsupported type for the cstruct compiler + # so that it returns the original struct, + # only partially compiling the struct. 
+ c.addtype("offbyone", OffByOne(c)) + c.load(""" + struct uncompiled { + uint32 a; + offbyone b; + uint16 c; + }; + + struct compiled { + char a[4]; + uncompiled b; + uint16 c; + }; + """, compiled=True) + + assert '+compiled' not in repr(c.uncompiled) + assert '+compiled' in repr(c.compiled) + + buf = b'zomg\x01\x00\x00\x00\x02\x00\x00\x00\x00\x00\x00\x00\x03\x00\x04\x00' + obj = c.compiled(buf) + assert obj.a == b'zomg' + assert obj.b.a == 1 + assert obj.b.b == 3 + assert obj.b.c == 3 + assert obj.c == 4 + + assert obj.dumps() == buf diff --git a/tests/test_compiled.py b/tests/test_compiled.py deleted file mode 100644 index 2f618e7..0000000 --- a/tests/test_compiled.py +++ /dev/null @@ -1,244 +0,0 @@ -from dissect import cstruct - - -def test_compiled_struct(): - d = """ - struct test { - char magic[4]; - wchar wmagic[4]; - uint8 a; - uint16 b; - uint32 c; - char string[]; - wchar wstring[]; - }; - """ - c = cstruct.cstruct() - c.load(d, compiled=True) - - d = b'testt\x00e\x00s\x00t\x00\x01\x02\x03\x04\x05\x06\x07lalala\x00t\x00e\x00s\x00t\x00\x00\x00' - a = c.test(d) - - assert a.magic == b'test' - assert a.wmagic == 'test' - assert a.a == 0x01 - assert a.b == 0x0302 - assert a.c == 0x07060504 - assert a.string == b'lalala' - assert a.wstring == 'test' - assert d == a.dumps() - - -def test_simple_struct_be(): - d = """ - struct test { - char magic[4]; - wchar wmagic[4]; - uint8 a; - uint16 b; - uint32 c; - char string[]; - wchar wstring[]; - }; - """ - c = cstruct.cstruct() - c.load(d, compiled=True) - c.endian = '>' - - d = b'test\x00t\x00e\x00s\x00t\x01\x02\x03\x04\x05\x06\x07lalala\x00\x00t\x00e\x00s\x00t\x00\x00' - a = c.test(d) - - assert a.magic == b'test' - assert a.wmagic == 'test' - assert a.a == 0x01 - assert a.b == 0x0203 - assert a.c == 0x04050607 - assert a.string == b'lalala' - assert a.wstring == 'test' - assert d == a.dumps() - - -def test_compiled_int24(): - d = """ - struct test { - int24 a; - int24 b[2]; - int24 len; - int24 dync[len]; - 
int24 c; - int24 d[3]; - }; - """ - c = cstruct.cstruct() - c.load(d, compiled=True) - - a = c.test(b'AAABBBCCC\x02\x00\x00DDDEEE\xff\xff\xff\x01\xff\xff\x02\xff\xff\x03\xff\xff') - assert a.a == 0x414141 - assert a.b == [0x424242, 0x434343] - assert a.len == 0x02 - assert a.dync == [0x444444, 0x454545] - assert a.c == -1 - assert a.d == [-255, -254, -253] - - -def test_compiled_uint24(): - d = """ - struct test { - uint24 a; - uint24 b[2]; - uint24 len; - uint24 dync[len]; - uint24 c; - }; - """ - c = cstruct.cstruct() - c.load(d, compiled=True) - - a = c.test(b'AAABBBCCC\x02\x00\x00DDDEEE\xff\xff\xff') - assert a.a == 0x414141 - assert a.b == [0x424242, 0x434343] - assert a.len == 0x02 - assert a.dync == [0x444444, 0x454545] - assert a.c == 0xffffff - - -def test_compiled_int24_be(): - d = """ - struct test { - int24 a; - int24 b[2]; - int24 len; - int24 dync[len]; - int24 c; - int24 d[3]; - }; - """ - c = cstruct.cstruct() - c.load(d, compiled=True) - c.endian = '>' - - a = c.test(b'AAABBBCCC\x00\x00\x02DDDEEE\xff\xff\xff\xff\xff\x01\xff\xff\x02\xff\xff\x03') - assert a.a == 0x414141 - assert a.b == [0x424242, 0x434343] - assert a.len == 0x02 - assert a.dync == [0x444444, 0x454545] - assert a.c == -1 - assert a.d == [-255, -254, -253] - - -def test_compiled_uint24_be(): - d = """ - struct test { - uint24 a; - uint24 b[2]; - uint24 len; - uint24 dync[len]; - uint24 c; - }; - """ - c = cstruct.cstruct() - c.load(d, compiled=True) - c.endian = '>' - - a = c.test(b'AAABBBCCC\x00\x00\x02DDDEEE\xff\xff\xff') - assert a.a == 0x414141 - assert a.b == [0x424242, 0x434343] - assert a.len == 0x02 - assert a.dync == [0x444444, 0x454545] - assert a.c == 0xffffff - - -def test_compiled_enum(): - d = """ - enum Test16 : uint16 { - A = 0x1, - B = 0x2 - }; - - enum Test24 : uint24 { - A = 0x1, - B = 0x2 - }; - - enum Test32 : uint32 { - A = 0x1, - B = 0x2 - }; - - struct test { - Test16 a16; - Test16 b16; - Test24 a24; - Test24 b24; - Test32 a32; - Test32 b32; - Test16 l[2]; - 
}; - """ - c = cstruct.cstruct() - c.load(d, compiled=True) - - a = c.test(b'\x01\x00\x02\x00\x01\x00\x00\x02\x00\x00\x01\x00\x00\x00\x02\x00\x00\x00\x01\x00\x02\x00') - assert a.a16.enum == c.Test16 and a.a16.value == c.Test16.A - assert a.b16.enum == c.Test16 and a.b16.value == c.Test16.B - assert a.a24.enum == c.Test24 and a.a24.value == c.Test24.A - assert a.b24.enum == c.Test24 and a.b24.value == c.Test24.B - assert a.a32.enum == c.Test32 and a.a32.value == c.Test32.A - assert a.b32.enum == c.Test32 and a.b32.value == c.Test32.B - - assert len(a.l) == 2 - assert a.l[0].enum == c.Test16 and a.l[0].value == c.Test16.A - assert a.l[1].enum == c.Test16 and a.l[1].value == c.Test16.B - - -def test_compiled_bitfield(): - d = """ - struct test { - uint16 a:1; - uint16 b:1; - uint32 c; - uint16 d:2; - uint16 e:3; - }; - """ - c = cstruct.cstruct() - c.load(d, compiled=True) - - a = c.test(b'\x03\x00\xff\x00\x00\x00\x1f\x00') - assert a.a == 0b1 - assert a.b == 0b1 - assert a.c == 0xff - assert a.d == 0b11 - assert a.e == 0b111 - - -def test_pointers(): - d = """ - struct test { - char magic[4]; - wchar wmagic[4]; - uint8 a; - uint16 b; - uint32 c; - char string[]; - wchar wstring[]; - }; - - struct ptrtest { - test *ptr; - }; - """ - c = cstruct.cstruct(pointer='uint16') - c.load(d, compiled=True) - - d = b'\x02\x00testt\x00e\x00s\x00t\x00\x01\x02\x03\x04\x05\x06\x07lalala\x00t\x00e\x00s\x00t\x00\x00\x00' - p = c.ptrtest(d) - a = p.ptr - - assert a.magic == b'test' - assert a.wmagic == 'test' - assert a.a == 0x01 - assert a.b == 0x0302 - assert a.c == 0x07060504 - assert a.string == b'lalala' - assert a.wstring == 'test' diff --git a/tests/test_expression.py b/tests/test_expression.py new file mode 100644 index 0000000..cab29d3 --- /dev/null +++ b/tests/test_expression.py @@ -0,0 +1,74 @@ +import pytest + +from dissect.cstruct.expression import Expression + + +testdata = [ + ('1 * 0', 0), + ('1 * 1', 1), + ('7 * 8', 56), + ('7*8', 56), + ('7 *8', 56), + (' 7 * 8 ', 
56), + ('0 / 1', 0), + ('1 / 1', 1), + ('2 / 2', 1), + ('3 / 2', 1), + ('4 / 2', 2), + ('1 % 1', 0), + ('1 % 2', 1), + ('5 % 3', 2), + ('0 + 0', 0), + ('1 + 0', 1), + ('1 + 3', 4), + ('0 - 0', 0), + ('1 - 0', 1), + ('0 - 1', -1), + ('1 - 3', -2), + ('3 - 1', 2), + ('0x0 >> 0', 0x0), + ('0x1 >> 0', 0x1), + ('0x1 >> 1', 0x0), + ('0xf0 >> 4', 0xf), + ('0x0 << 4', 0), + ('0x1 << 0', 1), + ('0xf << 4', 0xf0), + ('0 & 0', 0), + ('1 & 0', 0), + ('1 & 1', 1), + ('1 & 2', 0), + ('1 ^ 1', 0), + ('1 ^ 0', 1), + ('1 ^ 3', 2), + ('0 | 0', 0), + ('0 | 1', 1), + ('1 | 1', 1), + ('1 | 2', 3), + # This type of expression is not supported by the parser and will fail. + # ('4 * 1 + 1', 5), + ('-42', -42), + ('42 + (-42)', 0), + ('A + 5', 13), + ('21 - B', 8), + ('A + B', 21), +] + + +class Consts(object): + consts = { + 'A': 8, + 'B': 13, + } + + +def id_fn(val): + if isinstance(val, (str,)): + return val + + +@pytest.mark.parametrize('expression, answer', + testdata, + ids=id_fn) +def test_expression(expression, answer): + parser = Expression(Consts(), expression) + assert parser.evaluate() == answer diff --git a/tox.ini b/tox.ini index 0bbe2d9..e8f2da3 100644 --- a/tox.ini +++ b/tox.ini @@ -1,10 +1,50 @@ [tox] -envlist = py27,py3,pypy +envlist = lint, py3, pypy3 +# This version of tox will autoprovision itself and the requirements defined in +# requires if they are not available on the host system. +minversion = 3.8.0 +# This version of virtualenv installs a pip version of at least 19.0.1 in its +# venvs. +# Requiring minimally this version of virtualenv to be available prevents the +# need of having to explicitly specify a pip>=19.0 dependency in every testenv. +# pip>=19.0 is needed to ensure the sdist build by tox (which is build +# according to PEP 517 and PEP 518 by tox versions >= 3.4.0) is also installed +# properly (according to PEP 517 and PEP 518 by pip>=19.0) in the virtualenvs. 
+# If the dependency is not available on the host system, and the installed tox +# version is >= 3.3.0, tox will self bootstrap an environment with the proper +# versions (including the version of tox itself). +requires = virtualenv>=16.3.0 +isolated_build = true +# Putting the dist dir in the project directory instead of in the {toxworkdir}, +# makes the sdist more easily accessible and prevents the need of rebuilding it +# for the [testenv:build] target. +distdir = {toxinidir}/dist [testenv] deps = pytest - setuptools_scm commands = - pip install . - py.test --color=yes -vs + pytest --basetemp="{envtmpdir}" {posargs:--color=yes -v tests} + +[testenv:lint] +# Force the Python version here, so linting will be done with the correct +# Python version. There should be no difference between the CPython and pypy +# implementations, so we pick one. +basepython = python3 +deps = + flake8 +commands = + flake8 dissect tests setup.py + +[testenv:build] +# Force the Python version here, so building will be done with the correct +# Python version. As the distributions are pure Python, there should be no +# difference between the CPython and pypy implementations, so we pick one. +basepython = python3 +deps = +commands = + pip wheel --no-deps -w ./dist . + +[flake8] +max-line-length = 120 +statistics = True