From 4ecf1b8f7300a910b90f7879f75f1b2c659d869e Mon Sep 17 00:00:00 2001 From: mibe Date: Tue, 12 Dec 2023 09:49:39 +0000 Subject: [PATCH 1/4] Added the shared memory vault --- exasol/shared_memory_vault.py | 226 ++++++++++++++++++++++++++ test/unit/test_shared_memory_vault.py | 106 ++++++++++++ 2 files changed, 332 insertions(+) create mode 100644 exasol/shared_memory_vault.py create mode 100644 test/unit/test_shared_memory_vault.py diff --git a/exasol/shared_memory_vault.py b/exasol/shared_memory_vault.py new file mode 100644 index 0000000..254976c --- /dev/null +++ b/exasol/shared_memory_vault.py @@ -0,0 +1,226 @@ +from typing import Optional +from multiprocessing.shared_memory import SharedMemory +from math import ceil +from datetime import datetime +import struct + + +default_key = '1001011001100101' +default_max_size = 100 +default_storage_name = 'notebook_connector_vault' + + +def _xor(sequence: str, key: str) -> str: + return ''.join('0' if a == b else '1' for a, b in zip(sequence, key)) + + +def compute_crc(sequence: str, key: str) -> str: + """ + Computes a Cyclic Redundancy Check (CRC) code for a provided binary sequence using a provided key. + Check this wiki for details: https://en.wikipedia.org/wiki/Cyclic_redundancy_check + For example if the sequence is '11010011101100000' and the key is '011' the output crc code will + be '100'. Note that the n+1 bit long key, commonly used in literature, 1011 in the above example, + is assumed to have the most significant bit (MSB) equal 1. Here the MSB is omitted. 
+ """ + n = len(key) + reminder = sequence[:n] + for i in range(n, len(sequence)): + starts_one = reminder[0] == '1' + reminder = reminder[1:] + sequence[i] + if starts_one: + reminder = _xor(reminder, key) + return reminder + + +def _get_byte_size(n: int) -> int: + """Returns the number of bytes required to store an integer value""" + return int(ceil(n.bit_length() / 8)) + + +def _get_key_size(key: str) -> int: + """Calculates the byte size of a CRC key""" + return int(ceil(len(key) / 8)) + + +def _bytes_to_bin(msg_bytes: bytes) -> str: + """Converts byte array to a binary string, e.g.[6, 7] => 01100111""" + return ''.join(format(c, '08b') for c in msg_bytes) + + +def encode(content: str, key: str, creation_time: datetime) -> (int, bytearray): + """ + Creates a bytearray with encoded content and its creation datetime. + Currently, the content is not being encrypted. It gets appended by the Cyclic Redundancy Check (CRC) + code. The CRC is computed over both the timestamp and the content. + + The need for a CRC is debatable. Its use is motivated by the problem of non-synchronised concurrent + access to the shared memory. In theory, the content can get corrupted by simultaneous reading/writing. + Hence, is the need to validate the data retrieved from the shared memory. On the other hand, given the + use case, the possibility of concurrent access is only hypothetical. The implication of reading an + impaired data is insignificant and CRC cannot guarantee the validity with 100% accuracy anyway. + """ + + # Prepend the content by the timestamp and convert the whole thing to a binary sequence. + ts_bytes = struct.pack('d', creation_time.timestamp()) + body = ts_bytes + content.encode('utf8') + bin_body = _bytes_to_bin(body) + + # Compute the CRC of the content right-padded with n zeros. + key_size = _get_key_size(key) + padding = '0' * len(key) + bin_cr = compute_crc(bin_body + padding, key) + cr = int(bin_cr, 2) + + # Put together the content bytes and the CRC bytes. 
+ cont_size = len(body) + key_size + enc_content = bytearray(cont_size) + enc_content[:-key_size] = body + enc_content[-key_size:] = cr.to_bytes(key_size, byteorder='big', signed=False) + return cont_size, enc_content + + +def decode(enc_content: bytearray, key: str) -> (bool, datetime, str): + """ + Decodes and validates a content encoded in a bytearray. + Returns the validity flag, the datetime of the content creation and the textual content. + If the CRC code is invalid the function returns (False, , ''). + Otherwise, it returns (True, , ). + """ + + # Compute the CRC code of the content that should include its own CRC code. + key_size = _get_key_size(key) + bin_body = _bytes_to_bin(enc_content[:-key_size]) + bin_cr = _bytes_to_bin(enc_content[-key_size:])[-len(key):] + bin_cr = compute_crc(bin_body + bin_cr, key) + + # For a valid content the computed CRC should be zero. + if int(bin_cr, 2) == 0: + key_size = _get_key_size(key) + # Decode the content creation timestamp. + ts = struct.unpack('d', enc_content[:8])[0] + # Decode the content. + content = enc_content[8:-key_size].decode('utf8') + return True, datetime.fromtimestamp(ts), content + return False, datetime.min, '' + + +def _open_shared_memory(storage_name: str, max_size: int, must_exist: bool) -> Optional[SharedMemory]: + """ + Creates and returns a shared memory accessor object. If must_exist == False creates the shared + memory block if it doesn't exist. Otherwise, if the block doesn't exist returns None. + """ + + try: + return SharedMemory(name=storage_name, create=False, size=max_size) + except FileNotFoundError: + if must_exist: + return None + return SharedMemory(name=storage_name, create=False, size=max_size) + + +def write_to_sm(content: str, creation_time: Optional[datetime] = None, key: str = default_key, + max_size: int = default_max_size, storage_name: str = default_storage_name) -> bool: + """ + Saves a content and its creation time in a shared memory. 
+ + The named shared memory block may or may not be already allocated. The function creates or opens + the block, writes the encoded content and closes the block. Currently, there are no provisions + for the destruction of the block. + + The content gets prepended by its length in bytes, so that a reading function knows how many + bytes to read. + + If the total length of the content doesn't fit into the maximum size of the shared memory block + the function does nothing and returns False. Otherwise, if the content is successfully stored + into the shared memory, it returns True. + + Parameters: + content - The content string to be stored into the stored to the shared memory + creation_time - Time when the content was created, which will also be stored to the shared memory. + If not provided the current time will be used. + key - A binary string used for computing the CRC. + max_size - Maximum size of the shared memory block in bytes. + storage_name - Name of the shared memory block. + """ + + # Encode the content and check if it fits into the shared memory block + creation_time = creation_time or datetime.now() + cont_size, enc_content = encode(content, key, creation_time) + size_size = _get_byte_size(max_size) + total_size = cont_size + size_size + if total_size > max_size: + return False + + # Open or create the named shared memory block. + pwd_memory = _open_shared_memory(storage_name, max_size, False) + try: + # Write the content size followed by the content itself. + pwd_memory.buf[:size_size] = cont_size.to_bytes(size_size, byteorder='big', signed=False) + pwd_memory.buf[size_size:total_size] = enc_content + finally: + pwd_memory.close() + return True + + +def read_from_sm(key: str = default_key, max_size: int = default_max_size, + storage_name: str = default_storage_name) -> (bool, datetime, str): + """ + Reads from the shared memory a content and the time when it was created . + + The shared memory block must already exist and hold a valid content. 
Like its writing counterpart, + this function opens and closes the shared memory block, but doesn't destroy it afterward. + + The content must be prepended by its length, so that the function knows how many bytes + to read. + + The function returns (True, , ) if a valid content + has been successfully retrieved from the shared memory. If the content is empty or too big (if the + size is to be believed), or it has been deemed invalid the function returns (False, , ''). + + Parameters: + key - A binary string used for computing the CRC. + max_size - Maximum size of the shared memory block in bytes. + storage_name - Name of the shared memory block. + """ + + size_size = _get_byte_size(max_size) + + pwd_memory = _open_shared_memory(storage_name, max_size, True) + if pwd_memory is None: + return False, datetime.min, '' + try: + cont_size = int.from_bytes(pwd_memory.buf[:size_size], byteorder='big') + total_size = cont_size + size_size + # Check if the size makes sense + if cont_size == 0 or total_size > max_size: + return False, datetime.min, '' + # Reade and decode the content. + enc_content = bytearray(pwd_memory.buf[size_size:total_size]) + return decode(enc_content, key) + finally: + pwd_memory.close() + + +def clear_sm(max_size: int = default_max_size, storage_name: str = default_storage_name, + delete_storage: bool = False) -> None: + """ + Invalidates the content stored in shared memory by setting its length to zero and optionally + destroys the shared memory block. The latter may not take an immediate effect though. + + Parameters: + max_size - Maximum size of the shared memory block in bytes. + storage_name - Name of the shared memory block. + delete_storage - If True will destroy the shared memory block. 
+ """ + + pwd_memory = _open_shared_memory(storage_name, max_size, True) + if pwd_memory is not None: + try: + size_size = _get_byte_size(max_size) + if size_size <= max_size: + cont_size = 0 + pwd_memory.buf[:size_size] = cont_size.to_bytes(size_size, byteorder='big', signed=False) + if delete_storage: + pwd_memory.unlink() + finally: + pwd_memory.close() diff --git a/test/unit/test_shared_memory_vault.py b/test/unit/test_shared_memory_vault.py new file mode 100644 index 0000000..93f107c --- /dev/null +++ b/test/unit/test_shared_memory_vault.py @@ -0,0 +1,106 @@ +from unittest import mock +from exasol.shared_memory_vault import (compute_crc, encode, decode, write_to_sm, read_from_sm, clear_sm) +from datetime import datetime + + +def test_compute_crc(): + assert compute_crc('11010011101100000', '011') == '100' + + +def test_encode_decode(): + + dt = datetime(year=2023, month=12, day=11, hour=16, minute=35, second=21) + content = 'Supercalifragilisticexpialidocious' + key = '10011010' + success, enc_content = encode(content, key, dt) + assert success + success, dt_out, content_out = decode(enc_content, key) + assert success + assert dt_out == dt + assert content_out == content + + +def test_encode_corrupt_decode(): + + content = 'Go ahead, make my day.' + key = '10011010' + _, enc_content = encode(content, key, datetime.now()) + enc_content[0] ^= 127 + success, _, _ = decode(enc_content, key) + assert not success + + +@mock.patch("exasol.shared_memory_vault._open_shared_memory") +def test_write_read(mock_sm_factory): + + max_size = 200 + mock_sm = mock.MagicMock() + mock_sm.buf = bytearray(max_size) + mock_sm_factory.return_value = mock_sm + key = '100110100011' + content = 'The truth will set you free.' 
+ dt = datetime(year=2023, month=12, day=12, hour=8, minute=39, second=45) + success = write_to_sm(content, creation_time=dt, key=key, max_size=max_size) + assert success + success, dt_out, content_out = read_from_sm(key=key, max_size=max_size) + assert success + assert dt_out == dt + assert content_out == content + + +@mock.patch("exasol.shared_memory_vault._open_shared_memory") +def test_write_corrupt_read(mock_sm_factory): + + max_size = 200 + mock_sm = mock.MagicMock() + mock_sm.buf = bytearray(max_size) + mock_sm_factory.return_value = mock_sm + key = '100110100011' + content = 'The truth will set you free.' + dt = datetime(year=2023, month=12, day=12, hour=8, minute=39, second=45) + write_to_sm(content, creation_time=dt, key=key, max_size=max_size) + mock_sm.buf = bytearray(max_size) + mock_sm.buf[10] = mock_sm.buf[10] + success, _, _ = read_from_sm(key=key, max_size=max_size) + assert not success + + +@mock.patch("exasol.shared_memory_vault._open_shared_memory") +def test_read_fail_no_sm(mock_sm_factory): + + # Simulate the case when the shared memory block doesn't exist. + mock_sm_factory.return_value = None + max_size = 200 + key = '100110100011' + success, _, _ = read_from_sm(key=key, max_size=max_size) + assert not success + + +@mock.patch("exasol.shared_memory_vault._open_shared_memory") +def test_write_fail_insufficient_memory(mock_sm_factory): + + max_size = 50 + mock_sm = mock.MagicMock() + mock_sm.buf = bytearray(max_size) + mock_sm_factory.return_value = mock_sm + key = '100110100011' + content = 'If you want something said, ask a man; if you want something done, ask a woman.' 
+ dt = datetime(year=2023, month=12, day=12, hour=9, minute=19, second=10) + success = write_to_sm(content, creation_time=dt, key=key, max_size=max_size) + assert not success + + +@mock.patch("exasol.shared_memory_vault._open_shared_memory") +def test_write_clear_read(mock_sm_factory): + + max_size = 200 + mock_sm = mock.MagicMock() + mock_sm.buf = bytearray(max_size) + mock_sm_factory.return_value = mock_sm + key = '100110100011' + content = 'The truth will set you free.' + dt = datetime(year=2023, month=12, day=12, hour=8, minute=39, second=45) + write_to_sm(content, creation_time=dt, key=key, max_size=max_size) + clear_sm(max_size=max_size) + success, _, _ = read_from_sm(key=key, max_size=max_size) + assert not success From 32b6bb7a6c3b0cb2b5c33fc6a9249505e593ba59 Mon Sep 17 00:00:00 2001 From: Mikhail Beck Date: Tue, 12 Dec 2023 11:19:14 +0000 Subject: [PATCH 2/4] Update exasol/shared_memory_vault.py Co-authored-by: Nicola Coretti --- exasol/shared_memory_vault.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/exasol/shared_memory_vault.py b/exasol/shared_memory_vault.py index 254976c..071488a 100644 --- a/exasol/shared_memory_vault.py +++ b/exasol/shared_memory_vault.py @@ -115,7 +115,7 @@ def _open_shared_memory(storage_name: str, max_size: int, must_exist: bool) -> O except FileNotFoundError: if must_exist: return None - return SharedMemory(name=storage_name, create=False, size=max_size) + return SharedMemory(name=storage_name, create=True, size=max_size) def write_to_sm(content: str, creation_time: Optional[datetime] = None, key: str = default_key, From cf70d1e522c64ed7eeab6ac5c500a4aff69f95dc Mon Sep 17 00:00:00 2001 From: mibe Date: Tue, 12 Dec 2023 14:11:05 +0000 Subject: [PATCH 3/4] Some renaming --- exasol/shared_memory_vault.py | 76 +++++++++++++-------------- test/unit/test_shared_memory_vault.py | 38 +++++++------- 2 files changed, 57 insertions(+), 57 deletions(-) diff --git a/exasol/shared_memory_vault.py 
b/exasol/shared_memory_vault.py index 071488a..c849c46 100644 --- a/exasol/shared_memory_vault.py +++ b/exasol/shared_memory_vault.py @@ -5,30 +5,30 @@ import struct -default_key = '1001011001100101' -default_max_size = 100 -default_storage_name = 'notebook_connector_vault' +DEFAULT_CRC_DIVISOR = '1001011001100101' +DEFAULT_MAX_SIZE = 100 +DEFAULT_STORAGE_NAME = 'notebook_connector_vault' -def _xor(sequence: str, key: str) -> str: - return ''.join('0' if a == b else '1' for a, b in zip(sequence, key)) +def _xor(sequence: str, crc_divisor: str) -> str: + return ''.join('0' if a == b else '1' for a, b in zip(sequence, crc_divisor)) -def compute_crc(sequence: str, key: str) -> str: +def compute_crc(sequence: str, crc_divisor: str) -> str: """ - Computes a Cyclic Redundancy Check (CRC) code for a provided binary sequence using a provided key. - Check this wiki for details: https://en.wikipedia.org/wiki/Cyclic_redundancy_check - For example if the sequence is '11010011101100000' and the key is '011' the output crc code will - be '100'. Note that the n+1 bit long key, commonly used in literature, 1011 in the above example, + Computes a Cyclic Redundancy Check (CRC) code for a provided binary sequence using a provided + polynomial divisor. Check this wiki for details: https://en.wikipedia.org/wiki/Cyclic_redundancy_check + For example if the sequence is '11010011101100000' and the divisor is '011' the output crc code will + be '100'. Note that the n+1 bit long divisor, commonly used in literature, 1011 in the above example, is assumed to have the most significant bit (MSB) equal 1. Here the MSB is omitted. 
""" - n = len(key) + n = len(crc_divisor) reminder = sequence[:n] for i in range(n, len(sequence)): starts_one = reminder[0] == '1' reminder = reminder[1:] + sequence[i] if starts_one: - reminder = _xor(reminder, key) + reminder = _xor(reminder, crc_divisor) return reminder @@ -37,9 +37,9 @@ def _get_byte_size(n: int) -> int: return int(ceil(n.bit_length() / 8)) -def _get_key_size(key: str) -> int: - """Calculates the byte size of a CRC key""" - return int(ceil(len(key) / 8)) +def _get_divisor_size(crc_divisor: str) -> int: + """Calculates the byte size of a CRC divisor""" + return int(ceil(len(crc_divisor) / 8)) def _bytes_to_bin(msg_bytes: bytes) -> str: @@ -47,7 +47,7 @@ def _bytes_to_bin(msg_bytes: bytes) -> str: return ''.join(format(c, '08b') for c in msg_bytes) -def encode(content: str, key: str, creation_time: datetime) -> (int, bytearray): +def encode(content: str, crc_divisor: str, creation_time: datetime) -> (int, bytearray): """ Creates a bytearray with encoded content and its creation datetime. Currently, the content is not being encrypted. It gets appended by the Cyclic Redundancy Check (CRC) @@ -66,20 +66,20 @@ def encode(content: str, key: str, creation_time: datetime) -> (int, bytearray): bin_body = _bytes_to_bin(body) # Compute the CRC of the content right-padded with n zeros. - key_size = _get_key_size(key) - padding = '0' * len(key) - bin_cr = compute_crc(bin_body + padding, key) + divisor_size = _get_divisor_size(crc_divisor) + padding = '0' * len(crc_divisor) + bin_cr = compute_crc(bin_body + padding, crc_divisor) cr = int(bin_cr, 2) # Put together the content bytes and the CRC bytes. 
- cont_size = len(body) + key_size + cont_size = len(body) + divisor_size enc_content = bytearray(cont_size) - enc_content[:-key_size] = body - enc_content[-key_size:] = cr.to_bytes(key_size, byteorder='big', signed=False) + enc_content[:-divisor_size] = body + enc_content[-divisor_size:] = cr.to_bytes(divisor_size, byteorder='big', signed=False) return cont_size, enc_content -def decode(enc_content: bytearray, key: str) -> (bool, datetime, str): +def decode(enc_content: bytearray, crc_divisor: str) -> (bool, datetime, str): """ Decodes and validates a content encoded in a bytearray. Returns the validity flag, the datetime of the content creation and the textual content. @@ -88,18 +88,18 @@ def decode(enc_content: bytearray, key: str) -> (bool, datetime, str): """ # Compute the CRC code of the content that should include its own CRC code. - key_size = _get_key_size(key) - bin_body = _bytes_to_bin(enc_content[:-key_size]) - bin_cr = _bytes_to_bin(enc_content[-key_size:])[-len(key):] - bin_cr = compute_crc(bin_body + bin_cr, key) + divisor_size = _get_divisor_size(crc_divisor) + bin_body = _bytes_to_bin(enc_content[:-divisor_size]) + bin_cr = _bytes_to_bin(enc_content[-divisor_size:])[-len(crc_divisor):] + bin_cr = compute_crc(bin_body + bin_cr, crc_divisor) # For a valid content the computed CRC should be zero. if int(bin_cr, 2) == 0: - key_size = _get_key_size(key) + divisor_size = _get_divisor_size(crc_divisor) # Decode the content creation timestamp. ts = struct.unpack('d', enc_content[:8])[0] # Decode the content. 
- content = enc_content[8:-key_size].decode('utf8') + content = enc_content[8:-divisor_size].decode('utf8') return True, datetime.fromtimestamp(ts), content return False, datetime.min, '' @@ -118,8 +118,8 @@ def _open_shared_memory(storage_name: str, max_size: int, must_exist: bool) -> O return SharedMemory(name=storage_name, create=True, size=max_size) -def write_to_sm(content: str, creation_time: Optional[datetime] = None, key: str = default_key, - max_size: int = default_max_size, storage_name: str = default_storage_name) -> bool: +def write_to_sm(content: str, creation_time: Optional[datetime] = None, crc_divisor: str = DEFAULT_CRC_DIVISOR, + max_size: int = DEFAULT_MAX_SIZE, storage_name: str = DEFAULT_STORAGE_NAME) -> bool: """ Saves a content and its creation time in a shared memory. @@ -138,14 +138,14 @@ def write_to_sm(content: str, creation_time: Optional[datetime] = None, key: str content - The content string to be stored into the stored to the shared memory creation_time - Time when the content was created, which will also be stored to the shared memory. If not provided the current time will be used. - key - A binary string used for computing the CRC. + crc_divisor - A binary string used for computing the CRC. max_size - Maximum size of the shared memory block in bytes. storage_name - Name of the shared memory block. 
""" # Encode the content and check if it fits into the shared memory block creation_time = creation_time or datetime.now() - cont_size, enc_content = encode(content, key, creation_time) + cont_size, enc_content = encode(content, crc_divisor, creation_time) size_size = _get_byte_size(max_size) total_size = cont_size + size_size if total_size > max_size: @@ -162,8 +162,8 @@ def write_to_sm(content: str, creation_time: Optional[datetime] = None, key: str return True -def read_from_sm(key: str = default_key, max_size: int = default_max_size, - storage_name: str = default_storage_name) -> (bool, datetime, str): +def read_from_sm(crc_divisor: str = DEFAULT_CRC_DIVISOR, max_size: int = DEFAULT_MAX_SIZE, + storage_name: str = DEFAULT_STORAGE_NAME) -> (bool, datetime, str): """ Reads from the shared memory a content and the time when it was created . @@ -178,7 +178,7 @@ def read_from_sm(key: str = default_key, max_size: int = default_max_size, size is to be believed), or it has been deemed invalid the function returns (False, , ''). Parameters: - key - A binary string used for computing the CRC. + crc_divisor - A binary string used for computing the CRC. max_size - Maximum size of the shared memory block in bytes. storage_name - Name of the shared memory block. """ @@ -196,12 +196,12 @@ def read_from_sm(key: str = default_key, max_size: int = default_max_size, return False, datetime.min, '' # Reade and decode the content. 
enc_content = bytearray(pwd_memory.buf[size_size:total_size]) - return decode(enc_content, key) + return decode(enc_content, crc_divisor) finally: pwd_memory.close() -def clear_sm(max_size: int = default_max_size, storage_name: str = default_storage_name, +def clear_sm(max_size: int = DEFAULT_MAX_SIZE, storage_name: str = DEFAULT_STORAGE_NAME, delete_storage: bool = False) -> None: """ Invalidates the content stored in shared memory by setting its length to zero and optionally diff --git a/test/unit/test_shared_memory_vault.py b/test/unit/test_shared_memory_vault.py index 93f107c..e8d6c89 100644 --- a/test/unit/test_shared_memory_vault.py +++ b/test/unit/test_shared_memory_vault.py @@ -11,10 +11,10 @@ def test_encode_decode(): dt = datetime(year=2023, month=12, day=11, hour=16, minute=35, second=21) content = 'Supercalifragilisticexpialidocious' - key = '10011010' - success, enc_content = encode(content, key, dt) + divisor = '10011010' + success, enc_content = encode(content, divisor, dt) assert success - success, dt_out, content_out = decode(enc_content, key) + success, dt_out, content_out = decode(enc_content, divisor) assert success assert dt_out == dt assert content_out == content @@ -23,10 +23,10 @@ def test_encode_decode(): def test_encode_corrupt_decode(): content = 'Go ahead, make my day.' - key = '10011010' - _, enc_content = encode(content, key, datetime.now()) + divisor = '10011010' + _, enc_content = encode(content, divisor, datetime.now()) enc_content[0] ^= 127 - success, _, _ = decode(enc_content, key) + success, _, _ = decode(enc_content, divisor) assert not success @@ -37,12 +37,12 @@ def test_write_read(mock_sm_factory): mock_sm = mock.MagicMock() mock_sm.buf = bytearray(max_size) mock_sm_factory.return_value = mock_sm - key = '100110100011' + divisor = '100110100011' content = 'The truth will set you free.' 
dt = datetime(year=2023, month=12, day=12, hour=8, minute=39, second=45) - success = write_to_sm(content, creation_time=dt, key=key, max_size=max_size) + success = write_to_sm(content, creation_time=dt, crc_divisor=divisor, max_size=max_size) assert success - success, dt_out, content_out = read_from_sm(key=key, max_size=max_size) + success, dt_out, content_out = read_from_sm(crc_divisor=divisor, max_size=max_size) assert success assert dt_out == dt assert content_out == content @@ -55,13 +55,13 @@ def test_write_corrupt_read(mock_sm_factory): mock_sm = mock.MagicMock() mock_sm.buf = bytearray(max_size) mock_sm_factory.return_value = mock_sm - key = '100110100011' + divisor = '100110100011' content = 'The truth will set you free.' dt = datetime(year=2023, month=12, day=12, hour=8, minute=39, second=45) - write_to_sm(content, creation_time=dt, key=key, max_size=max_size) + write_to_sm(content, creation_time=dt, crc_divisor=divisor, max_size=max_size) mock_sm.buf = bytearray(max_size) mock_sm.buf[10] = mock_sm.buf[10] - success, _, _ = read_from_sm(key=key, max_size=max_size) + success, _, _ = read_from_sm(crc_divisor=divisor, max_size=max_size) assert not success @@ -71,8 +71,8 @@ def test_read_fail_no_sm(mock_sm_factory): # Simulate the case when the shared memory block doesn't exist. mock_sm_factory.return_value = None max_size = 200 - key = '100110100011' - success, _, _ = read_from_sm(key=key, max_size=max_size) + divisor = '100110100011' + success, _, _ = read_from_sm(crc_divisor=divisor, max_size=max_size) assert not success @@ -83,10 +83,10 @@ def test_write_fail_insufficient_memory(mock_sm_factory): mock_sm = mock.MagicMock() mock_sm.buf = bytearray(max_size) mock_sm_factory.return_value = mock_sm - key = '100110100011' + divisor = '100110100011' content = 'If you want something said, ask a man; if you want something done, ask a woman.' 
dt = datetime(year=2023, month=12, day=12, hour=9, minute=19, second=10) - success = write_to_sm(content, creation_time=dt, key=key, max_size=max_size) + success = write_to_sm(content, creation_time=dt, crc_divisor=divisor, max_size=max_size) assert not success @@ -97,10 +97,10 @@ def test_write_clear_read(mock_sm_factory): mock_sm = mock.MagicMock() mock_sm.buf = bytearray(max_size) mock_sm_factory.return_value = mock_sm - key = '100110100011' + divisor = '100110100011' content = 'The truth will set you free.' dt = datetime(year=2023, month=12, day=12, hour=8, minute=39, second=45) - write_to_sm(content, creation_time=dt, key=key, max_size=max_size) + write_to_sm(content, creation_time=dt, crc_divisor=divisor, max_size=max_size) clear_sm(max_size=max_size) - success, _, _ = read_from_sm(key=key, max_size=max_size) + success, _, _ = read_from_sm(crc_divisor=divisor, max_size=max_size) assert not success From 2ea7a59ee5812f2f7505a2b623b94558002de586 Mon Sep 17 00:00:00 2001 From: mibe Date: Tue, 12 Dec 2023 15:49:30 +0000 Subject: [PATCH 4/4] Shortening the lines and making mypy happy --- exasol/shared_memory_vault.py | 120 +++++++++++++++----------- test/unit/test_shared_memory_vault.py | 14 +-- 2 files changed, 80 insertions(+), 54 deletions(-) diff --git a/exasol/shared_memory_vault.py b/exasol/shared_memory_vault.py index c849c46..06ac1e8 100644 --- a/exasol/shared_memory_vault.py +++ b/exasol/shared_memory_vault.py @@ -1,4 +1,4 @@ -from typing import Optional +from typing import Optional, Tuple from multiprocessing.shared_memory import SharedMemory from math import ceil from datetime import datetime @@ -16,11 +16,13 @@ def _xor(sequence: str, crc_divisor: str) -> str: def compute_crc(sequence: str, crc_divisor: str) -> str: """ - Computes a Cyclic Redundancy Check (CRC) code for a provided binary sequence using a provided - polynomial divisor. 
Check this wiki for details: https://en.wikipedia.org/wiki/Cyclic_redundancy_check - For example if the sequence is '11010011101100000' and the divisor is '011' the output crc code will - be '100'. Note that the n+1 bit long divisor, commonly used in literature, 1011 in the above example, - is assumed to have the most significant bit (MSB) equal 1. Here the MSB is omitted. + Computes a Cyclic Redundancy Check (CRC) code for a provided binary sequence using + a provided polynomial divisor. + Check this wiki for details: https://en.wikipedia.org/wiki/Cyclic_redundancy_check + For example if the sequence is '11010011101100000' and the divisor is '011' the + output crc code will be '100'. Note that the n+1 bit long divisor, commonly used in + literature, 1011 in the above example, is assumed to have the most significant bit + (MSB) equal 1. Here the MSB is omitted. """ n = len(crc_divisor) reminder = sequence[:n] @@ -47,20 +49,25 @@ def _bytes_to_bin(msg_bytes: bytes) -> str: return ''.join(format(c, '08b') for c in msg_bytes) -def encode(content: str, crc_divisor: str, creation_time: datetime) -> (int, bytearray): +def encode(content: str, crc_divisor: str, + creation_time: datetime) -> Tuple[int, bytearray]: """ Creates a bytearray with encoded content and its creation datetime. - Currently, the content is not being encrypted. It gets appended by the Cyclic Redundancy Check (CRC) - code. The CRC is computed over both the timestamp and the content. - - The need for a CRC is debatable. Its use is motivated by the problem of non-synchronised concurrent - access to the shared memory. In theory, the content can get corrupted by simultaneous reading/writing. - Hence, is the need to validate the data retrieved from the shared memory. On the other hand, given the - use case, the possibility of concurrent access is only hypothetical. The implication of reading an - impaired data is insignificant and CRC cannot guarantee the validity with 100% accuracy anyway. 
+ Currently, the content is not being encrypted. It gets appended by the + Cyclic Redundancy Check (CRC) code. The CRC is computed over both the timestamp + and the content. + + The need for a CRC is debatable. Its use is motivated by the problem of + non-synchronised concurrent access to the shared memory. In theory, the content + can get corrupted by simultaneous reading/writing. Hence, is the need to validate + the data retrieved from the shared memory. On the other hand, given the use case, + the possibility of concurrent access is only hypothetical. The implication of + reading an impaired data is insignificant and CRC cannot guarantee the validity + with 100% accuracy anyway. """ - # Prepend the content by the timestamp and convert the whole thing to a binary sequence. + # Prepend the content by the timestamp and convert the whole thing to a binary + # sequence. ts_bytes = struct.pack('d', creation_time.timestamp()) body = ts_bytes + content.encode('utf8') bin_body = _bytes_to_bin(body) @@ -75,16 +82,17 @@ def encode(content: str, crc_divisor: str, creation_time: datetime) -> (int, byt cont_size = len(body) + divisor_size enc_content = bytearray(cont_size) enc_content[:-divisor_size] = body - enc_content[-divisor_size:] = cr.to_bytes(divisor_size, byteorder='big', signed=False) + enc_content[-divisor_size:] = cr.to_bytes(divisor_size, byteorder='big', + signed=False) return cont_size, enc_content -def decode(enc_content: bytearray, crc_divisor: str) -> (bool, datetime, str): +def decode(enc_content: bytearray, crc_divisor: str) -> Tuple[bool, datetime, str]: """ Decodes and validates a content encoded in a bytearray. - Returns the validity flag, the datetime of the content creation and the textual content. - If the CRC code is invalid the function returns (False, , ''). - Otherwise, it returns (True, , ). + Returns the validity flag, the datetime of the content creation and the textual + content. 
If the CRC code is invalid the function returns (False, datetime.min, + ''). Otherwise, it returns (True, creation datetime, content). """ # Compute the CRC code of the content that should include its own CRC code. @@ -104,10 +112,12 @@ def decode(enc_content: bytearray, crc_divisor: str) -> (bool, datetime, str): return False, datetime.min, '' -def _open_shared_memory(storage_name: str, max_size: int, must_exist: bool) -> Optional[SharedMemory]: +def _open_shared_memory(storage_name: str, max_size: int, + must_exist: bool) -> Optional[SharedMemory]: """ - Creates and returns a shared memory accessor object. If must_exist == False creates the shared - memory block if it doesn't exist. Otherwise, if the block doesn't exist returns None. + Creates and returns a shared memory accessor object. If must_exist == False creates + the shared memory block if it doesn't exist. Otherwise, if the block doesn't exist + returns None. """ try: @@ -118,25 +128,29 @@ def _open_shared_memory(storage_name: str, max_size: int, must_exist: bool) -> O return SharedMemory(name=storage_name, create=True, size=max_size) -def write_to_sm(content: str, creation_time: Optional[datetime] = None, crc_divisor: str = DEFAULT_CRC_DIVISOR, - max_size: int = DEFAULT_MAX_SIZE, storage_name: str = DEFAULT_STORAGE_NAME) -> bool: +def write_to_sm(content: str, creation_time: Optional[datetime] = None, + crc_divisor: str = DEFAULT_CRC_DIVISOR, + max_size: int = DEFAULT_MAX_SIZE, + storage_name: str = DEFAULT_STORAGE_NAME) -> bool: """ Saves a content and its creation time in a shared memory. - The named shared memory block may or may not be already allocated. The function creates or opens - the block, writes the encoded content and closes the block. Currently, there are no provisions - for the destruction of the block. + The named shared memory block may or may not be already allocated. The function + creates or opens the block, writes the encoded content and closes the block. 
+ Currently, there are no provisions for the destruction of the block. - The content gets prepended by its length in bytes, so that a reading function knows how many - bytes to read. + The content gets prepended by its length in bytes, so that a reading function knows + how many bytes to read. - If the total length of the content doesn't fit into the maximum size of the shared memory block - the function does nothing and returns False. Otherwise, if the content is successfully stored - into the shared memory, it returns True. + If the total length of the content doesn't fit into the maximum size of the shared + memory block the function does nothing and returns False. Otherwise, if the content + is successfully stored into the shared memory, it returns True. Parameters: - content - The content string to be stored into the stored to the shared memory - creation_time - Time when the content was created, which will also be stored to the shared memory. + content - The content string to be stored into the stored to the + shared memory. + creation_time - Time when the content was created, which will also be stored to the + shared memory. If not provided the current time will be used. crc_divisor - A binary string used for computing the CRC. max_size - Maximum size of the shared memory block in bytes. @@ -153,29 +167,35 @@ def write_to_sm(content: str, creation_time: Optional[datetime] = None, crc_divi # Open or create the named shared memory block. pwd_memory = _open_shared_memory(storage_name, max_size, False) + if pwd_memory is None: + return False try: # Write the content size followed by the content itself. 
- pwd_memory.buf[:size_size] = cont_size.to_bytes(size_size, byteorder='big', signed=False) + pwd_memory.buf[:size_size] = cont_size.to_bytes(size_size, byteorder='big', + signed=False) pwd_memory.buf[size_size:total_size] = enc_content finally: pwd_memory.close() return True -def read_from_sm(crc_divisor: str = DEFAULT_CRC_DIVISOR, max_size: int = DEFAULT_MAX_SIZE, - storage_name: str = DEFAULT_STORAGE_NAME) -> (bool, datetime, str): +def read_from_sm(crc_divisor: str = DEFAULT_CRC_DIVISOR, + max_size: int = DEFAULT_MAX_SIZE, + storage_name: str = DEFAULT_STORAGE_NAME) -> Tuple[bool, datetime, str]: """ Reads from the shared memory a content and the time when it was created . - The shared memory block must already exist and hold a valid content. Like its writing counterpart, - this function opens and closes the shared memory block, but doesn't destroy it afterward. + The shared memory block must already exist and hold a valid content. Like its + writing counterpart, this function opens and closes the shared memory block, but + doesn't destroy it afterward. - The content must be prepended by its length, so that the function knows how many bytes - to read. + The content must be prepended by its length, so that the function knows how many + bytes to read. - The function returns (True, , ) if a valid content - has been successfully retrieved from the shared memory. If the content is empty or too big (if the - size is to be believed), or it has been deemed invalid the function returns (False, , ''). + The function returns (True, , ) if a + valid content has been successfully retrieved from the shared memory. If the content + is empty or too big (if the size is to be believed), or it has been deemed invalid + the function returns (False, , ''). Parameters: crc_divisor - A binary string used for computing the CRC. 
@@ -204,8 +224,9 @@ def read_from_sm(crc_divisor: str = DEFAULT_CRC_DIVISOR, max_size: int = DEFAULT def clear_sm(max_size: int = DEFAULT_MAX_SIZE, storage_name: str = DEFAULT_STORAGE_NAME, delete_storage: bool = False) -> None: """ - Invalidates the content stored in shared memory by setting its length to zero and optionally - destroys the shared memory block. The latter may not take an immediate effect though. + Invalidates the content stored in shared memory by setting its length to zero and + optionally destroys the shared memory block. The latter may not take an immediate + effect though. Parameters: max_size - Maximum size of the shared memory block in bytes. @@ -219,7 +240,8 @@ def clear_sm(max_size: int = DEFAULT_MAX_SIZE, storage_name: str = DEFAULT_STORA size_size = _get_byte_size(max_size) if size_size <= max_size: cont_size = 0 - pwd_memory.buf[:size_size] = cont_size.to_bytes(size_size, byteorder='big', signed=False) + pwd_memory.buf[:size_size] = cont_size.to_bytes( + size_size, byteorder='big', signed=False) if delete_storage: pwd_memory.unlink() finally: diff --git a/test/unit/test_shared_memory_vault.py b/test/unit/test_shared_memory_vault.py index e8d6c89..2e50ae7 100644 --- a/test/unit/test_shared_memory_vault.py +++ b/test/unit/test_shared_memory_vault.py @@ -1,6 +1,7 @@ -from unittest import mock -from exasol.shared_memory_vault import (compute_crc, encode, decode, write_to_sm, read_from_sm, clear_sm) from datetime import datetime +from unittest import mock +from exasol.shared_memory_vault import (compute_crc, encode, decode, write_to_sm, + read_from_sm, clear_sm) def test_compute_crc(): @@ -40,7 +41,8 @@ def test_write_read(mock_sm_factory): divisor = '100110100011' content = 'The truth will set you free.' 
dt = datetime(year=2023, month=12, day=12, hour=8, minute=39, second=45) - success = write_to_sm(content, creation_time=dt, crc_divisor=divisor, max_size=max_size) + success = write_to_sm(content, creation_time=dt, crc_divisor=divisor, + max_size=max_size) assert success success, dt_out, content_out = read_from_sm(crc_divisor=divisor, max_size=max_size) assert success @@ -84,9 +86,11 @@ def test_write_fail_insufficient_memory(mock_sm_factory): mock_sm.buf = bytearray(max_size) mock_sm_factory.return_value = mock_sm divisor = '100110100011' - content = 'If you want something said, ask a man; if you want something done, ask a woman.' + content = 'If you want something said, ask a man; ' \ + 'if you want something done, ask a woman.' dt = datetime(year=2023, month=12, day=12, hour=9, minute=19, second=10) - success = write_to_sm(content, creation_time=dt, crc_divisor=divisor, max_size=max_size) + success = write_to_sm(content, creation_time=dt, crc_divisor=divisor, + max_size=max_size) assert not success