diff --git a/.github/workflows/non-test-tox.yml b/.github/workflows/non-test-tox.yml index 7393b99..5a1bd9e 100644 --- a/.github/workflows/non-test-tox.yml +++ b/.github/workflows/non-test-tox.yml @@ -1,6 +1,10 @@ name: Coverage, mypi -on: [push] +on: + push: + workflow_dispatch: + pull_request: + types: [ opened, synchronize, reopened ] jobs: build: diff --git a/.github/workflows/tox-tests.yml b/.github/workflows/tox-tests.yml index 1888247..74611b0 100644 --- a/.github/workflows/tox-tests.yml +++ b/.github/workflows/tox-tests.yml @@ -1,6 +1,11 @@ name: Tox Tests -on: [push] +on: + push: + workflow_dispatch: + pull_request: + types: [ opened, synchronize, reopened ] + jobs: build: @@ -8,7 +13,7 @@ jobs: runs-on: ubuntu-latest strategy: matrix: - python: ['3.7', '3.8', '3.9', '3.10', '3.11'] + python: [ '3.7', '3.8', '3.9', '3.10', '3.11', '3.12' ] steps: - uses: actions/checkout@v2 diff --git a/.readthedocs.yaml b/.readthedocs.yaml new file mode 100644 index 0000000..6893a4b --- /dev/null +++ b/.readthedocs.yaml @@ -0,0 +1,38 @@ +# Read the Docs configuration file for Sphinx projects +# See https://docs.readthedocs.io/en/stable/config-file/v2.html for details + +# Required +version: 2 + +# Set the OS, Python version and other tools you might need +build: + os: ubuntu-22.04 + tools: + python: "3.12" + # You can also specify other tool versions: + # nodejs: "20" + # rust: "1.70" + # golang: "1.20" + jobs: + post_create_environment: + - python -m pip install sphinx_rtd_theme + +# Build documentation in the "docs/" directory with Sphinx +sphinx: + configuration: docs/conf.py + # You can configure Sphinx to use a different builder, for instance use the dirhtml builder for simpler URLs + # builder: "dirhtml" + # Fail on all warnings to avoid broken references + # fail_on_warning: true + +# Optionally build your docs in additional formats such as PDF and ePub +# formats: +# - pdf +# - epub + +# Optional but recommended, declare the Python requirements required +# to build your documentation +# See https://docs.readthedocs.io/en/stable/guides/reproducible-builds.html +# python: +# install: +# - requirements: docs/requirements.txt \ No newline at end of file diff --git a/CHANGELOG.rst b/CHANGELOG.rst index 5cb02fe..94a4c90 100644 --- a/CHANGELOG.rst +++ b/CHANGELOG.rst @@ -1,3 +1,42 @@ +2.1.0 +----- + +* Added configurable postprocessing, that allows to modify value retrieved from the cache + * Added built-in implementation, that applies deep-copy +* Fixed missing invalidation module in api docs +* Fixed MANIFEST.in + +2.0.0 +----- + +* Changed exception handling + * now exceptions are chained (before they were added in `args`) + * timeout errors are now chained (before they were not included at all) + * in case of dogpiling, all callers are now notified about the error (see issue #23) + +1.2.2 +----- + +* Fixed an example, that used deprecated `utcnow` + +1.2.1 +----- + +* Fixed UTC related deprecation warnings in Python 3.12+ + +1.2.0 +----- + +* Added support for Python 3.12 +* Added warning that support for Tornado is deprecated and will be removed in future + (it causes more and more hacks/workarounds while Tornado importance is diminishing). + +1.1.5 +----- + +* Expanded docs adding section on how to achieve granular expire/update time control (different settings per entry). +* Minor fix for contribution guide (after migration, Travis was still mentioned instead of GitHub Actions). + 1.1.4 ----- diff --git a/MANIFEST.in b/MANIFEST.in index f579a65..cd759f4 100644 --- a/MANIFEST.in +++ b/MANIFEST.in @@ -1,3 +1,3 @@ include requirements.txt include README.rst -include CHANGELOG.md +include CHANGELOG.rst diff --git a/README.rst b/README.rst index c45c010..9490e9b 100644 --- a/README.rst +++ b/README.rst @@ -76,6 +76,9 @@ For configuration options see `Configurability`_. You can use ``memoize`` with both `asyncio `_ and `Tornado `_ - please see the appropriate example: +.. warning:: + Support for `Tornado `_ is planned to be removed in the future. + asyncio ~~~~~~~ @@ -189,6 +192,10 @@ With *memoize* you have under control: least-recently-updated strategy is already provided; * entry builder (see :class:`memoize.entrybuilder.CacheEntryBuilder`) which has control over ``update_after`` & ``expires_after`` described in `Tunable eviction & async refreshing`_ +* value post-processing (see :class:`memoize.postprocessing.Postprocessing`); + noop is the default one; + deep-copy post-processing is also provided (be wary of deep-copy cost & limitations, + but deep-copying allows callers to safely modify values retrieved from an in-memory cache). All of these elements are open for extension (you can implement and plug-in your own). Please contribute! @@ -206,19 +213,21 @@ Example how to customize default config (everything gets overridden): from memoize.entrybuilder import ProvidedLifeSpanCacheEntryBuilder from memoize.eviction import LeastRecentlyUpdatedEvictionStrategy from memoize.key import EncodedMethodNameAndArgsKeyExtractor + from memoize.postprocessing import DeepcopyPostprocessing from memoize.storage import LocalInMemoryCacheStorage from memoize.wrapper import memoize - - @memoize(configuration=MutableCacheConfiguration - .initialized_with(DefaultInMemoryCacheConfiguration()) - .set_method_timeout(value=timedelta(minutes=2)) - .set_entry_builder(ProvidedLifeSpanCacheEntryBuilder(update_after=timedelta(minutes=2), - expire_after=timedelta(minutes=5))) - .set_eviction_strategy(LeastRecentlyUpdatedEvictionStrategy(capacity=2048)) - .set_key_extractor(EncodedMethodNameAndArgsKeyExtractor(skip_first_arg_as_self=False)) - .set_storage(LocalInMemoryCacheStorage()) - ) + @memoize( + configuration=MutableCacheConfiguration + .initialized_with(DefaultInMemoryCacheConfiguration()) + .set_method_timeout(value=timedelta(minutes=2)) + .set_entry_builder(ProvidedLifeSpanCacheEntryBuilder(update_after=timedelta(minutes=2), + expire_after=timedelta(minutes=5))) + .set_eviction_strategy(LeastRecentlyUpdatedEvictionStrategy(capacity=2048)) + .set_key_extractor(EncodedMethodNameAndArgsKeyExtractor(skip_first_arg_as_self=False)) + .set_storage(LocalInMemoryCacheStorage()) + .set_postprocessing(DeepcopyPostprocessing()) + ) async def cached(): return 'dummy' @@ -229,7 +238,8 @@ Still, you can use default configuration which: * uses in-memory storage; * uses method instance & arguments to infer cache key; * stores up to 4096 elements in cache and evicts entries according to least recently updated policy; -* refreshes elements after 10 minutes & ignores unrefreshed elements after 30 minutes. +* refreshes elements after 10 minutes & ignores unrefreshed elements after 30 minutes; +* does not post-process cached values. If that satisfies you, just use default config: @@ -424,5 +434,80 @@ Then you could just pass args and kwargs for which you want to invalidate entry. # Invalidation # 2 # expensive - computation - 59 + if __name__ == "__main__": + asyncio.get_event_loop().run_until_complete(main()) + + +Openness to granular TTL +------------------------ + +Default configuration sets update and expiry based on fixed values, which are the same for all entries. +If you need to set different TTLs for different entries, you can do so by providing +a custom :class:`memoize.entrybuilder.CacheEntryBuilder`. + +.. code-block:: python + + import datetime + import asyncio + import random + from dataclasses import dataclass + + from memoize.wrapper import memoize + from memoize.configuration import DefaultInMemoryCacheConfiguration, MutableCacheConfiguration + from memoize.entry import CacheKey, CacheEntry + from memoize.entrybuilder import CacheEntryBuilder + from memoize.storage import LocalInMemoryCacheStorage + + + @dataclass + class ValueWithTTL: + value: str + ttl_seconds: int # for instance, it could be derived from Cache-Control response header + + + class TtlRespectingCacheEntryBuilder(CacheEntryBuilder): + def build(self, key: CacheKey, value: ValueWithTTL): + now = datetime.datetime.now(datetime.timezone.utc) + ttl_ends_at = now + datetime.timedelta(seconds=value.ttl_seconds) + return CacheEntry( + created=now, + update_after=ttl_ends_at, + # allowing stale data for 10% of TTL + expires_after=ttl_ends_at + datetime.timedelta(seconds=value.ttl_seconds // 10), + value=value + ) + + + storage = LocalInMemoryCacheStorage() # overridden & extracted for demonstration purposes only + + + @memoize(configuration=MutableCacheConfiguration + .initialized_with(DefaultInMemoryCacheConfiguration()) + .set_entry_builder(TtlRespectingCacheEntryBuilder()) + .set_storage(storage)) + async def external_call(key: str): + return ValueWithTTL( + value=f'{key}-result-{random.randint(1, 100)}', + ttl_seconds=random.randint(60, 300) + ) + + + async def main(): + await external_call('a') + await external_call('b') + await external_call('b') + + print("Entries persisted in the cache:") + for entry in storage._data.values(): + print('Entry: ', entry.value) + print('Effective TTL: ', (entry.update_after - entry.created).total_seconds()) + + # Entries persisted in the cache: + # Entry: ValueWithTTL(value='a-result-79', ttl_seconds=148) + # Effective TTL: 148.0 + # Entry: ValueWithTTL(value='b-result-27', ttl_seconds=192) + # Effective TTL: 192.0 + + if __name__ == "__main__": asyncio.get_event_loop().run_until_complete(main()) \ No newline at end of file diff --git a/docs/contributing.rst b/docs/contributing.rst index e44fadf..0c37c93 100644 --- a/docs/contributing.rst +++ b/docs/contributing.rst @@ -47,4 +47,4 @@ PyPi python3 -m pip install --user --upgrade twine python3 -m twine check dist/* - # actual upload will be done by Travis + # actual upload will be done by GitHub Actions diff --git a/docs/source/memoize.rst b/docs/source/memoize.rst index 3182f0a..879f2ec 100644 --- a/docs/source/memoize.rst +++ b/docs/source/memoize.rst @@ -8,103 +8,118 @@ memoize.coerced module ---------------------- .. automodule:: memoize.coerced - :members: - :undoc-members: - :show-inheritance: + :members: + :undoc-members: + :show-inheritance: memoize.configuration module ---------------------------- .. automodule:: memoize.configuration - :members: - :undoc-members: - :show-inheritance: + :members: + :undoc-members: + :show-inheritance: memoize.entry module -------------------- .. automodule:: memoize.entry - :members: - :undoc-members: - :show-inheritance: + :members: + :undoc-members: + :show-inheritance: memoize.entrybuilder module --------------------------- .. automodule:: memoize.entrybuilder - :members: - :undoc-members: - :show-inheritance: + :members: + :undoc-members: + :show-inheritance: memoize.eviction module ----------------------- .. automodule:: memoize.eviction - :members: - :undoc-members: - :show-inheritance: + :members: + :undoc-members: + :show-inheritance: memoize.exceptions module ------------------------- .. automodule:: memoize.exceptions - :members: - :undoc-members: - :show-inheritance: + :members: + :undoc-members: + :show-inheritance: + +memoize.invalidation module +--------------------------- + +.. automodule:: memoize.invalidation + :members: + :undoc-members: + :show-inheritance: memoize.key module ------------------ .. automodule:: memoize.key - :members: - :undoc-members: - :show-inheritance: + :members: + :undoc-members: + :show-inheritance: memoize.memoize\_configuration module ------------------------------------- .. automodule:: memoize.memoize_configuration - :members: - :undoc-members: - :show-inheritance: + :members: + :undoc-members: + :show-inheritance: + +memoize.postprocessing module +----------------------------- + +.. automodule:: memoize.postprocessing + :members: + :undoc-members: + :show-inheritance: memoize.serde module -------------------- .. automodule:: memoize.serde - :members: - :undoc-members: - :show-inheritance: + :members: + :undoc-members: + :show-inheritance: memoize.statuses module ----------------------- .. automodule:: memoize.statuses - :members: - :undoc-members: - :show-inheritance: + :members: + :undoc-members: + :show-inheritance: memoize.storage module ---------------------- .. automodule:: memoize.storage - :members: - :undoc-members: - :show-inheritance: + :members: + :undoc-members: + :show-inheritance: memoize.wrapper module ---------------------- .. automodule:: memoize.wrapper - :members: - :undoc-members: - :show-inheritance: - + :members: + :undoc-members: + :show-inheritance: Module contents --------------- .. automodule:: memoize - :members: - :undoc-members: - :show-inheritance: + :members: + :undoc-members: + :show-inheritance: diff --git a/examples/configuration/custom_configuration.py b/examples/configuration/custom_configuration.py index f2b70db..07e724a 100644 --- a/examples/configuration/custom_configuration.py +++ b/examples/configuration/custom_configuration.py @@ -4,18 +4,21 @@ from memoize.entrybuilder import ProvidedLifeSpanCacheEntryBuilder from memoize.eviction import LeastRecentlyUpdatedEvictionStrategy from memoize.key import EncodedMethodNameAndArgsKeyExtractor +from memoize.postprocessing import DeepcopyPostprocessing from memoize.storage import LocalInMemoryCacheStorage from memoize.wrapper import memoize -@memoize(configuration=MutableCacheConfiguration - .initialized_with(DefaultInMemoryCacheConfiguration()) - .set_method_timeout(value=timedelta(minutes=2)) - .set_entry_builder(ProvidedLifeSpanCacheEntryBuilder(update_after=timedelta(minutes=2), - expire_after=timedelta(minutes=5))) - .set_eviction_strategy(LeastRecentlyUpdatedEvictionStrategy(capacity=2048)) - .set_key_extractor(EncodedMethodNameAndArgsKeyExtractor(skip_first_arg_as_self=False)) - .set_storage(LocalInMemoryCacheStorage()) - ) +@memoize( + configuration=MutableCacheConfiguration + .initialized_with(DefaultInMemoryCacheConfiguration()) + .set_method_timeout(value=timedelta(minutes=2)) + .set_entry_builder(ProvidedLifeSpanCacheEntryBuilder(update_after=timedelta(minutes=2), + expire_after=timedelta(minutes=5))) + .set_eviction_strategy(LeastRecentlyUpdatedEvictionStrategy(capacity=2048)) + .set_key_extractor(EncodedMethodNameAndArgsKeyExtractor(skip_first_arg_as_self=False)) + .set_storage(LocalInMemoryCacheStorage()) + .set_postprocessing(DeepcopyPostprocessing()) +) async def cached(): return 'dummy' diff --git a/examples/ttl/__init__.py b/examples/ttl/__init__.py new file mode 100644 index 0000000..e69de29 diff --git a/examples/ttl/ttl_asyncio.py b/examples/ttl/ttl_asyncio.py new file mode 100644 index 0000000..d6223bd --- /dev/null +++ b/examples/ttl/ttl_asyncio.py @@ -0,0 +1,68 @@ +# needed if one has tornado installed (could be removed otherwise) +from memoize import memoize_configuration +memoize_configuration.force_asyncio = True + +import datetime +import asyncio +import random +from dataclasses import dataclass + +from memoize.wrapper import memoize +from memoize.configuration import DefaultInMemoryCacheConfiguration, MutableCacheConfiguration +from memoize.entry import CacheKey, CacheEntry +from memoize.entrybuilder import CacheEntryBuilder +from memoize.storage import LocalInMemoryCacheStorage + + +@dataclass +class ValueWithTTL: + value: str + ttl_seconds: int # for instance, it could be derived from Cache-Control response header + + +class TtlRespectingCacheEntryBuilder(CacheEntryBuilder): + def build(self, key: CacheKey, value: ValueWithTTL): + now = datetime.datetime.now(datetime.timezone.utc) + ttl_ends_at = now + datetime.timedelta(seconds=value.ttl_seconds) + return CacheEntry( + created=now, + update_after=ttl_ends_at, + # allowing stale data for 10% of TTL + expires_after=ttl_ends_at + datetime.timedelta(seconds=value.ttl_seconds // 10), + value=value + ) + + +storage = LocalInMemoryCacheStorage() # overridden & extracted for demonstration purposes only + + +@memoize(configuration=MutableCacheConfiguration + .initialized_with(DefaultInMemoryCacheConfiguration()) + .set_entry_builder(TtlRespectingCacheEntryBuilder()) + .set_storage(storage)) +async def external_call(key: str): + return ValueWithTTL( + value=f'{key}-result-{random.randint(1, 100)}', + ttl_seconds=random.randint(60, 300) + ) + + +async def main(): + await external_call('a') + await external_call('b') + await external_call('b') + + print("Entries persisted in the cache:") + for entry in storage._data.values(): + print('Entry: ', entry.value) + print('Effective TTL: ', (entry.update_after - entry.created).total_seconds()) + + # Entries persisted in the cache: + # Entry: ValueWithTTL(value='a-result-79', ttl_seconds=148) + # Effective TTL: 148.0 + # Entry: ValueWithTTL(value='b-result-27', ttl_seconds=192) + # Effective TTL: 192.0 + + +if __name__ == "__main__": + asyncio.get_event_loop().run_until_complete(main()) diff --git a/memoize/coerced.py b/memoize/coerced.py index 29ee405..b779597 100644 --- a/memoize/coerced.py +++ b/memoize/coerced.py @@ -5,6 +5,7 @@ import datetime import importlib.util import logging +import sys from memoize.memoize_configuration import force_asyncio @@ -47,9 +48,89 @@ def _timeout_error_type(): logger.info('Using asyncio instead of torando') + # this backported version of `wait_for` is taken from Python 3.11 + # and allows to continue having these `coerced` functions working (they are at least partially based on hacks) + # in general we need to drop tornado support either way (so this temporary solution would be gone either way) + async def wait_for(fut, timeout): + """Wait for the single Future or coroutine to complete, with timeout. + + Coroutine will be wrapped in Task. + + Returns result of the Future or coroutine. When a timeout occurs, + it cancels the task and raises TimeoutError. To avoid the task + cancellation, wrap it in shield(). + + If the wait is cancelled, the task is also cancelled. + + This function is a coroutine. + """ + + from asyncio import events, ensure_future, exceptions + from asyncio.tasks import _cancel_and_wait, _release_waiter + import functools + loop = events.get_running_loop() + + if timeout is None: + return await fut + + if timeout <= 0: + fut = ensure_future(fut, loop=loop) + + if fut.done(): + return fut.result() + + await _cancel_and_wait(fut) + try: + return fut.result() + except exceptions.CancelledError as exc: + raise exceptions.TimeoutError() from exc + + waiter = loop.create_future() + timeout_handle = loop.call_later(timeout, _release_waiter, waiter) + cb = functools.partial(_release_waiter, waiter) + + fut = ensure_future(fut, loop=loop) + fut.add_done_callback(cb) + + try: + # wait until the future completes or the timeout + try: + await waiter + except exceptions.CancelledError: + if fut.done(): + return fut.result() + else: + fut.remove_done_callback(cb) + # We must ensure that the task is not running + # after wait_for() returns. + # See https://bugs.python.org/issue32751 + await _cancel_and_wait(fut) + raise + + if fut.done(): + return fut.result() + else: + fut.remove_done_callback(cb) + # We must ensure that the task is not running + # after wait_for() returns. + # See https://bugs.python.org/issue32751 + await _cancel_and_wait(fut) + # In case task cancellation failed with some + # exception, we should re-raise it + # See https://bugs.python.org/issue40607 + try: + return fut.result() + except exceptions.CancelledError as exc: + raise exceptions.TimeoutError() from exc + finally: + timeout_handle.cancel() + # ignore for mypy as types are resolved in runtime def _apply_timeout(method_timeout: datetime.timedelta, future: asyncio.Future) -> asyncio.Future: # type: ignore - return asyncio.wait_for(future, method_timeout.total_seconds()) + if sys.version_info >= (3, 12, 0): + return wait_for(future, method_timeout.total_seconds()) + else: + return asyncio.wait_for(future, method_timeout.total_seconds()) def _call_later(delay: datetime.timedelta, callback): diff --git a/memoize/configuration.py b/memoize/configuration.py index b618018..1059bd1 100644 --- a/memoize/configuration.py +++ b/memoize/configuration.py @@ -9,6 +9,7 @@ from memoize.entrybuilder import CacheEntryBuilder, ProvidedLifeSpanCacheEntryBuilder from memoize.eviction import EvictionStrategy, LeastRecentlyUpdatedEvictionStrategy from memoize.key import KeyExtractor, EncodedMethodReferenceAndArgsKeyExtractor +from memoize.postprocessing import Postprocessing, NoPostprocessing from memoize.storage import CacheStorage from memoize.storage import LocalInMemoryCacheStorage @@ -51,15 +52,24 @@ def eviction_strategy(self) -> EvictionStrategy: """ Determines which EvictionStrategy is to be used by cache. """ raise NotImplementedError() + @abstractmethod + def postprocessing(self) -> Postprocessing: + """ Determines which/if Postprocessing is to be used by cache. """ + raise NotImplementedError() + def __str__(self) -> str: return self.__repr__() def __repr__(self) -> str: - return "{name}[configured={configured}, method_timeout={method_timeout}, entry_builder={entry_builder}," \ - " key_extractor={key_extractor}, storage={storage}, eviction_strategy={eviction_strategy}]" \ - .format(name=self.__class__, configured=self.configured(), method_timeout=self.method_timeout(), - entry_builder=self.entry_builder(), key_extractor=self.key_extractor(), storage=self.storage(), - eviction_strategy=self.eviction_strategy()) + return (f"{self.__class__}[" + f"configured={self.configured()}, " + f"method_timeout={self.method_timeout()}, " + f"entry_builder={self.entry_builder()}, " + f"key_extractor={self.key_extractor()}, " + f"storage={self.storage()}, " + f"eviction_strategy={self.eviction_strategy()}, " + f"postprocessing={self.postprocessing()}" + f"]") class MutableCacheConfiguration(CacheConfiguration): @@ -67,7 +77,7 @@ class MutableCacheConfiguration(CacheConfiguration): May be also used to customize existing configuration (for example a default one, which is immutable).""" def __init__(self, configured: bool, storage: CacheStorage, key_extractor: KeyExtractor, - eviction_strategy: EvictionStrategy, entry_builder: CacheEntryBuilder, + eviction_strategy: EvictionStrategy, entry_builder: CacheEntryBuilder, postprocessing: Postprocessing, method_timeout: timedelta) -> None: self.__storage = storage self.__configured = configured @@ -75,6 +85,7 @@ def __init__(self, configured: bool, storage: CacheStorage, key_extractor: KeyEx self.__entry_builder = entry_builder self.__method_timeout = method_timeout self.__eviction_strategy = eviction_strategy + self.__postprocessing = postprocessing @staticmethod def initialized_with(configuration: CacheConfiguration) -> 'MutableCacheConfiguration': @@ -85,6 +96,7 @@ def initialized_with(configuration: CacheConfiguration) -> 'MutableCacheConfigur entry_builder=configuration.entry_builder(), method_timeout=configuration.method_timeout(), eviction_strategy=configuration.eviction_strategy(), + postprocessing=configuration.postprocessing(), ) def method_timeout(self) -> timedelta: @@ -105,6 +117,9 @@ def entry_builder(self) -> CacheEntryBuilder: def eviction_strategy(self) -> EvictionStrategy: return self.__eviction_strategy + def postprocessing(self) -> Postprocessing: + return self.__postprocessing + def set_method_timeout(self, value: timedelta) -> 'MutableCacheConfiguration': self.__method_timeout = value return self @@ -129,6 +144,10 @@ def set_eviction_strategy(self, value: EvictionStrategy) -> 'MutableCacheConfigu self.__eviction_strategy = value return self + def set_postprocessing(self, value: Postprocessing) -> 'MutableCacheConfiguration': + self.__postprocessing = value + return self + class DefaultInMemoryCacheConfiguration(CacheConfiguration): """ Default parameters that describe in-memory cache. Be ware that parameters used do not suit every case. """ @@ -142,6 +161,7 @@ def __init__(self, capacity: int = 4096, method_timeout: timedelta = timedelta(m self.__key_extractor = EncodedMethodReferenceAndArgsKeyExtractor() self.__eviction_strategy = LeastRecentlyUpdatedEvictionStrategy(capacity=capacity) self.__entry_builder = ProvidedLifeSpanCacheEntryBuilder(update_after=update_after, expire_after=expire_after) + self.__postprocessing = NoPostprocessing() def configured(self) -> bool: return self.__configured @@ -160,3 +180,6 @@ def eviction_strategy(self) -> LeastRecentlyUpdatedEvictionStrategy: def key_extractor(self) -> EncodedMethodReferenceAndArgsKeyExtractor: return self.__key_extractor + + def postprocessing(self) -> Postprocessing: + return self.__postprocessing diff --git a/memoize/entrybuilder.py b/memoize/entrybuilder.py index fbe1ae4..3bd0673 100644 --- a/memoize/entrybuilder.py +++ b/memoize/entrybuilder.py @@ -34,7 +34,7 @@ def update_timeouts(self, update_after: datetime.timedelta, expire_after: dateti self._update_after = update_after def build(self, key: CacheKey, value: CachedValue) -> CacheEntry: - now = datetime.datetime.utcnow() + now = datetime.datetime.now(datetime.timezone.utc) return CacheEntry(created=now, update_after=now + self._update_after, expires_after=now + self._expires_after, diff --git a/memoize/postprocessing.py b/memoize/postprocessing.py new file mode 100644 index 0000000..2d39232 --- /dev/null +++ b/memoize/postprocessing.py @@ -0,0 +1,30 @@ +import copy +from abc import ABCMeta, abstractmethod +from typing import Any + +ValueType = Any + + +class Postprocessing(metaclass=ABCMeta): + @abstractmethod + def apply(self, original: ValueType) -> ValueType: + """Transforms value just before returning from the cache.""" + raise NotImplementedError() + + +class NoPostprocessing(Postprocessing): + def apply(self, original: ValueType) -> ValueType: + """Applies no postprocessing (returns original value).""" + return original + + +class DeepcopyPostprocessing(Postprocessing): + def apply(self, original: ValueType) -> ValueType: + """ + Performs deep copy of the value. Useful when you want to prevent modifying the value cached in memory + (so callers could modify their copies safely). + + Have in mind that this operation may be expensive, + and may not be suitable for all types of values (see docs on copy.deepcopy). + """ + return copy.deepcopy(original) diff --git a/memoize/serde.py b/memoize/serde.py index b299ebf..69aa90f 100644 --- a/memoize/serde.py +++ b/memoize/serde.py @@ -12,7 +12,7 @@ # ignoring type error as mypy falsely reports json is already imported import json # type: ignore from abc import ABCMeta, abstractmethod -from datetime import datetime +from datetime import datetime, timezone from typing import Callable, Any @@ -62,9 +62,9 @@ def __init__(self, string_encoding: str = "utf-8", def deserialize(self, data: bytes) -> CacheEntry: as_dict = json.loads(codecs.decode(data, self.__string_encoding)) return CacheEntry( - created=datetime.utcfromtimestamp(as_dict['created']), - update_after=datetime.utcfromtimestamp(as_dict['update_after']), - expires_after=datetime.utcfromtimestamp(as_dict['expires_after']), + created=datetime.fromtimestamp(as_dict['created'], timezone.utc), + update_after=datetime.fromtimestamp(as_dict['update_after'], timezone.utc), + expires_after=datetime.fromtimestamp(as_dict['expires_after'], timezone.utc), value=self.__reversible_repr_to_value(as_dict['value']), ) diff --git a/memoize/statuses.py b/memoize/statuses.py index 2e4e83d..cc04c17 100644 --- a/memoize/statuses.py +++ b/memoize/statuses.py @@ -4,7 +4,7 @@ import datetime import logging from asyncio import Future -from typing import Optional, Dict, Awaitable +from typing import Optional, Dict, Awaitable, Union from memoize import coerced from memoize.entry import CacheKey, CacheEntry @@ -14,8 +14,7 @@ class UpdateStatuses: def __init__(self, update_lock_timeout: datetime.timedelta = datetime.timedelta(minutes=5)) -> None: self.logger = logging.getLogger(__name__) self._update_lock_timeout = update_lock_timeout - # type declaration should not be in comment once we drop py35 support - self._updates_in_progress = {} # type: Dict[CacheKey, Future] + self._updates_in_progress: Dict[CacheKey, Future] = {} def is_being_updated(self, key: CacheKey) -> bool: """Checks if update for given key is in progress. Obtained info is valid until control gets back to IO-loop.""" @@ -49,17 +48,19 @@ def mark_updated(self, key: CacheKey, entry: CacheEntry) -> None: update = self._updates_in_progress.pop(key) update.set_result(entry) - def mark_update_aborted(self, key: CacheKey) -> None: + def mark_update_aborted(self, key: CacheKey, exception: Exception) -> None: """Informs that update failed to complete. - Calls to 'is_being_updated' will return False until 'mark_being_updated' will be called.""" + Calls to 'is_being_updated' will return False until 'mark_being_updated' will be called. + Accepts exception to propagate it across all clients awaiting an update.""" if key not in self._updates_in_progress: raise ValueError('Key {} is not being updated'.format(key)) update = self._updates_in_progress.pop(key) - update.set_result(None) + update.set_result(exception) - async def await_updated(self, key: CacheKey) -> Awaitable[Optional[CacheEntry]]: + async def await_updated(self, key: CacheKey) -> Awaitable[Union[CacheEntry, Exception]]: """Waits (asynchronously) until update in progress has benn finished. - Returns updated entry or None if update failed/timed-out. + Returns awaitable with the updated entry + (or awaitable with an exception if update failed/timed-out). Should be called only if 'is_being_updated' returned True (and since then IO-loop has not been lost).""" if not self.is_being_updated(key): raise ValueError('Key {} is not being updated'.format(key)) diff --git a/memoize/wrapper.py b/memoize/wrapper.py index c1a8631..3e20226 100644 --- a/memoize/wrapper.py +++ b/memoize/wrapper.py @@ -77,8 +77,8 @@ async def refresh(actual_entry: Optional[CacheEntry], key: CacheKey, if actual_entry is None and update_statuses.is_being_updated(key): logger.debug('As entry expired, waiting for results of concurrent refresh %s', key) entry = await update_statuses.await_updated(key) - if entry is None: - raise CachedMethodFailedException('Concurrent refresh failed to complete') + if isinstance(entry, Exception): + raise CachedMethodFailedException('Concurrent refresh failed to complete') from entry return entry elif actual_entry is not None and update_statuses.is_being_updated(key): logger.debug('As update point reached but concurrent update already in progress, ' @@ -103,12 +103,12 @@ async def refresh(actual_entry: Optional[CacheEntry], key: CacheKey, return offered_entry except (asyncio.TimeoutError, _timeout_error_type()) as e: logger.debug('Timeout for %s: %s', key, e) - update_statuses.mark_update_aborted(key) - raise CachedMethodFailedException('Refresh timed out') + update_statuses.mark_update_aborted(key, e) + raise CachedMethodFailedException('Refresh timed out') from e except Exception as e: logger.debug('Error while refreshing cache for %s: %s', key, e) - update_statuses.mark_update_aborted(key) - raise CachedMethodFailedException('Refresh failed to complete', e) + update_statuses.mark_update_aborted(key, e) + raise CachedMethodFailedException('Refresh failed to complete') from e @functools.wraps(method) async def wrapper(*args, **kwargs): @@ -124,7 +124,7 @@ async def wrapper(*args, **kwargs): if current_entry is not None: configuration_snapshot.eviction_strategy().mark_read(key) - now = datetime.datetime.utcnow() + now = datetime.datetime.now(datetime.timezone.utc) def value_future_provider(): return _apply_timeout(configuration_snapshot.method_timeout(), method(*args, **kwargs)) @@ -145,6 +145,6 @@ def value_future_provider(): else: result = current_entry - return result.value + return configuration_snapshot.postprocessing().apply(result.value) return wrapper diff --git a/setup.py b/setup.py index 52beb17..e6ff7bc 100644 --- a/setup.py +++ b/setup.py @@ -1,7 +1,4 @@ -try: - from setuptools import setup -except ImportError: - from distutils.core import setup +from setuptools import setup def prepare_description(): @@ -14,7 +11,7 @@ def prepare_description(): setup( name='py-memoize', - version='1.1.4', + version='2.1.0', author='Michal Zmuda', author_email='zmu.michal@gmail.com', url='https://github.com/DreamLab/memoize', @@ -42,6 +39,7 @@ def prepare_description(): 'Programming Language :: Python :: 3.9', 'Programming Language :: Python :: 3.10', 'Programming Language :: Python :: 3.11', + 'Programming Language :: Python :: 3.12', 'Framework :: AsyncIO', 'Intended Audience :: Developers' ] diff --git a/tests/asynciotests/test_showcase.py b/tests/asynciotests/test_showcase.py index 0f13245..7402623 100644 --- a/tests/asynciotests/test_showcase.py +++ b/tests/asynciotests/test_showcase.py @@ -94,5 +94,5 @@ async def get_value_or_throw(arg, kwarg=None): self.assertEqual('ok #2', res4) # value from cache - still relevant self.assertEqual('ok #2', res5) # stale from cache - refresh in background self.assertEqual('ok #2', res6) # stale from cache - should be updated but method throws - expected = CachedMethodFailedException('Refresh failed to complete', ValueError('throws #4', )) - self.assertEqual(str(expected), str(context.exception)) # ToDo: consider better comparision + self.assertEqual(str(context.exception), str(CachedMethodFailedException('Refresh failed to complete'))) + self.assertEqual(str(context.exception.__cause__), str(ValueError("throws #4"))) diff --git a/tests/asynciotests/test_wrapper_manually_applied_on_asyncio.py b/tests/asynciotests/test_wrapper_manually_applied_on_asyncio.py index 7b0cd34..1d71250 100644 --- a/tests/asynciotests/test_wrapper_manually_applied_on_asyncio.py +++ b/tests/asynciotests/test_wrapper_manually_applied_on_asyncio.py @@ -1,3 +1,4 @@ +from memoize.coerced import _timeout_error_type from tests.py310workaround import fix_python_3_10_compatibility fix_python_3_10_compatibility() @@ -274,8 +275,8 @@ async def get_value(arg, kwarg=None): await get_value_cached('test1', kwarg='args1') # then - expected = CachedMethodFailedException('Refresh failed to complete', ValueError('Get lost', )) - self.assertEqual(str(expected), str(context.exception)) # ToDo: consider better comparision + self.assertEqual(str(context.exception), str(CachedMethodFailedException('Refresh failed to complete'))) + self.assertEqual(str(context.exception.__cause__), str(ValueError("Get lost"))) @gen_test async def test_should_throw_exception_on_refresh_timeout(self): @@ -295,8 +296,8 @@ async def get_value(arg, kwarg=None): await get_value_cached('test1', kwarg='args1') # then - expected = CachedMethodFailedException('Refresh timed out') - self.assertEqual(str(expected), str(context.exception)) # ToDo: consider better comparision + self.assertEqual(context.exception.__class__, CachedMethodFailedException) + self.assertEqual(context.exception.__cause__.__class__, _timeout_error_type()) @staticmethod async def _call_thrice(call): diff --git a/tests/asynciotests/test_wrapper_on_asyncio.py b/tests/asynciotests/test_wrapper_on_asyncio.py index 7428a17..2fbaa13 100644 --- a/tests/asynciotests/test_wrapper_on_asyncio.py +++ b/tests/asynciotests/test_wrapper_on_asyncio.py @@ -1,3 +1,4 @@ +from memoize.coerced import _timeout_error_type from tests.py310workaround import fix_python_3_10_compatibility fix_python_3_10_compatibility() @@ -160,6 +161,69 @@ async def get_value(arg, kwarg=None): self.assertEqual(0, res1) self.assertEqual(1, res2) + @gen_test + async def test_should_return_exception_for_all_concurrent_callers(self): + # given + value = 0 + + @memoize() + async def get_value(arg, kwarg=None): + raise ValueError(f'stub{value}') + + # when + res1 = get_value('test', kwarg='args1') + res2 = get_value('test', kwarg='args1') + res3 = get_value('test', kwarg='args1') + + # then + with self.assertRaises(Exception) as context: + await res1 + self.assertEqual(context.exception.__class__, CachedMethodFailedException) + self.assertEqual(str(context.exception.__cause__), str(ValueError('stub0'))) + + with self.assertRaises(Exception) as context: + await res2 + self.assertEqual(context.exception.__class__, CachedMethodFailedException) + self.assertEqual(str(context.exception.__cause__), str(ValueError('stub0'))) + + with self.assertRaises(Exception) as context: + await res3 + self.assertEqual(context.exception.__class__, CachedMethodFailedException) + self.assertEqual(str(context.exception.__cause__), str(ValueError('stub0'))) + + @gen_test + async def test_should_return_timeout_for_all_concurrent_callers(self): + # given + value = 0 + + @memoize(configuration=DefaultInMemoryCacheConfiguration(method_timeout=timedelta(milliseconds=1))) + async def get_value(arg, kwarg=None): + await _ensure_asyncio_background_tasks_finished() + time.sleep(.200) + await _ensure_asyncio_background_tasks_finished() + return value + + # when + res1 = get_value('test', kwarg='args1') + res2 = get_value('test', kwarg='args1') + res3 = get_value('test', kwarg='args1') + + # then + with self.assertRaises(Exception) as context: + await res1 + self.assertEqual(context.exception.__class__, CachedMethodFailedException) + self.assertEqual(context.exception.__cause__.__class__, _timeout_error_type()) + + with self.assertRaises(Exception) as context: + await res2 + self.assertEqual(context.exception.__class__, CachedMethodFailedException) + self.assertEqual(context.exception.__cause__.__class__, _timeout_error_type()) + + with self.assertRaises(Exception) as context: + await res3 + self.assertEqual(context.exception.__class__, CachedMethodFailedException) + self.assertEqual(context.exception.__cause__.__class__, _timeout_error_type()) + @gen_test async def test_should_return_same_value_on_constant_key_function(self): # given @@ -253,8 +317,8 @@ async def get_value(arg, kwarg=None): await get_value('test1', kwarg='args1') # then - expected = CachedMethodFailedException('Refresh failed to complete', ValueError('Get lost', )) - self.assertEqual(str(expected), str(context.exception)) # ToDo: consider better comparision + self.assertEqual(str(context.exception), str(CachedMethodFailedException('Refresh failed to complete'))) + self.assertEqual(str(context.exception.__cause__), str(ValueError("Get lost"))) @gen_test async def test_should_throw_exception_on_refresh_timeout(self): @@ -272,8 +336,8 @@ async def get_value(arg, kwarg=None): await get_value('test1', kwarg='args1') # then - expected = CachedMethodFailedException('Refresh timed out') - self.assertEqual(str(expected), str(context.exception)) # ToDo: consider better comparision + self.assertEqual(context.exception.__class__, CachedMethodFailedException) + self.assertEqual(context.exception.__cause__.__class__, _timeout_error_type()) @staticmethod async def _call_thrice(call): diff --git a/tests/tornadotests/test_wrapper.py b/tests/tornadotests/test_wrapper.py index 6ef3af7..1d8f896 100644 --- a/tests/tornadotests/test_wrapper.py +++ b/tests/tornadotests/test_wrapper.py @@ -1,3 +1,4 @@ +from memoize.coerced import _timeout_error_type from tests.py310workaround import fix_python_3_10_compatibility fix_python_3_10_compatibility() @@ -265,8 +266,8 @@ def get_value(arg, kwarg=None): yield get_value('test1', kwarg='args1') # then - expected = CachedMethodFailedException('Refresh failed to complete', ValueError('Get lost', )) - self.assertEqual(str(expected), str(context.exception)) # ToDo: consider better comparision + self.assertEqual(str(context.exception), str(CachedMethodFailedException('Refresh failed to complete'))) + self.assertEqual(str(context.exception.__cause__), str(ValueError("Get lost"))) @gen_test def test_should_throw_exception_on_refresh_timeout(self): @@ -285,5 +286,6 @@ def get_value(arg, kwarg=None): yield get_value('test1', kwarg='args1') # then - expected = CachedMethodFailedException('Refresh timed out') - self.assertEqual(str(expected), str(context.exception)) # ToDo: consider better comparision + self.assertEqual(context.exception.__class__, CachedMethodFailedException) + self.assertEqual(context.exception.__cause__.__class__, _timeout_error_type()) + diff --git a/tests/unit/test_postprocessing.py b/tests/unit/test_postprocessing.py new file mode 100644 index 0000000..0b0888e --- /dev/null +++ b/tests/unit/test_postprocessing.py @@ -0,0 +1,60 @@ +from memoize.postprocessing import DeepcopyPostprocessing +from tests.py310workaround import fix_python_3_10_compatibility + +fix_python_3_10_compatibility() + +from unittest.mock import Mock + +from tornado import gen +from tornado.testing import AsyncTestCase, gen_test + +from memoize.configuration import MutableCacheConfiguration, DefaultInMemoryCacheConfiguration +from memoize.wrapper import memoize + + +class KeyExtractorInteractionsTests(AsyncTestCase): + + @gen_test + def test_postprocessing_is_applied(self): + # given + postprocessing = Mock() + postprocessing.apply = Mock(return_value='overridden-by-postprocessing') + + @memoize( + configuration=MutableCacheConfiguration + .initialized_with(DefaultInMemoryCacheConfiguration()) + .set_postprocessing(postprocessing) + ) + @gen.coroutine + def sample_method(arg): + return f"value-for-{arg}" + + # when + result = yield sample_method('test') + + # then + postprocessing.apply.assert_called_once() + postprocessing.apply.assert_called_once_with('value-for-test') + self.assertEqual(result, 'overridden-by-postprocessing') + + @gen_test + def test_postprocessing_based_on_deepcopy_prevents_modifying_value_cached_in_memory(self): + # given + + @memoize( + configuration=MutableCacheConfiguration + .initialized_with(DefaultInMemoryCacheConfiguration()) + .set_postprocessing(DeepcopyPostprocessing()) + ) + @gen.coroutine + def sample_method(arg): + return {'arg': arg, 'list': [4, 5, 1, 2, 3]} # unsorted + + # when + result1 = yield sample_method('test') + result2 = yield sample_method('test') + result1['list'].sort() + + # then + self.assertEqual(result1, {'arg': 'test', 'list': [1, 2, 3, 4, 5]}) # sorted in-place + self.assertEqual(result2, {'arg': 'test', 'list': [4, 5, 1, 2, 3]}) # still unsorted diff --git a/tests/unit/test_serde.py b/tests/unit/test_serde.py index 7378be3..29d5a92 100644 --- a/tests/unit/test_serde.py +++ b/tests/unit/test_serde.py @@ -5,7 +5,7 @@ import codecs import json import pickle -from datetime import datetime +from datetime import datetime, timezone from pickle import HIGHEST_PROTOCOL, DEFAULT_PROTOCOL from unittest.mock import Mock @@ -67,8 +67,8 @@ class JsonSerDeTests(AsyncTestCase): @gen_test def test_should_encode_as_readable_json(self): # given - cache_entry = CacheEntry(datetime.utcfromtimestamp(1), datetime.utcfromtimestamp(2), - datetime.utcfromtimestamp(3), "in") + cache_entry = CacheEntry(datetime.fromtimestamp(1, timezone.utc), datetime.fromtimestamp(2, timezone.utc), + datetime.fromtimestamp(3, timezone.utc), "in") serde = JsonSerDe(string_encoding='utf-8') # when @@ -90,15 +90,15 @@ def test_should_decode_readable_json(self): bytes = serde.deserialize(b'{"created":1,"update_after":2,"expires_after":3,"value":"value"}') # then - cache_entry = CacheEntry(datetime.utcfromtimestamp(1), datetime.utcfromtimestamp(2), - datetime.utcfromtimestamp(3), "value") + cache_entry = CacheEntry(datetime.fromtimestamp(1, timezone.utc), datetime.fromtimestamp(2, timezone.utc), + datetime.fromtimestamp(3, timezone.utc), "value") self.assertEqual(bytes, cache_entry) @gen_test def test_should_apply_value_transformations_on_serialization(self): # given - cache_entry = CacheEntry(datetime.utcfromtimestamp(1), datetime.utcfromtimestamp(2), - datetime.utcfromtimestamp(3), "in") + cache_entry = CacheEntry(datetime.fromtimestamp(1, timezone.utc), datetime.fromtimestamp(2, timezone.utc), + datetime.fromtimestamp(3, timezone.utc), "in") encode = Mock(return_value="out") decode = Mock(return_value="in") serde = JsonSerDe(string_encoding='utf-8', value_to_reversible_repr=encode, reversible_repr_to_value=decode) @@ -125,8 +125,8 @@ def test_should_apply_value_transformations_on_deserialization(self): data = serde.deserialize(b'{"created":1,"update_after":2,"expires_after":3,"value":"in"}') # then - cache_entry = CacheEntry(datetime.utcfromtimestamp(1), datetime.utcfromtimestamp(2), - datetime.utcfromtimestamp(3), "out") + cache_entry = CacheEntry(datetime.fromtimestamp(1, timezone.utc), datetime.fromtimestamp(2, timezone.utc), + datetime.fromtimestamp(3, timezone.utc), "out") self.assertEqual(data, cache_entry) decode.assert_called_once_with("in") diff --git a/tests/unit/test_statuses.py b/tests/unit/test_statuses.py index 92274f0..71c76a4 100644 --- a/tests/unit/test_statuses.py +++ b/tests/unit/test_statuses.py @@ -62,14 +62,14 @@ def test_should_be_mark_as_updated(self): def test_should_raise_exception_during_mark_update_as_aborted(self): # given/when/then with self.assertRaises(ValueError): - self.update_statuses.mark_update_aborted('key') + self.update_statuses.mark_update_aborted('key', Exception('stub')) def test_should_mark_update_as_aborted(self): # given self.update_statuses.mark_being_updated('key') # when - self.update_statuses.mark_update_aborted('key') + self.update_statuses.mark_update_aborted('key', Exception('stub')) # then self.assertFalse(self.update_statuses.is_being_updated('key')) @@ -105,3 +105,23 @@ async def test_should_await_updated_return_entry(self): # then self.assertIsNone(result) self.assertFalse(self.update_statuses.is_being_updated('key')) + + @gen_test + async def test_concurrent_callers_should_all_get_exception_on_aborted_update(self): + # given + self.update_statuses.mark_being_updated('key') + + # when + result1 = self.update_statuses.await_updated('key') + result2 = self.update_statuses.await_updated('key') + result3 = self.update_statuses.await_updated('key') + self.update_statuses.mark_update_aborted('key', ValueError('stub')) + result1 = await result1 + result2 = await result2 + result3 = await result3 + + # then + self.assertFalse(self.update_statuses.is_being_updated('key')) + self.assertEqual(str(result1), str(ValueError('stub'))) + self.assertEqual(str(result2), str(ValueError('stub'))) + self.assertEqual(str(result3), str(ValueError('stub'))) diff --git a/tox.ini b/tox.ini index 66fcd60..edc339b 100644 --- a/tox.ini +++ b/tox.ini @@ -3,7 +3,7 @@ skipdist = True # py-{asyncio,tornado} are added for GitHub Actions (where we have only one interpreter at the time) # py{35,36,37,38,39}-{asyncio,tornado} are added for development purposes (where one has multiple interpreters) -envlist = py-{asyncio,tornado},py{37,38,39,310,311}-{asyncio,tornado},coverage-py310,mypy-py310 +envlist = py-{asyncio,tornado},py{37,38,39,310,311,312}-{asyncio,tornado},coverage-py310,mypy-py310 [testenv] setenv = @@ -19,6 +19,9 @@ commands = coverage-py37: coverage report mypy-py37: mypy memoize deps = + setuptools # for setup.py to work (distutils is removed from Python 3.12) + tornado: backports.ssl-match-hostname # for tornado-based tests to run with Python 3.12 + asyncio: backports.ssl-match-hostname # unit tests that run for asyncio are still based on tornado.testing asyncio: tornado>4,<5 tornado: tornado>4,<5 coverage: coverage