Skip to content

Commit

Permalink
Fix dict memoization: sorting keys before normalizing them is unsafe
Browse files Browse the repository at this point in the history
  • Loading branch information
MundiaNderi committed Mar 18, 2024
1 parent e364791 commit c462aa0
Show file tree
Hide file tree
Showing 45 changed files with 45 additions and 5 deletions.
3 changes: 2 additions & 1 deletion parsl/dataflow/dflow.py
Original file line number Diff line number Diff line change
Expand Up @@ -730,7 +730,8 @@ def launch_task(self, task_record: TaskRecord) -> Future:

return exec_fu

def _add_input_deps(self, executor: str, args: Sequence[Any], kwargs: Dict[str, Any], func: Callable) -> Tuple[Sequence[Any], Dict[str, Any], Callable]:
def _add_input_deps(self, executor: str, args: Sequence[Any], kwargs: Dict[str, Any], func: Callable) -> Tuple[Sequence[Any], Dict[str, Any],
Callable]:
"""Look for inputs of the app that are files. Give the data manager
the opportunity to replace a file with a data future for that file,
for example wrapping the result of a staging action.
Expand Down
15 changes: 11 additions & 4 deletions parsl/dataflow/memoization.py
Original file line number Diff line number Diff line change
Expand Up @@ -5,7 +5,7 @@
import pickle
from parsl.dataflow.taskrecord import TaskRecord

from typing import Dict, Any, List, Optional, TYPE_CHECKING
from typing import Dict, Any, List, Optional, TYPE_CHECKING # avoid circular imports

if TYPE_CHECKING:
from parsl import DataFlowKernel # import loop at runtime - needed for typechecking - TODO turn into "if typing:"
Expand All @@ -14,7 +14,9 @@

import types

logger = logging.getLogger(__name__)
logger = logging.getLogger(__name__)  # module-level logger, named after this module

# memoization function with a single dispatch decorator


@singledispatch
Expand Down Expand Up @@ -49,6 +51,8 @@ def id_for_memo(obj: object, output_ref: bool = False) -> bytes:
logger.error("id_for_memo attempted on unknown type {}".format(type(obj)))
raise ValueError("unknown type for memoization: {}".format(type(obj)))

# type specific implementations - handle how each type should be serialized for memoization


@id_for_memo.register(str)
@id_for_memo.register(int)
Expand Down Expand Up @@ -94,10 +98,13 @@ def id_for_memo_dict(denormalized_dict: dict, output_ref: bool = False) -> bytes
if type(denormalized_dict) is not dict:
raise ValueError("id_for_memo_dict cannot work on subclasses of dict")

keys = sorted(denormalized_dict)
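# Previously: keys = sorted(denormalized_dict) -- the line sirosen commented on.
# Sorting the raw keys is unsafe: keys that are hashable but not mutually
# orderable make sorted() raise TypeError.
# Proposed solution: normalize the keys with id_for_memo first, then sort them.
# NOTE(review): the code below sorts keymap.values(), which are the original
# keys rather than the normalized ones -- confirm this is what was intended.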
keymap = {id_for_memo(k): k for k in denormalized_dict}
normed_keys = sorted(keymap.values())

normalized_list = []
for k in keys:
for k in normed_keys:
normalized_list.append(id_for_memo(k))
normalized_list.append(id_for_memo(denormalized_dict[k], output_ref=output_ref))
return pickle.dumps(normalized_list)
Expand Down
32 changes: 32 additions & 0 deletions parsl/tests/test_python_apps/test_memoize_3.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,32 @@
import pytest
import enum

# Define an enum - a collection of related constants


class Foo(enum.Enum):
    """Two-member enumeration whose members serve as hashable dict keys."""

    # Explicit values match what enum.auto() assigns: counting up from 1.
    x = 1
    y = 2


# Test function demonstrating the issue with unstable sorting when keys
# are hashable but not comparable.


def test_unstable_sorting():
    """Show that sorting dict keys directly fails for unorderable keys.

    Function objects and plain ``enum.Enum`` members are hashable, so they
    are valid dictionary keys, but they define no ordering. Calling
    ``sorted`` on a dictionary keyed by them therefore raises ``TypeError``.
    """
    def foo():
        return 1

    def bar():
        return 2

    # Function objects as keys: hashable but not orderable.
    d = {foo: 1, bar: 2}

    # Sorting the dictionary's keys should raise a TypeError.
    with pytest.raises(TypeError):
        sorted(d)

    # Plain Enum members as keys: also hashable but not orderable.
    d_enum = {Foo.x: 1, Foo.y: 2}

    # The original built this dictionary without checking anything about it;
    # assert the same failure mode so the enum case is actually exercised.
    with pytest.raises(TypeError):
        sorted(d_enum)
Binary file added runinfo/000/checkpoint/dfk.pkl
Binary file not shown.
Binary file added runinfo/000/checkpoint/tasks.pkl
Binary file not shown.
Binary file added runinfo/001/checkpoint/dfk.pkl
Binary file not shown.
Empty file.
Binary file added runinfo/002/checkpoint/dfk.pkl
Binary file not shown.
Binary file added runinfo/002/checkpoint/tasks.pkl
Binary file not shown.
Binary file added runinfo/003/checkpoint/dfk.pkl
Binary file not shown.
Empty file.
Binary file added runinfo/004/checkpoint/dfk.pkl
Binary file not shown.
Binary file added runinfo/004/checkpoint/tasks.pkl
Binary file not shown.
Binary file added runinfo/005/checkpoint/dfk.pkl
Binary file not shown.
Binary file added runinfo/005/checkpoint/tasks.pkl
Binary file not shown.
Binary file added runinfo/006/checkpoint/dfk.pkl
Binary file not shown.
Binary file added runinfo/006/checkpoint/tasks.pkl
Binary file not shown.
Binary file added runinfo/007/checkpoint/dfk.pkl
Binary file not shown.
Binary file added runinfo/007/checkpoint/tasks.pkl
Binary file not shown.
Binary file added runinfo/008/checkpoint/dfk.pkl
Binary file not shown.
Empty file.
Binary file added runinfo/009/checkpoint/dfk.pkl
Binary file not shown.
Binary file added runinfo/009/checkpoint/tasks.pkl
Binary file not shown.
Binary file added runinfo/010/checkpoint/dfk.pkl
Binary file not shown.
Empty file.
Binary file added runinfo/011/checkpoint/dfk.pkl
Binary file not shown.
Binary file added runinfo/011/checkpoint/tasks.pkl
Binary file not shown.
Binary file added runinfo/012/checkpoint/dfk.pkl
Binary file not shown.
Binary file added runinfo/012/checkpoint/tasks.pkl
Binary file not shown.
Binary file added runinfo/013/checkpoint/dfk.pkl
Binary file not shown.
Binary file added runinfo/013/checkpoint/tasks.pkl
Binary file not shown.
Binary file added runinfo/014/checkpoint/dfk.pkl
Binary file not shown.
Binary file added runinfo/014/checkpoint/tasks.pkl
Binary file not shown.
Binary file added runinfo/015/checkpoint/dfk.pkl
Binary file not shown.
Empty file.
Binary file added runinfo/016/checkpoint/dfk.pkl
Binary file not shown.
Binary file added runinfo/016/checkpoint/tasks.pkl
Binary file not shown.
Binary file added runinfo/017/checkpoint/dfk.pkl
Binary file not shown.
Empty file.
Binary file added runinfo/018/checkpoint/dfk.pkl
Binary file not shown.
Binary file added runinfo/018/checkpoint/tasks.pkl
Binary file not shown.
Binary file added runinfo/019/checkpoint/dfk.pkl
Binary file not shown.
Binary file added runinfo/019/checkpoint/tasks.pkl
Binary file not shown.
Binary file added runinfo/020/checkpoint/dfk.pkl
Binary file not shown.
Binary file added runinfo/020/checkpoint/tasks.pkl
Binary file not shown.

0 comments on commit c462aa0

Please sign in to comment.