From 11ff829a1b4544a9d6dc405c63c8b347f411d827 Mon Sep 17 00:00:00 2001 From: Serah Nderi Date: Wed, 6 Mar 2024 10:38:31 +0300 Subject: [PATCH] Fixing Dict memoization sorting before normalizing keys is unsafe' --- parsl/dataflow/dflow.py | 3 +- parsl/dataflow/memoization.py | 15 +++++--- .../tests/test_python_apps/test_memoize_3.py | 32 ++++++++++++++++++ runinfo/000/checkpoint/dfk.pkl | Bin 0 -> 78 bytes runinfo/000/checkpoint/tasks.pkl | Bin 0 -> 328 bytes runinfo/001/checkpoint/dfk.pkl | Bin 0 -> 78 bytes runinfo/001/checkpoint/tasks.pkl | 0 runinfo/002/checkpoint/dfk.pkl | Bin 0 -> 78 bytes runinfo/002/checkpoint/tasks.pkl | Bin 0 -> 415 bytes runinfo/003/checkpoint/dfk.pkl | Bin 0 -> 78 bytes runinfo/003/checkpoint/tasks.pkl | 0 runinfo/004/checkpoint/dfk.pkl | Bin 0 -> 78 bytes runinfo/004/checkpoint/tasks.pkl | Bin 0 -> 415 bytes runinfo/005/checkpoint/dfk.pkl | Bin 0 -> 78 bytes runinfo/005/checkpoint/tasks.pkl | Bin 0 -> 415 bytes runinfo/006/checkpoint/dfk.pkl | Bin 0 -> 78 bytes runinfo/006/checkpoint/tasks.pkl | Bin 0 -> 164 bytes runinfo/007/checkpoint/dfk.pkl | Bin 0 -> 78 bytes runinfo/007/checkpoint/tasks.pkl | Bin 0 -> 328 bytes runinfo/008/checkpoint/dfk.pkl | Bin 0 -> 78 bytes runinfo/008/checkpoint/tasks.pkl | 0 runinfo/009/checkpoint/dfk.pkl | Bin 0 -> 78 bytes runinfo/009/checkpoint/tasks.pkl | Bin 0 -> 415 bytes runinfo/010/checkpoint/dfk.pkl | Bin 0 -> 78 bytes runinfo/010/checkpoint/tasks.pkl | 0 runinfo/011/checkpoint/dfk.pkl | Bin 0 -> 78 bytes runinfo/011/checkpoint/tasks.pkl | Bin 0 -> 415 bytes runinfo/012/checkpoint/dfk.pkl | Bin 0 -> 78 bytes runinfo/012/checkpoint/tasks.pkl | Bin 0 -> 415 bytes runinfo/013/checkpoint/dfk.pkl | Bin 0 -> 78 bytes runinfo/013/checkpoint/tasks.pkl | Bin 0 -> 164 bytes runinfo/014/checkpoint/dfk.pkl | Bin 0 -> 78 bytes runinfo/014/checkpoint/tasks.pkl | Bin 0 -> 328 bytes runinfo/015/checkpoint/dfk.pkl | Bin 0 -> 78 bytes runinfo/015/checkpoint/tasks.pkl | 0 runinfo/016/checkpoint/dfk.pkl | Bin 0 -> 78 bytes runinfo/016/checkpoint/tasks.pkl | Bin 0 -> 415 bytes runinfo/017/checkpoint/dfk.pkl | Bin 0 -> 78 bytes runinfo/017/checkpoint/tasks.pkl | 0 runinfo/018/checkpoint/dfk.pkl | Bin 0 -> 78 bytes runinfo/018/checkpoint/tasks.pkl | Bin 0 -> 415 bytes runinfo/019/checkpoint/dfk.pkl | Bin 0 -> 78 bytes runinfo/019/checkpoint/tasks.pkl | Bin 0 -> 415 bytes runinfo/020/checkpoint/dfk.pkl | Bin 0 -> 78 bytes runinfo/020/checkpoint/tasks.pkl | Bin 0 -> 164 bytes 45 files changed, 45 insertions(+), 5 deletions(-) create mode 100644 parsl/tests/test_python_apps/test_memoize_3.py create mode 100644 runinfo/000/checkpoint/dfk.pkl create mode 100644 runinfo/000/checkpoint/tasks.pkl create mode 100644 runinfo/001/checkpoint/dfk.pkl create mode 100644 runinfo/001/checkpoint/tasks.pkl create mode 100644 runinfo/002/checkpoint/dfk.pkl create mode 100644 runinfo/002/checkpoint/tasks.pkl create mode 100644 runinfo/003/checkpoint/dfk.pkl create mode 100644 runinfo/003/checkpoint/tasks.pkl create mode 100644 runinfo/004/checkpoint/dfk.pkl create mode 100644 runinfo/004/checkpoint/tasks.pkl create mode 100644 runinfo/005/checkpoint/dfk.pkl create mode 100644 runinfo/005/checkpoint/tasks.pkl create mode 100644 runinfo/006/checkpoint/dfk.pkl create mode 100644 runinfo/006/checkpoint/tasks.pkl create mode 100644 runinfo/007/checkpoint/dfk.pkl create mode 100644 runinfo/007/checkpoint/tasks.pkl create mode 100644 runinfo/008/checkpoint/dfk.pkl create mode 100644 runinfo/008/checkpoint/tasks.pkl create mode 100644 runinfo/009/checkpoint/dfk.pkl create mode 100644 runinfo/009/checkpoint/tasks.pkl create mode 100644 runinfo/010/checkpoint/dfk.pkl create mode 100644 runinfo/010/checkpoint/tasks.pkl create mode 100644 runinfo/011/checkpoint/dfk.pkl create mode 100644 runinfo/011/checkpoint/tasks.pkl create mode 100644 runinfo/012/checkpoint/dfk.pkl create mode 100644 runinfo/012/checkpoint/tasks.pkl create mode 100644 runinfo/013/checkpoint/dfk.pkl create mode 100644 runinfo/013/checkpoint/tasks.pkl create mode 100644 runinfo/014/checkpoint/dfk.pkl create mode 100644 runinfo/014/checkpoint/tasks.pkl create mode 100644 runinfo/015/checkpoint/dfk.pkl create mode 100644 runinfo/015/checkpoint/tasks.pkl create mode 100644 runinfo/016/checkpoint/dfk.pkl create mode 100644 runinfo/016/checkpoint/tasks.pkl create mode 100644 runinfo/017/checkpoint/dfk.pkl create mode 100644 runinfo/017/checkpoint/tasks.pkl create mode 100644 runinfo/018/checkpoint/dfk.pkl create mode 100644 runinfo/018/checkpoint/tasks.pkl create mode 100644 runinfo/019/checkpoint/dfk.pkl create mode 100644 runinfo/019/checkpoint/tasks.pkl create mode 100644 runinfo/020/checkpoint/dfk.pkl create mode 100644 runinfo/020/checkpoint/tasks.pkl diff --git a/parsl/dataflow/dflow.py b/parsl/dataflow/dflow.py index 707e2abc23..f45ac37629 100644 --- a/parsl/dataflow/dflow.py +++ b/parsl/dataflow/dflow.py @@ -730,7 +730,8 @@ def launch_task(self, task_record: TaskRecord) -> Future: return exec_fu - def _add_input_deps(self, executor: str, args: Sequence[Any], kwargs: Dict[str, Any], func: Callable) -> Tuple[Sequence[Any], Dict[str, Any], Callable]: + def _add_input_deps(self, executor: str, args: Sequence[Any], kwargs: Dict[str, Any], func: Callable) -> Tuple[Sequence[Any], Dict[str, Any], + Callable]: """Look for inputs of the app that are files. Give the data manager the opportunity to replace a file with a data future for that file, for example wrapping the result of a staging action. diff --git a/parsl/dataflow/memoization.py b/parsl/dataflow/memoization.py index e4d657ccaa..2f2126ecba 100644 --- a/parsl/dataflow/memoization.py +++ b/parsl/dataflow/memoization.py @@ -5,7 +5,7 @@ import pickle from parsl.dataflow.taskrecord import TaskRecord -from typing import Dict, Any, List, Optional, TYPE_CHECKING +from typing import Dict, Any, List, Optional, TYPE_CHECKING # avoid circular imports if TYPE_CHECKING: from parsl import DataFlowKernel # import loop at runtime - needed for typechecking - TODO turn into "if typing:" @@ -14,7 +14,9 @@ import types -logger = logging.getLogger(__name__) +logger = logging.getLogger(__name__) # logger named name for logging purposes + +# memoization function with a single dispatch decorator @singledispatch @@ -49,6 +51,8 @@ def id_for_memo(obj: object, output_ref: bool = False) -> bytes: logger.error("id_for_memo attempted on unknown type {}".format(type(obj))) raise ValueError("unknown type for memoization: {}".format(type(obj))) +# type specific implementations - handle how each type should be serialized for memoization + @id_for_memo.register(str) @id_for_memo.register(int) @@ -94,10 +98,13 @@ def id_for_memo_dict(denormalized_dict: dict, output_ref: bool = False) -> bytes if type(denormalized_dict) is not dict: raise ValueError("id_for_memo_dict cannot work on subclasses of dict") - keys = sorted(denormalized_dict) + # keys = sorted(denormalized_dict) Line that sirosen commented on + # Proposed solution was to normalize the keys and then sort them + keymap = {id_for_memo(k): k for k in denormalized_dict} + normed_keys = sorted(keymap.values()) normalized_list = [] - for k in keys: + for k in normed_keys: normalized_list.append(id_for_memo(k)) normalized_list.append(id_for_memo(denormalized_dict[k], output_ref=output_ref)) return pickle.dumps(normalized_list) diff --git a/parsl/tests/test_python_apps/test_memoize_3.py b/parsl/tests/test_python_apps/test_memoize_3.py new file mode 100644 index 0000000000..4047189853 --- /dev/null +++ b/parsl/tests/test_python_apps/test_memoize_3.py @@ -0,0 +1,32 @@ +import pytest +import enum + +# Define an enum - collection of related consonants + + +class Foo(enum.Enum): + x = enum.auto() + y = enum.auto() + + +# Test function demonstrating the issue with unstable sorting when keys +# are hashable but not comparable. + + +def test_unstable_sorting(): + # Functions + def foo(): + return 1 + + def bar(): + return 2 + + # Dictionary with problematic keys + d = {foo: 1, bar: 2} + + # Sort the dictionary, it should raise a TypeError + with pytest.raises(TypeError): + sorted(d) + + # Create a dictionary with enum keys + d_enum = {Foo.x: 1, Foo.y: 2} diff --git a/runinfo/000/checkpoint/dfk.pkl b/runinfo/000/checkpoint/dfk.pkl new file mode 100644 index 0000000000000000000000000000000000000000..42edab3d8438858722522a1d2ff2b2566a85118e GIT binary patch literal 78 zcmZo*nd-~{0ku;!df19e^HMU4rt~Q5hZd(673%|u#EiU@)S^uNg2bZY9DSgI%)GRG aeFFo7DLq^ziN)FR$@!&uB~!dvO7#HP*BZY7 literal 0 HcmV?d00001 diff --git a/runinfo/000/checkpoint/tasks.pkl b/runinfo/000/checkpoint/tasks.pkl new file mode 100644 index 0000000000000000000000000000000000000000..43e10cac9d644b7092cfbcc3694bf083dc02c4e4 GIT binary patch literal 328 zcmZ|HL25!V5CBjqAHiF6=}IPMl9`;qh0r@pW)dtE6~7ff_%=iwydsm-4hYsHvj2xNxhm=KvUMezw;Y4Z!F;(tv5 literal 0 HcmV?d00001 diff --git a/runinfo/003/checkpoint/dfk.pkl b/runinfo/003/checkpoint/dfk.pkl new file mode 100644 index 0000000000000000000000000000000000000000..82262c516874b881152e766b37fc7d7df977d510 GIT binary patch literal 78 zcmZo*nd-~{0ku;!df19e^HMU4rt~Q5hZd(673%|u#EiU@)S^uNg2bZY9DSgI%)GRG aeFFpIDLq^ziN)FR$@!&uB~!dvOZ5QR6B@$+ literal 0 HcmV?d00001 diff --git a/runinfo/003/checkpoint/tasks.pkl b/runinfo/003/checkpoint/tasks.pkl new file mode 100644 index 0000000000..e69de29bb2 diff --git a/runinfo/004/checkpoint/dfk.pkl b/runinfo/004/checkpoint/dfk.pkl new file mode 100644 index 0000000000000000000000000000000000000000..bf23f7ba146299594b0e0f8f05bc638535b94501 GIT binary patch literal 78 zcmZo*nd-~{0ku;!df19e^HMU4rt~Q5hZd(673%|u#EiU@)S^uNg2bZY9DSgI%)GRG aeFFoNDLq^ziN)FR$@!&uB~!dvOZ5QRCK|*5 literal 0 HcmV?d00001 diff --git a/runinfo/004/checkpoint/tasks.pkl b/runinfo/004/checkpoint/tasks.pkl new file mode 100644 index 0000000000000000000000000000000000000000..6d31d3eac987ec92e1ce522d417dc9fcfb450c0b GIT binary patch literal 415 zcmZ|I!A(Ok41i&TDkKgpz@_2 zmt%j-x6}IcKgq5tr6KcK1qZ_vy|Feg23crC{w#+iU<~9XrA-5fiAZwAT1_2 zmt%j-x6}IcKgq5tr6KcK1qZ_vy|Feg23crC{w#+iU<~9XrA-5fiAZwAT1DG|n_DCq z8BFQnOsz;xEhx#%&zs`c!&a19T$)od#ham2uK~M>mTBe|=BbHE=Ejx=DVAmi$ti|L WW@%;?$tETiDM^MYxJ+a!)dK*E88W{B literal 0 HcmV?d00001 diff --git a/runinfo/007/checkpoint/dfk.pkl b/runinfo/007/checkpoint/dfk.pkl new file mode 100644 index 0000000000000000000000000000000000000000..8236f8554ae87b051eb7dac29e0795bb13e01133 GIT binary patch literal 78 zcmZo*nd-~{0ku;!df19e^HMU4rt~Q5hZd(673%|u#EiU@)S^uNg2bZY9DSgI%)GRG aeFFpYDLq^ziN)FR$@!&uB~!dvO7#HQTpGpz literal 0 HcmV?d00001 diff --git a/runinfo/007/checkpoint/tasks.pkl b/runinfo/007/checkpoint/tasks.pkl new file mode 100644 index 0000000000000000000000000000000000000000..43e10cac9d644b7092cfbcc3694bf083dc02c4e4 GIT binary patch literal 328 zcmZ|HL25!V5CBjqAHiF6=}IPMl9`;qh0r@pW)dtE6~7ff_%=iwydsm-4hYsHvj2xNxhm=KvUMe literal 0 HcmV?d00001 diff --git a/runinfo/010/checkpoint/dfk.pkl b/runinfo/010/checkpoint/dfk.pkl new file mode 100644 index 0000000000000000000000000000000000000000..85fbf53152b6d6a15658ae85865d755c4f62b0d2 GIT binary patch literal 78 zcmZo*nd-~{0ku;!df19e^HMU4rt~Q5hZd(673%|u#EiU@)S^uNg2bZY9DSgI%)GRG aeFH;-DLq^ziN)FR$@!&uB~!dvOZ5QQ?i#@W literal 0 HcmV?d00001 diff --git a/runinfo/010/checkpoint/tasks.pkl b/runinfo/010/checkpoint/tasks.pkl new file mode 100644 index 0000000000..e69de29bb2 diff --git a/runinfo/011/checkpoint/dfk.pkl b/runinfo/011/checkpoint/dfk.pkl new file mode 100644 index 0000000000000000000000000000000000000000..ac39ce6997d92b494dd6a61892b73e336e6b00d6 GIT binary patch literal 78 zcmZo*nd-~{0ku;!df19e^HMU4rt~Q5hZd(673%|u#EiU@)S^uNg2bZY9DSgI%)GRG aeFH!aV#i)}e>`@CwEgOZyR!J z@0%gX)v5)HRUt%;(#V`8fg?!FT1MpOYDfx@QglM_khQvjYXMRA%cwDq$cMXW{R_*X BgS7wv literal 0 HcmV?d00001 diff --git a/runinfo/012/checkpoint/dfk.pkl b/runinfo/012/checkpoint/dfk.pkl new file mode 100644 index 0000000000000000000000000000000000000000..6a2fb46970d2bda2108060e837baa7bca94b0bde GIT binary patch literal 78 zcmZo*nd-~{0ku;!df19e^HMU4rt~Q5hZd(673%|u#EiU@)S^uNg2bZY9DSgI%)GRG aeFH!aV#i)}e>`@CwEgOZyR!J z@0%gX)v5)HRUt%;(#V`8fg?!FT1MpOYDfx@QglM_khQvjYXMRA%cwDq$cMXW{R_*X BgS7wv literal 0 HcmV?d00001 diff --git a/runinfo/013/checkpoint/dfk.pkl b/runinfo/013/checkpoint/dfk.pkl new file mode 100644 index 0000000000000000000000000000000000000000..0dbeb2acdee59325a15dffe59d9b9df63b0b5ae0 GIT binary patch literal 78 zcmZo*nd-~{0ku;!df19e^HMU4rt~Q5hZd(673%|u#EiU@)S^uNg2bZY9DSgI%)GRG aeFH<|DLq^ziN)FR$@!&uB~!eaO7#HQ9vZ;_ literal 0 HcmV?d00001 diff --git a/runinfo/013/checkpoint/tasks.pkl b/runinfo/013/checkpoint/tasks.pkl new file mode 100644 index 0000000000000000000000000000000000000000..71fbd07dddf46427ceb72493bcabe20884ffd5ac GIT binary patch literal 164 zcmZo*nd;5}0ku;!dRQ_Ni!-M5C?uO0S(+Fc7@MUSrkW(0q$HUqSth2Km>DG|n_DCq z8BFQnOsz;xEhx#%&zs`c!&a19T$)od#ham2uK~M>mTBe|=BbHE=Ejx=DVAmi$ti|L WW@%;?$tETiDM^MYxJ+a!)dK*E88W{B literal 0 HcmV?d00001 diff --git a/runinfo/014/checkpoint/dfk.pkl b/runinfo/014/checkpoint/dfk.pkl new file mode 100644 index 0000000000000000000000000000000000000000..61cda9a00cc1779fd06e979a246bb58cd08380de GIT binary patch literal 78 zcmZo*nd-~{0ku;!df19e^HMU4rt~Q5hZd(673%|u#EiU@)S^uNg2bZY9DSgI%)GRG aeFH<2DLq^ziN)FR$@!&uB~!dvO7#HQHyXqM literal 0 HcmV?d00001 diff --git a/runinfo/014/checkpoint/tasks.pkl b/runinfo/014/checkpoint/tasks.pkl new file mode 100644 index 0000000000000000000000000000000000000000..43e10cac9d644b7092cfbcc3694bf083dc02c4e4 GIT binary patch literal 328 zcmZ|HL25!V5CBjqAHiF6=}IPMl9`;qh0r@pW)dtE6~7ff_%=iwydsm-4hYsHvj2xNxhm=KvUMee5& literal 0 HcmV?d00001 diff --git a/runinfo/016/checkpoint/tasks.pkl b/runinfo/016/checkpoint/tasks.pkl new file mode 100644 index 0000000000000000000000000000000000000000..2a6331874dc697a78d8087aab357ef59778341c9 GIT binary patch literal 415 zcmZ|HJxW7C5CC9>AbJ47Qah_Kv%B-NH?UE>!t7+02%^E{flVL>c;)k|mSSP+eh2tY ztFMQ%KgZ>MUTtl^UFO@$2?`btFRdWuVoa4=0fKHh@iJea_O#HzL#mCtw6`WIF~fkglS literal 0 HcmV?d00001 diff --git a/runinfo/017/checkpoint/dfk.pkl b/runinfo/017/checkpoint/dfk.pkl new file mode 100644 index 0000000000000000000000000000000000000000..dc3c30d19d7c6ed6f38a92e67c87a0a0444a956a GIT binary patch literal 78 zcmZo*nd-~{0ku;!df19e^HMU4rt~Q5hZd(673%|u#EiU@)S^uNg2bZY9DSgI%)GRG aeFH=DDLq^ziN)FR$@!&uB~!dvOZ5QRa~jA1 literal 0 HcmV?d00001 diff --git a/runinfo/017/checkpoint/tasks.pkl b/runinfo/017/checkpoint/tasks.pkl new file mode 100644 index 0000000000..e69de29bb2 diff --git a/runinfo/018/checkpoint/dfk.pkl b/runinfo/018/checkpoint/dfk.pkl new file mode 100644 index 0000000000000000000000000000000000000000..ff38200ea62d3b04a171b19e840f886527e90af2 GIT binary patch literal 78 zcmZo*nd-~{0ku;!df19e^HMU4rt~Q5hZd(673%|u#EiU@)S^uNg2bZY9DSgI%)GRG aeFH;_DLq^ziN)FR$@!&uB~!dvOZ5QRh8oEL literal 0 HcmV?d00001 diff --git a/runinfo/018/checkpoint/tasks.pkl b/runinfo/018/checkpoint/tasks.pkl new file mode 100644 index 0000000000000000000000000000000000000000..f13c7af35c3363740ff448250c6c7d6ac6c6830b GIT binary patch literal 415 zcmZ|IF-}7<41i&V3PcaU(&ddzmEAareFGM_L+m&z5`v2MNi0Y`z#GG1xD$t<5-a%+ z@crl2*TdCcpZ$JW?aOiBm#I|_N^VjFiwb$7oB+8cPl~(Hms|fZKb)T5Ui;H>{qD#4 z<{qD#4 z<9|8kwb;StOg7SfnHw zrcCMKOsz;xEhx#%&zs`c!&a19T$)od#ha;AuK~M>$tFgYCWZ#aW+{fLCW$5~N#;qG Wi76&#Mv2Mh7D+}1xJ+ay)dK*Pp)$V! literal 0 HcmV?d00001