From 5a45c648a8913519b89465db0e543c88376925b2 Mon Sep 17 00:00:00 2001
From: Engel Nyst <enyst@users.noreply.github.com>
Date: Wed, 2 Oct 2024 01:00:03 +0200
Subject: [PATCH] attributes for BE/FE should not be sent (#4150)

---
 openhands/events/serialization/event.py          |  7 +++++++
 .../DelegatorAgent/test_edits/prompt_002.log     |  2 +-
 .../DelegatorAgent/test_edits/prompt_003.log     |  2 +-
 .../test_write_simple_script/prompt_002.log      |  2 +-
 .../test_write_simple_script/prompt_005.log      |  2 +-
 .../test_write_simple_script/prompt_006.log      |  2 +-
 .../test_write_simple_script/prompt_009.log      |  2 +-
 .../test_simple_task_rejection/prompt_003.log    |  2 +-
 .../test_write_simple_script/prompt_010.log      |  5 +----
 .../test_write_simple_script/prompt_011.log      |  5 +----
 tests/unit/test_action_serialization.py          | 16 +++++++++++++++-
 11 files changed, 31 insertions(+), 16 deletions(-)

diff --git a/openhands/events/serialization/event.py b/openhands/events/serialization/event.py
index a70595b3bb06..b77fcd93951d 100644
--- a/openhands/events/serialization/event.py
+++ b/openhands/events/serialization/event.py
@@ -80,6 +80,13 @@ def event_to_memory(event: 'Event', max_message_chars: int) -> dict:
     d.pop('timestamp', None)
     d.pop('message', None)
     d.pop('images_urls', None)
+
+    # runnable actions have some extra fields used in the BE/FE, which should not be sent to the LLM
+    if 'args' in d:
+        d['args'].pop('blocking', None)
+        d['args'].pop('keep_prompt', None)
+        d['args'].pop('is_confirmed', None)
+
     if 'extras' in d:
         remove_fields(d['extras'], DELETE_FROM_MEMORY_EXTRAS)
     if isinstance(event, Observation) and 'content' in d:
diff --git a/tests/integration/mock/eventstream_runtime/DelegatorAgent/test_edits/prompt_002.log b/tests/integration/mock/eventstream_runtime/DelegatorAgent/test_edits/prompt_002.log
index d0d2de3ab32e..1435cc0d1ae0 100644
--- a/tests/integration/mock/eventstream_runtime/DelegatorAgent/test_edits/prompt_002.log
+++ b/tests/integration/mock/eventstream_runtime/DelegatorAgent/test_edits/prompt_002.log
@@ -37,7 +37,7 @@ as well as observations you've made. This only includes the MOST RECENT
 actions and observations--more may have happened before that.
 They are time-ordered, with your most recent action at the bottom.
 
-[{"source": "agent", "action": "run", "args": {"command": "ls", "thought": "", "blocking": false, "keep_prompt": true, "is_confirmed": "confirmed"}}, {"source": "agent", "observation": "run", "content": "bad.txt\r\n\r\n[Python Interpreter: /openhands/poetry/openhands-ai-5O4_aCHf-py3.11/bin/python]\nopenhands@docker-desktop:/workspace $ ", "extras": {"command_id": -1, "command": "ls", "exit_code": 0}}]
+[{"source": "agent", "action": "run", "args": {"command": "ls", "thought": ""}}, {"source": "agent", "observation": "run", "content": "bad.txt\r\n\r\n[Python Interpreter: /openhands/poetry/openhands-ai-5O4_aCHf-py3.11/bin/python]\nopenhands@docker-desktop:/workspace $ ", "extras": {"command_id": -1, "command": "ls", "exit_code": 0}}]
 
 ## Format
 Your response MUST be in JSON format. It must be an object, and it must contain two fields:
diff --git a/tests/integration/mock/eventstream_runtime/DelegatorAgent/test_edits/prompt_003.log b/tests/integration/mock/eventstream_runtime/DelegatorAgent/test_edits/prompt_003.log
index 65df2e8c13ac..fe4199f06e62 100644
--- a/tests/integration/mock/eventstream_runtime/DelegatorAgent/test_edits/prompt_003.log
+++ b/tests/integration/mock/eventstream_runtime/DelegatorAgent/test_edits/prompt_003.log
@@ -37,7 +37,7 @@ as well as observations you've made. This only includes the MOST RECENT
 actions and observations--more may have happened before that.
 They are time-ordered, with your most recent action at the bottom.
 
-[{"source": "agent", "action": "run", "args": {"command": "ls", "thought": "", "blocking": false, "keep_prompt": true, "is_confirmed": "confirmed"}}, {"source": "agent", "observation": "run", "content": "bad.txt\r\n\r\n[Python Interpreter: /openhands/poetry/openhands-ai-5O4_aCHf-py3.11/bin/python]\nopenhands@docker-desktop:/workspace $ ", "extras": {"command_id": -1, "command": "ls", "exit_code": 0}}, {"source": "agent", "action": "read", "args": {"path": "bad.txt", "start": 0, "end": -1, "thought": ""}}, {"source": "agent", "observation": "read", "content": "This is a stupid typoo.\nReally?\nNo mor typos!\nEnjoy!\n", "extras": {"path": "/workspace/bad.txt"}}]
+[{"source": "agent", "action": "run", "args": {"command": "ls", "thought": ""}}, {"source": "agent", "observation": "run", "content": "bad.txt\r\n\r\n[Python Interpreter: /openhands/poetry/openhands-ai-5O4_aCHf-py3.11/bin/python]\nopenhands@docker-desktop:/workspace $ ", "extras": {"command_id": -1, "command": "ls", "exit_code": 0}}, {"source": "agent", "action": "read", "args": {"path": "bad.txt", "start": 0, "end": -1, "thought": ""}}, {"source": "agent", "observation": "read", "content": "This is a stupid typoo.\nReally?\nNo mor typos!\nEnjoy!\n", "extras": {"path": "/workspace/bad.txt"}}]
 
 ## Format
 Your response MUST be in JSON format. It must be an object, and it must contain two fields:
diff --git a/tests/integration/mock/eventstream_runtime/DelegatorAgent/test_write_simple_script/prompt_002.log b/tests/integration/mock/eventstream_runtime/DelegatorAgent/test_write_simple_script/prompt_002.log
index aa890b172628..a8ebb28b8e17 100644
--- a/tests/integration/mock/eventstream_runtime/DelegatorAgent/test_write_simple_script/prompt_002.log
+++ b/tests/integration/mock/eventstream_runtime/DelegatorAgent/test_write_simple_script/prompt_002.log
@@ -37,7 +37,7 @@ as well as observations you've made. This only includes the MOST RECENT
 actions and observations--more may have happened before that.
 They are time-ordered, with your most recent action at the bottom.
 
-[{"source": "agent", "action": "run", "args": {"command": "ls", "thought": "", "blocking": false, "keep_prompt": true, "is_confirmed": "confirmed"}}, {"source": "agent", "observation": "run", "content": "\r\n[Python Interpreter: /openhands/poetry/openhands-ai-5O4_aCHf-py3.11/bin/python]\nopenhands@docker-desktop:/workspace $ ", "extras": {"command_id": -1, "command": "ls", "exit_code": 0}}]
+[{"source": "agent", "action": "run", "args": {"command": "ls", "thought": ""}}, {"source": "agent", "observation": "run", "content": "\r\n[Python Interpreter: /openhands/poetry/openhands-ai-5O4_aCHf-py3.11/bin/python]\nopenhands@docker-desktop:/workspace $ ", "extras": {"command_id": -1, "command": "ls", "exit_code": 0}}]
 
 ## Format
 Your response MUST be in JSON format. It must be an object, and it must contain two fields:
diff --git a/tests/integration/mock/eventstream_runtime/DelegatorAgent/test_write_simple_script/prompt_005.log b/tests/integration/mock/eventstream_runtime/DelegatorAgent/test_write_simple_script/prompt_005.log
index 99188b7b9092..09a7d1e3f3b2 100644
--- a/tests/integration/mock/eventstream_runtime/DelegatorAgent/test_write_simple_script/prompt_005.log
+++ b/tests/integration/mock/eventstream_runtime/DelegatorAgent/test_write_simple_script/prompt_005.log
@@ -37,7 +37,7 @@ as well as observations you've made. This only includes the MOST RECENT
 actions and observations--more may have happened before that.
 They are time-ordered, with your most recent action at the bottom.
 
-[{"source": "agent", "action": "write", "args": {"path": "hello.sh", "content": "#!/bin/bash\n\necho 'hello'", "start": 0, "end": -1, "thought": ""}}, {"source": "agent", "observation": "write", "content": "", "extras": {"path": "/workspace/hello.sh"}}, {"source": "agent", "action": "run", "args": {"command": "chmod +x hello.sh", "thought": "", "blocking": false, "keep_prompt": true, "is_confirmed": "confirmed"}}, {"source": "agent", "observation": "run", "content": "\r\n[Python Interpreter: /openhands/poetry/openhands-ai-5O4_aCHf-py3.11/bin/python]\nopenhands@docker-desktop:/workspace $ ", "extras": {"command_id": -1, "command": "chmod +x hello.sh", "exit_code": 0}}]
+[{"source": "agent", "action": "write", "args": {"path": "hello.sh", "content": "#!/bin/bash\n\necho 'hello'", "start": 0, "end": -1, "thought": ""}}, {"source": "agent", "observation": "write", "content": "", "extras": {"path": "/workspace/hello.sh"}}, {"source": "agent", "action": "run", "args": {"command": "chmod +x hello.sh", "thought": ""}}, {"source": "agent", "observation": "run", "content": "\r\n[Python Interpreter: /openhands/poetry/openhands-ai-5O4_aCHf-py3.11/bin/python]\nopenhands@docker-desktop:/workspace $ ", "extras": {"command_id": -1, "command": "chmod +x hello.sh", "exit_code": 0}}]
 
 ## Format
 Your response MUST be in JSON format. It must be an object, and it must contain two fields:
diff --git a/tests/integration/mock/eventstream_runtime/DelegatorAgent/test_write_simple_script/prompt_006.log b/tests/integration/mock/eventstream_runtime/DelegatorAgent/test_write_simple_script/prompt_006.log
index af0d7aef2f6e..bb7e90b8a0dd 100644
--- a/tests/integration/mock/eventstream_runtime/DelegatorAgent/test_write_simple_script/prompt_006.log
+++ b/tests/integration/mock/eventstream_runtime/DelegatorAgent/test_write_simple_script/prompt_006.log
@@ -37,7 +37,7 @@ as well as observations you've made. This only includes the MOST RECENT
 actions and observations--more may have happened before that.
 They are time-ordered, with your most recent action at the bottom.
 
-[{"source": "agent", "action": "write", "args": {"path": "hello.sh", "content": "#!/bin/bash\n\necho 'hello'", "start": 0, "end": -1, "thought": ""}}, {"source": "agent", "observation": "write", "content": "", "extras": {"path": "/workspace/hello.sh"}}, {"source": "agent", "action": "run", "args": {"command": "chmod +x hello.sh", "thought": "", "blocking": false, "keep_prompt": true, "is_confirmed": "confirmed"}}, {"source": "agent", "observation": "run", "content": "\r\n[Python Interpreter: /openhands/poetry/openhands-ai-5O4_aCHf-py3.11/bin/python]\nopenhands@docker-desktop:/workspace $ ", "extras": {"command_id": -1, "command": "chmod +x hello.sh", "exit_code": 0}}, {"source": "agent", "action": "run", "args": {"command": "./hello.sh", "thought": "", "blocking": false, "keep_prompt": true, "is_confirmed": "confirmed"}}, {"source": "agent", "observation": "run", "content": "hello\r\n\r\n[Python Interpreter: /openhands/poetry/openhands-ai-5O4_aCHf-py3.11/bin/python]\nopenhands@docker-desktop:/workspace $ ", "extras": {"command_id": -1, "command": "./hello.sh", "exit_code": 0}}]
+[{"source": "agent", "action": "write", "args": {"path": "hello.sh", "content": "#!/bin/bash\n\necho 'hello'", "start": 0, "end": -1, "thought": ""}}, {"source": "agent", "observation": "write", "content": "", "extras": {"path": "/workspace/hello.sh"}}, {"source": "agent", "action": "run", "args": {"command": "chmod +x hello.sh", "thought": ""}}, {"source": "agent", "observation": "run", "content": "\r\n[Python Interpreter: /openhands/poetry/openhands-ai-5O4_aCHf-py3.11/bin/python]\nopenhands@docker-desktop:/workspace $ ", "extras": {"command_id": -1, "command": "chmod +x hello.sh", "exit_code": 0}}, {"source": "agent", "action": "run", "args": {"command": "./hello.sh", "thought": ""}}, {"source": "agent", "observation": "run", "content": "hello\r\n\r\n[Python Interpreter: /openhands/poetry/openhands-ai-5O4_aCHf-py3.11/bin/python]\nopenhands@docker-desktop:/workspace $ ", "extras": {"command_id": -1, "command": "./hello.sh", "exit_code": 0}}]
 
 ## Format
 Your response MUST be in JSON format. It must be an object, and it must contain two fields:
diff --git a/tests/integration/mock/eventstream_runtime/DelegatorAgent/test_write_simple_script/prompt_009.log b/tests/integration/mock/eventstream_runtime/DelegatorAgent/test_write_simple_script/prompt_009.log
index ae25dcf09fd1..338ef431d57c 100644
--- a/tests/integration/mock/eventstream_runtime/DelegatorAgent/test_write_simple_script/prompt_009.log
+++ b/tests/integration/mock/eventstream_runtime/DelegatorAgent/test_write_simple_script/prompt_009.log
@@ -35,7 +35,7 @@ as well as observations you've made. This only includes the MOST RECENT
 actions and observations--more may have happened before that.
 They are time-ordered, with your most recent action at the bottom.
 
-[{"source": "agent", "action": "read", "args": {"path": "hello.sh", "start": 0, "end": -1, "thought": ""}}, {"source": "agent", "observation": "read", "content": "#!/bin/bash\n\necho 'hello'\n", "extras": {"path": "/workspace/hello.sh"}}, {"source": "agent", "action": "run", "args": {"command": "bash hello.sh", "thought": "", "blocking": false, "keep_prompt": true, "is_confirmed": "confirmed"}}, {"source": "agent", "observation": "run", "content": "hello\r\n\r\n[Python Interpreter: /openhands/poetry/openhands-ai-5O4_aCHf-py3.11/bin/python]\nopenhands@docker-desktop:/workspace $ ", "extras": {"command_id": -1, "command": "bash hello.sh", "exit_code": 0}}]
+[{"source": "agent", "action": "read", "args": {"path": "hello.sh", "start": 0, "end": -1, "thought": ""}}, {"source": "agent", "observation": "read", "content": "#!/bin/bash\n\necho 'hello'\n", "extras": {"path": "/workspace/hello.sh"}}, {"source": "agent", "action": "run", "args": {"command": "bash hello.sh", "thought": ""}}, {"source": "agent", "observation": "run", "content": "hello\r\n\r\n[Python Interpreter: /openhands/poetry/openhands-ai-5O4_aCHf-py3.11/bin/python]\nopenhands@docker-desktop:/workspace $ ", "extras": {"command_id": -1, "command": "bash hello.sh", "exit_code": 0}}]
 
 ## Format
 Your response MUST be in JSON format. It must be an object, and it must contain two fields:
diff --git a/tests/integration/mock/eventstream_runtime/ManagerAgent/test_simple_task_rejection/prompt_003.log b/tests/integration/mock/eventstream_runtime/ManagerAgent/test_simple_task_rejection/prompt_003.log
index d4affba2f202..556d8bbff280 100644
--- a/tests/integration/mock/eventstream_runtime/ManagerAgent/test_simple_task_rejection/prompt_003.log
+++ b/tests/integration/mock/eventstream_runtime/ManagerAgent/test_simple_task_rejection/prompt_003.log
@@ -24,7 +24,7 @@ as well as observations you've made. This only includes the MOST RECENT
 actions and observations--more may have happened before that.
 They are time-ordered, with your most recent action at the bottom.
 
-[{"source": "agent", "action": "run", "args": {"command": "git status", "thought": "", "blocking": false, "keep_prompt": true, "is_confirmed": "confirmed"}}, {"source": "agent", "observation": "run", "content": "fatal: not a git repository (or any parent up to mount point /)\r\nStopping at filesystem boundary (GIT_DISCOVERY_ACROSS_FILESYSTEM not set).\r\n\r\n[Python Interpreter: /openhands/poetry/openhands-ai-5O4_aCHf-py3.11/bin/python]\nopenhands@docker-desktop:/workspace $ ", "extras": {"command_id": -1, "command": "git status", "exit_code": 128}}]
+[{"source": "agent", "action": "run", "args": {"command": "git status", "thought": ""}}, {"source": "agent", "observation": "run", "content": "fatal: not a git repository (or any parent up to mount point /)\r\nStopping at filesystem boundary (GIT_DISCOVERY_ACROSS_FILESYSTEM not set).\r\n\r\n[Python Interpreter: /openhands/poetry/openhands-ai-5O4_aCHf-py3.11/bin/python]\nopenhands@docker-desktop:/workspace $ ", "extras": {"command_id": -1, "command": "git status", "exit_code": 128}}]
 
 If the last item in the history is an error, you should try to fix it.
 
diff --git a/tests/integration/mock/eventstream_runtime/PlannerAgent/test_write_simple_script/prompt_010.log b/tests/integration/mock/eventstream_runtime/PlannerAgent/test_write_simple_script/prompt_010.log
index b122f016ad75..64dbbc48790b 100644
--- a/tests/integration/mock/eventstream_runtime/PlannerAgent/test_write_simple_script/prompt_010.log
+++ b/tests/integration/mock/eventstream_runtime/PlannerAgent/test_write_simple_script/prompt_010.log
@@ -193,10 +193,7 @@ ten actions--more happened before that.
     "action": "run",
     "args": {
       "command": "bash hello.sh",
-      "thought": "",
-      "blocking": false,
-      "keep_prompt": true,
-      "is_confirmed": "confirmed"
+      "thought": ""
     }
   },
   {
diff --git a/tests/integration/mock/eventstream_runtime/PlannerAgent/test_write_simple_script/prompt_011.log b/tests/integration/mock/eventstream_runtime/PlannerAgent/test_write_simple_script/prompt_011.log
index 03378e8d9e98..219643b27e6c 100644
--- a/tests/integration/mock/eventstream_runtime/PlannerAgent/test_write_simple_script/prompt_011.log
+++ b/tests/integration/mock/eventstream_runtime/PlannerAgent/test_write_simple_script/prompt_011.log
@@ -192,10 +192,7 @@ ten actions--more happened before that.
     "action": "run",
     "args": {
       "command": "bash hello.sh",
-      "thought": "",
-      "blocking": false,
-      "keep_prompt": true,
-      "is_confirmed": "confirmed"
+      "thought": ""
     }
   },
   {
diff --git a/tests/unit/test_action_serialization.py b/tests/unit/test_action_serialization.py
index 7cd9b0d178b9..4a2136f397be 100644
--- a/tests/unit/test_action_serialization.py
+++ b/tests/unit/test_action_serialization.py
@@ -29,15 +29,29 @@ def serialization_deserialization(
     assert isinstance(
         action_instance, cls
     ), f'The action instance should be an instance of {cls.__name__}.'
+
+    # event_to_dict is the regular serialization of an event
     serialized_action_dict = event_to_dict(action_instance)
-    serialized_action_memory = event_to_memory(action_instance, max_message_chars)
+
+    # it has an extra message property, for the UI
     serialized_action_dict.pop('message')
     assert (
         serialized_action_dict == original_action_dict
     ), 'The serialized action should match the original action dict.'
+
+    # memory dict is what is sent to the LLM
+    serialized_action_memory = event_to_memory(action_instance, max_message_chars)
     original_memory_dict = original_action_dict.copy()
+
+    # we don't send backend properties like id or 'keep_prompt'
     original_memory_dict.pop('id', None)
     original_memory_dict.pop('timestamp', None)
+    if 'args' in original_memory_dict:
+        original_memory_dict['args'].pop('keep_prompt', None)
+        original_memory_dict['args'].pop('blocking', None)
+        original_memory_dict['args'].pop('is_confirmed', None)
+
+    # the rest should match
     assert (
         serialized_action_memory == original_memory_dict
     ), 'The serialized action in memory should match the original action dict.'