diff --git a/docetl/operations/utils.py b/docetl/operations/utils.py
index bd9899dc..a5807168 100644
--- a/docetl/operations/utils.py
+++ b/docetl/operations/utils.py
@@ -486,9 +486,7 @@ def _cached_call_llm(
                 }
             ]
             + messages
-            + [
-                {"role": "assistant", "content": json.dumps(parsed_output)},
-            ]
+            + [{"role": "assistant", "content": json.dumps(parsed_output)}]
         )
 
         for rnd in range(num_gleaning_rounds):
@@ -553,9 +551,10 @@ def _cached_call_llm(
                     parsed_output = self.parse_llm_response(
                         response, output_schema, tools
                     )[0]
-                    validator_messages[-1] = [
-                        {"role": "assistant", "content": json.dumps(parsed_output)},
-                    ]
+                    validator_messages[-1] = {
+                        "role": "assistant",
+                        "content": json.dumps(parsed_output),
+                    }
 
                     total_cost += completion_cost(response)
 
diff --git a/tests/basic/test_basic_map.py b/tests/basic/test_basic_map.py
index 21a21d95..357a90b0 100644
--- a/tests/basic/test_basic_map.py
+++ b/tests/basic/test_basic_map.py
@@ -191,9 +191,10 @@ def test_map_operation_with_gleaning(simple_map_config, map_sample_data, api_wra
     map_config_with_gleaning = {
         **simple_map_config,
         "gleaning": {
-            "num_rounds": 1,
+            "num_rounds": 2,
             "validation_prompt": "Review the sentiment analysis. Is it accurate? If not, suggest improvements.",
         },
+        "bypass_cache": True,
     }
 
     operation = MapOperation(api_wrapper, map_config_with_gleaning, "gpt-4o-mini", 4)