From dd50503517bdb66e21844cf8be0bdb6248722cec Mon Sep 17 00:00:00 2001
From: isaac hershenson
Date: Tue, 24 Dec 2024 08:26:35 -0800
Subject: [PATCH] fmt

---
 python/langsmith/evaluation/_arunner.py | 15 +++++++++++----
 python/langsmith/evaluation/_runner.py  | 18 ++++++++++--------
 2 files changed, 21 insertions(+), 12 deletions(-)

diff --git a/python/langsmith/evaluation/_arunner.py b/python/langsmith/evaluation/_arunner.py
index 80ba8db17..8a612b0b9 100644
--- a/python/langsmith/evaluation/_arunner.py
+++ b/python/langsmith/evaluation/_arunner.py
@@ -858,10 +858,17 @@ async def _aapply_summary_evaluators(
         summary_evaluators: Sequence[SUMMARY_EVALUATOR_T],
     ) -> AsyncIterator[EvaluationResults]:
         runs, examples, evaluation_results = [], [], []
-        async for row in self.aget_results():
-            runs.append(row["run"])
-            examples.append(row["example"])
-            evaluation_results.append(row["evaluation_results"]["results"])
+
+        async_examples = aitertools.ensure_async_iterator(await self.aget_examples())
+        async for run, example in aitertools.async_zip(
+            self.aget_runs(), async_examples
+        ):
+            runs.append(run)
+            examples.append(example)
+
+        async for evaluation_result in self.aget_evaluation_results():
+            evaluation_results.append(evaluation_result["results"])
+
         aggregate_feedback = []
         project_id = self._get_experiment().id if self._upload_results else None
         current_context = rh.get_tracing_context()
diff --git a/python/langsmith/evaluation/_runner.py b/python/langsmith/evaluation/_runner.py
index 5148e99be..dbed77c2a 100644
--- a/python/langsmith/evaluation/_runner.py
+++ b/python/langsmith/evaluation/_runner.py
@@ -1668,10 +1668,12 @@ def _apply_summary_evaluators(
         self, summary_evaluators: Sequence[SUMMARY_EVALUATOR_T]
     ) -> Generator[EvaluationResults, None, None]:
         runs, examples, evaluation_results = [], [], []
-        for row in self.get_results():
-            runs.append(row["run"])
-            examples.append(row["example"])
-            evaluation_results.append(row["evaluation_results"]["results"])
+        for run, example in zip(self.runs, self.examples):
+            runs.append(run)
+            examples.append(example)
+
+        for evaluation_result in self.evaluation_results:
+            evaluation_results.append(evaluation_result["results"])
         aggregate_feedback = []

         with ls_utils.ContextThreadPoolExecutor() as executor:
@@ -1791,15 +1793,15 @@ def _wrap(evaluator: SUMMARY_EVALUATOR_T) -> SUMMARY_EVALUATOR_T:
        @functools.wraps(evaluator)
        def _wrapper_inner(
-            runs: list[schemas.Run],
-            examples: list[schemas.Example],
-            evaluation_results: list[list[EvaluationResult]],
+            runs: Sequence[schemas.Run],
+            examples: Sequence[schemas.Example],
+            evaluation_results: Sequence[list[EvaluationResult]],
         ) -> Union[EvaluationResult, EvaluationResults]:
             @rh.traceable(name=eval_name)
             def _wrapper_super_inner(
                 runs_: str, examples_: str, evaluation_results_: str
             ) -> Union[EvaluationResult, EvaluationResults]:
-                return evaluator(runs, examples, evaluation_results)
+                return evaluator(list(runs), list(examples), list(evaluation_results))

             return _wrapper_super_inner(
                 f"Runs[] (Length={len(runs)})",
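
For context, here is a minimal illustrative sketch of a summary evaluator whose signature matches the call shape used in `_wrapper_super_inner` above, i.e. `evaluator(list(runs), list(examples), list(evaluation_results))`. The evaluator name and the exact-match scoring heuristic are hypothetical and not part of this patch; they only show the argument types the wrapper passes through.

```python
from typing import List

from langsmith import schemas
from langsmith.evaluation import EvaluationResult


def exact_match_rate(
    runs: List[schemas.Run],
    examples: List[schemas.Example],
    evaluation_results: List[List[EvaluationResult]],
) -> EvaluationResult:
    # Hypothetical summary evaluator: score the fraction of runs whose outputs
    # exactly match the reference outputs of the paired example.
    matches = sum(
        1 for run, example in zip(runs, examples) if run.outputs == example.outputs
    )
    return EvaluationResult(key="exact_match_rate", score=matches / max(len(runs), 1))
```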