[Python] Fix incremental streaming of eval steps (#944)

langchain-ai · Aug 23, 2024 · 8f4303d · 8f4303d
1 parent b846a4c
commit 8f4303d
Show file tree

Hide file tree

Showing 6 changed files with 340 additions and 7 deletions.
diff --git a/python/langsmith/evaluation/_arunner.py b/python/langsmith/evaluation/_arunner.py
@@ -329,6 +329,7 @@ async def aevaluate_existing(
         max_concurrency=max_concurrency,
         client=client,
         blocking=blocking,
+        experiment=project,
     )
 
 
@@ -627,6 +628,7 @@ async def _arun_evaluators(
                 "project_name": "evaluators",
                 "metadata": metadata,
                 "enabled": True,
+                "client": self.client,
             }
         ):
             run = current_results["run"]
@@ -682,6 +684,7 @@ async def _aapply_summary_evaluators(
                 "project_name": "evaluators",
                 "metadata": metadata,
                 "enabled": True,
+                "client": self.client,
             }
         ):
             for evaluator in summary_evaluators:

diff --git a/python/langsmith/evaluation/_runner.py b/python/langsmith/evaluation/_runner.py
@@ -1084,7 +1084,7 @@ def dataset_id(self) -> str:
     @property
     def evaluation_results(self) -> Iterable[EvaluationResults]:
         if self._evaluation_results is None:
-            return [{"results": []} for _ in self.examples]
+            return ({"results": []} for _ in self.examples)
         return self._evaluation_results
 
     @property
@@ -1256,6 +1256,7 @@ def _run_evaluators(
                 "project_name": "evaluators",
                 "metadata": metadata,
                 "enabled": True,
+                "client": self.client,
             }
         ):
             run = current_results["run"]
@@ -1340,6 +1341,8 @@ def _apply_summary_evaluators(
                     **current_context,
                     "project_name": "evaluators",
                     "metadata": metadata,
+                    "client": self.client,
+                    "enabled": True,
                 }
             ):
                 for evaluator in summary_evaluators:

diff --git a/python/langsmith/evaluation/evaluator.py b/python/langsmith/evaluation/evaluator.py
@@ -328,7 +328,7 @@ def __call__(
 
     def __repr__(self) -> str:
         """Represent the DynamicRunEvaluator object."""
-        return f"<DynamicRunEvaluator {getattr(self.func, '__name__')}>"
+        return f"<DynamicRunEvaluator {self._name}>"
 
 
 def run_evaluator(

diff --git a/python/langsmith/run_helpers.py b/python/langsmith/run_helpers.py
@@ -58,12 +58,14 @@
 _TRACING_ENABLED = contextvars.ContextVar[Optional[bool]](
     "_TRACING_ENABLED", default=None
 )
+_CLIENT = contextvars.ContextVar[Optional[ls_client.Client]]("_CLIENT", default=None)
 _CONTEXT_KEYS: Dict[str, contextvars.ContextVar] = {
     "parent": _PARENT_RUN_TREE,
     "project_name": _PROJECT_NAME,
     "tags": _TAGS,
     "metadata": _METADATA,
     "enabled": _TRACING_ENABLED,
+    "client": _CLIENT,
 }
 
 
@@ -83,6 +85,7 @@ def get_tracing_context(
             "tags": _TAGS.get(),
             "metadata": _METADATA.get(),
             "enabled": _TRACING_ENABLED.get(),
+            "client": _CLIENT.get(),
         }
     return {k: context.get(v) for k, v in _CONTEXT_KEYS.items()}
 
@@ -102,6 +105,7 @@ def tracing_context(
     metadata: Optional[Dict[str, Any]] = None,
     parent: Optional[Union[run_trees.RunTree, Mapping, str]] = None,
     enabled: Optional[bool] = None,
+    client: Optional[ls_client.Client] = None,
     **kwargs: Any,
 ) -> Generator[None, None, None]:
     """Set the tracing context for a block of code.
@@ -113,9 +117,11 @@ def tracing_context(
         parent: The parent run to use for the context. Can be a Run/RunTree object,
             request headers (for distributed tracing), or the dotted order string.
             Defaults to None.
+        client: The client to use for logging the run to LangSmith. Defaults to None.
         enabled: Whether tracing is enabled. Defaults to None, meaning it will use the
             current context value or environment variables.
 
+
     """
     if kwargs:
         # warn
@@ -129,14 +135,14 @@ def tracing_context(
         tags = sorted(set(tags or []) | set(parent_run.tags or []))
         metadata = {**parent_run.metadata, **(metadata or {})}
     enabled = enabled if enabled is not None else current_context.get("enabled")
-
     _set_tracing_context(
         {
             "parent": parent_run,
             "project_name": project_name,
             "tags": tags,
             "metadata": metadata,
             "enabled": enabled,
+            "client": client,
         }
     )
     try:
@@ -829,11 +835,12 @@ def _setup(self) -> run_trees.RunTree:
 
         outer_tags = _TAGS.get()
         outer_metadata = _METADATA.get()
+        client_ = self.client or self.old_ctx.get("client")
         parent_run_ = _get_parent_run(
             {
                 "parent": self.parent,
                 "run_tree": self.run_tree,
-                "client": self.client,
+                "client": client_,
             }
         )
 
@@ -870,7 +877,7 @@ def _setup(self) -> run_trees.RunTree:
                 project_name=project_name_ or "default",
                 inputs=self.inputs or {},
                 tags=tags_,
-                client=self.client,  # type: ignore[arg-type]
+                client=client_,  # type: ignore
             )
 
         if enabled:
@@ -879,6 +886,7 @@ def _setup(self) -> run_trees.RunTree:
             _METADATA.set(metadata)
             _PARENT_RUN_TREE.set(self.new_run)
             _PROJECT_NAME.set(project_name_)
+            _CLIENT.set(client_)
 
         return self.new_run
 
@@ -1248,7 +1256,7 @@ def _setup_run(
     outer_project = _PROJECT_NAME.get()
     langsmith_extra = langsmith_extra or LangSmithExtra()
     name = langsmith_extra.get("name") or container_input.get("name")
-    client_ = langsmith_extra.get("client", client)
+    client_ = langsmith_extra.get("client", client) or _CLIENT.get()
     parent_run_ = _get_parent_run(
         {**langsmith_extra, "client": client_}, kwargs.get("config")
     )

diff --git a/python/pyproject.toml b/python/pyproject.toml
@@ -1,6 +1,6 @@
 [tool.poetry]
 name = "langsmith"
-version = "0.1.103"
+version = "0.1.104"
 description = "Client library to connect to the LangSmith LLM Tracing and Evaluation Platform."
 authors = ["LangChain <support@langchain.dev>"]
 license = "MIT"