python: add Client.similar_examples() and Client.index_dataset() (#924)
Add methods for interacting with the `datasets/{dataset_id}/index` and
`datasets/{dataset_id}/search` endpoints.

Indexing takes a little while to complete, so the integration test is skipped by
default. It could be enabled, or updated to use an existing index instead of
creating one as part of the test.
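
A minimal sketch of how the two new methods fit together, assuming an existing dataset ID and that indexing has had time to finish (the dataset ID and query keys below are placeholders):

```python
from langsmith import Client

client = Client()
dataset_id = "..."  # ID of an existing dataset (placeholder)

# Enable few-shot indexing. With tag="latest", later additions, updates,
# and deletions of examples are reflected in the index automatically.
client.index_dataset(dataset_id=dataset_id, tag="latest")

# Once indexing has caught up, fetch the examples whose inputs best match
# the query inputs (the query must match the dataset's input schema).
similar = client.similar_examples(
    {"question": "How do I cache a chat model?"},
    limit=3,
    dataset_id=dataset_id,
)
for example in similar:
    print(example.id, example.inputs)
```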
baskaryan authored Aug 20, 2024
2 parents 179d606 + e9fada2 commit bee2a0c
Showing 9 changed files with 189 additions and 8 deletions.
2 changes: 1 addition & 1 deletion python/langsmith/_internal/_beta_decorator.py
@@ -11,7 +11,7 @@ def warn_beta(func: Callable) -> Callable:
@functools.wraps(func)
def wrapper(*args, **kwargs):
warnings.warn(
f"Function {func.__name__} is in beta.", UserWarning, stacklevel=2
f"Function {func.__name__} is in beta.", LangSmithBetaWarning, stacklevel=2
)
return func(*args, **kwargs)

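Because the decorator now emits a dedicated `LangSmithBetaWarning` instead of a bare `UserWarning`, callers can filter just the beta-API warnings. A small sketch, assuming `LangSmithBetaWarning` is importable from `langsmith.utils` (its definition is not part of this diff):

```python
import warnings

from langsmith import utils as ls_utils  # assumed home of LangSmithBetaWarning

# Silence only the beta-API warnings, leaving other warnings intact.
warnings.filterwarnings("ignore", category=ls_utils.LangSmithBetaWarning)

# Or escalate them to errors in CI to catch accidental reliance on beta APIs:
# warnings.filterwarnings("error", category=ls_utils.LangSmithBetaWarning)
```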
2 changes: 1 addition & 1 deletion python/langsmith/beta/__init__.py
@@ -1,6 +1,6 @@
"""Beta functionality prone to change."""

+ from langsmith._internal._beta_decorator import warn_beta
from langsmith.beta._evals import compute_test_metrics, convert_runs_to_test
- from langsmith.beta._utils import warn_beta

__all__ = ["convert_runs_to_test", "compute_test_metrics", "warn_beta"]
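
`warn_beta` remains importable from `langsmith.beta` even though its implementation now lives in the internal module. A quick illustration of the decorator on a hypothetical helper:

```python
from langsmith.beta import warn_beta


@warn_beta
def my_experimental_helper() -> None:
    """Hypothetical function used only to illustrate the decorator."""


my_experimental_helper()  # warns: "Function my_experimental_helper is in beta."
```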
6 changes: 3 additions & 3 deletions python/langsmith/beta/_evals.py
@@ -9,9 +9,9 @@
import uuid
from typing import DefaultDict, List, Optional, Sequence, Tuple, TypeVar

- import langsmith.beta._utils as beta_utils
import langsmith.schemas as ls_schemas
from langsmith import evaluation as ls_eval
+ from langsmith._internal._beta_decorator import warn_beta
from langsmith.client import Client


@@ -65,7 +65,7 @@ def _convert_root_run(root: ls_schemas.Run, run_to_example_map: dict) -> List[di
return result


- @beta_utils.warn_beta
+ @warn_beta
def convert_runs_to_test(
runs: Sequence[ls_schemas.Run],
*,
@@ -196,7 +196,7 @@ def _outer_product(list1: List[T], list2: List[U]) -> List[Tuple[T, U]]:
return list(itertools.product(list1, list2))


- @beta_utils.warn_beta
+ @warn_beta
def compute_test_metrics(
project_name: str,
*,
113 changes: 113 additions & 0 deletions python/langsmith/client.py
@@ -55,6 +55,7 @@
from langsmith import env as ls_env
from langsmith import schemas as ls_schemas
from langsmith import utils as ls_utils
+ from langsmith._internal._beta_decorator import warn_beta

if TYPE_CHECKING:
import pandas as pd # type: ignore
@@ -3412,6 +3413,118 @@ def list_examples(
if limit is not None and i + 1 >= limit:
break

@warn_beta
def index_dataset(
self,
*,
dataset_id: ID_TYPE,
tag: str = "latest",
**kwargs: Any,
) -> None:
"""Enable dataset indexing. Examples are indexed by their inputs.
This enables searching for similar examples by inputs with
``client.similar_examples()``.
Args:
dataset_id (UUID): The ID of the dataset to index.
tag (str, optional): The version of the dataset to index. If 'latest'
then any updates to the dataset (additions, updates, deletions of
examples) will be reflected in the index.
Returns:
None
Raises:
requests.HTTPError
""" # noqa: E501
dataset_id = _as_uuid(dataset_id, "dataset_id")
resp = self.request_with_retries(
"POST",
f"/datasets/{dataset_id}/index",
headers=self._headers,
data=json.dumps({"tag": tag, **kwargs}),
)
ls_utils.raise_for_status_with_text(resp)

# NOTE: dataset_name arg explicitly not supported to avoid extra API calls.
@warn_beta
def similar_examples(
self,
inputs: dict,
/,
*,
limit: int,
dataset_id: ID_TYPE,
**kwargs: Any,
) -> List[ls_schemas.ExampleSearch]:
r"""Retrieve the dataset examples whose inputs best match the current inputs.
**Note**: Must have few-shot indexing enabled for the dataset. See
``client.index_dataset()``.
Args:
inputs (dict): The inputs to use as a search query. Must match the dataset
input schema. Must be JSON serializable.
limit (int): The maximum number of examples to return.
dataset_id (str or UUID): The ID of the dataset to search over.
kwargs (Any): Additional keyword args to pass as part of request body.
Returns:
List of ExampleSearch objects.
Example:
.. code-block:: python
from langsmith import Client
client = Client()
client.similar_examples(
{"question": "When would i use the runnable generator"},
limit=3,
dataset_id="...",
)
.. code-block:: pycon
[
ExampleSearch(
inputs={'question': 'How do I cache a Chat model? What caches can I use?'},
outputs={'answer': 'You can use LangChain\'s caching layer for Chat Models. This can save you money by reducing the number of API calls you make to the LLM provider, if you\'re often requesting the same completion multiple times, and speed up your application.\n\n```python\n\nfrom langchain.cache import InMemoryCache\nlangchain.llm_cache = InMemoryCache()\n\n# The first time, it is not yet in cache, so it should take longer\nllm.predict(\'Tell me a joke\')\n\n```\n\nYou can also use SQLite Cache which uses a SQLite database:\n\n```python\n rm .langchain.db\n\nfrom langchain.cache import SQLiteCache\nlangchain.llm_cache = SQLiteCache(database_path=".langchain.db")\n\n# The first time, it is not yet in cache, so it should take longer\nllm.predict(\'Tell me a joke\') \n```\n'},
metadata=None,
id=UUID('b2ddd1c4-dff6-49ae-8544-f48e39053398'),
dataset_id=UUID('01b6ce0f-bfb6-4f48-bbb8-f19272135d40')
),
ExampleSearch(
inputs={'question': "What's a runnable lambda?"},
outputs={'answer': "A runnable lambda is an object that implements LangChain's `Runnable` interface and runs a callbale (i.e., a function). Note the function must accept a single argument."},
metadata=None,
id=UUID('f94104a7-2434-4ba7-8293-6a283f4860b4'),
dataset_id=UUID('01b6ce0f-bfb6-4f48-bbb8-f19272135d40')
),
ExampleSearch(
inputs={'question': 'Show me how to use RecursiveURLLoader'},
outputs={'answer': 'The RecursiveURLLoader comes from the langchain.document_loaders.recursive_url_loader module. Here\'s an example of how to use it:\n\n```python\nfrom langchain.document_loaders.recursive_url_loader import RecursiveUrlLoader\n\n# Create an instance of RecursiveUrlLoader with the URL you want to load\nloader = RecursiveUrlLoader(url="https://example.com")\n\n# Load all child links from the URL page\nchild_links = loader.load()\n\n# Print the child links\nfor link in child_links:\n print(link)\n```\n\nMake sure to replace "https://example.com" with the actual URL you want to load. The load() method returns a list of child links found on the URL page. You can iterate over this list to access each child link.'},
metadata=None,
id=UUID('0308ea70-a803-4181-a37d-39e95f138f8c'),
dataset_id=UUID('01b6ce0f-bfb6-4f48-bbb8-f19272135d40')
),
]
""" # noqa: E501
dataset_id = _as_uuid(dataset_id, "dataset_id")
resp = self.request_with_retries(
"POST",
f"/datasets/{dataset_id}/search",
headers=self._headers,
data=json.dumps({"inputs": inputs, "limit": limit, **kwargs}),
)
ls_utils.raise_for_status_with_text(resp)
examples = []
for ex in resp.json()["examples"]:
examples.append(ls_schemas.ExampleSearch(**ex, dataset_id=dataset_id))
return examples

def update_example(
self,
example_id: ID_TYPE,
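One way the search results might be used, assuming the indexed dataset holds question/answer pairs like the docstring example above; the prompt format is purely illustrative and not part of the client API:

```python
from langsmith import Client

client = Client()
question = "How do I stream output from a chain?"

examples = client.similar_examples(
    {"question": question},
    limit=3,
    dataset_id="...",  # ID of a dataset with indexing enabled (placeholder)
)

# Assemble a few-shot block from the retrieved examples.
few_shot = "\n\n".join(
    f"Q: {ex.inputs['question']}\nA: {ex.outputs['answer']}" for ex in examples
)
prompt = f"{few_shot}\n\nQ: {question}\nA:"
```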
6 changes: 3 additions & 3 deletions python/langsmith/evaluation/llm_evaluator.py
@@ -4,7 +4,7 @@

from pydantic import BaseModel

- import langsmith.beta._utils as beta_utils
+ from langsmith._internal._beta_decorator import warn_beta
from langsmith.evaluation import EvaluationResult, EvaluationResults, RunEvaluator
from langsmith.schemas import Example, Run

@@ -201,7 +201,7 @@ def _initialize(
chat_model = chat_model.with_structured_output(self.score_schema)
self.runnable = self.prompt | chat_model

- @beta_utils.warn_beta
+ @warn_beta
def evaluate_run(
self, run: Run, example: Optional[Example] = None
) -> Union[EvaluationResult, EvaluationResults]:
@@ -210,7 +210,7 @@ def evaluate_run(
output: dict = cast(dict, self.runnable.invoke(variables))
return self._parse_output(output)

- @beta_utils.warn_beta
+ @warn_beta
async def aevaluate_run(
self, run: Run, example: Optional[Example] = None
) -> Union[EvaluationResult, EvaluationResults]:
6 changes: 6 additions & 0 deletions python/langsmith/schemas.py
@@ -99,6 +99,12 @@ def url(self) -> Optional[str]:
return None


class ExampleSearch(ExampleBase):
"""Example returned via search."""

id: UUID


class ExampleUpdate(BaseModel):
"""Update class for Example."""

1 change: 1 addition & 0 deletions python/pyproject.toml
@@ -101,3 +101,4 @@ disallow_untyped_defs = "True"

[tool.pytest.ini_options]
asyncio_mode = "auto"
markers = [ "slow: long-running tests",]
17 changes: 17 additions & 0 deletions python/tests/integration_tests/conftest.py
@@ -0,0 +1,17 @@
import pytest


def pytest_addoption(parser):
parser.addoption(
"--runslow", action="store_true", default=False, help="run slow tests"
)


def pytest_collection_modifyitems(config, items):
if config.getoption("--runslow"):
# --runslow given in cli: do not skip slow tests
return
skip_slow = pytest.mark.skip(reason="need --runslow option to run")
for item in items:
if "slow" in item.keywords:
item.add_marker(skip_slow)
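
Together with the `slow` marker registered in `pyproject.toml`, this conftest makes long-running integration tests opt-in. A minimal illustration (the test name is hypothetical):

```python
import pytest


@pytest.mark.slow
def test_expensive_roundtrip() -> None:
    # Skipped by default; collected and run only when pytest is invoked with --runslow.
    ...
```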
44 changes: 44 additions & 0 deletions python/tests/integration_tests/test_client.py
@@ -269,6 +269,50 @@ def test_list_examples(langchain_client: Client) -> None:
langchain_client.delete_dataset(dataset_id=dataset.id)


@pytest.mark.slow
def test_similar_examples(langchain_client: Client) -> None:
inputs = [{"text": "how are you"}, {"text": "good bye"}, {"text": "see ya later"}]
outputs = [
{"response": "good how are you"},
{"response": "ta ta"},
{"response": "tootles"},
]
dataset_name = "__test_similar_examples" + uuid4().hex[:4]
dataset = langchain_client.create_dataset(
dataset_name=dataset_name,
inputs_schema={
"$schema": "http://json-schema.org/draft-07/schema#",
"type": "object",
"properties": {
"text": {"type": "string"},
},
"required": ["text"],
"additionalProperties": False,
},
outputs_schema={
"$schema": "http://json-schema.org/draft-07/schema#",
"type": "object",
"properties": {
"response": {"type": "string"},
},
"required": ["response"],
"additionalProperties": False,
},
)
langchain_client.create_examples(
inputs=inputs, outputs=outputs, dataset_id=dataset.id
)
langchain_client.index_dataset(dataset_id=dataset.id)
# Need to wait for indexing to finish.
time.sleep(5)
similar_list = langchain_client.similar_examples(
{"text": "howdy"}, limit=2, dataset_id=dataset.id
)
assert len(similar_list) == 2

langchain_client.delete_dataset(dataset_id=dataset.id)


@pytest.mark.skip(reason="This test is flaky")
def test_persist_update_run(langchain_client: Client) -> None:
"""Test the persist and update methods work as expected."""
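The new test waits a fixed five seconds before searching. A polling loop is one possible alternative, sketched here under the assumption that `similar_examples` returns no results (or errors) until indexing has caught up — the exact server behavior is not specified in this diff. The probe inputs must match the dataset's input schema (here, the `text` field from the test above):

```python
import time

from langsmith import Client


def wait_for_index(client: Client, dataset_id, *, timeout: float = 30.0) -> None:
    """Hypothetical helper: poll until the index answers, or give up."""
    deadline = time.monotonic() + timeout
    while time.monotonic() < deadline:
        try:
            if client.similar_examples({"text": "ping"}, limit=1, dataset_id=dataset_id):
                return
        except Exception:
            pass  # the index may not be queryable yet
        time.sleep(1)
    raise TimeoutError("Dataset indexing did not finish in time.")
```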
