diff --git a/python/langsmith/beta/_utils.py b/python/langsmith/_internal/_beta_decorator.py
similarity index 80%
rename from python/langsmith/beta/_utils.py
rename to python/langsmith/_internal/_beta_decorator.py
index d1ebcbe1b..433ade058 100644
--- a/python/langsmith/beta/_utils.py
+++ b/python/langsmith/_internal/_beta_decorator.py
@@ -11,7 +11,7 @@ def warn_beta(func: Callable) -> Callable:
     @functools.wraps(func)
     def wrapper(*args, **kwargs):
         warnings.warn(
-            f"Function {func.__name__} is in beta.", UserWarning, stacklevel=2
+            f"Function {func.__name__} is in beta.", LangSmithBetaWarning, stacklevel=2
         )
         return func(*args, **kwargs)
diff --git a/python/langsmith/beta/__init__.py b/python/langsmith/beta/__init__.py
index 9240296a3..f712c1adb 100644
--- a/python/langsmith/beta/__init__.py
+++ b/python/langsmith/beta/__init__.py
@@ -1,6 +1,6 @@
 """Beta functionality prone to change."""
 
+from langsmith._internal._beta_decorator import warn_beta
 from langsmith.beta._evals import compute_test_metrics, convert_runs_to_test
-from langsmith.beta._utils import warn_beta
 
 __all__ = ["convert_runs_to_test", "compute_test_metrics", "warn_beta"]
diff --git a/python/langsmith/beta/_evals.py b/python/langsmith/beta/_evals.py
index 03b099fff..de6103d81 100644
--- a/python/langsmith/beta/_evals.py
+++ b/python/langsmith/beta/_evals.py
@@ -9,9 +9,9 @@ import uuid
 from typing import DefaultDict, List, Optional, Sequence, Tuple, TypeVar
 
-import langsmith.beta._utils as beta_utils
 import langsmith.schemas as ls_schemas
 from langsmith import evaluation as ls_eval
+from langsmith._internal._beta_decorator import warn_beta
 from langsmith.client import Client
 
@@ -65,7 +65,7 @@ def _convert_root_run(root: ls_schemas.Run, run_to_example_map: dict) -> List[di
     return result
 
 
-@beta_utils.warn_beta
+@warn_beta
 def convert_runs_to_test(
     runs: Sequence[ls_schemas.Run],
     *,
@@ -196,7 +196,7 @@ def _outer_product(list1: List[T], list2: List[U]) -> List[Tuple[T, U]]:
     return list(itertools.product(list1, list2))
 
 
-@beta_utils.warn_beta
+@warn_beta
 def compute_test_metrics(
     project_name: str,
     *,
diff --git a/python/langsmith/client.py b/python/langsmith/client.py
index 82edecabf..b48a16be1 100644
--- a/python/langsmith/client.py
+++ b/python/langsmith/client.py
@@ -55,6 +55,7 @@ from langsmith import env as ls_env
 from langsmith import schemas as ls_schemas
 from langsmith import utils as ls_utils
+from langsmith._internal._beta_decorator import warn_beta
 
 if TYPE_CHECKING:
     import pandas as pd  # type: ignore
 
@@ -3412,6 +3413,118 @@ def list_examples(
             if limit is not None and i + 1 >= limit:
                 break
 
+    @warn_beta
+    def index_dataset(
+        self,
+        *,
+        dataset_id: ID_TYPE,
+        tag: str = "latest",
+        **kwargs: Any,
+    ) -> None:
+        """Enable dataset indexing. Examples are indexed by their inputs.
+
+        This enables searching for similar examples by inputs with
+        ``client.similar_examples()``.
+
+        Args:
+            dataset_id (UUID): The ID of the dataset to index.
+            tag (str, optional): The version of the dataset to index. If 'latest'
+                then any updates to the dataset (additions, updates, deletions of
+                examples) will be reflected in the index.
+
+        Returns:
+            None
+
+        Raises:
+            requests.HTTPError
+        """  # noqa: E501
+        dataset_id = _as_uuid(dataset_id, "dataset_id")
+        resp = self.request_with_retries(
+            "POST",
+            f"/datasets/{dataset_id}/index",
+            headers=self._headers,
+            data=json.dumps({"tag": tag, **kwargs}),
+        )
+        ls_utils.raise_for_status_with_text(resp)
+
+    # NOTE: dataset_name arg explicitly not supported to avoid extra API calls.
+    @warn_beta
+    def similar_examples(
+        self,
+        inputs: dict,
+        /,
+        *,
+        limit: int,
+        dataset_id: ID_TYPE,
+        **kwargs: Any,
+    ) -> List[ls_schemas.ExampleSearch]:
+        r"""Retrieve the dataset examples whose inputs best match the current inputs.
+
+        **Note**: Must have few-shot indexing enabled for the dataset. See
+        ``client.index_dataset()``.
+
+        Args:
+            inputs (dict): The inputs to use as a search query. Must match the dataset
+                input schema. Must be JSON serializable.
+            limit (int): The maximum number of examples to return.
+            dataset_id (str or UUID): The ID of the dataset to search over.
+            kwargs (Any): Additional keyword args to pass as part of request body.
+
+        Returns:
+            List of ExampleSearch objects.
+
+        Example:
+            .. code-block:: python
+
+                from langsmith import Client
+
+                client = Client()
+                client.similar_examples(
+                    {"question": "When would i use the runnable generator"},
+                    limit=3,
+                    dataset_id="...",
+                )
+
+            .. code-block:: pycon
+
+                [
+                    ExampleSearch(
+                        inputs={'question': 'How do I cache a Chat model? What caches can I use?'},
+                        outputs={'answer': 'You can use LangChain\'s caching layer for Chat Models. This can save you money by reducing the number of API calls you make to the LLM provider, if you\'re often requesting the same completion multiple times, and speed up your application.\n\n```python\n\nfrom langchain.cache import InMemoryCache\nlangchain.llm_cache = InMemoryCache()\n\n# The first time, it is not yet in cache, so it should take longer\nllm.predict(\'Tell me a joke\')\n\n```\n\nYou can also use SQLite Cache which uses a SQLite database:\n\n```python\n rm .langchain.db\n\nfrom langchain.cache import SQLiteCache\nlangchain.llm_cache = SQLiteCache(database_path=".langchain.db")\n\n# The first time, it is not yet in cache, so it should take longer\nllm.predict(\'Tell me a joke\') \n```\n'},
+                        metadata=None,
+                        id=UUID('b2ddd1c4-dff6-49ae-8544-f48e39053398'),
+                        dataset_id=UUID('01b6ce0f-bfb6-4f48-bbb8-f19272135d40')
+                    ),
+                    ExampleSearch(
+                        inputs={'question': "What's a runnable lambda?"},
+                        outputs={'answer': "A runnable lambda is an object that implements LangChain's `Runnable` interface and runs a callable (i.e., a function). Note the function must accept a single argument."},
+                        metadata=None,
+                        id=UUID('f94104a7-2434-4ba7-8293-6a283f4860b4'),
+                        dataset_id=UUID('01b6ce0f-bfb6-4f48-bbb8-f19272135d40')
+                    ),
+                    ExampleSearch(
+                        inputs={'question': 'Show me how to use RecursiveURLLoader'},
+                        outputs={'answer': 'The RecursiveURLLoader comes from the langchain.document_loaders.recursive_url_loader module. Here\'s an example of how to use it:\n\n```python\nfrom langchain.document_loaders.recursive_url_loader import RecursiveUrlLoader\n\n# Create an instance of RecursiveUrlLoader with the URL you want to load\nloader = RecursiveUrlLoader(url="https://example.com")\n\n# Load all child links from the URL page\nchild_links = loader.load()\n\n# Print the child links\nfor link in child_links:\n    print(link)\n```\n\nMake sure to replace "https://example.com" with the actual URL you want to load. The load() method returns a list of child links found on the URL page. You can iterate over this list to access each child link.'},
+                        metadata=None,
+                        id=UUID('0308ea70-a803-4181-a37d-39e95f138f8c'),
+                        dataset_id=UUID('01b6ce0f-bfb6-4f48-bbb8-f19272135d40')
+                    ),
+                ]
+
+        """  # noqa: E501
+        dataset_id = _as_uuid(dataset_id, "dataset_id")
+        resp = self.request_with_retries(
+            "POST",
+            f"/datasets/{dataset_id}/search",
+            headers=self._headers,
+            data=json.dumps({"inputs": inputs, "limit": limit, **kwargs}),
+        )
+        ls_utils.raise_for_status_with_text(resp)
+        examples = []
+        for ex in resp.json()["examples"]:
+            examples.append(ls_schemas.ExampleSearch(**ex, dataset_id=dataset_id))
+        return examples
+
     def update_example(
         self,
         example_id: ID_TYPE,
diff --git a/python/langsmith/evaluation/llm_evaluator.py b/python/langsmith/evaluation/llm_evaluator.py
index d0ef4fec3..3ae7b333c 100644
--- a/python/langsmith/evaluation/llm_evaluator.py
+++ b/python/langsmith/evaluation/llm_evaluator.py
@@ -4,7 +4,7 @@
 
 from pydantic import BaseModel
 
-import langsmith.beta._utils as beta_utils
+from langsmith._internal._beta_decorator import warn_beta
 from langsmith.evaluation import EvaluationResult, EvaluationResults, RunEvaluator
 from langsmith.schemas import Example, Run
 
@@ -201,7 +201,7 @@ def _initialize(
         chat_model = chat_model.with_structured_output(self.score_schema)
         self.runnable = self.prompt | chat_model
 
-    @beta_utils.warn_beta
+    @warn_beta
     def evaluate_run(
         self, run: Run, example: Optional[Example] = None
     ) -> Union[EvaluationResult, EvaluationResults]:
@@ -210,7 +210,7 @@ def evaluate_run(
         output: dict = cast(dict, self.runnable.invoke(variables))
         return self._parse_output(output)
 
-    @beta_utils.warn_beta
+    @warn_beta
     async def aevaluate_run(
         self, run: Run, example: Optional[Example] = None
     ) -> Union[EvaluationResult, EvaluationResults]:
diff --git a/python/langsmith/schemas.py b/python/langsmith/schemas.py
index c23b2b713..3da1d4650 100644
--- a/python/langsmith/schemas.py
+++ b/python/langsmith/schemas.py
@@ -99,6 +99,12 @@ def url(self) -> Optional[str]:
         return None
 
 
+class ExampleSearch(ExampleBase):
+    """Example returned via search."""
+
+    id: UUID
+
+
 class ExampleUpdate(BaseModel):
     """Update class for Example."""
 
diff --git a/python/pyproject.toml b/python/pyproject.toml
index 7c7c95888..1242fb6d7 100644
--- a/python/pyproject.toml
+++ b/python/pyproject.toml
@@ -101,3 +101,4 @@ disallow_untyped_defs = "True"
 
 [tool.pytest.ini_options]
 asyncio_mode = "auto"
+markers = [ "slow: long-running tests",]
diff --git a/python/tests/integration_tests/conftest.py b/python/tests/integration_tests/conftest.py
new file mode 100644
index 000000000..8ad66c3d2
--- /dev/null
+++ b/python/tests/integration_tests/conftest.py
@@ -0,0 +1,17 @@
+import pytest
+
+
+def pytest_addoption(parser):
+    parser.addoption(
+        "--runslow", action="store_true", default=False, help="run slow tests"
+    )
+
+
+def pytest_collection_modifyitems(config, items):
+    if config.getoption("--runslow"):
+        # --runslow given in cli: do not skip slow tests
+        return
+    skip_slow = pytest.mark.skip(reason="need --runslow option to run")
+    for item in items:
+        if "slow" in item.keywords:
+            item.add_marker(skip_slow)
diff --git a/python/tests/integration_tests/test_client.py b/python/tests/integration_tests/test_client.py
index 22f5355c9..d939111d4 100644
--- a/python/tests/integration_tests/test_client.py
+++ b/python/tests/integration_tests/test_client.py
@@ -269,6 +269,50 @@ def test_list_examples(langchain_client: Client) -> None:
     langchain_client.delete_dataset(dataset_id=dataset.id)
 
 
+@pytest.mark.slow
+def test_similar_examples(langchain_client: Client) -> None:
+    inputs = [{"text": "how are you"}, {"text": "good bye"}, {"text": "see ya later"}]
+    outputs = [
+        {"response": "good how are you"},
+        {"response": "ta ta"},
+        {"response": "tootles"},
+    ]
+    dataset_name = "__test_similar_examples" + uuid4().hex[:4]
+    dataset = langchain_client.create_dataset(
+        dataset_name=dataset_name,
+        inputs_schema={
+            "$schema": "http://json-schema.org/draft-07/schema#",
+            "type": "object",
+            "properties": {
+                "text": {"type": "string"},
+            },
+            "required": ["text"],
+            "additionalProperties": False,
+        },
+        outputs_schema={
+            "$schema": "http://json-schema.org/draft-07/schema#",
+            "type": "object",
+            "properties": {
+                "response": {"type": "string"},
+            },
+            "required": ["response"],
+            "additionalProperties": False,
+        },
+    )
+    langchain_client.create_examples(
+        inputs=inputs, outputs=outputs, dataset_id=dataset.id
+    )
+    langchain_client.index_dataset(dataset_id=dataset.id)
+    # Need to wait for indexing to finish.
+    time.sleep(5)
+    similar_list = langchain_client.similar_examples(
+        {"text": "howdy"}, limit=2, dataset_id=dataset.id
+    )
+    assert len(similar_list) == 2
+
+    langchain_client.delete_dataset(dataset_id=dataset.id)
+
+
 @pytest.mark.skip(reason="This test is flaky")
 def test_persist_update_run(langchain_client: Client) -> None:
     """Test the persist and update methods work as expected."""
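
Taken together, the client.py changes above form a two-step few-shot retrieval workflow: enable indexing on a dataset once with Client.index_dataset(), then fetch the stored examples nearest a query with Client.similar_examples(). A minimal usage sketch of the new methods, assembled from the docstrings and integration test in this patch; the dataset ID is a placeholder, and the short sleep mirrors the indexing delay the test waits out:

    import time

    from langsmith import Client

    client = Client()
    dataset_id = "00000000-0000-0000-0000-000000000000"  # placeholder UUID

    # One-time setup. Both methods are decorated with @warn_beta, so each
    # call emits a LangSmithBetaWarning. With tag="latest", later additions,
    # updates, and deletions of examples stay reflected in the index.
    client.index_dataset(dataset_id=dataset_id)

    # Indexing happens server-side and is not immediate; the integration
    # test above sleeps before searching, so do the same here.
    time.sleep(5)

    # Retrieve the two examples whose inputs best match the query. The
    # query dict must match the dataset's input schema.
    for example in client.similar_examples(
        {"text": "howdy"}, limit=2, dataset_id=dataset_id
    ):
        print(example.id, example.inputs)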