From 3f1a2778d4104d5fc7449a72935ca56df777fc61 Mon Sep 17 00:00:00 2001 From: Bagatur Date: Thu, 15 Aug 2024 13:35:10 -0700 Subject: [PATCH 01/19] python: add Client.search_examples() --- python/langsmith/_expect.py | 6 ++- python/langsmith/_internal/_aiter.py | 6 ++- python/langsmith/_testing.py | 6 ++- python/langsmith/client.py | 57 +++++++++++++++++++++++++++- python/langsmith/run_helpers.py | 6 ++- python/langsmith/schemas.py | 4 ++ 6 files changed, 75 insertions(+), 10 deletions(-) diff --git a/python/langsmith/_expect.py b/python/langsmith/_expect.py index 967390597..3b69deb95 100644 --- a/python/langsmith/_expect.py +++ b/python/langsmith/_expect.py @@ -410,10 +410,12 @@ def score( ## Private Methods @overload - def __call__(self, value: Any, /) -> _Matcher: ... + def __call__(self, value: Any, /) -> _Matcher: + ... @overload - def __call__(self, /, *, client: ls_client.Client) -> _Expect: ... + def __call__(self, /, *, client: ls_client.Client) -> _Expect: + ... def __call__( self, diff --git a/python/langsmith/_internal/_aiter.py b/python/langsmith/_internal/_aiter.py index 7ae217f68..e359f28b9 100644 --- a/python/langsmith/_internal/_aiter.py +++ b/python/langsmith/_internal/_aiter.py @@ -185,10 +185,12 @@ def __len__(self) -> int: return len(self._children) @overload - def __getitem__(self, item: int) -> AsyncIterator[T]: ... + def __getitem__(self, item: int) -> AsyncIterator[T]: + ... @overload - def __getitem__(self, item: slice) -> Tuple[AsyncIterator[T], ...]: ... + def __getitem__(self, item: slice) -> Tuple[AsyncIterator[T], ...]: + ... def __getitem__( self, item: Union[int, slice] diff --git a/python/langsmith/_testing.py b/python/langsmith/_testing.py index 3d5ac9c3b..20da5a39a 100644 --- a/python/langsmith/_testing.py +++ b/python/langsmith/_testing.py @@ -41,7 +41,8 @@ class SkipException(Exception): # type: ignore[no-redef] @overload def test( func: Callable, -) -> Callable: ... +) -> Callable: + ... @overload @@ -51,7 +52,8 @@ def test( output_keys: Optional[Sequence[str]] = None, client: Optional[ls_client.Client] = None, test_suite_name: Optional[str] = None, -) -> Callable[[Callable], Callable]: ... +) -> Callable[[Callable], Callable]: + ... def test(*args: Any, **kwargs: Any) -> Callable: diff --git a/python/langsmith/client.py b/python/langsmith/client.py index 82edecabf..f073835e4 100644 --- a/python/langsmith/client.py +++ b/python/langsmith/client.py @@ -416,13 +416,15 @@ def _as_uuid(value: ID_TYPE, var: Optional[str] = None) -> uuid.UUID: @typing.overload -def _ensure_uuid(value: Optional[Union[str, uuid.UUID]]) -> uuid.UUID: ... +def _ensure_uuid(value: Optional[Union[str, uuid.UUID]]) -> uuid.UUID: + ... @typing.overload def _ensure_uuid( value: Optional[Union[str, uuid.UUID]], *, accept_null: bool = True -) -> Optional[uuid.UUID]: ... +) -> Optional[uuid.UUID]: + ... def _ensure_uuid(value: Optional[Union[str, uuid.UUID]], *, accept_null: bool = False): @@ -3412,6 +3414,57 @@ def list_examples( if limit is not None and i + 1 >= limit: break + @ls_utils.xor_args(("dataset_name", "dataset_id")) + def search_examples( + self, + query: dict, + /, + limit: int, + dataset_id: Optional[ID_TYPE] = None, + dataset_name: Optional[str] = None, + **kwargs: Any, + ) -> List[ls_schemas.ExampleBase]: + """Retrieve the dataset examples whose inputs best match the query. + + **Note**: Must have few-shot indexing enabled for the dataset. See (TODO) method + for how to enable indexing. + + Args: + query (dict): The query to search against. Must be JSON serializable. + limit (int): The maximum number of examples to return. + dataset_id (UUID, optional): The ID of the dataset to filter by. + Defaults to None. Must specify one of ``dataset_id`` or + ``dataset_name``. + dataset_name (str, optional): The name of the dataset to filter by. + Defaults to None. Must specify one of ``dataset_id`` or + ``dataset_name``. + kwargs (Any): Additional keyword args to pass as part of request body. + + Returns: + List of ExampleSearch. + """ + if dataset_id is None: + dataset_id = self.read_dataset(dataset_name=dataset_name).id + dataset_id = _as_uuid(dataset_id, "dataset_id") + few_shot_resp = self.request_with_retries( + "POST", + f"/datasets/{dataset_id}/search", + headers=self._headers, + data=json.dumps({"inputs": query, "limit": limit, **kwargs}), + ) + ls_utils.raise_for_status_with_text(few_shot_resp) + examples = [] + for res in few_shot_resp.json()["examples"]: + examples.append( + ls_schemas.ExampleSearch( + **res, + dataset_id=dataset_id, + _host_url=self._host_url, + _tenant_id=self._get_optional_tenant_id(), + ) + ) + return examples + def update_example( self, example_id: ID_TYPE, diff --git a/python/langsmith/run_helpers.py b/python/langsmith/run_helpers.py index 05f2534fe..6d72f8568 100644 --- a/python/langsmith/run_helpers.py +++ b/python/langsmith/run_helpers.py @@ -232,7 +232,8 @@ def __call__( @overload def traceable( func: Callable[P, R], -) -> SupportsLangsmithExtra[P, R]: ... +) -> SupportsLangsmithExtra[P, R]: + ... @overload @@ -248,7 +249,8 @@ def traceable( process_inputs: Optional[Callable[[dict], dict]] = None, process_outputs: Optional[Callable[..., dict]] = None, _invocation_params_fn: Optional[Callable[[dict], dict]] = None, -) -> Callable[[Callable[P, R]], SupportsLangsmithExtra[P, R]]: ... +) -> Callable[[Callable[P, R]], SupportsLangsmithExtra[P, R]]: + ... def traceable( diff --git a/python/langsmith/schemas.py b/python/langsmith/schemas.py index c23b2b713..60085ea00 100644 --- a/python/langsmith/schemas.py +++ b/python/langsmith/schemas.py @@ -98,6 +98,10 @@ def url(self) -> Optional[str]: return f"{self._host_url}{path}" return None +class ExampleSearch(ExampleBase): + """Example returned via search.""" + id: UUID + class ExampleUpdate(BaseModel): """Update class for Example.""" From 678d61a60db787d53668401446ab5ef49c8d2be1 Mon Sep 17 00:00:00 2001 From: Bagatur Date: Thu, 15 Aug 2024 13:37:28 -0700 Subject: [PATCH 02/19] fmt --- python/langsmith/client.py | 8 ++++---- python/langsmith/schemas.py | 2 ++ 2 files changed, 6 insertions(+), 4 deletions(-) diff --git a/python/langsmith/client.py b/python/langsmith/client.py index f073835e4..69c8d9f5a 100644 --- a/python/langsmith/client.py +++ b/python/langsmith/client.py @@ -3446,18 +3446,18 @@ def search_examples( if dataset_id is None: dataset_id = self.read_dataset(dataset_name=dataset_name).id dataset_id = _as_uuid(dataset_id, "dataset_id") - few_shot_resp = self.request_with_retries( + resp = self.request_with_retries( "POST", f"/datasets/{dataset_id}/search", headers=self._headers, data=json.dumps({"inputs": query, "limit": limit, **kwargs}), ) - ls_utils.raise_for_status_with_text(few_shot_resp) + ls_utils.raise_for_status_with_text(resp) examples = [] - for res in few_shot_resp.json()["examples"]: + for ex in resp.json()["examples"]: examples.append( ls_schemas.ExampleSearch( - **res, + **ex, dataset_id=dataset_id, _host_url=self._host_url, _tenant_id=self._get_optional_tenant_id(), diff --git a/python/langsmith/schemas.py b/python/langsmith/schemas.py index 60085ea00..3da1d4650 100644 --- a/python/langsmith/schemas.py +++ b/python/langsmith/schemas.py @@ -98,8 +98,10 @@ def url(self) -> Optional[str]: return f"{self._host_url}{path}" return None + class ExampleSearch(ExampleBase): """Example returned via search.""" + id: UUID From d9c2272ca786d274abb42094a3b2820f1c7b5ae7 Mon Sep 17 00:00:00 2001 From: Bagatur Date: Thu, 15 Aug 2024 13:43:25 -0700 Subject: [PATCH 03/19] fmt --- python/langsmith/client.py | 18 ++++++------------ python/langsmith/evaluation/__init__.py | 2 +- 2 files changed, 7 insertions(+), 13 deletions(-) diff --git a/python/langsmith/client.py b/python/langsmith/client.py index 69c8d9f5a..7d62c2b45 100644 --- a/python/langsmith/client.py +++ b/python/langsmith/client.py @@ -3414,43 +3414,38 @@ def list_examples( if limit is not None and i + 1 >= limit: break - @ls_utils.xor_args(("dataset_name", "dataset_id")) + # dataset_name explicitly not supported to avoid extra API calls. def search_examples( self, - query: dict, + inputs: dict, /, limit: int, dataset_id: Optional[ID_TYPE] = None, - dataset_name: Optional[str] = None, **kwargs: Any, - ) -> List[ls_schemas.ExampleBase]: + ) -> List[ls_schemas.ExampleSearch]: """Retrieve the dataset examples whose inputs best match the query. **Note**: Must have few-shot indexing enabled for the dataset. See (TODO) method for how to enable indexing. Args: - query (dict): The query to search against. Must be JSON serializable. + inputs (dict): The inputs to use as a search query. Must match the dataset + input schema. Must be JSON serializable. limit (int): The maximum number of examples to return. dataset_id (UUID, optional): The ID of the dataset to filter by. Defaults to None. Must specify one of ``dataset_id`` or ``dataset_name``. - dataset_name (str, optional): The name of the dataset to filter by. - Defaults to None. Must specify one of ``dataset_id`` or - ``dataset_name``. kwargs (Any): Additional keyword args to pass as part of request body. Returns: List of ExampleSearch. """ - if dataset_id is None: - dataset_id = self.read_dataset(dataset_name=dataset_name).id dataset_id = _as_uuid(dataset_id, "dataset_id") resp = self.request_with_retries( "POST", f"/datasets/{dataset_id}/search", headers=self._headers, - data=json.dumps({"inputs": query, "limit": limit, **kwargs}), + data=json.dumps({"inputs": inputs, "limit": limit, **kwargs}), ) ls_utils.raise_for_status_with_text(resp) examples = [] @@ -3460,7 +3455,6 @@ def search_examples( **ex, dataset_id=dataset_id, _host_url=self._host_url, - _tenant_id=self._get_optional_tenant_id(), ) ) return examples diff --git a/python/langsmith/evaluation/__init__.py b/python/langsmith/evaluation/__init__.py index 253732cfc..f7a51eb9c 100644 --- a/python/langsmith/evaluation/__init__.py +++ b/python/langsmith/evaluation/__init__.py @@ -1,6 +1,6 @@ """Evaluation Helpers.""" -from typing import TYPE_CHECKING, Any, List +from typing import TYPE_CHECKING, Any if TYPE_CHECKING: from typing import List From 4f83908129cad1083d783068169835029695b9bf Mon Sep 17 00:00:00 2001 From: Bagatur Date: Thu, 15 Aug 2024 13:45:43 -0700 Subject: [PATCH 04/19] fmt --- python/langsmith/client.py | 4 +--- 1 file changed, 1 insertion(+), 3 deletions(-) diff --git a/python/langsmith/client.py b/python/langsmith/client.py index 7d62c2b45..20e49e95a 100644 --- a/python/langsmith/client.py +++ b/python/langsmith/client.py @@ -3420,7 +3420,7 @@ def search_examples( inputs: dict, /, limit: int, - dataset_id: Optional[ID_TYPE] = None, + dataset_id: ID_TYPE, **kwargs: Any, ) -> List[ls_schemas.ExampleSearch]: """Retrieve the dataset examples whose inputs best match the query. @@ -3433,8 +3433,6 @@ def search_examples( input schema. Must be JSON serializable. limit (int): The maximum number of examples to return. dataset_id (UUID, optional): The ID of the dataset to filter by. - Defaults to None. Must specify one of ``dataset_id`` or - ``dataset_name``. kwargs (Any): Additional keyword args to pass as part of request body. Returns: From b16a39f9b438c725391c6c002f9b32794dc5a332 Mon Sep 17 00:00:00 2001 From: Bagatur Date: Thu, 15 Aug 2024 13:50:33 -0700 Subject: [PATCH 05/19] fmt --- python/langsmith/_expect.py | 6 ++---- python/langsmith/_internal/_aiter.py | 6 ++---- python/langsmith/_testing.py | 6 ++---- python/langsmith/client.py | 14 +++----------- python/langsmith/run_helpers.py | 6 ++---- 5 files changed, 11 insertions(+), 27 deletions(-) diff --git a/python/langsmith/_expect.py b/python/langsmith/_expect.py index 3b69deb95..967390597 100644 --- a/python/langsmith/_expect.py +++ b/python/langsmith/_expect.py @@ -410,12 +410,10 @@ def score( ## Private Methods @overload - def __call__(self, value: Any, /) -> _Matcher: - ... + def __call__(self, value: Any, /) -> _Matcher: ... @overload - def __call__(self, /, *, client: ls_client.Client) -> _Expect: - ... + def __call__(self, /, *, client: ls_client.Client) -> _Expect: ... def __call__( self, diff --git a/python/langsmith/_internal/_aiter.py b/python/langsmith/_internal/_aiter.py index e359f28b9..7ae217f68 100644 --- a/python/langsmith/_internal/_aiter.py +++ b/python/langsmith/_internal/_aiter.py @@ -185,12 +185,10 @@ def __len__(self) -> int: return len(self._children) @overload - def __getitem__(self, item: int) -> AsyncIterator[T]: - ... + def __getitem__(self, item: int) -> AsyncIterator[T]: ... @overload - def __getitem__(self, item: slice) -> Tuple[AsyncIterator[T], ...]: - ... + def __getitem__(self, item: slice) -> Tuple[AsyncIterator[T], ...]: ... def __getitem__( self, item: Union[int, slice] diff --git a/python/langsmith/_testing.py b/python/langsmith/_testing.py index 20da5a39a..3d5ac9c3b 100644 --- a/python/langsmith/_testing.py +++ b/python/langsmith/_testing.py @@ -41,8 +41,7 @@ class SkipException(Exception): # type: ignore[no-redef] @overload def test( func: Callable, -) -> Callable: - ... +) -> Callable: ... @overload @@ -52,8 +51,7 @@ def test( output_keys: Optional[Sequence[str]] = None, client: Optional[ls_client.Client] = None, test_suite_name: Optional[str] = None, -) -> Callable[[Callable], Callable]: - ... +) -> Callable[[Callable], Callable]: ... def test(*args: Any, **kwargs: Any) -> Callable: diff --git a/python/langsmith/client.py b/python/langsmith/client.py index 20e49e95a..2ffc1322d 100644 --- a/python/langsmith/client.py +++ b/python/langsmith/client.py @@ -416,15 +416,13 @@ def _as_uuid(value: ID_TYPE, var: Optional[str] = None) -> uuid.UUID: @typing.overload -def _ensure_uuid(value: Optional[Union[str, uuid.UUID]]) -> uuid.UUID: - ... +def _ensure_uuid(value: Optional[Union[str, uuid.UUID]]) -> uuid.UUID: ... @typing.overload def _ensure_uuid( value: Optional[Union[str, uuid.UUID]], *, accept_null: bool = True -) -> Optional[uuid.UUID]: - ... +) -> Optional[uuid.UUID]: ... def _ensure_uuid(value: Optional[Union[str, uuid.UUID]], *, accept_null: bool = False): @@ -3448,13 +3446,7 @@ def search_examples( ls_utils.raise_for_status_with_text(resp) examples = [] for ex in resp.json()["examples"]: - examples.append( - ls_schemas.ExampleSearch( - **ex, - dataset_id=dataset_id, - _host_url=self._host_url, - ) - ) + examples.append(ls_schemas.ExampleSearch(**ex, dataset_id=dataset_id)) return examples def update_example( diff --git a/python/langsmith/run_helpers.py b/python/langsmith/run_helpers.py index 6d72f8568..05f2534fe 100644 --- a/python/langsmith/run_helpers.py +++ b/python/langsmith/run_helpers.py @@ -232,8 +232,7 @@ def __call__( @overload def traceable( func: Callable[P, R], -) -> SupportsLangsmithExtra[P, R]: - ... +) -> SupportsLangsmithExtra[P, R]: ... @overload @@ -249,8 +248,7 @@ def traceable( process_inputs: Optional[Callable[[dict], dict]] = None, process_outputs: Optional[Callable[..., dict]] = None, _invocation_params_fn: Optional[Callable[[dict], dict]] = None, -) -> Callable[[Callable[P, R]], SupportsLangsmithExtra[P, R]]: - ... +) -> Callable[[Callable[P, R]], SupportsLangsmithExtra[P, R]]: ... def traceable( From 61f6df7a885e54cf9659d070c275927fe331e665 Mon Sep 17 00:00:00 2001 From: Bagatur Date: Thu, 15 Aug 2024 13:52:50 -0700 Subject: [PATCH 06/19] fmt --- python/langsmith/evaluation/__init__.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/python/langsmith/evaluation/__init__.py b/python/langsmith/evaluation/__init__.py index f7a51eb9c..253732cfc 100644 --- a/python/langsmith/evaluation/__init__.py +++ b/python/langsmith/evaluation/__init__.py @@ -1,6 +1,6 @@ """Evaluation Helpers.""" -from typing import TYPE_CHECKING, Any +from typing import TYPE_CHECKING, Any, List if TYPE_CHECKING: from typing import List From c5acf462e49fd83134cd003fa8715aeec25e0e1d Mon Sep 17 00:00:00 2001 From: Bagatur Date: Thu, 15 Aug 2024 13:55:00 -0700 Subject: [PATCH 07/19] fmt --- python/langsmith/client.py | 1 + 1 file changed, 1 insertion(+) diff --git a/python/langsmith/client.py b/python/langsmith/client.py index 2ffc1322d..fc1a13638 100644 --- a/python/langsmith/client.py +++ b/python/langsmith/client.py @@ -3417,6 +3417,7 @@ def search_examples( self, inputs: dict, /, + *, limit: int, dataset_id: ID_TYPE, **kwargs: Any, From 33c111419a89a78df75d1e47adf6c6b815f68b78 Mon Sep 17 00:00:00 2001 From: Bagatur Date: Thu, 15 Aug 2024 14:04:24 -0700 Subject: [PATCH 08/19] fmt --- python/langsmith/client.py | 25 +++++++++++++++++++++++-- 1 file changed, 23 insertions(+), 2 deletions(-) diff --git a/python/langsmith/client.py b/python/langsmith/client.py index fc1a13638..9d8aa075b 100644 --- a/python/langsmith/client.py +++ b/python/langsmith/client.py @@ -3413,7 +3413,7 @@ def list_examples( break # dataset_name explicitly not supported to avoid extra API calls. - def search_examples( + def similar_examples( self, inputs: dict, /, @@ -3422,7 +3422,7 @@ def search_examples( dataset_id: ID_TYPE, **kwargs: Any, ) -> List[ls_schemas.ExampleSearch]: - """Retrieve the dataset examples whose inputs best match the query. + """Retrieve the dataset examples whose inputs best match the current inputs. **Note**: Must have few-shot indexing enabled for the dataset. See (TODO) method for how to enable indexing. @@ -3436,6 +3436,27 @@ def search_examples( Returns: List of ExampleSearch. + + Example: + .. code-block:: python + + from langsmith import Client + + client = Client() + client.similar_examples( + {"question": "When would i use the runnable generator"}, + limit=3, + dataset_id="...", + ) + + .. code-block:: pycon + + [ + ExampleSearch(dataset_id=UUID('01b6ce0f-bfb6-4f48-bbb8-f19272135d40'), inputs={'question': 'How do I cache a Chat model? What caches can I use?'}, outputs={'answer': 'You can use LangChain\'s caching layer for Chat Models. This can save you money by reducing the number of API calls you make to the LLM provider, if you\'re often requesting the same completion multiple times, and speed up your application.\n\n```python\n\nfrom langchain.cache import InMemoryCache\nlangchain.llm_cache = InMemoryCache()\n\n# The first time, it is not yet in cache, so it should take longer\nllm.predict(\'Tell me a joke\')\n\n```\n\nYou can also use SQLite Cache which uses a SQLite database:\n\n```python\n rm .langchain.db\n\nfrom langchain.cache import SQLiteCache\nlangchain.llm_cache = SQLiteCache(database_path=".langchain.db")\n\n# The first time, it is not yet in cache, so it should take longer\nllm.predict(\'Tell me a joke\') \n```\n'}, metadata=None, id=UUID('b2ddd1c4-dff6-49ae-8544-f48e39053398')), + ExampleSearch(dataset_id=UUID('01b6ce0f-bfb6-4f48-bbb8-f19272135d40'), inputs={'question': "What's a runnable lambda?"}, outputs={'answer': "A runnable lambda is an object that implements LangChain's `Runnable` interface and runs a callbale (i.e., a function). Note the function must accept a single argument."}, metadata=None, id=UUID('f94104a7-2434-4ba7-8293-6a283f4860b4')), + ExampleSearch(dataset_id=UUID('01b6ce0f-bfb6-4f48-bbb8-f19272135d40'), inputs={'question': 'Show me how to use RecursiveURLLoader'}, outputs={'answer': 'The RecursiveURLLoader comes from the langchain.document_loaders.recursive_url_loader module. Here\'s an example of how to use it:\n\n```python\nfrom langchain.document_loaders.recursive_url_loader import RecursiveUrlLoader\n\n# Create an instance of RecursiveUrlLoader with the URL you want to load\nloader = RecursiveUrlLoader(url="https://example.com")\n\n# Load all child links from the URL page\nchild_links = loader.load()\n\n# Print the child links\nfor link in child_links:\n print(link)\n```\n\nMake sure to replace "https://example.com" with the actual URL you want to load. The load() method returns a list of child links found on the URL page. You can iterate over this list to access each child link.'}, metadata=None, id=UUID('0308ea70-a803-4181-a37d-39e95f138f8c')), + ] + """ dataset_id = _as_uuid(dataset_id, "dataset_id") resp = self.request_with_retries( From ba5e64b6546e04957cd1f8970dd62b3854488bd9 Mon Sep 17 00:00:00 2001 From: Bagatur Date: Thu, 15 Aug 2024 14:05:22 -0700 Subject: [PATCH 09/19] fmt --- python/langsmith/client.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/python/langsmith/client.py b/python/langsmith/client.py index 9d8aa075b..a87889995 100644 --- a/python/langsmith/client.py +++ b/python/langsmith/client.py @@ -3422,7 +3422,7 @@ def similar_examples( dataset_id: ID_TYPE, **kwargs: Any, ) -> List[ls_schemas.ExampleSearch]: - """Retrieve the dataset examples whose inputs best match the current inputs. + r"""Retrieve the dataset examples whose inputs best match the current inputs. **Note**: Must have few-shot indexing enabled for the dataset. See (TODO) method for how to enable indexing. @@ -3457,7 +3457,7 @@ def similar_examples( ExampleSearch(dataset_id=UUID('01b6ce0f-bfb6-4f48-bbb8-f19272135d40'), inputs={'question': 'Show me how to use RecursiveURLLoader'}, outputs={'answer': 'The RecursiveURLLoader comes from the langchain.document_loaders.recursive_url_loader module. Here\'s an example of how to use it:\n\n```python\nfrom langchain.document_loaders.recursive_url_loader import RecursiveUrlLoader\n\n# Create an instance of RecursiveUrlLoader with the URL you want to load\nloader = RecursiveUrlLoader(url="https://example.com")\n\n# Load all child links from the URL page\nchild_links = loader.load()\n\n# Print the child links\nfor link in child_links:\n print(link)\n```\n\nMake sure to replace "https://example.com" with the actual URL you want to load. The load() method returns a list of child links found on the URL page. You can iterate over this list to access each child link.'}, metadata=None, id=UUID('0308ea70-a803-4181-a37d-39e95f138f8c')), ] - """ + """ # noqa: E501 dataset_id = _as_uuid(dataset_id, "dataset_id") resp = self.request_with_retries( "POST", From f3fde6cc483fbb84fbfbb33bca517ea166e11124 Mon Sep 17 00:00:00 2001 From: Bagatur Date: Thu, 15 Aug 2024 14:07:57 -0700 Subject: [PATCH 10/19] fmt --- python/langsmith/client.py | 26 ++++++++++++++++++++++---- 1 file changed, 22 insertions(+), 4 deletions(-) diff --git a/python/langsmith/client.py b/python/langsmith/client.py index a87889995..cdfcd9dd2 100644 --- a/python/langsmith/client.py +++ b/python/langsmith/client.py @@ -3435,7 +3435,7 @@ def similar_examples( kwargs (Any): Additional keyword args to pass as part of request body. Returns: - List of ExampleSearch. + List of ExampleSearch objects. Example: .. code-block:: python @@ -3452,9 +3452,27 @@ def similar_examples( .. code-block:: pycon [ - ExampleSearch(dataset_id=UUID('01b6ce0f-bfb6-4f48-bbb8-f19272135d40'), inputs={'question': 'How do I cache a Chat model? What caches can I use?'}, outputs={'answer': 'You can use LangChain\'s caching layer for Chat Models. This can save you money by reducing the number of API calls you make to the LLM provider, if you\'re often requesting the same completion multiple times, and speed up your application.\n\n```python\n\nfrom langchain.cache import InMemoryCache\nlangchain.llm_cache = InMemoryCache()\n\n# The first time, it is not yet in cache, so it should take longer\nllm.predict(\'Tell me a joke\')\n\n```\n\nYou can also use SQLite Cache which uses a SQLite database:\n\n```python\n rm .langchain.db\n\nfrom langchain.cache import SQLiteCache\nlangchain.llm_cache = SQLiteCache(database_path=".langchain.db")\n\n# The first time, it is not yet in cache, so it should take longer\nllm.predict(\'Tell me a joke\') \n```\n'}, metadata=None, id=UUID('b2ddd1c4-dff6-49ae-8544-f48e39053398')), - ExampleSearch(dataset_id=UUID('01b6ce0f-bfb6-4f48-bbb8-f19272135d40'), inputs={'question': "What's a runnable lambda?"}, outputs={'answer': "A runnable lambda is an object that implements LangChain's `Runnable` interface and runs a callbale (i.e., a function). Note the function must accept a single argument."}, metadata=None, id=UUID('f94104a7-2434-4ba7-8293-6a283f4860b4')), - ExampleSearch(dataset_id=UUID('01b6ce0f-bfb6-4f48-bbb8-f19272135d40'), inputs={'question': 'Show me how to use RecursiveURLLoader'}, outputs={'answer': 'The RecursiveURLLoader comes from the langchain.document_loaders.recursive_url_loader module. Here\'s an example of how to use it:\n\n```python\nfrom langchain.document_loaders.recursive_url_loader import RecursiveUrlLoader\n\n# Create an instance of RecursiveUrlLoader with the URL you want to load\nloader = RecursiveUrlLoader(url="https://example.com")\n\n# Load all child links from the URL page\nchild_links = loader.load()\n\n# Print the child links\nfor link in child_links:\n print(link)\n```\n\nMake sure to replace "https://example.com" with the actual URL you want to load. The load() method returns a list of child links found on the URL page. You can iterate over this list to access each child link.'}, metadata=None, id=UUID('0308ea70-a803-4181-a37d-39e95f138f8c')), + ExampleSearch( + inputs={'question': 'How do I cache a Chat model? What caches can I use?'}, + outputs={'answer': 'You can use LangChain\'s caching layer for Chat Models. This can save you money by reducing the number of API calls you make to the LLM provider, if you\'re often requesting the same completion multiple times, and speed up your application.\n\n```python\n\nfrom langchain.cache import InMemoryCache\nlangchain.llm_cache = InMemoryCache()\n\n# The first time, it is not yet in cache, so it should take longer\nllm.predict(\'Tell me a joke\')\n\n```\n\nYou can also use SQLite Cache which uses a SQLite database:\n\n```python\n rm .langchain.db\n\nfrom langchain.cache import SQLiteCache\nlangchain.llm_cache = SQLiteCache(database_path=".langchain.db")\n\n# The first time, it is not yet in cache, so it should take longer\nllm.predict(\'Tell me a joke\') \n```\n'}, + metadata=None, + id=UUID('b2ddd1c4-dff6-49ae-8544-f48e39053398'), + dataset_id=UUID('01b6ce0f-bfb6-4f48-bbb8-f19272135d40') + ), + ExampleSearch( + inputs={'question': "What's a runnable lambda?"}, + outputs={'answer': "A runnable lambda is an object that implements LangChain's `Runnable` interface and runs a callbale (i.e., a function). Note the function must accept a single argument."}, + metadata=None, + id=UUID('f94104a7-2434-4ba7-8293-6a283f4860b4'), + dataset_id=UUID('01b6ce0f-bfb6-4f48-bbb8-f19272135d40') + ), + ExampleSearch( + inputs={'question': 'Show me how to use RecursiveURLLoader'}, + outputs={'answer': 'The RecursiveURLLoader comes from the langchain.document_loaders.recursive_url_loader module. Here\'s an example of how to use it:\n\n```python\nfrom langchain.document_loaders.recursive_url_loader import RecursiveUrlLoader\n\n# Create an instance of RecursiveUrlLoader with the URL you want to load\nloader = RecursiveUrlLoader(url="https://example.com")\n\n# Load all child links from the URL page\nchild_links = loader.load()\n\n# Print the child links\nfor link in child_links:\n print(link)\n```\n\nMake sure to replace "https://example.com" with the actual URL you want to load. The load() method returns a list of child links found on the URL page. You can iterate over this list to access each child link.'}, + metadata=None, + id=UUID('0308ea70-a803-4181-a37d-39e95f138f8c'), + dataset_id=UUID('01b6ce0f-bfb6-4f48-bbb8-f19272135d40') + ), ] """ # noqa: E501 From 1ebb19764acebe142ce00ece0bb26d27c4bb79a0 Mon Sep 17 00:00:00 2001 From: Bagatur Date: Thu, 15 Aug 2024 14:11:07 -0700 Subject: [PATCH 11/19] fmt --- python/langsmith/client.py | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/python/langsmith/client.py b/python/langsmith/client.py index cdfcd9dd2..218b8f581 100644 --- a/python/langsmith/client.py +++ b/python/langsmith/client.py @@ -3412,7 +3412,9 @@ def list_examples( if limit is not None and i + 1 >= limit: break - # dataset_name explicitly not supported to avoid extra API calls. + # dataset_name arg explicitly not supported to avoid extra API calls. + # TODO: Update note on enabling indexing when there's an enable_indexing method. + # TODO: Come up with more interesting example for docstring. def similar_examples( self, inputs: dict, From e1544bcb28aff96927622383be52ed0a4fed3f5f Mon Sep 17 00:00:00 2001 From: Bagatur Date: Thu, 15 Aug 2024 15:43:28 -0700 Subject: [PATCH 12/19] fmt --- python/langsmith/client.py | 2 ++ 1 file changed, 2 insertions(+) diff --git a/python/langsmith/client.py b/python/langsmith/client.py index 218b8f581..20479e4d3 100644 --- a/python/langsmith/client.py +++ b/python/langsmith/client.py @@ -55,6 +55,7 @@ from langsmith import env as ls_env from langsmith import schemas as ls_schemas from langsmith import utils as ls_utils +from langsmith.base import beta if TYPE_CHECKING: import pandas as pd # type: ignore @@ -3415,6 +3416,7 @@ def list_examples( # dataset_name arg explicitly not supported to avoid extra API calls. # TODO: Update note on enabling indexing when there's an enable_indexing method. # TODO: Come up with more interesting example for docstring. + @beta() def similar_examples( self, inputs: dict, From f4815034213ebf65cc0ef67c4c496613dce12d6d Mon Sep 17 00:00:00 2001 From: Bagatur Date: Sat, 17 Aug 2024 23:09:43 -0700 Subject: [PATCH 13/19] fmt --- python/langsmith/client.py | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/python/langsmith/client.py b/python/langsmith/client.py index 20479e4d3..afaca6e9d 100644 --- a/python/langsmith/client.py +++ b/python/langsmith/client.py @@ -3415,7 +3415,6 @@ def list_examples( # dataset_name arg explicitly not supported to avoid extra API calls. # TODO: Update note on enabling indexing when there's an enable_indexing method. - # TODO: Come up with more interesting example for docstring. @beta() def similar_examples( self, @@ -3428,8 +3427,9 @@ def similar_examples( ) -> List[ls_schemas.ExampleSearch]: r"""Retrieve the dataset examples whose inputs best match the current inputs. - **Note**: Must have few-shot indexing enabled for the dataset. See (TODO) method - for how to enable indexing. + **Note**: Must have few-shot indexing enabled for the dataset. You can do this + in the LangSmith UI: + https://docs.smith.langchain.com/how_to_guides/datasets/index_datasets_for_dynamic_few_shot_example_selection Args: inputs (dict): The inputs to use as a search query. Must match the dataset From f6fd84d7235a4ecb842a9295889fa59cf8e23009 Mon Sep 17 00:00:00 2001 From: Bagatur Date: Sun, 18 Aug 2024 10:41:39 -0700 Subject: [PATCH 14/19] fmt --- python/langsmith/beta/_utils.py | 2 +- python/langsmith/client.py | 4 ++-- 2 files changed, 3 insertions(+), 3 deletions(-) diff --git a/python/langsmith/beta/_utils.py b/python/langsmith/beta/_utils.py index d1ebcbe1b..433ade058 100644 --- a/python/langsmith/beta/_utils.py +++ b/python/langsmith/beta/_utils.py @@ -11,7 +11,7 @@ def warn_beta(func: Callable) -> Callable: @functools.wraps(func) def wrapper(*args, **kwargs): warnings.warn( - f"Function {func.__name__} is in beta.", UserWarning, stacklevel=2 + f"Function {func.__name__} is in beta.", LangSmithBetaWarning, stacklevel=2 ) return func(*args, **kwargs) diff --git a/python/langsmith/client.py b/python/langsmith/client.py index afaca6e9d..1f94a99e4 100644 --- a/python/langsmith/client.py +++ b/python/langsmith/client.py @@ -55,7 +55,7 @@ from langsmith import env as ls_env from langsmith import schemas as ls_schemas from langsmith import utils as ls_utils -from langsmith.base import beta +from langsmith.beta import _utils as beta_utils if TYPE_CHECKING: import pandas as pd # type: ignore @@ -3415,7 +3415,7 @@ def list_examples( # dataset_name arg explicitly not supported to avoid extra API calls. # TODO: Update note on enabling indexing when there's an enable_indexing method. - @beta() + @beta_utils.warn_beta def similar_examples( self, inputs: dict, From 14315753cef20c9cad597932b1e97f92c14d600d Mon Sep 17 00:00:00 2001 From: Bagatur Date: Sun, 18 Aug 2024 12:13:12 -0700 Subject: [PATCH 15/19] fmt --- .../{beta/_utils.py => _internal/_beta_decorator.py} | 0 python/langsmith/beta/__init__.py | 2 +- python/langsmith/beta/_evals.py | 6 +++--- python/langsmith/client.py | 4 ++-- python/langsmith/evaluation/llm_evaluator.py | 6 +++--- python/tests/integration_tests/test_client.py | 5 +++++ 6 files changed, 14 insertions(+), 9 deletions(-) rename python/langsmith/{beta/_utils.py => _internal/_beta_decorator.py} (100%) diff --git a/python/langsmith/beta/_utils.py b/python/langsmith/_internal/_beta_decorator.py similarity index 100% rename from python/langsmith/beta/_utils.py rename to python/langsmith/_internal/_beta_decorator.py diff --git a/python/langsmith/beta/__init__.py b/python/langsmith/beta/__init__.py index 9240296a3..f712c1adb 100644 --- a/python/langsmith/beta/__init__.py +++ b/python/langsmith/beta/__init__.py @@ -1,6 +1,6 @@ """Beta functionality prone to change.""" +from langsmith._internal._beta_decorator import warn_beta from langsmith.beta._evals import compute_test_metrics, convert_runs_to_test -from langsmith.beta._utils import warn_beta __all__ = ["convert_runs_to_test", "compute_test_metrics", "warn_beta"] diff --git a/python/langsmith/beta/_evals.py b/python/langsmith/beta/_evals.py index 03b099fff..de6103d81 100644 --- a/python/langsmith/beta/_evals.py +++ b/python/langsmith/beta/_evals.py @@ -9,9 +9,9 @@ import uuid from typing import DefaultDict, List, Optional, Sequence, Tuple, TypeVar -import langsmith.beta._utils as beta_utils import langsmith.schemas as ls_schemas from langsmith import evaluation as ls_eval +from langsmith._internal._beta_decorator import warn_beta from langsmith.client import Client @@ -65,7 +65,7 @@ def _convert_root_run(root: ls_schemas.Run, run_to_example_map: dict) -> List[di return result -@beta_utils.warn_beta +@warn_beta def convert_runs_to_test( runs: Sequence[ls_schemas.Run], *, @@ -196,7 +196,7 @@ def _outer_product(list1: List[T], list2: List[U]) -> List[Tuple[T, U]]: return list(itertools.product(list1, list2)) -@beta_utils.warn_beta +@warn_beta def compute_test_metrics( project_name: str, *, diff --git a/python/langsmith/client.py b/python/langsmith/client.py index 1f94a99e4..6aaaf473e 100644 --- a/python/langsmith/client.py +++ b/python/langsmith/client.py @@ -55,7 +55,7 @@ from langsmith import env as ls_env from langsmith import schemas as ls_schemas from langsmith import utils as ls_utils -from langsmith.beta import _utils as beta_utils +from langsmith._internal._beta_decorator import warn_beta if TYPE_CHECKING: import pandas as pd # type: ignore @@ -3415,7 +3415,7 @@ def list_examples( # dataset_name arg explicitly not supported to avoid extra API calls. # TODO: Update note on enabling indexing when there's an enable_indexing method. - @beta_utils.warn_beta + @warn_beta def similar_examples( self, inputs: dict, diff --git a/python/langsmith/evaluation/llm_evaluator.py b/python/langsmith/evaluation/llm_evaluator.py index d0ef4fec3..3ae7b333c 100644 --- a/python/langsmith/evaluation/llm_evaluator.py +++ b/python/langsmith/evaluation/llm_evaluator.py @@ -4,7 +4,7 @@ from pydantic import BaseModel -import langsmith.beta._utils as beta_utils +from langsmith._internal._beta_decorator import warn_beta from langsmith.evaluation import EvaluationResult, EvaluationResults, RunEvaluator from langsmith.schemas import Example, Run @@ -201,7 +201,7 @@ def _initialize( chat_model = chat_model.with_structured_output(self.score_schema) self.runnable = self.prompt | chat_model - @beta_utils.warn_beta + @warn_beta def evaluate_run( self, run: Run, example: Optional[Example] = None ) -> Union[EvaluationResult, EvaluationResults]: @@ -210,7 +210,7 @@ def evaluate_run( output: dict = cast(dict, self.runnable.invoke(variables)) return self._parse_output(output) - @beta_utils.warn_beta + @warn_beta async def aevaluate_run( self, run: Run, example: Optional[Example] = None ) -> Union[EvaluationResult, EvaluationResults]: diff --git a/python/tests/integration_tests/test_client.py b/python/tests/integration_tests/test_client.py index 22f5355c9..b478a2f60 100644 --- a/python/tests/integration_tests/test_client.py +++ b/python/tests/integration_tests/test_client.py @@ -268,6 +268,11 @@ def test_list_examples(langchain_client: Client) -> None: langchain_client.delete_dataset(dataset_id=dataset.id) + example_list = langchain_client.similar_examples( + {"text": "hey there"}, k=1, dataset_id=dataset.id + ) + assert len(example_list) == 1 + @pytest.mark.skip(reason="This test is flaky") def test_persist_update_run(langchain_client: Client) -> None: From f1a8808f2e4231cfbc30140a6d6ba812b3225975 Mon Sep 17 00:00:00 2001 From: Bagatur Date: Sun, 18 Aug 2024 12:53:20 -0700 Subject: [PATCH 16/19] fmt --- python/langsmith/client.py | 44 +++++++++++++++--- python/pyproject.toml | 1 + python/tests/integration_tests/conftest.py | 21 +++++++++ python/tests/integration_tests/test_client.py | 45 +++++++++++++++++-- 4 files changed, 101 insertions(+), 10 deletions(-) create mode 100644 python/tests/integration_tests/conftest.py diff --git a/python/langsmith/client.py b/python/langsmith/client.py index 6aaaf473e..b48a16be1 100644 --- a/python/langsmith/client.py +++ b/python/langsmith/client.py @@ -3413,8 +3413,41 @@ def list_examples( if limit is not None and i + 1 >= limit: break - # dataset_name arg explicitly not supported to avoid extra API calls. - # TODO: Update note on enabling indexing when there's an enable_indexing method. + @warn_beta + def index_dataset( + self, + *, + dataset_id: ID_TYPE, + tag: str = "latest", + **kwargs: Any, + ) -> None: + """Enable dataset indexing. Examples are indexed by their inputs. + + This enables searching for similar examples by inputs with + ``client.similar_examples()``. + + Args: + dataset_id (UUID): The ID of the dataset to index. + tag (str, optional): The version of the dataset to index. If 'latest' + then any updates to the dataset (additions, updates, deletions of + examples) will be reflected in the index. + + Returns: + None + + Raises: + requests.HTTPError + """ # noqa: E501 + dataset_id = _as_uuid(dataset_id, "dataset_id") + resp = self.request_with_retries( + "POST", + f"/datasets/{dataset_id}/index", + headers=self._headers, + data=json.dumps({"tag": tag, **kwargs}), + ) + ls_utils.raise_for_status_with_text(resp) + + # NOTE: dataset_name arg explicitly not supported to avoid extra API calls. @warn_beta def similar_examples( self, @@ -3427,15 +3460,14 @@ def similar_examples( ) -> List[ls_schemas.ExampleSearch]: r"""Retrieve the dataset examples whose inputs best match the current inputs. - **Note**: Must have few-shot indexing enabled for the dataset. You can do this - in the LangSmith UI: - https://docs.smith.langchain.com/how_to_guides/datasets/index_datasets_for_dynamic_few_shot_example_selection + **Note**: Must have few-shot indexing enabled for the dataset. See + ``client.index_dataset()``. Args: inputs (dict): The inputs to use as a search query. Must match the dataset input schema. Must be JSON serializable. limit (int): The maximum number of examples to return. - dataset_id (UUID, optional): The ID of the dataset to filter by. + dataset_id (str or UUID): The ID of the dataset to search over. kwargs (Any): Additional keyword args to pass as part of request body. Returns: diff --git a/python/pyproject.toml b/python/pyproject.toml index 7c7c95888..1242fb6d7 100644 --- a/python/pyproject.toml +++ b/python/pyproject.toml @@ -101,3 +101,4 @@ disallow_untyped_defs = "True" [tool.pytest.ini_options] asyncio_mode = "auto" +markers = [ "slow: long-running tests",] diff --git a/python/tests/integration_tests/conftest.py b/python/tests/integration_tests/conftest.py new file mode 100644 index 000000000..e446d0a1c --- /dev/null +++ b/python/tests/integration_tests/conftest.py @@ -0,0 +1,21 @@ +import pytest + + +def pytest_addoption(parser): + parser.addoption( + "--runslow", action="store_true", default=False, help="run slow tests" + ) + + +def pytest_configure(config): + config.addinivalue_line("markers", "slow: mark test as slow to run") + + +def pytest_collection_modifyitems(config, items): + if config.getoption("--runslow"): + # --runslow given in cli: do not skip slow tests + return + skip_slow = pytest.mark.skip(reason="need --runslow option to run") + for item in items: + if "slow" in item.keywords: + item.add_marker(skip_slow) diff --git a/python/tests/integration_tests/test_client.py b/python/tests/integration_tests/test_client.py index b478a2f60..9f436f515 100644 --- a/python/tests/integration_tests/test_client.py +++ b/python/tests/integration_tests/test_client.py @@ -37,7 +37,7 @@ def wait_for( @pytest.fixture def langchain_client() -> Client: - return Client() + return Client(api_key=os.environ["LANGCHAIN_ORG_API_KEY"]) def test_datasets(langchain_client: Client) -> None: @@ -268,10 +268,47 @@ def test_list_examples(langchain_client: Client) -> None: langchain_client.delete_dataset(dataset_id=dataset.id) - example_list = langchain_client.similar_examples( - {"text": "hey there"}, k=1, dataset_id=dataset.id + +@pytest.mark.slow +def test_similar_examples(langchain_client: Client) -> None: + inputs = [{"text": "how are you"}, {"text": "good bye"}, {"text": "see ya later"}] + outputs = [ + {"response": "good how are you"}, + {"response": "ta ta"}, + {"response": "tootles"}, + ] + dataset_name = "__test_similar_examples" + uuid4().hex[:4] + dataset = langchain_client.create_dataset( + dataset_name=dataset_name, + inputs_schema={ + "$schema": "http://json-schema.org/draft-07/schema#", + "type": "object", + "properties": { + "text": {"type": "string"}, + }, + "required": ["text"], + "additionalProperties": False, + }, + outputs_schema={ + "$schema": "http://json-schema.org/draft-07/schema#", + "type": "object", + "properties": { + "response": {"type": "string"}, + }, + "required": ["response"], + "additionalProperties": False, + }, ) - assert len(example_list) == 1 + langchain_client.create_examples( + inputs=inputs, outputs=outputs, dataset_id=dataset.id + ) + langchain_client.index_dataset(dataset_id=dataset.id) + # Need to wait for indexing to finish. + time.sleep(5) + similar_list = langchain_client.similar_examples( + {"text": "howdy"}, limit=2, dataset_id=dataset.id + ) + assert len(similar_list) == 2 @pytest.mark.skip(reason="This test is flaky") From 6a8b7e508ddc419b90606351a7466429156c03ab Mon Sep 17 00:00:00 2001 From: Bagatur Date: Sun, 18 Aug 2024 12:55:28 -0700 Subject: [PATCH 17/19] fmt --- python/tests/integration_tests/conftest.py | 4 ---- 1 file changed, 4 deletions(-) diff --git a/python/tests/integration_tests/conftest.py b/python/tests/integration_tests/conftest.py index e446d0a1c..8ad66c3d2 100644 --- a/python/tests/integration_tests/conftest.py +++ b/python/tests/integration_tests/conftest.py @@ -7,10 +7,6 @@ def pytest_addoption(parser): ) -def pytest_configure(config): - config.addinivalue_line("markers", "slow: mark test as slow to run") - - def pytest_collection_modifyitems(config, items): if config.getoption("--runslow"): # --runslow given in cli: do not skip slow tests From bfc64b7a0ab8e616cd5686b38d38e8e8d13b992e Mon Sep 17 00:00:00 2001 From: Bagatur Date: Sun, 18 Aug 2024 12:55:51 -0700 Subject: [PATCH 18/19] fmt --- python/tests/integration_tests/test_client.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/python/tests/integration_tests/test_client.py b/python/tests/integration_tests/test_client.py index 9f436f515..97cca837c 100644 --- a/python/tests/integration_tests/test_client.py +++ b/python/tests/integration_tests/test_client.py @@ -37,7 +37,7 @@ def wait_for( @pytest.fixture def langchain_client() -> Client: - return Client(api_key=os.environ["LANGCHAIN_ORG_API_KEY"]) + return Client() def test_datasets(langchain_client: Client) -> None: From e779d94ce397feacfbdb7cd60a890021c249fdfd Mon Sep 17 00:00:00 2001 From: Bagatur Date: Sun, 18 Aug 2024 12:56:25 -0700 Subject: [PATCH 19/19] fmt --- python/tests/integration_tests/test_client.py | 2 ++ 1 file changed, 2 insertions(+) diff --git a/python/tests/integration_tests/test_client.py b/python/tests/integration_tests/test_client.py index 97cca837c..d939111d4 100644 --- a/python/tests/integration_tests/test_client.py +++ b/python/tests/integration_tests/test_client.py @@ -310,6 +310,8 @@ def test_similar_examples(langchain_client: Client) -> None: ) assert len(similar_list) == 2 + langchain_client.delete_dataset(dataset_id=dataset.id) + @pytest.mark.skip(reason="This test is flaky") def test_persist_update_run(langchain_client: Client) -> None: