diff --git a/libs/packages.yml b/libs/packages.yml
index f9020eef1279f..388f307b8dfc8 100644
--- a/libs/packages.yml
+++ b/libs/packages.yml
@@ -144,3 +144,6 @@ packages:
   - name: langchain-box
     repo: langchain-ai/langchain-box
     path: libs/box
+  - name: langchain-tests
+    repo: langchain-ai/langchain
+    path: libs/standard-tests
diff --git a/libs/standard-tests/langchain_tests/integration_tests/chat_models.py b/libs/standard-tests/langchain_tests/integration_tests/chat_models.py
index f0ac13d461443..f7608469c04fd 100644
--- a/libs/standard-tests/langchain_tests/integration_tests/chat_models.py
+++ b/libs/standard-tests/langchain_tests/integration_tests/chat_models.py
@@ -77,6 +77,24 @@ def standard_chat_model_params(self) -> dict:
         return {}

     def test_invoke(self, model: BaseChatModel) -> None:
+        """Test to verify that `model.invoke(simple_message)` works.
+
+        This should pass for all integrations.
+
+        .. dropdown:: Troubleshooting
+
+            If this test fails, you should make sure your `_generate` method
+            does not raise any exceptions, and that it returns a valid
+            :class:`~langchain_core.outputs.chat_result.ChatResult` like so:
+
+            .. code-block:: python
+
+                return ChatResult(
+                    generations=[ChatGeneration(
+                        message=AIMessage(content="Output text")
+                    )]
+                )
+        """
         result = model.invoke("Hello")
         assert result is not None
         assert isinstance(result, AIMessage)
@@ -84,6 +102,31 @@ def test_invoke(self, model: BaseChatModel) -> None:
         assert len(result.content) > 0

     async def test_ainvoke(self, model: BaseChatModel) -> None:
+        """Test to verify that `await model.ainvoke(simple_message)` works.
+
+        This should pass for all integrations. Passing this test does not indicate
+        a "natively async" implementation, but rather that the model can be used
+        in an async context.
+
+        .. dropdown:: Troubleshooting
+
+            First, debug
+            :meth:`~langchain_tests.integration_tests.chat_models.ChatModelIntegrationTests.test_invoke`
+            because `ainvoke` has a default implementation that calls `invoke` in an
+            async context.
+
+            If that test passes but not this one, you should make sure your
+            `_agenerate` method does not raise any exceptions, and that it returns a
+            valid :class:`~langchain_core.outputs.chat_result.ChatResult` like so:
+
+            .. code-block:: python
+
+                return ChatResult(
+                    generations=[ChatGeneration(
+                        message=AIMessage(content="Output text")
+                    )]
+                )
+        """
         result = await model.ainvoke("Hello")
         assert result is not None
         assert isinstance(result, AIMessage)
@@ -91,6 +134,30 @@ async def test_ainvoke(self, model: BaseChatModel) -> None:
         assert len(result.content) > 0

     def test_stream(self, model: BaseChatModel) -> None:
+        """Test to verify that `model.stream(simple_message)` works.
+
+        This should pass for all integrations. Passing this test does not indicate
+        a "streaming" implementation, but rather that the model can be used in a
+        streaming context.
+
+        .. dropdown:: Troubleshooting
+
+            First, debug
+            :meth:`~langchain_tests.integration_tests.chat_models.ChatModelIntegrationTests.test_invoke`
+            because `stream` has a default implementation that calls `invoke` and
+            yields the result as a single chunk.
+
+            If that test passes but not this one, you should make sure your `_stream`
+            method does not raise any exceptions, and that it yields valid
+            :class:`~langchain_core.outputs.chat_generation.ChatGenerationChunk`
+            objects like so:
+
+            .. code-block:: python
+
+                yield ChatGenerationChunk(
+                    message=AIMessageChunk(content="chunk text")
+                )
+        """
         num_tokens = 0
         for token in model.stream("Hello"):
             assert token is not None
@@ -99,6 +166,33 @@ def test_stream(self, model: BaseChatModel) -> None:
         assert num_tokens > 0

     async def test_astream(self, model: BaseChatModel) -> None:
+        """Test to verify that `await model.astream(simple_message)` works.
+
+        This should pass for all integrations. Passing this test does not indicate
+        a "natively async" or "streaming" implementation, but rather that the model
+        can be used in an async streaming context.
+
+        .. dropdown:: Troubleshooting
+
+            First, debug
+            :meth:`~langchain_tests.integration_tests.chat_models.ChatModelIntegrationTests.test_stream`
+            and
+            :meth:`~langchain_tests.integration_tests.chat_models.ChatModelIntegrationTests.test_ainvoke`
+            because `astream` has a default implementation that calls `_stream` in an
+            async context if it is implemented, or `ainvoke` and yields the result as
+            a single chunk if not.
+
+            If those tests pass but not this one, you should make sure your `_astream`
+            method does not raise any exceptions, and that it yields valid
+            :class:`~langchain_core.outputs.chat_generation.ChatGenerationChunk`
+            objects like so:
+
+            .. code-block:: python
+
+                yield ChatGenerationChunk(
+                    message=AIMessageChunk(content="chunk text")
+                )
+        """
         num_tokens = 0
         async for token in model.astream("Hello"):
             assert token is not None
@@ -107,6 +201,30 @@ async def test_astream(self, model: BaseChatModel) -> None:
         assert num_tokens > 0

     def test_batch(self, model: BaseChatModel) -> None:
+        """Test to verify that `model.batch([messages])` works.
+
+        This should pass for all integrations. Tests the model's ability to process
+        multiple prompts in a single batch.
+
+        .. dropdown:: Troubleshooting
+
+            First, debug
+            :meth:`~langchain_tests.integration_tests.chat_models.ChatModelIntegrationTests.test_invoke`
+            because `batch` has a default implementation that calls `invoke` for each
+            input in the batch.
+
+            If that test passes but not this one, you should make sure your `batch`
+            method does not raise any exceptions, and that it returns a list of valid
+            :class:`~langchain_core.messages.AIMessage` objects.
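+
+            For illustration only, a passing result for
+            `model.batch(["Hello", "Hey"])` looks like this (content strings are
+            placeholders):
+
+            .. code-block:: python
+
+                [AIMessage(content="..."), AIMessage(content="...")]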
+        """
         batch_results = model.batch(["Hello", "Hey"])
         assert batch_results is not None
         assert isinstance(batch_results, list)
@@ -118,6 +236,24 @@ def test_batch(self, model: BaseChatModel) -> None:
         assert len(result.content) > 0

     async def test_abatch(self, model: BaseChatModel) -> None:
+        """Test to verify that `await model.abatch([messages])` works.
+
+        This should pass for all integrations. Tests the model's ability to process
+        multiple prompts in a single batch asynchronously.
+
+        .. dropdown:: Troubleshooting
+
+            First, debug
+            :meth:`~langchain_tests.integration_tests.chat_models.ChatModelIntegrationTests.test_batch`
+            and
+            :meth:`~langchain_tests.integration_tests.chat_models.ChatModelIntegrationTests.test_ainvoke`
+            because `abatch` has a default implementation that calls `ainvoke` for
+            each input in the batch.
+
+            If those tests pass but not this one, you should make sure your `abatch`
+            method does not raise any exceptions, and that it returns a list of valid
+            :class:`~langchain_core.messages.AIMessage` objects.
+        """
         batch_results = await model.abatch(["Hello", "Hey"])
         assert batch_results is not None
         assert isinstance(batch_results, list)
@@ -129,6 +265,35 @@ async def test_abatch(self, model: BaseChatModel) -> None:
         assert len(result.content) > 0

     def test_conversation(self, model: BaseChatModel) -> None:
+        """Test to verify that the model can handle multi-turn conversations.
+
+        This should pass for all integrations. Tests the model's ability to process
+        a sequence of alternating human and AI messages as context for generating
+        the next response.
+
+        .. dropdown:: Troubleshooting
+
+            First, debug
+            :meth:`~langchain_tests.integration_tests.chat_models.ChatModelIntegrationTests.test_invoke`
+            because this test also uses `model.invoke()`.
+
+            If that test passes but not this one, you should verify that:
+
+            1. Your model correctly processes the message history
+            2. The model maintains appropriate context from previous messages
+            3. The response is a valid :class:`~langchain_core.messages.AIMessage`
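+
+            A minimal sketch of the call this test makes (message contents are
+            placeholders):
+
+            .. code-block:: python
+
+                result = model.invoke([
+                    HumanMessage("hello"),
+                    AIMessage("hello"),
+                    HumanMessage("how are you"),
+                ])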
+ """ batch_results = await model.abatch(["Hello", "Hey"]) assert batch_results is not None assert isinstance(batch_results, list) @@ -129,6 +257,23 @@ async def test_abatch(self, model: BaseChatModel) -> None: assert len(result.content) > 0 def test_conversation(self, model: BaseChatModel) -> None: + """Test to verify that the model can handle multi-turn conversations. + + This should pass for all integrations. Tests the model's ability to process + a sequence of alternating human and AI messages as context for generating + the next response. + + .. dropdown:: Troubleshooting + + First, debug + :meth:`~langchain_tests.integration_tests.chat_models.ChatModelIntegrationTests.test_invoke` + because this test also uses `model.invoke()`. + + If that test passes but not this one, you should verify that: + 1. Your model correctly processes the message history + 2. The model maintains appropriate context from previous messages + 3. The response is a valid :class:`~langchain_core.messages.AIMessage` + """ messages = [ HumanMessage("hello"), AIMessage("hello"), @@ -141,6 +286,82 @@ def test_conversation(self, model: BaseChatModel) -> None: assert len(result.content) > 0 def test_usage_metadata(self, model: BaseChatModel) -> None: + """Test to verify that the model returns correct usage metadata. + + This test is optional and should be skipped if the model does not return + usage metadata (see Configuration below). + + .. dropdown:: Configuration + + By default, this test is run. + To disable this feature, set `returns_usage_metadata` to False in your test + class: + + .. code-block:: python + + class TestMyChatModelIntegration(ChatModelIntegrationTests): + @property + def returns_usage_metadata(self) -> bool: + return False + + This test can also check the format of specific kinds of usage metadata + based on the `supported_usage_metadata_details` property. This property + should be configured as follows with the types of tokens that the model + supports tracking: + + .. code-block:: python + + class TestMyChatModelIntegration(ChatModelIntegrationTests): + @property + def supported_usage_metadata_details(self) -> dict: + return { + "invoke": [ + "audio_input", + "audio_output", + "reasoning_output", + "cache_read_input", + "cache_creation_input", + ], + "stream": [ + "audio_input", + "audio_output", + "reasoning_output", + "cache_read_input", + "cache_creation_input", + ], + } + + + .. dropdown:: Troubleshooting + + If this test fails, first verify that your model returns + :class:`~langchain_core.messages.ai.UsageMetadata` dicts + attached to the returned AIMessage object in `_generate`: + + .. code-block:: python + + return ChatResult( + generations=[ChatGeneration( + message=AIMessage( + content="Output text", + usage_metadata={ + "input_tokens": 350, + "output_tokens": 240, + "total_tokens": 590, + "input_token_details": { + "audio": 10, + "cache_creation": 200, + "cache_read": 100, + }, + "output_token_details": { + "audio": 10, + "reasoning": 200, + } + } + ) + )] + ) + """ if not self.returns_usage_metadata: pytest.skip("Not implemented.") result = model.invoke("Hello") @@ -207,6 +428,88 @@ def test_usage_metadata(self, model: BaseChatModel) -> None: ) def test_usage_metadata_streaming(self, model: BaseChatModel) -> None: + """ + Test to verify that the model returns correct usage metadata in streaming mode. + + .. dropdown:: Configuration + + By default, this test is run. + To disable this feature, set `returns_usage_metadata` to False in your test + class: + + .. 
+        """
         if not self.returns_usage_metadata:
             pytest.skip("Not implemented.")
         full: Optional[AIMessageChunk] = None