diff --git a/libs/vertexai/Makefile b/libs/vertexai/Makefile
index f5a56349..d86dffd2 100644
--- a/libs/vertexai/Makefile
+++ b/libs/vertexai/Makefile
@@ -35,9 +35,9 @@ lint_tests: MYPY_CACHE=.mypy_cache_test
 lint lint_diff lint_package lint_tests:
 	./scripts/check_pydantic.sh .
 	./scripts/lint_imports.sh
-	poetry run ruff .
+	poetry run ruff check .
 	poetry run ruff format $(PYTHON_FILES) --diff
-	poetry run ruff --select I $(PYTHON_FILES)
+	poetry run ruff check --select I $(PYTHON_FILES)
 	mkdir $(MYPY_CACHE); poetry run mypy $(PYTHON_FILES) --cache-dir $(MYPY_CACHE)
 
 format format_diff:
diff --git a/libs/vertexai/langchain_google_vertexai/_anthropic_parsers.py b/libs/vertexai/langchain_google_vertexai/_anthropic_parsers.py
index 8913dda2..10d2bff2 100644
--- a/libs/vertexai/langchain_google_vertexai/_anthropic_parsers.py
+++ b/libs/vertexai/langchain_google_vertexai/_anthropic_parsers.py
@@ -1,6 +1,6 @@
 from typing import Any, List, Optional, Type
 
-from langchain_core.messages import ToolCall
+from langchain_core.messages import AIMessage, ToolCall
 from langchain_core.output_parsers import BaseGenerationOutputParser
 from langchain_core.outputs import ChatGeneration, Generation
 from langchain_core.pydantic_v1 import BaseModel
@@ -26,25 +26,20 @@ def parse_result(self, result: List[Generation], *, partial: bool = False) -> Any:
         """
         if not result or not isinstance(result[0], ChatGeneration):
             return None if self.first_tool_only else []
+
         message = result[0].message
-        if isinstance(message.content, str):
-            tool_calls: List = []
-        else:
-            content: List = message.content
-            _tool_calls = [dict(tc) for tc in _extract_tool_calls(content)]
-            # Map tool call id to index
-            id_to_index = {
-                block["id"]: i
-                for i, block in enumerate(content)
-                if block["type"] == "tool_use"
-            }
-            tool_calls = [{**tc, "index": id_to_index[tc["id"]]} for tc in _tool_calls]
+        tool_calls: List[Any] = []
+
+        if isinstance(message, AIMessage) and message.tool_calls:
+            tool_calls = message.tool_calls
+        elif isinstance(message.content, list):
+            content: Any = message.content
+            tool_calls = _extract_tool_calls(content)
+
         if self.pydantic_schemas:
             tool_calls = [self._pydantic_parse(tc) for tc in tool_calls]
         elif self.args_only:
             tool_calls = [tc["args"] for tc in tool_calls]
-        else:
-            pass
         if self.first_tool_only:
             return tool_calls[0] if tool_calls else None
         return tool_calls
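Note on the `_anthropic_parsers.py` change: the parser now prefers the tool calls that the chat model already surfaces on `AIMessage.tool_calls`, and only falls back to scanning raw `tool_use` content blocks. A minimal sketch of that branch, using hypothetical message values (not taken from this PR):

```python
from langchain_core.messages import AIMessage, ToolCall

# Hypothetical values, for illustration only.
message = AIMessage(
    content=[
        {
            "type": "tool_use",
            "id": "toolu_01",
            "name": "Information",
            "input": {"name": "Rob"},
        }
    ],
    tool_calls=[ToolCall(name="Information", args={"name": "Rob"}, id="toolu_01")],
)

# Mirrors the new parse_result logic: prefer AIMessage.tool_calls,
# fall back to extracting tool_use blocks from list content.
if isinstance(message, AIMessage) and message.tool_calls:
    tool_calls = message.tool_calls
elif isinstance(message.content, list):
    tool_calls = [
        block
        for block in message.content
        if isinstance(block, dict) and block.get("type") == "tool_use"
    ]
else:
    tool_calls = []

print(tool_calls)
# -> [{'name': 'Information', 'args': {'name': 'Rob'}, 'id': 'toolu_01'}]
```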
diff --git a/libs/vertexai/langchain_google_vertexai/chat_models.py b/libs/vertexai/langchain_google_vertexai/chat_models.py
index 63b89d1a..892c32f6 100644
--- a/libs/vertexai/langchain_google_vertexai/chat_models.py
+++ b/libs/vertexai/langchain_google_vertexai/chat_models.py
@@ -296,6 +296,14 @@ def _convert_to_parts(message: BaseMessage) -> List[Part]:
                 )
                 parts.append(Part(function_call=function_call))
 
+            prev_content = vertex_messages[-1]
+            prev_content_is_model = prev_content and prev_content.role == "model"
+            if prev_content_is_model:
+                prev_parts = list(prev_content.parts)
+                prev_parts.extend(parts)
+                vertex_messages[-1] = Content(role=role, parts=prev_parts)
+                continue
+
             vertex_messages.append(Content(role=role, parts=parts))
         elif isinstance(message, FunctionMessage):
             prev_ai_message = None
@@ -306,18 +314,18 @@ def _convert_to_parts(message: BaseMessage) -> List[Part]:
                     name=message.name, response={"content": message.content}
                 )
             )
+            parts = [part]
 
             prev_content = vertex_messages[-1]
             prev_content_is_function = prev_content and prev_content.role == "function"
+
             if prev_content_is_function:
-                parts = list(prev_content.parts)
-                parts.append(part)
+                prev_parts = list(prev_content.parts)
+                prev_parts.extend(parts)
                 # replacing last message
-                vertex_messages[-1] = Content(role=role, parts=parts)
+                vertex_messages[-1] = Content(role=role, parts=prev_parts)
                 continue
 
-            parts = [part]
-
             vertex_messages.append(Content(role=role, parts=parts))
         elif isinstance(message, ToolMessage):
             role = "function"
@@ -383,18 +391,19 @@ def _parse_content(raw_content: str | Dict[Any, Any]) -> Dict[Any, Any]:
                     response=content,
                 )
             )
+            parts = [part]
 
             prev_content = vertex_messages[-1]
             prev_content_is_function = prev_content and prev_content.role == "function"
+
             if prev_content_is_function:
-                parts = list(prev_content.parts)
-                parts.append(part)
+                prev_parts = list(prev_content.parts)
+                prev_parts.extend(parts)
                 # replacing last message
-                vertex_messages[-1] = Content(role=role, parts=parts)
+                vertex_messages[-1] = Content(role=role, parts=prev_parts)
                 continue
-            else:
-                parts = [part]
-                vertex_messages.append(Content(role=role, parts=parts))
+
+            vertex_messages.append(Content(role=role, parts=parts))
         else:
             raise ValueError(
                 f"Unexpected message with type {type(message)} at the position {i}."
@@ -966,11 +975,11 @@ class Joke(BaseModel):
         setting this parameter to True is discouraged.
     """
     response_mime_type: Optional[str] = None
-    """Optional. Output response mimetype of the generated candidate text. Only 
-    supported in Gemini 1.5 and later models. Supported mimetype: 
-    * "text/plain": (default) Text output. 
+    """Optional. Output response mimetype of the generated candidate text. Only
+    supported in Gemini 1.5 and later models. Supported mimetype:
+    * "text/plain": (default) Text output.
     * "application/json": JSON response in the candidates.
-    The model also needs to be prompted to output the appropriate response 
+    The model also needs to be prompted to output the appropriate response
     type, otherwise the behavior is undefined. This is a preview feature.
     """
 
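Net effect of the `chat_models.py` hunks: consecutive model (AI) turns are now merged into a single `Content`, mirroring the rule already used for consecutive function responses. A dict-based sketch of the merge rule, with plain dicts standing in for the Vertex `Content`/`Part` protos:

```python
# Simplified stand-ins for Content/Part: {"role": ..., "parts": [...]}
vertex_messages = [{"role": "model", "parts": ["Mike age is 30"]}]

role, parts = "model", ["Arthur age is 30"]  # the next AIMessage, converted

prev_content = vertex_messages[-1]
prev_content_is_model = prev_content and prev_content["role"] == "model"
if prev_content_is_model:
    # Merge into the previous model turn instead of appending a new one.
    prev_parts = list(prev_content["parts"])
    prev_parts.extend(parts)
    vertex_messages[-1] = {"role": role, "parts": prev_parts}
else:
    vertex_messages.append({"role": role, "parts": parts})

print(vertex_messages)
# -> [{'role': 'model', 'parts': ['Mike age is 30', 'Arthur age is 30']}]
```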
diff --git a/libs/vertexai/langchain_google_vertexai/embeddings.py b/libs/vertexai/langchain_google_vertexai/embeddings.py
index df97a357..1e42e4be 100644
--- a/libs/vertexai/langchain_google_vertexai/embeddings.py
+++ b/libs/vertexai/langchain_google_vertexai/embeddings.py
@@ -320,7 +320,6 @@ def _prepare_and_validate_batches(
                 first_result = self._get_embeddings_with_retry(
                     first_batch, embeddings_type
                 )
-                batches = batches[1:]
                 break
             except InvalidArgument:
                 had_failure = True
@@ -347,6 +346,8 @@
                 batches = VertexAIEmbeddings._prepare_batches(
                     texts[first_batch_len:], self.instance["batch_size"]
                 )
+            else:
+                batches = batches[1:]
         else:
             # Still figuring out max batch size.
             batches = batches[1:]
diff --git a/libs/vertexai/langchain_google_vertexai/functions_utils.py b/libs/vertexai/langchain_google_vertexai/functions_utils.py
index 4fa54c81..771fd70f 100644
--- a/libs/vertexai/langchain_google_vertexai/functions_utils.py
+++ b/libs/vertexai/langchain_google_vertexai/functions_utils.py
@@ -167,6 +167,12 @@ def _format_to_gapic_function_declaration(
    elif isinstance(tool, vertexai.FunctionDeclaration):
        return _format_vertex_to_function_declaration(tool)
    elif isinstance(tool, dict):
+        # this could come from
+        # 'langchain_core.utils.function_calling.convert_to_openai_tool'
+        if tool.get("type") == "function" and tool.get("function"):
+            return _format_dict_to_function_declaration(
+                cast(FunctionDescription, tool.get("function"))
+            )
        return _format_dict_to_function_declaration(tool)
    else:
        raise ValueError(f"Unsupported tool call type {tool}")
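The new dict branch in `functions_utils.py` targets the wrapper shape produced by `convert_to_openai_tool`. For reference, a sketch with an illustrative function of my own (not from this PR):

```python
from langchain_core.utils.function_calling import convert_to_openai_tool


def multiply(a: int, b: int) -> int:
    """Multiply two integers."""
    return a * b


tool = convert_to_openai_tool(multiply)
# tool == {
#     "type": "function",
#     "function": {
#         "name": "multiply",
#         "description": "Multiply two integers.",
#         "parameters": {...},
#     },
# }
# The added branch unwraps tool["function"] before building the gapic
# FunctionDeclaration, instead of failing on the wrapper dict.
print(tool["type"], tool["function"]["name"])
```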
diff --git a/libs/vertexai/tests/integration_tests/test_model_garden.py b/libs/vertexai/tests/integration_tests/test_model_garden.py
index 5936497b..d0a2a781 100644
--- a/libs/vertexai/tests/integration_tests/test_model_garden.py
+++ b/libs/vertexai/tests/integration_tests/test_model_garden.py
@@ -230,3 +230,26 @@ def my_tool(name: str, age: int) -> None:
     assert tool_call_chunk["args"]
     if tool_call_chunk["args"]:
         assert json.loads(tool_call_chunk["args"]) == {"age": 27.0, "name": "Erick"}
+
+
+@pytest.mark.extended
+def test_anthropic_with_structured_output() -> None:
+    project = os.environ["PROJECT_ID"]
+    location = "us-east5"
+    model = ChatAnthropicVertex(
+        project=project,
+        location=location,
+        model="claude-3-opus@20240229",
+    )
+
+    class MyModel(BaseModel):
+        name: str
+        age: int
+
+    message = HumanMessage(content="My name is Erick and I am 27 years old")
+    model_with_structured_output = model.with_structured_output(MyModel)
+    response = model_with_structured_output.invoke([message])
+
+    assert isinstance(response, MyModel)
+    assert response.name == "Erick"
+    assert response.age == 27
diff --git a/libs/vertexai/tests/unit_tests/test_chat_models.py b/libs/vertexai/tests/unit_tests/test_chat_models.py
index e9953877..afc01006 100644
--- a/libs/vertexai/tests/unit_tests/test_chat_models.py
+++ b/libs/vertexai/tests/unit_tests/test_chat_models.py
@@ -464,6 +464,51 @@ def test_parse_history_gemini_function() -> None:
                 )
             ],
         ),
+        (
+            [
+                AIMessage(
+                    content=["Mike age is 30"],
+                    tool_calls=[
+                        ToolCall(
+                            name="Information",
+                            args={"name": "Rob"},
+                            id="00000000-0000-0000-0000-00000000000",
+                        ),
+                    ],
+                ),
+                AIMessage(
+                    content=["Arthur age is 30"],
+                    tool_calls=[
+                        ToolCall(
+                            name="Information",
+                            args={"name": "Ben"},
+                            id="00000000-0000-0000-0000-00000000000",
+                        ),
+                    ],
+                ),
+            ],
+            [
+                Content(
+                    role="model",
+                    parts=[
+                        Part(text="Mike age is 30"),
+                        Part(
+                            function_call=FunctionCall(
+                                name="Information",
+                                args={"name": "Rob"},
+                            )
+                        ),
+                        Part(text="Arthur age is 30"),
+                        Part(
+                            function_call=FunctionCall(
+                                name="Information",
+                                args={"name": "Ben"},
+                            )
+                        ),
+                    ],
+                )
+            ],
+        ),
     ],
 )
 def test_parse_history_gemini_multi(source_history, expected_history) -> None:
diff --git a/libs/vertexai/tests/unit_tests/test_embeddings.py b/libs/vertexai/tests/unit_tests/test_embeddings.py
index efba09d1..287fbb31 100644
--- a/libs/vertexai/tests/unit_tests/test_embeddings.py
+++ b/libs/vertexai/tests/unit_tests/test_embeddings.py
@@ -19,7 +19,7 @@ def test_langchain_google_vertexai_embed_image_multimodal_only() -> None:
 def test_langchain_google_vertexai_no_dups_dynamic_batch_size() -> None:
     mock_embeddings = MockVertexAIEmbeddings("textembedding-gecko@001")
     default_batch_size = mock_embeddings.instance["batch_size"]
-    texts = ["text_{i}" for i in range(default_batch_size * 2)]
+    texts = [f"text {i}" for i in range(default_batch_size * 2)]
     # It should only return one batch (out of two) still to process
     _, batches = mock_embeddings._prepare_and_validate_batches(texts=texts)
     assert len(batches) == 1
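End to end, the new integration test boils down to the following user-facing flow. A sketch assuming a GCP project with access to Claude on Vertex AI (the project id below is hypothetical):

```python
from langchain_core.pydantic_v1 import BaseModel
from langchain_google_vertexai.model_garden import ChatAnthropicVertex


class Person(BaseModel):
    name: str
    age: int


model = ChatAnthropicVertex(
    project="my-project",  # hypothetical project id
    location="us-east5",
    model="claude-3-opus@20240229",
)
structured_model = model.with_structured_output(Person)
response = structured_model.invoke("My name is Erick and I am 27 years old")
# -> Person(name='Erick', age=27)
```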