From e7b45726adc15e17f75f018f4542ec11aecdaeb3 Mon Sep 17 00:00:00 2001 From: Leonid Kuligin Date: Fri, 28 Jun 2024 06:02:03 +0200 Subject: [PATCH] fix embeddings with max_batch_size (#338) --- libs/vertexai/langchain_google_vertexai/embeddings.py | 3 ++- libs/vertexai/tests/unit_tests/test_embeddings.py | 2 +- 2 files changed, 3 insertions(+), 2 deletions(-) diff --git a/libs/vertexai/langchain_google_vertexai/embeddings.py b/libs/vertexai/langchain_google_vertexai/embeddings.py index df97a357..1e42e4be 100644 --- a/libs/vertexai/langchain_google_vertexai/embeddings.py +++ b/libs/vertexai/langchain_google_vertexai/embeddings.py @@ -320,7 +320,6 @@ def _prepare_and_validate_batches( first_result = self._get_embeddings_with_retry( first_batch, embeddings_type ) - batches = batches[1:] break except InvalidArgument: had_failure = True @@ -347,6 +346,8 @@ def _prepare_and_validate_batches( batches = VertexAIEmbeddings._prepare_batches( texts[first_batch_len:], self.instance["batch_size"] ) + else: + batches = batches[1:] else: # Still figuring out max batch size. batches = batches[1:] diff --git a/libs/vertexai/tests/unit_tests/test_embeddings.py b/libs/vertexai/tests/unit_tests/test_embeddings.py index efba09d1..287fbb31 100644 --- a/libs/vertexai/tests/unit_tests/test_embeddings.py +++ b/libs/vertexai/tests/unit_tests/test_embeddings.py @@ -19,7 +19,7 @@ def test_langchain_google_vertexai_embed_image_multimodal_only() -> None: def test_langchain_google_vertexai_no_dups_dynamic_batch_size() -> None: mock_embeddings = MockVertexAIEmbeddings("textembedding-gecko@001") default_batch_size = mock_embeddings.instance["batch_size"] - texts = ["text_{i}" for i in range(default_batch_size * 2)] + texts = ["text {i}" for i in range(default_batch_size * 2)] # It should only return one batch (out of two) still to process _, batches = mock_embeddings._prepare_and_validate_batches(texts=texts) assert len(batches) == 1