From 21bc1c8f9bac51732a2a60fee5b0e9d9ad369051 Mon Sep 17 00:00:00 2001 From: Stefano Fiorucci Date: Thu, 28 Dec 2023 13:11:21 +0100 Subject: [PATCH] change metadata to meta (#152) --- .../embedders/document_embedder.py | 16 +++++++------- .../embedders/text_embedder.py | 6 ++--- .../cohere/src/cohere_haystack/generator.py | 6 ++--- .../cohere/tests/test_cohere_generators.py | 12 +++++----- .../cohere/tests/test_document_embedder.py | 14 ++++++------ .../generators/image_generator.py | 2 +- .../tests/test_gradient_rag_pipelines.py | 2 +- .../instructor_document_embedder.py | 12 +++++----- .../test_instructor_document_embedder.py | 22 +++++++++---------- .../src/jina_haystack/document_embedder.py | 16 ++++++-------- .../jina/src/jina_haystack/text_embedder.py | 8 +++---- .../jina/tests/test_document_embedder.py | 22 +++++++++---------- integrations/jina/tests/test_text_embedder.py | 2 +- 13 files changed, 68 insertions(+), 72 deletions(-) diff --git a/integrations/cohere/src/cohere_haystack/embedders/document_embedder.py b/integrations/cohere/src/cohere_haystack/embedders/document_embedder.py index deec7c20d..6c87f3537 100644 --- a/integrations/cohere/src/cohere_haystack/embedders/document_embedder.py +++ b/integrations/cohere/src/cohere_haystack/embedders/document_embedder.py @@ -45,7 +45,7 @@ def __init__( timeout: int = 120, batch_size: int = 32, progress_bar: bool = True, - metadata_fields_to_embed: Optional[List[str]] = None, + meta_fields_to_embed: Optional[List[str]] = None, embedding_separator: str = "\n", ): """ @@ -74,7 +74,7 @@ def __init__( :param batch_size: Number of Documents to encode at once. :param progress_bar: Whether to show a progress bar or not. Can be helpful to disable in production deployments to keep the logs clean. - :param metadata_fields_to_embed: List of meta fields that should be embedded along with the Document text. + :param meta_fields_to_embed: List of meta fields that should be embedded along with the Document text. :param embedding_separator: Separator used to concatenate the meta fields to the Document text. """ @@ -98,7 +98,7 @@ def __init__( self.timeout = timeout self.batch_size = batch_size self.progress_bar = progress_bar - self.metadata_fields_to_embed = metadata_fields_to_embed or [] + self.meta_fields_to_embed = meta_fields_to_embed or [] self.embedding_separator = embedding_separator def to_dict(self) -> Dict[str, Any]: @@ -116,7 +116,7 @@ def to_dict(self) -> Dict[str, Any]: timeout=self.timeout, batch_size=self.batch_size, progress_bar=self.progress_bar, - metadata_fields_to_embed=self.metadata_fields_to_embed, + meta_fields_to_embed=self.meta_fields_to_embed, embedding_separator=self.embedding_separator, ) @@ -127,14 +127,14 @@ def _prepare_texts_to_embed(self, documents: List[Document]) -> List[str]: texts_to_embed: List[str] = [] for doc in documents: meta_values_to_embed = [ - str(doc.meta[key]) for key in self.metadata_fields_to_embed if doc.meta.get(key) is not None + str(doc.meta[key]) for key in self.meta_fields_to_embed if doc.meta.get(key) is not None ] text_to_embed = self.embedding_separator.join(meta_values_to_embed + [doc.content or ""]) # noqa: RUF005 texts_to_embed.append(text_to_embed) return texts_to_embed - @component.output_types(documents=List[Document], metadata=Dict[str, Any]) + @component.output_types(documents=List[Document], meta=Dict[str, Any]) def run(self, documents: List[Document]): """ Embed a list of Documents. @@ -152,7 +152,7 @@ def run(self, documents: List[Document]): if not documents: # return early if we were passed an empty list - return {"documents": [], "metadata": {}} + return {"documents": [], "meta": {}} texts_to_embed = self._prepare_texts_to_embed(documents) @@ -180,4 +180,4 @@ def run(self, documents: List[Document]): for doc, embeddings in zip(documents, all_embeddings): doc.embedding = embeddings - return {"documents": documents, "metadata": metadata} + return {"documents": documents, "meta": metadata} diff --git a/integrations/cohere/src/cohere_haystack/embedders/text_embedder.py b/integrations/cohere/src/cohere_haystack/embedders/text_embedder.py index f21060965..25822223e 100644 --- a/integrations/cohere/src/cohere_haystack/embedders/text_embedder.py +++ b/integrations/cohere/src/cohere_haystack/embedders/text_embedder.py @@ -27,7 +27,7 @@ class CohereTextEmbedder: print(text_embedder.run(text_to_embed)) # {'embedding': [-0.453125, 1.2236328, 2.0058594, ...] - # 'metadata': {'api_version': {'version': '1'}, 'billed_units': {'input_tokens': 4}}} + # 'meta': {'api_version': {'version': '1'}, 'billed_units': {'input_tokens': 4}}} ``` """ @@ -101,7 +101,7 @@ def to_dict(self) -> Dict[str, Any]: timeout=self.timeout, ) - @component.output_types(embedding=List[float], metadata=Dict[str, Any]) + @component.output_types(embedding=List[float], meta=Dict[str, Any]) def run(self, text: str): """Embed a string.""" if not isinstance(text, str): @@ -126,4 +126,4 @@ def run(self, text: str): ) embedding, metadata = get_response(cohere_client, [text], self.model_name, self.input_type, self.truncate) - return {"embedding": embedding[0], "metadata": metadata} + return {"embedding": embedding[0], "meta": metadata} diff --git a/integrations/cohere/src/cohere_haystack/generator.py b/integrations/cohere/src/cohere_haystack/generator.py index a07225804..571464c0c 100644 --- a/integrations/cohere/src/cohere_haystack/generator.py +++ b/integrations/cohere/src/cohere_haystack/generator.py @@ -135,7 +135,7 @@ def from_dict(cls, data: Dict[str, Any]) -> "CohereGenerator": data["init_parameters"]["streaming_callback"] = streaming_callback return default_from_dict(cls, data) - @component.output_types(replies=List[str], metadata=List[Dict[str, Any]]) + @component.output_types(replies=List[str], meta=List[Dict[str, Any]]) def run(self, prompt: str): """ Queries the LLM with the prompts to produce replies. @@ -153,12 +153,12 @@ def run(self, prompt: str): metadata_dict["finish_reason"] = response.finish_reason metadata = [metadata_dict] self._check_truncated_answers(metadata) - return {"replies": replies, "metadata": metadata} + return {"replies": replies, "meta": metadata} metadata = [{"finish_reason": resp.finish_reason} for resp in cast(Generations, response)] replies = [resp.text for resp in response] self._check_truncated_answers(metadata) - return {"replies": replies, "metadata": metadata} + return {"replies": replies, "meta": metadata} def _check_truncated_answers(self, metadata: List[Dict[str, Any]]): """ diff --git a/integrations/cohere/tests/test_cohere_generators.py b/integrations/cohere/tests/test_cohere_generators.py index 9462f364d..ec8027d96 100644 --- a/integrations/cohere/tests/test_cohere_generators.py +++ b/integrations/cohere/tests/test_cohere_generators.py @@ -119,8 +119,8 @@ def test_from_dict(self, monkeypatch): def test_check_truncated_answers(self, caplog): component = CohereGenerator(api_key="test-api-key") - metadata = [{"finish_reason": "MAX_TOKENS"}] - component._check_truncated_answers(metadata) + meta = [{"finish_reason": "MAX_TOKENS"}] + component._check_truncated_answers(meta) assert caplog.records[0].message == ( "Responses have been truncated before reaching a natural stopping point. " "Increase the max_tokens parameter to allow for longer completions." @@ -136,8 +136,8 @@ def test_cohere_generator_run(self): results = component.run(prompt="What's the capital of France?") assert len(results["replies"]) == 1 assert "Paris" in results["replies"][0] - assert len(results["metadata"]) == 1 - assert results["metadata"][0]["finish_reason"] == "COMPLETE" + assert len(results["meta"]) == 1 + assert results["meta"][0]["finish_reason"] == "COMPLETE" @pytest.mark.skipif( not os.environ.get("COHERE_API_KEY", None), @@ -174,6 +174,6 @@ def __call__(self, chunk): assert len(results["replies"]) == 1 assert "Paris" in results["replies"][0] - assert len(results["metadata"]) == 1 - assert results["metadata"][0]["finish_reason"] == "COMPLETE" + assert len(results["meta"]) == 1 + assert results["meta"][0]["finish_reason"] == "COMPLETE" assert callback.responses == results["replies"][0] diff --git a/integrations/cohere/tests/test_document_embedder.py b/integrations/cohere/tests/test_document_embedder.py index 5b0ad5c3f..02dbd4c3e 100644 --- a/integrations/cohere/tests/test_document_embedder.py +++ b/integrations/cohere/tests/test_document_embedder.py @@ -25,7 +25,7 @@ def test_init_default(self): assert embedder.timeout == 120 assert embedder.batch_size == 32 assert embedder.progress_bar is True - assert embedder.metadata_fields_to_embed == [] + assert embedder.meta_fields_to_embed == [] assert embedder.embedding_separator == "\n" def test_init_with_parameters(self): @@ -40,7 +40,7 @@ def test_init_with_parameters(self): timeout=60, batch_size=64, progress_bar=False, - metadata_fields_to_embed=["test_field"], + meta_fields_to_embed=["test_field"], embedding_separator="-", ) assert embedder.api_key == "test-api-key" @@ -53,7 +53,7 @@ def test_init_with_parameters(self): assert embedder.timeout == 60 assert embedder.batch_size == 64 assert embedder.progress_bar is False - assert embedder.metadata_fields_to_embed == ["test_field"] + assert embedder.meta_fields_to_embed == ["test_field"] assert embedder.embedding_separator == "-" def test_to_dict(self): @@ -71,7 +71,7 @@ def test_to_dict(self): "timeout": 120, "batch_size": 32, "progress_bar": True, - "metadata_fields_to_embed": [], + "meta_fields_to_embed": [], "embedding_separator": "\n", }, } @@ -88,7 +88,7 @@ def test_to_dict_with_custom_init_parameters(self): timeout=60, batch_size=64, progress_bar=False, - metadata_fields_to_embed=["text_field"], + meta_fields_to_embed=["text_field"], embedding_separator="-", ) component_dict = embedder_component.to_dict() @@ -104,7 +104,7 @@ def test_to_dict_with_custom_init_parameters(self): "timeout": 60, "batch_size": 64, "progress_bar": False, - "metadata_fields_to_embed": ["text_field"], + "meta_fields_to_embed": ["text_field"], "embedding_separator": "-", }, } @@ -139,4 +139,4 @@ def test_run_wrong_input_format(self): with pytest.raises(TypeError, match="CohereDocumentEmbedder expects a list of Documents as input"): embedder.run(documents=[1, 2, 3]) - assert embedder.run(documents=[]) == {"documents": [], "metadata": {}} + assert embedder.run(documents=[]) == {"documents": [], "meta": {}} diff --git a/integrations/google_vertex/src/google_vertex_haystack/generators/image_generator.py b/integrations/google_vertex/src/google_vertex_haystack/generators/image_generator.py index 67d270347..b69acdd6f 100644 --- a/integrations/google_vertex/src/google_vertex_haystack/generators/image_generator.py +++ b/integrations/google_vertex/src/google_vertex_haystack/generators/image_generator.py @@ -51,5 +51,5 @@ def from_dict(cls, data: Dict[str, Any]) -> "VertexAIImageGenerator": def run(self, prompt: str, negative_prompt: Optional[str] = None): negative_prompt = negative_prompt or self._kwargs.get("negative_prompt") res = self._model.generate_images(prompt=prompt, negative_prompt=negative_prompt, **self._kwargs) - images = [ByteStream(data=i._image_bytes, metadata=i.generation_parameters) for i in res.images] + images = [ByteStream(data=i._image_bytes, meta=i.generation_parameters) for i in res.images] return {"images": images} diff --git a/integrations/gradient/tests/test_gradient_rag_pipelines.py b/integrations/gradient/tests/test_gradient_rag_pipelines.py index 5835944a8..c5ec4affb 100644 --- a/integrations/gradient/tests/test_gradient_rag_pipelines.py +++ b/integrations/gradient/tests/test_gradient_rag_pipelines.py @@ -90,4 +90,4 @@ def test_gradient_embedding_retrieval_rag_pipeline(tmp_path): assert spyword in generated_answer.data assert generated_answer.query == question assert hasattr(generated_answer, "documents") - assert hasattr(generated_answer, "metadata") + assert hasattr(generated_answer, "meta") diff --git a/integrations/instructor_embedders/instructor_embedders_haystack/instructor_document_embedder.py b/integrations/instructor_embedders/instructor_embedders_haystack/instructor_document_embedder.py index ecf3594e6..4afe87a3c 100644 --- a/integrations/instructor_embedders/instructor_embedders_haystack/instructor_document_embedder.py +++ b/integrations/instructor_embedders/instructor_embedders_haystack/instructor_document_embedder.py @@ -67,7 +67,7 @@ def __init__( batch_size: int = 32, progress_bar: bool = True, normalize_embeddings: bool = False, - metadata_fields_to_embed: Optional[List[str]] = None, + meta_fields_to_embed: Optional[List[str]] = None, embedding_separator: str = "\n", ): """ @@ -91,7 +91,7 @@ def __init__( :param batch_size: Number of strings to encode at once. :param progress_bar: If true, displays progress bar during embedding. :param normalize_embeddings: If set to true, returned vectors will have the length of 1. - :param metadata_fields_to_embed: List of meta fields that should be embedded along with the Document content. + :param meta_fields_to_embed: List of meta fields that should be embedded along with the Document content. :param embedding_separator: Separator used to concatenate the meta fields to the Document content. """ @@ -103,7 +103,7 @@ def __init__( self.batch_size = batch_size self.progress_bar = progress_bar self.normalize_embeddings = normalize_embeddings - self.metadata_fields_to_embed = metadata_fields_to_embed or [] + self.meta_fields_to_embed = meta_fields_to_embed or [] self.embedding_separator = embedding_separator def to_dict(self) -> Dict[str, Any]: @@ -119,7 +119,7 @@ def to_dict(self) -> Dict[str, Any]: batch_size=self.batch_size, progress_bar=self.progress_bar, normalize_embeddings=self.normalize_embeddings, - metadata_fields_to_embed=self.metadata_fields_to_embed, + meta_fields_to_embed=self.meta_fields_to_embed, embedding_separator=self.embedding_separator, ) @@ -160,9 +160,7 @@ def run(self, documents: List[Document]): texts_to_embed = [] for doc in documents: meta_values_to_embed = [ - str(doc.meta[key]) - for key in self.metadata_fields_to_embed - if key in doc.meta and doc.meta[key] is not None + str(doc.meta[key]) for key in self.meta_fields_to_embed if key in doc.meta and doc.meta[key] is not None ] text_to_embed = [ self.instruction, diff --git a/integrations/instructor_embedders/tests/test_instructor_document_embedder.py b/integrations/instructor_embedders/tests/test_instructor_document_embedder.py index 53466ced2..e28df930b 100644 --- a/integrations/instructor_embedders/tests/test_instructor_document_embedder.py +++ b/integrations/instructor_embedders/tests/test_instructor_document_embedder.py @@ -20,7 +20,7 @@ def test_init_default(self): assert embedder.batch_size == 32 assert embedder.progress_bar is True assert embedder.normalize_embeddings is False - assert embedder.metadata_fields_to_embed == [] + assert embedder.meta_fields_to_embed == [] assert embedder.embedding_separator == "\n" def test_init_with_parameters(self): @@ -35,7 +35,7 @@ def test_init_with_parameters(self): batch_size=64, progress_bar=False, normalize_embeddings=True, - metadata_fields_to_embed=["test_field"], + meta_fields_to_embed=["test_field"], embedding_separator=" | ", ) assert embedder.model_name_or_path == "hkunlp/instructor-base" @@ -45,7 +45,7 @@ def test_init_with_parameters(self): assert embedder.batch_size == 64 assert embedder.progress_bar is False assert embedder.normalize_embeddings is True - assert embedder.metadata_fields_to_embed == ["test_field"] + assert embedder.meta_fields_to_embed == ["test_field"] assert embedder.embedding_separator == " | " def test_to_dict(self): @@ -65,7 +65,7 @@ def test_to_dict(self): "progress_bar": True, "normalize_embeddings": False, "embedding_separator": "\n", - "metadata_fields_to_embed": [], + "meta_fields_to_embed": [], }, } @@ -81,7 +81,7 @@ def test_to_dict_with_custom_init_parameters(self): batch_size=64, progress_bar=False, normalize_embeddings=True, - metadata_fields_to_embed=["test_field"], + meta_fields_to_embed=["test_field"], embedding_separator=" | ", ) embedder_dict = embedder.to_dict() @@ -95,7 +95,7 @@ def test_to_dict_with_custom_init_parameters(self): "batch_size": 64, "progress_bar": False, "normalize_embeddings": True, - "metadata_fields_to_embed": ["test_field"], + "meta_fields_to_embed": ["test_field"], "embedding_separator": " | ", }, } @@ -114,7 +114,7 @@ def test_from_dict(self): "batch_size": 32, "progress_bar": True, "normalize_embeddings": False, - "metadata_fields_to_embed": [], + "meta_fields_to_embed": [], "embedding_separator": "\n", }, } @@ -126,7 +126,7 @@ def test_from_dict(self): assert embedder.batch_size == 32 assert embedder.progress_bar is True assert embedder.normalize_embeddings is False - assert embedder.metadata_fields_to_embed == [] + assert embedder.meta_fields_to_embed == [] assert embedder.embedding_separator == "\n" def test_from_dict_with_custom_init_parameters(self): @@ -143,7 +143,7 @@ def test_from_dict_with_custom_init_parameters(self): "batch_size": 64, "progress_bar": False, "normalize_embeddings": True, - "metadata_fields_to_embed": ["test_field"], + "meta_fields_to_embed": ["test_field"], "embedding_separator": " | ", }, } @@ -155,7 +155,7 @@ def test_from_dict_with_custom_init_parameters(self): assert embedder.batch_size == 64 assert embedder.progress_bar is False assert embedder.normalize_embeddings is True - assert embedder.metadata_fields_to_embed == ["test_field"] + assert embedder.meta_fields_to_embed == ["test_field"] assert embedder.embedding_separator == " | " @patch("instructor_embedders_haystack.instructor_document_embedder._InstructorEmbeddingBackendFactory") @@ -223,7 +223,7 @@ def test_embed_metadata(self): embedder = InstructorDocumentEmbedder( model_name_or_path="model", instruction="Represent the financial document for retrieval", - metadata_fields_to_embed=["meta_field"], + meta_fields_to_embed=["meta_field"], embedding_separator="\n", ) embedder.embedding_backend = MagicMock() diff --git a/integrations/jina/src/jina_haystack/document_embedder.py b/integrations/jina/src/jina_haystack/document_embedder.py index f030a82c0..03f64462f 100644 --- a/integrations/jina/src/jina_haystack/document_embedder.py +++ b/integrations/jina/src/jina_haystack/document_embedder.py @@ -41,7 +41,7 @@ def __init__( suffix: str = "", batch_size: int = 32, progress_bar: bool = True, - metadata_fields_to_embed: Optional[List[str]] = None, + meta_fields_to_embed: Optional[List[str]] = None, embedding_separator: str = "\n", ): """ @@ -54,7 +54,7 @@ def __init__( :param batch_size: Number of Documents to encode at once. :param progress_bar: Whether to show a progress bar or not. Can be helpful to disable in production deployments to keep the logs clean. - :param metadata_fields_to_embed: List of meta fields that should be embedded along with the Document text. + :param meta_fields_to_embed: List of meta fields that should be embedded along with the Document text. :param embedding_separator: Separator used to concatenate the meta fields to the Document text. """ # if the user does not provide the API key, check if it is set in the module client @@ -75,7 +75,7 @@ def __init__( self.suffix = suffix self.batch_size = batch_size self.progress_bar = progress_bar - self.metadata_fields_to_embed = metadata_fields_to_embed or [] + self.meta_fields_to_embed = meta_fields_to_embed or [] self.embedding_separator = embedding_separator self._session = requests.Session() self._session.headers.update( @@ -104,7 +104,7 @@ def to_dict(self) -> Dict[str, Any]: suffix=self.suffix, batch_size=self.batch_size, progress_bar=self.progress_bar, - metadata_fields_to_embed=self.metadata_fields_to_embed, + meta_fields_to_embed=self.meta_fields_to_embed, embedding_separator=self.embedding_separator, ) @@ -115,9 +115,7 @@ def _prepare_texts_to_embed(self, documents: List[Document]) -> List[str]: texts_to_embed = [] for doc in documents: meta_values_to_embed = [ - str(doc.meta[key]) - for key in self.metadata_fields_to_embed - if key in doc.meta and doc.meta[key] is not None + str(doc.meta[key]) for key in self.meta_fields_to_embed if key in doc.meta and doc.meta[key] is not None ] text_to_embed = ( self.prefix + self.embedding_separator.join([*meta_values_to_embed, doc.content or ""]) + self.suffix @@ -155,7 +153,7 @@ def _embed_batch(self, texts_to_embed: List[str], batch_size: int) -> Tuple[List return all_embeddings, metadata - @component.output_types(documents=List[Document], metadata=Dict[str, Any]) + @component.output_types(documents=List[Document], meta=Dict[str, Any]) def run(self, documents: List[Document]): """ Embed a list of Documents. @@ -177,4 +175,4 @@ def run(self, documents: List[Document]): for doc, emb in zip(documents, embeddings): doc.embedding = emb - return {"documents": documents, "metadata": metadata} + return {"documents": documents, "meta": metadata} diff --git a/integrations/jina/src/jina_haystack/text_embedder.py b/integrations/jina/src/jina_haystack/text_embedder.py index b24cf8518..5b29bef6d 100644 --- a/integrations/jina/src/jina_haystack/text_embedder.py +++ b/integrations/jina/src/jina_haystack/text_embedder.py @@ -26,8 +26,8 @@ class JinaTextEmbedder: print(text_embedder.run(text_to_embed)) # {'embedding': [0.017020374536514282, -0.023255806416273117, ...], - # 'metadata': {'model': 'jina-embeddings-v2-base-en', - # 'usage': {'prompt_tokens': 4, 'total_tokens': 4}}} + # 'meta': {'model': 'jina-embeddings-v2-base-en', + # 'usage': {'prompt_tokens': 4, 'total_tokens': 4}}} ``` """ @@ -84,7 +84,7 @@ def to_dict(self) -> Dict[str, Any]: return default_to_dict(self, model_name=self.model_name, prefix=self.prefix, suffix=self.suffix) - @component.output_types(embedding=List[float], metadata=Dict[str, Any]) + @component.output_types(embedding=List[float], meta=Dict[str, Any]) def run(self, text: str): """Embed a string.""" if not isinstance(text, str): @@ -103,4 +103,4 @@ def run(self, text: str): metadata = {"model": resp["model"], "usage": dict(resp["usage"].items())} embedding = resp["data"][0]["embedding"] - return {"embedding": embedding, "metadata": metadata} + return {"embedding": embedding, "meta": metadata} diff --git a/integrations/jina/tests/test_document_embedder.py b/integrations/jina/tests/test_document_embedder.py index c32ffd500..ac8bb6975 100644 --- a/integrations/jina/tests/test_document_embedder.py +++ b/integrations/jina/tests/test_document_embedder.py @@ -34,7 +34,7 @@ def test_init_default(self, monkeypatch): assert embedder.suffix == "" assert embedder.batch_size == 32 assert embedder.progress_bar is True - assert embedder.metadata_fields_to_embed == [] + assert embedder.meta_fields_to_embed == [] assert embedder.embedding_separator == "\n" def test_init_with_parameters(self): @@ -45,7 +45,7 @@ def test_init_with_parameters(self): suffix="suffix", batch_size=64, progress_bar=False, - metadata_fields_to_embed=["test_field"], + meta_fields_to_embed=["test_field"], embedding_separator=" | ", ) assert embedder.model_name == "model" @@ -53,7 +53,7 @@ def test_init_with_parameters(self): assert embedder.suffix == "suffix" assert embedder.batch_size == 64 assert embedder.progress_bar is False - assert embedder.metadata_fields_to_embed == ["test_field"] + assert embedder.meta_fields_to_embed == ["test_field"] assert embedder.embedding_separator == " | " def test_init_fail_wo_api_key(self, monkeypatch): @@ -72,7 +72,7 @@ def test_to_dict(self): "suffix": "", "batch_size": 32, "progress_bar": True, - "metadata_fields_to_embed": [], + "meta_fields_to_embed": [], "embedding_separator": "\n", }, } @@ -85,7 +85,7 @@ def test_to_dict_with_custom_init_parameters(self): suffix="suffix", batch_size=64, progress_bar=False, - metadata_fields_to_embed=["test_field"], + meta_fields_to_embed=["test_field"], embedding_separator=" | ", ) data = component.to_dict() @@ -97,7 +97,7 @@ def test_to_dict_with_custom_init_parameters(self): "suffix": "suffix", "batch_size": 64, "progress_bar": False, - "metadata_fields_to_embed": ["test_field"], + "meta_fields_to_embed": ["test_field"], "embedding_separator": " | ", }, } @@ -108,7 +108,7 @@ def test_prepare_texts_to_embed_w_metadata(self): ] embedder = JinaDocumentEmbedder( - api_key="fake-api-key", metadata_fields_to_embed=["meta_field"], embedding_separator=" | " + api_key="fake-api-key", meta_fields_to_embed=["meta_field"], embedding_separator=" | " ) prepared_texts = embedder._prepare_texts_to_embed(documents) @@ -167,14 +167,14 @@ def test_run(self): model_name=model, prefix="prefix ", suffix=" suffix", - metadata_fields_to_embed=["topic"], + meta_fields_to_embed=["topic"], embedding_separator=" | ", ) result = embedder.run(documents=docs) documents_with_embeddings = result["documents"] - metadata = result["metadata"] + metadata = result["meta"] assert isinstance(documents_with_embeddings, list) assert len(documents_with_embeddings) == len(docs) @@ -197,7 +197,7 @@ def test_run_custom_batch_size(self): model_name=model, prefix="prefix ", suffix=" suffix", - metadata_fields_to_embed=["topic"], + meta_fields_to_embed=["topic"], embedding_separator=" | ", batch_size=1, ) @@ -205,7 +205,7 @@ def test_run_custom_batch_size(self): result = embedder.run(documents=docs) documents_with_embeddings = result["documents"] - metadata = result["metadata"] + metadata = result["meta"] assert isinstance(documents_with_embeddings, list) assert len(documents_with_embeddings) == len(docs) diff --git a/integrations/jina/tests/test_text_embedder.py b/integrations/jina/tests/test_text_embedder.py index 14476487e..c8a730c2f 100644 --- a/integrations/jina/tests/test_text_embedder.py +++ b/integrations/jina/tests/test_text_embedder.py @@ -86,7 +86,7 @@ def test_run(self): assert len(result["embedding"]) == 3 assert all(isinstance(x, float) for x in result["embedding"]) - assert result["metadata"] == { + assert result["meta"] == { "model": "jina-embeddings-v2-base-en", "usage": {"prompt_tokens": 6, "total_tokens": 6}, }