From 70ed7e61cf96fd3c5683acbad0dda6de08f1a284 Mon Sep 17 00:00:00 2001 From: Nicola Procopio Date: Mon, 12 Feb 2024 18:14:48 +0100 Subject: [PATCH] fix: from numpy float to float (#391) * created project * added parallel param * updated test * version 0.0.1 * renamed folder * removed print * updated readme * added fastembed.yml * fix typos * python version to 3.9 for lint * updated file * force install black * return to original file * try to fix workflow * retry * add missing info to pyproject * add hatch-vcs to check version * Update pyproject.toml * fixed typos * removed python 3.9 * Update fastembed.yml * Update fastembed_document_embedder.py * Update fastembed_text_embedder.py * ignore errors for bool arguments * fix * try moving noqa * move noqa * formatted with black * added numpy dependency * removed numpy * removed numpy * make mypy happy * Update fastembed_backend.py * removed classvar * fix * Update pyproject.toml * added import numpy lint * skip docs generation for the time being * Update README.md * added config.yml * generate docs * Update fastembed.yml * Update config.yml * rm unnecessary from_dict * final touch * updated labeler.yml * updated library readme * fix typos * fix docstrings/README * added prefix and suffix * fixed typos * Update fastembed_text_embedder.py from numpy float to float * Update fastembed_document_embedder.py from numpy float to float * Update test_fastembed_text_embedder.py from numpy float to float * Update test_fastembed_document_embedder.py from numpy float to float * Update fastembed_document_embedder.py fix typos * Update fastembed_text_embedder.py added if in run * Update fastembed_document_embedder.py added if into run * modify backend --------- Co-authored-by: Stefano Fiorucci --- .../embedders/fastembed/embedding_backend/fastembed_backend.py | 3 ++- .../embedders/fastembed/fastembed_document_embedder.py | 2 +- .../fastembed/tests/test_fastembed_document_embedder.py | 2 +- 
integrations/fastembed/tests/test_fastembed_text_embedder.py | 2 +- 4 files changed, 5 insertions(+), 4 deletions(-) diff --git a/integrations/fastembed/src/haystack_integrations/components/embedders/fastembed/embedding_backend/fastembed_backend.py b/integrations/fastembed/src/haystack_integrations/components/embedders/fastembed/embedding_backend/fastembed_backend.py index 392f9d32d..ee51283e6 100644 --- a/integrations/fastembed/src/haystack_integrations/components/embedders/fastembed/embedding_backend/fastembed_backend.py +++ b/integrations/fastembed/src/haystack_integrations/components/embedders/fastembed/embedding_backend/fastembed_backend.py @@ -38,5 +38,6 @@ def __init__( self.model = TextEmbedding(model_name=model_name) def embed(self, data: List[List[str]], **kwargs) -> List[List[float]]: - embeddings = list(self.model.embed(data, **kwargs)) + # the embed method returns an Iterable[np.ndarray], so we convert it to a list of lists + embeddings = [np_array.tolist() for np_array in self.model.embed(data, **kwargs)] return embeddings diff --git a/integrations/fastembed/src/haystack_integrations/components/embedders/fastembed/fastembed_document_embedder.py b/integrations/fastembed/src/haystack_integrations/components/embedders/fastembed/fastembed_document_embedder.py index 03dc301b9..b913b0de4 100644 --- a/integrations/fastembed/src/haystack_integrations/components/embedders/fastembed/fastembed_document_embedder.py +++ b/integrations/fastembed/src/haystack_integrations/components/embedders/fastembed/fastembed_document_embedder.py @@ -142,6 +142,6 @@ def run(self, documents: List[Document]): ) for doc, emb in zip(documents, embeddings): - doc.embedding = list(emb) + doc.embedding = emb return {"documents": documents} diff --git a/integrations/fastembed/tests/test_fastembed_document_embedder.py b/integrations/fastembed/tests/test_fastembed_document_embedder.py index 6dd1b6e52..597999354 100644 --- a/integrations/fastembed/tests/test_fastembed_document_embedder.py 
+++ b/integrations/fastembed/tests/test_fastembed_document_embedder.py @@ -248,4 +248,4 @@ def test_run(self): assert isinstance(embedding, list) assert len(embedding) == 384 - assert all(isinstance(emb.item(), float) for emb in embedding) + assert all(isinstance(emb, float) for emb in embedding) diff --git a/integrations/fastembed/tests/test_fastembed_text_embedder.py b/integrations/fastembed/tests/test_fastembed_text_embedder.py index 465f17976..3a7588263 100644 --- a/integrations/fastembed/tests/test_fastembed_text_embedder.py +++ b/integrations/fastembed/tests/test_fastembed_text_embedder.py @@ -186,4 +186,4 @@ def test_run(self): assert isinstance(embedding, list) assert len(embedding) == 384 - assert all(isinstance(emb.item(), float) for emb in embedding) + assert all(isinstance(emb, float) for emb in embedding)