Skip to content

Commit

Permalink
Jina - review docstrings (#504)
Browse files Browse the repository at this point in the history
* wip

* jina - review docstrings

* requested changes
  • Loading branch information
anakin87 authored Feb 29, 2024
1 parent 3ddb10a commit f49523e
Show file tree
Hide file tree
Showing 2 changed files with 50 additions and 17 deletions.
Original file line number Diff line number Diff line change
Expand Up @@ -20,12 +20,14 @@ class JinaDocumentEmbedder:
Usage example:
```python
from haystack import Document
from jina_haystack import JinaDocumentEmbedder
from haystack_integrations.components.embedders.jina import JinaDocumentEmbedder
doc = Document(content="I love pizza!")
# Make sure that the environment variable JINA_API_KEY is set
document_embedder = JinaDocumentEmbedder()
doc = Document(content="I love pizza!")
result = document_embedder.run([doc])
print(result['documents'][0].embedding)
Expand All @@ -46,8 +48,10 @@ def __init__(
):
"""
Create a JinaDocumentEmbedder component.
:param api_key: The Jina API key.
:param model: The name of the Jina model to use. Check the list of available models on `https://jina.ai/embeddings/`
:param model: The name of the Jina model to use.
Check the list of available models on [Jina documentation](https://jina.ai/embeddings/).
:param prefix: A string to add to the beginning of each text.
:param suffix: A string to add to the end of each text.
:param batch_size: Number of Documents to encode at once.
Expand Down Expand Up @@ -83,8 +87,9 @@ def _get_telemetry_data(self) -> Dict[str, Any]:

def to_dict(self) -> Dict[str, Any]:
"""
This method overrides the default serializer in order to avoid leaking the `api_key` value passed
to the constructor.
Serializes the component to a dictionary.
:returns:
Dictionary with serialized data.
"""
return default_to_dict(
self,
Expand All @@ -100,6 +105,13 @@ def to_dict(self) -> Dict[str, Any]:

@classmethod
def from_dict(cls, data: Dict[str, Any]) -> "JinaDocumentEmbedder":
"""
Deserializes the component from a dictionary.
:param data:
Dictionary to deserialize from.
:returns:
Deserialized component.
"""
deserialize_secrets_inplace(data["init_parameters"], keys=["api_key"])
return default_from_dict(cls, data)

Expand Down Expand Up @@ -151,10 +163,13 @@ def _embed_batch(self, texts_to_embed: List[str], batch_size: int) -> Tuple[List
@component.output_types(documents=List[Document], meta=Dict[str, Any])
def run(self, documents: List[Document]):
"""
Embed a list of Documents.
The embedding of each Document is stored in the `embedding` field of the Document.
Compute the embeddings for a list of Documents.
:param documents: A list of Documents to embed.
:returns: A dictionary with the following keys:
- `documents`: List of Documents, each with an `embedding` field containing the computed embedding.
- `meta`: A dictionary with metadata including the model name and usage statistics.
:raises TypeError: If the input is not a list of Documents.
"""
if not isinstance(documents, list) or documents and not isinstance(documents[0], Document):
msg = (
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -13,16 +13,18 @@
@component
class JinaTextEmbedder:
"""
A component for embedding strings using Jina models.
A component for embedding strings using Jina AI models.
Usage example:
```python
from jina_haystack import JinaTextEmbedder
from haystack_integrations.components.embedders.jina import JinaTextEmbedder
text_to_embed = "I love pizza!"
# Make sure that the environment variable JINA_API_KEY is set
text_embedder = JinaTextEmbedder()
text_to_embed = "I love pizza!"
print(text_embedder.run(text_to_embed))
# {'embedding': [0.017020374536514282, -0.023255806416273117, ...],
Expand All @@ -39,11 +41,12 @@ def __init__(
suffix: str = "",
):
"""
Create an JinaTextEmbedder component.
Create a JinaTextEmbedder component.
:param api_key: The Jina API key. It can be explicitly provided or automatically read from the
environment variable JINA_API_KEY (recommended).
:param model: The name of the Jina model to use. Check the list of available models on `https://jina.ai/embeddings/`
environment variable `JINA_API_KEY` (recommended).
:param model: The name of the Jina model to use.
Check the list of available models on [Jina documentation](https://jina.ai/embeddings/).
:param prefix: A string to add to the beginning of each text.
:param suffix: A string to add to the end of each text.
"""
Expand Down Expand Up @@ -71,22 +74,37 @@ def _get_telemetry_data(self) -> Dict[str, Any]:

def to_dict(self) -> Dict[str, Any]:
"""
This method overrides the default serializer in order to avoid leaking the `api_key` value passed
to the constructor.
Serializes the component to a dictionary.
:returns:
Dictionary with serialized data.
"""

return default_to_dict(
self, api_key=self.api_key.to_dict(), model=self.model_name, prefix=self.prefix, suffix=self.suffix
)

@classmethod
def from_dict(cls, data: Dict[str, Any]) -> "JinaTextEmbedder":
"""
Deserializes the component from a dictionary.
:param data:
Dictionary to deserialize from.
:returns:
Deserialized component.
"""
deserialize_secrets_inplace(data["init_parameters"], keys=["api_key"])
return default_from_dict(cls, data)

@component.output_types(embedding=List[float], meta=Dict[str, Any])
def run(self, text: str):
"""Embed a string."""
"""
Embed a string.
:param text: The string to embed.
:returns: A dictionary with the following keys:
- `embedding`: The embedding of the input string.
- `meta`: A dictionary with metadata including the model name and usage statistics.
:raises TypeError: If the input is not a string.
"""
if not isinstance(text, str):
msg = (
"JinaTextEmbedder expects a string as an input."
Expand Down

0 comments on commit f49523e

Please sign in to comment.