diff --git a/sdks/python/apache_beam/ml/rag/embeddings/huggingface.py b/sdks/python/apache_beam/ml/rag/embeddings/huggingface.py index 68468ad1875b..8355c3e5a2a5 100644 --- a/sdks/python/apache_beam/ml/rag/embeddings/huggingface.py +++ b/sdks/python/apache_beam/ml/rag/embeddings/huggingface.py @@ -29,20 +29,14 @@ class HuggingfaceTextEmbeddings(EmbeddingsManager): - """SentenceTransformer embeddings for RAG pipeline. - - Extends EmbeddingsManager to work with RAG-specific types: - - Input: Chunk objects containing text to embed - - Output: Chunk objects with embedding property set - """ def __init__( self, model_name: str, *, max_seq_length: Optional[int] = None, **kwargs): - """Initialize RAG embeddings. - + """Uses Hugging Face SentenceTransformer embeddings for a RAG pipeline. + Args: model_name: Name of the sentence-transformers model to use max_seq_length: Maximum sequence length for the model - **kwargs: Additional arguments passed to parent + **kwargs: Additional arguments passed to the parent, including ModelHandler arguments """ super().__init__(type_adapter=create_rag_adapter(), **kwargs) self.model_name = model_name diff --git a/sdks/python/apache_beam/ml/rag/types.py b/sdks/python/apache_beam/ml/rag/types.py index 5d7d8b486739..79429899e4c1 100644 --- a/sdks/python/apache_beam/ml/rag/types.py +++ b/sdks/python/apache_beam/ml/rag/types.py @@ -34,6 +34,9 @@ @dataclass class Content: """Container for embeddable content. Add new types as when as necessary. + + Args: + text: Text content to be embedded """ text: Optional[str] = None @@ -42,7 +45,7 @@ class Content: class Embedding: """Represents vector embeddings. - Attributes: + Args: dense_embedding: Dense vector representation sparse_embedding: Optional sparse vector representation for hybrid search @@ -56,7 +59,7 @@ class Embedding: class Chunk: """Represents a chunk of embeddable content with metadata. 
- Attributes: + Args: content: The actual content of the chunk id: Unique identifier for the chunk index: Index of this chunk within the original document