Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

feat: add File type to preview package #5873

Merged
merged 6 commits into from
Oct 4, 2023
Merged
Show file tree
Hide file tree
Changes from 3 commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
3 changes: 2 additions & 1 deletion haystack/preview/dataclasses/__init__.py
Original file line number Diff line number Diff line change
@@ -1,4 +1,5 @@
from haystack.preview.dataclasses.document import Document
from haystack.preview.dataclasses.answer import ExtractedAnswer, GeneratedAnswer, Answer
from haystack.preview.dataclasses.blob import Blob

# NOTE(review): this assignment is immediately overwritten by the one on the
# next line; it looks like the pre-change side of the diff - confirm before removing.
__all__ = ["Document", "ExtractedAnswer", "GeneratedAnswer", "Answer"]
# Export list that takes effect: the same four names plus "Blob".
__all__ = ["Document", "ExtractedAnswer", "GeneratedAnswer", "Answer", "Blob"]
47 changes: 47 additions & 0 deletions haystack/preview/dataclasses/blob.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,47 @@
import io
from dataclasses import dataclass, field
from pathlib import Path
from typing import Dict, Any


@dataclass(frozen=True)
class ByteStream:
    """
    Immutable holder for a raw binary payload and optional metadata.

    ``data`` carries the bytes themselves. ``metadata`` defaults to a fresh
    empty dict for every instance and is left out of the generated hash.
    """

    data: bytes
    metadata: Dict[str, Any] = field(default_factory=dict, hash=False)

    def to_file(self, destination_path: Path):
        """
        Write the wrapped bytes to a file opened in binary write mode.

        :param destination_path: Location of the file to write.
        """
        payload = self.data
        with open(destination_path, "wb") as out_file:
            out_file.write(payload)

    @classmethod
    def from_file_path(cls, filepath: Path) -> "ByteStream":
        """
        Build a ByteStream holding the full binary contents of a file.

        :param filepath: A valid path to a file.
        """
        with open(filepath, "rb") as in_file:
            contents = in_file.read()
        return cls(data=contents)

    @classmethod
    def from_string(cls, text: str, encoding: str = "utf-8") -> "ByteStream":
        """
        Build a ByteStream from the encoded form of a string.

        :param text: The string to encode
        :param encoding: The encoding used to convert the string into bytes
        """
        encoded = text.encode(encoding)
        return cls(data=encoded)

    @classmethod
    def from_stream(cls, stream: io.BytesIO) -> "ByteStream":
        """
        Build a ByteStream from whatever ``stream.read()`` returns.

        :param stream: The stream where bytes will be read from.
        """
        remaining = stream.read()
        return cls(data=remaining)
5 changes: 5 additions & 0 deletions releasenotes/notes/add-blob-type-2a9476a39841f54d.yaml
Original file line number Diff line number Diff line change
@@ -0,0 +1,5 @@
---
preview:
- |
Add the Blob type for passing raw binary data between components
in a pipeline.
23 changes: 23 additions & 0 deletions test/preview/dataclasses/test_blob.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,23 @@
from haystack.preview.dataclasses import Blob

import pytest


@pytest.mark.unit
def test_from_file_path(tmp_path, request):
    """Loading from a file path yields exactly the bytes stored in that file."""
    expected = "Hello, world!\n".encode()
    source_file = tmp_path / request.node.name
    # Create the fixture file and make sure something was actually written.
    with open(source_file, "wb") as handle:
        written = handle.write(expected)
    assert written

    loaded = Blob.from_file_path(source_file)
    assert loaded.data == expected


# Marked as a unit test like the sibling test in this module, so that a
# marker-filtered run (e.g. `pytest -m unit`) does not silently skip it.
@pytest.mark.unit
def test_save(tmp_path, request):
    """Saving a Blob writes its bytes to the given path unchanged."""
    test_str = "Hello, world!\n"
    test_path = tmp_path / request.node.name

    Blob(test_str.encode()).save(test_path)
    # Read the file back in binary mode and compare after decoding.
    with open(test_path, "rb") as fd:
        assert fd.read().decode() == test_str