Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

fix: Update Route docs validation to allow more types #6942

Merged
merged 3 commits into from
Feb 8, 2024
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
2 changes: 1 addition & 1 deletion haystack/nodes/other/route_documents.py
Original file line number Diff line number Diff line change
Expand Up @@ -20,7 +20,7 @@ class RouteDocuments(BaseComponent):
def __init__(
self,
split_by: str = "content_type",
metadata_values: Optional[Union[List[str], List[List[str]]]] = None,
metadata_values: Optional[Union[List[Union[str, bool, int]], List[List[Union[str, bool, int]]]]] = None,
return_remaining: bool = False,
):
"""
Expand Down
Original file line number Diff line number Diff line change
@@ -0,0 +1,6 @@
---
fixes:
- |
The types of metadata values accepted by RouteDocuments were unnecessarily restricted to strings.
This caused validation errors (for example, when loading from a YAML file) if a user tried to use a boolean value.
Boolean and int are now accepted as valid types for metadata_values.
36 changes: 35 additions & 1 deletion test/nodes/test_route_documents.py
Original file line number Diff line number Diff line change
Expand Up @@ -52,7 +52,7 @@ def test_routedocuments_by_content_type_return_remaining(docs_diff_types):


@pytest.mark.unit
def test_routedocuments_by_metafield(docs):
def test_routedocuments_by_metafield_str(docs):
route_documents = RouteDocuments(split_by="meta_field", metadata_values=["test1", "test3", "test5"])
assert route_documents.outgoing_edges == 3
result, _ = route_documents.run(docs)
Expand All @@ -65,6 +65,40 @@ def test_routedocuments_by_metafield(docs):
assert result["output_3"][0].meta["meta_field"] == "test5"


@pytest.mark.unit
def test_routedocuments_by_metafield_int():
    """Check that RouteDocuments routes on integer ``meta_field`` values.

    Two routing values are configured, so exactly two outgoing edges are
    expected: documents with ``meta_field == 1`` go to ``output_1`` and
    documents with ``meta_field == 2`` go to ``output_2``.
    """
    docs = [
        Document(content="doc 1", meta={"meta_field": 1}),
        Document(content="doc 2", meta={"meta_field": 1}),
        Document(content="doc 3", meta={"meta_field": 2}),
    ]
    route_documents = RouteDocuments(split_by="meta_field", metadata_values=[1, 2])
    assert route_documents.outgoing_edges == 2

    result, _ = route_documents.run(docs)
    assert len(result["output_1"]) == 2
    assert len(result["output_2"]) == 1
    # Only two routing values were given and return_remaining defaults to
    # False, so the first edge that must NOT exist is output_3.
    assert "output_3" not in result
    assert result["output_1"][0].meta["meta_field"] == 1
    assert result["output_2"][0].meta["meta_field"] == 2


@pytest.mark.unit
def test_routedocuments_by_metafield_bool():
    """Check that RouteDocuments routes on boolean ``meta_field`` values.

    Two routing values are configured, so exactly two outgoing edges are
    expected: documents with ``meta_field`` True go to ``output_1`` and
    documents with ``meta_field`` False go to ``output_2``.
    """
    docs = [
        Document(content="doc 1", meta={"meta_field": True}),
        Document(content="doc 2", meta={"meta_field": True}),
        Document(content="doc 3", meta={"meta_field": False}),
    ]
    route_documents = RouteDocuments(split_by="meta_field", metadata_values=[True, False])
    assert route_documents.outgoing_edges == 2

    result, _ = route_documents.run(docs)
    assert len(result["output_1"]) == 2
    assert len(result["output_2"]) == 1
    # Only two routing values were given and return_remaining defaults to
    # False, so the first edge that must NOT exist is output_3.
    assert "output_3" not in result
    # Identity checks rather than ``== True`` / ``== False``: equality would
    # also pass for 1 / 0 and would not prove the value stayed a boolean.
    assert result["output_1"][0].meta["meta_field"] is True
    assert result["output_2"][0].meta["meta_field"] is False


@pytest.mark.unit
def test_routedocuments_by_metafield_return_remaning(docs):
route_documents = RouteDocuments(
Expand Down
Loading