diff --git a/haystack/nodes/other/route_documents.py b/haystack/nodes/other/route_documents.py
index e504d05e29..cd4ddfee46 100644
--- a/haystack/nodes/other/route_documents.py
+++ b/haystack/nodes/other/route_documents.py
@@ -20,7 +20,7 @@ class RouteDocuments(BaseComponent):
     def __init__(
         self,
         split_by: str = "content_type",
-        metadata_values: Optional[Union[List[str], List[List[str]]]] = None,
+        metadata_values: Optional[Union[List[Union[str, bool, int]], List[List[Union[str, bool, int]]]]] = None,
         return_remaining: bool = False,
     ):
         """
diff --git a/releasenotes/notes/route-documents-metadata-values-types-7b6bdbc916d2624b.yaml b/releasenotes/notes/route-documents-metadata-values-types-7b6bdbc916d2624b.yaml
new file mode 100644
index 0000000000..f5eb9b1551
--- /dev/null
+++ b/releasenotes/notes/route-documents-metadata-values-types-7b6bdbc916d2624b.yaml
@@ -0,0 +1,6 @@
+---
+fixes:
+  - |
+    The types of metadata values accepted by RouteDocuments were unnecessarily restricted to strings.
+    This caused validation errors (for example, when loading from a YAML file) if a user tried to use a boolean value.
+    Boolean and int types are now also accepted as valid types for metadata_values.
diff --git a/test/nodes/test_route_documents.py b/test/nodes/test_route_documents.py
index 2100aed964..2f9ea26338 100644
--- a/test/nodes/test_route_documents.py
+++ b/test/nodes/test_route_documents.py
@@ -52,7 +52,7 @@ def test_routedocuments_by_content_type_return_remaining(docs_diff_types):
 
 
 @pytest.mark.unit
-def test_routedocuments_by_metafield(docs):
+def test_routedocuments_by_metafield_str(docs):
     route_documents = RouteDocuments(split_by="meta_field", metadata_values=["test1", "test3", "test5"])
     assert route_documents.outgoing_edges == 3
     result, _ = route_documents.run(docs)
@@ -65,6 +65,45 @@ def test_routedocuments_by_metafield(docs):
     assert result["output_3"][0].meta["meta_field"] == "test5"
 
 
+@pytest.mark.unit
+def test_routedocuments_by_metafield_int():
+    """Route documents on integer metadata values and check each value gets its own output edge."""
+    docs = [
+        Document(content="doc 1", meta={"meta_field": 1}),
+        Document(content="doc 2", meta={"meta_field": 1}),
+        Document(content="doc 3", meta={"meta_field": 2}),
+    ]
+    route_documents = RouteDocuments(split_by="meta_field", metadata_values=[1, 2])
+    assert route_documents.outgoing_edges == 2
+    result, _ = route_documents.run(docs)
+    assert len(result["output_1"]) == 2
+    assert len(result["output_2"]) == 1
+    # Two values are configured and return_remaining is left at its default, so a third edge is not expected.
+    assert "output_3" not in result
+    assert result["output_1"][0].meta["meta_field"] == 1
+    assert result["output_2"][0].meta["meta_field"] == 2
+
+
+@pytest.mark.unit
+def test_routedocuments_by_metafield_bool():
+    """Route documents on boolean metadata values and check each value gets its own output edge."""
+    docs = [
+        Document(content="doc 1", meta={"meta_field": True}),
+        Document(content="doc 2", meta={"meta_field": True}),
+        Document(content="doc 3", meta={"meta_field": False}),
+    ]
+    route_documents = RouteDocuments(split_by="meta_field", metadata_values=[True, False])
+    assert route_documents.outgoing_edges == 2
+    result, _ = route_documents.run(docs)
+    assert len(result["output_1"]) == 2
+    assert len(result["output_2"]) == 1
+    # Two values are configured and return_remaining is left at its default, so a third edge is not expected.
+    assert "output_3" not in result
+    # Identity checks: `1 == True` and `0 == False` hold in Python, so `==` could not tell a bool from an int.
+    assert result["output_1"][0].meta["meta_field"] is True
+    assert result["output_2"][0].meta["meta_field"] is False
+
+
 @pytest.mark.unit
 def test_routedocuments_by_metafield_return_remaning(docs):
     route_documents = RouteDocuments(