From 176db5dbf9d5be87122e3feafa19593fed418cde Mon Sep 17 00:00:00 2001
From: "David S. Batista"
Date: Fri, 13 Dec 2024 12:12:40 +0100
Subject: [PATCH] initial import (#8635)

---
 e2e/pipelines/test_dense_doc_search.py       | 2 +-
 e2e/pipelines/test_preprocessing_pipeline.py | 4 +---
 2 files changed, 2 insertions(+), 4 deletions(-)

diff --git a/e2e/pipelines/test_dense_doc_search.py b/e2e/pipelines/test_dense_doc_search.py
index 39a587a106..f348b6f0e5 100644
--- a/e2e/pipelines/test_dense_doc_search.py
+++ b/e2e/pipelines/test_dense_doc_search.py
@@ -26,7 +26,7 @@ def test_dense_doc_search_pipeline(tmp_path, samples_path):
     indexing_pipeline.add_component(instance=DocumentJoiner(), name="joiner")
     indexing_pipeline.add_component(instance=DocumentCleaner(), name="cleaner")
     indexing_pipeline.add_component(
-        instance=DocumentSplitter(split_by="sentence", split_length=250, split_overlap=30), name="splitter"
+        instance=DocumentSplitter(split_by="period", split_length=250, split_overlap=30), name="splitter"
     )
     indexing_pipeline.add_component(
         instance=SentenceTransformersDocumentEmbedder(model="sentence-transformers/all-MiniLM-L6-v2"), name="embedder"
diff --git a/e2e/pipelines/test_preprocessing_pipeline.py b/e2e/pipelines/test_preprocessing_pipeline.py
index 82375f89d8..8894113913 100644
--- a/e2e/pipelines/test_preprocessing_pipeline.py
+++ b/e2e/pipelines/test_preprocessing_pipeline.py
@@ -25,9 +25,7 @@ def test_preprocessing_pipeline(tmp_path):
         instance=MetadataRouter(rules={"en": {"field": "language", "operator": "==", "value": "en"}}), name="router"
     )
     preprocessing_pipeline.add_component(instance=DocumentCleaner(), name="cleaner")
-    preprocessing_pipeline.add_component(
-        instance=DocumentSplitter(split_by="sentence", split_length=1), name="splitter"
-    )
+    preprocessing_pipeline.add_component(instance=DocumentSplitter(split_by="period", split_length=1), name="splitter")
     preprocessing_pipeline.add_component(
         instance=SentenceTransformersDocumentEmbedder(model="sentence-transformers/all-MiniLM-L6-v2"), name="embedder"
     )