From 3a9d290cfc3c1a04937b6726aeba09c1d2a8764d Mon Sep 17 00:00:00 2001 From: "David S. Batista" Date: Tue, 17 Dec 2024 17:49:31 +0100 Subject: [PATCH] updating docstring --- haystack/components/preprocessors/recursive_splitter.py | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/haystack/components/preprocessors/recursive_splitter.py b/haystack/components/preprocessors/recursive_splitter.py index a40cd15eef..11754d5434 100644 --- a/haystack/components/preprocessors/recursive_splitter.py +++ b/haystack/components/preprocessors/recursive_splitter.py @@ -62,9 +62,10 @@ def __init__( # pylint: disable=too-many-positional-arguments """ Initializes a RecursiveDocumentSplitter. - :param split_length: The maximum length of each chunk in characters. + :param split_length: The maximum length of each chunk, measured in characters by default or in words. + See the `split_units` parameter. :param split_overlap: The number of characters to overlap between consecutive chunks. - :param split_units: The unit of the split_length parameter. It can be either "words" or "char". + :param split_units: The unit of the split_length parameter. It can be either "word" or "char". :param separators: An optional list of separator strings to use for splitting the text. The string separators will be treated as regular expressions unless the separator is "sentence", in that case the text will be split into sentences using a custom sentence tokenizer based on NLTK.