From 6a992d3bac01327b01f6d828f848fd21a77f8d99 Mon Sep 17 00:00:00 2001 From: "ilya.michlin" Date: Thu, 23 Nov 2023 22:25:06 +0200 Subject: [PATCH] force_chunk_size --- libs/langchain/langchain/text_splitter.py | 7 +++++-- 1 file changed, 5 insertions(+), 2 deletions(-) diff --git a/libs/langchain/langchain/text_splitter.py b/libs/langchain/langchain/text_splitter.py index b8ec86f55fa09..34e441e453885 100644 --- a/libs/langchain/langchain/text_splitter.py +++ b/libs/langchain/langchain/text_splitter.py @@ -903,7 +903,7 @@ def __init__( self, separators: Optional[List[str]] = None, keep_separator: bool = True, - is_separator_regex: bool = True, + is_separator_regex: bool = False, **kwargs: Any, ) -> None: """Create a new TextSplitter. @@ -913,13 +913,16 @@ def __init__( keep_separator: Whether to keep the separator in the chunks is_separator_regex: Whether the separator is a regex """ + if not separators: + separators = ["\s+"] + is_separator_regex = True + super().__init__( separator="", is_separator_regex=is_separator_regex, keep_separator=keep_separator, **kwargs, ) - separators = separators or ["\s+"] # If the separator is a regex, we don't need to escape it. if not self._is_separator_regex: