From 6cceaac15f029d629f2be5ac6412b0121220f3db Mon Sep 17 00:00:00 2001 From: "David S. Batista" Date: Thu, 12 Dec 2024 15:16:54 +0100 Subject: [PATCH] docs: add deprecation warning nltk document splitter (#8628) * adding deprecation warning * adding release notes * adding release notes * updating message * Update haystack/components/preprocessors/nltk_document_splitter.py Co-authored-by: Daria Fokina --------- Co-authored-by: Daria Fokina --- .../components/preprocessors/nltk_document_splitter.py | 8 ++++++++ ...deprecating-NLTKDocumentSplitter-e9a621e025e9a49f.yaml | 4 ++++ 2 files changed, 12 insertions(+) create mode 100644 releasenotes/notes/deprecating-NLTKDocumentSplitter-e9a621e025e9a49f.yaml diff --git a/haystack/components/preprocessors/nltk_document_splitter.py b/haystack/components/preprocessors/nltk_document_splitter.py index d6f947ebfc..eb242d9013 100644 --- a/haystack/components/preprocessors/nltk_document_splitter.py +++ b/haystack/components/preprocessors/nltk_document_splitter.py @@ -2,6 +2,7 @@ # # SPDX-License-Identifier: Apache-2.0 +import warnings from copy import deepcopy from typing import Any, Callable, Dict, List, Literal, Optional, Tuple @@ -52,6 +53,13 @@ def __init__( # pylint: disable=too-many-positional-arguments representing the chunks after splitting. """ + warnings.warn( + "The NLTKDocumentSplitter is deprecated and will be removed in the next release. 
" + "See DocumentSplitter which now supports the functionalities of the NLTKDocumentSplitter, i.e.: " + "using NLTK to detect sentence boundaries.", + DeprecationWarning, + ) + super(NLTKDocumentSplitter, self).__init__( split_by=split_by, split_length=split_length, diff --git a/releasenotes/notes/deprecating-NLTKDocumentSplitter-e9a621e025e9a49f.yaml b/releasenotes/notes/deprecating-NLTKDocumentSplitter-e9a621e025e9a49f.yaml new file mode 100644 index 0000000000..e0331a00af --- /dev/null +++ b/releasenotes/notes/deprecating-NLTKDocumentSplitter-e9a621e025e9a49f.yaml @@ -0,0 +1,4 @@ +--- +deprecations: + - | + The NLTKDocumentSplitter is deprecated and will be removed in the next release. The DocumentSplitter will instead support the functionality of the NLTKDocumentSplitter.