diff --git a/haystack/nodes/preprocessor/preprocessor.py b/haystack/nodes/preprocessor/preprocessor.py index 9dd21978ce..56e6b310bd 100644 --- a/haystack/nodes/preprocessor/preprocessor.py +++ b/haystack/nodes/preprocessor/preprocessor.py @@ -120,7 +120,7 @@ def __init__( nltk.data.find("tokenizers/punkt") except LookupError: try: - nltk.download("punkt") + nltk.download("punkt_tab") except FileExistsError as error: logger.debug("NLTK punkt tokenizer seems to be already downloaded. Error message: %s", error) pass diff --git a/pyproject.toml b/pyproject.toml index 42b8acda5a..7f9227e024 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -147,7 +147,7 @@ crawler = [ "selenium>=4.11.0" ] preprocessing = [ - "nltk", + "nltk>=3.9", "langdetect", # for language classification ] file-conversion = [ diff --git a/releasenotes/notes/upgrade-ntlk-1e94de2d6f5dd3b6.yaml b/releasenotes/notes/upgrade-ntlk-1e94de2d6f5dd3b6.yaml new file mode 100644 index 0000000000..b17abeb5cf --- /dev/null +++ b/releasenotes/notes/upgrade-ntlk-1e94de2d6f5dd3b6.yaml @@ -0,0 +1,2 @@ +fixes: + - Upgrades nltk to 3.9 as prior versions are affected by https://nvd.nist.gov/vuln/detail/CVE-2024-39705