Skip to content

Commit

Permalink
Make mypy happy
Browse files (browse the repository at this point in the history)
  • Loading branch information
sjrl committed Dec 17, 2024
1 parent 4f457b5 commit 649a2dc
Showing 1 changed file with 5 additions and 3 deletions.
8 changes: 5 additions & 3 deletions haystack/components/preprocessors/nltk_document_splitter.py
Original file line number | Diff line number | Diff line change
Expand Up @@ -93,7 +93,7 @@ def warm_up(self):
)

def _split_into_units(
self, text: str, split_by: Literal["function", "page", "passage", "sentence", "word", "line"]
self, text: str, split_by: Literal["function", "page", "passage", "period", "sentence", "word", "line"]
) -> List[str]:
"""
Splits the text into units based on the specified split_by parameter.
Expand All @@ -113,6 +113,7 @@ def _split_into_units(
# whitespace is preserved while splitting text into sentences when using keep_white_spaces=True
# so split_at is set to an empty string
self.split_at = ""
assert self.sentence_splitter is not None
result = self.sentence_splitter.split_sentences(text)
units = [sentence["sentence"] for sentence in result]
elif split_by == "word":
Expand Down Expand Up @@ -228,8 +229,9 @@ def _number_of_sentences_to_keep(sentences: List[str], split_length: int, split_
break
return num_sentences_to_keep

@staticmethod
def _concatenate_sentences_based_on_word_amount(
self, sentences: List[str], split_length: int, split_overlap: int
sentences: List[str], split_length: int, split_overlap: int
) -> Tuple[List[str], List[int], List[int]]:
"""
Groups the sentences into chunks of `split_length` words while respecting sentence boundaries.
Expand Down Expand Up @@ -265,7 +267,7 @@ def _concatenate_sentences_based_on_word_amount(
split_start_indices.append(chunk_start_idx)

# Get the number of sentences that overlap with the next chunk
num_sentences_to_keep = self._number_of_sentences_to_keep(
num_sentences_to_keep = NLTKDocumentSplitter._number_of_sentences_to_keep(
sentences=current_chunk, split_length=split_length, split_overlap=split_overlap
)
# Set up information for the new chunk
Expand Down

0 comments on commit 649a2dc

Please sign in to comment.