Skip to content

Commit

Permalink
add test
Browse files Browse the repository at this point in the history
  • Loading branch information
tstadel committed Mar 1, 2024
1 parent 39ce7ff commit 41e74c2
Showing 1 changed file with 14 additions and 0 deletions.
14 changes: 14 additions & 0 deletions test/nodes/test_preprocessor.py
Original file line number Diff line number Diff line change
Expand Up @@ -265,6 +265,20 @@ def test_preprocess_page_split_and_split_overlap():
assert output[1].meta["page"] == 2


@pytest.mark.unit
def test_preprocess_page_split_with_empty_pages():
    """Check page numbering when splitting by page across empty pages.

    The input text places content on pages 1, 3 and 5, with each pair of
    texts separated by two consecutive form-feed characters (so pages 2
    and 4 carry no text). With one page per split, no overlap and page
    numbers enabled, the test expects exactly three documents whose
    ``page`` meta values are 1, 3 and 5 and whose ``_split_id`` values
    are 0, 1 and 2.
    """
    source_doc = Document(
        content="This is a document on page 1.\f\fThis is a document on page 3.\f\fThis is a document on page 5."
    )
    preprocessor = PreProcessor(
        split_by="page",
        split_length=1,
        split_respect_sentence_boundary=False,
        split_overlap=0,
        add_page_number=True,
    )
    run_result = preprocessor.run([source_doc])
    output = run_result[0]["documents"]

    # Check the count first so a wrong number of splits fails with a clear message.
    assert len(output) == 3

    expected = [
        Document(content="This is a document on page 1.", meta={"_split_id": 0, "page": 1}),
        Document(content="This is a document on page 3.", meta={"_split_id": 1, "page": 3}),
        Document(content="This is a document on page 5.", meta={"_split_id": 2, "page": 5}),
    ]
    for actual, wanted in zip(output, expected):
        assert actual == wanted


@pytest.mark.unit
def test_preprocess_tiktoken_token_split(mock_tiktoken_tokenizer):
raw_docs = [
Expand Down

0 comments on commit 41e74c2

Please sign in to comment.