Skip to content

Commit

Permalink
chore: Run linting tests to ensure code quality
Browse files Browse the repository at this point in the history
  • Loading branch information
chkaty committed Nov 27, 2024
1 parent 3baf976 commit 65d09d0
Showing 1 changed file with 56 additions and 24 deletions.
80 changes: 56 additions & 24 deletions libs/text-splitters/tests/unit_tests/test_text_splitters.py
Original file line number Diff line number Diff line change
Expand Up @@ -1527,7 +1527,7 @@ def test_experimental_markdown_syntax_text_splitter_split_lines() -> None:
assert output == expected_output


EXPERIMENTAL_MARKDOWN_DOCUMENTS = DOCUMENTS = [
EXPERIMENTAL_MARKDOWN_DOCUMENTS = [
(
"# My Header 1 From Document 1\n"
"Content for header 1 from Document 1\n"
Expand Down Expand Up @@ -1560,8 +1560,10 @@ def test_experimental_markdown_syntax_text_splitter_split_lines() -> None:
),
]

def test_experimental_markdown_syntax_text_splitter_with_multi_files() -> None:
"""Test experimental markdown syntax splitter split on default called consecutively on two files."""

def test_experimental_markdown_syntax_text_splitter_on_multi_files() -> None:
"""Test experimental markdown syntax splitter split
with default settings, called consecutively on two files."""
markdown_splitter = ExperimentalMarkdownSyntaxTextSplitter()
output = []
for experimental_markdown_document in EXPERIMENTAL_MARKDOWN_DOCUMENTS:
Expand Down Expand Up @@ -1638,15 +1640,20 @@ def test_experimental_markdown_syntax_text_splitter_with_multi_files() -> None:

assert output == expected_output

def test_experimental_markdown_syntax_text_splitter_split_lines_with_multi_files() -> None:


def test_experimental_markdown_syntax_text_splitter_split_lines_on_multi_files() -> (
None
):
"""Test experimental markdown syntax splitter split
on each line, called consecutively on two files."""
markdown_splitter = ExperimentalMarkdownSyntaxTextSplitter(return_each_line=True)
output = []
for experimental_markdown_document in EXPERIMENTAL_MARKDOWN_DOCUMENTS:
output += markdown_splitter.split_text(experimental_markdown_document)
expected_output = [
Document(
page_content="Content for header 1 from Document 1", metadata={"Header 1": "My Header 1 From Document 1"}
page_content="Content for header 1 from Document 1",
metadata={"Header 1": "My Header 1 From Document 1"},
),
Document(
page_content="Content for header 2 from Document 1",
Expand Down Expand Up @@ -1700,7 +1707,8 @@ def test_experimental_markdown_syntax_text_splitter_split_lines_with_multi_files
metadata={"Header 1": "Header 1 again From Document 1"},
),
Document(
page_content="Content for header 1 from Document 2", metadata={"Header 1": "My Header 1 From Document 2"}
page_content="Content for header 1 from Document 2",
metadata={"Header 1": "My Header 1 From Document 2"},
),
Document(
page_content="Content for header 2 from Document 2",
Expand Down Expand Up @@ -1758,21 +1766,26 @@ def test_experimental_markdown_syntax_text_splitter_split_lines_with_multi_files
assert output == expected_output


def test_experimental_markdown_syntax_text_splitter_with_header_with_multi_files() -> None:
"""Test experimental markdown splitter by header called consecutively on two files"""

def test_experimental_markdown_syntax_text_splitter_with_header_on_multi_files() -> (
None
):
"""Test experimental markdown splitter
by header, called consecutively on two files."""

markdown_splitter = ExperimentalMarkdownSyntaxTextSplitter(strip_headers=False)
output = []
for experimental_markdown_document in EXPERIMENTAL_MARKDOWN_DOCUMENTS:
output += markdown_splitter.split_text(experimental_markdown_document)

expected_output = [
Document(
page_content="# My Header 1 From Document 1\nContent for header 1 from Document 1\n",
page_content="# My Header 1 From Document 1\n"
"Content for header 1 from Document 1\n",
metadata={"Header 1": "My Header 1 From Document 1"},
),
Document(
page_content="## Header 2 From Document 1\nContent for header 2 from Document 1\n",
page_content="## Header 2 From Document 1\n"
"Content for header 2 from Document 1\n",
metadata={
"Header 1": "My Header 1 From Document 1",
"Header 2": "Header 2 From Document 1",
Expand All @@ -1790,7 +1803,8 @@ def test_experimental_markdown_syntax_text_splitter_with_header_with_multi_files
},
),
Document(
page_content="# Header 1 again From Document 1\nWe should also split on the horizontal line\n",
page_content="# Header 1 again From Document 1\n"
"We should also split on the horizontal line\n",
metadata={"Header 1": "Header 1 again From Document 1"},
),
Document(
Expand All @@ -1801,11 +1815,13 @@ def test_experimental_markdown_syntax_text_splitter_with_header_with_multi_files
metadata={"Header 1": "Header 1 again From Document 1"},
),
Document(
page_content="# My Header 1 From Document 2\nContent for header 1 from Document 2\n",
page_content="# My Header 1 From Document 2\n"
"Content for header 1 from Document 2\n",
metadata={"Header 1": "My Header 1 From Document 2"},
),
Document(
page_content="## Header 2 From Document 2\nContent for header 2 from Document 2\n",
page_content="## Header 2 From Document 2\n"
"Content for header 2 from Document 2\n",
metadata={
"Header 1": "My Header 1 From Document 2",
"Header 2": "Header 2 From Document 2",
Expand All @@ -1823,7 +1839,8 @@ def test_experimental_markdown_syntax_text_splitter_with_header_with_multi_files
},
),
Document(
page_content="# Header 1 again From Document 2\nWe should also split on the horizontal line\n",
page_content="# Header 1 again From Document 2\n"
"We should also split on the horizontal line\n",
metadata={"Header 1": "Header 1 again From Document 2"},
),
Document(
Expand All @@ -1836,8 +1853,12 @@ def test_experimental_markdown_syntax_text_splitter_with_header_with_multi_files
]
assert output == expected_output

def test_experimental_markdown_syntax_text_splitter_header_configuration_with_multi_files() -> None:
"""Test experimental markdown syntax splitter."""

def test_experimental_markdown_syntax_text_splitter_header_config_on_multi_files() -> (
None
):
"""Test experimental markdown splitter
by header configuration, called consecutively on two files."""

headers_to_split_on = [("#", "Encabezamiento 1")]
markdown_splitter = ExperimentalMarkdownSyntaxTextSplitter(
Expand All @@ -1846,18 +1867,23 @@ def test_experimental_markdown_syntax_text_splitter_header_configuration_with_mu
output = []
for experimental_markdown_document in EXPERIMENTAL_MARKDOWN_DOCUMENTS:
output += markdown_splitter.split_text(experimental_markdown_document)

expected_output = [
Document(
page_content="Content for header 1 from Document 1\n## Header 2 From Document 1\nContent for header 2 from Document 1\n",
page_content="Content for header 1 from Document 1\n"
"## Header 2 From Document 1\n"
"Content for header 2 from Document 1\n",
metadata={"Encabezamiento 1": "My Header 1 From Document 1"},
),
Document(
page_content=(
"```python\ndef func_definition():\n "
"print('Keep the whitespace consistent')\n```\n"
),
metadata={"Code": "python", "Encabezamiento 1": "My Header 1 From Document 1"},
metadata={
"Code": "python",
"Encabezamiento 1": "My Header 1 From Document 1",
},
),
Document(
page_content="We should also split on the horizontal line\n",
Expand All @@ -1871,15 +1897,20 @@ def test_experimental_markdown_syntax_text_splitter_header_configuration_with_mu
metadata={"Encabezamiento 1": "Header 1 again From Document 1"},
),
Document(
page_content="Content for header 1 from Document 2\n## Header 2 From Document 2\nContent for header 2 from Document 2\n",
page_content="Content for header 1 from Document 2\n"
"## Header 2 From Document 2\n"
"Content for header 2 from Document 2\n",
metadata={"Encabezamiento 1": "My Header 1 From Document 2"},
),
Document(
page_content=(
"```python\ndef func_definition():\n "
"print('Keep the whitespace consistent')\n```\n"
),
metadata={"Code": "python", "Encabezamiento 1": "My Header 1 From Document 2"},
metadata={
"Code": "python",
"Encabezamiento 1": "My Header 1 From Document 2",
},
),
Document(
page_content="We should also split on the horizontal line\n",
Expand All @@ -1896,6 +1927,7 @@ def test_experimental_markdown_syntax_text_splitter_header_configuration_with_mu

assert output == expected_output


def test_solidity_code_splitter() -> None:
splitter = RecursiveCharacterTextSplitter.from_language(
Language.SOL, chunk_size=CHUNK_SIZE, chunk_overlap=0
Expand Down

0 comments on commit 65d09d0

Please sign in to comment.