# Patch summary (text before the first "diff --git" header is ignored by patch tools).
#
# notiondb.py:
#   * "rich_text" and "title" properties are now built by joining the
#     "plain_text" of every segment through the new _concatenate_rich_text
#     helper, instead of reading only segment [0].
#   * `value: Any` is declared once ahead of the property loop.
# test_notiondb_loader.py (new file):
#   * unit tests for the helper, for load_page (multi-segment rich text) and
#     for load (two pages, has_more=False), with _request/_load_blocks patched.
#
# NOTE(review): for an empty rich_text/title array the removed code produced
# None, while "".join(...) over an empty array produces "". None of the added
# tests exercises the empty case -- confirm the change is intended.
diff --git a/libs/community/langchain_community/document_loaders/notiondb.py b/libs/community/langchain_community/document_loaders/notiondb.py
index d5c03da16d380..3e43a3fd811a1 100644
--- a/libs/community/langchain_community/document_loaders/notiondb.py
+++ b/libs/community/langchain_community/document_loaders/notiondb.py
@@ -107,19 +107,15 @@ def load_page(self, page_summary: Dict[str, Any]) -> Document:
         # load properties as metadata
         metadata: Dict[str, Any] = {}
 
+        value: Any
+
         for prop_name, prop_data in page_summary["properties"].items():
             prop_type = prop_data["type"]
 
             if prop_type == "rich_text":
-                value = (
-                    prop_data["rich_text"][0]["plain_text"]
-                    if prop_data["rich_text"]
-                    else None
-                )
+                value = self._concatenate_rich_text(prop_data["rich_text"])
             elif prop_type == "title":
-                value = (
-                    prop_data["title"][0]["plain_text"] if prop_data["title"] else None
-                )
+                value = self._concatenate_rich_text(prop_data["title"])
             elif prop_type == "multi_select":
                 value = (
                     [item["name"] for item in prop_data["multi_select"]]
@@ -228,3 +224,7 @@ def _request(
         )
         res.raise_for_status()
         return res.json()
+
+    def _concatenate_rich_text(self, rich_text_array: List[Dict[str, Any]]) -> str:
+        """Concatenate all text content from a rich_text array."""
+        return "".join(item["plain_text"] for item in rich_text_array)
diff --git a/libs/community/tests/unit_tests/document_loaders/test_notiondb_loader.py b/libs/community/tests/unit_tests/document_loaders/test_notiondb_loader.py
new file mode 100644
index 0000000000000..004f0fcc872e3
--- /dev/null
+++ b/libs/community/tests/unit_tests/document_loaders/test_notiondb_loader.py
@@ -0,0 +1,138 @@
+from unittest.mock import Mock, patch
+
+from langchain_core.documents import Document
+
+from langchain_community.document_loaders import NotionDBLoader
+
+
+class TestNotionDBLoader:
+    def setup_method(self) -> None:
+        self.loader = NotionDBLoader(
+            integration_token="fake_token", database_id="fake_db_id"
+        )
+
+    def test_concatenate_rich_text(self) -> None:
+        # Setup
+        rich_text = [
+            {"plain_text": "Hello "},
+            {"plain_text": "world"},
+            {"plain_text": "!"},
+        ]
+
+        # Exercise
+        result = self.loader._concatenate_rich_text(rich_text)
+
+        # Assert
+        assert result == "Hello world!"
+
+    @patch("langchain_community.document_loaders.notiondb.NotionDBLoader._request")
+    @patch("langchain_community.document_loaders.notiondb.NotionDBLoader._load_blocks")
+    def test_load_page_with_rich_text(
+        self, mock_load_blocks: Mock, mock_request: Mock
+    ) -> None:
+        # Setup
+        mock_load_blocks.return_value = "Mocked block content"
+        page_summary = {
+            "id": "page_id",
+            "properties": {
+                "Title": {"type": "title", "title": [{"plain_text": "Test Title"}]},
+                "Description": {
+                    "type": "rich_text",
+                    "rich_text": [
+                        {"plain_text": "This is "},
+                        {"plain_text": "a test"},
+                        {"plain_text": " description"},
+                    ],
+                },
+            },
+        }
+        expected_doc = Document(
+            page_content="Mocked block content",
+            metadata={
+                "title": "Test Title",
+                "description": "This is a test description",
+                "id": "page_id",
+            },
+        )
+
+        # Exercise
+        result = self.loader.load_page(page_summary)
+
+        # Assert
+        assert result == expected_doc
+
+    @patch("langchain_community.document_loaders.notiondb.NotionDBLoader._request")
+    @patch("langchain_community.document_loaders.notiondb.NotionDBLoader._load_blocks")
+    def test_load_page_with_code_in_rich_text(
+        self, mock_load_blocks: Mock, mock_request: Mock
+    ) -> None:
+        # Setup
+        mock_load_blocks.return_value = "Mocked block content"
+        page_summary = {
+            "id": "page_id",
+            "properties": {
+                "Answer": {
+                    "type": "rich_text",
+                    "rich_text": [
+                        {"plain_text": "Use "},
+                        {"plain_text": "print('Hello')"},
+                        {"plain_text": " to display text"},
+                    ],
+                }
+            },
+        }
+        expected_doc = Document(
+            page_content="Mocked block content",
+            metadata={"answer": "Use print('Hello') to display text", "id": "page_id"},
+        )
+
+        # Exercise
+        result = self.loader.load_page(page_summary)
+
+        # Assert
+        assert result == expected_doc
+
+    @patch("langchain_community.document_loaders.notiondb.NotionDBLoader._request")
+    @patch("langchain_community.document_loaders.notiondb.NotionDBLoader._load_blocks")
+    def test_load(self, mock_load_blocks: Mock, mock_request: Mock) -> None:
+        # Setup
+        mock_load_blocks.return_value = "Mocked block content"
+        mock_request.return_value = {
+            "results": [
+                {
+                    "id": "page_id_1",
+                    "properties": {
+                        "Title": {
+                            "type": "title",
+                            "title": [{"plain_text": "Test Title 1"}],
+                        }
+                    },
+                },
+                {
+                    "id": "page_id_2",
+                    "properties": {
+                        "Title": {
+                            "type": "title",
+                            "title": [{"plain_text": "Test Title 2"}],
+                        }
+                    },
+                },
+            ],
+            "has_more": False,
+        }
+        expected_docs = [
+            Document(
+                page_content="Mocked block content",
+                metadata={"title": "Test Title 1", "id": "page_id_1"},
+            ),
+            Document(
+                page_content="Mocked block content",
+                metadata={"title": "Test Title 2", "id": "page_id_2"},
+            ),
+        ]
+
+        # Exercise
+        result = self.loader.load()
+
+        # Assert
+        assert result == expected_docs