From be81d8ccdd118a39d33b770138f1f900e938b9cf Mon Sep 17 00:00:00 2001 From: Muspi Merol Date: Thu, 24 Oct 2024 12:19:36 +0800 Subject: [PATCH] fix: mistake in reversing html and text attributes --- .../document_transformers/mozilla_readability.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/libs/community/langchain_community/document_transformers/mozilla_readability.py b/libs/community/langchain_community/document_transformers/mozilla_readability.py index 08dfc28f7159b..3cd26dcc43750 100644 --- a/libs/community/langchain_community/document_transformers/mozilla_readability.py +++ b/libs/community/langchain_community/document_transformers/mozilla_readability.py @@ -40,7 +40,7 @@ def transform_document(self, document: Document, **kwargs: Any) -> Document: article = parse(document.page_content, **{**self.options, **kwargs}) - result = article.text_content if target == "html" else article.content + result = article.content if target == "html" else article.text_content metadata = {**document.metadata, **asdict(article)}