From ec317ab730fa3e3c3e5a941aeb9d2de5ca7cce33 Mon Sep 17 00:00:00 2001
From: Chad Norvell
Date: Sat, 6 Jan 2024 00:14:21 -0800
Subject: [PATCH] MathPix: Include PDF ID in metadata

---
 libs/community/langchain_community/document_loaders/pdf.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/libs/community/langchain_community/document_loaders/pdf.py b/libs/community/langchain_community/document_loaders/pdf.py
index dc488834a6b36..d83bf4e8ac246 100644
--- a/libs/community/langchain_community/document_loaders/pdf.py
+++ b/libs/community/langchain_community/document_loaders/pdf.py
@@ -512,7 +512,7 @@ def load(self) -> List[Document]:
         contents = self.get_processed_pdf(pdf_id)
         if self.should_clean_pdf:
             contents = self.clean_pdf(contents)
-        metadata = {"source": self.source, "file_path": self.source}
+        metadata = {"source": self.source, "file_path": self.source, "pdf_id": pdf_id}
         return [Document(page_content=contents, metadata=metadata)]
 
 