diff --git a/libs/community/langchain_community/document_loaders/pdf.py b/libs/community/langchain_community/document_loaders/pdf.py index 30615d3e7a3ae..600eb26bd0b0b 100644 --- a/libs/community/langchain_community/document_loaders/pdf.py +++ b/libs/community/langchain_community/document_loaders/pdf.py @@ -518,7 +518,7 @@ def load(self) -> List[Document]: contents = self.get_processed_pdf(pdf_id) if self.should_clean_pdf: contents = self.clean_pdf(contents) - metadata = {"source": self.source, "file_path": self.source} + metadata = {"source": self.source, "file_path": self.source, "pdf_id": pdf_id} return [Document(page_content=contents, metadata=metadata)]