diff --git a/libs/community/langchain_community/document_loaders/parsers/pdf.py b/libs/community/langchain_community/document_loaders/parsers/pdf.py
index c603dde71eb32..702d5998dd7c2 100644
--- a/libs/community/langchain_community/document_loaders/parsers/pdf.py
+++ b/libs/community/langchain_community/document_loaders/parsers/pdf.py
@@ -427,6 +427,13 @@ def __init__(
             text_kwargs: Keyword arguments to pass to ``pdfplumber.Page.extract_text()``
             dedupe: Avoiding the error of duplicate characters if `dedupe=True`.
         """
+        try:
+            import PIL  # noqa:F401
+        except ImportError:
+            raise ImportError(
+                "pillow package not found, please install it with"
+                " `pip install pillow`"
+            )
         self.text_kwargs = text_kwargs or {}
         self.dedupe = dedupe
         self.extract_images = extract_images
@@ -468,17 +475,30 @@ def _process_page_content(self, page: pdfplumber.page.Page) -> str:
 
     def _extract_images_from_page(self, page: pdfplumber.page.Page) -> str:
         """Extract images from page and get the text with RapidOCR."""
+        from PIL import Image
+
         if not self.extract_images:
             return ""
 
         images = []
         for img in page.images:
             if img["stream"]["Filter"].name in _PDF_FILTER_WITHOUT_LOSS:
-                images.append(
-                    np.frombuffer(img["stream"].get_data(), dtype=np.uint8).reshape(
-                        img["stream"]["Height"], img["stream"]["Width"], -1
+                if img["stream"]["BitsPerComponent"] == 1:
+                    images.append(
+                        np.array(
+                            Image.frombytes(
+                                "1",
+                                (img["stream"]["Width"], img["stream"]["Height"]),
+                                img["stream"].get_data(),
+                            ).convert("L")
+                        )
+                    )
+                else:
+                    images.append(
+                        np.frombuffer(img["stream"].get_data(), dtype=np.uint8).reshape(
+                            img["stream"]["Height"], img["stream"]["Width"], -1
+                        )
                     )
-                )
             elif img["stream"]["Filter"].name in _PDF_FILTER_WITH_LOSS:
                 images.append(img["stream"].get_data())
             else: