diff --git a/packages/jupyter-ai/jupyter_ai/document_loaders/directory.py b/packages/jupyter-ai/jupyter_ai/document_loaders/directory.py
index efcc99e95..eb40a0d59 100644
--- a/packages/jupyter-ai/jupyter_ai/document_loaders/directory.py
+++ b/packages/jupyter-ai/jupyter_ai/document_loaders/directory.py
@@ -7,11 +7,24 @@
 import dask
 from langchain.schema import Document
 from langchain.text_splitter import TextSplitter
-
+from langchain.document_loaders import PyPDFLoader
+from pypdf import PdfReader
+
+# Extracts text with pypdf, the same library that langchain's PyPDFLoader uses.
+def pdf_to_text(path):
+    """Return the extracted text of every page of the PDF at ``path``.
+
+    Each page's text is preceded by the separator ``"\\n \\n"``.
+    """
+    pages = PdfReader(path).pages
+    return "".join("\n \n" + page.extract_text() for page in pages)
 
 def path_to_doc(path):
     with open(str(path)) as f:
-        text = f.read()
+        if os.path.splitext(path)[1] == ".pdf":
+            text = pdf_to_text(path)
+        else: 
+            text = f.read()        
         m = hashlib.sha256()
         m.update(text.encode("utf-8"))
         metadata = {"path": str(path), "sha256": m.digest(), "extension": path.suffix}
@@ -37,6 +50,8 @@ def path_to_doc(path):
     ".jsx",
     ".tsx",
     ".txt",
+    ".html",
+    ".pdf",
 }
 
 
@@ -51,21 +66,26 @@ def flatten(*chunk_lists):
 def split(path, all_files: bool, splitter):
+    """Queue delayed load-and-split tasks for one file or every supported file under a directory."""
     chunks = []
 
-    for dir, subdirs, filenames in os.walk(path):
-        # Filter out hidden filenames, hidden directories, and excluded directories,
-        # unless "all files" are requested
-        if not all_files:
-            subdirs[:] = [d for d in subdirs if not (d[0] == "." or d in EXCLUDE_DIRS)]
-            filenames = [f for f in filenames if not f[0] == "."]
-
-        for filename in filenames:
-            filepath = Path(os.path.join(dir, filename))
-            if filepath.suffix not in SUPPORTED_EXTS:
-                continue
-
-            document = dask.delayed(path_to_doc)(filepath)
-            chunk = dask.delayed(split_document)(document, splitter)
-            chunks.append(chunk)
+    # A single file becomes a one-entry walk so both cases share the loop below.
+    single_file = os.path.isfile(path)
+    if single_file:
+        walk = [(os.path.dirname(path), [], [os.path.basename(path)])]
+    else:
+        walk = os.walk(path)
+    for dir, subdirs, filenames in walk:
+        # Skip hidden/excluded entries unless "all files"; a named file is never filtered.
+        if not (all_files or single_file):
+            subdirs[:] = [d for d in subdirs if not (d[0] == "." or d in EXCLUDE_DIRS)]
+            filenames = [f for f in filenames if not f[0] == "."]
+        # Keep this loop nested in the walk so every directory's files are queued.
+        for filename in filenames:
+            filepath = Path(os.path.join(dir, filename))
+            if filepath.suffix not in SUPPORTED_EXTS:
+                continue
+            document = dask.delayed(path_to_doc)(filepath)
+            chunk = dask.delayed(split_document)(document, splitter)
+            chunks.append(chunk)
 
     flattened_chunks = dask.delayed(flatten)(*chunks)
     return flattened_chunks