diff --git a/server/app/routes/convert.py b/server/app/routes/convert.py index a896bb37..2c680831 100644 --- a/server/app/routes/convert.py +++ b/server/app/routes/convert.py @@ -64,8 +64,23 @@ async def convert_documents(files: List[UploadFile] = File(...)): print(f"Modal endpoint failed: {str(e)}. Falling back to local processing...") # If Modal fails, fall back to local processing - from docling.document_converter import DocumentConverter - doc_converter = DocumentConverter() + from docling.datamodel.base_models import InputFormat + from docling.document_converter import DocumentConverter, PdfFormatOption + from docling.datamodel.pipeline_options import PdfPipelineOptions + from docling.backend.pypdfium2_backend import PyPdfiumDocumentBackend + + pipeline_options = PdfPipelineOptions() + pipeline_options.do_ocr = False + pipeline_options.do_table_structure = True + pipeline_options.table_structure_options.do_cell_matching = True + + doc_converter = DocumentConverter( + format_options={ + InputFormat.PDF: PdfFormatOption( + pipeline_options=pipeline_options, backend=PyPdfiumDocumentBackend + ) + } + ) # Create a temporary directory to store uploaded files with tempfile.TemporaryDirectory() as temp_dir: