diff --git a/poetry.lock b/poetry.lock
index b6527691..705c6612 100644
--- a/poetry.lock
+++ b/poetry.lock
@@ -244,6 +244,34 @@ azure-core = ">=1.30.0"
 isodate = ">=0.6.1"
 typing-extensions = ">=4.6.0"
 
+[[package]]
+name = "azure-ai-formrecognizer"
+version = "3.3.3"
+description = "Microsoft Azure Form Recognizer Client Library for Python"
+optional = true
+python-versions = ">=3.8"
+files = [
+    {file = "azure-ai-formrecognizer-3.3.3.tar.gz", hash = "sha256:9fc09788bbb65866630fa870cca1933bfd7298b8055236530bcc0e40d81fcccf"},
+    {file = "azure_ai_formrecognizer-3.3.3-py3-none-any.whl", hash = "sha256:81fc1abda8bd898426ee3bbc1b9c6bd164514201ce282129a31d4664f9d1f3bc"},
+]
+
+[package.dependencies]
+azure-common = ">=1.1"
+azure-core = ">=1.23.0"
+msrest = ">=0.6.21"
+typing-extensions = ">=4.0.1"
+
+[[package]]
+name = "azure-common"
+version = "1.1.28"
+description = "Microsoft Azure Client Library for Python (Common)"
+optional = true
+python-versions = "*"
+files = [
+    {file = "azure-common-1.1.28.zip", hash = "sha256:4ac0cd3214e36b6a1b6a442686722a5d8cc449603aa833f3f0f40bda836704a3"},
+    {file = "azure_common-1.1.28-py2.py3-none-any.whl", hash = "sha256:5c12d3dcf4ec20599ca6b0d3e09e86e146353d443e7fcc050c9a19c1f9df20ad"},
+]
+
 [[package]]
 name = "azure-core"
 version = "1.31.0"
@@ -1926,6 +1954,27 @@ docs = ["sphinx"]
 gmpy = ["gmpy2 (>=2.1.0a4)"]
 tests = ["pytest (>=4.6)"]
 
+[[package]]
+name = "msrest"
+version = "0.7.1"
+description = "AutoRest swagger generator Python client runtime."
+optional = true
+python-versions = ">=3.6"
+files = [
+    {file = "msrest-0.7.1-py3-none-any.whl", hash = "sha256:21120a810e1233e5e6cc7fe40b474eeb4ec6f757a15d7cf86702c369f9567c32"},
+    {file = "msrest-0.7.1.zip", hash = "sha256:6e7661f46f3afd88b75667b7187a92829924446c7ea1d169be8c4bb7eeb788b9"},
+]
+
+[package.dependencies]
+azure-core = ">=1.24.0"
+certifi = ">=2017.4.17"
+isodate = ">=0.6.0"
+requests = ">=2.16,<3.0"
+requests-oauthlib = ">=0.5.0"
+
+[package.extras]
+async = ["aiodns", "aiohttp (>=3.0)"]
+
 [[package]]
 name = "multidict"
 version = "6.1.0"
@@ -2415,6 +2464,22 @@ files = [
     {file = "nvidia_nvtx_cu12-12.4.127-py3-none-win_amd64.whl", hash = "sha256:641dccaaa1139f3ffb0d3164b4b84f9d253397e38246a4f2f36728b48566d485"},
 ]
 
+[[package]]
+name = "oauthlib"
+version = "3.2.2"
+description = "A generic, spec-compliant, thorough implementation of the OAuth request-signing logic"
+optional = true
+python-versions = ">=3.6"
+files = [
+    {file = "oauthlib-3.2.2-py3-none-any.whl", hash = "sha256:8139f29aac13e25d502680e9e19963e83f16838d48a0d71c287fe40e7067fbca"},
+    {file = "oauthlib-3.2.2.tar.gz", hash = "sha256:9859c40929662bec5d64f34d01c99e093149682a3f38915dc0655d5a633dd918"},
+]
+
+[package.extras]
+rsa = ["cryptography (>=3.0.0)"]
+signals = ["blinker (>=1.4.0)"]
+signedtoken = ["cryptography (>=3.0.0)", "pyjwt (>=2.0.0,<3)"]
+
 [[package]]
 name = "openai"
 version = "1.52.2"
@@ -3948,6 +4013,24 @@ urllib3 = ">=1.21.1,<3"
 socks = ["PySocks (>=1.5.6,!=1.5.7)"]
 use-chardet-on-py3 = ["chardet (>=3.0.2,<6)"]
 
+[[package]]
+name = "requests-oauthlib"
+version = "2.0.0"
+description = "OAuthlib authentication support for Requests."
+optional = true
+python-versions = ">=3.4"
+files = [
+    {file = "requests-oauthlib-2.0.0.tar.gz", hash = "sha256:b3dffaebd884d8cd778494369603a9e7b58d29111bf6b41bdc2dcd87203af4e9"},
+    {file = "requests_oauthlib-2.0.0-py2.py3-none-any.whl", hash = "sha256:7dd8a5c40426b779b0868c404bdef9768deccf22749cde15852df527e6269b36"},
+]
+
+[package.dependencies]
+oauthlib = ">=3.0.0"
+requests = ">=2.0.0"
+
+[package.extras]
+rsa = ["oauthlib[signedtoken] (>=3.0.0)"]
+
 [[package]]
 name = "rich"
 version = "13.9.3"
@@ -5153,9 +5236,9 @@ type = ["pytest-mypy"]
 
 [extras]
 parsing = ["azure-ai-documentintelligence", "openpyxl", "paddlepaddle", "pydub", "pymupdf", "python-docx", "python-pptx"]
-server = ["docling", "fastapi", "uvicorn"]
+server = ["azure-ai-documentintelligence", "azure-ai-formrecognizer", "docling", "fastapi", "uvicorn"]
 
 [metadata]
 lock-version = "2.0"
 python-versions = "^3.10"
-content-hash = "58c3309ea252aabb4002739171d478674a79397cc1402fc479e188ef6c8b3ad4"
+content-hash = "82e66f32512cdbead2ceb993eb58233578bff394d592d348a9864ed779d5b539"
diff --git a/pyproject.toml b/pyproject.toml
index 2250e140..59a504f1 100644
--- a/pyproject.toml
+++ b/pyproject.toml
@@ -38,10 +38,11 @@ fastapi = { version = "^0.115.0", optional = true }
 uvicorn = { version = "^0.31.0", optional = true }
 websockets = "^13.1"
 docling = { version = "^2.5.2", optional = true }
+azure-ai-formrecognizer = { version = "^3.3.3", optional = true }
 
 [tool.poetry.extras]
 parsing = ["python-docx", "openpyxl", "pydub", "python-pptx", "azure-ai-documentintelligence", "paddlepaddle", "pymupdf"]
-server = ["fastapi", "uvicorn", "docling"]
+server = ["fastapi", "uvicorn", "docling", "azure-ai-formrecognizer", "azure-ai-documentintelligence"]
 
 [tool.poetry.group.dev.dependencies]
 pytest = "^8.3.2"
diff --git a/server/app/routes/convert.py b/server/app/routes/convert.py
index 7dd77e79..2474c34d 100644
--- a/server/app/routes/convert.py
+++ b/server/app/routes/convert.py
@@ -1,14 +1,46 @@
-from fastapi import APIRouter, UploadFile, File
-from typing import List
+from fastapi import APIRouter, UploadFile, File, Header
+from typing import List, Optional
 import tempfile
 import os
 import aiohttp
 from pathlib import Path
+from azure.ai.documentintelligence.models import AnalyzeDocumentRequest, ContentFormat, AnalyzeResult
+from azure.ai.documentintelligence import DocumentIntelligenceClient
+from azure.core.credentials import AzureKeyCredential
+import asyncio
+from concurrent.futures import ThreadPoolExecutor
+from dotenv import load_dotenv
+
+# Load environment variables
+load_dotenv()
+
+# Add Azure credentials
+AZURE_ENDPOINT = os.getenv("AZURE_DOCUMENT_INTELLIGENCE_ENDPOINT")
+AZURE_KEY = os.getenv("AZURE_DOCUMENT_INTELLIGENCE_KEY")
 
 router = APIRouter()
 
 MODAL_ENDPOINT = "https://ucbepic--docling-converter-convert-documents.modal.run"
 
+def process_document_with_azure(file_path: str, endpoint: str, key: str) -> str:
+    """Process a single document with Azure Document Intelligence"""
+    try:
+        document_analysis_client = DocumentIntelligenceClient(
+            endpoint=endpoint,
+            credential=AzureKeyCredential(key)
+        )
+
+        with open(file_path, "rb") as f:
+            poller = document_analysis_client.begin_analyze_document(
+                "prebuilt-layout", AnalyzeDocumentRequest(bytes_source=f.read()), output_content_format=ContentFormat.MARKDOWN,
+            )
+        result = poller.result()
+
+        return result.content
+    except Exception as e:
+        print(f"Error processing document: {str(e)}")
+        return f"Error processing document: {str(e)}"
+
 @router.post("/api/convert-documents")
 async def convert_documents(files: List[UploadFile] = File(...)):
     # First try Modal endpoint
@@ -77,4 +109,59 @@ async def convert_documents(files: List[UploadFile] = File(...)):
                 "markdown": conv_result.document.export_to_markdown()
             })
 
-    return {"documents": results}
\ No newline at end of file
+    return {"documents": results}
+
+@router.post("/api/azure-convert-documents")
+async def azure_convert_documents(
+    files: List[UploadFile] = File(...),
+    azure_endpoint: Optional[str] = Header(None),
+    azure_key: Optional[str] = Header(None)
+):
+    if not azure_endpoint or not azure_key:
+        return {"error": "Azure credentials are required"}
+
+    with tempfile.TemporaryDirectory() as temp_dir:
+        # Save uploaded files and prepare for processing
+        file_paths = []
+        original_filenames = []
+
+        for file in files:
+            file_path = os.path.join(temp_dir, file.filename)
+            os.makedirs(os.path.dirname(file_path), exist_ok=True)
+
+            with open(file_path, "wb") as buffer:
+                content = await file.read()
+                buffer.write(content)
+
+            file_paths.append(file_path)
+            original_filenames.append(file.filename)
+
+        # Process documents concurrently using ThreadPoolExecutor
+        with ThreadPoolExecutor() as executor:
+            futures = []
+            for file_path in file_paths:
+                future = executor.submit(
+                    process_document_with_azure,
+                    file_path,
+                    azure_endpoint,
+                    azure_key
+                )
+                futures.append(future)
+
+            # Collect results as they complete
+            results = []
+            for future in futures:
+                results.append(future.result())
+
+        # Format results to match the existing endpoint's schema
+        formatted_results = [
+            {
+                "filename": filename,
+                "markdown": content
+            }
+            for filename, content in zip(original_filenames, results)
+        ]
+
+        return {"documents": formatted_results}
+
+
diff --git a/website/src/app/api/convertDocuments/route.ts b/website/src/app/api/convertDocuments/route.ts
index b7bc4228..da938e6f 100644
--- a/website/src/app/api/convertDocuments/route.ts
+++ b/website/src/app/api/convertDocuments/route.ts
@@ -17,17 +17,36 @@ export async function POST(request: NextRequest) {
       backendFormData.append("files", file);
     });
 
+    // Get Azure credentials from headers if they exist
+    const azureEndpoint = request.headers.get("azure-endpoint");
+    const azureKey = request.headers.get("azure-key");
+
+    // Determine which endpoint to use
+    const endpoint =
+      azureEndpoint && azureKey
+        ? "/api/azure-convert-documents"
+        : "/api/convert-documents";
+
+    // Prepare headers for the backend request
+    const headers: HeadersInit = {};
+    if (azureEndpoint && azureKey) {
+      headers["azure-endpoint"] = azureEndpoint;
+      headers["azure-key"] = azureKey;
+    }
+
     // Forward the request to the Python backend
     const response = await fetch(
-      `http://${process.env.NEXT_PUBLIC_BACKEND_HOST}:${process.env.NEXT_PUBLIC_BACKEND_PORT}/api/convert-documents`,
+      `http://${process.env.NEXT_PUBLIC_BACKEND_HOST}:${process.env.NEXT_PUBLIC_BACKEND_PORT}${endpoint}`,
       {
         method: "POST",
         body: backendFormData,
+        headers,
       }
     );
 
     if (!response.ok) {
-      throw new Error(`Backend returned ${response.status}`);
+      const errorData = await response.json().catch(() => ({}));
+      throw new Error(errorData.error || `Backend returned ${response.status}`);
     }
 
     const data = await response.json();
@@ -39,7 +58,12 @@
   } catch (error) {
     console.error("Error converting documents:", error);
     return NextResponse.json(
-      { error: "Failed to convert documents" },
+      {
+        error:
+          error instanceof Error
+            ? error.message
+            : "Failed to convert documents",
+      },
       { status: 500 }
     );
   }
diff --git a/website/src/components/FileExplorer.tsx b/website/src/components/FileExplorer.tsx
index a987855f..4755f3ca 100644
--- a/website/src/components/FileExplorer.tsx
+++ b/website/src/components/FileExplorer.tsx
@@ -10,6 +10,7 @@ import {
   X,
   Folder,
   Database,
+  AlertTriangle,
 } from "lucide-react";
 import { Button } from "@/components/ui/button";
 import { Input } from "@/components/ui/input";
@@ -44,6 +45,14 @@ import {
   AlertDialogHeader,
   AlertDialogTitle,
 } from "@/components/ui/alert-dialog";
+import { Switch } from "@/components/ui/switch";
+import { Label } from "@/components/ui/label";
+import {
+  Tooltip,
+  TooltipContent,
+  TooltipProvider,
+  TooltipTrigger,
+} from "./ui/tooltip";
 
 interface FileExplorerProps {
   files: File[];
@@ -126,6 +135,8 @@ async function getAllFiles(entry: FileSystemEntry): Promise<File[]> {
   return files;
 }
 
+type ConversionMethod = "docling" | "azure";
+
 export const FileExplorer: React.FC<FileExplorerProps> = ({
   files,
   onFileClick,
@@ -144,6 +155,10 @@
   const [viewingDocument, setViewingDocument] = useState<File | null>(null);
   const [folderToDelete, setFolderToDelete] = useState<string | null>(null);
   const [uploadingFiles, setUploadingFiles] = useState<Set<string>>(new Set());
+  const [conversionMethod, setConversionMethod] =
+    useState<ConversionMethod>("docling");
+  const [azureEndpoint, setAzureEndpoint] = useState("");
+  const [azureKey, setAzureKey] = useState("");
 
   // Group files by folder
   const groupedFiles = files.reduce((acc: { [key: string]: File[] }, file) => {
@@ -305,9 +320,7 @@
       // First, save all original documents and collect their paths
       const originalDocsFormData = new FormData();
       Array.from(selectedFiles).forEach((file) => {
-        // Add to conversion formData
         formData.append("files", file);
-        // Add to storage formData with original filename to preserve it
        originalDocsFormData.append(
           "files",
           new File([file], file.name, { type: file.type })
@@ -327,10 +340,21 @@
       const savedDocs = await saveDocsResponse.json();
 
+      // All conversions go through the Next.js proxy route, which picks the backend endpoint from the headers below
+      const conversionEndpoint = "/api/convertDocuments";
+
+      // Prepare headers for Azure if needed
+      const headers: HeadersInit = {};
+      if (conversionMethod === "azure") {
+        headers["azure-endpoint"] = azureEndpoint;
+        headers["azure-key"] = azureKey;
+      }
+
       // Then proceed with conversion
-      const response = await fetch("/api/convertDocuments", {
+      const response = await fetch(conversionEndpoint, {
         method: "POST",
         body: formData,
+        headers,
       });
 
       if (!response.ok) {
@@ -625,7 +649,68 @@ export const FileExplorer: React.FC<FileExplorerProps> = ({
             Upload Documents
-
+          <div className="space-y-4">
+            <div className="flex items-center space-x-2">
+              <Switch
+                id="conversion-method"
+                checked={conversionMethod === "azure"}
+                onCheckedChange={(checked) =>
+                  setConversionMethod(checked ? "azure" : "docling")
+                }
+              />
+              <Label htmlFor="conversion-method">
+                Use Azure Document Intelligence
+              </Label>
+            </div>
+
+            {conversionMethod === "azure" && (
+              <div className="space-y-2">
+                <div>
+                  <Label htmlFor="azure-endpoint">Azure Endpoint</Label>
+                  <Input
+                    id="azure-endpoint"
+                    value={azureEndpoint}
+                    onChange={(e) => setAzureEndpoint(e.target.value)}
+                    className="h-8"
+                  />
+                </div>
+                <div>
+                  <div className="flex items-center gap-1">
+                    <Label htmlFor="azure-key">Azure Key</Label>
+                    <TooltipProvider>
+                      <Tooltip>
+                        <TooltipTrigger>
+                          <AlertTriangle className="h-4 w-4 text-yellow-500" />
+                        </TooltipTrigger>
+                        <TooltipContent>
+                          <p>
+                            Warning: Key is passed in plaintext to your
+                            local server. Ensure no one is snooping on your
+                            network traffic and stealing your key.
+                          </p>
+                        </TooltipContent>
+                      </Tooltip>
+                    </TooltipProvider>
+                  </div>
+                  <Input
+                    id="azure-key"
+                    type="password"
+                    value={azureKey}
+                    onChange={(e) => setAzureKey(e.target.value)}
+                    className="h-8"
+                  />
+                </div>
+              </div>
+            )}
+          </div>
+
@@ ... @@ export const FileExplorer: React.FC<FileExplorerProps> = ({