Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

feat: clean up UI to look a bit more consistent and smooth #233

Merged
merged 11 commits on Dec 8, 2024
19 changes: 10 additions & 9 deletions server/app/routes/convert.py
Original file line number Diff line number Diff line change
Expand Up @@ -11,6 +11,11 @@
from concurrent.futures import ThreadPoolExecutor
from dotenv import load_dotenv

from docling.datamodel.base_models import InputFormat
from docling.document_converter import DocumentConverter, PdfFormatOption
from docling.datamodel.pipeline_options import PdfPipelineOptions
from docling.backend.pypdfium2_backend import PyPdfiumDocumentBackend

# Load environment variables
load_dotenv()

Expand Down Expand Up @@ -42,10 +47,10 @@ def process_document_with_azure(file_path: str, endpoint: str, key: str) -> str:
return f"Error processing document: {str(e)}"

@router.post("/api/convert-documents")
async def convert_documents(files: List[UploadFile] = File(...)):
# First try Modal endpoint if there are no txt files
async def convert_documents(files: List[UploadFile] = File(...), use_docetl_server: bool = False):
# Only try Modal endpoint if use_docetl_server is true and at least one file is not a .txt/.md file
all_txt_files = all(file.filename.lower().endswith('.txt') or file.filename.lower().endswith('.md') for file in files)
if not all_txt_files:
if use_docetl_server and not all_txt_files:
try:
async with aiohttp.ClientSession() as session:
# Prepare files for multipart upload
Expand All @@ -63,12 +68,8 @@ async def convert_documents(files: List[UploadFile] = File(...)):
except Exception as e:
print(f"Modal endpoint failed: {str(e)}. Falling back to local processing...")

# If Modal fails, fall back to local processing
from docling.datamodel.base_models import InputFormat
from docling.document_converter import DocumentConverter, PdfFormatOption
from docling.datamodel.pipeline_options import PdfPipelineOptions
from docling.backend.pypdfium2_backend import PyPdfiumDocumentBackend

# Process locally if Modal wasn't used or failed

pipeline_options = PdfPipelineOptions()
pipeline_options.do_ocr = False
pipeline_options.do_table_structure = True
Expand Down
31 changes: 7 additions & 24 deletions tests/test_runner_caching.py
Original file line number Diff line number Diff line change
Expand Up @@ -35,7 +35,7 @@ def temp_intermediate_dir():
yield tmpdirname


def create_pipeline(input_file, output_file, intermediate_dir, operation_prompt):
def create_pipeline(input_file, output_file, intermediate_dir, operation_prompt, bypass_cache=False):
return Pipeline(
name="test_pipeline",
datasets={"test_input": Dataset(type="file", path=input_file)},
Expand All @@ -45,6 +45,7 @@ def create_pipeline(input_file, output_file, intermediate_dir, operation_prompt)
type="map",
prompt=operation_prompt,
output={"schema": {"result": "string"}},
bypass_cache=bypass_cache,
)
],
steps=[
Expand Down Expand Up @@ -80,34 +81,16 @@ def test_pipeline_rerun_on_operation_change(
# Check that the pipeline was not rerun (cost should be zero)
assert unmodified_cost == 0

# Record the start time
start_time = time.time()

# Run again without changes
_ = pipeline.run()

# Record the end time
end_time = time.time()

# Calculate and store the runtime
unmodified_runtime = end_time - start_time

# Modify the operation
modified_prompt = "Count the words in the following text: '{{ input.text }}'"
modified_pipeline = create_pipeline(
temp_input_file, temp_output_file, temp_intermediate_dir, modified_prompt
temp_input_file, temp_output_file, temp_intermediate_dir, modified_prompt, bypass_cache=True
)

# Record the start time
start_time = time.time()

_ = modified_pipeline.run()

# Record the end time
end_time = time.time()
modified_cost = modified_pipeline.run()

# Calculate and store the runtime
modified_runtime = end_time - start_time


# Check that the intermediate files were updated
with open(
Expand All @@ -116,8 +99,8 @@ def test_pipeline_rerun_on_operation_change(
intermediate_data = json.load(f)
assert any("word" in str(item).lower() for item in intermediate_data)

# Check that the runtime is faster when not modifying
assert unmodified_runtime < modified_runtime * 2
# Check that the cost > 0
assert modified_cost > 0


# Test with an incorrect later operation but correct earlier operation
Expand Down
73 changes: 73 additions & 0 deletions website/package-lock.json

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

1 change: 1 addition & 0 deletions website/package.json
Original file line number Diff line number Diff line change
Expand Up @@ -28,6 +28,7 @@
"@radix-ui/react-menubar": "^1.1.2",
"@radix-ui/react-popover": "^1.0.7",
"@radix-ui/react-progress": "^1.1.0",
"@radix-ui/react-radio-group": "^1.2.1",
"@radix-ui/react-scroll-area": "^1.1.0",
"@radix-ui/react-select": "^2.1.1",
"@radix-ui/react-slot": "^1.1.0",
Expand Down
7 changes: 7 additions & 0 deletions website/src/app/api/convertDocuments/route.ts
Original file line number Diff line number Diff line change
Expand Up @@ -6,6 +6,7 @@ export async function POST(request: NextRequest) {
try {
const formData = await request.formData();
const files = formData.getAll("files");
const conversionMethod = formData.get("conversion_method");

if (!files || files.length === 0) {
return NextResponse.json({ error: "No files provided" }, { status: 400 });
Expand All @@ -17,6 +18,12 @@ export async function POST(request: NextRequest) {
backendFormData.append("files", file);
});

// Add conversion method to form data
backendFormData.append(
"use_docetl_server",
conversionMethod === "docetl" ? "true" : "false"
);

// Get Azure credentials from headers if they exist
const azureEndpoint = request.headers.get("azure-endpoint");
const azureKey = request.headers.get("azure-key");
Expand Down
53 changes: 6 additions & 47 deletions website/src/app/globals.css
Original file line number Diff line number Diff line change
Expand Up @@ -2,15 +2,6 @@
@tailwind components;
@tailwind utilities;

/* :root {
--color-background: #f8f9fa;
--color-text: #212529;
--color-primary: #007bff;
--color-secondary: #6c757d;
--color-icon: #17a2b8;
} */


@layer base {
:root {
--background: 211 100% 98%;
Expand All @@ -34,44 +25,12 @@
--ring: 211 100% 50%;
--radius: 0.5rem;

/* Custom variables */
/* --color-icon: #17a2b8; Kept as hex */
--chart-1: 12 76% 61%;
--chart-2: 173 58% 39%;
--chart-3: 197 37% 24%;
--chart-4: 43 74% 66%;
--chart-5: 27 87% 67%;
}

.dark {
--background: 211 50% 5%;
--foreground: 211 5% 90%;
--card: 211 50% 0%;
--card-foreground: 211 5% 90%;
--popover: 211 50% 5%;
--popover-foreground: 211 5% 90%;
--primary: 211 100% 50%;
--primary-foreground: 0 0% 100%;
--secondary: 211 30% 10%;
--secondary-foreground: 0 0% 100%;
--muted: 173 30% 15%;
--muted-foreground: 211 5% 60%;
--accent: 173 30% 15%;
--accent-foreground: 211 5% 90%;
--destructive: 0 100% 30%;
--destructive-foreground: 211 5% 90%;
--border: 211 30% 18%;
--input: 211 30% 18%;
--ring: 211 100% 50%;
--radius: 0.5rem;

/* Custom variables for dark mode */
/* --color-icon: #17a2b8; */
--chart-1: 220 70% 50%;
--chart-2: 160 60% 45%;
--chart-3: 30 80% 55%;
--chart-4: 280 65% 60%;
--chart-5: 340 75% 55%;
/* Chart colors now use theme variables */
--chart-1: var(--chart1);
--chart-2: var(--chart2);
--chart-3: var(--chart3);
--chart-4: var(--chart4);
--chart-5: var(--chart5);
}
}

Expand Down
Loading
Loading