feat: clean up UI to look a bit more consistent and smooth (#233)
* chore: update lockfile

* feat: clean up UI to look a bit more consistent and smooth

* feat: clean up upload dialog

* feat: clean up upload dialog

* feat: clean up upload dialog

* feat: clean up upload dialog

* feat: add themes

* feat: fix tests

* chore: update type ignore errors
shreyashankar authored Dec 8, 2024
1 parent 23e3cc2 commit e5b89c3
Showing 26 changed files with 2,276 additions and 1,380 deletions.
19 changes: 10 additions & 9 deletions server/app/routes/convert.py
@@ -11,6 +11,11 @@
 from concurrent.futures import ThreadPoolExecutor
 from dotenv import load_dotenv

+from docling.datamodel.base_models import InputFormat
+from docling.document_converter import DocumentConverter, PdfFormatOption
+from docling.datamodel.pipeline_options import PdfPipelineOptions
+from docling.backend.pypdfium2_backend import PyPdfiumDocumentBackend
+
 # Load environment variables
 load_dotenv()

@@ -42,10 +47,10 @@ def process_document_with_azure(file_path: str, endpoint: str, key: str) -> str:
         return f"Error processing document: {str(e)}"

 @router.post("/api/convert-documents")
-async def convert_documents(files: List[UploadFile] = File(...)):
-    # First try Modal endpoint if there are no txt files
+async def convert_documents(files: List[UploadFile] = File(...), use_docetl_server: bool = False):
+    # Only try Modal endpoint if use_docetl_server is true and there are no txt files
     all_txt_files = all(file.filename.lower().endswith('.txt') or file.filename.lower().endswith('.md') for file in files)
-    if not all_txt_files:
+    if use_docetl_server and not all_txt_files:
         try:
             async with aiohttp.ClientSession() as session:
                 # Prepare files for multipart upload
@@ -63,12 +68,8 @@ async def convert_documents(files: List[UploadFile] = File(...)):
         except Exception as e:
             print(f"Modal endpoint failed: {str(e)}. Falling back to local processing...")

-    # If Modal fails, fall back to local processing
-    from docling.datamodel.base_models import InputFormat
-    from docling.document_converter import DocumentConverter, PdfFormatOption
-    from docling.datamodel.pipeline_options import PdfPipelineOptions
-    from docling.backend.pypdfium2_backend import PyPdfiumDocumentBackend
-
+    # Process locally if Modal wasn't used or failed

     pipeline_options = PdfPipelineOptions()
     pipeline_options.do_ocr = False
     pipeline_options.do_table_structure = True
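For reference, a minimal sketch of how the docling-based local path likely continues past the visible hunk, based on docling's documented DocumentConverter API. The imports and pipeline options mirror the diff above; the input path and the markdown export step are illustrative assumptions, not the verbatim code of this commit.

# Sketch only: mirrors the imports and pipeline options shown in the diff above.
# "example.pdf" and the export step are assumptions for illustration.
from docling.datamodel.base_models import InputFormat
from docling.document_converter import DocumentConverter, PdfFormatOption
from docling.datamodel.pipeline_options import PdfPipelineOptions
from docling.backend.pypdfium2_backend import PyPdfiumDocumentBackend

pipeline_options = PdfPipelineOptions()
pipeline_options.do_ocr = False              # skip OCR, as set in the diff
pipeline_options.do_table_structure = True   # keep table structure extraction

converter = DocumentConverter(
    format_options={
        InputFormat.PDF: PdfFormatOption(
            pipeline_options=pipeline_options,
            backend=PyPdfiumDocumentBackend,  # pypdfium2 backend, as imported above
        )
    }
)

result = converter.convert("example.pdf")
markdown = result.document.export_to_markdown()
print(markdown[:500])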
31 changes: 7 additions & 24 deletions tests/test_runner_caching.py
@@ -35,7 +35,7 @@ def temp_intermediate_dir():
         yield tmpdirname


-def create_pipeline(input_file, output_file, intermediate_dir, operation_prompt):
+def create_pipeline(input_file, output_file, intermediate_dir, operation_prompt, bypass_cache=False):
     return Pipeline(
         name="test_pipeline",
         datasets={"test_input": Dataset(type="file", path=input_file)},
@@ -45,6 +45,7 @@ def create_pipeline(input_file, output_file, intermediate_dir, operation_prompt)
                 type="map",
                 prompt=operation_prompt,
                 output={"schema": {"result": "string"}},
+                bypass_cache=bypass_cache,
             )
         ],
         steps=[
@@ -80,34 +81,16 @@ def test_pipeline_rerun_on_operation_change(
     # Check that the pipeline was not rerun (cost should be zero)
     assert unmodified_cost == 0

-    # Record the start time
-    start_time = time.time()
-
-    # Run again without changes
-    _ = pipeline.run()
-
-    # Record the end time
-    end_time = time.time()
-
-    # Calculate and store the runtime
-    unmodified_runtime = end_time - start_time

     # Modify the operation
     modified_prompt = "Count the words in the following text: '{{ input.text }}'"
     modified_pipeline = create_pipeline(
-        temp_input_file, temp_output_file, temp_intermediate_dir, modified_prompt
+        temp_input_file, temp_output_file, temp_intermediate_dir, modified_prompt, bypass_cache=True
     )

-    # Record the start time
-    start_time = time.time()
-
-    _ = modified_pipeline.run()
-
-    # Record the end time
-    end_time = time.time()
+    modified_cost = modified_pipeline.run()

-    # Calculate and store the runtime
-    modified_runtime = end_time - start_time
-

     # Check that the intermediate files were updated
     with open(
@@ -116,8 +99,8 @@ def test_pipeline_rerun_on_operation_change(
         intermediate_data = json.load(f)
     assert any("word" in str(item).lower() for item in intermediate_data)

-    # Check that the runtime is faster when not modifying
-    assert unmodified_runtime < modified_runtime * 2
+    # Check that the cost > 0
+    assert modified_cost > 0


 # Test with an incorrect later operation but correct earlier operation
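Reassembled from the hunks above, the modified-run check now looks roughly as follows after this commit: the flaky wall-clock comparison is replaced by a deterministic cost check. This is a readability sketch, not the verbatim file; the fixture names come from the surrounding test.

# The test forces a cache bypass on the modified pipeline and checks that the
# rerun actually incurred LLM cost, instead of comparing runtimes.
modified_prompt = "Count the words in the following text: '{{ input.text }}'"
modified_pipeline = create_pipeline(
    temp_input_file, temp_output_file, temp_intermediate_dir, modified_prompt,
    bypass_cache=True,  # threaded through create_pipeline into the MapOp
)
modified_cost = modified_pipeline.run()

# A nonzero cost means the operation was re-executed rather than served from cache.
assert modified_cost > 0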
73 changes: 73 additions & 0 deletions website/package-lock.json

Some generated files are not rendered by default.

1 change: 1 addition & 0 deletions website/package.json
@@ -28,6 +28,7 @@
     "@radix-ui/react-menubar": "^1.1.2",
     "@radix-ui/react-popover": "^1.0.7",
     "@radix-ui/react-progress": "^1.1.0",
+    "@radix-ui/react-radio-group": "^1.2.1",
     "@radix-ui/react-scroll-area": "^1.1.0",
     "@radix-ui/react-select": "^2.1.1",
     "@radix-ui/react-slot": "^1.1.0",
7 changes: 7 additions & 0 deletions website/src/app/api/convertDocuments/route.ts
@@ -6,6 +6,7 @@ export async function POST(request: NextRequest) {
   try {
     const formData = await request.formData();
     const files = formData.getAll("files");
+    const conversionMethod = formData.get("conversion_method");

     if (!files || files.length === 0) {
       return NextResponse.json({ error: "No files provided" }, { status: 400 });
@@ -17,6 +18,12 @@ export async function POST(request: NextRequest) {
       backendFormData.append("files", file);
     });

+    // Add conversion method to form data
+    backendFormData.append(
+      "use_docetl_server",
+      conversionMethod === "docetl" ? "true" : "false"
+    );
+
     // Get Azure credentials from headers if they exist
     const azureEndpoint = request.headers.get("azure-endpoint");
     const azureKey = request.headers.get("azure-key");
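A hypothetical way to exercise the conversion endpoint directly, mirroring the request shape the Next.js proxy above builds: the files plus a "use_docetl_server" form field holding "true" or "false". The host, port, and file name are assumptions for illustration.

# Sketch only: assumes the FastAPI server is reachable at localhost:8000.
import requests

with open("report.pdf", "rb") as f:
    response = requests.post(
        "http://localhost:8000/api/convert-documents",
        files=[("files", ("report.pdf", f, "application/pdf"))],
        # "docetl" selected in the UI maps to "true"; anything else maps to "false"
        data={"use_docetl_server": "true"},
    )

response.raise_for_status()
print(response.status_code)
print(response.text[:500])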
53 changes: 6 additions & 47 deletions website/src/app/globals.css
@@ -2,15 +2,6 @@
 @tailwind components;
 @tailwind utilities;

-/* :root {
-  --color-background: #f8f9fa;
-  --color-text: #212529;
-  --color-primary: #007bff;
-  --color-secondary: #6c757d;
-  --color-icon: #17a2b8;
-} */
-
-
 @layer base {
   :root {
     --background: 211 100% 98%;
@@ -34,44 +25,12 @@
     --ring: 211 100% 50%;
     --radius: 0.5rem;

-    /* Custom variables */
-    /* --color-icon: #17a2b8; Kept as hex */
-    --chart-1: 12 76% 61%;
-    --chart-2: 173 58% 39%;
-    --chart-3: 197 37% 24%;
-    --chart-4: 43 74% 66%;
-    --chart-5: 27 87% 67%;
-  }
-
-  .dark {
-    --background: 211 50% 5%;
-    --foreground: 211 5% 90%;
-    --card: 211 50% 0%;
-    --card-foreground: 211 5% 90%;
-    --popover: 211 50% 5%;
-    --popover-foreground: 211 5% 90%;
-    --primary: 211 100% 50%;
-    --primary-foreground: 0 0% 100%;
-    --secondary: 211 30% 10%;
-    --secondary-foreground: 0 0% 100%;
-    --muted: 173 30% 15%;
-    --muted-foreground: 211 5% 60%;
-    --accent: 173 30% 15%;
-    --accent-foreground: 211 5% 90%;
-    --destructive: 0 100% 30%;
-    --destructive-foreground: 211 5% 90%;
-    --border: 211 30% 18%;
-    --input: 211 30% 18%;
-    --ring: 211 100% 50%;
-    --radius: 0.5rem;
-
-    /* Custom variables for dark mode */
-    /* --color-icon: #17a2b8; */
-    --chart-1: 220 70% 50%;
-    --chart-2: 160 60% 45%;
-    --chart-3: 30 80% 55%;
-    --chart-4: 280 65% 60%;
-    --chart-5: 340 75% 55%;
+    /* Chart colors now use theme variables */
+    --chart-1: var(--chart1);
+    --chart-2: var(--chart2);
+    --chart-3: var(--chart3);
+    --chart-4: var(--chart4);
+    --chart-5: var(--chart5);
   }
 }