From eabefacd0f79e07d98e7d899a22287979e61b39f Mon Sep 17 00:00:00 2001 From: Shreya Shankar Date: Sat, 30 Nov 2024 14:09:23 -0600 Subject: [PATCH] fix: make histogram calculation and rendering less blocking (#218) --- .github/workflows/docker-ci.yml | 47 +- website/src/components/Output.tsx | 612 ++++++++++-------- website/src/components/ResizableDataTable.tsx | 105 ++- 3 files changed, 452 insertions(+), 312 deletions(-) diff --git a/.github/workflows/docker-ci.yml b/.github/workflows/docker-ci.yml index 376cbc6a..73981d30 100644 --- a/.github/workflows/docker-ci.yml +++ b/.github/workflows/docker-ci.yml @@ -20,7 +20,11 @@ jobs: run: sed -i '/COPY .env/d' Dockerfile - name: Build Docker image - run: docker build -t docetl . + run: | + if ! docker build -t docetl .; then + echo "Docker build failed" + exit 1 + fi - name: Create Docker volume run: docker volume create docetl-data @@ -38,18 +42,37 @@ jobs: -e BACKEND_PORT=8000 \ --name docetl-test \ docetl - - # Wait for container to start up - sleep 240 + + # Wait for initial startup + echo "Waiting for container to start..." + sleep 30 + + # Check container health for up to 3 minutes + for i in {1..6}; do + if ! docker ps -q -f name=docetl-test > /dev/null 2>&1; then + echo "Container stopped unexpectedly" + docker logs docetl-test + exit 1 + fi + + # Try to curl the frontend + if curl -s -f http://localhost:3000 > /dev/null; then + echo "Frontend is responding" + break + fi + + if [ $i -eq 6 ]; then + echo "Container health check failed after 3 minutes" + docker logs docetl-test + exit 1 + fi + + echo "Waiting for services to be ready... (attempt $i/6)" + sleep 30 + done - # Check if container is still running - if [ "$(docker ps -q -f name=docetl-test)" ]; then - echo "Container is running successfully" - else - echo "Container failed to stay running" - docker logs docetl-test - exit 1 - fi + # If we get here, container is running and healthy + echo "Container is running successfully" # Cleanup docker stop docetl-test diff --git a/website/src/components/Output.tsx b/website/src/components/Output.tsx index b2e7609c..41ab2333 100644 --- a/website/src/components/Output.tsx +++ b/website/src/components/Output.tsx @@ -1,4 +1,4 @@ -import React, { useState, useEffect, useMemo } from "react"; +import React, { useState, useEffect, useMemo, useCallback } from "react"; import { ColumnType } from "@/components/ResizableDataTable"; import ResizableDataTable from "@/components/ResizableDataTable"; import { usePipelineContext } from "@/contexts/PipelineContext"; @@ -24,6 +24,7 @@ import { Tooltip as RechartsTooltip, ResponsiveContainer, } from "recharts"; +import { memo } from "react"; const TinyPieChart: React.FC<{ percentage: number }> = ({ percentage }) => { const size = 16; @@ -66,264 +67,98 @@ const TinyPieChart: React.FC<{ percentage: number }> = ({ percentage }) => { ); }; -export const ConsoleContent: React.FC = () => { - const { terminalOutput, setTerminalOutput, optimizerProgress } = - usePipelineContext(); - const { readyState } = useWebSocket(); - - return ( -
- {optimizerProgress && ( -
- {/* Animated gradient border */} -
- - {/* Inner content container */} -
-
-
- {optimizerProgress.status} -
-
- {Math.round(optimizerProgress.progress * 100)}% -
-
-
-
-
- - {optimizerProgress.shouldOptimize && ( -
-
- -
-
- Optimizing because -
- -
-
-
- {optimizerProgress.rationale} -
-
- - {optimizerProgress.validatorPrompt && ( -
- -
-
- Using this prompt to evaluate the best plan -
- -
-
-
- {optimizerProgress.validatorPrompt} -
-
- )} -
- )} -
-
- )} - -
- -
-
+// Create a custom hook to find the operation only when needed +const useOperation = (operationId: string | undefined) => { + const { operations } = usePipelineContext(); + return useMemo( + () => operations.find((op) => op.id === operationId), + [operationId] // Only depend on the ID, not the operations array ); }; -export const Output: React.FC = () => { - const { output, isLoadingOutputs, operations } = usePipelineContext(); - const [outputs, setOutputs] = useState([]); - const [inputCount, setInputCount] = useState(0); - const [outputCount, setOutputCount] = useState(0); - - const [operation, setOperation] = useState(undefined); - const [opName, setOpName] = useState(undefined); - const [isResolveOrReduce, setIsResolveOrReduce] = useState(false); - - const [activeTab, setActiveTab] = useState("table"); - const { readyState } = useWebSocket(); - - useEffect(() => { - if (isLoadingOutputs) { - setActiveTab("console"); - } else { - setActiveTab("table"); - } - }, [isLoadingOutputs]); +// Update the useOutputContext to not include operations +const useOutputContext = () => { + const { + output, + isLoadingOutputs, + terminalOutput, + setTerminalOutput, + optimizerProgress, + } = usePipelineContext(); + + return { + output, + isLoadingOutputs, + terminalOutput, + setTerminalOutput, + optimizerProgress, + }; +}; - useEffect(() => { - const foundOperation = operations.find( - (op: Operation) => op.id === output?.operationId - ); - setOperation(foundOperation); - setOpName(foundOperation?.name); - setIsResolveOrReduce( - foundOperation?.type === "resolve" || foundOperation?.type === "reduce" +// First, move TableContent outside and give it a display name +const TableContent = memo( + ({ + opName, + isLoadingOutputs, + outputs, + operation, + columns, + }: { + opName: string | undefined; + isLoadingOutputs: boolean; + outputs: OutputRow[]; + operation: Operation | undefined; + columns: ColumnType[]; + }) => { + return ( +
+ {!opName ? ( +
+

No operation selected.

+
+ ) : isLoadingOutputs ? ( +
+ + + Loading outputs... + +
+ ) : outputs.length > 0 ? ( +
+ field.key) + : [] + } + startingRowHeight={180} + currentOperation={opName} + /> +
+ ) : ( +
+

No outputs available.

+
+ )} +
); - }, [operations, output]); - - useEffect(() => { - const fetchData = async () => { - if (output) { - const importantColumns = - operation?.otherKwargs?.prompts?.[0]?.output_keys; - try { - // Fetch output data - const outputResponse = await fetch( - `/api/readFile?path=${output.path}` - ); - if (!outputResponse.ok) { - throw new Error("Failed to fetch output file"); - } - const outputContent = await outputResponse.text(); - let parsedOutputs = JSON.parse(outputContent) as OutputRow[]; - setOutputCount(parsedOutputs.length); - - // Sort and reorder columns (existing logic) - if (parsedOutputs.length > 0) { - if ("date" in parsedOutputs[0]) { - parsedOutputs.sort((a, b) => { - const dateA = (a as OutputRow & { date?: string }).date; - const dateB = (b as OutputRow & { date?: string }).date; - if (dateA && dateB) { - return new Date(dateB).getTime() - new Date(dateA).getTime(); - } - return 0; - }); - } - - if (importantColumns && importantColumns.length > 0) { - parsedOutputs = parsedOutputs.map((row) => { - const orderedRow: OutputRow = {}; - importantColumns.forEach((col: string) => { - if (col in row) { - orderedRow[col] = row[col]; - } - }); - Object.keys(row).forEach((key) => { - if (!importantColumns.includes(key)) { - orderedRow[key] = row[key]; - } - }); - return orderedRow; - }); - } - } - - setOutputs(parsedOutputs); - - // Fetch input data if inputPath exists - if (output.inputPath) { - const inputResponse = await fetch( - `/api/readFile?path=${output.inputPath}` - ); - if (!inputResponse.ok) { - throw new Error("Failed to fetch input file"); - } - const inputContent = await inputResponse.text(); - const parsedInputs = JSON.parse(inputContent); - setInputCount( - Array.isArray(parsedInputs) ? parsedInputs.length : 1 - ); - } else { - setInputCount(0); - } - } catch (error) { - console.error("Error fetching data:", error); - } - } - }; - - fetchData(); - }, [output, isLoadingOutputs]); - - const columns: ColumnType[] = React.useMemo(() => { - const importantColumns = operation?.output?.schema - ? operation.output.schema.map((field) => field.key) - : []; - - return outputs.length > 0 - ? Object.keys(outputs[0]).map((key) => ({ - accessorKey: key, - header: key, - cell: ({ getValue }: { getValue: () => unknown }) => { - const value = getValue(); - const stringValue = - typeof value === "object" && value !== null - ? JSON.stringify(value, null, 2) - : String(value); - return ( -
-                {stringValue}
-              
- ); - }, - initialWidth: importantColumns?.includes(key) ? 300 : 150, - })) - : []; - }, [outputs, operation?.output?.schema]); - - const TableContent = () => ( -
- {!opName ? ( -
-

No operation selected.

-
- ) : isLoadingOutputs ? ( -
- - Loading outputs... -
- ) : outputs.length > 0 ? ( -
- field.key) - : [] - } - startingRowHeight={180} - currentOperation={opName} - /> -
- ) : ( -
-

No outputs available.

-
- )} -
- ); - - const VisualizeContent = () => { + } +); +TableContent.displayName = "TableContent"; + +// Move VisualizeContent outside +const VisualizeContent = memo( + ({ + opName, + outputs, + operation, + }: { + opName: string | undefined; + outputs: OutputRow[]; + operation: Operation | undefined; + }) => { const visualizationColumn = useMemo(() => { if (!opName) return null; const reduceColumnName = `_counts_prereduce_${opName}`; @@ -404,7 +239,7 @@ export const Output: React.FC = () => { const groupedData = useMemo(() => { const intersectionKeys = new Set( outputs.flatMap((row) => { - // @ts-expect-error Record type needs refinement + // @ts-expect-error - Record type needs refinement for kvPairs structure const kvPairs = row[visualizationColumn.name] as Record< string, unknown @@ -421,7 +256,7 @@ export const Output: React.FC = () => { } = {}; outputs.forEach((row) => { - // @ts-expect-error Record type needs refinement + // @ts-expect-error - Record type needs refinement for kvPairs structure const kvPairs = row[visualizationColumn.name] as Record< string, unknown @@ -482,7 +317,7 @@ export const Output: React.FC = () => { JSON.stringify(value, null, 2) ) ) - // @ts-expect-error + // @ts-expect-error - Record type needs refinement for kvPairs structure ).map((str) => JSON.parse(str)); // Calculate percentage of total documents @@ -540,9 +375,160 @@ export const Output: React.FC = () => {

); - }; + } +); +VisualizeContent.displayName = "VisualizeContent"; + +// Move ConsoleContent outside +export const ConsoleContent = memo(() => { + const { terminalOutput, setTerminalOutput, optimizerProgress } = + useOutputContext(); + const { readyState } = useWebSocket(); + + return ( +
+ {optimizerProgress && ( +
+ {/* Animated gradient border */} +
+ + {/* Inner content container */} +
+
+
+ {optimizerProgress.status} +
+
+ {Math.round(optimizerProgress.progress * 100)}% +
+
+
+
+
+ + {optimizerProgress.shouldOptimize && ( +
+
+ +
+
+ Optimizing because +
+ +
+
+
+ {optimizerProgress.rationale} +
+
+ + {optimizerProgress.validatorPrompt && ( +
+ +
+
+ Using this prompt to evaluate the best plan +
+ +
+
+
+ {optimizerProgress.validatorPrompt} +
+
+ )} +
+ )} +
+
+ )} + +
+ +
+
+ ); +}); +ConsoleContent.displayName = "ConsoleContent"; + +// Main Output component +export const Output = memo(() => { + const { output, isLoadingOutputs } = useOutputContext(); + const operation = useOperation(output?.operationId); + + const [outputs, setOutputs] = useState([]); + const [inputCount, setInputCount] = useState(0); + const [outputCount, setOutputCount] = useState(0); + + const [opName, setOpName] = useState(undefined); + const [isResolveOrReduce, setIsResolveOrReduce] = useState(false); + + const [activeTab, setActiveTab] = useState("table"); + const { readyState } = useWebSocket(); + + // Effect for operation updates + useEffect(() => { + setOpName(operation?.name); + setIsResolveOrReduce( + operation?.type === "resolve" || operation?.type === "reduce" + ); + }, [operation]); + + // Effect for tab changes + useEffect(() => { + setActiveTab(isLoadingOutputs ? "console" : "table"); + }, [isLoadingOutputs]); + + // Memoize columns + const columns = useMemo(() => { + const importantColumns = operation?.output?.schema + ? operation.output.schema.map((field) => field.key) + : []; + + return outputs.length > 0 + ? (Object.keys(outputs[0]).map((key) => ({ + accessorKey: key, + header: key, + cell: ({ getValue }: { getValue: () => unknown }) => { + const value = getValue(); + const stringValue = + typeof value === "object" && value !== null + ? JSON.stringify(value, null, 2) + : String(value); + return ( +
+                {stringValue}
+              
+ ); + }, + initialWidth: importantColumns?.includes(key) ? 300 : 150, + })) as ColumnType[]) + : []; + }, [outputs, operation?.output?.schema]); - const downloadCSV = () => { + // Memoize handlers + const downloadCSV = useCallback(() => { if (outputs.length === 0) return; try { @@ -563,10 +549,93 @@ export const Output: React.FC = () => { } catch (err) { console.error("Error converting to CSV:", err); } - }; + }, [outputs]); - const selectivityFactor = - inputCount > 0 ? (outputCount / inputCount).toFixed(2) : "N/A"; + const handleTabChange = useCallback((value: string) => { + setActiveTab(value); + }, []); + + // Memoize computed values + const selectivityFactor = useMemo( + () => (inputCount > 0 ? (outputCount / inputCount).toFixed(2) : "N/A"), + [inputCount, outputCount] + ); + + // Add back the data fetching effect + useEffect(() => { + const fetchData = async () => { + if (output) { + const importantColumns = + operation?.otherKwargs?.prompts?.[0]?.output_keys; + try { + // Fetch output data + const outputResponse = await fetch( + `/api/readFile?path=${output.path}` + ); + if (!outputResponse.ok) { + throw new Error("Failed to fetch output file"); + } + const outputContent = await outputResponse.text(); + let parsedOutputs = JSON.parse(outputContent) as OutputRow[]; + setOutputCount(parsedOutputs.length); + + // Sort and reorder columns (existing logic) + if (parsedOutputs.length > 0) { + if ("date" in parsedOutputs[0]) { + parsedOutputs.sort((a, b) => { + const dateA = (a as OutputRow & { date?: string }).date; + const dateB = (b as OutputRow & { date?: string }).date; + if (dateA && dateB) { + return new Date(dateB).getTime() - new Date(dateA).getTime(); + } + return 0; + }); + } + + if (importantColumns && importantColumns.length > 0) { + parsedOutputs = parsedOutputs.map((row) => { + const orderedRow: OutputRow = {}; + importantColumns.forEach((col: string) => { + if (col in row) { + orderedRow[col] = row[col]; + } + }); + Object.keys(row).forEach((key) => { + if (!importantColumns.includes(key)) { + orderedRow[key] = row[key]; + } + }); + return orderedRow; + }); + } + } + + setOutputs(parsedOutputs); + + // Fetch input data if inputPath exists + if (output.inputPath) { + const inputResponse = await fetch( + `/api/readFile?path=${output.inputPath}` + ); + if (!inputResponse.ok) { + throw new Error("Failed to fetch input file"); + } + const inputContent = await inputResponse.text(); + const parsedInputs = JSON.parse(inputContent); + setInputCount( + Array.isArray(parsedInputs) ? parsedInputs.length : 1 + ); + } else { + setInputCount(0); + } + } catch (error) { + console.error("Error fetching data:", error); + } + } + }; + + fetchData(); + }, [output, operation?.otherKwargs?.prompts, isLoadingOutputs]); return (
@@ -662,7 +731,7 @@ export const Output: React.FC = () => {
@@ -687,7 +756,13 @@ export const Output: React.FC = () => { value="table" className="h-full data-[state=active]:flex flex-col" > - + { value="visualize" className="h-full data-[state=active]:flex flex-col" > - +
); -}; +}); +Output.displayName = "Output"; diff --git a/website/src/components/ResizableDataTable.tsx b/website/src/components/ResizableDataTable.tsx index 35441e9d..2393abef 100644 --- a/website/src/components/ResizableDataTable.tsx +++ b/website/src/components/ResizableDataTable.tsx @@ -4,6 +4,7 @@ import React, { useCallback, useMemo, useRef, + memo, } from "react"; import { flexRender, @@ -59,9 +60,12 @@ import { ColumnDialog } from "@/components/ColumnDialog"; import { SearchableCell } from "@/components/SearchableCell"; import { PrettyJSON } from "@/components/PrettyJSON"; export type DataType = Record; -export type ColumnType = ColumnDef & { +export type ColumnType = { + accessorKey: string; + header: string; + cell?: ({ getValue }: { getValue: () => unknown }) => React.ReactNode; initialWidth?: number; - accessorKey?: string; + id?: string; }; interface ColumnStats { @@ -247,21 +251,37 @@ function calculateColumnStats( }; } -const WordCountHistogram = React.memo( +const truncateString = (str: string, maxLength: number = 20) => { + if (str.length <= maxLength) return str; + return str.slice(0, maxLength) + "..."; +}; + +const WordCountHistogram = memo( ({ histogramData, }: { histogramData: { range: string; count: number; fullRange: string }[]; }) => { - // Calculate total count for fractions + // Memoize total count calculation const totalCount = useMemo( () => histogramData.reduce((sum, item) => sum + item.count, 0), [histogramData] ); + // Memoize truncated data + const truncatedData = useMemo( + () => + histogramData.map((item) => ({ + ...item, + range: truncateString(item.range, 10), + fullRange: item.fullRange, + })), + [histogramData] + ); + return ( - + label} + labelFormatter={(_, payload) => + payload[0]?.payload?.fullRange || "" + } contentStyle={{ backgroundColor: "hsl(var(--popover))", border: "1px solid hsl(var(--border))", @@ -298,25 +320,36 @@ const WordCountHistogram = React.memo( ); - } + }, + // Deep comparison for histogramData + (prevProps, nextProps) => + JSON.stringify(prevProps.histogramData) === + JSON.stringify(nextProps.histogramData) ); WordCountHistogram.displayName = "WordCountHistogram"; -const CategoricalBarChart = React.memo( +const CategoricalBarChart = memo( ({ data }: { data: { value: string; count: number }[] }) => { + // Memoize total count calculation const totalCount = useMemo( () => data.reduce((sum, item) => sum + item.count, 0), [data] ); - // Take top 10 values for visualization - const displayData = data.slice(0, 10); + // Memoize truncated and limited data + const displayData = useMemo(() => { + return data.slice(0, 10).map((item) => ({ + ...item, + displayValue: truncateString(item.value, 10), + fullValue: item.value, + })); + }, [data]); return ( label} + labelFormatter={(_, payload) => + payload[0]?.payload?.fullValue || "" + } contentStyle={{ backgroundColor: "hsl(var(--popover))", border: "1px solid hsl(var(--border))", @@ -351,7 +386,10 @@ const CategoricalBarChart = React.memo( ); - } + }, + // Deep comparison for data + (prevProps, nextProps) => + JSON.stringify(prevProps.data) === JSON.stringify(nextProps.data) ); CategoricalBarChart.displayName = "CategoricalBarChart"; @@ -366,7 +404,7 @@ interface ColumnHeaderProps { onExpand: () => void; } -const ColumnHeader = React.memo( +const ColumnHeader = memo( ({ header, stats, @@ -460,7 +498,7 @@ const ColumnHeader = React.memo( strokeWidth="2" strokeLinecap="round" strokeLinejoin="round" - className="text-muted-foreground" + className="text-primary" > @@ -479,6 +517,7 @@ const ColumnHeader = React.memo( strokeWidth="2" strokeLinecap="round" strokeLinejoin="round" + className="text-primary" > @@ -495,6 +534,7 @@ const ColumnHeader = React.memo( strokeWidth="2" strokeLinecap="round" strokeLinejoin="round" + className="text-primary" > @@ -507,7 +547,7 @@ const ColumnHeader = React.memo( className="h-6 w-6 p-0" onClick={onExpand} > - +
{header} @@ -579,11 +619,7 @@ const ColumnHeader = React.memo( ); ColumnHeader.displayName = "ColumnHeader"; -const ColumnResizer = ({ - header, -}: { - header: Header; -}) => { +const ColumnResizer = memo(({ header }: { header: Header }) => { return (
({ }} /> ); -}; +}); +ColumnResizer.displayName = "ColumnResizer"; interface ResizableRow extends Row { getSize: () => number; setSize: (size: number) => void; } -const RowResizer = ({ row }: { row: ResizableRow }) => { +const RowResizer = memo(({ row }: { row: ResizableRow }) => { return ( @@ -634,21 +671,22 @@ const RowResizer = ({ row }: { row: ResizableRow }) => { ); -}; +}); +RowResizer.displayName = "RowResizer"; -interface ResizableDataTableProps { +interface ResizableDataTableProps> { data: T[]; columns: ColumnType[]; - boldedColumns: string[]; + boldedColumns?: string[]; startingRowHeight?: number; - currentOperation: string; + currentOperation?: string; } interface MarkdownCellProps { content: string; } -const MarkdownCell = React.memo(({ content }: MarkdownCellProps) => { +const MarkdownCell = memo(({ content }: MarkdownCellProps) => { return ( { // Only show observability data for the current operation const observabilityEntries = Object.entries(row).filter( @@ -797,11 +835,11 @@ const createSortingFns = ( }, }); -function ResizableDataTable({ +export default function ResizableDataTable>({ data, columns, - boldedColumns, - startingRowHeight = 60, + boldedColumns = [], + startingRowHeight = 40, currentOperation, }: ResizableDataTableProps) { const [columnSizing, setColumnSizing] = useState(() => { @@ -920,6 +958,7 @@ function ResizableDataTable({ }) .map((col) => ({ ...col, + id: col.accessorKey, enableSorting: true, filterFn: fuzzyFilter, sortingFn: (rowA: Row, rowB: Row) => { @@ -1305,5 +1344,3 @@ function ResizableDataTable({
); } - -export default ResizableDataTable;