From eabefacd0f79e07d98e7d899a22287979e61b39f Mon Sep 17 00:00:00 2001
From: Shreya Shankar
Date: Sat, 30 Nov 2024 14:09:23 -0600
Subject: [PATCH] fix: make histogram calculation and rendering less blocking
(#218)
---
.github/workflows/docker-ci.yml | 47 +-
website/src/components/Output.tsx | 612 ++++++++++--------
website/src/components/ResizableDataTable.tsx | 105 ++-
3 files changed, 452 insertions(+), 312 deletions(-)
diff --git a/.github/workflows/docker-ci.yml b/.github/workflows/docker-ci.yml
index 376cbc6a..73981d30 100644
--- a/.github/workflows/docker-ci.yml
+++ b/.github/workflows/docker-ci.yml
@@ -20,7 +20,11 @@ jobs:
run: sed -i '/COPY .env/d' Dockerfile
- name: Build Docker image
- run: docker build -t docetl .
+ run: |
+ if ! docker build -t docetl .; then
+ echo "Docker build failed"
+ exit 1
+ fi
- name: Create Docker volume
run: docker volume create docetl-data
@@ -38,18 +42,37 @@ jobs:
-e BACKEND_PORT=8000 \
--name docetl-test \
docetl
-
- # Wait for container to start up
- sleep 240
+
+ # Wait for initial startup
+ echo "Waiting for container to start..."
+ sleep 30
+
+ # Poll the container for up to 3 minutes (30s initial wait, then 6 attempts 30s apart)
+ for i in {1..6}; do
+ # `docker ps` exits 0 even when the filter matches nothing, so test its output instead
+ if [ -z "$(docker ps -q -f name=docetl-test)" ]; then
+ echo "Container stopped unexpectedly"
+ docker logs docetl-test
+ exit 1
+ fi
+
+ # Try to curl the frontend
+ if curl -s -f http://localhost:3000 > /dev/null; then
+ echo "Frontend is responding"
+ break
+ fi
+
+ # Last attempt and still no response: dump the logs and fail the job
+ if [ "$i" -eq 6 ]; then
+ echo "Container health check failed after 3 minutes"
+ docker logs docetl-test
+ exit 1
+ fi
+
+ echo "Waiting for services to be ready... (attempt $i/6)"
+ sleep 30
+ done
- # Check if container is still running
- if [ "$(docker ps -q -f name=docetl-test)" ]; then
- echo "Container is running successfully"
- else
- echo "Container failed to stay running"
- docker logs docetl-test
- exit 1
- fi
+ # If we get here, container is running and healthy
+ echo "Container is running successfully"
# Cleanup
docker stop docetl-test
diff --git a/website/src/components/Output.tsx b/website/src/components/Output.tsx
index b2e7609c..41ab2333 100644
--- a/website/src/components/Output.tsx
+++ b/website/src/components/Output.tsx
@@ -1,4 +1,4 @@
-import React, { useState, useEffect, useMemo } from "react";
+import React, { useState, useEffect, useMemo, useCallback } from "react";
import { ColumnType } from "@/components/ResizableDataTable";
import ResizableDataTable from "@/components/ResizableDataTable";
import { usePipelineContext } from "@/contexts/PipelineContext";
@@ -24,6 +24,7 @@ import {
Tooltip as RechartsTooltip,
ResponsiveContainer,
} from "recharts";
+import { memo } from "react";
const TinyPieChart: React.FC<{ percentage: number }> = ({ percentage }) => {
const size = 16;
@@ -66,264 +67,98 @@ const TinyPieChart: React.FC<{ percentage: number }> = ({ percentage }) => {
);
};
-export const ConsoleContent: React.FC = () => {
- const { terminalOutput, setTerminalOutput, optimizerProgress } =
- usePipelineContext();
- const { readyState } = useWebSocket();
-
- return (
-
- {optimizerProgress && (
-
- {/* Animated gradient border */}
-
-
- {/* Inner content container */}
-
-
-
- {optimizerProgress.status}
-
-
- {Math.round(optimizerProgress.progress * 100)}%
-
-
-
-
- {optimizerProgress.shouldOptimize && (
-
-
-
-
-
- Optimizing because
-
-
-
-
-
- {optimizerProgress.rationale}
-
-
-
- {optimizerProgress.validatorPrompt && (
-
-
-
-
- Using this prompt to evaluate the best plan
-
-
-
-
-
- {optimizerProgress.validatorPrompt}
-
-
- )}
-
- )}
-
-
- )}
-
-
-
+// Resolve the operation with the given ID from the pipeline context.
+// Returns undefined when no operation in the list has that ID.
+const useOperation = (operationId: string | undefined) => {
+ const { operations } = usePipelineContext();
+ return useMemo(
+ () => operations.find((op) => op.id === operationId),
+ // `operations` has to be a dependency: keyed on the ID alone, the memo keeps
+ // returning a stale (or undefined) result when the list is loaded later or an
+ // operation is replaced by an updated copy that keeps the same ID.
+ [operations, operationId]
 );
 };
-export const Output: React.FC = () => {
- const { output, isLoadingOutputs, operations } = usePipelineContext();
- const [outputs, setOutputs] = useState([]);
- const [inputCount, setInputCount] = useState(0);
- const [outputCount, setOutputCount] = useState(0);
-
- const [operation, setOperation] = useState(undefined);
- const [opName, setOpName] = useState(undefined);
- const [isResolveOrReduce, setIsResolveOrReduce] = useState(false);
-
- const [activeTab, setActiveTab] = useState("table");
- const { readyState } = useWebSocket();
-
- useEffect(() => {
- if (isLoadingOutputs) {
- setActiveTab("console");
- } else {
- setActiveTab("table");
- }
- }, [isLoadingOutputs]);
+// Narrow view of the pipeline context used by the output panel components.
+// Exposes only the output, terminal and optimizer fields; `operations` is left out.
+const useOutputContext = () => {
+ const pipeline = usePipelineContext();
+
+ return {
+ output: pipeline.output,
+ isLoadingOutputs: pipeline.isLoadingOutputs,
+ terminalOutput: pipeline.terminalOutput,
+ setTerminalOutput: pipeline.setTerminalOutput,
+ optimizerProgress: pipeline.optimizerProgress,
+ };
+};
- useEffect(() => {
- const foundOperation = operations.find(
- (op: Operation) => op.id === output?.operationId
- );
- setOperation(foundOperation);
- setOpName(foundOperation?.name);
- setIsResolveOrReduce(
- foundOperation?.type === "resolve" || foundOperation?.type === "reduce"
+// Table tab body: shows a "no operation" placeholder, a loading state, the data table, or an empty state
+const TableContent = memo(
+ ({
+ opName,
+ isLoadingOutputs,
+ outputs,
+ operation,
+ columns,
+ }: {
+ opName: string | undefined;
+ isLoadingOutputs: boolean;
+ outputs: OutputRow[];
+ operation: Operation | undefined;
+ columns: ColumnType[];
+ }) => {
+ return (
+
+ {!opName ? (
+
+
No operation selected.
+
+ ) : isLoadingOutputs ? (
+
+
+
+ Loading outputs...
+
+
+ ) : outputs.length > 0 ? (
+
+ field.key)
+ : []
+ }
+ startingRowHeight={180}
+ currentOperation={opName}
+ />
+
+ ) : (
+
+
No outputs available.
+
+ )}
+
);
- }, [operations, output]);
-
- useEffect(() => {
- const fetchData = async () => {
- if (output) {
- const importantColumns =
- operation?.otherKwargs?.prompts?.[0]?.output_keys;
- try {
- // Fetch output data
- const outputResponse = await fetch(
- `/api/readFile?path=${output.path}`
- );
- if (!outputResponse.ok) {
- throw new Error("Failed to fetch output file");
- }
- const outputContent = await outputResponse.text();
- let parsedOutputs = JSON.parse(outputContent) as OutputRow[];
- setOutputCount(parsedOutputs.length);
-
- // Sort and reorder columns (existing logic)
- if (parsedOutputs.length > 0) {
- if ("date" in parsedOutputs[0]) {
- parsedOutputs.sort((a, b) => {
- const dateA = (a as OutputRow & { date?: string }).date;
- const dateB = (b as OutputRow & { date?: string }).date;
- if (dateA && dateB) {
- return new Date(dateB).getTime() - new Date(dateA).getTime();
- }
- return 0;
- });
- }
-
- if (importantColumns && importantColumns.length > 0) {
- parsedOutputs = parsedOutputs.map((row) => {
- const orderedRow: OutputRow = {};
- importantColumns.forEach((col: string) => {
- if (col in row) {
- orderedRow[col] = row[col];
- }
- });
- Object.keys(row).forEach((key) => {
- if (!importantColumns.includes(key)) {
- orderedRow[key] = row[key];
- }
- });
- return orderedRow;
- });
- }
- }
-
- setOutputs(parsedOutputs);
-
- // Fetch input data if inputPath exists
- if (output.inputPath) {
- const inputResponse = await fetch(
- `/api/readFile?path=${output.inputPath}`
- );
- if (!inputResponse.ok) {
- throw new Error("Failed to fetch input file");
- }
- const inputContent = await inputResponse.text();
- const parsedInputs = JSON.parse(inputContent);
- setInputCount(
- Array.isArray(parsedInputs) ? parsedInputs.length : 1
- );
- } else {
- setInputCount(0);
- }
- } catch (error) {
- console.error("Error fetching data:", error);
- }
- }
- };
-
- fetchData();
- }, [output, isLoadingOutputs]);
-
- const columns: ColumnType[] = React.useMemo(() => {
- const importantColumns = operation?.output?.schema
- ? operation.output.schema.map((field) => field.key)
- : [];
-
- return outputs.length > 0
- ? Object.keys(outputs[0]).map((key) => ({
- accessorKey: key,
- header: key,
- cell: ({ getValue }: { getValue: () => unknown }) => {
- const value = getValue();
- const stringValue =
- typeof value === "object" && value !== null
- ? JSON.stringify(value, null, 2)
- : String(value);
- return (
-
- {stringValue}
-
- );
- },
- initialWidth: importantColumns?.includes(key) ? 300 : 150,
- }))
- : [];
- }, [outputs, operation?.output?.schema]);
-
- const TableContent = () => (
-
- {!opName ? (
-
-
No operation selected.
-
- ) : isLoadingOutputs ? (
-
-
- Loading outputs...
-
- ) : outputs.length > 0 ? (
-
- field.key)
- : []
- }
- startingRowHeight={180}
- currentOperation={opName}
- />
-
- ) : (
-
-
No outputs available.
-
- )}
-
- );
-
- const VisualizeContent = () => {
+ }
+);
+TableContent.displayName = "TableContent";
+
+// Visualize tab body; defined at module scope and memoized so it keeps a stable identity between Output renders
+const VisualizeContent = memo(
+ ({
+ opName,
+ outputs,
+ operation,
+ }: {
+ opName: string | undefined;
+ outputs: OutputRow[];
+ operation: Operation | undefined;
+ }) => {
const visualizationColumn = useMemo(() => {
if (!opName) return null;
const reduceColumnName = `_counts_prereduce_${opName}`;
@@ -404,7 +239,7 @@ export const Output: React.FC = () => {
const groupedData = useMemo(() => {
const intersectionKeys = new Set(
outputs.flatMap((row) => {
- // @ts-expect-error Record type needs refinement
+ // @ts-expect-error - Record type needs refinement for kvPairs structure
const kvPairs = row[visualizationColumn.name] as Record<
string,
unknown
@@ -421,7 +256,7 @@ export const Output: React.FC = () => {
} = {};
outputs.forEach((row) => {
- // @ts-expect-error Record type needs refinement
+ // @ts-expect-error - Record type needs refinement for kvPairs structure
const kvPairs = row[visualizationColumn.name] as Record<
string,
unknown
@@ -482,7 +317,7 @@ export const Output: React.FC = () => {
JSON.stringify(value, null, 2)
)
)
- // @ts-expect-error
+ // @ts-expect-error - Record type needs refinement for kvPairs structure
).map((str) => JSON.parse(str));
// Calculate percentage of total documents
@@ -540,9 +375,160 @@ export const Output: React.FC = () => {
);
- };
+ }
+);
+VisualizeContent.displayName = "VisualizeContent";
+
+// Console panel content: optimizer progress (status, percent complete, rationale, validator prompt) when available
+export const ConsoleContent = memo(() => {
+ const { terminalOutput, setTerminalOutput, optimizerProgress } =
+ useOutputContext();
+ const { readyState } = useWebSocket();
+
+ return (
+
+ {optimizerProgress && (
+
+ {/* Animated gradient border */}
+
+
+ {/* Inner content container */}
+
+
+
+ {optimizerProgress.status}
+
+
+ {Math.round(optimizerProgress.progress * 100)}%
+
+
+
+
+ {optimizerProgress.shouldOptimize && (
+
+
+
+
+
+ Optimizing because
+
+
+
+
+
+ {optimizerProgress.rationale}
+
+
+
+ {optimizerProgress.validatorPrompt && (
+
+
+
+
+ Using this prompt to evaluate the best plan
+
+
+
+
+
+ {optimizerProgress.validatorPrompt}
+
+
+ )}
+
+ )}
+
+
+ )}
+
+
+
+ );
+});
+ConsoleContent.displayName = "ConsoleContent";
+
+// Main Output component
+export const Output = memo(() => {
+ const { output, isLoadingOutputs } = useOutputContext();
+ const operation = useOperation(output?.operationId);
+
+ const [outputs, setOutputs] = useState([]);
+ const [inputCount, setInputCount] = useState(0);
+ const [outputCount, setOutputCount] = useState(0);
+
+ const [opName, setOpName] = useState(undefined);
+ const [isResolveOrReduce, setIsResolveOrReduce] = useState(false);
+
+ const [activeTab, setActiveTab] = useState("table");
+ const { readyState } = useWebSocket();
+
+ // Effect for operation updates
+ useEffect(() => {
+ setOpName(operation?.name);
+ setIsResolveOrReduce(
+ operation?.type === "resolve" || operation?.type === "reduce"
+ );
+ }, [operation]);
+
+ // Effect for tab changes
+ useEffect(() => {
+ setActiveTab(isLoadingOutputs ? "console" : "table");
+ }, [isLoadingOutputs]);
+
+ // Memoize columns
+ const columns = useMemo(() => {
+ const importantColumns = operation?.output?.schema
+ ? operation.output.schema.map((field) => field.key)
+ : [];
+
+ return outputs.length > 0
+ ? (Object.keys(outputs[0]).map((key) => ({
+ accessorKey: key,
+ header: key,
+ cell: ({ getValue }: { getValue: () => unknown }) => {
+ const value = getValue();
+ const stringValue =
+ typeof value === "object" && value !== null
+ ? JSON.stringify(value, null, 2)
+ : String(value);
+ return (
+
+ {stringValue}
+
+ );
+ },
+ initialWidth: importantColumns?.includes(key) ? 300 : 150,
+ })) as ColumnType[])
+ : [];
+ }, [outputs, operation?.output?.schema]);
- const downloadCSV = () => {
+ // Memoize handlers
+ const downloadCSV = useCallback(() => {
if (outputs.length === 0) return;
try {
@@ -563,10 +549,93 @@ export const Output: React.FC = () => {
} catch (err) {
console.error("Error converting to CSV:", err);
}
- };
+ }, [outputs]);
- const selectivityFactor =
- inputCount > 0 ? (outputCount / inputCount).toFixed(2) : "N/A";
+ const handleTabChange = useCallback((value: string) => {
+ setActiveTab(value);
+ }, []);
+
+ // Memoize computed values
+ const selectivityFactor = useMemo(
+ () => (inputCount > 0 ? (outputCount / inputCount).toFixed(2) : "N/A"),
+ [inputCount, outputCount]
+ );
+
+ // Load the rows of the selected output, plus the row count of its input file.
+ // Re-runs when the output, the operation's prompts, or the loading flag changes.
+ useEffect(() => {
+ // Flipped by the cleanup so a superseded request cannot overwrite newer state
+ let cancelled = false;
+
+ const fetchData = async () => {
+ if (!output) return;
+
+ const importantColumns =
+ operation?.otherKwargs?.prompts?.[0]?.output_keys;
+ try {
+ // Fetch output data; encode the path so "&", "#", "+" or spaces survive the query string
+ const outputResponse = await fetch(
+ `/api/readFile?path=${encodeURIComponent(output.path)}`
+ );
+ if (!outputResponse.ok) {
+ throw new Error("Failed to fetch output file");
+ }
+ const outputContent = await outputResponse.text();
+ if (cancelled) return;
+ let parsedOutputs = JSON.parse(outputContent) as OutputRow[];
+ setOutputCount(parsedOutputs.length);
+
+ // Sort and reorder columns (existing logic)
+ if (parsedOutputs.length > 0) {
+ // Newest first when rows carry a date; pairs missing a date compare as equal
+ if ("date" in parsedOutputs[0]) {
+ parsedOutputs.sort((a, b) => {
+ const dateA = (a as OutputRow & { date?: string }).date;
+ const dateB = (b as OutputRow & { date?: string }).date;
+ if (dateA && dateB) {
+ return new Date(dateB).getTime() - new Date(dateA).getTime();
+ }
+ return 0;
+ });
+ }
+
+ // Put the prompt's output keys first, followed by the remaining keys
+ if (importantColumns && importantColumns.length > 0) {
+ parsedOutputs = parsedOutputs.map((row) => {
+ const orderedRow: OutputRow = {};
+ importantColumns.forEach((col: string) => {
+ if (col in row) {
+ orderedRow[col] = row[col];
+ }
+ });
+ Object.keys(row).forEach((key) => {
+ if (!importantColumns.includes(key)) {
+ orderedRow[key] = row[key];
+ }
+ });
+ return orderedRow;
+ });
+ }
+ }
+
+ setOutputs(parsedOutputs);
+
+ // Fetch input data if inputPath exists
+ if (output.inputPath) {
+ const inputResponse = await fetch(
+ `/api/readFile?path=${encodeURIComponent(output.inputPath)}`
+ );
+ if (!inputResponse.ok) {
+ throw new Error("Failed to fetch input file");
+ }
+ const inputContent = await inputResponse.text();
+ if (cancelled) return;
+ const parsedInputs = JSON.parse(inputContent);
+ // A non-array input file counts as a single input
+ setInputCount(
+ Array.isArray(parsedInputs) ? parsedInputs.length : 1
+ );
+ } else {
+ setInputCount(0);
+ }
+ } catch (error) {
+ console.error("Error fetching data:", error);
+ }
+ };
+
+ fetchData();
+
+ return () => {
+ cancelled = true;
+ };
+ }, [output, operation?.otherKwargs?.prompts, isLoadingOutputs]);
return (
@@ -662,7 +731,7 @@ export const Output: React.FC = () => {
@@ -687,7 +756,13 @@ export const Output: React.FC = () => {
value="table"
className="h-full data-[state=active]:flex flex-col"
>
-
+
{
value="visualize"
className="h-full data-[state=active]:flex flex-col"
>
-
+
);
-};
+});
+Output.displayName = "Output";
diff --git a/website/src/components/ResizableDataTable.tsx b/website/src/components/ResizableDataTable.tsx
index 35441e9d..2393abef 100644
--- a/website/src/components/ResizableDataTable.tsx
+++ b/website/src/components/ResizableDataTable.tsx
@@ -4,6 +4,7 @@ import React, {
useCallback,
useMemo,
useRef,
+ memo,
} from "react";
import {
flexRender,
@@ -59,9 +60,12 @@ import { ColumnDialog } from "@/components/ColumnDialog";
import { SearchableCell } from "@/components/SearchableCell";
import { PrettyJSON } from "@/components/PrettyJSON";
export type DataType = Record;
-export type ColumnType = ColumnDef & {
+export type ColumnType = {
+ accessorKey: string;
+ header: string;
+ cell?: ({ getValue }: { getValue: () => unknown }) => React.ReactNode;
initialWidth?: number;
- accessorKey?: string;
+ id?: string;
};
interface ColumnStats {
@@ -247,21 +251,37 @@ function calculateColumnStats(
};
}
-const WordCountHistogram = React.memo(
+/**
+ * Cap a label at `maxLength` characters (20 by default). Strings longer than
+ * the cap are cut to exactly `maxLength` characters and suffixed with "...".
+ */
+const truncateString = (str: string, maxLength: number = 20) =>
+ str.length > maxLength ? `${str.slice(0, maxLength)}...` : str;
+
+const WordCountHistogram = memo(
({
histogramData,
}: {
histogramData: { range: string; count: number; fullRange: string }[];
}) => {
- // Calculate total count for fractions
+ // Memoize total count calculation
const totalCount = useMemo(
() => histogramData.reduce((sum, item) => sum + item.count, 0),
[histogramData]
);
+ // Memoize truncated data
+ const truncatedData = useMemo(
+ () =>
+ histogramData.map((item) => ({
+ ...item,
+ range: truncateString(item.range, 10),
+ fullRange: item.fullRange,
+ })),
+ [histogramData]
+ );
+
return (
-
+
label}
+ labelFormatter={(_, payload) =>
+ payload[0]?.payload?.fullRange || ""
+ }
contentStyle={{
backgroundColor: "hsl(var(--popover))",
border: "1px solid hsl(var(--border))",
@@ -298,25 +320,36 @@ const WordCountHistogram = React.memo(
);
- }
+ },
+ // Deep comparison for histogramData
+ (prevProps, nextProps) =>
+ JSON.stringify(prevProps.histogramData) ===
+ JSON.stringify(nextProps.histogramData)
);
WordCountHistogram.displayName = "WordCountHistogram";
-const CategoricalBarChart = React.memo(
+const CategoricalBarChart = memo(
({ data }: { data: { value: string; count: number }[] }) => {
+ // Memoize total count calculation
const totalCount = useMemo(
() => data.reduce((sum, item) => sum + item.count, 0),
[data]
);
- // Take top 10 values for visualization
- const displayData = data.slice(0, 10);
+ // Memoize truncated and limited data
+ const displayData = useMemo(() => {
+ return data.slice(0, 10).map((item) => ({
+ ...item,
+ displayValue: truncateString(item.value, 10),
+ fullValue: item.value,
+ }));
+ }, [data]);
return (
label}
+ labelFormatter={(_, payload) =>
+ payload[0]?.payload?.fullValue || ""
+ }
contentStyle={{
backgroundColor: "hsl(var(--popover))",
border: "1px solid hsl(var(--border))",
@@ -351,7 +386,10 @@ const CategoricalBarChart = React.memo(
);
- }
+ },
+ // Deep comparison for data
+ (prevProps, nextProps) =>
+ JSON.stringify(prevProps.data) === JSON.stringify(nextProps.data)
);
CategoricalBarChart.displayName = "CategoricalBarChart";
@@ -366,7 +404,7 @@ interface ColumnHeaderProps {
onExpand: () => void;
}
-const ColumnHeader = React.memo(
+const ColumnHeader = memo(
({
header,
stats,
@@ -460,7 +498,7 @@ const ColumnHeader = React.memo(
strokeWidth="2"
strokeLinecap="round"
strokeLinejoin="round"
- className="text-muted-foreground"
+ className="text-primary"
>
@@ -479,6 +517,7 @@ const ColumnHeader = React.memo(
strokeWidth="2"
strokeLinecap="round"
strokeLinejoin="round"
+ className="text-primary"
>
@@ -495,6 +534,7 @@ const ColumnHeader = React.memo(
strokeWidth="2"
strokeLinecap="round"
strokeLinejoin="round"
+ className="text-primary"
>
@@ -507,7 +547,7 @@ const ColumnHeader = React.memo(
className="h-6 w-6 p-0"
onClick={onExpand}
>
-
+
{header}
@@ -579,11 +619,7 @@ const ColumnHeader = React.memo(
);
ColumnHeader.displayName = "ColumnHeader";
-const ColumnResizer = ({
- header,
-}: {
- header: Header;
-}) => {
+const ColumnResizer = memo(({ header }: { header: Header }) => {
return (
({
}}
/>
);
-};
+});
+ColumnResizer.displayName = "ColumnResizer";
interface ResizableRow
extends Row {
getSize: () => number;
setSize: (size: number) => void;
}
-const RowResizer = ({ row }: { row: ResizableRow }) => {
+const RowResizer = memo(({ row }: { row: ResizableRow }) => {
return (
@@ -634,21 +671,22 @@ const RowResizer = ({ row }: { row: ResizableRow }) => {
|
);
-};
+});
+RowResizer.displayName = "RowResizer";
-interface ResizableDataTableProps {
+interface ResizableDataTableProps> {
data: T[];
columns: ColumnType[];
- boldedColumns: string[];
+ boldedColumns?: string[];
startingRowHeight?: number;
- currentOperation: string;
+ currentOperation?: string;
}
interface MarkdownCellProps {
content: string;
}
-const MarkdownCell = React.memo(({ content }: MarkdownCellProps) => {
+const MarkdownCell = memo(({ content }: MarkdownCellProps) => {
return (
{
// Only show observability data for the current operation
const observabilityEntries = Object.entries(row).filter(
@@ -797,11 +835,11 @@ const createSortingFns = (
},
});
-function ResizableDataTable({
+export default function ResizableDataTable>({
data,
columns,
- boldedColumns,
- startingRowHeight = 60,
+ boldedColumns = [],
+ startingRowHeight = 40,
currentOperation,
}: ResizableDataTableProps) {
const [columnSizing, setColumnSizing] = useState(() => {
@@ -920,6 +958,7 @@ function ResizableDataTable({
})
.map((col) => ({
...col,
+ id: col.accessorKey,
enableSorting: true,
filterFn: fuzzyFilter,
sortingFn: (rowA: Row, rowB: Row) => {
@@ -1305,5 +1344,3 @@ function ResizableDataTable({
);
}
-
-export default ResizableDataTable;