From 4212e83692f51dde6ad58146d078da6460cb67f7 Mon Sep 17 00:00:00 2001
From: Shreya Shankar
Date: Wed, 13 Nov 2024 23:49:59 -0800
Subject: [PATCH 01/36] feat: tie histograms to output types

---
 website/src/app/localStorageKeys.ts | 1 +
 website/src/components/AIChatPanel.tsx | 18 ++-
 website/src/components/PipelineGui.tsx | 26 +++-
 website/src/components/ResizableDataTable.tsx | 136 +++++++++++++-----
 website/src/contexts/PipelineContext.tsx | 17 ++-
 5 files changed, 159 insertions(+), 39 deletions(-)

diff --git a/website/src/app/localStorageKeys.ts b/website/src/app/localStorageKeys.ts
index bc6016b5..fb6748cc 100644
--- a/website/src/app/localStorageKeys.ts
+++ b/website/src/app/localStorageKeys.ts
@@ -15,3 +15,4 @@ export const COST_KEY = "docetl_cost";
 export const DEFAULT_MODEL_KEY = "docetl_defaultModel";
 export const OPTIMIZER_MODEL_KEY = "docetl_optimizerModel";
 export const AUTO_OPTIMIZE_CHECK_KEY = "docetl_autoOptimizeCheck";
+export const HIGH_LEVEL_GOAL_KEY = "docetl_highLevelGoal";
diff --git a/website/src/components/AIChatPanel.tsx b/website/src/components/AIChatPanel.tsx
index 2203b57b..8f8a79b6 100644
--- a/website/src/components/AIChatPanel.tsx
+++ b/website/src/components/AIChatPanel.tsx
@@ -104,7 +104,7 @@ const AIChatPanel: React.FC = ({ onClose }) => {
       content: `You are the DocETL assistant, helping users build and refine data analysis pipelines. You are an expert at data analysis.
 
 Core Capabilities:
-- DocETL enables users to create sophisticated data processing workflows combining LLMs with traditional data operations
+- DocETL enables users to create sophisticated data processing workflows with LLM calls, like crowdsourcing pipelines
 - Each pipeline processes documents through a sequence of operations
 - Operations can be LLM-based (map, reduce, resolve, filter) or utility-based (unnest, split, gather, sample)
 
@@ -115,7 +115,7 @@ Operation Details:
 - Operation-specific templating:
   - Map/Filter: Access current doc with '{{ input.keyname }}'
   - Reduce: Loop through docs with '{% for doc in inputs %}...{% endfor %}'
-  - Resolve: Compare docs with '{{ input1 }}/{{ input2 }}' and canonicalize with 'inputs'
+  - Resolve: Compare docs with '{{ input1 }}/{{ input2 }}' and canonicalize with '{{ inputs }}'
 
 Your Role:
 - Help users optimize pipelines and overcome challenges
@@ -132,8 +132,20 @@ Best Practices:
 - Be specific, never vague or general
 - Be concise, don't repeat yourself
 
+When Reviewing Outputs:
+- All the output fields have been converted to strings, even if they were originally numbers, arrays, or other types. So NEVER COMMENT ON TYPES.
+- Actively analyze outputs for discrepancies in structure across the outputs, edge cases, and quality issues.
+- For discrepancies, describe how to standardize them.
+- Identify where outputs may not fully satisfy the intended goals
+- Never simply restate or summarize outputs - provide critical analysis
+- Provide 1 suggestion at a time
+
+Remember, you are only helping the user discover their analysis goal, and only suggest improvements that LLMs or crowd workers are capable of.
+
 Here's their current pipeline state:
-${pipelineState}`,
+${pipelineState}
+
+Remember, all the output fields have been converted to strings, even if they were originally numbers, arrays, or other types. So NEVER COMMENT ON TYPES. Steer the user towards their high-level goal, if specified.`,
       },
       ...messages.filter((m) => m.role !== "system"),
     ]);
diff --git a/website/src/components/PipelineGui.tsx b/website/src/components/PipelineGui.tsx
index c3b201d4..8af768d9 100644
--- a/website/src/components/PipelineGui.tsx
+++ b/website/src/components/PipelineGui.tsx
@@ -57,6 +57,7 @@ import { v4 as uuidv4 } from "uuid";
 import { useOptimizeCheck } from "@/hooks/useOptimizeCheck";
 import { canBeOptimized } from "@/lib/utils";
 import { Switch } from "./ui/switch";
+import { Textarea } from "./ui/textarea";
 
 const PipelineGUI: React.FC = () => {
   const fileInputRef = useRef(null);
@@ -85,12 +86,15 @@ const PipelineGUI: React.FC = () => {
     setOptimizerProgress,
     autoOptimizeCheck,
     setAutoOptimizeCheck,
+    highLevelGoal,
+    setHighLevelGoal,
   } = usePipelineContext();
   const [isSettingsOpen, setIsSettingsOpen] = useState(false);
   const [tempPipelineName, setTempPipelineName] = useState(pipelineName);
   const [tempAutoOptimizeCheck, setTempAutoOptimizeCheck] =
     useState(autoOptimizeCheck);
   const [tempOptimizerModel, setTempOptimizerModel] = useState(optimizerModel);
+  const [tempHighLevelGoal, setTempHighLevelGoal] = useState(highLevelGoal);
   const [tempSampleSize, setTempSampleSize] = useState(
     sampleSize?.toString() || ""
   );
@@ -264,10 +268,16 @@ const PipelineGUI: React.FC = () => {
 
   useEffect(() => {
     if (optimizerModel) {
-      setTempDefaultModel(tempOptimizerModel);
+      setTempOptimizerModel(optimizerModel);
     }
   }, [optimizerModel]);
 
+  useEffect(() => {
+    if (highLevelGoal) {
+      setTempHighLevelGoal(highLevelGoal);
+    }
+  }, [highLevelGoal]);
+
   const handleFileUpload = async (
     event: React.ChangeEvent
   ) => {
@@ -539,6 +549,7 @@ const PipelineGUI: React.FC = () => {
     setIsSettingsOpen(false);
     setOptimizerModel(tempOptimizerModel);
     setAutoOptimizeCheck(tempAutoOptimizeCheck);
+    setHighLevelGoal(tempHighLevelGoal);
   };
 
   const handleDragEnd = (result: DropResult) => {
@@ -804,6 +815,19 @@ const PipelineGUI: React.FC = () => {
                 className="col-span-3"
               />
             </div>
+            <div className="grid grid-cols-4 items-center gap-4">
+              <Label htmlFor="goal" className="text-right">
+                Goal
+              </Label>
+              <Textarea
+                id="goal"
+                value={tempHighLevelGoal}
+                onChange={(e) => setTempHighLevelGoal(e.target.value)}
+                placeholder="Describe the high-level goal of your pipeline"
+                className="col-span-3"
+                rows={3}
+              />
+            </div>
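
Note: the diffstat above also lists website/src/contexts/PipelineContext.tsx and website/src/components/ResizableDataTable.tsx, whose hunks are not included in this excerpt. Below is a minimal, hypothetical sketch of how the new highLevelGoal state could be exposed through the pipeline context and persisted under HIGH_LEVEL_GOAL_KEY. The provider shape, the useLocalStorageState helper, and the "@/app/localStorageKeys" import path are assumptions for illustration, not the patch's actual implementation. In that reading, PipelineGui.tsx's settings dialog would call setHighLevelGoal(tempHighLevelGoal) on save, and AIChatPanel.tsx could read highLevelGoal to steer the assistant's system prompt.

// Illustrative sketch only -- NOT part of the patch above. The PipelineContext.tsx
// hunk is not shown in this excerpt; this assumes a localStorage-backed provider and
// invents the useLocalStorageState helper and the "@/app/localStorageKeys" alias path.
import React, { createContext, useContext, useEffect, useState } from "react";
import { HIGH_LEVEL_GOAL_KEY } from "@/app/localStorageKeys";

// Persist a string value under a localStorage key, falling back to `initial`.
function useLocalStorageState(key: string, initial: string) {
  const [value, setValue] = useState<string>(() => {
    if (typeof window === "undefined") return initial; // SSR guard
    return window.localStorage.getItem(key) ?? initial;
  });
  useEffect(() => {
    window.localStorage.setItem(key, value);
  }, [key, value]);
  return [value, setValue] as const;
}

interface PipelineContextValue {
  highLevelGoal: string;
  setHighLevelGoal: (goal: string) => void;
}

const PipelineContext = createContext<PipelineContextValue | null>(null);

export const PipelineProvider: React.FC<{ children: React.ReactNode }> = ({
  children,
}) => {
  // The settings dialog writes the edited goal through setHighLevelGoal on save;
  // consumers such as AIChatPanel read highLevelGoal when building their prompts.
  const [highLevelGoal, setHighLevelGoal] = useLocalStorageState(
    HIGH_LEVEL_GOAL_KEY,
    ""
  );
  return (
    <PipelineContext.Provider value={{ highLevelGoal, setHighLevelGoal }}>
      {children}
    </PipelineContext.Provider>
  );
};

export const usePipelineContext = (): PipelineContextValue => {
  const ctx = useContext(PipelineContext);
  if (!ctx) {
    throw new Error("usePipelineContext must be used within a PipelineProvider");
  }
  return ctx;
};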