From 4e380f96db3a6c2f8e1ce016c7eb94c47ca1abf4 Mon Sep 17 00:00:00 2001
From: Shreya Shankar
Date: Sun, 24 Nov 2024 08:39:10 -0800
Subject: [PATCH 1/4] feat: have global system prompt and description

---
 docetl/operations/utils.py                    |  31 ++++--
 docetl/runner.py                              |   2 +-
 website/src/app/api/utils.ts                  |  21 +++-
 .../src/app/api/writePipelineConfig/route.ts  |   4 +-
 website/src/app/localStorageKeys.ts           |   1 +
 website/src/components/AIChatPanel.tsx        |  73 +++++++++++-
 website/src/components/LLMContextPopover.tsx  |  48 ++++++--
 website/src/components/PipelineGui.tsx        | 104 ++++++++++++++----
 website/src/components/operations/args.tsx    | 100 +++++++++++------
 .../src/components/operations/components.tsx  |  11 +-
 website/src/contexts/PipelineContext.tsx      |  16 +++
 11 files changed, 328 insertions(+), 83 deletions(-)

diff --git a/docetl/operations/utils.py b/docetl/operations/utils.py
index 127637a1..ee1327c5 100644
--- a/docetl/operations/utils.py
+++ b/docetl/operations/utils.py
@@ -185,6 +185,7 @@ def cache_key(
     messages: List[Dict[str, str]],
     output_schema: Dict[str, str],
     scratchpad: Optional[str] = None,
+    system_prompt: Optional[Dict[str, str]] = None,
 ) -> str:
     """
     Generate a unique cache key based on function arguments.
@@ -209,6 +210,7 @@
         "messages": json.dumps(messages, sort_keys=True),
         "output_schema": json.dumps(output_schema, sort_keys=True),
         "scratchpad": scratchpad,
+        "system_prompt": json.dumps(system_prompt, sort_keys=True),
     }
     return hashlib.md5(json.dumps(key_dict, sort_keys=True).encode()).hexdigest()
@@ -690,7 +692,7 @@ def call_llm(
         Raises:
             TimeoutError: If the call times out after retrying.
         """
-        key = cache_key(model, op_type, messages, output_schema, scratchpad)
+        key = cache_key(model, op_type, messages, output_schema, scratchpad, self.runner.config.get("system_prompt", {}))
         max_retries = max_retries_per_timeout
         attempt = 0
@@ -809,21 +811,33 @@ def _call_llm_with_cache(
         tools = None
         tool_choice = None
 
-        system_prompt = f"You are a helpful assistant, intelligently processing data. This is a {op_type} operation. You will perform the specified task on the provided data. The result should be a structured output that you will send back to the user."
+        persona = self.runner.config.get("system_prompt", {}).get("persona", "a helpful assistant")
+        dataset_description = self.runner.config.get("system_prompt", {}).get("dataset_description", "a collection of unstructured documents")
+        parenthetical_op_instructions = "many inputs:one output" if op_type == "reduce" else "one input:one output"
+
+        system_prompt = f"You are a {persona}, intelligently transforming data. The dataset description is: {dataset_description}. You will be performing a {op_type} operation ({parenthetical_op_instructions}). You will perform the specified task on the provided data, as accurately, precisely, and exhaustively as possible. The result should be a structured output that you will send back to the user."
         if scratchpad:
             system_prompt += f"""
 
-You are incrementally processing data across multiple batches. Maintain intermediate state between batches to accomplish this task effectively.
+You are incrementally processing data across multiple batches. You will see:
+1. The current batch of data to process
+2. The intermediate output so far (what you returned last time)
+3. A scratchpad for tracking additional state: {scratchpad}
+
+The intermediate output contains the partial result that directly answers the user's task, just on a subset of the data.
+The scratchpad contains supporting information needed to process future batches correctly, but isn't part of the answer itself.
 
-Current scratchpad: {scratchpad}
+Example for counting words that appear >2 times:
+- Intermediate output: {{"frequent_words": ["the", "and"]}} # Words seen 3+ times
+- Scratchpad: {{"pending": {{"cat": 2, "dog": 1}}}} # Track words seen 1-2 times
 
 As you process each batch:
-1. Update the scratchpad with crucial information for subsequent batches.
-2. This may include partial results, counters, or data that doesn't fit into {list(output_schema.keys())}.
-3. Example: For counting elements that appear more than twice, track all occurrences in the scratchpad until an item exceeds the threshold.
+1. Use both the intermediate output and scratchpad to inform your processing
+2. Update the scratchpad with any new information needed for future batches
+3. Return both your partial result (representing the answer on the current batch and the previous batches' intermediate output) and updated scratchpad
 
 Keep the scratchpad concise (~500 chars) and easily parsable. Use clear structures like:
-- Bullet points 
+- Bullet points
 - Key-value pairs
 - JSON-like format
 
 Remember: The scratchpad should contain information necessary for processing future batches, not the final result."""
 
+
         # Truncate messages if they exceed the model's context length
         messages = truncate_messages(messages, model)
diff --git a/docetl/runner.py b/docetl/runner.py
index c36507ff..e02439d8 100644
--- a/docetl/runner.py
+++ b/docetl/runner.py
@@ -102,7 +102,7 @@ def __init__(self, config: Dict, max_threads: int = None, **kwargs):
                 all_ops_until_and_including_current = [
                     op_map[prev_op] for prev_op in step["operations"][:idx]
-                ] + [op_map[op_name]]
+                ] + [op_map[op_name]] + [self.config.get("system_prompt", {})]
                 # If there's no model in the op, add the default model
                 for op in all_ops_until_and_including_current:
                     if "model" not in op:
diff --git a/website/src/app/api/utils.ts b/website/src/app/api/utils.ts
index b3274cc0..30bca6dd 100644
--- a/website/src/app/api/utils.ts
+++ b/website/src/app/api/utils.ts
@@ -12,7 +12,11 @@ export function generatePipelineConfig(
   homeDir: string,
   sample_size: number | null,
   optimize: boolean = false,
-  clear_intermediate: boolean = false
+  clear_intermediate: boolean = false,
+  system_prompt: {
+    datasetDescription: string | null;
+    persona: string | null;
+  } | null = null
 ) {
   const datasets = {
     input: {
@@ -156,7 +160,7 @@ export function generatePipelineConfig(
     {
       name: "data_processing",
       input: Object.keys(datasets)[0], // Assuming the first dataset is the input
-      operations: operationsToRun.map((op: any) => op.name),
+      operations: operationsToRun.map((op) => op.name),
     },
   ],
   output: {
@@ -177,8 +181,21 @@ export function generatePipelineConfig(
       ),
     },
   },
+  system_prompt: {},
 };
 
+if (system_prompt) {
+  if (system_prompt.datasetDescription) {
+    // @ts-ignore
+    pipelineConfig.system_prompt!.dataset_description =
+      system_prompt.datasetDescription;
+  }
+  if (system_prompt.persona) {
+    // @ts-ignore
+    pipelineConfig.system_prompt!.persona = system_prompt.persona;
+  }
+}
+
 // Get the inputPath from the intermediate_dir
 let inputPath;
 let outputPath;
diff --git a/website/src/app/api/writePipelineConfig/route.ts b/website/src/app/api/writePipelineConfig/route.ts
index 21e57ddd..7f32d7c6 100644
--- a/website/src/app/api/writePipelineConfig/route.ts
+++ b/website/src/app/api/writePipelineConfig/route.ts
@@ -15,6 +15,7 @@ export async function POST(request: Request) {
     sample_size,
     optimize = false,
     clear_intermediate = false,
+    system_prompt,
   } = await request.json();
 
   if (!name) {
@@ -42,7 +43,8 @@ export async function POST(request: Request) {
       homeDir,
       sample_size,
      optimize,
-      clear_intermediate
+      clear_intermediate,
+      system_prompt
     );
 
     // Save the YAML file in the user's home directory
diff --git a/website/src/app/localStorageKeys.ts b/website/src/app/localStorageKeys.ts
index fb6748cc..aa22c9ae 100644
--- a/website/src/app/localStorageKeys.ts
+++ b/website/src/app/localStorageKeys.ts
@@ -16,3 +16,4 @@ export const DEFAULT_MODEL_KEY = "docetl_defaultModel";
 export const OPTIMIZER_MODEL_KEY = "docetl_optimizerModel";
 export const AUTO_OPTIMIZE_CHECK_KEY = "docetl_autoOptimizeCheck";
 export const HIGH_LEVEL_GOAL_KEY = "docetl_highLevelGoal";
+export const SYSTEM_PROMPT_KEY = "docetl_systemPrompt";
diff --git a/website/src/components/AIChatPanel.tsx b/website/src/components/AIChatPanel.tsx
index a1c24adf..3108395d 100644
--- a/website/src/components/AIChatPanel.tsx
+++ b/website/src/components/AIChatPanel.tsx
@@ -1,8 +1,14 @@
 "use client";
 
-import React, { useRef, useState, useEffect } from "react";
+import React, {
+  useRef,
+  useState,
+  useEffect,
+  useMemo,
+  useCallback,
+} from "react";
 import { ResizableBox } from "react-resizable";
-import { Eraser, RefreshCw, X, Copy } from "lucide-react";
+import { RefreshCw, X, Copy } from "lucide-react";
 import { Button } from "@/components/ui/button";
 import { Input } from "@/components/ui/input";
 import { ScrollArea } from "@/components/ui/scroll-area";
@@ -13,6 +19,13 @@ import "react-resizable/css/styles.css";
 import { LLMContextPopover } from "@/components/LLMContextPopover";
 import { usePipelineContext } from "@/contexts/PipelineContext";
 import ReactMarkdown from "react-markdown";
+import {
+  Popover,
+  PopoverContent,
+  PopoverTrigger,
+} from "@/components/ui/popover";
+import { Textarea } from "@/components/ui/textarea";
+import { debounce } from "lodash";
 
 interface AIChatPanelProps {
   onClose: () => void;
@@ -45,7 +58,9 @@ const AIChatPanel: React.FC<AIChatPanelProps> = ({ onClose }) => {
     initialMessages: [],
     id: "persistent-chat",
   });
-  const { serializeState } = usePipelineContext();
+  const { serializeState, highLevelGoal, setHighLevelGoal } =
+    usePipelineContext();
+  const [localGoal, setLocalGoal] = useState(highLevelGoal);
 
   const handleMouseDown = (e: React.MouseEvent) => {
     if ((e.target as HTMLElement).classList.contains("drag-handle")) {
@@ -184,6 +199,25 @@ Remember, all the output fields have been converted to strings, even if they wer
     );
   };
 
+  const debouncedSetHighLevelGoal = useMemo(
+    () => debounce((value: string) => setHighLevelGoal(value), 1000),
+    [setHighLevelGoal]
+  );
+
+  useEffect(() => {
+    return () => {
+      debouncedSetHighLevelGoal.cancel();
+    };
+  }, [debouncedSetHighLevelGoal]);
+
+  const handleGoalUpdate = useCallback(
+    (newGoal: string) => {
+      setLocalGoal(newGoal);
+      debouncedSetHighLevelGoal(newGoal);
+    },
+    [debouncedSetHighLevelGoal]
+  );
+
   return (
+      {/* JSX lost in extraction: this hunk adds a Popover to the chat panel header
+          whose content shows a "Pipeline Goal" heading and a Textarea bound to
+          localGoal via handleGoalUpdate (using the Popover/Textarea imports added above). */}
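For context, the patch routes a single top-level `system_prompt` block (with optional `persona` and `dataset_description` keys) from the pipeline config through the runner and into every LLM call. The sketch below is illustrative only: it mirrors the `.get(...)` lookups and fallback defaults added in `_call_llm_with_cache`, but the example config values and the shortened prompt string are made up, not part of the patch.

```python
# Minimal sketch of how the new top-level block is consumed.
# Key names and defaults come from the diff; the values are hypothetical examples.
config = {
    "system_prompt": {
        "persona": "a legal analyst",
        "dataset_description": "a collection of vendor contracts",
    }
}

persona = config.get("system_prompt", {}).get("persona", "a helpful assistant")
dataset_description = config.get("system_prompt", {}).get(
    "dataset_description", "a collection of unstructured documents"
)

# Abridged version of the system prompt string assembled in the patch.
print(
    f"You are a {persona}, intelligently transforming data. "
    f"The dataset description is: {dataset_description}."
)
```

Because the same dict is now folded into `cache_key`, changing either field also invalidates previously cached LLM responses.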