fix: read .env from the user's cwd and change tool schema so ollama llama models work better #38

Merged 2 commits on Oct 1, 2024
Makefile (1 addition, 0 deletions)
@@ -5,6 +5,7 @@ tests:
 
 tests-basic:
 	poetry run pytest tests/basic
+	poetry run pytest tests/test_api.py
 
 lint:
 	poetry run ruff check docetl/* --fix
docetl/cli.py (19 additions, 0 deletions)
@@ -1,12 +1,15 @@
 from pathlib import Path
 from typing import Optional
 
+import os
 import typer
 
 from docetl.builder import Optimizer
 from docetl.operations.utils import clear_cache as cc
 from docetl.runner import DSLRunner
 
+from dotenv import load_dotenv
+
 app = typer.Typer()
 
 
@@ -36,6 +39,14 @@ def build(
         resume (bool): Whether to resume optimization from a previous run. Defaults to False.
         timeout (int): Timeout for optimization operations in seconds. Defaults to 60.
     """
+    # Get the current working directory (where the user called the command)
+    cwd = os.getcwd()
+
+    # Load .env file from the current working directory
+    env_file = os.path.join(cwd, ".env")
+    if os.path.exists(env_file):
+        load_dotenv(env_file)
+
     optimizer = Optimizer.from_yaml(
         str(yaml_file),
         max_threads=max_threads,
@@ -63,6 +74,14 @@ def run(
         yaml_file (Path): Path to the YAML file containing the pipeline configuration.
         max_threads (Optional[int]): Maximum number of threads to use for running operations.
     """
+    # Get the current working directory (where the user called the command)
+    cwd = os.getcwd()
+
+    # Load .env file from the current working directory
+    env_file = os.path.join(cwd, ".env")
+    if os.path.exists(env_file):
+        load_dotenv(env_file)
+
     runner = DSLRunner.from_yaml(str(yaml_file), max_threads=max_threads)
     runner.run()
 
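Both commands repeat the same load-from-cwd pattern. A minimal, self-contained sketch of the behavior, with an illustrative key name (nothing in this diff pins down which variables the .env file holds):

import os

from dotenv import load_dotenv

# Load KEY=value pairs from a .env file in the directory where the user
# invoked the command; loaded values become visible through os.environ.
env_file = os.path.join(os.getcwd(), ".env")
if os.path.exists(env_file):
    load_dotenv(env_file)

# Downstream code can then read credentials as usual, e.g.:
api_key = os.environ.get("OPENAI_API_KEY")  # illustrative key name
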
docetl/operations/utils.py (8 additions, 8 deletions)
@@ -505,15 +505,15 @@ def call_llm_with_cache(
             {
                 "type": "function",
                 "function": {
-                    "name": "write_output",
-                    "description": "Write processing output to a database",
+                    "name": "send_output",
+                    "description": "Send structured output back to the user",
                     "strict": True,
                     "parameters": parameters,
                     "additionalProperties": False,
                 },
             }
         ]
-        tool_choice = {"type": "function", "function": {"name": "write_output"}}
+        tool_choice = {"type": "function", "function": {"name": "send_output"}}
         response_format = None
 
     else:
@@ -524,7 +524,7 @@ def call_llm_with_cache(
         tools = [{"type": "function", "function": tool["function"]} for tool in tools]
         response_format = None
 
-    system_prompt = f"You are a helpful assistant, intelligently processing data. This is a {op_type} operation. You will perform the specified task on the provided data."
+    system_prompt = f"You are a helpful assistant, intelligently processing data. This is a {op_type} operation. You will perform the specified task on the provided data. The result should be a structured output that you will send back to the user."
     if scratchpad:
         system_prompt += f"""
 
@@ -762,15 +762,15 @@ def call_llm_with_gleaning(
            {
                "type": "function",
                "function": {
-                    "name": "write_output",
-                    "description": "Write processing output to a database",
+                    "name": "send_output",
+                    "description": "Send structured output back to the user",
                    "strict": True,
                    "parameters": parameters,
                    "additionalProperties": False,
                },
            }
        ],
-        tool_choice={"type": "function", "function": {"name": "write_output"}},
+        tool_choice={"type": "function", "function": {"name": "send_output"}},
    )
 
    # Update messages with the new response
@@ -820,7 +820,7 @@ def parse_llm_response_helper(
    Parse the response from a language model.
 
    This function extracts the tool calls from the LLM response and returns the arguments
-    of any 'write_output' function calls as a list of dictionaries.
+    of any 'send_output' function calls as a list of dictionaries.
 
    Args:
        response (Any): The response object from the language model.
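Renaming the tool from write_output to send_output, with a description that frames the call as replying to the user instead of writing to a database, is the tool-schema change the PR title refers to; plausibly, llama models served through ollama are more willing to call a tool that looks like a reply than one that looks like a database side effect. A sketch of the resulting payload, with a hypothetical parameters schema (docetl derives the real one from the operation's output definition):

# Hypothetical output schema, for illustration only.
parameters = {
    "type": "object",
    "properties": {"summary": {"type": "string"}},
    "required": ["summary"],
}

tools = [
    {
        "type": "function",
        "function": {
            "name": "send_output",
            "description": "Send structured output back to the user",
            "strict": True,
            "parameters": parameters,
            "additionalProperties": False,
        },
    }
]

# Forcing tool_choice means the model must respond via the tool rather
# than in free text, which is what yields parseable structured output.
tool_choice = {"type": "function", "function": {"name": "send_output"}}
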
docetl/schemas.py (16 additions, 0 deletions)
@@ -372,3 +372,19 @@ class Pipeline(BaseModel):
     output: PipelineOutput
     parsing_tools: List[ParsingTool] = []
     default_model: Optional[str] = None
+
+    def __init__(self, **data):
+        super().__init__(**data)
+        self._load_env()
+
+    def _load_env(self):
+        from dotenv import load_dotenv
+        import os
+
+        # Get the current working directory
+        cwd = os.getcwd()
+
+        # Load .env file from the current working directory if it exists
+        env_file = os.path.join(cwd, ".env")
+        if os.path.exists(env_file):
+            load_dotenv(env_file)
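
Since the programmatic API never goes through the CLI entry points, the Pipeline model repeats the load in its constructor. A self-contained sketch of the same hook on a stripped-down pydantic model (the field and its value are illustrative):

import os
from typing import Optional

from dotenv import load_dotenv
from pydantic import BaseModel


class EnvLoadingPipeline(BaseModel):
    """Stripped-down stand-in for docetl's Pipeline model."""

    default_model: Optional[str] = None

    def __init__(self, **data):
        # Run pydantic's usual validation first, then load .env as a side effect.
        super().__init__(**data)
        self._load_env()

    def _load_env(self):
        env_file = os.path.join(os.getcwd(), ".env")
        if os.path.exists(env_file):
            load_dotenv(env_file)


# Constructing the model is enough to pull ./.env into os.environ:
pipeline = EnvLoadingPipeline(default_model="gpt-4o-mini")  # illustrative value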