diff --git a/Makefile b/Makefile
index d3522e1f..5f62404d 100644
--- a/Makefile
+++ b/Makefile
@@ -5,6 +5,7 @@ tests:
 
 tests-basic:
 	poetry run pytest tests/basic
+	poetry run pytest tests/test_api.py
 
 lint:
 	poetry run ruff check docetl/* --fix
diff --git a/docetl/cli.py b/docetl/cli.py
index 65cd67e8..2594c862 100644
--- a/docetl/cli.py
+++ b/docetl/cli.py
@@ -1,12 +1,15 @@
 from pathlib import Path
 from typing import Optional
+import os
 
 import typer
 
 from docetl.builder import Optimizer
 from docetl.operations.utils import clear_cache as cc
 from docetl.runner import DSLRunner
 
+from dotenv import load_dotenv
+
 app = typer.Typer()
 
 
@@ -36,6 +39,14 @@ def build(
         resume (bool): Whether to resume optimization from a previous run. Defaults to False.
         timeout (int): Timeout for optimization operations in seconds. Defaults to 60.
     """
+    # Get the current working directory (where the user called the command)
+    cwd = os.getcwd()
+
+    # Load .env file from the current working directory
+    env_file = os.path.join(cwd, ".env")
+    if os.path.exists(env_file):
+        load_dotenv(env_file)
+
     optimizer = Optimizer.from_yaml(
         str(yaml_file),
         max_threads=max_threads,
@@ -63,6 +74,14 @@ def run(
         yaml_file (Path): Path to the YAML file containing the pipeline configuration.
         max_threads (Optional[int]): Maximum number of threads to use for running operations.
     """
+    # Get the current working directory (where the user called the command)
+    cwd = os.getcwd()
+
+    # Load .env file from the current working directory
+    env_file = os.path.join(cwd, ".env")
+    if os.path.exists(env_file):
+        load_dotenv(env_file)
+
     runner = DSLRunner.from_yaml(str(yaml_file), max_threads=max_threads)
     runner.run()
 
diff --git a/docetl/operations/utils.py b/docetl/operations/utils.py
index 9d3ae0b3..502e4303 100644
--- a/docetl/operations/utils.py
+++ b/docetl/operations/utils.py
@@ -505,15 +505,15 @@ def call_llm_with_cache(
             {
                 "type": "function",
                 "function": {
-                    "name": "write_output",
-                    "description": "Write processing output to a database",
+                    "name": "send_output",
+                    "description": "Send structured output back to the user",
                     "strict": True,
                     "parameters": parameters,
                     "additionalProperties": False,
                 },
             }
         ]
-        tool_choice = {"type": "function", "function": {"name": "write_output"}}
+        tool_choice = {"type": "function", "function": {"name": "send_output"}}
         response_format = None
 
     else:
@@ -524,7 +524,7 @@ def call_llm_with_cache(
         tools = [{"type": "function", "function": tool["function"]} for tool in tools]
         response_format = None
 
-    system_prompt = f"You are a helpful assistant, intelligently processing data. This is a {op_type} operation. You will perform the specified task on the provided data."
+    system_prompt = f"You are a helpful assistant, intelligently processing data. This is a {op_type} operation. You will perform the specified task on the provided data. The result should be a structured output that you will send back to the user."
 
     if scratchpad:
         system_prompt += f"""
@@ -762,15 +762,15 @@ def call_llm_with_gleaning(
             {
                 "type": "function",
                 "function": {
-                    "name": "write_output",
-                    "description": "Write processing output to a database",
+                    "name": "send_output",
+                    "description": "Send structured output back to the user",
                     "strict": True,
                     "parameters": parameters,
                     "additionalProperties": False,
                 },
             }
         ],
-        tool_choice={"type": "function", "function": {"name": "write_output"}},
+        tool_choice={"type": "function", "function": {"name": "send_output"}},
     )
 
     # Update messages with the new response
@@ -820,7 +820,7 @@ def parse_llm_response_helper(
     Parse the response from a language model.
 
     This function extracts the tool calls from the LLM response and returns the arguments
-    of any 'write_output' function calls as a list of dictionaries.
+    of any 'send_output' function calls as a list of dictionaries.
 
     Args:
         response (Any): The response object from the language model.
diff --git a/docetl/schemas.py b/docetl/schemas.py
index 4ca8dccd..a05c40bb 100644
--- a/docetl/schemas.py
+++ b/docetl/schemas.py
@@ -372,3 +372,19 @@ class Pipeline(BaseModel):
     output: PipelineOutput
     parsing_tools: List[ParsingTool] = []
     default_model: Optional[str] = None
+
+    def __init__(self, **data):
+        super().__init__(**data)
+        self._load_env()
+
+    def _load_env(self):
+        from dotenv import load_dotenv
+        import os
+
+        # Get the current working directory
+        cwd = os.getcwd()
+
+        # Load .env file from the current working directory if it exists
+        env_file = os.path.join(cwd, ".env")
+        if os.path.exists(env_file):
+            load_dotenv(env_file)
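With this change, both CLI entry points (`build`, `run`) and programmatic `Pipeline` construction pick up API keys from a `.env` file in the directory the user invokes docetl from. A minimal sketch of that behavior, assuming a `.env` file such as `OPENAI_API_KEY=...` (the exact key name depends on the provider backing the pipeline's models); `load_env_from_cwd` is a hypothetical helper mirroring the logic the diff inlines at each call site:

```python
import os

from dotenv import load_dotenv


def load_env_from_cwd() -> bool:
    """Load a .env file from the current working directory, if present."""
    env_file = os.path.join(os.getcwd(), ".env")
    if os.path.exists(env_file):
        # By default, load_dotenv does not overwrite variables already
        # set in the environment, so an exported key still takes precedence.
        return load_dotenv(env_file)
    return False
```

Keeping each call site self-contained is a reasonable choice for a small patch, though a shared helper like the sketch above would avoid maintaining three copies of the same logic.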
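The `write_output` → `send_output` rename, together with the extra sentence appended to the system prompt, reframes the forced tool call as returning structured results to the user rather than writing to a database. Below is an illustrative sketch, not the actual `parse_llm_response_helper` body, of how the renamed tool's arguments can be extracted; the `response.choices[0].message.tool_calls` shape is an assumption following the OpenAI/litellm chat-completion convention:

```python
import json
from typing import Any, Dict, List


def extract_send_output_args(response: Any) -> List[Dict[str, Any]]:
    """Collect parsed arguments from every `send_output` tool call."""
    outputs: List[Dict[str, Any]] = []
    tool_calls = response.choices[0].message.tool_calls or []
    for call in tool_calls:
        # Only the designated output tool carries the structured result;
        # its arguments arrive as a JSON string that must be decoded.
        if call.function.name == "send_output":
            outputs.append(json.loads(call.function.arguments))
    return outputs
```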