fix: read .env from the user's cwd and change tool schema so ollama llama models work better #38

Merged 2 commits on Oct 1, 2024
Makefile (1 addition, 0 deletions)
@@ -5,6 +5,7 @@ tests:
 
 tests-basic:
 	poetry run pytest tests/basic
+	poetry run pytest tests/test_api.py
 
 lint:
 	poetry run ruff check docetl/* --fix
docetl/cli.py (19 additions, 0 deletions)
@@ -1,12 +1,15 @@
 from pathlib import Path
 from typing import Optional
 
+import os
 import typer
 
 from docetl.builder import Optimizer
 from docetl.operations.utils import clear_cache as cc
 from docetl.runner import DSLRunner
 
+from dotenv import load_dotenv
+
 app = typer.Typer()
 
 
@@ -36,6 +39,14 @@ def build(
         resume (bool): Whether to resume optimization from a previous run. Defaults to False.
         timeout (int): Timeout for optimization operations in seconds. Defaults to 60.
     """
+    # Get the current working directory (where the user called the command)
+    cwd = os.getcwd()
+
+    # Load .env file from the current working directory
+    env_file = os.path.join(cwd, ".env")
+    if os.path.exists(env_file):
+        load_dotenv(env_file)
+
     optimizer = Optimizer.from_yaml(
         str(yaml_file),
         max_threads=max_threads,
@@ -63,6 +74,14 @@ def run(
         yaml_file (Path): Path to the YAML file containing the pipeline configuration.
         max_threads (Optional[int]): Maximum number of threads to use for running operations.
     """
+    # Get the current working directory (where the user called the command)
+    cwd = os.getcwd()
+
+    # Load .env file from the current working directory
+    env_file = os.path.join(cwd, ".env")
+    if os.path.exists(env_file):
+        load_dotenv(env_file)
+
     runner = DSLRunner.from_yaml(str(yaml_file), max_threads=max_threads)
     runner.run()
 
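Both commands repeat the same load-from-cwd pattern. A minimal, self-contained sketch of the behavior, with an illustrative key name (nothing in this diff pins down which variables the .env file holds):

import os

from dotenv import load_dotenv

# Load KEY=value pairs from a .env file in the directory where the user
# invoked the command; loaded values become visible through os.environ.
env_file = os.path.join(os.getcwd(), ".env")
if os.path.exists(env_file):
    load_dotenv(env_file)

# Downstream code can then read credentials as usual, e.g.:
api_key = os.environ.get("OPENAI_API_KEY")  # illustrative key name
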
docetl/operations/utils.py (8 additions, 8 deletions)
@@ -505,15 +505,15 @@ def call_llm_with_cache(
             {
                 "type": "function",
                 "function": {
-                    "name": "write_output",
-                    "description": "Write processing output to a database",
+                    "name": "send_output",
+                    "description": "Send structured output back to the user",
                     "strict": True,
                     "parameters": parameters,
                     "additionalProperties": False,
                 },
             }
         ]
-        tool_choice = {"type": "function", "function": {"name": "write_output"}}
+        tool_choice = {"type": "function", "function": {"name": "send_output"}}
         response_format = None
 
     else:
@@ -524,7 +524,7 @@ def call_llm_with_cache(
         tools = [{"type": "function", "function": tool["function"]} for tool in tools]
         response_format = None
 
-    system_prompt = f"You are a helpful assistant, intelligently processing data. This is a {op_type} operation. You will perform the specified task on the provided data."
+    system_prompt = f"You are a helpful assistant, intelligently processing data. This is a {op_type} operation. You will perform the specified task on the provided data. The result should be a structured output that you will send back to the user."
     if scratchpad:
         system_prompt += f"""
 
@@ -762,15 +762,15 @@ def call_llm_with_gleaning(
            {
                "type": "function",
                "function": {
-                    "name": "write_output",
-                    "description": "Write processing output to a database",
+                    "name": "send_output",
+                    "description": "Send structured output back to the user",
                    "strict": True,
                    "parameters": parameters,
                    "additionalProperties": False,
                },
            }
        ],
-        tool_choice={"type": "function", "function": {"name": "write_output"}},
+        tool_choice={"type": "function", "function": {"name": "send_output"}},
    )
 
    # Update messages with the new response
@@ -820,7 +820,7 @@ def parse_llm_response_helper(
    Parse the response from a language model.
 
    This function extracts the tool calls from the LLM response and returns the arguments
-    of any 'write_output' function calls as a list of dictionaries.
+    of any 'send_output' function calls as a list of dictionaries.
 
    Args:
        response (Any): The response object from the language model.
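Renaming the tool from write_output to send_output, with a description that frames the call as replying to the user instead of writing to a database, is the tool-schema change the PR title refers to; plausibly, llama models served through ollama are more willing to call a tool that looks like a reply than one that looks like a database side effect. A sketch of the resulting payload, with a hypothetical parameters schema (docetl derives the real one from the operation's output definition):

# Hypothetical output schema, for illustration only.
parameters = {
    "type": "object",
    "properties": {"summary": {"type": "string"}},
    "required": ["summary"],
}

tools = [
    {
        "type": "function",
        "function": {
            "name": "send_output",
            "description": "Send structured output back to the user",
            "strict": True,
            "parameters": parameters,
            "additionalProperties": False,
        },
    }
]

# Forcing tool_choice means the model must respond via the tool rather
# than in free text, which is what yields parseable structured output.
tool_choice = {"type": "function", "function": {"name": "send_output"}}
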
docetl/schemas.py (16 additions, 0 deletions)
@@ -372,3 +372,19 @@ class Pipeline(BaseModel):
     output: PipelineOutput
     parsing_tools: List[ParsingTool] = []
     default_model: Optional[str] = None
+
+    def __init__(self, **data):
+        super().__init__(**data)
+        self._load_env()
+
+    def _load_env(self):
+        from dotenv import load_dotenv
+        import os
+
+        # Get the current working directory
+        cwd = os.getcwd()
+
+        # Load .env file from the current working directory if it exists
+        env_file = os.path.join(cwd, ".env")
+        if os.path.exists(env_file):
+            load_dotenv(env_file)
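
Since the programmatic API never goes through the CLI entry points, the Pipeline model repeats the load in its constructor. A self-contained sketch of the same hook on a stripped-down pydantic model (the field and its value are illustrative):

import os
from typing import Optional

from dotenv import load_dotenv
from pydantic import BaseModel


class EnvLoadingPipeline(BaseModel):
    """Stripped-down stand-in for docetl's Pipeline model."""

    default_model: Optional[str] = None

    def __init__(self, **data):
        # Run pydantic's usual validation first, then load .env as a side effect.
        super().__init__(**data)
        self._load_env()

    def _load_env(self):
        env_file = os.path.join(os.getcwd(), ".env")
        if os.path.exists(env_file):
            load_dotenv(env_file)


# Constructing the model is enough to pull ./.env into os.environ:
pipeline = EnvLoadingPipeline(default_model="gpt-4o-mini")  # illustrative value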