Skip to content

Commit

Permalink
Merge pull request #89 from ucbepic/staging
Browse files Browse the repository at this point in the history
feat: output to csv if user specifies a csv file
  • Loading branch information
shreyashankar authored Oct 10, 2024
2 parents 70604cb + c6f9491 commit 393285b
Show file tree
Hide file tree
Showing 3 changed files with 31 additions and 15 deletions.
6 changes: 3 additions & 3 deletions docetl/operations/utils.py
Original file line number Diff line number Diff line change
Expand Up @@ -615,7 +615,7 @@ def call_llm_with_cache(
len(props) == 1
and list(props.values())[0].get("type") == "string"
and scratchpad is None
and "ollama" in model
and ("ollama" in model or "azure/gpt-4o-mini" in model)
):
use_tools = False

Expand All @@ -635,7 +635,7 @@ def call_llm_with_cache(
"type": "function",
"function": {
"name": "send_output",
"description": "Send structured output back to the user",
"description": "Send output back to the user",
"strict": True,
"parameters": parameters,
"additionalProperties": False,
Expand Down Expand Up @@ -858,7 +858,7 @@ def call_llm_with_gleaning(
"type": "function",
"function": {
"name": "send_output",
"description": "Send structured output back to the user",
"description": "Send output back to the user",
"strict": True,
"parameters": parameters,
"additionalProperties": False,
Expand Down
26 changes: 21 additions & 5 deletions docetl/runner.py
Original file line number Diff line number Diff line change
Expand Up @@ -54,13 +54,16 @@ def __init__(self, config: Dict, max_threads: int = None):
# Check that the output path ends with a supported extension (.json or .csv)
output_path = self.config.get("pipeline", {}).get("output", {}).get("path")
if output_path:
if not output_path.lower().endswith(".json"):
if not (
output_path.lower().endswith(".json")
or output_path.lower().endswith(".csv")
):
raise ValueError(
f"Output path '{output_path}' is not a JSON file. Please provide a path ending with '.json'."
f"Output path '{output_path}' is not a JSON or CSV file. Please provide a path ending with '.json' or '.csv'."
)
else:
raise ValueError(
"No output path specified in the configuration. Please provide an output path ending with '.json' in the configuration."
"No output path specified in the configuration. Please provide an output path ending with '.json' or '.csv' in the configuration."
)

self.syntax_check()
Expand All @@ -77,6 +80,11 @@ def __init__(self, config: Dict, max_threads: int = None):
all_ops_until_and_including_current = [
op_map[prev_op] for prev_op in step["operations"][:idx]
] + [op_map[op_name]]
# If an op does not specify a model, fill in the default model so that it is included in the hash computed below
for op in all_ops_until_and_including_current:
if "model" not in op:
op["model"] = self.default_model

all_ops_str = json.dumps(all_ops_until_and_including_current)
self.step_op_hashes[step["name"]][op_name] = hashlib.sha256(
all_ops_str.encode()
Expand Down Expand Up @@ -207,8 +215,16 @@ def save_output(self, data: List[Dict]):
self.console.rule("[cyan]Saving Output[/cyan]")
output_config = self.config["pipeline"]["output"]
if output_config["type"] == "file":
with open(output_config["path"], "w") as file:
json.dump(data, file, indent=2)
if output_config["path"].lower().endswith(".json"):
with open(output_config["path"], "w") as file:
json.dump(data, file, indent=2)
else: # CSV
import csv

with open(output_config["path"], "w", newline="") as file:
writer = csv.DictWriter(file, fieldnames=data[0].keys())
writer.writeheader()
writer.writerows(data)
self.console.print(
f"[green italic]💾 Output saved to {output_config['path']}[/green italic]"
)
Expand Down
14 changes: 7 additions & 7 deletions poetry.lock

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

0 comments on commit 393285b

Please sign in to comment.