Skip to content

Commit

Permalink
fix: change gleaning prompt to validation_prompt
Browse files: browse the repository at this point in the history
  • Loading branch information
shreyashankar committed Oct 13, 2024
1 parent 7635afa commit 7b1e04d
Showing 1 changed file with 6 additions and 10 deletions.
16 changes: 6 additions & 10 deletions docetl/dataset.py
Original file line number | Diff line number | Diff line change
Expand Up @@ -146,17 +146,12 @@ def _validate_parsing(
return []

for tool in parsing_tools:
if (
not isinstance(tool, dict)
or "function" not in tool
):
if not isinstance(tool, dict) or "function" not in tool:
raise ValueError(
"Each parsing tool must be a dictionary with a 'function' key and any arguments required by that function"
)
if not isinstance(tool["function"], str):
raise ValueError(
"'function' in parsing tools must be a string"
)
raise ValueError("'function' in parsing tools must be a string")
if "function_kwargs" in tool and not isinstance(
tool["function_kwargs"], dict
):
Expand Down Expand Up @@ -212,7 +207,7 @@ def _process_item(
):
result = func(item, **function_kwargs)
return [item.copy() | res for res in result]

def _apply_parsing_tools(self, data: List[Dict]) -> List[Dict]:
"""
Apply parsing tools to the data.
Expand All @@ -233,7 +228,7 @@ def _apply_parsing_tools(self, data: List[Dict]) -> List[Dict]:
# with the existing yaml format...
if "function_kwargs" in function_kwargs:
function_kwargs.update(function_kwargs.pop("function_kwargs"))

try:
func = get_parser(tool["function"])
except KeyError:
Expand All @@ -243,7 +238,8 @@ def _apply_parsing_tools(self, data: List[Dict]) -> List[Dict]:
):
# Define the custom function in the current scope
exec(
self.user_defined_parsing_tool_map[
"from typing import List, Dict\n"
+ self.user_defined_parsing_tool_map[
tool["function"]
].function_code
)
Expand Down

0 comments on commit 7b1e04d

Please sign in to comment.