Skip to content

Commit

Permalink
feat: enhance annotate_eml flexibility with object/path input/output
Browse files Browse the repository at this point in the history
Expand the `annotate_eml` function to accept either an object or a file
path as input, and to return the annotated object while optionally writing
it to a file path, increasing its versatility and ease of use in various
scenarios.

Update the call sites of `annotate_eml` to use the renamed parameters.
  • Loading branch information
clnsmth committed Nov 12, 2024
1 parent 7f388e3 commit bb1773e
Show file tree
Hide file tree
Showing 3 changed files with 22 additions and 15 deletions.
29 changes: 18 additions & 11 deletions src/spinneret/annotator.py
Original file line number Diff line number Diff line change
Expand Up @@ -204,21 +204,27 @@ def annotate_workbook(
return None


def annotate_eml(eml_path: str, workbook_path: str, output_path: str) -> None:
def annotate_eml(
eml: Union[str, etree._ElementTree],
workbook: Union[str, pd.core.frame.DataFrame],
output_path: str = None,
) -> etree._ElementTree:
"""Annotate an EML file with terms from the corresponding workbook
:param eml_path: The path to the EML file to be annotated.
:param workbook_path: The path to the workbook corresponding to the EML file.
:param eml: Either the path to the EML file corresponding to the
`workbook`, or the EML file itself as an lxml etree.
:param workbook: Either the path to the workbook corresponding to the
`eml`, or the workbook itself as a pandas DataFrame.
:param output_path: The path to write the annotated EML file.
:returns: None
:returns: The annotated EML file as an lxml etree.
:notes: The EML file is annotated with terms from the corresponding workbook.
Terms from the workbook are added even if they are already present in
the EML file.
:notes: The EML file is annotated with terms from the corresponding
workbook. Terms from the workbook are added even if they are already
present in the EML file.
"""
# Load the EML and workbook for processing
eml = load_eml(eml_path)
wb = load_workbook(workbook_path)
eml = load_eml(eml)
wb = load_workbook(workbook)

# Iterate over workbook rows and annotate the EML
for _, row in wb.iterrows():
Expand Down Expand Up @@ -276,8 +282,9 @@ def annotate_eml(eml_path: str, workbook_path: str, output_path: str) -> None:
attribute = root.find(attribute_xpath)
attribute.insert(len(attribute) + 1, annotation)

# Write eml to file
write_eml(eml, output_path)
if output_path:
write_eml(eml, output_path)
return eml


def create_annotation_element(predicate_label, predicate_id, object_label, object_id):
Expand Down
4 changes: 2 additions & 2 deletions src/spinneret/main.py
Original file line number Diff line number Diff line change
Expand Up @@ -127,8 +127,8 @@ def annotate_eml_files(workbook_dir: str, eml_dir: str, output_dir: str) -> None
# Create annotated EML file
print(f"Creating annotated EML file for {eml_path}")
annotate_eml(
eml_path=eml_path,
workbook_path=workbook_dir + "/" + workbook_file,
eml=eml_path,
workbook=workbook_dir + "/" + workbook_file,
output_path=eml_path_annotated,
)

Expand Down
4 changes: 2 additions & 2 deletions tests/test_annotator.py
Original file line number Diff line number Diff line change
Expand Up @@ -221,7 +221,7 @@ def test_annotate_eml(tmp_path):
assert eml.xpath(".//annotation") == []

# Annotate the EML file
annotate_eml(eml_path=eml_file, workbook_path=wb_file, output_path=output_file)
annotate_eml(eml=eml_file, workbook=wb_file, output_path=output_file)

# Check that the EML file was annotated
assert os.path.exists(output_file)
Expand Down Expand Up @@ -259,7 +259,7 @@ def test_annotate_eml_ignores_ungrounded_terms(tmp_path):
# No EML Annotations should exist since all the workbook annotations are
# ungrounded terms.
output_file = str(tmp_path) + "/edi.3.9_annotated.xml"
annotate_eml(eml_path=eml_file, workbook_path=wb_file, output_path=output_file)
annotate_eml(eml=eml_file, workbook=wb_file, output_path=output_file)
assert os.path.exists(output_file)
eml_annotated = load_eml(output_file)
annotations = eml_annotated.xpath(".//annotation")
Expand Down

0 comments on commit bb1773e

Please sign in to comment.