Skip to content

Commit

Permalink
feat: introduce create_shadow_eml wrapper
Browse files Browse the repository at this point in the history
Create a `create_shadow_eml` wrapper function to streamline the process
of applying shadow metadata enrichment functions to individual EML
documents.
  • Loading branch information
clnsmth authored Sep 30, 2024
1 parent ded2748 commit 31d1d01
Show file tree
Hide file tree
Showing 3 changed files with 49 additions and 2 deletions.
2 changes: 1 addition & 1 deletion src/spinneret/data/eml/edi.3.9.xml
Original file line number Diff line number Diff line change
Expand Up @@ -165,7 +165,7 @@
<country>US</country>
</address>
<electronicMailAddress>[email protected]</electronicMailAddress>
<userId directory="https://orcid.org/">https://orcid.org/0000-0002-1693-8322</userId>
<userId directory="https://orcid.org/">0000-0002-1693-8322</userId>
</creator>
<pubDate>2021-03-02</pubDate>
<language>English</language>
Expand Down
18 changes: 18 additions & 0 deletions src/spinneret/shadow.py
Original file line number Diff line number Diff line change
Expand Up @@ -29,3 +29,21 @@ def convert_userid_to_url(eml: etree.ElementTree) -> etree.ElementTree:
element.text = new_value

return eml


def create_shadow_eml(eml_path: str, output_path: str) -> None:
    """
    Create a shadow EML file by running the enrichment functions on an EML
    document.

    :param eml_path: The path to the EML file to be annotated.
    :param output_path: The path to write the annotated EML file.
    :returns: None
    :notes: The source document is parsed with blank text removed, handed
        through each enrichment function in turn (currently only
        ``convert_userid_to_url``), and then written to ``output_path`` as
        pretty-printed UTF-8 with an XML declaration.
    """
    # Drop ignorable whitespace while parsing so the pretty-printer is free
    # to re-indent the tree on output.
    whitespace_free_parser = etree.XMLParser(remove_blank_text=True)
    tree = etree.parse(eml_path, parser=whitespace_free_parser)

    # Enrichment pipeline: every step receives the tree returned by the
    # previous step and returns the tree for the next one.
    enrichment_steps = (convert_userid_to_url,)
    for enrich in enrichment_steps:
        tree = enrich(tree)

    # Serialize the enriched document
    tree.write(
        output_path,
        pretty_print=True,
        encoding="utf-8",
        xml_declaration=True,
    )
31 changes: 30 additions & 1 deletion tests/test_shadow.py
Original file line number Diff line number Diff line change
@@ -1,7 +1,9 @@
"""Test shadow code"""

from lxml import etree
from spinneret.shadow import convert_userid_to_url
from spinneret.shadow import convert_userid_to_url, create_shadow_eml
from spinneret.datasets import get_example_eml_dir
from spinneret.utilities import is_url


def test_convert_userid_to_url():
Expand Down Expand Up @@ -58,3 +60,30 @@ def test_convert_userid_to_url():
eml = etree.ElementTree(etree.fromstring(data))
res = convert_userid_to_url(eml)
assert res.xpath("//userId")[0].text == "https://example.com/user1"


def test_create_shadow_eml(tmp_path):
    """Test create_shadow_eml"""
    source_path = "/".join((get_example_eml_dir(), "edi.3.9.xml"))
    shadow_path = f"{tmp_path}/edi.3.9_shadow.xml"

    create_shadow_eml(eml_path=source_path, output_path=shadow_path)

    original = etree.parse(source_path)
    shadow = etree.parse(shadow_path)

    # The enrichment must have changed the serialized document
    assert etree.tostring(original) != etree.tostring(shadow)

    # Precondition: the original EML contains at least one userId that is
    # not a URL, otherwise the check below would prove nothing.
    original_user_ids = original.xpath("//userId")
    assert any(not is_url(node.text) for node in original_user_ids)

    # Every userId in the shadow EML must have been converted to a URL
    converted_user_ids = shadow.xpath("//userId")
    assert all(is_url(node.text) for node in converted_user_ids)

0 comments on commit 31d1d01

Please sign in to comment.