diff --git a/src/spinneret/annotator.py b/src/spinneret/annotator.py
index db9df37..35b9c73 100644
--- a/src/spinneret/annotator.py
+++ b/src/spinneret/annotator.py
@@ -4,6 +4,7 @@
from typing import Union
from requests import get, exceptions
import pandas as pd
+from lxml import etree
# pylint: disable=too-many-locals
@@ -179,3 +180,80 @@ def annotate_workbook(workbook_path: str, output_path: str) -> None:
# Write the annotated workbook back to the original path
wb.to_csv(output_path, sep="\t", index=False, encoding="utf-8")
+
+
def annotate_eml(eml_path: str, workbook_path: str, output_path: str) -> None:
    """Annotate an EML file with terms from the corresponding workbook

    :param eml_path: The path to the EML file to be annotated.
    :param workbook_path: The path to the workbook corresponding to the EML
        file. It is read as a tab-separated file and the columns ``element``,
        ``element_xpath``, ``predicate``, ``predicate_id``, ``object`` and
        ``object_id`` are consulted.
    :param output_path: The path to write the annotated EML file.
    :returns: None
    :raises ValueError: If a workbook row targets the dataset but no
        ``dataset``/``contact`` element exists, or targets an attribute whose
        XPath matches nothing in the EML file.

    :notes: The EML file is annotated with terms from the corresponding workbook.
        Terms from the workbook are added even if they are already present in
        the EML file. Only rows whose ``element`` is ``dataset`` or
        ``attribute`` are written; rows for any other element are ignored.
    """
    # Load the EML and workbook for processing
    eml = etree.parse(eml_path, parser=etree.XMLParser(remove_blank_text=True))
    wb = pd.read_csv(workbook_path, sep="\t", encoding="utf-8")
    root = eml.getroot()

    required = ["predicate", "predicate_id", "object", "object_id"]
    contact = None  # resolved on first use; only dataset-level rows need it

    # Iterate over workbook rows and annotate the EML
    for _, row in wb.iterrows():

        # Only annotate if required components are present
        if row[required].isnull().any():
            continue

        annotation = create_annotation_element(
            predicate_label=row["predicate"],
            predicate_id=row["predicate_id"],
            object_label=row["object"],
            object_id=row["object_id"],
        )

        if row["element"] == "dataset":
            # Dataset level annotations are placed directly before the first
            # contact element, which gives a consistent reference point.
            # Successive annotations therefore keep their workbook order.
            # NOTE(review): if other dataset children (e.g. purpose or
            # maintenance) sit between coverage and contact, confirm this
            # placement is still valid against the EML schema.
            if contact is None:
                contact = _find_dataset_contact(root)
            contact.addprevious(annotation)
        elif row["element"] == "attribute":
            # Attribute level annotations go at the end of the attribute's
            # children.
            _find_attribute(root, row["element_xpath"]).append(annotation)

    # Write eml to file
    eml.write(output_path, pretty_print=True, encoding="utf-8", xml_declaration=True)


def _find_dataset_contact(root):
    """Return the first ``contact`` child of the first ``dataset`` under root

    :param root: The root element of the parsed EML document.
    :returns: The ``contact`` element.
    :raises ValueError: If the dataset or its contact element is missing.
    """
    dataset = root.find(".//dataset")
    contact = dataset.find("contact") if dataset is not None else None
    if contact is None:
        raise ValueError(
            "Cannot add dataset annotation: no dataset/contact element "
            "found in the EML file."
        )
    return contact


def _find_attribute(root, element_xpath):
    """Return the element addressed by an absolute workbook XPath

    :param root: The root element of the parsed EML document.
    :param element_xpath: The XPath recorded in the workbook, e.g.
        ``/eml:eml/dataset/dataTable/attributeList/attribute[1]``.
    :returns: The matching element.
    :raises ValueError: If nothing in the EML file matches the XPath.
    """
    # Element.find() rejects absolute paths, so re-anchor the path on the
    # root element by swapping only the leading root step for ".".
    root_step = "/eml:eml"
    relative_xpath = element_xpath
    if relative_xpath.startswith(root_step):
        relative_xpath = "." + relative_xpath[len(root_step):]

    attribute = root.find(relative_xpath)
    if attribute is None:
        raise ValueError(
            f"Cannot add attribute annotation: no element matches "
            f"'{element_xpath}' in the EML file."
        )
    return attribute
+
+
def create_annotation_element(predicate_label, predicate_id, object_label, object_id):
    """Build an EML annotation element from a predicate/object pair

    :param predicate_label: The predicate label of the annotation.
    :param predicate_id: The URI of the predicate.
    :param object_label: The object label of the annotation.
    :param object_id: The URI of the object.
    :returns: An ``annotation`` element holding a ``propertyURI`` child
        followed by a ``valueURI`` child. Each child carries its label in the
        ``label`` attribute and its URI as text.
    """
    annotation = etree.Element("annotation")

    # (tag, label, uri) for each child, in the order they are appended
    parts = (
        ("propertyURI", predicate_label, predicate_id),
        ("valueURI", object_label, object_id),
    )
    for tag, label, uri in parts:
        child = etree.SubElement(annotation, tag)
        child.set("label", label)
        child.text = uri

    return annotation
diff --git a/tests/edi.3.9_annotated.xml b/tests/edi.3.9_annotated.xml
new file mode 100644
index 0000000..2ca7b18
--- /dev/null
+++ b/tests/edi.3.9_annotated.xml
@@ -0,0 +1,1004 @@
+
+
+
+
+ uid=EDI,o=EDI,dc=edirepository,dc=org
+ all
+
+
+ uid=SBC,o=EDI,dc=ecoinformatics,dc=org
+ all
+
+
+ uid=sbcmbon,o=EDI,dc=edirepository,dc=org
+ all
+
+
+ uid=lkui,dc=ecoinformatics,dc=org
+ all
+
+
+ public
+ read
+
+
+
+ doi:10.6073/pasta/6cf47d3a8310368f62c75e6b6e63076a
+ kelp forest integrated cover upc rpc
+ Santa Barbara Channel Marine BON: Nearshore kelp forest integrated benthic cover, 1980-ongoing
+
+ SCB Marine Biodiversity Observation Network
+
+ Marine Science Institute University of California
+ Santa Barbara
+ CA
+ 93106-6150
+ US
+
+ sbcbon@msi.ucsb.edu
+
+
+
+ Robert J
+ Miller
+
+ UCSB
+
+ Marine Science Institute University of California
+ Santa Barbara
+ CA
+ 93106-6150
+ US
+
+ 805-893-6174
+ miller@msi.ucsb.edu
+ https://orcid.org/0000-0002-8350-3759
+
+
+
+ Andrew R
+ Rassweiler
+
+ UCSB
+
+ Marine Science Institute University of California
+ Santa Barbara
+ CA
+ 93106-6150
+ US
+
+ 805-893-7823
+ andrew.rassweiler@lifesci.ucsb.edu
+ https://orcid.org/0000-0002-8760-3888
+
+
+
+ Jenn
+ Caselle
+
+ UCSB
+
+ Marine Science Institute University of California
+ Santa Barbara
+ CA
+ 93106-6150
+ US
+
+ 805-893-5144
+ caselle@ucsb.edu
+
+
+
+ David
+ Kushner
+
+ NPS
+
+ National Park Service
+ Ventura
+ CA
+ US
+
+ 805-658-5773
+ david_kushner@nps.gov
+
+
+
+ Daniel C
+ Reed
+
+ UCSB
+
+ Marine Science Institute University of California
+ Santa Barbara
+ CA
+ 93106-6150
+ US
+
+ 805-893-8363
+ dan.reed@lifesci.ucsb.edu
+ https://orcid.org/0000-0003-3015-8717
+
+
+
+ Kevin D
+ Lafferty
+
+ USGS
+
+ Marine Science Institute University of California
+ Santa Barbara
+ CA
+ 93106-6150
+ US
+
+ Klafferty@usgs.gov
+ https://orcid.org/0000-0001-7583-4593
+
+
+
+ Li
+ Kui
+
+ UCSB
+
+ Marine Science Institute University of California
+ Santa Barbara
+ CA
+ 93106-6150
+ US
+
+ lkui@ucsb.edu
+ https://orcid.org/0000-0002-5894-4907
+
+
+
+ Margaret
+ O'Brien
+
+ UCSB
+
+ Marine Science Institute University of California
+ Santa Barbara
+ CA
+ 93106-6150
+ US
+
+ margaret.obrien@ucsb.edu
+ https://orcid.org/0000-0002-1693-8322
+
+ 2021-03-02
+ English
+
+
+ The Santa Barbara Channel Marine Biodiversity Observation Network
+ (SBCMBON) tracks long-term patterns in species abundance and
+ diversity. This dataset contains cover of kelp forest sessile
+ invertebrates, understory macroalgae, and substrate types by
+ integrating data from four contributing projects working in the kelp
+ forests of the Santa Barbara Channel, USA. Divers collect data on
+ using either uniform point contact (UPC) or random point contact (RPC)
+ methods.
+
+
+ The four contributing projects are two research projects: The Santa
+ Barbara Coastal LTER (SBC LTER) and the Partnership for
+ Interdisciplinary Studies of Coastal Oceans (PISCO), the kelp forest
+ monitoring program of the Santa Barbara Channel National Park, and the
+ San Nicolas Island monitoring program supported by USGS. Together,
+ these projects have recorded data for more than 200 species at
+ approximately 100 sites on both the mainland coast and on the Santa
+ Barbara Channel Islands. Sampling began in 1982 and is ongoing. Data
+ were collected by human observation (divers using SCUBA) during
+ regular surveys.
+
+
+ Percent cover is recorded for taxa where individuals cannot be
+ counted. Cover can be calculated from the data here as the fraction of
+ total points at which the taxon was present x 100. With UPC and RPC
+ methods, multiple species can be recorded at any given point. The
+ total percent cover of all species combined using this method can
+ exceed 100%; however, the percent cover of any single species cannot
+ exceed 100%. See Methods for information on integration and data
+ processing.
+
+
+ MBON is funded by National Aeronautics and Space Administration
+ (NASA), Bureau of Ocean Energy Management (BOEM), and National Oceanic
+ and Atmospheric Administration (NOAA).
+
+
+ For users who are interested in using all or part of this integrated
+ datasets, please contact data owners to discuss your research
+ interests, data-related issues or any other questions. A recommended
+ citation for the data package is available from the download page. In
+ addition, any manuscript generated using this dataset is expected to
+ be sent to the data owners before publication so we can be sure the
+ data is used in the proper context and methods are reported
+ accurately:
+
+
+ Santa Barbara Coastal LTER (LTER):
+
+
+ Dan Reed dan.reed@lifesci.ucsb.edu
+
+
+ Robert Miller miller@msi.ucsb.edu
+
+
+ Partnership for Interdisciplinary Studies of Coastal Oceans (PISCO):
+
+
+ Jenn Caselle caselle@ucsb.edu
+
+
+ Kelp forest monitoring (KFM):
+
+
+ David Kushner david_kushner@nps.gov
+
+
+ Joshua Sprague joshua_sprague@nps.gov
+
+
+ San Nicolas Island monitoring (SNI):
+
+
+ Kevin Lafferty Klafferty@usgs.gov
+
+
+ Mike Kenner mkenner@ucsc.edu
+
+
+
+ Population Abundance
+ Essential Biodiversity Variables
+
+
+ BasisofRecord: HumanObservation
+ Occurrence: OrganismQuantity
+ Taxon: ScientificName
+ Darwin Core Terms
+
+
+ algae
+ invertebrate
+ random point contact
+ Santa Barbara Channel Marine BON
+ uniform point contact
+ none
+
+
+
+ This data package is released under the Creative Commons License
+ Attribution 4.0 International (CC BY 4.0, see
+ https://creativecommons.org/licenses/by/4.0/). This license states
+ that consumers ("Data Users" herein) may distribute, adapt,
+ reuse, remix, and build upon this work, as long as they give
+ appropriate credit, provide a link to the license, and indicate if
+ changes were made. If redistributed, a Data User may not apply
+ additional restrictions or technological measures that prevent access.
+
+
+ The Data User has an ethical obligation to cite the data source
+ appropriately in any publication or product that results from its use,
+ and notify the data contact or creator. Communication, collaboration,
+ or co-authorship (as appropriate) with the creators of this data
+ package is encouraged to prevent duplicate research or publication.
+ The Data User is urged to contact the authors of these data if any
+ questions about methodology or results occur. The Data User should
+ realize that these data may be actively used by others for ongoing
+ research and that coordination may be necessary to prevent duplication
+ or inappropriate use. The Data User should realize that
+ misinterpretation may occur if data are used outside of the context of
+ the original study. The Data User should be aware that data are
+ updated periodically and it is the responsibility of the Data User to
+ check for new versions of the data.
+
+
+ While substantial efforts are made to ensure the accuracy of data and
+ associated documentation, complete accuracy of data sets cannot be
+ guaranteed. This data package (with its components) is made available
+ “as is” and with no warranty of accuracy or fitness for use. The
+ creators of this data package and the repository where these data were
+ obtained shall not be liable for any damages resulting from
+ misinterpretation, use or misuse of the data package or its
+ components.
+
+
+
+
+ http://sbc.marinebon.org/
+
+
+
+
+ California, USA: Nearshore reefs of the Santa Barbara Channel and Channel Islands, California, USA
+
+ -120.65022
+ -118.4
+ 34.87315
+ 32.8
+
+
+
+
+
+ 1980-08-25
+
+
+ 2020-07-31
+
+
+
+
+
+ http://purl.obolibrary.org/obo/IAO_0000136
+ http://purl.obolibrary.org/obo/ENVO_03000117
+
+
+ http://purl.obolibrary.org/obo/IAO_0000136
+ http://purl.obolibrary.org/obo/ENVO_01000058
+
+
+ http://purl.obolibrary.org/obo/IAO_0000136
+ http://purl.obolibrary.org/obo/ENVO_01000335
+
+
+ http://purl.obolibrary.org/obo/IAO_0000136
+ http://purl.obolibrary.org/obo/ENVO_00000562
+
+
+ http://purl.obolibrary.org/obo/IAO_0000136
+ http://purl.obolibrary.org/obo/ENVO_00000098
+
+
+ http://purl.obolibrary.org/obo/IAO_0000136
+ http://purl.obolibrary.org/obo/ENVO_01000687
+
+
+ http://purl.obolibrary.org/obo/IAO_0000136
+ http://purl.obolibrary.org/obo/PATO_0000467
+
+
+ http://purl.obolibrary.org/obo/IAO_0000136
+ http://purl.obolibrary.org/obo/ENVO_00000015
+
+
+ http://purl.obolibrary.org/obo/IAO_0000136
+ http://purl.obolibrary.org/obo/ENVO_2000015
+
+
+ http://purl.obolibrary.org/obo/IAO_0000136
+ http://purl.obolibrary.org/obo/PATO_0000057
+
+
+ SCB MBON
+ Information Manager, Southern California Bight Marine Biodiversity Observation Network
+
+ Marine Science Institute
+ University of California
+ Santa Barbara
+ California
+ 93106-6150
+ United States
+
+ sbcbon@msi.ucsb.edu
+ http://sbc.marinebon.org
+
+
+ Environmental Data Initiative
+ info@environmentaldatainitiative.org
+ https://environmentaldatainitiative.org
+
+ Environmental Data Initiative
+
+
+
+
+ The integrated dataset combines four monitoring projects in the kelp
+ forests of the Santa Barbara Channel, USA. The surveying methods, raw
+ data, and meta data files for each project can be found online.
+
+
+ Santa Barbara Coastal LTER (LTER):
+ https://sbclter.msi.ucsb.edu/data/catalog/
+
+
+ Partnership for Interdisciplinary Studies of Coastal Oceans (PISCO):
+ https://data.piscoweb.org/metacatui/view/doi:10.6085/AA/PISCO_kelpforest.1.6
+
+
+ Kelp forest monitoring (KFM):
+ http://www.esapubs.org/archive/ecol/E094/245/
+
+
+ San Nicolas Island monitoring (SNI):
+ http://www.esapubs.org/archive/ecol/E094/244/
+
+
+ The data integration process includes data cleaning, column
+ standardization, species validation, and data merging. The data
+ cleaning was processed in seven steps: 1. Select the sites located
+ within the Santa Barbara Channel; 2. Separate the biological survey
+ data into three categories: fish survey, quad and swath count, and
+ benthic cover; 3. Assign survey area to the corresponding record for
+ each survey, and calculate the total point count for the benthic cover
+ data; 4. Add life stage (adult and juvenile) information to the survey
+ record; 5. Remove redundant rows; 6. Convert date format into ISO
+ compliant format (YYYY-MM-DD).
+
+
+ After the cleaning process, the designated columns (for the integrated
+ dataset) were selected and standardized indices were generated for
+ site, subsite, transect, replicate plot, and taxon. Species’
+ scientific name were run through an online data base
+ http://www.marinespecies.org/ to obtain an authoritative name and
+ taxon information. Finally, the standardized-format table from the
+ four projects were merged into one csv file for each survey category.
+
+
+ For researchers who want to use the integrated datasets, there are
+ some potential issues and cautions due to the difference among
+ sampling methods of these four programs. Please refer to the full
+ protocol (PDF) listed below for additional details.
+
+
+
+ Integrated data post processing for data analysis
+
+
+ Kui
+
+
+
+
+ http://sbc.marinebon.org/Metadata/Protocols/MBON_post_data_processing_20181130.pdf
+
+
+
+
+
+ This method step describes provenance-based metadata.This provenance metadata does not contain entity specific information.
+
+ PISCO Kelp Forest Community Surveys
+
+
+ Mark
+ Carr
+
+ mhcarr@ucsc.edu
+ https://orcid.org/0000-0001-9644-7680
+
+
+
+ This online link references an EML document that describes data used in the creation of this derivative data package.
+ https://data.piscoweb.org/metacatui/view/doi:10.6085/AA/PISCO_kelpforest.1.6
+
+
+
+
+ Mark
+ Carr
+
+ mhcarr@ucsc.edu
+
+
+
+
+
+ Southern California Bight Marine Biodiversity Observation Network
+
+
+ Dr.
+ Robert J
+ Miller
+
+
+ Marine Science Institute
+ University of California
+ Santa Barbara
+ California
+ 93106-6150
+ United States
+
+ 805 893 6174
+ miller@msi.ucsb.edu
+ Principal Investigator
+
+
+
+ Dr.
+ Daniel
+ Reed
+
+
+ Marine Science Institute
+ University of California
+ Santa Barbara
+ California
+ 93106-6150
+ United States
+
+ 805 893 8363
+ reed@lifesci.ucsb.edu
+ Co-Principal Investigator
+
+
+
+ Dr.
+ David
+ Siegel
+
+
+ Institute for Computational Earth System Science
+ University of California
+ Santa Barbara
+ California
+ 93106-3060
+ United States
+
+ 805 893 4547
+ davey@icess.ucsb.edu
+ Co-Principal Investigator
+
+
+
+ Dr.
+ Craig
+ Carlson
+
+
+ Department of Ecology, Evolution and Marine Biology
+ University of California
+ Santa Barbara
+ California
+ 93106-9620
+ United States
+
+ 805 893 2541
+ craig.carlson@lifesci.ucsb.edu
+ Co-Principal Investigator
+
+
+
+ Dr.
+ Kevin D
+ Lafferty
+
+
+ US Geological Survey Western Ecological Research Center
+ University of California
+ Santa Barbara
+ California
+ 93106
+ United States
+
+ Klafferty@usgs.gov
+ Co-Principal Investigator
+
+
+
+ Dr.
+ B.S.
+ Manjunath
+
+
+ Department of Electrical and Computer Engineering
+ University of California
+ Santa Barbara
+ California
+ 93106-9560
+ United States
+
+ 805 893 7112
+ manj@ece.ucsb.edu
+ Co-Principal Investigator
+
+
+
+ Dr.
+ Andrew
+ Rassweiler
+
+
+ Department of Biological Science
+ Florida State University
+ Tallahassee
+ Florida
+ 32306-4295
+ United States
+
+ 850 644 1555
+ rassweiler@bio.fsu.edu
+ Co-Principal Investigator
+
+
+
+ The Southern California Bight Marine Biodiversity Observation Network (SCB MBON) is
+ designed to provide a complete picture of marine biodiversity in the region. SCB MBON is
+ developing a widely applicable research model that integrates new information with
+ existing data to improve current research and monitoring programs and provide greater
+ insight into marine biodiversity.
+
+
+
+
+ MBON is funded by National Aeronautics and Space Administration (NASA), Bureau of
+ Ocean Energy Management (BOEM), and National Oceanic and Atmospheric Administration
+ (NOAA).
+
+
+
+
+ SBCMBON kelp forest integrated benthic cover biological survey
+ Cover from four projects using UPC and RPC methods
+
+ SBCMBON_kelp_forest_integrated_benthic_cover_20210120.csv
+ 409555780
+ a1d0f0250c2570b89e0fffbbea13337e
+
+
+ 1
+ \r\n
+ column
+
+ ,
+ no
+ "
+
+
+
+
+
+ https://pasta.lternet.edu/package/data/eml/edi/3/9/07339606e346313dba6608b9300d1717
+
+
+
+
+
+ data_source
+ Data source
+ Source project for this data
+ string
+
+
+
+
+
+ kfm
+ Kelp forest monitoring program of the Santa Barbara Channel National Park
+
+
+ lter
+ Santa Barbara Coastal LTER
+
+
+ pisco
+ Partnership for Interdisciplinary Studies of Coastal Oceans
+
+
+ sni
+ San Nicolas Island monitoring program
+
+
+
+
+
+
+
+ sample_method
+ Sampling method
+ Sampling method
+ string
+
+
+
+
+ any text
+
+
+
+
+
+
+ date
+ Date
+ Date of survey
+ date
+
+
+ YYYY-MM-DD
+ 1
+
+
+
+
+ http://ecoinformatics.org/oboe/oboe.1.2/oboe-core.owl#containsMeasurementsOfType
+ http://purl.dataone.org/odo/ECSO_00002051
+
+
+
+ site_id
+ Site ID
+ ID of a site, assigned by each project
+ string
+
+
+
+
+ any text
+
+
+
+
+
+
+ subsite_id
+ Subsite ID
+ Identifier for the subsite,one level below site
+ string
+
+
+
+
+ any text
+
+
+
+
+
+
+ transect_id
+ Transect ID
+ Identifier for the transect
+ string
+
+
+
+
+ any text
+
+
+
+
+
+
+ replicate_id
+ Replicate ID
+ Identifier for the replicate
+ string
+
+
+
+
+ any text
+
+
+
+
+
+
+ proj_taxon_id
+ Project-taxon code
+ Code assigned by SBC MBON for this taxon from this data source (project)
+ string
+
+
+
+
+ [a-z]-[a-z]-[0-9][0-9][0-9]
+
+
+
+
+
+ http://ecoinformatics.org/oboe/oboe.1.2/oboe-core.owl#containsMeasurementsOfType
+ http://purl.dataone.org/odo/ECSO_00002744
+
+
+
+ points
+ Points
+ Number of total points counted on a UPC or RPC survey
+ float
+
+
+
+ number
+
+ 1
+
+ real
+
+
+
+
+
+ count
+ Count
+ Number of organisms counted
+ float
+
+
+
+ number
+
+ 1
+
+ integer
+
+
+
+
+ .
+ value not recorded or not available
+
+
+
+ auth_taxon_id
+ Authoritative Taxon Code
+ Taxon code assigned by an authoritative source
+ string
+
+
+
+
+ any text
+
+
+
+
+
+ .
+ value not recorded or not available
+
+
+ http://ecoinformatics.org/oboe/oboe.1.2/oboe-core.owl#containsMeasurementsOfType
+ http://purl.dataone.org/odo/ECSO_00002608
+
+
+
+ auth_name
+ Authoritative Taxon Code Source
+ Name of the athority or registry assigning the Authoritative Taxon Code
+ string
+
+
+
+
+ any text
+
+
+
+
+
+ .
+ value not recorded or not available
+
+
+ http://ecoinformatics.org/oboe/oboe.1.2/oboe-core.owl#containsMeasurementsOfType
+ http://purl.dataone.org/odo/ECSO_00001193
+
+
+ http://ecoinformatics.org/oboe/oboe.1.2/oboe-core.owl#containsMeasurementsOfType
+ http://purl.dataone.org/odo/ECSO_00002608
+
+
+
+ taxon_name
+ Taxon name
+ Taxon name, usually species binomial or other taxon name
+ string
+
+
+
+
+ any text
+
+
+
+
+
+ http://ecoinformatics.org/oboe/oboe.1.2/oboe-core.owl#containsMeasurementsOfType
+ http://purl.dataone.org/odo/ECSO_00001193
+
+
+ http://ecoinformatics.org/oboe/oboe.1.2/oboe-core.owl#containsMeasurementsOfType
+ http://purl.dataone.org/odo/ECSO_00000313
+
+
+
+ site_name
+ Site name
+ The site, as named by each project
+ string
+
+
+
+
+ any text
+
+
+
+
+
+
+ subsite_name
+ Subsite name
+ Survey region within a site
+ string
+
+
+
+
+ any text
+
+
+
+
+
+ .
+ value not recorded or not available
+
+
+
+ latitude
+ Latitude
+ Site latitude
+ float
+
+
+
+ degree
+
+ 1e-04
+
+ real
+
+
+
+
+ .
+ value not recorded or not available
+
+
+
+ longitude
+ Longitude
+ Site longitude
+ float
+
+
+
+ degree
+
+ 1e-04
+
+ real
+
+
+
+
+ .
+ value not recorded or not available
+
+
+
+ 2826412
+
+
+
+
+
+
+ 360 degrees comprise a unit circle.
+
+
+ a number
+
+
+
+
+
diff --git a/tests/test_annotator.py b/tests/test_annotator.py
index 69b3a47..d527ab8 100644
--- a/tests/test_annotator.py
+++ b/tests/test_annotator.py
@@ -4,8 +4,15 @@
from shutil import copyfile
import pytest
import pandas as pd
-from spinneret.annotator import get_bioportal_annotation, annotate_workbook
+from lxml import etree
+from spinneret.annotator import (
+ get_bioportal_annotation,
+ annotate_workbook,
+ annotate_eml,
+ create_annotation_element,
+)
from spinneret.utilities import load_configuration
+from spinneret.datasets import get_example_eml_dir
def test_get_bioportal_annotation():
@@ -81,3 +88,43 @@ def test_annotate_workbook(tmp_path):
# The columns to be annotated should be full
for col in cols_to_annotate:
assert not wb[col].isnull().all()
+
+
def test_annotate_eml(tmp_path):
    """annotate_eml adds one annotation per fully specified workbook row"""
    source_eml = f"{get_example_eml_dir()}/edi.3.9.xml"
    workbook = "tests/edi.3.9_annotation_workbook_annotated.tsv"
    annotated_eml = f"{tmp_path}/edi.3.9_annotated.xml"

    # Precondition: the example EML starts out without any annotations
    assert not etree.parse(source_eml).xpath(".//annotation")

    annotate_eml(
        eml_path=source_eml, workbook_path=workbook, output_path=annotated_eml
    )

    # The annotated file exists and now contains annotation elements
    assert os.path.exists(annotated_eml)
    found = etree.parse(annotated_eml).xpath(".//annotation")
    assert found

    # Exactly one annotation is expected for every workbook row in which the
    # predicate and object (labels and ids) are all present.
    rows = pd.read_csv(workbook, sep="\t", encoding="utf-8")
    complete_rows = rows.dropna(
        subset=["predicate", "predicate_id", "object", "object_id"]
    )
    assert len(found) == len(complete_rows)
+
+
+# pylint: disable=line-too-long
def test_create_annotation_element():
    """Test create_annotation_element

    The serialized element must be an ``annotation`` wrapping a
    ``propertyURI`` and a ``valueURI``, each with its label attribute and URI
    text, and no extra whitespace.
    """
    # The expected value has to include the markup: the function returns an
    # element, and its serialization is what gets compared.
    fixture = (
        "<annotation>"
        '<propertyURI label="predicate_label">predicate_id</propertyURI>'
        '<valueURI label="object_label">object_id</valueURI>'
        "</annotation>"
    )
    annotation_element = create_annotation_element(
        predicate_label="predicate_label",
        predicate_id="predicate_id",
        object_label="object_label",
        object_id="object_id",
    )
    # tostring() returns bytes by default; decode before comparing to the str
    assert etree.tostring(annotation_element).decode("utf-8") == fixture