diff --git a/src/spinneret/graph.py b/src/spinneret/graph.py
index af263f0..2fbe2d9 100644
--- a/src/spinneret/graph.py
+++ b/src/spinneret/graph.py
@@ -20,16 +20,166 @@ def create_graph(metadata_files: list = None, vocabulary_files: list = None) ->
# Load metadata
if metadata_files is not None:
for filename in metadata_files:
- g.parse(filename)
+ g.parse(filename, format="json-ld")
# Load vocabularies
if vocabulary_files is not None:
for filename in vocabulary_files:
g.parse(filename, format=guess_format(filename))
+ # Some string literals should be converted to URIRefs to create linked
+ # data with vocabularies. These are often cases where SOSO conventions
+ # recommend connecting the value to an object property (e.g. `url`) rather
+ # than the object @id. The following code makes these conversions based on
+ # where vocabulary URIs can be used in SOSO markup.
+ g = convert_keyword_url_to_uri(g)
+ g = convert_variable_property_id_to_uri(g)
+ g = convert_variable_measurement_technique_to_uri(g)
+ g = convert_variable_unit_code_to_uri(g)
+ g = convert_license_to_uri(g)
+
return g
+def convert_keyword_url_to_uri(graph: Graph) -> Graph:
+    """
+    :param graph: Graph of metadata and vocabularies
+    :returns: The same graph object, updated in place, with keyword URLs
+        converted to URIs
+    :notes: Converts values of `schema:keywords/schema:DefinedTerm/schema:url`
+        to URI references if the value appears to be a URL, i.e. it is a
+        literal starting with `http://` or `https://` (case-insensitive).
+        All other values are left untouched.
+    """
+    # The prefix IRI must match the namespace used by the metadata
+    # (`"@vocab": "https://schema.org/"`), otherwise no triple is matched.
+    update_request = """
+        PREFIX schema: <https://schema.org/>
+
+        DELETE {
+            ?term schema:url ?value .
+        }
+        INSERT {
+            ?term schema:url ?newURI .
+        }
+        WHERE {
+            ?dataset schema:keywords ?term .
+            ?term a schema:DefinedTerm .
+            ?term schema:url ?value .
+            FILTER (isLiteral(?value) && REGEX(?value, "^https?://", "i")) .
+            BIND (URI(?value) AS ?newURI)
+        }
+    """
+    graph.update(update_request)
+    return graph
+
+
+def convert_variable_property_id_to_uri(graph: Graph) -> Graph:
+    """
+    :param graph: Graph of metadata and vocabularies
+    :returns: The same graph object, updated in place, with variable property
+        IDs converted to URIs
+    :notes: Converts values of `schema:variableMeasured/schema:PropertyValue/
+        schema:propertyID` to URI references if the value appears to be a
+        URL, i.e. it is a literal starting with `http://` or `https://`
+        (case-insensitive). All other values are left untouched.
+    """
+    # The prefix IRI must match the namespace used by the metadata
+    # (`"@vocab": "https://schema.org/"`), otherwise no triple is matched.
+    update_request = """
+        PREFIX schema: <https://schema.org/>
+
+        DELETE {
+            ?term schema:propertyID ?value .
+        }
+        INSERT {
+            ?term schema:propertyID ?newURI .
+        }
+        WHERE {
+            ?dataset schema:variableMeasured ?term .
+            ?term a schema:PropertyValue .
+            ?term schema:propertyID ?value .
+            FILTER (isLiteral(?value) && REGEX(?value, "^https?://", "i")) .
+            BIND (URI(?value) AS ?newURI)
+        }
+    """
+    graph.update(update_request)
+    return graph
+
+
+def convert_variable_measurement_technique_to_uri(graph: Graph) -> Graph:
+    """
+    :param graph: Graph of metadata and vocabularies
+    :returns: The same graph object, updated in place, with variable
+        measurement techniques converted to URIs
+    :notes: Converts values of `schema:variableMeasured/schema:PropertyValue/
+        schema:measurementTechnique` to URI references if the value appears to
+        be a URL, i.e. it is a literal starting with `http://` or `https://`
+        (case-insensitive). All other values are left untouched.
+    """
+    # The prefix IRI must match the namespace used by the metadata
+    # (`"@vocab": "https://schema.org/"`), otherwise no triple is matched.
+    update_request = """
+        PREFIX schema: <https://schema.org/>
+
+        DELETE {
+            ?term schema:measurementTechnique ?value .
+        }
+        INSERT {
+            ?term schema:measurementTechnique ?newURI .
+        }
+        WHERE {
+            ?dataset schema:variableMeasured ?term .
+            ?term a schema:PropertyValue .
+            ?term schema:measurementTechnique ?value .
+            FILTER (isLiteral(?value) && REGEX(?value, "^https?://", "i")) .
+            BIND (URI(?value) AS ?newURI)
+        }
+    """
+    graph.update(update_request)
+    return graph
+
+
+def convert_variable_unit_code_to_uri(graph: Graph) -> Graph:
+    """
+    :param graph: Graph of metadata and vocabularies
+    :returns: The same graph object, updated in place, with variable unit
+        codes converted to URIs
+    :notes: Converts values of `schema:variableMeasured/schema:PropertyValue/
+        schema:unitCode` to URI references if the value appears to be a URL,
+        i.e. it is a literal starting with `http://` or `https://`
+        (case-insensitive). All other values are left untouched.
+    """
+    # The prefix IRI must match the namespace used by the metadata
+    # (`"@vocab": "https://schema.org/"`), otherwise no triple is matched.
+    update_request = """
+        PREFIX schema: <https://schema.org/>
+
+        DELETE {
+            ?term schema:unitCode ?value .
+        }
+        INSERT {
+            ?term schema:unitCode ?newURI .
+        }
+        WHERE {
+            ?dataset schema:variableMeasured ?term .
+            ?term a schema:PropertyValue .
+            ?term schema:unitCode ?value .
+            FILTER (isLiteral(?value) && REGEX(?value, "^https?://", "i")) .
+            BIND (URI(?value) AS ?newURI)
+        }
+    """
+    graph.update(update_request)
+    return graph
+
+
+def convert_license_to_uri(graph: Graph) -> Graph:
+    """
+    :param graph: Graph of metadata and vocabularies
+    :returns: The same graph object, updated in place, with licenses
+        converted to URIs
+    :notes: Converts values of `schema:license` to URI references if the value
+        appears to be a URL, i.e. it is a literal starting with `http://` or
+        `https://` (case-insensitive). All other values are left untouched.
+        The subject is not restricted by type: any `schema:license` triple
+        in the graph is considered.
+    """
+    # The prefix IRI must match the namespace used by the metadata
+    # (`"@vocab": "https://schema.org/"`), otherwise no triple is matched.
+    update_request = """
+        PREFIX schema: <https://schema.org/>
+
+        DELETE {
+            ?dataset schema:license ?value .
+        }
+        INSERT {
+            ?dataset schema:license ?newURI .
+        }
+        WHERE {
+            ?dataset schema:license ?value .
+            FILTER (isLiteral(?value) && REGEX(?value, "^https?://", "i")) .
+            BIND (URI(?value) AS ?newURI)
+        }
+    """
+    graph.update(update_request)
+    return graph
+
+
if __name__ == "__main__":
# Example usage
WORKING_DIR = "/Users/csmith/Data/soso/all_edi_test_results"
diff --git a/tests/test_graph.py b/tests/test_graph.py
index ee1a67d..a8cf0cf 100644
--- a/tests/test_graph.py
+++ b/tests/test_graph.py
@@ -2,7 +2,15 @@
from os import listdir
import importlib
-from spinneret.graph import create_graph
+from rdflib import Graph, Literal, URIRef
+from spinneret.graph import (
+ create_graph,
+ convert_keyword_url_to_uri,
+ convert_variable_property_id_to_uri,
+ convert_variable_measurement_technique_to_uri,
+ convert_variable_unit_code_to_uri,
+ convert_license_to_uri,
+)
def test_create_graph():
@@ -26,3 +34,335 @@ def test_create_graph():
res = create_graph(metadata_files=metadata_files, vocabulary_files=vocabulary_files)
assert res is not None
assert len(res) == 668 # based on current metadata and vocabulary files
+
+
+def test_convert_keyword_url_to_uri_converts_if_url():
+    """Test that the convert_keyword_url_to_uri function applies the conversion
+    if the value looks like a URL"""
+
+    test_data = """
+    {
+        "@context": {"@vocab": "https://schema.org/"},
+        "@type": "Dataset",
+        "keywords": [
+            {
+                "@type": "DefinedTerm",
+                "url": "http://purl.obolibrary.org/obo/CHEBI_33284"
+            }
+        ]
+    }
+    """
+    g = Graph()
+    g.parse(data=test_data, format="json-ld")
+
+    g = convert_keyword_url_to_uri(g)
+    query = """
+        PREFIX schema: <https://schema.org/>
+
+        SELECT ?url
+        WHERE {
+            ?dataset schema:keywords ?term .
+            ?term a schema:DefinedTerm .
+            ?term schema:url ?url .
+        }
+    """
+    results = list(g.query(query))
+    # Guard against a vacuous pass: the loop below asserts nothing if the
+    # query matches no rows.
+    assert len(results) == 1
+    for result in results:
+        assert isinstance(result[0], URIRef)  # is now a URIRef
+
+
+def test_convert_keyword_url_to_uri_does_not_convert_if_text():
+    """Test that the convert_keyword_url_to_uri function does not apply the
+    conversion if the value does not look like a URL"""
+
+    test_data = """
+    {
+        "@context": {"@vocab": "https://schema.org/"},
+        "@type": "Dataset",
+        "keywords": [
+            {
+                "@type": "DefinedTerm",
+                "url": "not URL formatted text"
+            }
+        ]
+    }
+    """
+    g = Graph()
+    g.parse(data=test_data, format="json-ld")
+
+    g = convert_keyword_url_to_uri(g)
+    query = """
+        PREFIX schema: <https://schema.org/>
+
+        SELECT ?url
+        WHERE {
+            ?dataset schema:keywords ?term .
+            ?term a schema:DefinedTerm .
+            ?term schema:url ?url .
+        }
+    """
+    results = list(g.query(query))
+    # Guard against a vacuous pass: the loop below asserts nothing if the
+    # query matches no rows.
+    assert len(results) == 1
+    for result in results:
+        assert isinstance(result[0], Literal)  # is still a Literal
+
+
+def test_convert_variable_property_id_to_uri_converts_if_url():
+    """Test that the convert_variable_property_id_to_uri function applies the
+    conversion if the value looks like a URL"""
+
+    test_data = """
+    {
+        "@context": {"@vocab": "https://schema.org/"},
+        "@type": "Dataset",
+        "variableMeasured": [
+            {
+                "@type": "PropertyValue",
+                "propertyID": "http://purl.dataone.org/odo/ECSO_00002566"
+            }
+        ]
+    }
+    """
+    g = Graph()
+    g.parse(data=test_data, format="json-ld")
+
+    g = convert_variable_property_id_to_uri(g)
+    query = """
+        PREFIX schema: <https://schema.org/>
+
+        SELECT ?propertyID
+        WHERE {
+            ?dataset schema:variableMeasured ?term .
+            ?term a schema:PropertyValue .
+            ?term schema:propertyID ?propertyID .
+        }
+    """
+    results = list(g.query(query))
+    # Guard against a vacuous pass: the loop below asserts nothing if the
+    # query matches no rows.
+    assert len(results) == 1
+    for result in results:
+        assert isinstance(result[0], URIRef)  # is now a URIRef
+
+
+def test_convert_variable_property_id_to_uri_does_not_convert_if_text():
+    """Test that the convert_variable_property_id_to_uri function does not
+    apply the conversion if the value does not look like a URL"""
+
+    test_data = """
+    {
+        "@context": {"@vocab": "https://schema.org/"},
+        "@type": "Dataset",
+        "variableMeasured": [
+            {
+                "@type": "PropertyValue",
+                "propertyID": "not URL formatted text"
+            }
+        ]
+    }
+    """
+    g = Graph()
+    g.parse(data=test_data, format="json-ld")
+
+    g = convert_variable_property_id_to_uri(g)
+    query = """
+        PREFIX schema: <https://schema.org/>
+
+        SELECT ?propertyID
+        WHERE {
+            ?dataset schema:variableMeasured ?term .
+            ?term a schema:PropertyValue .
+            ?term schema:propertyID ?propertyID .
+        }
+    """
+    results = list(g.query(query))
+    # Guard against a vacuous pass: the loop below asserts nothing if the
+    # query matches no rows.
+    assert len(results) == 1
+    for result in results:
+        assert isinstance(result[0], Literal)  # is still a Literal
+
+
+def test_convert_variable_measurement_technique_to_uri_converts_if_url():
+    """Test that the convert_variable_measurement_technique_to_uri function
+    applies the conversion if the value looks like a URL"""
+
+    test_data = """
+    {
+        "@context": {"@vocab": "https://schema.org/"},
+        "@type": "Dataset",
+        "variableMeasured": [
+            {
+                "@type": "PropertyValue",
+                "measurementTechnique": "http://example.org/method/8675309"
+            }
+        ]
+    }
+    """
+    g = Graph()
+    g.parse(data=test_data, format="json-ld")
+
+    g = convert_variable_measurement_technique_to_uri(g)
+    query = """
+        PREFIX schema: <https://schema.org/>
+
+        SELECT ?value
+        WHERE {
+            ?dataset schema:variableMeasured ?term .
+            ?term a schema:PropertyValue .
+            ?term schema:measurementTechnique ?value .
+        }
+    """
+    results = list(g.query(query))
+    # Guard against a vacuous pass: the loop below asserts nothing if the
+    # query matches no rows.
+    assert len(results) == 1
+    for result in results:
+        assert isinstance(result[0], URIRef)  # is now a URIRef
+
+
+def test_convert_variable_measurement_technique_to_uri_does_not_convert_if_text():
+    """Test that the convert_variable_measurement_technique_to_uri function
+    does not apply the conversion if the value does not look like a URL"""
+
+    test_data = """
+    {
+        "@context": {"@vocab": "https://schema.org/"},
+        "@type": "Dataset",
+        "variableMeasured": [
+            {
+                "@type": "PropertyValue",
+                "measurementTechnique": "not URL formatted text"
+            }
+        ]
+    }
+    """
+    g = Graph()
+    g.parse(data=test_data, format="json-ld")
+
+    g = convert_variable_measurement_technique_to_uri(g)
+    query = """
+        PREFIX schema: <https://schema.org/>
+
+        SELECT ?value
+        WHERE {
+            ?dataset schema:variableMeasured ?term .
+            ?term a schema:PropertyValue .
+            ?term schema:measurementTechnique ?value .
+        }
+    """
+    results = list(g.query(query))
+    # Guard against a vacuous pass: the loop below asserts nothing if the
+    # query matches no rows.
+    assert len(results) == 1
+    for result in results:
+        assert isinstance(result[0], Literal)  # is still a Literal
+
+
+def test_convert_variable_unit_code_to_uri_converts_if_url():
+    """Test that the convert_variable_unit_code_to_uri function applies the
+    conversion if the value looks like a URL"""
+    test_data = """
+    {
+        "@context": {"@vocab": "https://schema.org/"},
+        "@type": "Dataset",
+        "variableMeasured": [
+            {
+                "@type": "PropertyValue",
+                "unitCode": "http://example.org/unit/2112"
+            }
+        ]
+    }
+    """
+    g = Graph()
+    g.parse(data=test_data, format="json-ld")
+
+    g = convert_variable_unit_code_to_uri(g)
+    query = """
+        PREFIX schema: <https://schema.org/>
+
+        SELECT ?value
+        WHERE {
+            ?dataset schema:variableMeasured ?term .
+            ?term a schema:PropertyValue .
+            ?term schema:unitCode ?value .
+        }
+    """
+    results = list(g.query(query))
+    # Guard against a vacuous pass: the loop below asserts nothing if the
+    # query matches no rows.
+    assert len(results) == 1
+    for result in results:
+        assert isinstance(result[0], URIRef)  # is now a URIRef
+
+
+def test_convert_variable_unit_code_to_uri_does_not_convert_if_text():
+    """Test that the convert_variable_unit_code_to_uri function does not apply
+    the conversion if the value does not look like a URL"""
+    test_data = """
+    {
+        "@context": {"@vocab": "https://schema.org/"},
+        "@type": "Dataset",
+        "variableMeasured": [
+            {
+                "@type": "PropertyValue",
+                "unitCode": "not URL formatted text"
+            }
+        ]
+    }
+    """
+    g = Graph()
+    g.parse(data=test_data, format="json-ld")
+
+    g = convert_variable_unit_code_to_uri(g)
+    query = """
+        PREFIX schema: <https://schema.org/>
+
+        SELECT ?value
+        WHERE {
+            ?dataset schema:variableMeasured ?term .
+            ?term a schema:PropertyValue .
+            ?term schema:unitCode ?value .
+        }
+    """
+    results = list(g.query(query))
+    # Guard against a vacuous pass: the loop below asserts nothing if the
+    # query matches no rows.
+    assert len(results) == 1
+    for result in results:
+        assert isinstance(result[0], Literal)  # is still a Literal
+
+
+def test_convert_license_to_uri_converts_if_url():
+    """Test that the convert_license_to_uri function applies the conversion if
+    the value looks like a URL"""
+    test_data = """
+    {
+        "@context": {"@vocab": "https://schema.org/"},
+        "@type": "Dataset",
+        "license": "http://spdx.org/licenses/CC0-1.0"
+    }
+    """
+    g = Graph()
+    g.parse(data=test_data, format="json-ld")
+
+    g = convert_license_to_uri(g)
+    query = """
+        PREFIX schema: <https://schema.org/>
+
+        SELECT ?value
+        WHERE {
+            ?dataset schema:license ?value .
+        }
+    """
+    results = list(g.query(query))
+    # Guard against a vacuous pass: the loop below asserts nothing if the
+    # query matches no rows.
+    assert len(results) == 1
+    for result in results:
+        assert isinstance(result[0], URIRef)  # is now a URIRef
+
+
+def test_convert_license_to_uri_does_not_convert_if_text():
+    """Test that the convert_license_to_uri function does not apply the
+    conversion if the value does not look like a URL"""
+    test_data = """
+    {
+        "@context": {"@vocab": "https://schema.org/"},
+        "@type": "Dataset",
+        "license": "not URL formatted text"
+    }
+    """
+    g = Graph()
+    g.parse(data=test_data, format="json-ld")
+
+    g = convert_license_to_uri(g)
+    query = """
+        PREFIX schema: <https://schema.org/>
+
+        SELECT ?value
+        WHERE {
+            ?dataset schema:license ?value .
+        }
+    """
+    results = list(g.query(query))
+    # Guard against a vacuous pass: the loop below asserts nothing if the
+    # query matches no rows.
+    assert len(results) == 1
+    for result in results:
+        assert isinstance(result[0], Literal)  # is still a Literal