diff --git a/src/spinneret/graph.py b/src/spinneret/graph.py
index af263f0..2fbe2d9 100644
--- a/src/spinneret/graph.py
+++ b/src/spinneret/graph.py
@@ -20,16 +20,166 @@ def create_graph(metadata_files: list = None, vocabulary_files: list = None) ->
     # Load metadata
     if metadata_files is not None:
         for filename in metadata_files:
-            g.parse(filename)
+            g.parse(filename, format="json-ld")
 
     # Load vocabularies
     if vocabulary_files is not None:
         for filename in vocabulary_files:
             g.parse(filename, format=guess_format(filename))
 
+    # Some string literals should be converted to URIRefs to create linked
+    # data with vocabularies. These are often cases where SOSO conventions
+    # recommend connecting the value to an object property (e.g. `url`) rather
+    # than the object @id. The following code makes these conversions based on
+    # where vocabulary URIs can be used in SOSO markup.
+    g = convert_keyword_url_to_uri(g)
+    g = convert_variable_property_id_to_uri(g)
+    g = convert_variable_measurement_technique_to_uri(g)
+    g = convert_variable_unit_code_to_uri(g)
+    g = convert_license_to_uri(g)
+
     return g
 
 
+def convert_keyword_url_to_uri(graph: Graph) -> Graph:
+    """
+    :param graph: Graph of metadata and vocabularies
+    :returns: Graph with keyword URLs converted to URIs
+    :notes: Converts values of `schema:keywords/schema:DefinedTerm/schema:url`
+        to URI references if the value appears to be a URL.
+    """
+    update_request = """
+    PREFIX schema: <https://schema.org/>
+
+    DELETE {
+        ?term schema:url ?value .
+    }
+    INSERT {
+        ?term schema:url ?newURI .
+    }
+    WHERE {
+        ?dataset schema:keywords ?term .
+        ?term a schema:DefinedTerm .
+        ?term schema:url ?value .
+        FILTER (isLiteral(?value) && REGEX(?value, "^https?://", "i")) .
+        BIND (URI(?value) AS ?newURI)
+    }
+    """
+    graph.update(update_request)
+    return graph
+
+
+def convert_variable_property_id_to_uri(graph: Graph) -> Graph:
+    """
+    :param graph: Graph of metadata and vocabularies
+    :returns: Graph with variable property IDs converted to URIs
+    :notes: Converts values of `schema:variableMeasured/schema:PropertyValue/
+        schema:propertyID` to URI references if the value appears to be a URL.
+    """
+    update_request = """
+    PREFIX schema: <https://schema.org/>
+
+    DELETE {
+        ?term schema:propertyID ?value .
+    }
+    INSERT {
+        ?term schema:propertyID ?newURI .
+    }
+    WHERE {
+        ?dataset schema:variableMeasured ?term .
+        ?term a schema:PropertyValue .
+        ?term schema:propertyID ?value .
+        FILTER (isLiteral(?value) && REGEX(?value, "^https?://", "i")) .
+        BIND (URI(?value) AS ?newURI)
+    }
+    """
+    graph.update(update_request)
+    return graph
+
+
+def convert_variable_measurement_technique_to_uri(graph: Graph) -> Graph:
+    """
+    :param graph: Graph of metadata and vocabularies
+    :returns: Graph with variable measurement techniques converted to URIs
+    :notes: Converts values of `schema:variableMeasured/schema:PropertyValue/
+        schema:measurementTechnique` to URI references if the value appears to
+        be a URL.
+    """
+    update_request = """
+    PREFIX schema: <https://schema.org/>
+
+    DELETE {
+        ?term schema:measurementTechnique ?value .
+    }
+    INSERT {
+        ?term schema:measurementTechnique ?newURI .
+    }
+    WHERE {
+        ?dataset schema:variableMeasured ?term .
+        ?term a schema:PropertyValue .
+        ?term schema:measurementTechnique ?value .
+        FILTER (isLiteral(?value) && REGEX(?value, "^https?://", "i")) .
+        BIND (URI(?value) AS ?newURI)
+    }
+    """
+    graph.update(update_request)
+    return graph
+
+
+def convert_variable_unit_code_to_uri(graph: Graph) -> Graph:
+    """
+    :param graph: Graph of metadata and vocabularies
+    :returns: Graph with variable unit codes converted to URIs
+    :notes: Converts values of `schema:variableMeasured/schema:PropertyValue/
+        schema:unitCode` to URI references if the value appears to be a URL.
+    """
+    update_request = """
+    PREFIX schema: <https://schema.org/>
+
+    DELETE {
+        ?term schema:unitCode ?value .
+    }
+    INSERT {
+        ?term schema:unitCode ?newURI .
+    }
+    WHERE {
+        ?dataset schema:variableMeasured ?term .
+        ?term a schema:PropertyValue .
+        ?term schema:unitCode ?value .
+        FILTER (isLiteral(?value) && REGEX(?value, "^https?://", "i")) .
+        BIND (URI(?value) AS ?newURI)
+    }
+    """
+    graph.update(update_request)
+    return graph
+
+
+def convert_license_to_uri(graph: Graph) -> Graph:
+    """
+    :param graph: Graph of metadata and vocabularies
+    :returns: Graph with licenses converted to URIs
+    :notes: Converts values of `schema:license` to URI references if the value
+        appears to be a URL.
+    """
+    update_request = """
+    PREFIX schema: <https://schema.org/>
+
+    DELETE {
+        ?dataset schema:license ?value .
+    }
+    INSERT {
+        ?dataset schema:license ?newURI .
+    }
+    WHERE {
+        ?dataset schema:license ?value .
+        FILTER (isLiteral(?value) && REGEX(?value, "^https?://", "i")) .
+        BIND (URI(?value) AS ?newURI)
+    }
+    """
+    graph.update(update_request)
+    return graph
+
+
 if __name__ == "__main__":
     # Example usage
     WORKING_DIR = "/Users/csmith/Data/soso/all_edi_test_results"
diff --git a/tests/test_graph.py b/tests/test_graph.py
index ee1a67d..a8cf0cf 100644
--- a/tests/test_graph.py
+++ b/tests/test_graph.py
@@ -2,7 +2,15 @@
 
 from os import listdir
 import importlib
-from spinneret.graph import create_graph
+from rdflib import Graph, Literal, URIRef
+from spinneret.graph import (
+    create_graph,
+    convert_keyword_url_to_uri,
+    convert_variable_property_id_to_uri,
+    convert_variable_measurement_technique_to_uri,
+    convert_variable_unit_code_to_uri,
+    convert_license_to_uri,
+)
 
 
 def test_create_graph():
@@ -26,3 +34,335 @@ def test_create_graph():
     res = create_graph(metadata_files=metadata_files, vocabulary_files=vocabulary_files)
     assert res is not None
     assert len(res) == 668  # based on current metadata and vocabulary files
+
+
+def test_convert_keyword_url_to_uri_converts_if_url():
+    """Test that the convert_keyword_url_to_uri function applies the conversion
+    if the value looks like a URL"""
+
+    test_data = """
+        {
+            "@context": {"@vocab": "https://schema.org/"},
+            "@type": "Dataset",
+            "keywords": [
+                {
+                    "@type": "DefinedTerm",
+                    "url": "http://purl.obolibrary.org/obo/CHEBI_33284"
+                }
+            ]
+        }
+        """
+    g = Graph()
+    g.parse(data=test_data, format="json-ld")
+
+    g = convert_keyword_url_to_uri(g)
+    query = """
+        PREFIX schema: <https://schema.org/>
+
+        SELECT ?url
+        WHERE {
+            ?dataset schema:keywords ?term .
+            ?term a schema:DefinedTerm .
+            ?term schema:url ?url .
+        }
+        """
+    results = list(g.query(query))
+    assert results  # fail loudly instead of passing on zero rows
+    assert all(isinstance(row[0], URIRef) for row in results)  # now URIRefs
+
+
+def test_convert_keyword_url_to_uri_does_not_convert_if_text():
+    """Test that the convert_keyword_url_to_uri function does not apply the
+    conversion if the value does not look like a URL"""
+
+    test_data = """
+        {
+            "@context": {"@vocab": "https://schema.org/"},
+            "@type": "Dataset",
+            "keywords": [
+                {
+                    "@type": "DefinedTerm",
+                    "url": "not URL formatted text"
+                }
+            ]
+        }
+        """
+    g = Graph()
+    g.parse(data=test_data, format="json-ld")
+
+    g = convert_keyword_url_to_uri(g)
+    query = """
+        PREFIX schema: <https://schema.org/>
+
+        SELECT ?url
+        WHERE {
+            ?dataset schema:keywords ?term .
+            ?term a schema:DefinedTerm .
+            ?term schema:url ?url .
+        }
+        """
+    results = list(g.query(query))
+    assert results  # fail loudly instead of passing on zero rows
+    assert all(isinstance(row[0], Literal) for row in results)  # still Literals
+
+
+def test_convert_variable_property_id_to_uri_converts_if_url():
+    """Test that the convert_variable_property_id_to_uri function applies the
+    conversion if the value looks like a URL"""
+
+    test_data = """
+        {
+            "@context": {"@vocab": "https://schema.org/"},
+            "@type": "Dataset",
+            "variableMeasured": [
+                {
+                    "@type": "PropertyValue",
+                    "propertyID": "http://purl.dataone.org/odo/ECSO_00002566"
+                }
+            ]
+        }
+        """
+    g = Graph()
+    g.parse(data=test_data, format="json-ld")
+
+    g = convert_variable_property_id_to_uri(g)
+    query = """
+        PREFIX schema: <https://schema.org/>
+
+        SELECT ?propertyID
+        WHERE {
+            ?dataset schema:variableMeasured ?term .
+            ?term a schema:PropertyValue .
+            ?term schema:propertyID ?propertyID .
+        }
+        """
+    results = list(g.query(query))
+    assert results  # fail loudly instead of passing on zero rows
+    assert all(isinstance(row[0], URIRef) for row in results)  # now URIRefs
+
+
+def test_convert_variable_property_id_to_uri_does_not_convert_if_text():
+    """Test that the convert_variable_property_id_to_uri function does not
+    apply the conversion if the value does not look like a URL"""
+
+    test_data = """
+        {
+            "@context": {"@vocab": "https://schema.org/"},
+            "@type": "Dataset",
+            "variableMeasured": [
+                {
+                    "@type": "PropertyValue",
+                    "propertyID": "not URL formatted text"
+                }
+            ]
+        }
+        """
+    g = Graph()
+    g.parse(data=test_data, format="json-ld")
+
+    g = convert_variable_property_id_to_uri(g)
+    query = """
+        PREFIX schema: <https://schema.org/>
+
+        SELECT ?propertyID
+        WHERE {
+            ?dataset schema:variableMeasured ?term .
+            ?term a schema:PropertyValue .
+            ?term schema:propertyID ?propertyID .
+        }
+        """
+    results = list(g.query(query))
+    assert results  # fail loudly instead of passing on zero rows
+    assert all(isinstance(row[0], Literal) for row in results)  # still Literals
+
+
+def test_convert_variable_measurement_technique_to_uri_converts_if_url():
+    """Test that the convert_variable_measurement_technique_to_uri function
+    applies the conversion if the value looks like a URL"""
+
+    test_data = """
+        {
+            "@context": {"@vocab": "https://schema.org/"},
+            "@type": "Dataset",
+            "variableMeasured": [
+                {
+                    "@type": "PropertyValue",
+                    "measurementTechnique": "http://example.org/method/8675309"
+                }
+            ]
+        }
+        """
+    g = Graph()
+    g.parse(data=test_data, format="json-ld")
+
+    g = convert_variable_measurement_technique_to_uri(g)
+    query = """
+        PREFIX schema: <https://schema.org/>
+
+        SELECT ?value
+        WHERE {
+            ?dataset schema:variableMeasured ?term .
+            ?term a schema:PropertyValue .
+            ?term schema:measurementTechnique ?value .
+        }
+        """
+    results = list(g.query(query))
+    assert results  # fail loudly instead of passing on zero rows
+    assert all(isinstance(row[0], URIRef) for row in results)  # now URIRefs
+
+
+def test_convert_variable_measurement_technique_to_uri_does_not_convert_if_text():
+    """Test that the convert_variable_measurement_technique_to_uri function
+    does not apply the conversion if the value does not look like a URL"""
+
+    test_data = """
+        {
+            "@context": {"@vocab": "https://schema.org/"},
+            "@type": "Dataset",
+            "variableMeasured": [
+                {
+                    "@type": "PropertyValue",
+                    "measurementTechnique": "not URL formatted text"
+                }
+            ]
+        }
+        """
+    g = Graph()
+    g.parse(data=test_data, format="json-ld")
+
+    g = convert_variable_measurement_technique_to_uri(g)
+    query = """
+        PREFIX schema: <https://schema.org/>
+
+        SELECT ?value
+        WHERE {
+            ?dataset schema:variableMeasured ?term .
+            ?term a schema:PropertyValue .
+            ?term schema:measurementTechnique ?value .
+        }
+        """
+    results = list(g.query(query))
+    assert results  # fail loudly instead of passing on zero rows
+    assert all(isinstance(row[0], Literal) for row in results)  # still Literals
+
+
+def test_convert_variable_unit_code_to_uri_converts_if_url():
+    """Test that the convert_variable_unit_code_to_uri function applies the
+    conversion if the value looks like a URL"""
+    test_data = """
+        {
+            "@context": {"@vocab": "https://schema.org/"},
+            "@type": "Dataset",
+            "variableMeasured": [
+                {
+                    "@type": "PropertyValue",
+                    "unitCode": "http://example.org/unit/2112"
+                }
+            ]
+        }
+        """
+    g = Graph()
+    g.parse(data=test_data, format="json-ld")
+
+    g = convert_variable_unit_code_to_uri(g)
+    query = """
+        PREFIX schema: <https://schema.org/>
+
+        SELECT ?value
+        WHERE {
+            ?dataset schema:variableMeasured ?term .
+            ?term a schema:PropertyValue .
+            ?term schema:unitCode ?value .
+        }
+        """
+    results = list(g.query(query))
+    assert results  # fail loudly instead of passing on zero rows
+    assert all(isinstance(row[0], URIRef) for row in results)  # now URIRefs
+
+
+def test_convert_variable_unit_code_to_uri_does_not_convert_if_text():
+    """Test that the convert_variable_unit_code_to_uri function does not apply
+    the conversion if the value does not look like a URL"""
+    test_data = """
+        {
+            "@context": {"@vocab": "https://schema.org/"},
+            "@type": "Dataset",
+            "variableMeasured": [
+                {
+                    "@type": "PropertyValue",
+                    "unitCode": "not URL formatted text"
+                }
+            ]
+        }
+        """
+    g = Graph()
+    g.parse(data=test_data, format="json-ld")
+
+    g = convert_variable_unit_code_to_uri(g)
+    query = """
+        PREFIX schema: <https://schema.org/>
+
+        SELECT ?value
+        WHERE {
+            ?dataset schema:variableMeasured ?term .
+            ?term a schema:PropertyValue .
+            ?term schema:unitCode ?value .
+        }
+        """
+    results = list(g.query(query))
+    assert results  # fail loudly instead of passing on zero rows
+    assert all(isinstance(row[0], Literal) for row in results)  # still Literals
+
+
+def test_convert_license_to_uri_converts_if_url():
+    """Test that the convert_license_to_uri function applies the conversion if
+    the value looks like a URL"""
+    test_data = """
+        {
+            "@context": {"@vocab": "https://schema.org/"},
+            "@type": "Dataset",
+            "license": "http://spdx.org/licenses/CC0-1.0"
+        }
+        """
+    g = Graph()
+    g.parse(data=test_data, format="json-ld")
+
+    g = convert_license_to_uri(g)
+    query = """
+        PREFIX schema: <https://schema.org/>
+
+        SELECT ?value
+        WHERE {
+            ?dataset schema:license ?value .
+        }
+        """
+    results = list(g.query(query))
+    assert results  # fail loudly instead of passing on zero rows
+    assert all(isinstance(row[0], URIRef) for row in results)  # now URIRefs
+
+
+def test_convert_license_to_uri_does_not_convert_if_text():
+    """Test that the convert_license_to_uri function does not apply the
+    conversion if the value does not look like a URL"""
+    test_data = """
+        {
+            "@context": {"@vocab": "https://schema.org/"},
+            "@type": "Dataset",
+            "license": "not URL formatted text"
+        }
+        """
+    g = Graph()
+    g.parse(data=test_data, format="json-ld")
+
+    g = convert_license_to_uri(g)
+    query = """
+        PREFIX schema: <https://schema.org/>
+
+        SELECT ?value
+        WHERE {
+            ?dataset schema:license ?value .
+        }
+        """
+    results = list(g.query(query))
+    assert results  # fail loudly instead of passing on zero rows
+    assert all(isinstance(row[0], Literal) for row in results)  # still Literals