Skip to content

Commit

Permalink
Add TSV writer and update other writers' docs (#107)
Browse files Browse the repository at this point in the history
Closes #106

1. Adds more docs to JSON-LD export, which supports MONDO Robot use case
2. Adds more docs to SHACL export
3. Implements simple TSV export, which supports SemanticSQL use case
4. Adds section "writing a context" into the tutorial that links to the
writer functions
  • Loading branch information
cthoyt authored Mar 13, 2024
1 parent a364fdc commit e36724d
Show file tree
Hide file tree
Showing 3 changed files with 156 additions and 14 deletions.
32 changes: 32 additions & 0 deletions docs/source/tutorial.rst
Original file line number Diff line number Diff line change
Expand Up @@ -418,6 +418,38 @@ project.
>>> converter = curies.get_bioregistry_converter()
>>> slim_converter = converter.get_subconverter(prefixes)

Writing a Context
-----------------
After loading and modifying a context, there are several functions for writing
a context to a file:

- :func:`curies.write_extended_prefix_map`
- :func:`curies.write_jsonld_context`
- :func:`curies.write_shacl`
- :func:`curies.write_tsv`

Here's a self-contained example on how this works:

.. code-block:: python

    import curies

    converter = curies.load_prefix_map({
        "CHEBI": "http://purl.obolibrary.org/obo/CHEBI_",
    })
    curies.write_shacl(converter, "example_shacl.ttl")

which outputs the following file:

.. code-block::

    @prefix sh: <http://www.w3.org/ns/shacl#> .
    @prefix xsd: <http://www.w3.org/2001/XMLSchema#> .

    [
      sh:declare
        [ sh:prefix "CHEBI" ; sh:namespace "http://purl.obolibrary.org/obo/CHEBI_"^^xsd:anyURI ]
    ] .

Faultless handling of overlapping URI prefixes
----------------------------------------------
Most implementations of URI parsing iterate through the CURIE prefix/URI prefix pairs
Expand Down
2 changes: 2 additions & 0 deletions src/curies/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -19,6 +19,7 @@
write_extended_prefix_map,
write_jsonld_context,
write_shacl,
write_tsv,
)
from .discovery import discover, discover_from_rdf
from .reconciliation import remap_curie_prefixes, remap_uri_prefixes, rewire
Expand Down Expand Up @@ -53,6 +54,7 @@
"write_extended_prefix_map",
"write_jsonld_context",
"write_shacl",
"write_tsv",
# sources
"get_obo_converter",
"get_prefixcommons_converter",
Expand Down
136 changes: 122 additions & 14 deletions src/curies/api.py
Original file line number Diff line number Diff line change
Expand Up @@ -62,6 +62,7 @@
"write_extended_prefix_map",
"write_jsonld_context",
"write_shacl",
"write_tsv",
]

logger = logging.getLogger(__name__)
Expand Down Expand Up @@ -2129,29 +2130,83 @@ def _ensure_path(path: Union[str, Path]) -> Path:
return path


def _get_jsonld_context(
    converter: Converter, *, expand: bool = False, include_synonyms: bool = False
) -> Dict[str, Any]:
    """Build a JSON-LD context dictionary from the converter's records.

    :param converter: The converter whose records are exported
    :param expand: Forwarded to :func:`_get_expanded_term` to choose the
        form of each term
    :param include_synonyms: If true, each CURIE prefix synonym of a record
        is also added as a key pointing at the same term as the record's
        primary prefix
    :returns: A dictionary with a single ``@context`` key whose value maps
        CURIE prefixes (and optionally their synonyms) to terms
    """
    mapping: Dict[str, Any] = {}
    for rec in converter.records:
        expanded = _get_expanded_term(rec, expand=expand)
        # The primary prefix always comes first, followed by any synonyms
        keys = [rec.prefix]
        if include_synonyms:
            keys.extend(rec.prefix_synonyms)
        for key in keys:
            mapping[key] = expanded
    return {"@context": mapping}


def write_jsonld_context(
    converter: Converter,
    path: Union[str, Path],
    *,
    include_synonyms: bool = False,
    expand: bool = False,
) -> None:
    """Write the converter's bijective map as a JSON-LD context to a file.

    :param converter: The converter to export
    :param path: The path to a file to write to
    :param include_synonyms: If true, includes CURIE prefix synonyms.
        URI prefix synonyms are not output.
    :param expand: If False, output a dictionary-like ``@context`` element.
        If True, use ``@prefix`` and ``@id`` as keys for the CURIE prefix
        and URI prefix, respectively, to maximize compatibility.

    The following example shows writing a JSON-LD context:

    .. code-block:: python

        import curies

        converter = curies.load_prefix_map({
            "CHEBI": "http://purl.obolibrary.org/obo/CHEBI_",
        })
        curies.write_jsonld_context(converter, "example_context.json")

    .. code-block:: json

        {
            "@context": {
                "CHEBI": "http://purl.obolibrary.org/obo/CHEBI_"
            }
        }

    Because some implementations of JSON-LD do not like URI prefixes that end
    with an underscore ``_``, we can use the ``expand`` keyword to turn on more
    verbose JSON-LD context output that contains explicit ``@prefix`` and
    ``@id`` annotations:

    .. code-block:: python

        import curies

        converter = curies.load_prefix_map({
            "CHEBI": "http://purl.obolibrary.org/obo/CHEBI_",
        })
        curies.write_jsonld_context(converter, "example_context.json", expand=True)

    .. code-block:: json

        {
            "@context": {
                "CHEBI": {
                    "@id": "http://purl.obolibrary.org/obo/CHEBI_",
                    "@prefix": true
                }
            }
        }
    """
    path = _ensure_path(path)
    # Building the context is delegated to the shared helper so the document
    # is serialized exactly once.
    obj = _get_jsonld_context(converter, include_synonyms=include_synonyms, expand=expand)
    with path.open("w") as file:
        json.dump(obj, file, indent=4, sort_keys=True)


def _get_expanded_term(record: Record, *, expand: bool) -> Union[str, Dict[str, Any]]:
Expand Down Expand Up @@ -2183,6 +2238,24 @@ def write_shacl(
URI prefix synonyms are not output.
.. seealso:: https://www.w3.org/TR/shacl/#sparql-prefixes
.. code-block:: python
import curies
converter = curies.load_prefix_map({
"CHEBI": "http://purl.obolibrary.org/obo/CHEBI_",
})
curies.write_shacl(converter, "example_shacl.ttl")
.. code-block::
@prefix sh: <http://www.w3.org/ns/shacl#> .
@prefix xsd: <http://www.w3.org/2001/XMLSchema#> .
[
sh:declare
[ sh:prefix "CHEBI" ; sh:namespace "http://purl.obolibrary.org/obo/CHEBI_"^^xsd:anyURI ]
] .
"""
text = dedent(
"""\
Expand All @@ -2207,6 +2280,41 @@ def write_shacl(
path.write_text(text.format(entries=",\n".join(lines)))


def write_tsv(
    converter: Converter, path: Union[str, Path], *, header: Tuple[str, str] = ("prefix", "base")
) -> None:
    """Write a simple prefix map TSV (tab-separated values) file.

    :param converter: The converter to export
    :param path: The path to a file to write to
    :param header: A 2-tuple of strings representing the header used in the file,
        where the first element is the label for CURIE prefixes and the second
        element is the label for URI prefixes

    .. code-block:: python

        import curies

        converter = curies.load_prefix_map({
            "CHEBI": "http://purl.obolibrary.org/obo/CHEBI_",
        })
        curies.write_tsv(converter, "example_context.tsv")

    The resulting file has one row per record, with the two columns separated
    by a single tab character:

    .. code-block::

        prefix    base
        CHEBI     http://purl.obolibrary.org/obo/CHEBI_
    """
    import csv

    path = _ensure_path(path)

    # The csv module emits its own line terminators; opening with newline=""
    # stops the text layer from translating them again (which would produce
    # "\r\r\n" line endings on Windows).
    with path.open("w", newline="") as file:
        writer = csv.writer(file, delimiter="\t")
        writer.writerow(header)
        writer.writerows((record.prefix, record.uri_prefix) for record in converter.records)


def _get_shacl_line(prefix: str, uri_prefix: str, pattern: Optional[str] = None) -> str:
line = f' [ sh:prefix "{prefix}" ; sh:namespace "{uri_prefix}"^^xsd:anyURI '
if pattern:
Expand Down

0 comments on commit e36724d

Please sign in to comment.