-
Notifications
You must be signed in to change notification settings - Fork 1
Commit
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
feat: split all_ontology into individual files. (#93)
## Reason for Change - #81 - #82 ## Changes - add semantic_version as a requirement for the all_ontology build and the API. It is used to find the latest version of cellxgene_schema. - modify *tools/ontology-builder/src/all_ontology_generator.py* to generate a separate file for each ontology in ontology_info.json. By default, all_ontology_generator.py generates a json.gz file for the latest cellxgene_schema version. - update *artifact-schemas/ontology_info_schema.json* to support split ontology.json.gz formats - move `ontologyCategory` to the top level in *artifact-schemas/all_ontology_schema.json*. This is because each ontology is saved into its own file, so the terms do not need to be nested under their ontology name. - update *artifact-schemas/all_ontology_schema.json* to support multiple cellxgene schema versions - nest ontology_info.json properties under a Cellxgene Schema version. This allows us to support multiple schema versions from the ontology_info.json file. - It is used by *tools/ontology-builder/src/all_ontology_generator.py* to generate the ontology artifacts for the current schema version. - It is used by *api/python/src/cellxgene_ontology_guide/supported_versions.py* to get the schema version and determine what ontology versions to use. - update *api/python/src/cellxgene_ontology_guide/ontology_parser.py* to use `CXGSchema` to access ontology terms. - add *api/python/src/cellxgene_ontology_guide/supported_versions.py* to load the ontologies associated with a schema version. - add caching of loaded ontology files to avoid excessive unzipping of ontology files. ## Testing steps - Added unit tests for all new functionality. - Updated unit tests for existing functionality. - Verified multiple ontology assets are created. - Verified the API can use the multiple assets. ## Notes for Reviewer - can artifact_download.py be removed? --------- Co-authored-by: github-actions <[email protected]>
- Loading branch information
Showing
27 changed files
with
498 additions
and
357 deletions.
There are no files selected for viewing
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
44 changes: 0 additions & 44 deletions
44
api/python/src/cellxgene_ontology_guide/artifact_download.py
This file was deleted.
Oops, something went wrong.
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
77 changes: 77 additions & 0 deletions
77
api/python/src/cellxgene_ontology_guide/supported_versions.py
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,77 @@ | ||
import functools | ||
import gzip | ||
import json | ||
import os | ||
from typing import Any, Dict, List, Optional | ||
|
||
from constants import DATA_ROOT, ONTOLOGY_FILENAME_SUFFIX, ONTOLOGY_INFO_FILENAME | ||
from semantic_version import Version | ||
|
||
from cellxgene_ontology_guide.entities import Ontology | ||
|
||
|
||
@functools.cache
def load_ontology_file(file_name: str) -> Any:
    """Load a gzipped JSON ontology file from the data directory.

    The result is memoized per ``file_name`` by ``functools.cache``, so each file is
    decompressed and parsed at most once until the cache is cleared.

    :param file_name: str name of the gzipped JSON file, relative to DATA_ROOT
    :return: the parsed JSON content of the file
    """
    ontology_path = os.path.join(DATA_ROOT, file_name)
    # JSON is UTF-8 by specification; pin the encoding so decoding does not depend on the
    # locale's default text encoding.
    with gzip.open(ontology_path, "rt", encoding="utf-8") as f:
        return json.load(f)
|
||
|
||
def clear_ontology_file_cache() -> None:
    """Drop every memoized result held by load_ontology_file.

    After this call, the next load of any ontology file reads it again instead of
    returning the cached value.
    """
    cached_loader = load_ontology_file
    cached_loader.cache_clear()
|
||
|
||
def get_latest_schema_version(versions: List[str]) -> str:
    """Given a list of schema versions, return the latest version.

    :param versions: List[str] list of schema versions. Versions can be in the format "v5.0.0" or "5.0.0"
    :return: str latest version with a "v" prefix
    :raises ValueError: if no versions are provided
    """
    # Materialize once so any iterable of strings (e.g. dict keys) can be checked for emptiness.
    candidates = list(versions)
    if not candidates:
        raise ValueError("No schema versions were provided.")

    # Strip the optional "v" prefix before coercing; max() avoids sorting the whole list
    # just to read its last element.
    latest = max(Version.coerce(candidate.removeprefix("v")) for candidate in candidates)
    return f"v{latest}"
|
||
|
||
def load_supported_versions() -> Any:
    """Load the ontology_info.json file from the data directory.

    :return: the parsed JSON content of the ontology info file
    """
    info_path = os.path.join(DATA_ROOT, ONTOLOGY_INFO_FILENAME)
    # JSON is UTF-8 by specification; do not rely on the locale's default encoding.
    with open(info_path, encoding="utf-8") as f:
        return json.load(f)
|
||
|
||
class CXGSchema:
    """The ontology information used by one cellxgene schema version."""

    def __init__(self, version: Optional[str] = None):
        """
        :param version: The schema version to use. If not provided, the latest schema version will be used.
        :raises ValueError: if an explicit version is not present in the loaded ontology info
        """
        all_versions = load_supported_versions()
        if version is not None and version not in all_versions:
            raise ValueError(f"Schema version {version} is not supported in this package version.")
        if version is None:
            version = get_latest_schema_version(all_versions.keys())

        self.version = version
        self.supported_ontologies = all_versions[version]
        # Maps ontology name -> file name already resolved by ontology().
        self.ontology_file_names: Dict[str, str] = {}

    def ontology(self, name: str) -> Any:
        """Return the ontology terms for the given ontology name.

        The file name is resolved once per ontology name and remembered on the instance;
        the file content itself is obtained through load_ontology_file.

        :param name: str name of the ontology to get the terms for
        :return: dict representation of the ontology terms
        :raises ValueError: if the ontology is not an attribute of Ontology, or has no
            version entry for this schema version
        """
        # Fast path: the file name for this ontology was resolved by an earlier call.
        if name in self.ontology_file_names:
            return load_ontology_file(self.ontology_file_names[name])

        if getattr(Ontology, name, None) is None:
            raise ValueError(f"Ontology {name} is not supported in this package version.")

        try:
            onto_version = self.supported_ontologies[name]["version"]
        except KeyError as e:
            raise ValueError(f"Ontology {name} is not supported for schema version {self.version}") from e

        resolved_name = f"{name}-ontology-{onto_version}{ONTOLOGY_FILENAME_SUFFIX}"
        self.ontology_file_names[name] = resolved_name  # remember the file name for later lookups
        return load_ontology_file(resolved_name)
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,15 @@ | ||
from unittest.mock import patch | ||
|
||
import pytest | ||
|
||
|
||
@pytest.fixture
def mock_load_supported_versions():
    """Patch supported_versions.load_supported_versions and yield the mock.

    The patch is active for the duration of the requesting test and is undone when the
    fixture is torn down.
    """
    # The previously requested ``tmpdir`` fixture was never used, so it is no longer requested.
    with patch("cellxgene_ontology_guide.supported_versions.load_supported_versions") as mock:
        yield mock
|
||
|
||
@pytest.fixture
def mock_load_ontology_file():
    """Patch supported_versions.load_ontology_file and yield the mock.

    The patch is active for the duration of the requesting test and is undone when the
    fixture is torn down.
    """
    target = "cellxgene_ontology_guide.supported_versions.load_ontology_file"
    with patch(target) as mocked_loader:
        yield mocked_loader
This file was deleted.
Oops, something went wrong.
Oops, something went wrong.