Skip to content

Commit

Permalink
gene_list endpoint that obtains genes from UBKG instead of Cells API;…
Browse files Browse the repository at this point in the history
… utility to extract index information from cells api.
  • Loading branch information
AlanSimmons committed Oct 10, 2023
1 parent 3952eed commit a3a58c9
Show file tree
Hide file tree
Showing 12 changed files with 430 additions and 94 deletions.
2 changes: 2 additions & 0 deletions src/cells_index/.gitignore
Original file line number Diff line number Diff line change
@@ -0,0 +1,2 @@
*.csv
*.tsv
Empty file added src/cells_index/__init__.py
Empty file.
83 changes: 83 additions & 0 deletions src/cells_index/build_index.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,83 @@
# coding: utf-8

# Prototype utility that builds a CSV file of information extracted from the Cells API.

import logging
import csv
import os

from hubmap_api_py_client import Client
from hubmap_api_py_client.errors import ClientError

# Script body: query the Cells API and write one TSV row per gene / cell type found in datasets.

logging.basicConfig(format='[%(asctime)s] %(levelname)s in %(module)s:%(lineno)d: %(message)s',
                    datefmt='%Y-%m-%d %H:%M:%S',
                    level=logging.INFO)
logger = logging.getLogger(__name__)
logger.info('Getting information from Cells API...')

# Instantiate hubmap-api-py-client.
client_url = 'https://cells.dev.hubmapconsortium.org/api/'
client = Client(client_url)

# The output path is resolved from the parent of the current working directory,
# so the script's output location depends on where it is launched from.
fpath = os.path.dirname(os.getcwd())
fpath = os.path.join(fpath, 'cells_index/cells.tsv')

# Open the TSV inside a context manager so that the file is flushed and closed
# even if a Cells API call fails part-way through.
with open(fpath, 'w', newline='') as csvfile:
    cellwriter = csv.writer(csvfile, delimiter='\t', quotechar='|', quoting=csv.QUOTE_MINIMAL)
    cellwriter.writerow(['gene_symbol', 'dataset_uuid', 'organ', 'cell_type'])

    # Load dataset and cell information using Cells API.

    # 1. All datasets
    datasets = client.select_datasets()
    logger.info('%s datasets', len(datasets))

    # 2. All cells in datasets.
    datasets = datasets.get_list()
    dataset_uuids = [d['uuid'] for d in datasets]

    cells_in_datasets = client.select_cells(where='dataset', has=dataset_uuids)
    logger.info('%s cells in datasets', len(cells_in_datasets))

    # 3. All genes
    genes = client.select_genes().get_list()
    logger.info('%s genes', len(genes))

    # DEBUG - use set of test genes for prototype.
    # NOTE(review): this override discards the gene list fetched above; remove it
    # to index every gene.
    genes = [{'gene_symbol': 'MMRN1'}]

    # Check every gene for presence in cells in datasets.
    for gene in genes:
        gene_symbol = gene['gene_symbol']
        try:
            # Find cells with the gene, and intersect with cells from datasets to find
            # cells with the gene in datasets.
            logger.info('Looking for cells with gene %s, rna', gene_symbol)
            cells_with_gene_rna = client.select_cells(where='gene', has=[f'{gene_symbol} > 1'],
                                                      genomic_modality='rna')
            logger.info('Looking for cells with gene %s, atac', gene_symbol)
            cells_with_gene_atac = client.select_cells(where='gene', has=[f'{gene_symbol} > 1'],
                                                       genomic_modality='atac')

            # Cells from all modalities
            cells_with_gene = cells_with_gene_rna | cells_with_gene_atac

            # Cells with gene in datasets
            cells_with_gene_in_datasets = cells_with_gene & cells_in_datasets
            cells_list = cells_with_gene_in_datasets.get_list()

            # Write one row the first time each cell type is seen for this gene.
            # NOTE(review): only the cell type is de-duplicated, so the dataset and
            # organ written are those of the first matching cell -- confirm that
            # distinct (cell type, dataset) pairs are not required.
            seen_cell_types = set()
            for c in cells_list:
                cell_type = c['cell_type']
                if cell_type not in seen_cell_types:
                    seen_cell_types.add(cell_type)
                    dataset_uuid = c['dataset']
                    organ = c['organ']
                    cellwriter.writerow([gene_symbol, dataset_uuid, organ, cell_type])

        except ClientError as err:
            # The genes list contains elements that are not actually genes, and that
            # result in errors from the client that are meaningless in this context.
            # Record them at debug level (hidden at the configured INFO level) rather
            # than discarding them without trace.
            logger.debug('Skipping %s: %s', gene_symbol, err)
3 changes: 2 additions & 1 deletion src/hs_ontology_api/cypher/genedetail.cypher
Original file line number Diff line number Diff line change
@@ -1,5 +1,6 @@
// GENE DETAIL
// Return detailed information on a gene, based on a input list of HGNC identifiers.
// Used by the genes endpoint.
// Used by the gene_detail endpoint.

CALL

Expand Down
21 changes: 21 additions & 0 deletions src/hs_ontology_api/cypher/geneslist.cypher
Original file line number Diff line number Diff line change
@@ -0,0 +1,21 @@
// GENES LIST
// Return information on genes in the UBKG
// Used by the genes_list endpoint.
//
// Each returned row holds hgnc_id, approved_symbol, approved_name and description.
// $skiprows and $limitrows are query parameters that select the page of rows returned.

CALL
{
// Walk Term <- Code <- Concept -> Definition, keeping HGNC codes whose concept has a
// REFSEQ definition and whose term relationship is of type PT or ACR.
// Each match yields one (hgnc_id, ret_key, ret_value, description) row, in which
// ret_key renames PT to 'approved_name' and ACR to 'approved_symbol'.
OPTIONAL MATCH (tGene:Term)<-[r]-(cGene:Code)<-[:CODE]-(pGene:Concept)-[:DEF]->(d:Definition) WHERE r.CUI=pGene.CUI AND cGene.SAB='HGNC' AND d.SAB='REFSEQ' AND type(r) IN ['PT','ACR'] RETURN toInteger(cGene.CODE) AS hgnc_id, CASE type(r) WHEN 'PT' THEN 'approved_name' WHEN 'ACR' THEN 'approved_symbol' ELSE type(r) END AS ret_key, tGene.name AS ret_value, d.DEF AS description
order by hgnc_id,ret_key
}
// Pivot approved_name and approved_symbol.
// The first WITH collects the term names for each (hgnc_id, ret_key, description) into a list;
// the second builds a map from ret_key to that list, so every map entry read below is a
// list of names rather than a single string.
WITH hgnc_id,ret_key, COLLECT(ret_value) AS values, description
WITH hgnc_id,apoc.map.fromLists(COLLECT(ret_key),COLLECT(values)) AS map, description
// OPTIONAL MATCH produces an all-null row when nothing matches; discard it.
WHERE hgnc_id IS NOT NULL
RETURN hgnc_id,
map['approved_symbol'] AS approved_symbol,
map['approved_name'] AS approved_name,
description
ORDER BY approved_symbol
// Pagination parameters to be added by calling function.
SKIP $skiprows
LIMIT $limitrows
2 changes: 2 additions & 0 deletions src/hs_ontology_api/cypher/geneslist_count.cypher
Original file line number Diff line number Diff line change
@@ -0,0 +1,2 @@
// Returns count of HGNC genes with RefSeq definitions in UBKG.
// The d.SAB='REFSEQ' filter matches the one applied by the genes list query, so that the
// count used for pagination agrees with the set of genes that query can return.
MATCH (cGene:Code)<-[:CODE]-(pGene:Concept)-[:DEF]->(d:Definition) WHERE cGene.SAB='HGNC' AND d.SAB='REFSEQ' RETURN COUNT(DISTINCT cGene) AS genelistcount
156 changes: 138 additions & 18 deletions src/hs_ontology_api/models/geneslist.py
Original file line number Diff line number Diff line change
@@ -1,40 +1,72 @@
# coding: utf-8

# JAS October 2023
# GenesFromCells model class
# Used by the genesfromcells endpoint.
# Provides information on genes identified by the Cells API--i.e., that have relevance to HuBMAP/SenNet.
# GenesList model class
# Used by the geneslist endpoint.
# Provides information on genes identified by either the UBKG or the Cells API--i.e., that have relevance to HuBMAP/SenNet.

from __future__ import absolute_import
from typing import List
from ubkg_api.models.base_model_ import Model
from ubkg_api.models import util

class GenesList(Model):
def __init__(self, hgnc_id=None, approved_symbol=None, approved_name=None, description=None, page=None):
    """GenesList - a model defined in OpenAPI

    :param hgnc_id: hgnc ID
    :type hgnc_id: str
    :param approved_symbol: sequence of approved symbols; the first element is stored
    :type approved_symbol: List[str]
    :param approved_name: sequence of approved names; the first element is stored
    :type approved_name: List[str]
    :param description: RefSeq description
    :type description: str
    :param page: page offset; converted with int() when supplied
    :type page: int
    """
    # Types for JSON objects
    self.openapi_types = {
        'hgnc_id': str,
        'approved_symbol': str,
        'approved_name': str,
        'description': str,
        'page': int
    }

    # Attribute mappings used by the base Model class to assert key/value pairs.
    self.attribute_map = {
        'hgnc_id': 'hgnc_id',
        'approved_symbol': 'approved_symbol',
        'approved_name': 'approved_name',
        'description': 'description',
        'page': 'page'
    }

    # Property assignments
    self._hgnc_id = hgnc_id

    # page defaults to None and int(None) raises TypeError, so convert only when
    # a value was supplied; this keeps no-argument construction working.
    self._page = None if page is None else int(page)

    # Keep the first element of each sequence; None or an empty sequence
    # becomes '' instead of raising IndexError.
    self._approved_symbol = approved_symbol[0] if approved_symbol else ''
    self._approved_name = approved_name[0] if approved_name else ''

    self._description = description

def serialize(self):
# Key/value format of response.
return {
"hgnc_ids": self._hgnc_ids
"hgnc_id": self._hgnc_id,
"approved_symbol": self._approved_symbol,
"approved_name": self._approved_name,
"description": self._description,
"page": self._page
}

@classmethod
Expand All @@ -49,23 +81,111 @@ def from_dict(cls, dikt) -> 'GenesList':
return util.deserialize_model(dikt, cls)

@property
def hgnc_ids(self):
"""Gets the hgnc_id of this GenesFromCells.
def hgnc_id(self):
"""Gets the hgnc_id of this GenesList.
Current HGNC approved id for the gene.
:return: The hgnc_id of this GeneDetail.
:rtype: str
"""
return self._hgnc_ids
return self._hgnc_id

@hgnc_ids.setter
def hgnc_ids(self, hgnc_ids):
"""Sets the hgnc_id of this GenesFromCells.
@hgnc_id.setter
def hgnc_id(self, hgnc_id):
"""Sets the hgnc_id of this GenesList.
Current HGNC approved id for the gene.
:param hgnc_id: The hgnc_id of this Gene
:param hgnc_id: The hgnc_id of this gene
:type approved_id: str
"""

self._hgnc_ids = hgnc_ids
self._hgnc_id = hgnc_id

@property
def approved_symbol(self):
"""Gets the approved_symbol of this GenesList.
Current HGNC approved symbol for the gene.
:return: The approved_symbol of this GenesList.
:rtype: str
"""
return self._approved_symbol

@approved_symbol.setter
def approved_symbol(self, approved_symbol):
"""Sets the approved_symbol of this GenesList.
Current HGNC approved symbol for the gene.
:param approved_symbol: The approved symbol of this Gene
:type approved_symbol: str
"""

self._approved_symbol = approved_symbol

@property
def approved_name(self):
"""Gets the approved_name of this GenesList.
Current HGNC approved name for the gene.
:return: The approved_name of this GenesList.
:rtype: str
"""
return self._approved_name

@approved_name.setter
def approved_name(self, approved_name):
"""Sets the approved_name of this GenesList.
Current HGNC approved name for the gene.
:param approved_name: The approved_name of this Gene
:type approved_name: str
"""

self._approved_name = approved_name

@property
def description(self):
"""Gets the description of this GenesList.
RefSeq summary for the gene.
:return: The description of this GenesList.
:rtype: str
"""
return self._description

@description.setter
def description(self, description):
"""Sets the description of this GenesList.
RefSeq summary for the gene.
:param description: The description of this Gene
:type description: str
"""

self._description = description

@property
def page(self):
"""Gets the page of this GenesList.
Offset page.
:return: The page of this GenesList.
:rtype: int
"""
return self._page

@page.setter
def page(self, page):
    """Sets the page of this GenesList.
    Offset page.
    :param page: The page of this GenesList.
    :type page: int
    """
    # Registered on the page property. Registering it on description would
    # replace the description setter and leave page without a setter.
    self._page = page
1 change: 1 addition & 0 deletions src/hs_ontology_api/routes/__init__.py
Original file line number Diff line number Diff line change
@@ -0,0 +1 @@

Loading

0 comments on commit a3a58c9

Please sign in to comment.