Skip to content

Commit

Permalink
More type fixes for data providers...
Browse files Browse the repository at this point in the history
  • Loading branch information
jmchilton authored and nsoranzo committed Jul 31, 2024
1 parent 3d61491 commit 73711a4
Show file tree
Hide file tree
Showing 2 changed files with 18 additions and 40 deletions.
25 changes: 0 additions & 25 deletions lib/galaxy/visualization/data_providers/basic.py
Original file line number Diff line number Diff line change
@@ -1,6 +1,4 @@
import sys
from json import loads
from typing import Iterator

from galaxy.datatypes.tabular import Tabular
from galaxy.model import DatasetInstance
Expand Down Expand Up @@ -30,29 +28,6 @@ def __init__(
self.dependencies = dependencies
self.error_max_vals = error_max_vals

def get_iterator(self, data_file, chrom, start, end, **kwargs) -> Iterator[str]:
"""
Return an iterator that provides data in the region chrom:start-end.

This version is a placeholder: its body unconditionally raises and none
of the arguments are read.

:param data_file: accepted but not used here.
:param chrom: accepted but not used here.
:param start: accepted but not used here.
:param end: accepted but not used here.
:param kwargs: accepted but not used here.
:raises Exception: always, with the message "Unimplemented Function".
"""
raise Exception("Unimplemented Function")

def process_data(self, iterator, start_val=0, max_vals=None, **kwargs):
"""
Process data from an iterator to a format that can be provided to client.

This version is a placeholder: its body unconditionally raises and none
of the arguments are read.

:param iterator: accepted but not used here.
:param start_val: accepted but not used here; defaults to 0.
:param max_vals: accepted but not used here; defaults to None.
:param kwargs: accepted but not used here.
:raises Exception: always, with the message "Unimplemented Function".
"""
raise Exception("Unimplemented Function")

def get_data(self, chrom, start, end, start_val=0, max_vals=sys.maxsize, **kwargs):
"""
Returns data as specified by kwargs. start_val is the first element to
return and max_vals indicates the number of values to return.

Obtains an iterator from get_iterator and returns whatever process_data
produces from it; start_val, max_vals and kwargs are forwarded to
process_data only.

Return value must be a dictionary with the following attributes:
dataset_type, data
"""
# NOTE(review): get_iterator is declared in this class as
# (data_file, chrom, start, end, **kwargs), but only three positional
# arguments are passed here, so chrom would bind to data_file and `end`
# would be missing. kwargs are not forwarded to get_iterator either.
# Confirm the intended call before relying on this path.
iterator = self.get_iterator(chrom, start, end)
return self.process_data(iterator, start_val, max_vals, **kwargs)


class ColumnDataProvider(BaseDataProvider):
"""Data provider for columnar data"""
Expand Down
33 changes: 18 additions & 15 deletions lib/galaxy/visualization/data_providers/genome.py
Original file line number Diff line number Diff line change
Expand Up @@ -44,6 +44,9 @@
from galaxy.visualization.data_providers.basic import BaseDataProvider
from galaxy.visualization.data_providers.cigar import get_ref_based_read_seq_and_cigar


IntWebParam = Union[str, int]

#
# Utility functions.
#
Expand Down Expand Up @@ -194,7 +197,7 @@ def valid_chroms(self):
"""
return None # by default

def has_data(self, chrom, start, end, **kwargs):
def has_data(self, chrom):
"""
Returns true if dataset has data in the specified genome window, false
otherwise.
Expand All @@ -214,13 +217,13 @@ def get_iterator(self, data_file, chrom, start, end, **kwargs) -> Iterator[str]:
"""
raise Exception("Unimplemented Function")

def process_data(self, iterator, start_val=0, max_vals=None, **kwargs):
def process_data(self, iterator, start_val=0, max_vals=sys.maxsize, **kwargs):
"""
Process data from an iterator to a format that can be provided to client.
"""
raise Exception("Unimplemented Function")

def get_data(self, chrom=None, low=None, high=None, start_val=0, max_vals=sys.maxsize, **kwargs):
def get_data(self, chrom: str, start: IntWebParam, end: IntWebParam, start_val=0, max_vals=sys.maxsize, **kwargs):
"""
Returns data in region defined by chrom, start, and end. start_val and
max_vals are used to denote the data to return: start_val is the first element to
Expand All @@ -229,7 +232,7 @@ def get_data(self, chrom=None, low=None, high=None, start_val=0, max_vals=sys.ma
Return value must be a dictionary with the following attributes:
dataset_type, data
"""
start, end = int(low), int(high)
start, end = int(start), int(end)
with self.open_data_file() as data_file:
iterator = self.get_iterator(data_file, chrom, start, end, **kwargs)
data = self.process_data(iterator, start_val, max_vals, start=start, end=end, **kwargs)
Expand Down Expand Up @@ -399,7 +402,7 @@ class IntervalDataProvider(GenomeDataProvider):
def get_iterator(self, data_file, chrom, start, end, **kwargs):
raise Exception("Unimplemented Function")

def process_data(self, iterator, start_val=0, max_vals=None, **kwargs):
def process_data(self, iterator, start_val=0, max_vals=sys.maxsize, **kwargs):
"""
Provides
"""
Expand Down Expand Up @@ -481,7 +484,7 @@ class BedDataProvider(GenomeDataProvider):
def get_iterator(self, data_file, chrom, start, end, **kwargs):
raise Exception("Unimplemented Method")

def process_data(self, iterator, start_val=0, max_vals=None, **kwargs):
def process_data(self, iterator, start_val=0, max_vals=sys.maxsize, **kwargs):
"""
Provides
"""
Expand Down Expand Up @@ -619,7 +622,7 @@ class VcfDataProvider(GenomeDataProvider):

dataset_type = "variant"

def process_data(self, iterator, start_val=0, max_vals=None, **kwargs):
def process_data(self, iterator, start_val=0, max_vals=sys.maxsize, **kwargs):
"""
Returns a dict with the following attributes::
Expand Down Expand Up @@ -841,7 +844,7 @@ def process_data(
self,
iterator,
start_val=0,
max_vals=None,
max_vals=sys.maxsize,
ref_seq=None,
iterator_type="nth",
mean_depth=None,
Expand Down Expand Up @@ -1117,7 +1120,7 @@ def has_data(self, chrom):
f.close()
return all_dat is not None

def get_data(self, chrom, start, end, start_val=0, max_vals=None, num_samples=1000, **kwargs):
def get_data(self, chrom: str, start, end, start_val=0, max_vals=sys.maxsize, **kwargs):
start = int(start)
end = int(end)

Expand Down Expand Up @@ -1189,7 +1192,7 @@ def summarize_region(bbi, chrom, start, end, num_points):
return result

# Approach is different depending on region size.
num_samples = int(num_samples)
num_samples = int(kwargs.get("num_samples", 100))
if end - start < num_samples:
# Get values for individual bases in region, including start and end.
# To do this, need to increase end to next base and request number of points.
Expand Down Expand Up @@ -1271,7 +1274,7 @@ def get_iterator(self, data_file, chrom, start, end, **kwargs) -> Iterator[str]:

return data_file.find(chrom, start, end)

def process_data(self, iterator, start_val=0, max_vals=None, **kwargs):
def process_data(self, iterator, start_val=0, max_vals=sys.maxsize, **kwargs):
results = []
message = None
with open(self.original_dataset.get_file_name()) as source:
Expand Down Expand Up @@ -1345,7 +1348,7 @@ def features_in_region_iter():

return features_in_region_iter()

def process_data(self, iterator, start_val=0, max_vals=None, **kwargs):
def process_data(self, iterator, start_val=0, max_vals=sys.maxsize, **kwargs):
"""
Process data from an iterator to a format that can be provided to client.
"""
Expand Down Expand Up @@ -1373,7 +1376,7 @@ class GtfTabixDataProvider(TabixDataProvider):
Returns data from GTF datasets that are indexed via tabix.
"""

def process_data(self, iterator, start_val=0, max_vals=None, **kwargs):
def process_data(self, iterator, start_val=0, max_vals=sys.maxsize, **kwargs):
# Loop through lines and group by transcript_id; each group is a feature.

# TODO: extend this code or use code in gff_util to process GFF/3 as well
Expand Down Expand Up @@ -1428,7 +1431,7 @@ class ENCODEPeakDataProvider(GenomeDataProvider):
def get_iterator(self, data_file, chrom, start, end, **kwargs):
raise Exception("Unimplemented Method")

def process_data(self, iterator, start_val=0, max_vals=None, **kwargs):
def process_data(self, iterator, start_val=0, max_vals=sys.maxsize, **kwargs):
"""
Provides
"""
Expand Down Expand Up @@ -1528,7 +1531,7 @@ def get_filters(self):


class ChromatinInteractionsDataProvider(GenomeDataProvider):
def process_data(self, iterator, start_val=0, max_vals=None, **kwargs):
def process_data(self, iterator, start_val=0, max_vals=sys.maxsize, **kwargs):
"""
Provides
"""
Expand Down

0 comments on commit 73711a4

Please sign in to comment.