Skip to content

Commit

Permalink
Merge pull request #18283 from davelopez/24.0_fix_feature_location_in…
Browse files Browse the repository at this point in the history
…dex_data_provider

[24.0] Check various preconditions in FeatureLocationIndexDataProvider
  • Loading branch information
mvdbeek authored May 31, 2024
2 parents 6901b1f + da202e0 commit 10f9b3a
Showing 1 changed file with 43 additions and 32 deletions.
75 changes: 43 additions & 32 deletions lib/galaxy/visualization/data_providers/genome.py
Original file line number Diff line number Diff line change
Expand Up @@ -39,6 +39,7 @@
GFFReaderWrapper,
parse_gff_attributes,
)
from galaxy.exceptions import MessageException
from galaxy.model import DatasetInstance
from galaxy.visualization.data_providers.basic import BaseDataProvider
from galaxy.visualization.data_providers.cigar import get_ref_based_read_seq_and_cigar
Expand Down Expand Up @@ -106,43 +107,53 @@ def __init__(self, converted_dataset):
self.converted_dataset = converted_dataset

def get_data(self, query):
    """
    Return the index entries whose text starts with ``query``.

    The converted dataset is read as a text index whose first line holds an
    integer line length; every lookup seeks to a multiple of that length, so
    all lines are expected to be padded to the same width. The binary search
    presumably relies on the entries being sorted -- confirm against the
    converter that writes this file.

    :param query: prefix to look for; it is lower-cased before comparison.
    :returns: one list per matching line, holding that line's
              whitespace-separated fields without the first field.
    :raises MessageException: if the converted dataset is missing or not ok,
                              if the file is empty, or if its first line is
                              not a positive integer.
    """
    if self.converted_dataset is None or not self.converted_dataset.is_ok:
        raise MessageException("The dataset is not available or is in an error state.")

    # Resolve the path once; it is needed for both opening and sizing the file.
    filename = self.converted_dataset.get_file_name()
    result = []
    with open(filename) as textloc_file:
        # The first line declares the fixed length of every line in the file.
        line = textloc_file.readline()
        if not line:
            raise MessageException("The dataset is empty.")
        try:
            line_len = int(line)
        except ValueError as err:
            raise MessageException(f"Expected an integer at first line, but found: '{line}'") from err
        if line_len < 1:
            raise MessageException(f"The first line must be a positive integer, but found: {line_len}")

        file_len = os.path.getsize(filename)
        query = query.lower()

        # Find query in file using binary search over fixed-length lines.
        low = 0
        high = file_len // line_len
        while low < high:
            mid: int = (low + high) // 2
            textloc_file.seek(mid * line_len)

            # Compare line with query and update low, high.
            line = textloc_file.readline()
            if line < query:
                low = mid + 1
            else:
                high = mid

        # Reposition at the first candidate line: the last line read by the
        # search may itself belong to the results.
        textloc_file.seek(low * line_len)

        # At right point in file, generate hits.
        while True:
            line = textloc_file.readline()
            # readline() returns "" at end of file, and "".startswith("") is
            # True, so without the explicit EOF check an empty query would
            # loop forever.
            if not line or not line.startswith(query):
                break
            # split() discards the padding and the trailing newline; the
            # first field is dropped and the remaining fields are the hit.
            result.append(line.split()[1:])

    return result


Expand Down

0 comments on commit 10f9b3a

Please sign in to comment.