Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

[24.0] Check various preconditions in FeatureLocationIndexDataProvider #18283

Merged
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
75 changes: 43 additions & 32 deletions lib/galaxy/visualization/data_providers/genome.py
Original file line number Diff line number Diff line change
Expand Up @@ -39,6 +39,7 @@
GFFReaderWrapper,
parse_gff_attributes,
)
from galaxy.exceptions import MessageException
from galaxy.model import DatasetInstance
from galaxy.visualization.data_providers.basic import BaseDataProvider
from galaxy.visualization.data_providers.cigar import get_ref_based_read_seq_and_cigar
Expand Down Expand Up @@ -106,43 +107,53 @@ def __init__(self, converted_dataset):
self.converted_dataset = converted_dataset

def get_data(self, query):
    """
    Return the index entries whose line starts with ``query``.

    The converted dataset is read as a text index: the first line holds an
    integer line length, and the file is then addressed in fixed-size
    records of that length. The binary search below presumes the records
    are sorted and lower-cased (verify against the converter that writes
    this file).

    :param query: prefix to look up; it is lower-cased before comparison.
    :returns: a list with one entry per matching line, each entry being the
        whitespace-separated fields of the line minus the first field.
    :raises MessageException: if the converted dataset is missing or not in
        an OK state, if the file is empty, or if its first line is not a
        positive integer.
    """
    dataset = self.converted_dataset
    if dataset is None or not dataset.is_ok:
        raise MessageException("The dataset is not available or is in an error state.")

    # Resolve the path once; it is needed both for reading and for sizing.
    path = dataset.get_file_name()
    result = []
    with open(path) as textloc_file:
        line = textloc_file.readline()
        if not line:
            raise MessageException("The dataset is empty.")
        try:
            line_len = int(line)
        except ValueError as err:
            raise MessageException(f"Expected an integer at first line, but found: '{line}'") from err
        if line_len < 1:
            # A zero or negative length would break the record arithmetic
            # (division by zero / negative seek offsets) further down.
            raise MessageException(f"The first line must be a positive integer, but found: {line_len}")

        file_len = os.path.getsize(path)
        query = query.lower()

        # Find query in file using binary search over fixed-size records.
        low = 0
        high = file_len // line_len
        while low < high:
            mid = (low + high) // 2
            textloc_file.seek(mid * line_len)

            # Compare line with query and update low, high.
            if textloc_file.readline() < query:
                low = mid + 1
            else:
                high = mid

        # Need to move back one line because last line read may be included
        # in results.
        textloc_file.seek(low * line_len)

        # At right point in file, generate hits.
        while True:
            line = textloc_file.readline()
            # readline() returns "" at end of file. Without the explicit
            # check an empty query would loop forever, because
            # "".startswith("") is true.
            if not line or not line.startswith(query):
                break
            if line[-1:] == "\n":
                line = line[:-1]
            result.append(line.split()[1:])

    return result


Expand Down
Loading