From da202e09f13c0be9807b5e963afcb25476a6a243 Mon Sep 17 00:00:00 2001 From: davelopez <46503462+davelopez@users.noreply.github.com> Date: Fri, 31 May 2024 15:03:59 +0200 Subject: [PATCH] Check various preconditions in FeatureLocationIndexDataProvider And raise appropriate error otherwise --- .../visualization/data_providers/genome.py | 75 +++++++++++-------- 1 file changed, 43 insertions(+), 32 deletions(-) diff --git a/lib/galaxy/visualization/data_providers/genome.py b/lib/galaxy/visualization/data_providers/genome.py index cc9e9f48ad6f..739c24eba958 100644 --- a/lib/galaxy/visualization/data_providers/genome.py +++ b/lib/galaxy/visualization/data_providers/genome.py @@ -39,6 +39,7 @@ GFFReaderWrapper, parse_gff_attributes, ) +from galaxy.exceptions import MessageException from galaxy.model import DatasetInstance from galaxy.visualization.data_providers.basic import BaseDataProvider from galaxy.visualization.data_providers.cigar import get_ref_based_read_seq_and_cigar @@ -106,43 +107,53 @@ def __init__(self, converted_dataset): self.converted_dataset = converted_dataset def get_data(self, query): + if self.converted_dataset is None or not self.converted_dataset.is_ok: + raise MessageException("The dataset is not available or is in an error state.") # Init. - textloc_file = open(self.converted_dataset.get_file_name()) - line_len = int(textloc_file.readline()) - file_len = os.path.getsize(self.converted_dataset.get_file_name()) - query = query.lower() - - # Find query in file using binary search. - low = 0 - high = int(file_len / line_len) - while low < high: - mid: int = (low + high) // 2 - position = mid * line_len - textloc_file.seek(position) - - # Compare line with query and update low, high. + result = [] + with open(self.converted_dataset.get_file_name()) as textloc_file: line = textloc_file.readline() - if line < query: - low = mid + 1 - else: - high = mid + if not line: + raise MessageException("The dataset is empty.") + try: + line_len = int(line) + except ValueError: + raise MessageException(f"Expected an integer at first line, but found: '{line}'") + if line_len < 1: + raise MessageException(f"The first line must be a positive integer, but found: {line_len}") + + file_len = os.path.getsize(self.converted_dataset.get_file_name()) + query = query.lower() + + # Find query in file using binary search. + low = 0 + high = int(file_len / line_len) + while low < high: + mid: int = (low + high) // 2 + position = mid * line_len + textloc_file.seek(position) + + # Compare line with query and update low, high. + line = textloc_file.readline() + if line < query: + low = mid + 1 + else: + high = mid - # Need to move back one line because last line read may be included in - # results. - position = low * line_len - textloc_file.seek(position) + # Need to move back one line because last line read may be included in + # results. + position = low * line_len + textloc_file.seek(position) - # At right point in file, generate hits. - result = [] - while True: - line = textloc_file.readline() - if not line.startswith(query): - break - if line[-1:] == "\n": - line = line[:-1] - result.append(line.split()[1:]) + # At right point in file, generate hits. + while True: + line = textloc_file.readline() + if not line.startswith(query): + break + if line[-1:] == "\n": + line = line[:-1] + result.append(line.split()[1:]) - textloc_file.close() return result