From f821e3b1ed56f8992ee4d7fbeaf2ffba6ac3e3fb Mon Sep 17 00:00:00 2001 From: John Chilton Date: Tue, 30 Jul 2024 16:10:50 -0400 Subject: [PATCH] More type fixes for data providers... --- .../visualization/data_providers/basic.py | 25 -------------- .../visualization/data_providers/genome.py | 33 ++++++++++--------- 2 files changed, 18 insertions(+), 40 deletions(-) diff --git a/lib/galaxy/visualization/data_providers/basic.py b/lib/galaxy/visualization/data_providers/basic.py index 9935060f7658..686b5e870c24 100644 --- a/lib/galaxy/visualization/data_providers/basic.py +++ b/lib/galaxy/visualization/data_providers/basic.py @@ -1,6 +1,4 @@ -import sys from json import loads -from typing import Iterator from galaxy.datatypes.tabular import Tabular from galaxy.model import DatasetInstance @@ -30,29 +28,6 @@ def __init__( self.dependencies = dependencies self.error_max_vals = error_max_vals - def get_iterator(self, data_file, chrom, start, end, **kwargs) -> Iterator[str]: - """ - Returns an iterator that provides data in the region chrom:start-end - """ - raise Exception("Unimplemented Function") - - def process_data(self, iterator, start_val=0, max_vals=None, **kwargs): - """ - Process data from an iterator to a format that can be provided to client. - """ - raise Exception("Unimplemented Function") - - def get_data(self, chrom, start, end, start_val=0, max_vals=sys.maxsize, **kwargs): - """ - Returns data as specified by kwargs. start_val is the first element to - return and max_vals indicates the number of values to return. - - Return value must be a dictionary with the following attributes: - dataset_type, data - """ - iterator = self.get_iterator(chrom, start, end) - return self.process_data(iterator, start_val, max_vals, **kwargs) - class ColumnDataProvider(BaseDataProvider): """Data provider for columnar data""" diff --git a/lib/galaxy/visualization/data_providers/genome.py b/lib/galaxy/visualization/data_providers/genome.py index dbdae6d771b9..1063a886f0bc 100644 --- a/lib/galaxy/visualization/data_providers/genome.py +++ b/lib/galaxy/visualization/data_providers/genome.py @@ -44,6 +44,9 @@ from galaxy.visualization.data_providers.basic import BaseDataProvider from galaxy.visualization.data_providers.cigar import get_ref_based_read_seq_and_cigar + +IntWebParam = Union[str, int] + # # Utility functions. # @@ -194,7 +197,7 @@ def valid_chroms(self): """ return None # by default - def has_data(self, chrom, start, end, **kwargs): + def has_data(self, chrom): """ Returns true if dataset has data in the specified genome window, false otherwise. @@ -214,13 +217,13 @@ def get_iterator(self, data_file, chrom, start, end, **kwargs) -> Iterator[str]: """ raise Exception("Unimplemented Function") - def process_data(self, iterator, start_val=0, max_vals=None, **kwargs): + def process_data(self, iterator, start_val=0, max_vals=sys.maxsize, **kwargs): """ Process data from an iterator to a format that can be provided to client. """ raise Exception("Unimplemented Function") - def get_data(self, chrom=None, low=None, high=None, start_val=0, max_vals=sys.maxsize, **kwargs): + def get_data(self, chrom: str, start: IntWebParam, end: IntWebParam, start_val=0, max_vals=sys.maxsize, **kwargs): """ Returns data in region defined by chrom, start, and end. start_val and max_vals are used to denote the data to return: start_val is the first element to @@ -229,7 +232,7 @@ def get_data(self, chrom=None, low=None, high=None, start_val=0, max_vals=sys.ma Return value must be a dictionary with the following attributes: dataset_type, data """ - start, end = int(low), int(high) + start, end = int(start), int(end) with self.open_data_file() as data_file: iterator = self.get_iterator(data_file, chrom, start, end, **kwargs) data = self.process_data(iterator, start_val, max_vals, start=start, end=end, **kwargs) @@ -399,7 +402,7 @@ class IntervalDataProvider(GenomeDataProvider): def get_iterator(self, data_file, chrom, start, end, **kwargs): raise Exception("Unimplemented Function") - def process_data(self, iterator, start_val=0, max_vals=None, **kwargs): + def process_data(self, iterator, start_val=0, max_vals=sys.maxsize, **kwargs): """ Provides """ @@ -481,7 +484,7 @@ class BedDataProvider(GenomeDataProvider): def get_iterator(self, data_file, chrom, start, end, **kwargs): raise Exception("Unimplemented Method") - def process_data(self, iterator, start_val=0, max_vals=None, **kwargs): + def process_data(self, iterator, start_val=0, max_vals=sys.maxsize, **kwargs): """ Provides """ @@ -619,7 +622,7 @@ class VcfDataProvider(GenomeDataProvider): dataset_type = "variant" - def process_data(self, iterator, start_val=0, max_vals=None, **kwargs): + def process_data(self, iterator, start_val=0, max_vals=sys.maxsize, **kwargs): """ Returns a dict with the following attributes:: @@ -841,7 +844,7 @@ def process_data( self, iterator, start_val=0, - max_vals=None, + max_vals=sys.maxsize, ref_seq=None, iterator_type="nth", mean_depth=None, @@ -1117,7 +1120,7 @@ def has_data(self, chrom): f.close() return all_dat is not None - def get_data(self, chrom, start, end, start_val=0, max_vals=None, num_samples=1000, **kwargs): + def get_data(self, chrom: str, start, end, start_val=0, max_vals=sys.maxsize, **kwargs): start = int(start) end = int(end) @@ -1189,7 +1192,7 @@ def summarize_region(bbi, chrom, start, end, num_points): return result # Approach is different depending on region size. - num_samples = int(num_samples) + num_samples = int(kwargs.get("num_samples", 100)) if end - start < num_samples: # Get values for individual bases in region, including start and end. # To do this, need to increase end to next base and request number of points. @@ -1271,7 +1274,7 @@ def get_iterator(self, data_file, chrom, start, end, **kwargs) -> Iterator[str]: return data_file.find(chrom, start, end) - def process_data(self, iterator, start_val=0, max_vals=None, **kwargs): + def process_data(self, iterator, start_val=0, max_vals=sys.maxsize, **kwargs): results = [] message = None with open(self.original_dataset.get_file_name()) as source: @@ -1345,7 +1348,7 @@ def features_in_region_iter(): return features_in_region_iter() - def process_data(self, iterator, start_val=0, max_vals=None, **kwargs): + def process_data(self, iterator, start_val=0, max_vals=sys.maxsize, **kwargs): """ Process data from an iterator to a format that can be provided to client. """ @@ -1373,7 +1376,7 @@ class GtfTabixDataProvider(TabixDataProvider): Returns data from GTF datasets that are indexed via tabix. """ - def process_data(self, iterator, start_val=0, max_vals=None, **kwargs): + def process_data(self, iterator, start_val=0, max_vals=sys.maxsize, **kwargs): # Loop through lines and group by transcript_id; each group is a feature. # TODO: extend this code or use code in gff_util to process GFF/3 as well @@ -1428,7 +1431,7 @@ class ENCODEPeakDataProvider(GenomeDataProvider): def get_iterator(self, data_file, chrom, start, end, **kwargs): raise Exception("Unimplemented Method") - def process_data(self, iterator, start_val=0, max_vals=None, **kwargs): + def process_data(self, iterator, start_val=0, max_vals=sys.maxsize, **kwargs): """ Provides """ @@ -1528,7 +1531,7 @@ def get_filters(self): class ChromatinInteractionsDataProvider(GenomeDataProvider): - def process_data(self, iterator, start_val=0, max_vals=None, **kwargs): + def process_data(self, iterator, start_val=0, max_vals=sys.maxsize, **kwargs): """ Provides """