From 2395e6579b28a8f7130b1980e3000768502ff522 Mon Sep 17 00:00:00 2001 From: Matthias Bernt Date: Thu, 15 Feb 2024 10:44:11 +0100 Subject: [PATCH] column_parameter: use column_names metadata if present makes column_parameter work for all datatypes that set column_names and still falls back to reading the first line (tab separated) otherwise fixes: https://github.com/galaxyproject/galaxy/issues/17468 --- lib/galaxy/tools/parameters/basic.py | 38 +++++++++++++++++--------- test/functional/tools/column_param.xml | 26 ++++++++++++++++-- 2 files changed, 49 insertions(+), 15 deletions(-) diff --git a/lib/galaxy/tools/parameters/basic.py b/lib/galaxy/tools/parameters/basic.py index 3cf9e7d377d6..a3e4fb9742ad 100644 --- a/lib/galaxy/tools/parameters/basic.py +++ b/lib/galaxy/tools/parameters/basic.py @@ -1479,20 +1479,32 @@ def get_options(self, trans, other_values): Show column labels rather than c1..cn if use_header_names=True """ options: List[Tuple[str, Union[str, Tuple[str, str]], bool]] = [] - if self.usecolnames: # read first row - assume is a header with metadata useful for making good choices + # if available use column_names metadata for option names + # otherwise read first row - assume is a header with tab separated names + if self.usecolnames: dataset = other_values.get(self.data_ref, None) - try: - with open(dataset.get_file_name()) as f: - head = f.readline() - cnames = head.rstrip("\n\r ").split("\t") - column_list = [("%d" % (i + 1), "c%d: %s" % (i + 1, x)) for i, x in enumerate(cnames)] - if self.numerical: # If numerical was requested, filter columns based on metadata - if hasattr(dataset, "metadata") and hasattr(dataset.metadata, "column_types"): - if len(dataset.metadata.column_types) >= len(cnames): - numerics = [i for i, x in enumerate(dataset.metadata.column_types) if x in ["int", "float"]] - column_list = [column_list[i] for i in numerics] - except Exception: - column_list = self.get_column_list(trans, other_values) + if ( + hasattr(dataset, "metadata") 
and hasattr(dataset.metadata, "column_names") + and dataset.metadata.element_is_set("column_names") + ): + log.error(f"column_names {dataset.metadata.column_names}") + column_list = [ + ("%d" % (i + 1), "c%d: %s" % (i + 1, x)) for i, x in enumerate(dataset.metadata.column_names) + ] + else: + try: + with open(dataset.get_file_name()) as f: + head = f.readline() + cnames = head.rstrip("\n\r ").split("\t") + column_list = [("%d" % (i + 1), "c%d: %s" % (i + 1, x)) for i, x in enumerate(cnames)] + except Exception: + column_list = self.get_column_list(trans, other_values) + if self.numerical: # If numerical was requested, filter columns based on metadata + if hasattr(dataset, "metadata") and hasattr(dataset.metadata, "column_types"): + if len(dataset.metadata.column_types) >= len(column_list): + numerics = [i for i, x in enumerate(dataset.metadata.column_types) if x in ["int", "float"]] + column_list = [column_list[i] for i in numerics] else: column_list = self.get_column_list(trans, other_values) for col in column_list: diff --git a/test/functional/tools/column_param.xml b/test/functional/tools/column_param.xml index e62b33a2e31f..2889e074f91b 100644 --- a/test/functional/tools/column_param.xml +++ b/test/functional/tools/column_param.xml @@ -1,11 +1,16 @@ '$output1' && +cut + -f '$col' + #if $input1.is_of_type('csv') + -d',' + #end if + '$input1' > '$output1' && echo "col $col" > '$output2' && echo "col_names $col_names" >> '$output2' ]]> - + @@ -46,5 +51,22 @@ echo "col_names $col_names" >> '$output2' + + + + + + + + + + + + + + + + +