From 2395e6579b28a8f7130b1980e3000768502ff522 Mon Sep 17 00:00:00 2001 From: Matthias Bernt Date: Thu, 15 Feb 2024 10:44:11 +0100 Subject: [PATCH 1/2] column_parameter: use column_names metadata if present makes column_parameter work for all datatypes setting column_names and still fallback to try to take the first line (tab separated) fixes: https://github.com/galaxyproject/galaxy/issues/17468 --- lib/galaxy/tools/parameters/basic.py | 38 +++++++++++++++++--------- test/functional/tools/column_param.xml | 26 ++++++++++++++++-- 2 files changed, 49 insertions(+), 15 deletions(-) diff --git a/lib/galaxy/tools/parameters/basic.py b/lib/galaxy/tools/parameters/basic.py index 3cf9e7d377d6..a3e4fb9742ad 100644 --- a/lib/galaxy/tools/parameters/basic.py +++ b/lib/galaxy/tools/parameters/basic.py @@ -1479,20 +1479,32 @@ def get_options(self, trans, other_values): Show column labels rather than c1..cn if use_header_names=True """ options: List[Tuple[str, Union[str, Tuple[str, str]], bool]] = [] - if self.usecolnames: # read first row - assume is a header with metadata useful for making good choices + # if available use column_names metadata for option names + # otherwise read first row - assume is a header with tab separated names + if self.usecolnames: dataset = other_values.get(self.data_ref, None) - try: - with open(dataset.get_file_name()) as f: - head = f.readline() - cnames = head.rstrip("\n\r ").split("\t") - column_list = [("%d" % (i + 1), "c%d: %s" % (i + 1, x)) for i, x in enumerate(cnames)] - if self.numerical: # If numerical was requested, filter columns based on metadata - if hasattr(dataset, "metadata") and hasattr(dataset.metadata, "column_types"): - if len(dataset.metadata.column_types) >= len(cnames): - numerics = [i for i, x in enumerate(dataset.metadata.column_types) if x in ["int", "float"]] - column_list = [column_list[i] for i in numerics] - except Exception: - column_list = self.get_column_list(trans, other_values) + if ( + hasattr(dataset, 
"metadata") + and hasattr(dataset.metadata, "column_names") + and dataset.metadata.element_is_set("column_names") + ): + log.error(f"column_names {dataset.metadata.column_names}") + column_list = [ + ("%d" % (i + 1), "c%d: %s" % (i + 1, x)) for i, x in enumerate(dataset.metadata.column_names) + ] + else: + try: + with open(dataset.get_file_name()) as f: + head = f.readline() + cnames = head.rstrip("\n\r ").split("\t") + column_list = [("%d" % (i + 1), "c%d: %s" % (i + 1, x)) for i, x in enumerate(cnames)] + except Exception: + column_list = self.get_column_list(trans, other_values) + if self.numerical: # If numerical was requested, filter columns based on metadata + if hasattr(dataset, "metadata") and hasattr(dataset.metadata, "column_types"): + if len(dataset.metadata.column_types) >= len(column_list): + numerics = [i for i, x in enumerate(dataset.metadata.column_types) if x in ["int", "float"]] + column_list = [column_list[i] for i in numerics] else: column_list = self.get_column_list(trans, other_values) for col in column_list: diff --git a/test/functional/tools/column_param.xml b/test/functional/tools/column_param.xml index e62b33a2e31f..2889e074f91b 100644 --- a/test/functional/tools/column_param.xml +++ b/test/functional/tools/column_param.xml @@ -1,11 +1,16 @@ '$output1' && +cut + -f '$col' + #if $input1.is_of_type('csv') + -d',' + #end if + '$input1' > '$output1' && echo "col $col" > '$output2' && echo "col_names $col_names" >> '$output2' ]]> - + @@ -46,5 +51,22 @@ echo "col_names $col_names" >> '$output2' + + + + + + + + + + + + + + + + + From aefbdab725175b5f15e7f3e7d9d94f65181b0709 Mon Sep 17 00:00:00 2001 From: Matthias Bernt Date: Sun, 18 Feb 2024 12:08:11 +0100 Subject: [PATCH 2/2] docs --- lib/galaxy/tool_util/xsd/galaxy.xsd | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/lib/galaxy/tool_util/xsd/galaxy.xsd b/lib/galaxy/tool_util/xsd/galaxy.xsd index d9885d4cce71..bc439214d520 100644 --- a/lib/galaxy/tool_util/xsd/galaxy.xsd +++ 
b/lib/galaxy/tool_util/xsd/galaxy.xsd @@ -3424,7 +3424,9 @@ $attribute_list:data_ref,dynamic_options,display,multiple:5 #### ``data_column`` -This parameter type is used to select columns from a parameter. +This parameter type is used to select columns from a data parameter. +It uses the ``column_names`` metadata if present (only since 24.0) +and, as a fallback, the tab-separated values of the first line. $attribute_list:force_select,numerical,use_header_name,multiple:5