Skip to content

Commit

Permalink
Merge pull request #17478 from bernt-matthias/topic/column-param-metadata
Browse files: browse the repository at this point in the history

`data_column` parameter: use `column_names` metadata if present
  • Loading branch information
mvdbeek authored Feb 23, 2024
2 parents 327b706 + aefbdab commit 516932c
Show file tree
Hide file tree
Showing 3 changed files with 52 additions and 16 deletions.
4 changes: 3 additions & 1 deletion lib/galaxy/tool_util/xsd/galaxy.xsd
Original file line number Diff line number Diff line change
Expand Up @@ -3450,7 +3450,9 @@ $attribute_list:data_ref,dynamic_options,display,multiple:5
#### ``data_column``
This parameter type is used to select columns from a parameter.
This parameter type is used to select columns from a data parameter.
It uses the ``column_names`` metadata if present (supported only since 24.0)
and otherwise falls back to the tab-separated values of the first line.
$attribute_list:force_select,numerical,use_header_name,multiple:5
Expand Down
38 changes: 25 additions & 13 deletions lib/galaxy/tools/parameters/basic.py
Original file line number Diff line number Diff line change
Expand Up @@ -1484,20 +1484,32 @@ def get_options(self, trans, other_values):
Show column labels rather than c1..cn if use_header_names=True
"""
options: List[Tuple[str, Union[str, Tuple[str, str]], bool]] = []
if self.usecolnames: # read first row - assume is a header with metadata useful for making good choices
# If available, use the column_names metadata for the option names;
# otherwise read the first row, assuming it is a header with tab-separated names.
if self.usecolnames:
dataset = other_values.get(self.data_ref, None)
try:
with open(dataset.get_file_name()) as f:
head = f.readline()
cnames = head.rstrip("\n\r ").split("\t")
column_list = [("%d" % (i + 1), "c%d: %s" % (i + 1, x)) for i, x in enumerate(cnames)]
if self.numerical: # If numerical was requested, filter columns based on metadata
if hasattr(dataset, "metadata") and hasattr(dataset.metadata, "column_types"):
if len(dataset.metadata.column_types) >= len(cnames):
numerics = [i for i, x in enumerate(dataset.metadata.column_types) if x in ["int", "float"]]
column_list = [column_list[i] for i in numerics]
except Exception:
column_list = self.get_column_list(trans, other_values)
if (
hasattr(dataset, "metadata")
and hasattr(dataset.metadata, "column_names")
and dataset.metadata.element_is_set("column_names")
):
log.error(f"column_names {dataset.metadata.column_names}")
column_list = [
("%d" % (i + 1), "c%d: %s" % (i + 1, x)) for i, x in enumerate(dataset.metadata.column_names)
]
else:
try:
with open(dataset.get_file_name()) as f:
head = f.readline()
cnames = head.rstrip("\n\r ").split("\t")
column_list = [("%d" % (i + 1), "c%d: %s" % (i + 1, x)) for i, x in enumerate(cnames)]
except Exception:
column_list = self.get_column_list(trans, other_values)
if self.numerical: # If numerical was requested, filter columns based on metadata
if hasattr(dataset, "metadata") and hasattr(dataset.metadata, "column_types"):
if len(dataset.metadata.column_types) >= len(column_list):
numerics = [i for i, x in enumerate(dataset.metadata.column_types) if x in ["int", "float"]]
column_list = [column_list[i] for i in numerics]
else:
column_list = self.get_column_list(trans, other_values)
for col in column_list:
Expand Down
26 changes: 24 additions & 2 deletions test/functional/tools/column_param.xml
Original file line number Diff line number Diff line change
@@ -1,11 +1,16 @@
<tool id="column_param" name="Column Param" version="1.0.0">
<command><![CDATA[
cut -f '$col' '$input1' > '$output1' &&
cut
-f '$col'
#if $input1.is_of_type('csv')
-d','
#end if
'$input1' > '$output1' &&
echo "col $col" > '$output2' &&
echo "col_names $col_names" >> '$output2'
]]></command>
<inputs>
<param name="input1" type="data" format="tabular" label="Input 1" />
<param name="input1" type="data" format="tabular,csv" label="Input 1" />
<param name="col" type="data_column" data_ref="input1" label="Column to Use" />
<param name="col_names" type="data_column" data_ref="input1" use_header_names="true" label="Column to Use" />
</inputs>
Expand Down Expand Up @@ -46,5 +51,22 @@ echo "col_names $col_names" >> '$output2'
</assert_contents>
</output>
</test>
<!-- test csv input -->
<test>
<param name="input1" value="1.csv" ftype="csv"/>
<param name="col" value="1" />
<param name="col_names" value="c1: Transaction_date" />
<output name="output1">
<assert_contents>
<has_line line="1/2/09 6:17" />
</assert_contents>
</output>
<output name="output2">
<assert_contents>
<has_line line="col 1" />
<has_line line="col_names 1" />
</assert_contents>
</output>
</test>
</tests>
</tool>

0 comments on commit 516932c

Please sign in to comment.