Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

data_column parameter: use column_names metadata if present #17478

Merged
merged 2 commits into from
Feb 23, 2024
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
4 changes: 3 additions & 1 deletion lib/galaxy/tool_util/xsd/galaxy.xsd
Original file line number Diff line number Diff line change
Expand Up @@ -3424,7 +3424,9 @@ $attribute_list:data_ref,dynamic_options,display,multiple:5

#### ``data_column``

This parameter type is used to select columns from a parameter.
This parameter type is used to select columns from a data parameter.
It uses the ``column_names`` metadata if present (only since 24.0)
and, as a fallback, the tab-separated values of the first line.

$attribute_list:force_select,numerical,use_header_name,multiple:5

Expand Down
38 changes: 25 additions & 13 deletions lib/galaxy/tools/parameters/basic.py
Original file line number Diff line number Diff line change
Expand Up @@ -1479,20 +1479,32 @@ def get_options(self, trans, other_values):
Show column labels rather than c1..cn if use_header_names=True
"""
options: List[Tuple[str, Union[str, Tuple[str, str]], bool]] = []
if self.usecolnames: # read first row - assume is a header with metadata useful for making good choices
# if available use column_names metadata for option names
# otherwise read first row - assume is a header with tab separated names
if self.usecolnames:
dataset = other_values.get(self.data_ref, None)
try:
with open(dataset.get_file_name()) as f:
head = f.readline()
cnames = head.rstrip("\n\r ").split("\t")
column_list = [("%d" % (i + 1), "c%d: %s" % (i + 1, x)) for i, x in enumerate(cnames)]
if self.numerical: # If numerical was requested, filter columns based on metadata
if hasattr(dataset, "metadata") and hasattr(dataset.metadata, "column_types"):
if len(dataset.metadata.column_types) >= len(cnames):
numerics = [i for i, x in enumerate(dataset.metadata.column_types) if x in ["int", "float"]]
column_list = [column_list[i] for i in numerics]
except Exception:
column_list = self.get_column_list(trans, other_values)
if (
hasattr(dataset, "metadata")
and hasattr(dataset.metadata, "column_names")
and dataset.metadata.element_is_set("column_names")
):
log.error(f"column_names {dataset.metadata.column_names}")
column_list = [
("%d" % (i + 1), "c%d: %s" % (i + 1, x)) for i, x in enumerate(dataset.metadata.column_names)
]
else:
try:
with open(dataset.get_file_name()) as f:
head = f.readline()
cnames = head.rstrip("\n\r ").split("\t")
column_list = [("%d" % (i + 1), "c%d: %s" % (i + 1, x)) for i, x in enumerate(cnames)]
except Exception:
column_list = self.get_column_list(trans, other_values)
if self.numerical: # If numerical was requested, filter columns based on metadata
if hasattr(dataset, "metadata") and hasattr(dataset.metadata, "column_types"):
if len(dataset.metadata.column_types) >= len(column_list):
numerics = [i for i, x in enumerate(dataset.metadata.column_types) if x in ["int", "float"]]
column_list = [column_list[i] for i in numerics]
else:
column_list = self.get_column_list(trans, other_values)
for col in column_list:
Expand Down
26 changes: 24 additions & 2 deletions test/functional/tools/column_param.xml
Original file line number Diff line number Diff line change
@@ -1,11 +1,16 @@
<tool id="column_param" name="Column Param" version="1.0.0">
<command><![CDATA[
cut -f '$col' '$input1' > '$output1' &&
cut
-f '$col'
#if $input1.is_of_type('csv')
-d','
#end if
'$input1' > '$output1' &&
echo "col $col" > '$output2' &&
echo "col_names $col_names" >> '$output2'
]]></command>
<inputs>
<param name="input1" type="data" format="tabular" label="Input 1" />
<param name="input1" type="data" format="tabular,csv" label="Input 1" />
<param name="col" type="data_column" data_ref="input1" label="Column to Use" />
<param name="col_names" type="data_column" data_ref="input1" use_header_names="true" label="Column to Use" />
</inputs>
Expand Down Expand Up @@ -46,5 +51,22 @@ echo "col_names $col_names" >> '$output2'
</assert_contents>
</output>
</test>
<!-- test csv input -->
<test>
<param name="input1" value="1.csv" ftype="csv"/>
<param name="col" value="1" />
<param name="col_names" value="c1: Transaction_date" />
<output name="output1">
<assert_contents>
<has_line line="1/2/09 6:17" />
</assert_contents>
</output>
<output name="output2">
<assert_contents>
<has_line line="col 1" />
<has_line line="col_names 1" />
</assert_contents>
</output>
</test>
</tests>
</tool>
Loading