Merge branch 'release_23.1' into release_23.2

galaxyproject · Jan 15, 2024 · a2cb914 · a2cb914
2 parents 8ae1db1 + 5ba634b
commit a2cb914
Show file tree

Hide file tree

Showing 6 changed files with 127 additions and 6 deletions.
diff --git a/lib/galaxy/config/sample/datatypes_conf.xml.sample b/lib/galaxy/config/sample/datatypes_conf.xml.sample
@@ -216,6 +216,9 @@
       <display file="igv/interval_as_bed.xml" inherit="true"/>
     </datatype>
     <datatype extension="jellyfish" type="galaxy.datatypes.binary:Binary" subclass="true" display_in_upload="true" description="Jellyfish database files are k-mer counts in binary format with a readable head. They are operated on and converted to human-readable text through jellyfish commands." />
+    <datatype extension="ktab" type="galaxy.datatypes.binary:Binary" subclass="true" description="A table of canonical k-mers and their counts for the fastk toolkit." display_in_upload="true" description_url="https://github.com/thegenemyers/FASTK?tab=readme-ov-file#file-encodings"/>
+    <datatype extension="hist" type="galaxy.datatypes.binary:Binary" subclass="true" description="A binary histogram file of kmers and frequencies for the fastk toolkit." display_in_upload="true" description_url="https://github.com/thegenemyers/FASTK?tab=readme-ov-file#file-encodings"/>
+    <datatype extension="prof" type="galaxy.datatypes.binary:Binary" subclass="true" description="Read profile file for the fastk toolkit." display_in_upload="true" description_url="https://github.com/thegenemyers/FASTK?tab=readme-ov-file#file-encodings"/>
 
     <!-- ISA data types -->
     <datatype extension="isa-tab" type="galaxy.datatypes.isa:IsaTab" mimetype="application/isa-tools" display_in_upload="true" description="ISA-Tab data type." description_url="https://isa-tools.org"/>

diff --git a/lib/galaxy/config/sample/tool_conf.xml.sample b/lib/galaxy/config/sample/tool_conf.xml.sample
@@ -79,6 +79,7 @@
     <tool file="filters/bed_to_bigbed.xml" />
   </section>
   <section id="filter" name="Filter and Sort">
+    <tool file="stats/filtering_1_1_0.xml" />
     <tool file="stats/filtering.xml" />
     <tool file="filters/sorter.xml" />
     <tool file="filters/grep.xml" />

diff --git a/lib/galaxy/tool_util/toolbox/views/static.py b/lib/galaxy/tool_util/toolbox/views/static.py
@@ -105,7 +105,7 @@ def definition_with_items_to_panel(definition, allow_sections: bool = True, item
                                 f"Failed to find matching section for (id, name) = ({section_def.id}, {section_def.name})"
                             )
                             continue
-                        section = closest_section.copy()
+                        section = closest_section.copy(merge_tools=True)
                         if section_def.id is not None:
                             section.id = section_def.id
                         if section_def.name is not None:

diff --git a/lib/galaxy/workflow/modules.py b/lib/galaxy/workflow/modules.py
@@ -126,6 +126,8 @@ class ConditionalStepWhen(BooleanToolParameter):
 
 def to_cwl(value, hda_references, step):
     element_identifier = None
+    if isinstance(value, model.HistoryDatasetCollectionAssociation):
+        value = value.collection
     if isinstance(value, model.DatasetCollectionElement) and value.hda:
         element_identifier = value.element_identifier
         value = value.hda
@@ -155,14 +157,13 @@ def to_cwl(value, hda_references, step):
                 properties, value.dataset.created_from_basename or element_identifier or value.name
             )
             return properties
-    elif hasattr(value, "collection"):
-        collection = value.collection
-        if collection.collection_type == "list":
-            return [to_cwl(dce, hda_references=hda_references, step=step) for dce in collection.dataset_elements]
+    elif isinstance(value, model.DatasetCollection):
+        if value.collection_type == "list":
+            return [to_cwl(dce, hda_references=hda_references, step=step) for dce in value.dataset_elements]
         else:
             # Could be record or nested lists
             rval = {}
-            for element in collection.elements:
+            for element in value.elements:
                 rval[element.element_identifier] = to_cwl(
                     element.element_object, hda_references=hda_references, step=step
                 )

diff --git a/test/unit/workflows/test_modules.py b/test/unit/workflows/test_modules.py
@@ -261,6 +261,19 @@ def test_to_cwl():
     assert hda_references == hdas
 
 
+def test_to_cwl_nested_collection():
+    hda = model.HistoryDatasetAssociation(create_dataset=True, flush=False)
+    hda.dataset.state = model.Dataset.states.OK
+    dc_inner = model.DatasetCollection(collection_type="list")
+    model.DatasetCollectionElement(collection=dc_inner, element_identifier="inner", element=hda)
+    dc_outer = model.DatasetCollection(collection_type="list:list")
+    model.DatasetCollectionElement(collection=dc_outer, element_identifier="outer", element=dc_inner)
+    hdca = model.HistoryDatasetCollectionAssociation(name="the collection", collection=dc_outer)
+    result = modules.to_cwl(hdca, [], model.WorkflowStep())
+    assert result["outer"][0]["class"] == "File"
+    assert result["outer"][0]["basename"] == "inner"
+
+
 class MapOverTestCase(NamedTuple):
     data_input: str
     step_input_def: Union[str, List[str]]

diff --git a/tools/stats/filtering_1_1_0.xml b/tools/stats/filtering_1_1_0.xml
@@ -0,0 +1,103 @@
+<tool id="Filter1" name="Filter" version="1.1.0">
+  <description>data on any column using simple expressions</description>
+  <edam_operations>
+    <edam_operation>operation_0335</edam_operation>
+  </edam_operations>
+  <command>
+    python '$__tool_directory__/filtering.py' '$input' '$out_file1' '$inputs' ${input.metadata.columns} "${input.metadata.column_types}" $header_lines
+  </command>
+  <configfiles>
+    <inputs name="inputs" />
+  </configfiles>
+  <inputs>
+    <param format="tabular" name="input" type="data" label="Filter" help="Dataset missing? See TIP below."/>
+    <param name="cond" type="text" value="c1=='chr22'" label="With following condition" help="Double equal signs, ==, must be used as shown above. To filter for an arbitrary string, use the Select tool.">
+      <validator type="empty_field" message="Enter a valid filtering condition, see syntax and examples below."/>
+      <sanitizer>
+        <valid initial="string.printable"/>
+      </sanitizer>
+    </param>
+    <param name="header_lines" type="integer" value="0" label="Number of header lines to skip"/>
+  </inputs>
+  <outputs>
+    <data format_source="input" name="out_file1" metadata_source="input"/>
+  </outputs>
+  <tests>
+    <test>
+      <param name="input" value="1.bed"/>
+      <param name="cond" value="c1=='chr22'"/>
+      <param name="header_lines" value="0"/>
+      <output name="out_file1" file="filter1_test1.bed"/>
+    </test>
+    <test>
+      <param name="input" value="7.bed"/>
+      <param name="cond" value="c1=='chr1' and c3-c2>=2000 and c6=='+'"/>
+      <param name="header_lines" value="0"/>
+      <output name="out_file1" file="filter1_test2.bed"/>
+    </test>
+    <!-- Test filtering of file with a variable number of columns. -->
+    <test>
+      <param name="input" value="filter1_in3.sam"/>
+      <param name="cond" value="c3=='chr1' and c5>5"/>
+      <param name="header_lines" value="0"/>
+      <output name="out_file1" file="filter1_test3.sam"/>
+    </test>
+    <test>
+      <param name="input" value="filter1_inbad.bed"/>
+      <param name="cond" value="c1=='chr22'"/>
+      <param name="header_lines" value="0"/>
+      <output name="out_file1" file="filter1_test4.bed"/>
+    </test>
+    <test>
+      <param name="input" value="filter1_in5.tab"/>
+      <param name="cond" value="c8>500"/>
+      <param name="header_lines" value="1"/>
+      <output name="out_file1" file="filter1_test5.tab"/>
+    </test>
+    <test>
+      <param name="input" value="filter1_in6.bed"/>
+      <param name="cond" value="c2=='100%'"/>
+      <param name="header_lines" value="0"/>
+      <output name="out_file1" file="filter1_test6.bed"/>
+    </test>
+  </tests>
+  <help>
+
+.. class:: warningmark
+
+Double equal signs, ==, must be used as *"equal to"* (e.g., **c1 == 'chr22'**)
+
+.. class:: infomark
+
+**TIP:** Attempting to apply a filtering condition may throw exceptions if the data type (e.g., string, integer) in every line of the columns being filtered is not appropriate for the condition (e.g., attempting certain numerical calculations on strings).  If an exception is thrown when applying the condition to a line, that line is skipped as invalid for the filter condition.  The number of invalid skipped lines is documented in the resulting history item as a "Condition/data issue".
+
+.. class:: infomark
+
+**TIP:** If your data is not TAB delimited, use *Text Manipulation-&gt;Convert*
+
+-----
+
+**Syntax**
+
+The filter tool allows you to restrict the dataset using simple conditional statements.
+
+- Columns are referenced with **c** and a **number**. For example, **c1** refers to the first column of a tab-delimited file
+- Make sure that multi-character operators contain no white space ( e.g., **&lt;=** is valid while **&lt; =** is not valid )
+- When using the 'equal-to' operator, **double equal sign '==' must be used** ( e.g., **c1=='chr1'** )
+- Non-numerical values must be included in single or double quotes ( e.g., **c6=='+'** )
+- Filtering condition can include logical operators, but **make sure operators are all lower case** ( e.g., **(c1!='chrX' and c1!='chrY') or not c6=='+'** )
+
+-----
+
+**Example**
+
+- **c1=='chr1'** selects lines in which the first column is chr1
+- **c3-c2&lt;100*c4** selects lines where subtracting column 2 from column 3 gives a value less than the value of column 4 times 100
+- **len(c2.split(',')) &lt; 4** will select lines where the second column has fewer than four comma-separated elements
+- **c2>=1** selects lines in which the value of column 2 is greater than or equal to 1
+- Numbers should not contain commas - **c2&lt;=44,554,350** will not work, but **c2&lt;=44554350** will
+- Some words in the data can be used, but must be single or double quoted ( e.g., **c3=='exon'** )
+
+  </help>
+  <citations/>
+</tool>