"Fix" running CWL tools in Docker after recent changes to cwltool.

Adds an integration test based on the GA4GH task execution phase 1 example.
common-workflow-lab · Mar 13, 2017 · a9b5865 · a9b5865
1 parent f5aeac7
commit a9b5865
Show file tree

Hide file tree

Showing 6 changed files with 45 additions and 5 deletions.
diff --git a/lib/galaxy/tools/cwl/parser.py b/lib/galaxy/tools/cwl/parser.py
@@ -28,7 +28,7 @@
 JOB_JSON_FILE = ".cwl_job.json"
 SECONDARY_FILES_EXTRA_PREFIX = "__secondary_files__"
 
-
+DOCKER_REQUIREMENT = "DockerRequirement"
 SUPPORTED_TOOL_REQUIREMENTS = [
     "CreateFileRequirement",
     "DockerRequirement",
@@ -71,10 +71,14 @@ def load_job_proxy(job_directory, strict_cwl_validation=True):
 def to_cwl_tool_object(tool_path, strict_cwl_validation=True):
     proxy_class = None
     cwl_tool = _schema_loader(strict_cwl_validation).tool(path=tool_path)
+
     if isinstance(cwl_tool, int):
         raise Exception("Failed to load tool.")
 
     raw_tool = cwl_tool.tool
+    # Apply Galaxy hacks to CWL tool representation to bridge semantic differences
+    # between Galaxy and cwltool.
+    _hack_cwl_requirements(cwl_tool)
     check_requirements(raw_tool)
     if "class" not in raw_tool:
         raise Exception("File does not declare a class, not a valid Draft 3+ CWL tool.")
@@ -107,6 +111,22 @@ def _schema_loader(strict_cwl_validation):
     return target_schema_loader
 
 
+def _hack_cwl_requirements(cwl_tool):
+    """Move a DockerRequirement entry from the raw tool's requirements to its hints."""
+    tool_dict = cwl_tool.tool
+    declared = tool_dict["requirements"] if "requirements" in tool_dict else []
+    # Every entry's "class" is inspected; only the last Docker entry is moved.
+    docker_positions = [
+        pos for pos, req in enumerate(declared) if req["class"] == DOCKER_REQUIREMENT
+    ]
+    if docker_positions:
+        docker_entry = declared.pop(docker_positions[-1])
+        if "hints" not in tool_dict:
+            tool_dict["hints"] = []
+        tool_dict["hints"].append(docker_entry)
+    cwl_tool.requirements = tool_dict.get("requirements", [])
+
+
 def check_requirements(rec, tool=True):
     if isinstance(rec, dict):
         if "requirements" in rec:
@@ -246,7 +266,10 @@ def _ensure_cwl_job_initialized(self):
                 self._output_callback,
                 basedir=self._job_directory,
                 select_resources=self._select_resources,
-                use_container=False
+                outdir=os.path.join(self._job_directory, "cwloutput"),
+                tmpdir=os.path.join(self._job_directory, "cwltmp"),
+                stagedir=os.path.join(self._job_directory, "cwlstagedir"),
+                use_container=False,
             ))
             self._is_command_line_job = hasattr(self._cwl_job, "command_line")
 

diff --git a/test/base/populators.py b/test/base/populators.py
@@ -69,7 +69,8 @@ def replacement_item(value):
         if type_class != "File":
             return value
 
-        file_path = value.get("path", None)
+        # TODO: Dispatch on draft 3 vs v1.0+ tools here in the future.
+        file_path = value.get("path", None) or value.get("location", None)
         if file_path is None:
             return value
 
@@ -243,8 +244,13 @@ def upload_path(path):
         run_response = self.run_tool( tool_id, job_as_dict, history_id, inputs_representation="cwl", assert_ok=assert_ok )
         run_object = CwlToolRun( history_id, run_response )
         if assert_ok:
-            final_state = self.wait_for_job( run_object.job_id )
-            assert final_state == "ok"
+            try:
+                final_state = self.wait_for_job( run_object.job_id )
+                assert final_state == "ok"
+            except Exception:
+                self._summarize_history_errors( history_id )
+                raise
+
         return run_object
 
     def get_history_dataset_content( self, history_id, wait=True, **kwds ):

diff --git a/test/functional/tools/samples_tool_conf.xml b/test/functional/tools/samples_tool_conf.xml
@@ -137,6 +137,8 @@
   <tool file="cwl_tools/draft3_custom/optional-output2.cwl" />
   <tool file="cwl_tools/draft3_custom/showindex1.cwl" />
 
+  <tool file="cwl_tools/v1.0_custom/md5sum_non_strict.cwl" />
+
   <!-- Tools interesting only for building up test workflows. -->
 
   <!-- Next three tools demonstrate concatenating multiple datasets

diff --git a/test/integration/test_dockerized_jobs.py b/test/integration/test_dockerized_jobs.py
@@ -22,6 +22,7 @@ def handle_galaxy_config_kwds(cls, config):
         # Disable tool dependency resolution.
         config["tool_dependency_dir"] = "none"
         config["enable_beta_mulled_containers"] = "true"
+        config["strict_cwl_validation"] = "false"
 
     def setUp(self):
         super(DockerizedJobsIntegrationTestCase, self).setUp()
@@ -39,3 +40,9 @@ def test_mulled_simple(self):
         self.dataset_populator.wait_for_history(self.history_id, assert_ok=True)
         output = self.dataset_populator.get_history_dataset_content(self.history_id)
         assert "0.7.15-r1140" in output
+
+    def test_cwl(self):
+        """Run the md5sum_non_strict CWL tool and check the digest in its first output."""
+        run_object = self.dataset_populator.run_cwl_tool("md5sum_non_strict", "test/functional/tools/cwl_tools/v1.0_custom/md5sum_job.json")
+        output_content = self.dataset_populator.get_history_dataset_content(run_object.history_id, dataset=run_object.output(0))
+        self.assertEqual(output_content, "00579a00e3e7fa0674428ac7049423e2\n")
diff --git a/test/unit/tools/cwl_tools/v1.0_custom/md5sum.input b/test/unit/tools/cwl_tools/v1.0_custom/md5sum.input
@@ -0,0 +1 @@
+this is the test file that will be used when calculating an md5sum
diff --git a/test/unit/tools/cwl_tools/v1.0_custom/md5sum_job.json b/test/unit/tools/cwl_tools/v1.0_custom/md5sum_job.json
@@ -0,0 +1 @@
+{"input_file": {"class": "File", "location": "md5sum.input"}}
Original file line number	Diff line number	Diff line change
		@@ -0,0 +1 @@
		this is the test file that will be used when calculating an md5sum
Original file line number	Diff line number	Diff line change
		@@ -0,0 +1 @@
		{"input_file": {"class": "File", "location": "md5sum.input"}}