Skip to content

Commit

Permalink
Various fixes for stricter cwltool and cwltest
Browse files Browse the repository at this point in the history
  • Loading branch information
mvdbeek committed Oct 31, 2023
1 parent b8c9968 commit ef9f5d4
Show file tree
Hide file tree
Showing 4 changed files with 69 additions and 35 deletions.
35 changes: 35 additions & 0 deletions lib/galaxy/tool_util/cwl/representation.py
Original file line number Diff line number Diff line change
Expand Up @@ -4,6 +4,8 @@
import json
import logging
import os
import tarfile
import uuid
from enum import Enum
from typing import (
Any,
Expand Down Expand Up @@ -218,7 +220,39 @@ def dataset_wrapper_to_directory_json(inputs_dir, dataset_wrapper):
"archive_nameext": nameext,
"archive_nameroot": nameroot,
}

def tar_to_directory(directory_item):
# TODO: Should we just make sure that archive exists in extra_files_path ??
tar_file_location = directory_item["archive_location"]
directory_name = directory_item["name"]

assert os.path.exists(tar_file_location), tar_file_location

tmp_dir = os.path.join(inputs_dir, "direx", str(uuid.uuid4())) # direx for "DIR EXtract"
directory_location = os.path.join(tmp_dir, directory_name)

os.makedirs(tmp_dir)

assert os.path.exists(tmp_dir), tmp_dir

# TODO: safe version of this!
bkp_cwd = os.getcwd()
os.chdir(tmp_dir)
tar = tarfile.open(tar_file_location)
tar.extractall(directory_location)
tar.close()
os.chdir(bkp_cwd)

assert os.path.exists(directory_location), directory_location

directory_item["location"] = directory_location
directory_item["nameext"] = "None"
directory_item["nameroot"] = directory_name
directory_item["basename"] = directory_name

tar_to_directory(directory_json)
extra_params.update(directory_json)

entry_to_location(extra_params, extra_params["location"])
return extra_params

Expand All @@ -227,6 +261,7 @@ def entry_to_location(entry: Dict[str, Any], parent_location: str):
# TODO unit test
if entry["class"] == "File" and "path" in entry and "location" not in entry:
entry["location"] = os.path.join(parent_location, entry.pop("path"))
entry["size"] = os.path.getsize(entry["location"])
elif entry["class"] == "Directory" and "listing" in entry:
if "location" not in entry and "path" in entry:
entry["location"] = os.path.join(parent_location, entry.pop("path"))
Expand Down
8 changes: 7 additions & 1 deletion lib/galaxy/tool_util/cwl/util.py
Original file line number Diff line number Diff line change
Expand Up @@ -571,7 +571,13 @@ def dir_listing(dir_path):
if extra_file_class == "File":
ec = get_dataset(output_metadata, filename=path)
ec["basename"] = extra_file_basename
ec_properties = output_properties(pseudo_location=pseudo_location, **ec)
filename = f"{dir_path}/{extra_file_basename}"
_download_url = (
output_metadata["download_url"] + f"?filename={urllib.parse.quote_plus(filename)}"
)
ec_properties = output_properties(
pseudo_location=pseudo_location, download_url=_download_url, **ec
)
elif extra_file_class == "Directory":
ec_properties = {}
ec_properties["class"] = "Directory"
Expand Down
34 changes: 0 additions & 34 deletions lib/galaxy/tools/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -9,7 +9,6 @@
import re
import tarfile
import tempfile
import uuid
from collections.abc import MutableMapping
from pathlib import Path
from typing import (
Expand Down Expand Up @@ -3173,39 +3172,6 @@ def exec_before_job(self, app, inp_data, out_data, param_dict=None):
# this really seems wrong -John
input_json = {k: v for k, v in input_json.items() if v != ""}

# handle 'Directory' type (uncompress tar file)
for v in input_json.values():
if isinstance(v, dict) and "class" in v and v["class"] == "Directory":
if "archive_nameext" in v and v["archive_nameext"] == ".tar":
tar_file_location = v["archive_location"]
directory_name = v["name"]

assert os.path.exists(tar_file_location), tar_file_location

tmp_dir = os.path.join(
local_working_directory, "direx", str(uuid.uuid4())
) # direx for "DIR EXtract"
directory_location = os.path.join(tmp_dir, directory_name)

os.makedirs(tmp_dir)

assert os.path.exists(tmp_dir), tmp_dir

# TODO: safe version of this!
bkp_cwd = os.getcwd()
os.chdir(tmp_dir)
tar = tarfile.open(tar_file_location)
tar.extractall(directory_location)
tar.close()
os.chdir(bkp_cwd)

assert os.path.exists(directory_location), directory_location

v["location"] = directory_location
v["nameext"] = "None"
v["nameroot"] = directory_name
v["basename"] = directory_name

cwl_job_proxy = self._cwl_tool_proxy.job_proxy(
input_json,
output_dict,
Expand Down
27 changes: 27 additions & 0 deletions lib/galaxy_test/base/populators.py
Original file line number Diff line number Diff line change
Expand Up @@ -274,14 +274,30 @@ def to_local_location(listing, location):
to_local_location(item["listing"], location=item["location"])


def fix_conflicts(path):
# find first key that does not clash with an existing entry in targets
# start with entry.target + '_' + 2 and then keep incrementing
# the number till there is no clash
i = 2
tgt = f"{path}_{i}"
while os.path.exists(tgt):
i += 1
tgt = f"{path}_{i}"
return tgt


def output_to_disk(output, download_folder):
if isinstance(output, list):
return [output_to_disk(item, download_folder=download_folder) for item in output]
if isinstance(output, dict):
if "secondaryFiles" in output:
output["secondaryFiles"] = [
output_to_disk(secondary, download_folder=download_folder) for secondary in output["secondaryFiles"]
]
if "basename" in output:
download_path = os.path.join(download_folder, output["basename"])
if os.path.exists(download_path):
download_path = fix_conflicts(download_path)
if output["class"] == "Directory":
zip_path = f"{download_path}.zip"
download_to_file(output["location"], zip_path)
Expand All @@ -293,6 +309,17 @@ def output_to_disk(output, download_folder):
output["location"] = f"file://{download_path}"
if "listing" in output:
to_local_location(output["listing"], output["location"])

return output
elif output.get("class") == "Directory":
# Directory in secondary files
download_folder = os.path.join(download_folder, output["location"])
os.makedirs(download_folder, exist_ok=True)
output["location"] = download_folder
new_listing = [
output_to_disk(secondary, download_folder=download_folder) for secondary in output["listing"]
]
output["listing"] = new_listing
return output
else:
new_output = {}
Expand Down

0 comments on commit ef9f5d4

Please sign in to comment.