diff --git a/lib/galaxy/actions/library.py b/lib/galaxy/actions/library.py
index 1eb363e09dfb..14e2eeac907a 100644
--- a/lib/galaxy/actions/library.py
+++ b/lib/galaxy/actions/library.py
@@ -249,16 +249,14 @@ def _make_library_uploaded_dataset(self, trans, params, name, path, type, librar
uploaded_dataset.to_posix_lines = params.get('to_posix_lines', None)
uploaded_dataset.space_to_tab = params.get('space_to_tab', None)
uploaded_dataset.tag_using_filenames = params.get('tag_using_filenames', True)
+ uploaded_dataset.purge_source = getattr(trans.app.config, 'ftp_upload_purge', True)
if in_folder:
uploaded_dataset.in_folder = in_folder
uploaded_dataset.data = upload_common.new_upload(trans, 'api', uploaded_dataset, library_bunch)
uploaded_dataset.link_data_only = link_data_only
uploaded_dataset.uuid = uuid_str
if link_data_only == 'link_to_files':
- uploaded_dataset.data.file_name = os.path.abspath(path)
- # Since we are not copying the file into Galaxy's managed
- # default file location, the dataset should never be purgable.
- uploaded_dataset.data.dataset.purgable = False
+ uploaded_dataset.data.link_to(path)
trans.sa_session.add_all((uploaded_dataset.data, uploaded_dataset.data.dataset))
trans.sa_session.flush()
return uploaded_dataset
diff --git a/lib/galaxy/app.py b/lib/galaxy/app.py
index ee03bf35cbef..574324b9aab3 100644
--- a/lib/galaxy/app.py
+++ b/lib/galaxy/app.py
@@ -12,7 +12,9 @@
from galaxy import config, jobs
from galaxy.jobs import metrics as job_metrics
from galaxy.managers.collections import DatasetCollectionManager
+from galaxy.managers.folders import FolderManager
from galaxy.managers.histories import HistoryManager
+from galaxy.managers.libraries import LibraryManager
from galaxy.managers.tags import GalaxyTagManager
from galaxy.openid.providers import OpenIDProviders
from galaxy.queue_worker import GalaxyQueueWorker
@@ -96,6 +98,8 @@ def __init__(self, **kwargs):
self.history_manager = HistoryManager(self)
self.dependency_resolvers_view = DependencyResolversView(self)
self.test_data_resolver = test_data.TestDataResolver(file_dirs=self.config.tool_test_data_directories)
+ self.library_folder_manager = FolderManager()
+ self.library_manager = LibraryManager()
# Tool Data Tables
self._configure_tool_data_tables(from_shed_config=False)
diff --git a/lib/galaxy/datatypes/sniff.py b/lib/galaxy/datatypes/sniff.py
index 197a9bf18e51..c69373b0669f 100644
--- a/lib/galaxy/datatypes/sniff.py
+++ b/lib/galaxy/datatypes/sniff.py
@@ -14,6 +14,7 @@
import zipfile
from six import text_type
+from six.moves.urllib.request import urlopen
from galaxy import util
from galaxy.util import compression_utils
@@ -39,6 +40,12 @@ def get_test_fname(fname):
return full_path
+def stream_url_to_file(path):
+ page = urlopen(path) # page will be .close()ed in stream_to_file
+ temp_name = stream_to_file(page, prefix='url_paste', source_encoding=util.get_charset_from_http_headers(page.headers))
+ return temp_name
+
+
def stream_to_open_named_file(stream, fd, filename, source_encoding=None, source_error='strict', target_encoding=None, target_error='strict'):
"""Writes a stream to the provided file descriptor, returns the file name. Closes file descriptor"""
# signature and behavor is somewhat odd, due to backwards compatibility, but this can/should be done better
@@ -131,7 +138,7 @@ def convert_newlines(fname, in_place=True, tmp_dir=None, tmp_prefix="gxupload"):
return (i, temp_name)
-def sep2tabs(fname, in_place=True, patt="\\s+"):
+def sep2tabs(fname, in_place=True, patt="\\s+", tmp_dir=None, tmp_prefix="gxupload"):
"""
Transforms in place a 'sep' separated file to a tab separated one
@@ -143,13 +150,18 @@ def sep2tabs(fname, in_place=True, patt="\\s+"):
'1\\t2\\n3\\t4\\n'
"""
regexp = re.compile(patt)
- fd, temp_name = tempfile.mkstemp()
+ fd, temp_name = tempfile.mkstemp(prefix=tmp_prefix, dir=tmp_dir)
with os.fdopen(fd, "wt") as fp:
i = None
for i, line in enumerate(open(fname)):
- line = line.rstrip('\r\n')
- elems = regexp.split(line)
- fp.write("%s\n" % '\t'.join(elems))
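+ # Preserve the line's original terminator: lines ending in a bare '\r' are written back with '\r', all others with '\n'.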
+ if line.endswith("\r"):
+ line = line.rstrip('\r')
+ elems = regexp.split(line)
+ fp.write("%s\r" % '\t'.join(elems))
+ else:
+ line = line.rstrip('\n')
+ elems = regexp.split(line)
+ fp.write("%s\n" % '\t'.join(elems))
if i is None:
i = 0
else:
diff --git a/lib/galaxy/datatypes/upload_util.py b/lib/galaxy/datatypes/upload_util.py
new file mode 100644
index 000000000000..97bb11862ca3
--- /dev/null
+++ b/lib/galaxy/datatypes/upload_util.py
@@ -0,0 +1,47 @@
+from galaxy.datatypes import sniff
+from galaxy.datatypes.binary import Binary
+
+
+class UploadProblemException(Exception):
+
+ def __init__(self, message):
+ self.message = message
+
+
+def handle_unsniffable_binary_check(data_type, ext, path, name, is_binary, requested_ext, check_content, registry):
+ """Return modified values of data_type and ext if unsniffable binary encountered.
+
+ Throw UploadProblemException if content problems or extension mismatches occur.
+
+ Precondition: check_binary called returned True.
+ """
+ if is_binary or registry.is_extension_unsniffable_binary(requested_ext):
+ # We have a binary dataset, but it is not Bam, Sff or Pdf
+ data_type = 'binary'
+ parts = name.split(".")
+ if len(parts) > 1:
+ ext = parts[-1].strip().lower()
+ is_ext_unsniffable_binary = registry.is_extension_unsniffable_binary(ext)
+ if check_content and not is_ext_unsniffable_binary:
+ raise UploadProblemException('The uploaded binary file contains inappropriate content')
+
+ elif is_ext_unsniffable_binary and requested_ext != ext:
+ err_msg = "You must manually set the 'File Format' to '%s' when uploading %s files." % (ext, ext)
+ raise UploadProblemException(err_msg)
+ return data_type, ext
+
+
+def handle_sniffable_binary_check(data_type, ext, path, registry):
+ """Return modified values of data_type and ext if sniffable binary encountered.
+
+ Precondition: check_binary called returned True.
+ """
+ # Sniff the data type
+ guessed_ext = sniff.guess_ext(path, registry.sniff_order)
+ # Set data_type only if guessed_ext is a binary datatype
+ datatype = registry.get_datatype_by_extension(guessed_ext)
+ if isinstance(datatype, Binary):
+ data_type = guessed_ext
+ ext = guessed_ext
+
+ return data_type, ext
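+
+
+# Intended usage (as in data_fetch.py in this change): callers first run
+# galaxy.util.checkers.check_binary(path); when it returns True they call
+# handle_sniffable_binary_check(), and fall back to handle_unsniffable_binary_check()
+# if sniffing did not resolve a binary datatype.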
diff --git a/lib/galaxy/dependencies/pinned-requirements.txt b/lib/galaxy/dependencies/pinned-requirements.txt
index 5e5ec8a5921a..0c7f590ec853 100644
--- a/lib/galaxy/dependencies/pinned-requirements.txt
+++ b/lib/galaxy/dependencies/pinned-requirements.txt
@@ -15,6 +15,7 @@ uWSGI==2.0.15
pysam==0.14
# pure Python packages
+bdbag==1.1.1
bleach==2.1.3
bz2file==0.98; python_version < '3.3'
ipaddress==1.0.18; python_version < '3.3'
diff --git a/lib/galaxy/jobs/__init__.py b/lib/galaxy/jobs/__init__.py
index 9e16f055df8a..73d00a8ec1e3 100644
--- a/lib/galaxy/jobs/__init__.py
+++ b/lib/galaxy/jobs/__init__.py
@@ -1380,7 +1380,7 @@ def path_rewriter(path):
collected_datasets = {
'primary': self.tool.collect_primary_datasets(out_data, self.get_tool_provided_job_metadata(), tool_working_directory, input_ext, input_dbkey)
}
- self.tool.collect_dynamic_collections(
+ self.tool.collect_dynamic_outputs(
out_collections,
self.get_tool_provided_job_metadata(),
job_working_directory=tool_working_directory,
diff --git a/lib/galaxy/managers/collections.py b/lib/galaxy/managers/collections.py
index fe688abad537..cf507c3c2185 100644
--- a/lib/galaxy/managers/collections.py
+++ b/lib/galaxy/managers/collections.py
@@ -46,17 +46,22 @@ def __init__(self, app):
self.tag_manager = tags.GalaxyTagManager(app.model.context)
self.ldda_manager = lddas.LDDAManager(app)
- def precreate_dataset_collection_instance(self, trans, parent, name, implicit_inputs, implicit_output_name, structure):
+ def precreate_dataset_collection_instance(self, trans, parent, name, structure, implicit_inputs=None, implicit_output_name=None):
# TODO: prebuild all required HIDs and send them in so no need to flush in between.
- dataset_collection = self.precreate_dataset_collection(structure)
+ dataset_collection = self.precreate_dataset_collection(structure, allow_unitialized_element=implicit_output_name is not None)
instance = self._create_instance_for_collection(
trans, parent, name, dataset_collection, implicit_inputs=implicit_inputs, implicit_output_name=implicit_output_name, flush=False
)
return instance
- def precreate_dataset_collection(self, structure):
- if structure.is_leaf or not structure.children_known:
- return model.DatasetCollectionElement.UNINITIALIZED_ELEMENT
+ def precreate_dataset_collection(self, structure, allow_unitialized_element=True):
+ has_structure = not structure.is_leaf and structure.children_known
+ if not has_structure and allow_unitialized_element:
+ dataset_collection = model.DatasetCollectionElement.UNINITIALIZED_ELEMENT
+ elif not has_structure:
+ collection_type_description = structure.collection_type_description
+ dataset_collection = model.DatasetCollection(populated=False)
+ dataset_collection.collection_type = collection_type_description.collection_type
else:
collection_type_description = structure.collection_type_description
dataset_collection = model.DatasetCollection(populated=False)
@@ -67,7 +72,7 @@ def precreate_dataset_collection(self, structure):
if substructure.is_leaf:
element = model.DatasetCollectionElement.UNINITIALIZED_ELEMENT
else:
- element = self.precreate_dataset_collection(substructure)
+ element = self.precreate_dataset_collection(substructure, allow_unitialized_element=allow_unitialized_element)
element = model.DatasetCollectionElement(
element=element,
@@ -78,7 +83,7 @@ def precreate_dataset_collection(self, structure):
dataset_collection.elements = elements
dataset_collection.element_count = len(elements)
- return dataset_collection
+ return dataset_collection
def create(self, trans, parent, name, collection_type, element_identifiers=None,
elements=None, implicit_collection_info=None, trusted_identifiers=None,
diff --git a/lib/galaxy/model/__init__.py b/lib/galaxy/model/__init__.py
index 66493c441111..67b8dfdf03a2 100644
--- a/lib/galaxy/model/__init__.py
+++ b/lib/galaxy/model/__init__.py
@@ -2035,6 +2035,12 @@ def set_file_name(self, filename):
return self.dataset.set_file_name(filename)
file_name = property(get_file_name, set_file_name)
+ def link_to(self, path):
+ self.file_name = os.path.abspath(path)
+ # Since we are not copying the file into Galaxy's managed
+ # default file location, the dataset should never be purgable.
+ self.dataset.purgable = False
+
@property
def extra_files_path(self):
return self.dataset.extra_files_path
diff --git a/lib/galaxy/tools/__init__.py b/lib/galaxy/tools/__init__.py
index f87ffa635462..719174aba27f 100755
--- a/lib/galaxy/tools/__init__.py
+++ b/lib/galaxy/tools/__init__.py
@@ -103,6 +103,7 @@
# Tools that require Galaxy's Python environment to be preserved.
GALAXY_LIB_TOOLS_UNVERSIONED = [
"upload1",
+ "__DATA_FETCH__",
# Legacy tools bundled with Galaxy.
"vcf_to_maf_customtrack1",
"laj_1",
@@ -1107,7 +1108,10 @@ def parse_input_elem(self, page_source, enctypes, context=None):
group.file_type_name = elem.get('file_type_name', group.file_type_name)
group.default_file_type = elem.get('default_file_type', group.default_file_type)
group.metadata_ref = elem.get('metadata_ref', group.metadata_ref)
- rval[group.file_type_name].refresh_on_change = True
+ try:
+ rval[group.file_type_name].refresh_on_change = True
+ except KeyError:
+ pass
group_page_source = XmlPageSource(elem)
group.inputs = self.parse_input_elem(group_page_source, enctypes, context)
rval[group.name] = group
@@ -1658,10 +1662,10 @@ def collect_primary_datasets(self, output, tool_provided_metadata, job_working_d
"""
return output_collect.collect_primary_datasets(self, output, tool_provided_metadata, job_working_directory, input_ext, input_dbkey=input_dbkey)
- def collect_dynamic_collections(self, output, tool_provided_metadata, **kwds):
- """ Find files corresponding to dynamically structured collections.
+ def collect_dynamic_outputs(self, output, tool_provided_metadata, **kwds):
+ """Collect dynamic outputs associated with a job from this tool.
"""
- return output_collect.collect_dynamic_collections(self, output, tool_provided_metadata, **kwds)
+ return output_collect.collect_dynamic_outputs(self, output, tool_provided_metadata, **kwds)
def to_archive(self):
tool = self
diff --git a/lib/galaxy/tools/actions/upload.py b/lib/galaxy/tools/actions/upload.py
index 7dbb2f47d583..cdcc28b1adea 100644
--- a/lib/galaxy/tools/actions/upload.py
+++ b/lib/galaxy/tools/actions/upload.py
@@ -1,15 +1,20 @@
+import json
import logging
+import os
+from galaxy.dataset_collections.structure import UnitializedTree
+from galaxy.exceptions import RequestParameterMissingException
from galaxy.tools.actions import upload_common
from galaxy.util import ExecutionTimer
+from galaxy.util.bunch import Bunch
from . import ToolAction
log = logging.getLogger(__name__)
-class UploadToolAction(ToolAction):
+class BaseUploadToolAction(ToolAction):
- def execute(self, tool, trans, incoming={}, set_output_hid=True, history=None, **kwargs):
+ def execute(self, tool, trans, incoming={}, history=None, **kwargs):
dataset_upload_inputs = []
for input_name, input in tool.inputs.items():
if input.type == "upload_dataset":
@@ -19,18 +24,125 @@ def execute(self, tool, trans, incoming={}, set_output_hid=True, history=None, *
persisting_uploads_timer = ExecutionTimer()
incoming = upload_common.persist_uploads(incoming, trans)
log.debug("Persisted uploads %s" % persisting_uploads_timer)
+ rval = self._setup_job(tool, trans, incoming, dataset_upload_inputs, history)
+ return rval
+
+ def _setup_job(self, tool, trans, incoming, dataset_upload_inputs, history):
+ """Take persisted uploads and create a job for given tool."""
+
+ def _create_job(self, *args, **kwds):
+ """Wrapper around upload_common.create_job with a timer."""
+ create_job_timer = ExecutionTimer()
+ rval = upload_common.create_job(*args, **kwds)
+ log.debug("Created upload job %s" % create_job_timer)
+ return rval
+
+
+class UploadToolAction(BaseUploadToolAction):
+
+ def _setup_job(self, tool, trans, incoming, dataset_upload_inputs, history):
check_timer = ExecutionTimer()
- # We can pass an empty string as the cntrller here since it is used to check whether we
- # are in an admin view, and this tool is currently not used there.
uploaded_datasets = upload_common.get_uploaded_datasets(trans, '', incoming, dataset_upload_inputs, history=history)
if not uploaded_datasets:
return None, 'No data was entered in the upload form, please go back and choose data to upload.'
- log.debug("Checked uploads %s" % check_timer)
- create_job_timer = ExecutionTimer()
json_file_path = upload_common.create_paramfile(trans, uploaded_datasets)
data_list = [ud.data for ud in uploaded_datasets]
- rval = upload_common.create_job(trans, incoming, tool, json_file_path, data_list, history=history)
- log.debug("Created upload job %s" % create_job_timer)
- return rval
+ log.debug("Checked uploads %s" % check_timer)
+ return self._create_job(
+ trans, incoming, tool, json_file_path, data_list, history=history
+ )
+
+
+class FetchUploadToolAction(BaseUploadToolAction):
+
+ def _setup_job(self, tool, trans, incoming, dataset_upload_inputs, history):
+ # Now replace references in requests with these.
+ files = incoming.get("files", [])
+ files_iter = iter(files)
+ request = json.loads(incoming.get("request_json"))
+
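+ # For example (illustrative), an element {"src": "files"} is rewritten in place to
+ # {"src": "path", "path": "<local_filename of the persisted upload>", "name": "<original filename>"},
+ # consuming the persisted uploads in the order they were submitted.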
+ def replace_file_srcs(request_part):
+ if isinstance(request_part, dict):
+ if request_part.get("src", None) == "files":
+ path_def = next(files_iter)
+ if path_def is None or path_def["file_data"] is None:
+ raise RequestParameterMissingException("Failed to find uploaded file matching target with src='files'")
+ request_part["path"] = path_def["file_data"]["local_filename"]
+ if "name" not in request_part:
+ request_part["name"] = path_def["file_data"]["filename"]
+ request_part["src"] = "path"
+ else:
+ for key, value in request_part.items():
+ replace_file_srcs(value)
+ elif isinstance(request_part, list):
+ for value in request_part:
+ replace_file_srcs(value)
+
+ replace_file_srcs(request)
+
+ outputs = []
+ for target in request.get("targets", []):
+ destination = target.get("destination")
+ destination_type = destination.get("type")
+ # Start by just pre-creating HDAs.
+ if destination_type == "hdas":
+ if target.get("elements_from"):
+ # Elements will be discovered at job time (dynamic collection), so skip pre-creating HDAs here.
+ continue
+ _precreate_fetched_hdas(trans, history, target, outputs)
+
+ if destination_type == "hdca":
+ _precreate_fetched_collection_instance(trans, history, target, outputs)
+
+ incoming["request_json"] = json.dumps(request)
+ return self._create_job(
+ trans, incoming, tool, None, outputs, history=history
+ )
+
+
+def _precreate_fetched_hdas(trans, history, target, outputs):
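+ # Pre-create an HDA for each element so the data fetch tool can later match the
+ # discovered file back to this placeholder via the "object_id" recorded below.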
+ for item in target.get("elements", []):
+ name = item.get("name", None)
+ if name is None:
+ src = item.get("src", None)
+ if src == "url":
+ url = item.get("url")
+ if name is None:
+ name = url.split("/")[-1]
+ elif src == "path":
+ path = item["path"]
+ if name is None:
+ name = os.path.basename(path)
+
+ file_type = item.get("ext", "auto")
+ dbkey = item.get("dbkey", "?")
+ uploaded_dataset = Bunch(
+ type='file', name=name, file_type=file_type, dbkey=dbkey
+ )
+ data = upload_common.new_upload(trans, '', uploaded_dataset, library_bunch=None, history=history)
+ outputs.append(data)
+ item["object_id"] = data.id
+
+
+def _precreate_fetched_collection_instance(trans, history, target, outputs):
+ collection_type = target.get("collection_type")
+ if not collection_type:
+ # Can't precreate collections of unknown type at this time.
+ return
+
+ name = target.get("name")
+ if not name:
+ return
+
+ collections_service = trans.app.dataset_collections_service
+ collection_type_description = collections_service.collection_type_descriptions.for_collection_type(collection_type)
+ structure = UnitializedTree(collection_type_description)
+ hdca = collections_service.precreate_dataset_collection_instance(
+ trans, history, name, structure=structure
+ )
+ outputs.append(hdca)
+ # The following flush is needed so the new HDCA gets an ID.
+ trans.sa_session.flush()
+ target["destination"]["object_id"] = hdca.id
diff --git a/lib/galaxy/tools/actions/upload_common.py b/lib/galaxy/tools/actions/upload_common.py
index 44550ea1dde0..4a686a107d45 100644
--- a/lib/galaxy/tools/actions/upload_common.py
+++ b/lib/galaxy/tools/actions/upload_common.py
@@ -16,7 +16,7 @@
from urllib.parse import urlparse
from galaxy import datatypes, util
-from galaxy.exceptions import ObjectInvalid
+from galaxy.exceptions import ConfigDoesNotAllowException, ObjectInvalid
from galaxy.managers import tags
from galaxy.util import unicodify
from galaxy.util.odict import odict
@@ -102,7 +102,7 @@ def validate_url(url, ip_whitelist):
pass
else:
# Otherwise, we deny access.
- raise Exception("Access to this address in not permitted by server configuration")
+ raise ConfigDoesNotAllowException("Access to this address is not permitted by server configuration")
return url
@@ -123,7 +123,7 @@ def persist_uploads(params, trans):
local_filename=local_filename)
elif type(f) == dict and 'local_filename' not in f:
raise Exception('Uploaded file was encoded in a way not understood by Galaxy.')
- if upload_dataset['url_paste'] and upload_dataset['url_paste'].strip() != '':
+ if 'url_paste' in upload_dataset and upload_dataset['url_paste'] and upload_dataset['url_paste'].strip() != '':
upload_dataset['url_paste'] = datatypes.sniff.stream_to_file(
StringIO(validate_url(upload_dataset['url_paste'], trans.app.config.fetch_url_whitelist_ips)),
prefix="strio_url_paste_"
@@ -380,7 +380,7 @@ def _chown(path):
return json_file_path
-def create_job(trans, params, tool, json_file_path, data_list, folder=None, history=None, job_params=None):
+def create_job(trans, params, tool, json_file_path, outputs, folder=None, history=None, job_params=None):
"""
Create the upload job.
"""
@@ -408,21 +408,28 @@ def create_job(trans, params, tool, json_file_path, data_list, folder=None, hist
job.add_parameter(name, value)
job.add_parameter('paramfile', dumps(json_file_path))
object_store_id = None
- for i, dataset in enumerate(data_list):
- if folder:
- job.add_output_library_dataset('output%i' % i, dataset)
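+ # Outputs may be pre-created dataset collection instances (HDCAs) or individual
+ # datasets (HDAs or library datasets); collections are attached to the job directly,
+ # while datasets also get a backing file created in the object store.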
+ for i, output_object in enumerate(outputs):
+ output_name = "output%i" % i
+ if hasattr(output_object, "collection"):
+ job.add_output_dataset_collection(output_name, output_object)
+ output_object.job = job
else:
- job.add_output_dataset('output%i' % i, dataset)
- # Create an empty file immediately
- if not dataset.dataset.external_filename:
- dataset.dataset.object_store_id = object_store_id
- try:
- trans.app.object_store.create(dataset.dataset)
- except ObjectInvalid:
- raise Exception('Unable to create output dataset: object store is full')
- object_store_id = dataset.dataset.object_store_id
- trans.sa_session.add(dataset)
- # open( dataset.file_name, "w" ).close()
+ dataset = output_object
+ if folder:
+ job.add_output_library_dataset(output_name, dataset)
+ else:
+ job.add_output_dataset(output_name, dataset)
+ # Create an empty file immediately
+ if not dataset.dataset.external_filename:
+ dataset.dataset.object_store_id = object_store_id
+ try:
+ trans.app.object_store.create(dataset.dataset)
+ except ObjectInvalid:
+ raise Exception('Unable to create output dataset: object store is full')
+ object_store_id = dataset.dataset.object_store_id
+
+ trans.sa_session.add(output_object)
+
job.object_store_id = object_store_id
job.set_state(job.states.NEW)
job.set_handler(tool.get_job_handler(None))
@@ -436,8 +443,9 @@ def create_job(trans, params, tool, json_file_path, data_list, folder=None, hist
trans.app.job_manager.job_queue.put(job.id, job.tool_id)
trans.log_event("Added job to the job queue, id: %s" % str(job.id), tool_id=job.tool_id)
output = odict()
- for i, v in enumerate(data_list):
- output['output%i' % i] = v
+ for i, v in enumerate(outputs):
+ if not hasattr(v, "collection_type"):
+ output['output%i' % i] = v
return job, output
diff --git a/lib/galaxy/tools/data_fetch.py b/lib/galaxy/tools/data_fetch.py
new file mode 100644
index 000000000000..858d0c8d2349
--- /dev/null
+++ b/lib/galaxy/tools/data_fetch.py
@@ -0,0 +1,325 @@
+import argparse
+import errno
+import json
+import os
+import shutil
+import sys
+import tempfile
+
+import bdbag.bdbag_api
+
+from galaxy.datatypes import sniff
+from galaxy.datatypes.registry import Registry
+from galaxy.datatypes.upload_util import (
+ handle_sniffable_binary_check,
+ handle_unsniffable_binary_check,
+ UploadProblemException,
+)
+from galaxy.util import in_directory
+from galaxy.util.checkers import (
+ check_binary,
+ check_html,
+)
+from galaxy.util.compression_utils import CompressedFile
+
+DESCRIPTION = """Data Import Script"""
+
+
+def main(argv=None):
+ if argv is None:
+ argv = sys.argv[1:]
+ args = _arg_parser().parse_args(argv)
+
+ registry = Registry()
+ registry.load_datatypes(root_dir=args.galaxy_root, config=args.datatypes_registry)
+
+ request_path = args.request
+ assert os.path.exists(request_path)
+ with open(request_path) as f:
+ request = json.load(f)
+
+ upload_config = UploadConfig(request, registry)
+ galaxy_json = _request_to_galaxy_json(upload_config, request)
+ with open("galaxy.json", "w") as f:
+ json.dump(galaxy_json, f)
+
+
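+# The request file is expected to look roughly like this (illustrative sketch only):
+#
+# {"targets": [
+# {"destination": {"type": "hdas"},
+# "elements": [{"src": "url", "url": "https://example.org/1.bed", "ext": "bed"}]}
+# ]}
+#
+# Each target is fetched and reported back through galaxy.json under "__unnamed_outputs".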
+def _request_to_galaxy_json(upload_config, request):
+ targets = request.get("targets", [])
+ fetched_targets = []
+
+ for target in targets:
+ fetched_target = _fetch_target(upload_config, target)
+ fetched_targets.append(fetched_target)
+
+ return {"__unnamed_outputs": fetched_targets}
+
+
+def _fetch_target(upload_config, target):
+ destination = target.get("destination", None)
+ assert destination, "No destination defined."
+
+ def expand_elements_from(target_or_item):
+ elements_from = target_or_item.get("elements_from", None)
+ items = None
+ if elements_from:
+ if elements_from == "archive":
+ decompressed_directory = _decompress_target(target_or_item)
+ items = _directory_to_items(decompressed_directory)
+ elif elements_from == "bagit":
+ _, elements_from_path = _has_src_to_path(target_or_item)
+ items = _bagit_to_items(elements_from_path)
+ elif elements_from == "bagit_archive":
+ decompressed_directory = _decompress_target(target_or_item)
+ items = _bagit_to_items(decompressed_directory)
+ elif elements_from == "directory":
+ _, elements_from_path = _has_src_to_path(target_or_item)
+ items = _directory_to_items(elements_from_path)
+ else:
+ raise Exception("Unknown elements from type encountered [%s]" % elements_from)
+
+ if items:
+ del target_or_item["elements_from"]
+ target_or_item["elements"] = items
+
+ _for_each_src(expand_elements_from, target)
+ items = target.get("elements", None)
+ assert items is not None, "No element definition found for destination [%s]" % destination
+
+ fetched_target = {}
+ fetched_target["destination"] = destination
+ if "collection_type" in target:
+ fetched_target["collection_type"] = target["collection_type"]
+ if "name" in target:
+ fetched_target["name"] = target["name"]
+
+ def _resolve_src(item):
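+ """Resolve a single item with src "url" or "path" into an element dict for galaxy.json."""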
+ converted_path = None
+
+ name, path = _has_src_to_path(item)
+ dbkey = item.get("dbkey", "?")
+ requested_ext = item.get("ext", "auto")
+ info = item.get("info", None)
+ object_id = item.get("object_id", None)
+ link_data_only = upload_config.link_data_only
+ if "link_data_only" in item:
+ # Allow overriding this on a per file basis.
+ link_data_only = _link_data_only(item)
+ to_posix_lines = upload_config.get_option(item, "to_posix_lines")
+ space_to_tab = upload_config.get_option(item, "space_to_tab")
+ in_place = item.get("in_place", False)
+ purge_source = item.get("purge_source", True)
+
+ # Follow upload.py logic but without the auto-decompress logic.
+ registry = upload_config.registry
+ check_content = upload_config.check_content
+ data_type, ext = None, requested_ext
+
+ is_binary = check_binary(path)
+ if is_binary:
+ data_type, ext = handle_sniffable_binary_check(data_type, ext, path, registry)
+ if data_type is None:
+ root_datatype = registry.get_datatype_by_extension(ext)
+ if getattr(root_datatype, 'compressed', False):
+ data_type = 'compressed archive'
+ elif is_binary:
+ data_type, ext = handle_unsniffable_binary_check(
+ data_type, ext, path, name, is_binary, requested_ext, check_content, registry
+ )
+ if not data_type and check_content and check_html(path):
+ raise UploadProblemException('The uploaded file contains inappropriate HTML content')
+
+ if data_type != 'binary':
+ if not link_data_only:
+ if to_posix_lines:
+ if space_to_tab:
+ line_count, converted_path = sniff.convert_newlines_sep2tabs(path, in_place=in_place, tmp_dir=".")
+ else:
+ line_count, converted_path = sniff.convert_newlines(path, in_place=in_place, tmp_dir=".")
+ else:
+ if space_to_tab:
+ line_count, converted_path = sniff.sep2tabs(path, in_place=in_place, tmp_dir=".")
+
+ if requested_ext == 'auto':
+ ext = sniff.guess_ext(converted_path or path, registry.sniff_order)
+ else:
+ ext = requested_ext
+
+ data_type = ext
+
+ if ext == 'auto' and data_type == 'binary':
+ ext = 'data'
+ if ext == 'auto' and requested_ext:
+ ext = requested_ext
+ if ext == 'auto':
+ ext = 'data'
+
+ datatype = registry.get_datatype_by_extension(ext)
+ if link_data_only:
+ # Never alter a file that will not be copied to Galaxy's local file store.
+ if datatype.dataset_content_needs_grooming(path):
+ err_msg = 'The uploaded files need grooming, so change your Copy data into Galaxy? selection to be ' + \
+ 'Copy files into Galaxy instead of Link to files without copying into Galaxy so grooming can be performed.'
+ raise UploadProblemException(err_msg)
+
+ # If this file is not in the workdir make sure it gets there.
+ if not link_data_only and converted_path:
+ path = upload_config.ensure_in_working_directory(converted_path, purge_source, in_place)
+ elif not link_data_only:
+ path = upload_config.ensure_in_working_directory(path, purge_source, in_place)
+
+ if not link_data_only and datatype and datatype.dataset_content_needs_grooming(path):
+ # Groom the dataset content if necessary
+ datatype.groom_dataset_content(path)
+
+ rval = {"name": name, "filename": path, "dbkey": dbkey, "ext": ext, "link_data_only": link_data_only}
+ if info is not None:
+ rval["info"] = info
+ if object_id is not None:
+ rval["object_id"] = object_id
+ return rval
+
+ elements = elements_tree_map(_resolve_src, items)
+
+ fetched_target["elements"] = elements
+ return fetched_target
+
+
+def _bagit_to_items(directory):
+ bdbag.bdbag_api.resolve_fetch(directory)
+ bdbag.bdbag_api.validate_bag(directory)
+ items = _directory_to_items(os.path.join(directory, "data"))
+ return items
+
+
+def _decompress_target(target):
+ elements_from_name, elements_from_path = _has_src_to_path(target)
+ temp_directory = tempfile.mkdtemp(prefix=elements_from_name, dir=".")
+ decompressed_directory = CompressedFile(elements_from_path).extract(temp_directory)
+ return decompressed_directory
+
+
+def elements_tree_map(f, items):
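+ """Apply f to every leaf item while preserving any nested "elements" structure."""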
+ new_items = []
+ for item in items:
+ if "elements" in item:
+ new_item = item.copy()
+ new_item["elements"] = elements_tree_map(f, item["elements"])
+ new_items.append(new_item)
+ else:
+ new_items.append(f(item))
+ return new_items
+
+
+def _directory_to_items(directory):
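+ """Walk a directory and return a nested element tree.
+
+ Files become {"src": "path", "path": ...} items; each subdirectory becomes a
+ {"name": ..., "elements": [...]} entry containing the items found beneath it.
+ """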
+ items = []
+ dir_elements = {}
+ for root, dirs, files in os.walk(directory):
+ if root in dir_elements:
+ target = dir_elements[root]
+ else:
+ target = items
+ for dir in dirs:
+ dir_dict = {"name": dir, "elements": []}
+ dir_elements[os.path.join(root, dir)] = dir_dict["elements"]
+ target.append(dir_dict)
+ for file in files:
+ target.append({"src": "path", "path": os.path.join(root, file)})
+
+ return items
+
+
+def _has_src_to_path(item):
+ assert "src" in item, item
+ src = item.get("src")
+ name = item.get("name")
+ if src == "url":
+ url = item.get("url")
+ path = sniff.stream_url_to_file(url)
+ if name is None:
+ name = url.split("/")[-1]
+ else:
+ assert src == "path"
+ path = item["path"]
+ if name is None:
+ name = os.path.basename(path)
+ return name, path
+
+
+def _arg_parser():
+ parser = argparse.ArgumentParser(description=DESCRIPTION)
+ parser.add_argument("--galaxy-root")
+ parser.add_argument("--datatypes-registry")
+ parser.add_argument("--request-version")
+ parser.add_argument("--request")
+ return parser
+
+
+class UploadConfig(object):
+
+ def __init__(self, request, registry):
+ self.registry = registry
+ self.check_content = request.get("check_content", True)
+ self.to_posix_lines = request.get("to_posix_lines", False)
+ self.space_to_tab = request.get("space_to_tab", False)
+ self.link_data_only = _link_data_only(request)
+
+ self.__workdir = os.path.abspath(".")
+ self.__upload_count = 0
+
+ def get_option(self, item, key):
+ """Return item[key] if specified otherwise use default from UploadConfig.
+
+ This default represents the default for the whole request instead item which
+ is the option for individual files.
+ """
+ if key in item:
+ return item[key]
+ else:
+ return getattr(self, key)
+
+ def __new_dataset_path(self):
+ path = "gxupload_%d" % self.__upload_count
+ self.__upload_count += 1
+ return path
+
+ def ensure_in_working_directory(self, path, purge_source, in_place):
+ if in_directory(path, self.__workdir):
+ return path
+
+ new_path = self.__new_dataset_path()
+ if purge_source:
+ try:
+ shutil.move(path, new_path)
+ except OSError as e:
+ # We may not have permission to remove the source file.
+ if e.errno != errno.EACCES:
+ raise
+ else:
+ shutil.copy(path, new_path)
+
+ return new_path
+
+
+def _link_data_only(has_config_dict):
+ link_data_only = has_config_dict.get("link_data_only", False)
+ if not isinstance(link_data_only, bool):
+ # Allow the older string values of 'copy_files' and 'link_to_files'
+ link_data_only = link_data_only == "link_to_files"
+ return link_data_only
+
+
+def _for_each_src(f, obj):
+ if isinstance(obj, list):
+ for item in obj:
+ _for_each_src(f, item)
+ if isinstance(obj, dict):
+ if "src" in obj:
+ f(obj)
+ for key, value in obj.items():
+ _for_each_src(f, value)
+
+
+if __name__ == "__main__":
+ main()
diff --git a/lib/galaxy/tools/data_fetch.xml b/lib/galaxy/tools/data_fetch.xml
new file mode 100644
index 000000000000..5160cb2989c8
--- /dev/null
+++ b/lib/galaxy/tools/data_fetch.xml
@@ -0,0 +1,33 @@
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+ $request_json
+
+
+
+
+
diff --git a/lib/galaxy/tools/execute.py b/lib/galaxy/tools/execute.py
index 3b1fffa464e8..cda1755aa62b 100644
--- a/lib/galaxy/tools/execute.py
+++ b/lib/galaxy/tools/execute.py
@@ -275,9 +275,9 @@ def precreate_output_collections(self, history, params):
trans=trans,
parent=history,
name=output_collection_name,
+ structure=effective_structure,
implicit_inputs=implicit_inputs,
implicit_output_name=output_name,
- structure=effective_structure,
)
collection_instance.implicit_collection_jobs = implicit_collection_jobs
collection_instances[output_name] = collection_instance
diff --git a/lib/galaxy/tools/parameters/output_collect.py b/lib/galaxy/tools/parameters/output_collect.py
index f6b106884d72..93bf92eee08f 100644
--- a/lib/galaxy/tools/parameters/output_collect.py
+++ b/lib/galaxy/tools/parameters/output_collect.py
@@ -9,9 +9,11 @@
from collections import namedtuple
from galaxy import util
+from galaxy.dataset_collections.structure import UnitializedTree
from galaxy.tools.parser.output_collection_def import (
DEFAULT_DATASET_COLLECTOR_DESCRIPTION,
INPUT_DBKEY_TOKEN,
+ ToolProvidedMetadataDatasetCollection,
)
from galaxy.util import (
ExecutionTimer,
@@ -34,6 +36,9 @@ def get_new_dataset_meta_by_basename(self, output_name, basename):
def has_failed_outputs(self):
return False
+ def get_unnamed_outputs(self):
+ return []
+
class LegacyToolProvidedMetadata(object):
@@ -84,6 +89,9 @@ def has_failed_outputs(self):
return found_failed
+ def get_unnamed_outputs(self):
+ return []
+
class ToolProvidedMetadata(object):
@@ -124,14 +132,21 @@ def _elements_to_datasets(self, elements, level=0):
def has_failed_outputs(self):
found_failed = False
- for meta in self.tool_provided_job_metadata.values():
+ for output_name, meta in self.tool_provided_job_metadata.items():
+ if output_name == "__unnamed_outputs":
+ continue
+
if meta.get("failed", False):
found_failed = True
return found_failed
+ def get_unnamed_outputs(self):
+ log.debug("unnamed outputs [%s]" % self.tool_provided_job_metadata)
+ return self.tool_provided_job_metadata.get("__unnamed_outputs", [])
-def collect_dynamic_collections(
+
+def collect_dynamic_outputs(
tool,
output_collections,
tool_provided_metadata,
@@ -140,6 +155,7 @@ def collect_dynamic_collections(
job=None,
input_dbkey="?",
):
+ app = tool.app
collections_service = tool.app.dataset_collections_service
job_context = JobContext(
tool,
@@ -149,6 +165,145 @@ def collect_dynamic_collections(
inp_data,
input_dbkey,
)
+ # Unnamed outputs do not correspond to explicit outputs of the tool; they are inferred
+ # entirely from the tool-provided metadata (e.g. galaxy.json).
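+ # Each entry is expected to look roughly like (illustrative):
+ # {"destination": {"type": "hdas"},
+ # "elements": [{"name": ..., "filename": ..., "ext": ..., "dbkey": ..., "link_data_only": ...}]}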
+ for unnamed_output_dict in tool_provided_metadata.get_unnamed_outputs():
+ assert "destination" in unnamed_output_dict
+ assert "elements" in unnamed_output_dict
+ destination = unnamed_output_dict["destination"]
+ elements = unnamed_output_dict["elements"]
+
+ assert "type" in destination
+ destination_type = destination["type"]
+ assert destination_type in ["library_folder", "hdca", "hdas"]
+ trans = job_context.work_context
+
+ # three destination types we need to handle here - "library_folder" (place discovered files in a library folder),
+ # "hdca" (place discovered files in a history dataset collection), and "hdas" (place discovered files in a history
+ # as stand-alone datasets).
+ if destination_type == "library_folder":
+ # populate a library folder (the folder must already have been created)
+
+ library_folder_manager = app.library_folder_manager
+ library_folder = library_folder_manager.get(trans, app.security.decode_id(destination.get("library_folder_id")))
+
+ def add_elements_to_folder(elements, library_folder):
+ for element in elements:
+ if "elements" in element:
+ assert "name" in element
+ name = element["name"]
+ description = element.get("description")
+ nested_folder = library_folder_manager.create(trans, library_folder.id, name, description)
+ add_elements_to_folder(element["elements"], nested_folder)
+ else:
+ discovered_file = discovered_file_for_unnamed_output(element, job_working_directory)
+ fields_match = discovered_file.match
+ designation = fields_match.designation
+ visible = fields_match.visible
+ ext = fields_match.ext
+ dbkey = fields_match.dbkey
+ info = element.get("info", None)
+ link_data = discovered_file.match.link_data
+
+ # Create new primary dataset
+ name = fields_match.name or designation
+
+ job_context.create_dataset(
+ ext=ext,
+ designation=designation,
+ visible=visible,
+ dbkey=dbkey,
+ name=name,
+ filename=discovered_file.path,
+ info=info,
+ library_folder=library_folder,
+ link_data=link_data
+ )
+
+ add_elements_to_folder(elements, library_folder)
+ elif destination_type == "hdca":
+ # create or populate a dataset collection in the history
+ history = job.history
+ assert "collection_type" in unnamed_output_dict
+ object_id = destination.get("object_id")
+ if object_id:
+ sa_session = tool.app.model.context
+ hdca = sa_session.query(app.model.HistoryDatasetCollectionAssociation).get(int(object_id))
+ else:
+ name = unnamed_output_dict.get("name", "unnamed collection")
+ collection_type = unnamed_output_dict["collection_type"]
+ collection_type_description = collections_service.collection_type_descriptions.for_collection_type(collection_type)
+ structure = UnitializedTree(collection_type_description)
+ hdca = collections_service.precreate_dataset_collection_instance(
+ trans, history, name, structure=structure
+ )
+ filenames = odict.odict()
+
+ def add_to_discovered_files(elements, parent_identifiers=[]):
+ for element in elements:
+ if "elements" in element:
+ add_to_discovered_files(element["elements"], parent_identifiers + [element["name"]])
+ else:
+ discovered_file = discovered_file_for_unnamed_output(element, job_working_directory, parent_identifiers)
+ filenames[discovered_file.path] = discovered_file
+
+ add_to_discovered_files(elements)
+
+ collection = hdca.collection
+ collection_builder = collections_service.collection_builder_for(
+ collection
+ )
+ job_context.populate_collection_elements(
+ collection,
+ collection_builder,
+ filenames,
+ )
+ collection_builder.populate()
+ elif destination_type == "hdas":
+ # discover files as individual datasets for the target history
+ history = job.history
+
+ datasets = []
+
+ def collect_elements_for_history(elements):
+ for element in elements:
+ if "elements" in element:
+ collect_elements_for_history(element["elements"])
+ else:
+ discovered_file = discovered_file_for_unnamed_output(element, job_working_directory)
+ fields_match = discovered_file.match
+ designation = fields_match.designation
+ ext = fields_match.ext
+ dbkey = fields_match.dbkey
+ info = element.get("info", None)
+ link_data = discovered_file.match.link_data
+
+ # Create new primary dataset
+ name = fields_match.name or designation
+
+ hda_id = discovered_file.match.object_id
+ primary_dataset = None
+ if hda_id:
+ sa_session = tool.app.model.context
+ primary_dataset = sa_session.query(app.model.HistoryDatasetAssociation).get(hda_id)
+
+ dataset = job_context.create_dataset(
+ ext=ext,
+ designation=designation,
+ visible=True,
+ dbkey=dbkey,
+ name=name,
+ filename=discovered_file.path,
+ info=info,
+ link_data=link_data,
+ primary_data=primary_dataset,
+ )
+ dataset.raw_set_dataset_state('ok')
+ if not hda_id:
+ datasets.append(dataset)
+
+ collect_elements_for_history(elements)
+ job.history.add_datasets(job_context.sa_session, datasets)
for name, has_collection in output_collections.items():
if name not in tool.output_collections:
@@ -168,13 +323,19 @@ def collect_dynamic_collections(
collection.populated_state = collection.populated_states.NEW
try:
+
collection_builder = collections_service.collection_builder_for(
collection
)
+ dataset_collectors = map(dataset_collector, output_collection_def.dataset_collector_descriptions)
+ output_name = output_collection_def.name
+ filenames = job_context.find_files(output_name, collection, dataset_collectors)
job_context.populate_collection_elements(
collection,
collection_builder,
- output_collection_def,
+ filenames,
+ name=output_collection_def.name,
+ metadata_source_name=output_collection_def.metadata_source,
)
collection_builder.populate()
except Exception:
@@ -194,6 +355,11 @@ def __init__(self, tool, tool_provided_metadata, job, job_working_directory, inp
self.tool_provided_metadata = tool_provided_metadata
self._permissions = None
+ @property
+ def work_context(self):
+ from galaxy.work.context import WorkRequestContext
+ return WorkRequestContext(self.app, user=self.job.user)
+
@property
def permissions(self):
if self._permissions is None:
@@ -214,15 +380,14 @@ def find_files(self, output_name, collection, dataset_collectors):
filenames[discovered_file.path] = discovered_file
return filenames
- def populate_collection_elements(self, collection, root_collection_builder, output_collection_def):
+ def populate_collection_elements(self, collection, root_collection_builder, filenames, name=None, metadata_source_name=None):
# TODO: allow configurable sorting.
#
#
#
#
- dataset_collectors = map(dataset_collector, output_collection_def.dataset_collector_descriptions)
- output_name = output_collection_def.name
- filenames = self.find_files(output_name, collection, dataset_collectors)
+ if name is None:
+ name = "unnamed output"
element_datasets = []
for filename, discovered_file in filenames.items():
@@ -241,6 +406,8 @@ def populate_collection_elements(self, collection, root_collection_builder, outp
# Create new primary dataset
name = fields_match.name or designation
+ link_data = discovered_file.match.link_data
+
dataset = self.create_dataset(
ext=ext,
designation=designation,
@@ -248,14 +415,15 @@ def populate_collection_elements(self, collection, root_collection_builder, outp
dbkey=dbkey,
name=name,
filename=filename,
- metadata_source_name=output_collection_def.metadata_source,
+ metadata_source_name=metadata_source_name,
+ link_data=link_data,
)
log.debug(
"(%s) Created dynamic collection dataset for path [%s] with element identifier [%s] for output [%s] %s",
self.job.id,
filename,
designation,
- output_collection_def.name,
+ name,
create_dataset_timer,
)
element_datasets.append((element_identifiers, dataset))
@@ -270,7 +438,7 @@ def populate_collection_elements(self, collection, root_collection_builder, outp
log.debug(
"(%s) Add dynamic collection datsets to history for output [%s] %s",
self.job.id,
- output_collection_def.name,
+ name,
add_datasets_timer,
)
@@ -300,12 +468,24 @@ def create_dataset(
dbkey,
name,
filename,
- metadata_source_name,
+ metadata_source_name=None,
+ info=None,
+ library_folder=None,
+ link_data=False,
+ primary_data=None,
):
app = self.app
sa_session = self.sa_session
- primary_data = _new_hda(app, sa_session, ext, designation, visible, dbkey, self.permissions)
+ if primary_data is None:
+ if not library_folder:
+ primary_data = _new_hda(app, sa_session, ext, designation, visible, dbkey, self.permissions)
+ else:
+ primary_data = _new_ldda(self.work_context, name, ext, visible, dbkey, library_folder)
+ else:
+ primary_data.extension = ext
+ primary_data.visible = visible
+ primary_data.dbkey = dbkey
# Copy metadata from one of the inputs if requested.
metadata_source = None
@@ -314,7 +494,11 @@ def create_dataset(
sa_session.flush()
# Move data from temp location to dataset location
- app.object_store.update_from_file(primary_data.dataset, file_name=filename, create=True)
+ if not link_data:
+ app.object_store.update_from_file(primary_data.dataset, file_name=filename, create=True)
+ else:
+ primary_data.link_to(filename)
+
primary_data.set_size()
# If match specified a name use otherwise generate one from
# designation.
@@ -325,6 +509,9 @@ def create_dataset(
else:
primary_data.init_meta()
+ if info is not None:
+ primary_data.info = info
+
primary_data.set_meta()
primary_data.set_peek()
@@ -491,6 +678,20 @@ def discover_files(output_name, tool_provided_metadata, extra_file_collectors, j
yield DiscoveredFile(match.path, collector, match)
+def discovered_file_for_unnamed_output(dataset, job_working_directory, parent_identifiers=[]):
+ extra_file_collector = DEFAULT_TOOL_PROVIDED_DATASET_COLLECTOR
+ target_directory = discover_target_directory(extra_file_collector.directory, job_working_directory)
+ filename = dataset["filename"]
+ # handle link_data_only here, verify filename is in directory if not linking...
+ if not dataset.get("link_data_only"):
+ path = os.path.join(target_directory, filename)
+ if not util.in_directory(path, target_directory):
+ raise Exception("Problem with tool configuration, attempting to pull in datasets from outside working directory.")
+ else:
+ path = filename
+ return DiscoveredFile(path, extra_file_collector, JsonCollectedDatasetMatch(dataset, extra_file_collector, filename, path=path, parent_identifiers=parent_identifiers))
+
+
def discover_target_directory(dir_name, job_working_directory):
if dir_name:
directory = os.path.join(job_working_directory, dir_name)
@@ -605,11 +806,12 @@ def _compose(f, g):
class JsonCollectedDatasetMatch(object):
- def __init__(self, as_dict, collector, filename, path=None):
+ def __init__(self, as_dict, collector, filename, path=None, parent_identifiers=[]):
self.as_dict = as_dict
self.collector = collector
self.filename = filename
self.path = path
+ self._parent_identifiers = parent_identifiers
@property
def designation(self):
@@ -627,7 +829,7 @@ def designation(self):
@property
def element_identifiers(self):
- return self.raw_element_identifiers or [self.designation]
+ return self._parent_identifiers + (self.raw_element_identifiers or [self.designation])
@property
def raw_element_identifiers(self):
@@ -664,6 +866,14 @@ def visible(self):
except KeyError:
return self.collector.default_visible
+ @property
+ def link_data(self):
+ return bool(self.as_dict.get("link_data_only", False))
+
+ @property
+ def object_id(self):
+ return self.as_dict.get("object_id", None)
+
class RegexCollectedDatasetMatch(JsonCollectedDatasetMatch):
@@ -676,6 +886,42 @@ def __init__(self, re_match, collector, filename, path=None):
UNSET = object()
+def _new_ldda(
+ trans,
+ name,
+ ext,
+ visible,
+ dbkey,
+ library_folder,
+):
+ ld = trans.app.model.LibraryDataset(folder=library_folder, name=name)
+ trans.sa_session.add(ld)
+ trans.sa_session.flush()
+ trans.app.security_agent.copy_library_permissions(trans, library_folder, ld)
+
+ ldda = trans.app.model.LibraryDatasetDatasetAssociation(name=name,
+ extension=ext,
+ dbkey=dbkey,
+ library_dataset=ld,
+ user=trans.user,
+ create_dataset=True,
+ sa_session=trans.sa_session)
+ trans.sa_session.add(ldda)
+ ldda.state = ldda.states.OK
+ # Permissions must be the same on the LibraryDatasetDatasetAssociation and the associated LibraryDataset
+ trans.app.security_agent.copy_library_permissions(trans, ld, ldda)
+ # Copy the current user's DefaultUserPermissions to the new LibraryDatasetDatasetAssociation.dataset
+ trans.app.security_agent.set_all_dataset_permissions(ldda.dataset, trans.app.security_agent.user_get_default_permissions(trans.user))
+ library_folder.add_library_dataset(ld, genome_build=dbkey)
+ trans.sa_session.add(library_folder)
+ trans.sa_session.flush()
+
+ ld.library_dataset_dataset_association_id = ldda.id
+ trans.sa_session.add(ld)
+ trans.sa_session.flush()
+ return ldda
+
+
def _new_hda(
app,
sa_session,
@@ -702,3 +948,4 @@ def _new_hda(
DEFAULT_DATASET_COLLECTOR = DatasetCollector(DEFAULT_DATASET_COLLECTOR_DESCRIPTION)
+DEFAULT_TOOL_PROVIDED_DATASET_COLLECTOR = ToolMetadataDatasetCollector(ToolProvidedMetadataDatasetCollection())
diff --git a/lib/galaxy/tools/special_tools.py b/lib/galaxy/tools/special_tools.py
index 953e69dee647..129b7064a941 100644
--- a/lib/galaxy/tools/special_tools.py
+++ b/lib/galaxy/tools/special_tools.py
@@ -4,6 +4,7 @@
SPECIAL_TOOLS = {
"history export": "galaxy/tools/imp_exp/exp_history_to_archive.xml",
"history import": "galaxy/tools/imp_exp/imp_history_from_archive.xml",
+ "data fetch": "galaxy/tools/data_fetch.xml",
}
diff --git a/lib/galaxy/util/compression_utils.py b/lib/galaxy/util/compression_utils.py
index 7c70aaad6445..e4e4734d4d31 100644
--- a/lib/galaxy/util/compression_utils.py
+++ b/lib/galaxy/util/compression_utils.py
@@ -1,5 +1,10 @@
+from __future__ import absolute_import
+
import gzip
import io
+import logging
+import os
+import tarfile
import zipfile
from .checkers import (
@@ -8,6 +13,8 @@
is_gzip
)
+log = logging.getLogger(__name__)
+
def get_fileobj(filename, mode="r", compressed_formats=None):
"""
@@ -45,3 +52,121 @@ def get_fileobj(filename, mode="r", compressed_formats=None):
return io.TextIOWrapper(fh, encoding='utf-8')
else:
return fh
+
+
+class CompressedFile(object):
+
+ def __init__(self, file_path, mode='r'):
+ if tarfile.is_tarfile(file_path):
+ self.file_type = 'tar'
+ elif zipfile.is_zipfile(file_path) and not file_path.endswith('.jar'):
+ self.file_type = 'zip'
+ self.file_name = os.path.splitext(os.path.basename(file_path))[0]
+ if self.file_name.endswith('.tar'):
+ self.file_name = os.path.splitext(self.file_name)[0]
+ self.type = self.file_type
+ method = 'open_%s' % self.file_type
+ if hasattr(self, method):
+ self.archive = getattr(self, method)(file_path, mode)
+ else:
+ raise NameError('File type %s specified, no open method found.' % self.file_type)
+
+ def extract(self, path):
+ '''Extract the archive into the given path and return the path of the extracted content.'''
+ contents = self.getmembers()
+ extraction_path = path
+ common_prefix = ''
+ if len(contents) == 1:
+ # The archive contains a single item; if it is a file, extract it into a directory named after the archive.
+ if self.isfile(contents[0]):
+ extraction_path = os.path.join(path, self.file_name)
+ if not os.path.exists(extraction_path):
+ os.makedirs(extraction_path)
+ self.archive.extractall(extraction_path)
+ else:
+ # Get the common prefix for all the files in the archive. If the common prefix ends with a slash,
+ # or self.isdir() returns True, the archive contains a single directory with the desired contents.
+ # Otherwise, it contains multiple files and/or directories at the root of the archive.
+ common_prefix = os.path.commonprefix([self.getname(item) for item in contents])
+ if len(common_prefix) >= 1 and not common_prefix.endswith(os.sep) and self.isdir(self.getmember(common_prefix)):
+ common_prefix += os.sep
+ if not common_prefix.endswith(os.sep):
+ common_prefix = ''
+ extraction_path = os.path.join(path, self.file_name)
+ if not os.path.exists(extraction_path):
+ os.makedirs(extraction_path)
+ self.archive.extractall(extraction_path)
+ # Since .zip files store unix permissions separately, we need to iterate through the zip file
+ # and set permissions on extracted members.
+ if self.file_type == 'zip':
+ for zipped_file in contents:
+ filename = self.getname(zipped_file)
+ absolute_filepath = os.path.join(extraction_path, filename)
+ external_attributes = self.archive.getinfo(filename).external_attr
+ # The 2 least significant bytes are irrelevant, the next two contain unix permissions.
+ unix_permissions = external_attributes >> 16
+ if unix_permissions != 0:
+ if os.path.exists(absolute_filepath):
+ os.chmod(absolute_filepath, unix_permissions)
+ else:
+ log.warning("Unable to change permission on extracted file '%s' as it does not exist" % absolute_filepath)
+ return os.path.abspath(os.path.join(extraction_path, common_prefix))
+
+ def getmembers_tar(self):
+ return self.archive.getmembers()
+
+ def getmembers_zip(self):
+ return self.archive.infolist()
+
+ def getname_tar(self, item):
+ return item.name
+
+ def getname_zip(self, item):
+ return item.filename
+
+ def getmember(self, name):
+ for member in self.getmembers():
+ if self.getname(member) == name:
+ return member
+
+ def getmembers(self):
+ return getattr(self, 'getmembers_%s' % self.type)()
+
+ def getname(self, member):
+ return getattr(self, 'getname_%s' % self.type)(member)
+
+ def isdir(self, member):
+ return getattr(self, 'isdir_%s' % self.type)(member)
+
+ def isdir_tar(self, member):
+ return member.isdir()
+
+ def isdir_zip(self, member):
+ if member.filename.endswith(os.sep):
+ return True
+ return False
+
+ def isfile(self, member):
+ if not self.isdir(member):
+ return True
+ return False
+
+ def open_tar(self, filepath, mode):
+ return tarfile.open(filepath, mode, errorlevel=0)
+
+ def open_zip(self, filepath, mode):
+ return zipfile.ZipFile(filepath, mode)
+
+ def zipfile_ok(self, path_to_archive):
+ """
+ This check is a bit pedantic and not strictly necessary: it verifies that no archive member
+ resolves to a path outside the extraction directory, because ZipFile.extractall() has some
+ potential security holes. See the Python zipfile documentation for more details.
+ """
+ basename = os.path.realpath(os.path.dirname(path_to_archive))
+ zip_archive = zipfile.ZipFile(path_to_archive)
+ for member in zip_archive.namelist():
+ member_path = os.path.realpath(os.path.join(basename, member))
+ if not member_path.startswith(basename):
+ return False
+ return True
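+
+
+# Typical usage (sketch; the archive path is illustrative):
+# extracted_dir = CompressedFile("/tmp/example.tar.gz").extract("/tmp/work")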
diff --git a/lib/galaxy/webapps/galaxy/api/_fetch_util.py b/lib/galaxy/webapps/galaxy/api/_fetch_util.py
new file mode 100644
index 000000000000..7c5e2ea8e543
--- /dev/null
+++ b/lib/galaxy/webapps/galaxy/api/_fetch_util.py
@@ -0,0 +1,217 @@
+import logging
+import os
+
+from galaxy.actions.library import (
+ validate_path_upload,
+ validate_server_directory_upload,
+)
+from galaxy.exceptions import (
+ RequestParameterInvalidException
+)
+from galaxy.tools.actions.upload_common import validate_url
+from galaxy.util import (
+ relpath,
+)
+
+log = logging.getLogger(__name__)
+
+VALID_DESTINATION_TYPES = ["library", "library_folder", "hdca", "hdas"]
+ELEMENTS_FROM_TYPE = ["archive", "bagit", "bagit_archive", "directory"]
+# These elements_from types cannot be symlinked to because they only exist during the upload.
+ELEMENTS_FROM_TRANSIENT_TYPES = ["archive", "bagit_archive"]
+
+
+def validate_and_normalize_targets(trans, payload):
+ """Validate and normalize all src references in fetch targets.
+
+ - Normalize ftp_import and server_dir src entries into simple path entries
+ with the relevant paths resolved and permissions / configuration checked.
+ - Check for file:// URLs in items with src of "url" and convert them into path
+ src items - after verifying path pastes are allowed and the user is an admin.
+ - Check for valid URLs to be fetched for http and https entries.
+ - Based on Galaxy configuration and upload types set purge_source and in_place
+ as needed for each upload.
+ """
+ targets = payload.get("targets", [])
+
+ for target in targets:
+ destination = _get_required_item(target, "destination", "Each target must specify a 'destination'")
+ destination_type = _get_required_item(destination, "type", "Each target destination must specify a 'type'")
+ if "object_id" in destination:
+ raise RequestParameterInvalidException("object_id not allowed to appear in the request.")
+
+ if destination_type not in VALID_DESTINATION_TYPES:
+ template = "Invalid target destination type [%s] encountered, must be one of %s"
+ msg = template % (destination_type, VALID_DESTINATION_TYPES)
+ raise RequestParameterInvalidException(msg)
+ if destination_type == "library":
+ library_name = _get_required_item(destination, "name", "Must specify a library name")
+ description = destination.get("description", "")
+ synopsis = destination.get("synopsis", "")
+ library = trans.app.library_manager.create(
+ trans, library_name, description=description, synopsis=synopsis
+ )
+ destination["type"] = "library_folder"
+ for key in ["name", "description", "synopsis"]:
+ if key in destination:
+ del destination[key]
+ destination["library_folder_id"] = trans.app.security.encode_id(library.root_folder.id)
+
+ # Unlike upload.py we don't transmit or use run_as_real_user in the job - we just make sure
+ # in_place and purge_source are set on the individual upload fetch sources as needed based
+ # on this.
+ run_as_real_user = trans.app.config.external_chown_script is not None # See comment in upload.py
+ purge_ftp_source = getattr(trans.app.config, 'ftp_upload_purge', True) and not run_as_real_user
+
+ payload["check_content"] = trans.app.config.check_upload_content
+
+ def check_src(item):
+ if "object_id" in item:
+ raise RequestParameterInvalidException("object_id not allowed to appear in the request.")
+
+ # Normalize file:// URLs into paths.
+ if item["src"] == "url" and item["url"].startswith("file://"):
+ item["src"] = "path"
+ item["path"] = item["url"][len("file://"):]
+ del item["path"]
+
+ if "in_place" in item:
+ raise RequestParameterInvalidException("in_place cannot be set in the upload request")
+
+ src = item["src"]
+
+        # Check that link_data_only is only set for allowed src types and elements_from types.
+ _handle_invalid_link_data_only_elements_type(item)
+ if src not in ["path", "server_dir"]:
+ _handle_invalid_link_data_only_type(item)
+ elements_from = item.get("elements_from", None)
+ if elements_from and elements_from not in ELEMENTS_FROM_TYPE:
+ raise RequestParameterInvalidException("Invalid elements_from/items_from found in request")
+
+ if src == "path" or (src == "url" and item["url"].startswith("file:")):
+ # Validate is admin, leave alone.
+ validate_path_upload(trans)
+ elif src == "server_dir":
+ # Validate and replace with path definition.
+ server_dir = item["server_dir"]
+ full_path, _ = validate_server_directory_upload(trans, server_dir)
+ item["src"] = "path"
+ item["path"] = full_path
+ elif src == "ftp_import":
+ ftp_path = item["ftp_path"]
+ full_path = None
+
+            # It'd be nice if this could be de-duplicated with what is in parameters/grouping.py.
+ user_ftp_dir = trans.user_ftp_dir
+ is_directory = False
+
+ assert not os.path.islink(user_ftp_dir), "User FTP directory cannot be a symbolic link"
+ for (dirpath, dirnames, filenames) in os.walk(user_ftp_dir):
+ for filename in filenames:
+ if ftp_path == filename:
+ path = relpath(os.path.join(dirpath, filename), user_ftp_dir)
+ if not os.path.islink(os.path.join(dirpath, filename)):
+ full_path = os.path.abspath(os.path.join(user_ftp_dir, path))
+ break
+
+ for dirname in dirnames:
+ if ftp_path == dirname:
+ path = relpath(os.path.join(dirpath, dirname), user_ftp_dir)
+ if not os.path.islink(os.path.join(dirpath, dirname)):
+ full_path = os.path.abspath(os.path.join(user_ftp_dir, path))
+ is_directory = True
+ break
+
+        if is_directory:
+            # If the target is a directory - reject it if any file or
+            # directory beneath it is a symbolic link.
+            for (dirpath, dirnames, filenames) in os.walk(full_path):
+                for filename in filenames:
+                    if os.path.islink(os.path.join(dirpath, filename)):
+                        full_path = False
+                        break
+
+                for dirname in dirnames:
+                    if os.path.islink(os.path.join(dirpath, dirname)):
+                        full_path = False
+                        break
+
+                if full_path is False:
+                    break
+
+ if not full_path:
+                raise RequestParameterInvalidException("Failed to find referenced ftp_path or symbolic link was encountered")
+
+ item["src"] = "path"
+ item["path"] = full_path
+ item["purge_source"] = purge_ftp_source
+ elif src == "url":
+ url = item["url"]
+ looks_like_url = False
+ for url_prefix in ["http://", "https://", "ftp://", "ftps://"]:
+ if url.startswith(url_prefix):
+ looks_like_url = True
+ break
+
+ if not looks_like_url:
+ raise RequestParameterInvalidException("Invalid URL [%s] found in src definition." % url)
+
+ validate_url(url, trans.app.config.fetch_url_whitelist_ips)
+ item["in_place"] = run_as_real_user
+ elif src == "files":
+ item["in_place"] = run_as_real_user
+
+ # Small disagreement with traditional uploads - we purge less by default since whether purging
+ # happens varies based on upload options in non-obvious ways.
+ # https://github.com/galaxyproject/galaxy/issues/5361
+ if "purge_source" not in item:
+ item["purge_source"] = False
+
+ _replace_request_syntax_sugar(targets)
+ _for_each_src(check_src, targets)
+
+
+def _replace_request_syntax_sugar(obj):
+    # To make requests for data libraries and HDAs read more naturally, allow items and items_from
+    # in place of elements and elements_from. This is destructive and modifies the supplied request in place.
+ if isinstance(obj, list):
+ for el in obj:
+ _replace_request_syntax_sugar(el)
+ elif isinstance(obj, dict):
+ if "items" in obj:
+ obj["elements"] = obj["items"]
+ del obj["items"]
+ if "items_from" in obj:
+ obj["elements_from"] = obj["items_from"]
+ del obj["items_from"]
+ for value in obj.values():
+ _replace_request_syntax_sugar(value)
+
+
+def _handle_invalid_link_data_only_type(item):
+ link_data_only = item.get("link_data_only", False)
+ if link_data_only:
+ raise RequestParameterInvalidException("link_data_only is invalid for src type [%s]" % item.get("src"))
+
+
+def _handle_invalid_link_data_only_elements_type(item):
+ link_data_only = item.get("link_data_only", False)
+ if link_data_only and item.get("elements_from", False) in ELEMENTS_FROM_TRANSIENT_TYPES:
+ raise RequestParameterInvalidException("link_data_only is invalid for derived elements from [%s]" % item.get("elements_from"))
+
+
+def _get_required_item(from_dict, key, message):
+ if key not in from_dict:
+ raise RequestParameterInvalidException(message)
+ return from_dict[key]
+
+
+def _for_each_src(f, obj):
+ if isinstance(obj, list):
+ for item in obj:
+ _for_each_src(f, item)
+ if isinstance(obj, dict):
+ if "src" in obj:
+ f(obj)
+ for key, value in obj.items():
+ _for_each_src(f, value)
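
A minimal sketch (not part of the patch) of the file:// normalization performed by check_src above; the path is a hypothetical example, and the resulting "path" item is still subject to the admin-only validate_path_upload check:

    # Hypothetical incoming item using a file:// URL.
    item = {"src": "url", "url": "file:///data/shared/4.bed"}
    if item["src"] == "url" and item["url"].startswith("file://"):
        item["src"] = "path"
        item["path"] = item["url"][len("file://"):]
        del item["url"]
    assert item == {"src": "path", "path": "/data/shared/4.bed"}
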
diff --git a/lib/galaxy/webapps/galaxy/api/library_contents.py b/lib/galaxy/webapps/galaxy/api/library_contents.py
index 5b873f240878..58201190f5c8 100644
--- a/lib/galaxy/webapps/galaxy/api/library_contents.py
+++ b/lib/galaxy/webapps/galaxy/api/library_contents.py
@@ -185,7 +185,8 @@ def create(self, trans, library_id, payload, **kwd):
* upload_option: (optional) one of 'upload_file' (default), 'upload_directory' or 'upload_paths'
* server_dir: (optional, only if upload_option is
'upload_directory') relative path of the subdirectory of Galaxy
- ``library_import_dir`` to upload. All and only the files (i.e.
+ ``library_import_dir`` (if admin) or ``user_library_import_dir``
+ (if non-admin) to upload. All and only the files (i.e.
no subdirectories) contained in the specified directory will be
uploaded.
* filesystem_paths: (optional, only if upload_option is
diff --git a/lib/galaxy/webapps/galaxy/api/tools.py b/lib/galaxy/webapps/galaxy/api/tools.py
index fa0a0804b7fb..08576ee812d3 100644
--- a/lib/galaxy/webapps/galaxy/api/tools.py
+++ b/lib/galaxy/webapps/galaxy/api/tools.py
@@ -15,9 +15,14 @@
from galaxy.web import _future_expose_api_raw_anonymous_and_sessionless as expose_api_raw_anonymous_and_sessionless
from galaxy.web.base.controller import BaseAPIController
from galaxy.web.base.controller import UsesVisualizationMixin
+from ._fetch_util import validate_and_normalize_targets
log = logging.getLogger(__name__)
+# Do not allow these tools to be called directly - the dedicated fetch API endpoint enforces
+# extra security and provides access to them via a different route.
+PROTECTED_TOOLS = ["__DATA_FETCH__"]
+
class ToolsController(BaseAPIController, UsesVisualizationMixin):
"""
@@ -361,12 +366,52 @@ def download(self, trans, id, **kwds):
trans.response.headers["Content-Disposition"] = 'attachment; filename="%s.tgz"' % (id)
return download_file
+ @expose_api_anonymous
+ def fetch(self, trans, payload, **kwd):
+ """Adapt clean API to tool-constrained API.
+ """
+ log.info("Keywords are %s" % payload)
+ request_version = '1'
+ history_id = payload.pop("history_id")
+ clean_payload = {}
+ files_payload = {}
+ for key, value in payload.items():
+ if key == "key":
+ continue
+ if key.startswith('files_') or key.startswith('__files_'):
+ files_payload[key] = value
+ continue
+ clean_payload[key] = value
+ log.info("payload %s" % clean_payload)
+ validate_and_normalize_targets(trans, clean_payload)
+ clean_payload["check_content"] = trans.app.config.check_upload_content
+ request = dumps(clean_payload)
+ log.info(request)
+ create_payload = {
+ 'tool_id': "__DATA_FETCH__",
+ 'history_id': history_id,
+ 'inputs': {
+ 'request_version': request_version,
+ 'request_json': request,
+ },
+ }
+ create_payload.update(files_payload)
+ return self._create(trans, create_payload, **kwd)
+
@expose_api_anonymous
def create(self, trans, payload, **kwd):
"""
POST /api/tools
Executes tool using specified inputs and returns tool's outputs.
"""
+ tool_id = payload.get("tool_id")
+ if tool_id in PROTECTED_TOOLS:
+ raise exceptions.RequestParameterInvalidException("Cannot execute tool [%s] directly, must use alternative endpoint." % tool_id)
+ if tool_id is None:
+ raise exceptions.RequestParameterInvalidException("Must specify a valid tool_id to use this endpoint.")
+ return self._create(trans, payload, **kwd)
+
+ def _create(self, trans, payload, **kwd):
# HACK: for now, if action is rerun, rerun tool.
action = payload.get('action', None)
if action == 'rerun':
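
For illustration only, a sketch of the reshaping the fetch method above performs once validate_and_normalize_targets has run: the history id, API key, and any file attachments are pulled aside and the rest of the payload is serialized into request_json for the __DATA_FETCH__ tool. The encoded id and attachment placeholder are hypothetical:

    import json

    payload = {
        "history_id": "f2db41e1fa331b3e",           # hypothetical encoded history id
        "targets": [{"destination": {"type": "hdas"}, "elements": [{"src": "files"}]}],
        "files_0|file_data": "<attached upload>",   # forwarded untouched alongside the tool request
    }
    files_payload = {k: v for k, v in payload.items() if k.startswith(("files_", "__files_"))}
    clean_payload = {k: v for k, v in payload.items()
                     if k not in files_payload and k not in ("history_id", "key")}
    create_payload = {
        "tool_id": "__DATA_FETCH__",
        "history_id": payload["history_id"],
        "inputs": {"request_version": "1", "request_json": json.dumps(clean_payload)},
    }
    create_payload.update(files_payload)
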
diff --git a/lib/galaxy/webapps/galaxy/buildapp.py b/lib/galaxy/webapps/galaxy/buildapp.py
index f5ec41c4adfe..eda00a26ee40 100644
--- a/lib/galaxy/webapps/galaxy/buildapp.py
+++ b/lib/galaxy/webapps/galaxy/buildapp.py
@@ -271,6 +271,7 @@ def populate_api_routes(webapp, app):
# ====== TOOLS API ======
# =======================
+ webapp.mapper.connect('/api/tools/fetch', action='fetch', controller='tools', conditions=dict(method=["POST"]))
webapp.mapper.connect('/api/tools/all_requirements', action='all_requirements', controller="tools")
webapp.mapper.connect('/api/tools/{id:.+?}/build', action='build', controller="tools")
webapp.mapper.connect('/api/tools/{id:.+?}/reload', action='reload', controller="tools")
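
With the route above connected, the endpoint can be exercised by any HTTP client; a hedged sketch using requests, where the Galaxy URL, API key, and history id are placeholders and the fetched URL mirrors the API tests further down:

    import json
    import requests

    galaxy_url = "https://galaxy.example.org"       # placeholder
    api_key = "<api key>"                           # placeholder
    target = {
        "destination": {"type": "hdas"},
        "elements": [{"src": "url",
                      "url": "https://raw.githubusercontent.com/galaxyproject/galaxy/dev/test-data/4.bed"}],
    }
    payload = {
        "key": api_key,
        "history_id": "<encoded history id>",       # placeholder
        "targets": json.dumps([target]),
    }
    response = requests.post(galaxy_url + "/api/tools/fetch", data=payload)
    response.raise_for_status()
    print(response.json()["outputs"])
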
diff --git a/lib/galaxy/workflow/modules.py b/lib/galaxy/workflow/modules.py
index a7b4303badc3..d090ef512801 100644
--- a/lib/galaxy/workflow/modules.py
+++ b/lib/galaxy/workflow/modules.py
@@ -825,6 +825,9 @@ def execute(self, trans, progress, invocation_step, use_cached_job=False):
invocation = invocation_step.workflow_invocation
step = invocation_step.workflow_step
tool = trans.app.toolbox.get_tool(step.tool_id, tool_version=step.tool_version)
+ if not tool.is_workflow_compatible:
+ message = "Specified tool [%s] in workflow is not workflow-compatible." % tool.id
+ raise Exception(message)
tool_state = step.state
# Not strictly needed - but keep Tool state clean by stripping runtime
# metadata parameters from it.
diff --git a/lib/tool_shed/galaxy_install/tool_dependencies/recipe/step_handler.py b/lib/tool_shed/galaxy_install/tool_dependencies/recipe/step_handler.py
index f8dc0ee5cd76..917b2ead48ce 100644
--- a/lib/tool_shed/galaxy_install/tool_dependencies/recipe/step_handler.py
+++ b/lib/tool_shed/galaxy_install/tool_dependencies/recipe/step_handler.py
@@ -16,6 +16,7 @@
asbool,
download_to_file
)
+from galaxy.util.compression_utils import CompressedFile
from galaxy.util.template import fill_template
from tool_shed.galaxy_install.tool_dependencies.env_manager import EnvManager
from tool_shed.util import basic_util, tool_dependency_util
@@ -25,124 +26,6 @@
VIRTUALENV_URL = 'https://pypi.python.org/packages/d4/0c/9840c08189e030873387a73b90ada981885010dd9aea134d6de30cd24cb8/virtualenv-15.1.0.tar.gz'
-class CompressedFile(object):
-
- def __init__(self, file_path, mode='r'):
- if tarfile.is_tarfile(file_path):
- self.file_type = 'tar'
- elif zipfile.is_zipfile(file_path) and not file_path.endswith('.jar'):
- self.file_type = 'zip'
- self.file_name = os.path.splitext(os.path.basename(file_path))[0]
- if self.file_name.endswith('.tar'):
- self.file_name = os.path.splitext(self.file_name)[0]
- self.type = self.file_type
- method = 'open_%s' % self.file_type
- if hasattr(self, method):
- self.archive = getattr(self, method)(file_path, mode)
- else:
- raise NameError('File type %s specified, no open method found.' % self.file_type)
-
- def extract(self, path):
- '''Determine the path to which the archive should be extracted.'''
- contents = self.getmembers()
- extraction_path = path
- common_prefix = ''
- if len(contents) == 1:
- # The archive contains a single file, return the extraction path.
- if self.isfile(contents[0]):
- extraction_path = os.path.join(path, self.file_name)
- if not os.path.exists(extraction_path):
- os.makedirs(extraction_path)
- self.archive.extractall(extraction_path)
- else:
- # Get the common prefix for all the files in the archive. If the common prefix ends with a slash,
- # or self.isdir() returns True, the archive contains a single directory with the desired contents.
- # Otherwise, it contains multiple files and/or directories at the root of the archive.
- common_prefix = os.path.commonprefix([self.getname(item) for item in contents])
- if len(common_prefix) >= 1 and not common_prefix.endswith(os.sep) and self.isdir(self.getmember(common_prefix)):
- common_prefix += os.sep
- if not common_prefix.endswith(os.sep):
- common_prefix = ''
- extraction_path = os.path.join(path, self.file_name)
- if not os.path.exists(extraction_path):
- os.makedirs(extraction_path)
- self.archive.extractall(extraction_path)
- # Since .zip files store unix permissions separately, we need to iterate through the zip file
- # and set permissions on extracted members.
- if self.file_type == 'zip':
- for zipped_file in contents:
- filename = self.getname(zipped_file)
- absolute_filepath = os.path.join(extraction_path, filename)
- external_attributes = self.archive.getinfo(filename).external_attr
- # The 2 least significant bytes are irrelevant, the next two contain unix permissions.
- unix_permissions = external_attributes >> 16
- if unix_permissions != 0:
- if os.path.exists(absolute_filepath):
- os.chmod(absolute_filepath, unix_permissions)
- else:
- log.warning("Unable to change permission on extracted file '%s' as it does not exist" % absolute_filepath)
- return os.path.abspath(os.path.join(extraction_path, common_prefix))
-
- def getmembers_tar(self):
- return self.archive.getmembers()
-
- def getmembers_zip(self):
- return self.archive.infolist()
-
- def getname_tar(self, item):
- return item.name
-
- def getname_zip(self, item):
- return item.filename
-
- def getmember(self, name):
- for member in self.getmembers():
- if self.getname(member) == name:
- return member
-
- def getmembers(self):
- return getattr(self, 'getmembers_%s' % self.type)()
-
- def getname(self, member):
- return getattr(self, 'getname_%s' % self.type)(member)
-
- def isdir(self, member):
- return getattr(self, 'isdir_%s' % self.type)(member)
-
- def isdir_tar(self, member):
- return member.isdir()
-
- def isdir_zip(self, member):
- if member.filename.endswith(os.sep):
- return True
- return False
-
- def isfile(self, member):
- if not self.isdir(member):
- return True
- return False
-
- def open_tar(self, filepath, mode):
- return tarfile.open(filepath, mode, errorlevel=0)
-
- def open_zip(self, filepath, mode):
- return zipfile.ZipFile(filepath, mode)
-
- def zipfile_ok(self, path_to_archive):
- """
- This function is a bit pedantic and not functionally necessary. It checks whether there is
- no file pointing outside of the extraction, because ZipFile.extractall() has some potential
- security holes. See python zipfile documentation for more details.
- """
- basename = os.path.realpath(os.path.dirname(path_to_archive))
- zip_archive = zipfile.ZipFile(path_to_archive)
- for member in zip_archive.namelist():
- member_path = os.path.realpath(os.path.join(basename, member))
- if not member_path.startswith(basename):
- return False
- return True
-
-
class Download(object):
def url_download(self, install_dir, downloaded_file_name, download_url, extract=True, checksums={}):
diff --git a/scripts/api/fetch_to_library.py b/scripts/api/fetch_to_library.py
new file mode 100644
index 000000000000..6c497bcb402b
--- /dev/null
+++ b/scripts/api/fetch_to_library.py
@@ -0,0 +1,33 @@
+import argparse
+import json
+
+import requests
+import yaml
+
+
+def main():
+ parser = argparse.ArgumentParser(description='Upload a directory into a data library')
+ parser.add_argument("-u", "--url", dest="url", required=True, help="Galaxy URL")
+ parser.add_argument("-a", "--api", dest="api_key", required=True, help="API Key")
+ parser.add_argument('target', metavar='FILE', type=str,
+ help='file describing data library to fetch')
+ args = parser.parse_args()
+ with open(args.target, "r") as f:
+        target = yaml.safe_load(f)
+
+ histories_url = args.url + "/api/histories"
+ new_history_response = requests.post(histories_url, data={'key': args.api_key})
+
+ fetch_url = args.url + '/api/tools/fetch'
+ payload = {
+ 'key': args.api_key,
+ 'targets': json.dumps([target]),
+ 'history_id': new_history_response.json()["id"]
+ }
+
+ response = requests.post(fetch_url, data=payload)
+ print(response.content)
+
+
+if __name__ == '__main__':
+ main()
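
Assuming a local Galaxy at http://localhost:8080 and the example target file added below, a hypothetical invocation matching the argparse definition above would be: python scripts/api/fetch_to_library.py -u http://localhost:8080 -a <api key> scripts/api/fetch_to_library_example.yml
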
diff --git a/scripts/api/fetch_to_library_example.yml b/scripts/api/fetch_to_library_example.yml
new file mode 100644
index 000000000000..44bc35ef43b5
--- /dev/null
+++ b/scripts/api/fetch_to_library_example.yml
@@ -0,0 +1,42 @@
+destination:
+ type: library
+ name: Training Material
+ description: Data for selected tutorials from https://training.galaxyproject.org.
+items:
+ - name: Quality Control
+ description: |
+ Data for sequence quality control tutorial at http://galaxyproject.github.io/training-material/topics/sequence-analysis/tutorials/quality-control/tutorial.html.
+
+ 10.5281/zenodo.61771
+ items:
+ - src: url
+ url: https://zenodo.org/record/61771/files/GSM461178_untreat_paired_subset_1.fastq
+ name: GSM461178_untreat_paired_subset_1
+ ext: fastqsanger
+ info: Untreated subseq of GSM461178 from 10.1186/s12864-017-3692-8
+ - src: url
+ url: https://zenodo.org/record/61771/files/GSM461182_untreat_single_subset.fastq
+ name: GSM461182_untreat_single_subset
+ ext: fastqsanger
+ info: Untreated subseq of GSM461182 from 10.1186/s12864-017-3692-8
+ - name: Small RNA-Seq
+ description: |
+ Data for small RNA-seq tutorial available at http://galaxyproject.github.io/training-material/topics/transcriptomics/tutorials/srna/tutorial.html
+
+ 10.5281/zenodo.826906
+ items:
+ - src: url
+ url: https://zenodo.org/record/826906/files/Symp_RNAi_sRNA-seq_rep1_downsampled.fastqsanger.gz
+ name: Symp RNAi sRNA Rep1
+ ext: fastqsanger.gz
+ info: Downsample rep1 from 10.1186/s12864-017-3692-8
+ - src: url
+ url: https://zenodo.org/record/826906/files/Symp_RNAi_sRNA-seq_rep2_downsampled.fastqsanger.gz
+ name: Symp RNAi sRNA Rep2
+ ext: fastqsanger.gz
+ info: Downsample rep2 from 10.1186/s12864-017-3692-8
+ - src: url
+ url: https://zenodo.org/record/826906/files/Symp_RNAi_sRNA-seq_rep3_downsampled.fastqsanger.gz
+ name: Symp RNAi sRNA Rep3
+ ext: fastqsanger.gz
+ info: Downsample rep3 from 10.1186/s12864-017-3692-8
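
A quick sanity-check sketch (not part of the patch) for the example file above, assuming it is read from the repository root; it only confirms the shape fetch_to_library.py forwards verbatim - a destination plus nested items:

    import yaml

    with open("scripts/api/fetch_to_library_example.yml") as f:
        target = yaml.safe_load(f)

    assert target["destination"]["type"] == "library"
    for group in target["items"]:
        # Each top-level group in this example nests its own list of url items.
        assert all(item["src"] == "url" for item in group["items"])
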
diff --git a/test-data/1.csv b/test-data/1.csv
new file mode 100644
index 000000000000..80d519fbe2d9
--- /dev/null
+++ b/test-data/1.csv
@@ -0,0 +1 @@
+Transaction_date,Product,Price,Payment_Type,Name,City,State,Country,Account_Created,Last_Login,Latitude,Longitude
1/2/09 6:17,Product1,1200,Mastercard,carolina,Basildon,England,United Kingdom,1/2/09 6:00,1/2/09 6:08,51.5,-1.1166667
1/2/09 4:53,Product1,1200,Visa,Betina,Parkville ,MO,United States,1/2/09 4:42,1/2/09 7:49,39.195,-94.68194
1/2/09 13:08,Product1,1200,Mastercard,Federica e Andrea,Astoria ,OR,United States,1/1/09 16:21,1/3/09 12:32,46.18806,-123.83
1/3/09 14:44,Product1,1200,Visa,Gouya,Echuca,Victoria,Australia,9/25/05 21:13,1/3/09 14:22,-36.1333333,144.75
1/4/09 12:56,Product2,3600,Visa,Gerd W ,Cahaba Heights ,AL,United States,11/15/08 15:47,1/4/09 12:45,33.52056,-86.8025
1/4/09 13:19,Product1,1200,Visa,LAURENCE,Mickleton ,NJ,United States,9/24/08 15:19,1/4/09 13:04,39.79,-75.23806
1/4/09 20:11,Product1,1200,Mastercard,Fleur,Peoria ,IL,United States,1/3/09 9:38,1/4/09 19:45,40.69361,-89.58889
1/2/09 20:09,Product1,1200,Mastercard,adam,Martin ,TN,United States,1/2/09 17:43,1/4/09 20:01,36.34333,-88.85028
1/4/09 13:17,Product1,1200,Mastercard,Renee Elisabeth,Tel Aviv,Tel Aviv,Israel,1/4/09 13:03,1/4/09 22:10,32.0666667,34.7666667
1/4/09 14:11,Product1,1200,Visa,Aidan,Chatou,Ile-de-France,France,6/3/08 4:22,1/5/09 1:17,48.8833333,2.15
1/5/09 2:42,Product1,1200,Diners,Stacy,New York ,NY,United States,1/5/09 2:23,1/5/09 4:59,40.71417,-74.00639
1/5/09 5:39,Product1,1200,Amex,Heidi,Eindhoven,Noord-Brabant,Netherlands,1/5/09 4:55,1/5/09 8:15,51.45,5.4666667
1/2/09 9:16,Product1,1200,Mastercard,Sean ,Shavano Park ,TX,United States,1/2/09 8:32,1/5/09 9:05,29.42389,-98.49333
1/5/09 10:08,Product1,1200,Visa,Georgia,Eagle ,ID,United States,11/11/08 15:53,1/5/09 10:05,43.69556,-116.35306
1/2/09 14:18,Product1,1200,Visa,Richard,Riverside ,NJ,United States,12/9/08 12:07,1/5/09 11:01,40.03222,-74.95778
1/25/09 17:58,Product2,3600,Visa,carol,Ann Arbor ,MI,United States,7/5/08 9:20,2/7/09 18:51,42.27083,-83.72639
1/9/09 14:37,Product1,1200,Visa,Nona,South Jordan ,UT,United States,1/8/09 15:14,2/7/09 19:11,40.56222,-111.92889
1/25/09 2:46,Product2,3600,Visa,Family,Dubai,Dubayy,United Arab Emirates,1/8/09 1:19,2/8/09 2:06,25.2522222,55.28
1/17/09 20:46,Product2,3600,Visa,Michelle,Dubai,Dubayy,United Arab Emirates,4/13/08 2:36,2/8/09 2:12,25.2522222,55.28
1/24/09 7:18,Product2,3600,Visa,Kathryn,Kirriemuir,Scotland,United Kingdom,1/23/09 10:31,2/8/09 2:52,56.6666667,-3
1/11/09 7:09,Product1,1200,Visa,Oswald,Tramore,Waterford,Ireland,10/13/08 16:43,2/8/09 3:02,52.1588889,-7.1463889
1/8/09 4:15,Product1,1200,Visa,Elyssa,Gdansk,Pomorskie,Poland,1/7/09 15:00,2/8/09 3:50,54.35,18.6666667
1/22/09 10:47,Product1,1200,Visa,michelle,Arklow,Wicklow,Ireland,11/18/08 1:32,2/8/09 5:07,52.7930556,-6.1413889
1/26/09 20:47,Product1,1200,Mastercard,Alicia,Lincoln ,NE,United States,6/24/08 8:05,2/8/09 7:29,40.8,-96.66667
1/12/09 12:22,Product1,1200,Mastercard,JP,Tierp,Uppsala,Sweden,1/6/09 11:34,2/8/09 11:15,60.3333333,17.5
1/26/09 1:44,Product2,3600,Visa,Geraldine,Brussels,Brussels (Bruxelles),Belgium,1/31/08 13:28,2/8/09 14:39,50.8333333,4.3333333
1/18/09 12:57,Product1,1200,Mastercard,sandra,Burr Oak ,IA,United States,1/24/08 16:11,2/8/09 15:30,43.45889,-91.86528
1/24/09 21:26,Product1,1200,Visa,Olivia,Wheaton ,IL,United States,5/8/08 16:02,2/8/09 16:00,41.86611,-88.10694
1/26/09 12:26,Product2,3600,Mastercard,Tom,Killeen ,TX,United States,1/26/09 5:23,2/8/09 17:33,31.11694,-97.7275
1/5/09 7:37,Product1,1200,Visa,Annette ,Manhattan ,NY,United States,9/26/08 4:29,2/8/09 18:42,40.71417,-74.00639
1/14/09 12:33,Product1,1200,Visa,SUSAN,Oxford,England,United Kingdom,9/11/08 23:23,2/8/09 23:00,51.75,-1.25
1/14/09 0:15,Product2,3600,Visa,Michael,Paris,Ile-de-France,France,11/28/08 0:07,2/9/09 1:30,48.8666667,2.3333333
1/1/09 12:42,Product1,1200,Visa,ashton,Exeter,England,United Kingdom,12/15/08 1:16,2/9/09 2:52,50.7,-3.5333333
1/6/09 6:07,Product1,1200,Visa,Scott,Rungsted,Frederiksborg,Denmark,12/27/08 14:29,2/9/09 4:20,55.8841667,12.5419444
1/15/09 5:11,Product2,3600,Visa,Pam,London,England,United Kingdom,7/11/06 12:43,2/9/09 4:42,51.52721,0.14559
1/17/09 4:03,Product1,1200,Visa,Lisa ,Borja,Bohol,Philippines,1/17/09 2:45,2/9/09 6:09,9.9136111,124.0927778
1/19/09 10:13,Product2,3600,Mastercard,Pavel,London,England,United Kingdom,2/28/06 5:35,2/9/09 6:57,51.51334,-0.08895
1/18/09 9:42,Product1,1200,Visa,Richard,Jamestown ,RI,United States,1/18/09 9:22,2/9/09 8:30,41.49694,-71.36778
1/9/09 11:14,Product1,1200,Visa,Jasinta Jeanne,Owings Mills ,MD,United States,1/9/09 10:43,2/9/09 9:17,39.41944,-76.78056
1/10/09 13:42,Product1,1200,Visa,Rachel,Hamilton,Ontario,Canada,1/10/09 12:22,2/9/09 9:54,43.25,-79.8333333
1/7/09 7:28,Product1,1200,Amex,Cherish ,Anchorage ,AK,United States,7/28/08 7:31,2/9/09 10:50,61.21806,-149.90028
1/18/09 6:46,Product1,1200,Visa,Shona ,Mornington,Meath,Ireland,1/15/09 9:13,2/9/09 11:55,53.7233333,-6.2825
1/30/09 12:18,Product1,1200,Mastercard,Abikay,Fullerton ,CA,United States,1/26/09 13:34,2/9/09 12:53,33.87028,-117.92444
1/6/09 5:42,Product1,1200,Amex,Abikay,Atlanta ,GA,United States,10/27/08 14:16,2/9/09 13:50,33.74889,-84.38806
1/2/09 10:58,Product2,3600,Visa,Kendra,Toronto,Ontario,Canada,1/2/09 10:38,2/9/09 13:56,43.6666667,-79.4166667
1/8/09 3:29,Product1,1200,Visa,amanda,Liverpool,England,United Kingdom,12/22/08 1:41,2/9/09 14:06,53.4166667,-3
1/12/09 13:23,Product2,3600,Amex,Leila,Ponte San Nicolo,Veneto,Italy,9/13/05 8:42,2/9/09 14:09,45.3666667,11.6166667
1/19/09 9:34,Product1,1200,Amex,amanda,Las Vegas ,NV,United States,5/10/08 8:56,2/9/09 16:44,36.175,-115.13639
1/9/09 7:49,Product1,1200,Visa,Stacy,Rochester Hills ,MI,United States,7/28/08 7:18,2/9/09 17:41,42.68056,-83.13389
1/15/09 5:27,Product2,3600,Visa,Derrick,North Bay,Ontario,Canada,1/6/09 17:42,2/9/09 18:22,46.3,-79.45
1/8/09 23:40,Product1,1200,Visa,Jacob,Lindfield,New South Wales,Australia,1/8/09 17:52,2/9/09 18:31,-33.7833333,151.1666667
1/27/09 11:02,Product1,1200,Mastercard,DOREEN,Madrid,Madrid,Spain,1/24/09 8:21,2/9/09 18:42,40.4,-3.6833333
1/14/09 13:23,Product1,1200,Diners,eugenia,Wisconsin Rapids ,WI,United States,11/15/08 13:57,2/9/09 18:44,44.38361,-89.81722
1/7/09 20:01,Product1,1200,Visa,Karen,Austin ,TX,United States,1/6/09 19:16,2/9/09 19:56,30.26694,-97.74278
1/20/09 12:32,Product1,1200,Visa,Bea,Chicago ,IL,United States,1/16/09 19:08,2/9/09 20:42,41.85,-87.65
1/6/09 14:35,Product1,1200,Diners,Hilde Karin,Las Vegas ,NV,United States,12/17/08 11:59,2/9/09 22:59,36.175,-115.13639
1/4/09 6:51,Product1,1200,Visa,Rima,Mullingar,Westmeath,Ireland,1/3/09 12:34,2/10/09 0:59,53.5333333,-7.35
1/24/09 18:30,Product1,1200,Visa,Ruangrote,Melbourne,Victoria,Australia,7/17/08 5:19,2/10/09 2:12,-37.8166667,144.9666667
1/25/09 5:57,Product1,1200,Amex,pamela,Ayacucho,Buenos Aires,Argentina,1/24/09 9:29,2/10/09 6:38,-37.15,-58.4833333
1/5/09 10:02,Product2,3600,Visa,Emillie,Eagan ,MN,United States,1/5/09 9:03,2/10/09 7:29,44.80417,-93.16667
1/13/09 9:14,Product1,1200,Visa,sangeeta,Vossevangen,Hordaland,Norway,1/9/09 9:31,2/10/09 9:04,60.6333333,6.4333333
1/22/09 7:35,Product1,1200,Visa,Anja,Ferney-Voltaire,Rhone-Alpes,France,1/22/09 6:51,2/10/09 9:18,46.25,6.1166667
1/2/09 11:06,Product1,1200,Mastercard,Andrew,Sevilla,Andalucia,Spain,3/12/06 15:02,2/10/09 10:04,37.3772222,-5.9869444
1/11/09 9:50,Product1,1200,Visa,Bato,Munchengosserstadt,Thuringia,Germany,1/7/09 11:45,2/10/09 10:28,51.05,11.65
1/21/09 20:44,Product1,1200,Mastercard,Ailsa ,Lindenhurst ,NY,United States,1/21/09 7:47,2/10/09 10:51,40.68667,-73.37389
1/5/09 9:09,Product1,1200,Visa,Sophie,Bloomfield ,MI,United States,10/23/06 6:52,2/10/09 10:58,42.53778,-83.23306
1/5/09 12:41,Product1,1200,Visa,Katrin,Calgary,Alberta,Canada,12/3/08 14:49,2/10/09 11:45,51.0833333,-114.0833333
1/28/09 12:54,Product2,3600,Mastercard,Kelly ,Vancouver,British Columbia,Canada,1/27/09 21:04,2/10/09 12:09,49.25,-123.1333333
1/21/09 4:46,Product1,1200,Visa,Tomasz,Klampenborg,Kobenhavn,Denmark,6/10/08 11:25,2/10/09 12:22,55.7666667,12.6
1/7/09 13:28,Product1,1200,Visa,Elizabeth,Calne,England,United Kingdom,1/4/09 13:07,2/10/09 12:39,51.4333333,-2
1/27/09 11:18,Product2,3600,Amex,Michael,Los Angeles ,CA,United States,1/23/09 11:47,2/10/09 13:09,34.05222,-118.24278
1/7/09 12:39,Product2,3600,Visa,Natasha,Milano,Lombardy,Italy,6/2/06 13:01,2/10/09 13:19,45.4666667,9.2
1/24/09 13:54,Product2,3600,Mastercard,Meredith,Kloten,Zurich,Switzerland,1/24/09 12:30,2/10/09 13:47,47.45,8.5833333
1/30/09 6:48,Product1,1200,Mastercard,Nicole,Fayetteville ,NC,United States,1/30/09 4:51,2/10/09 14:41,35.0525,-78.87861
1/22/09 18:07,Product1,1200,Visa,Ryan,Simpsonville ,SC,United States,1/6/09 16:59,2/10/09 15:30,34.73694,-82.25444
1/29/09 15:03,Product1,1200,Visa,Mary ,Auckland,Auckland,New Zealand,2/9/06 11:14,2/10/09 16:31,-36.8666667,174.7666667
1/2/09 14:14,Product1,1200,Diners,Aaron,Reading,England,United Kingdom,11/16/08 15:49,2/10/09 16:38,51.4333333,-1
1/19/09 11:05,Product1,1200,Visa,Bertrand,North Caldwell ,NJ,United States,10/3/08 5:55,2/10/09 18:16,40.83972,-74.27694
\ No newline at end of file
diff --git a/test-data/example-bag.zip b/test-data/example-bag.zip
new file mode 100644
index 000000000000..ee4de52c265b
Binary files /dev/null and b/test-data/example-bag.zip differ
diff --git a/test-data/testdir1.zip b/test-data/testdir1.zip
new file mode 100644
index 000000000000..0f71e2ce96aa
Binary files /dev/null and b/test-data/testdir1.zip differ
diff --git a/test/api/test_dataset_collections.py b/test/api/test_dataset_collections.py
index ca8950d8bb0c..87752d2b9096 100644
--- a/test/api/test_dataset_collections.py
+++ b/test/api/test_dataset_collections.py
@@ -188,6 +188,78 @@ def test_enforces_unique_names(self):
create_response = self._post("dataset_collections", payload)
self._assert_status_code_is(create_response, 400)
+ def test_upload_collection(self):
+ elements = [{"src": "files", "dbkey": "hg19", "info": "my cool bed"}]
+ targets = [{
+ "destination": {"type": "hdca"},
+ "elements": elements,
+ "collection_type": "list",
+ "name": "Test upload",
+ }]
+ payload = {
+ "history_id": self.history_id,
+ "targets": json.dumps(targets),
+ "__files": {"files_0|file_data": open(self.test_data_resolver.get_filename("4.bed"))},
+ }
+ self.dataset_populator.fetch(payload)
+ hdca = self._assert_one_collection_created_in_history()
+ self.assertEquals(hdca["name"], "Test upload")
+ assert len(hdca["elements"]) == 1, hdca
+ element0 = hdca["elements"][0]
+ assert element0["element_identifier"] == "4.bed"
+ assert element0["object"]["file_size"] == 61
+
+ def test_upload_nested(self):
+ elements = [{"name": "samp1", "elements": [{"src": "files", "dbkey": "hg19", "info": "my cool bed"}]}]
+ targets = [{
+ "destination": {"type": "hdca"},
+ "elements": elements,
+ "collection_type": "list:list",
+ "name": "Test upload",
+ }]
+ payload = {
+ "history_id": self.history_id,
+ "targets": json.dumps(targets),
+ "__files": {"files_0|file_data": open(self.test_data_resolver.get_filename("4.bed"))},
+ }
+ self.dataset_populator.fetch(payload)
+ hdca = self._assert_one_collection_created_in_history()
+ self.assertEquals(hdca["name"], "Test upload")
+ assert len(hdca["elements"]) == 1, hdca
+ element0 = hdca["elements"][0]
+ assert element0["element_identifier"] == "samp1"
+
+ def test_upload_collection_from_url(self):
+ elements = [{"src": "url", "url": "https://raw.githubusercontent.com/galaxyproject/galaxy/dev/test-data/4.bed", "info": "my cool bed"}]
+ targets = [{
+ "destination": {"type": "hdca"},
+ "elements": elements,
+ "collection_type": "list",
+ }]
+ payload = {
+ "history_id": self.history_id,
+ "targets": json.dumps(targets),
+ "__files": {"files_0|file_data": open(self.test_data_resolver.get_filename("4.bed"))},
+ }
+ self.dataset_populator.fetch(payload)
+ hdca = self._assert_one_collection_created_in_history()
+ assert len(hdca["elements"]) == 1, hdca
+ element0 = hdca["elements"][0]
+ assert element0["element_identifier"] == "4.bed"
+ assert element0["object"]["file_size"] == 61
+
+ def _assert_one_collection_created_in_history(self):
+ contents_response = self._get("histories/%s/contents/dataset_collections" % self.history_id)
+ self._assert_status_code_is(contents_response, 200)
+ contents = contents_response.json()
+ assert len(contents) == 1
+ hdca = contents[0]
+ assert hdca["history_content_type"] == "dataset_collection"
+ hdca_id = hdca["id"]
+ collection_response = self._get("histories/%s/contents/dataset_collections/%s" % (self.history_id, hdca_id))
+ self._assert_status_code_is(collection_response, 200)
+ return collection_response.json()
+
def _check_create_response(self, create_response):
self._assert_status_code_is(create_response, 200)
dataset_collection = create_response.json()
diff --git a/test/api/test_libraries.py b/test/api/test_libraries.py
index 2a715f50fcbb..ae0303a8074f 100644
--- a/test/api/test_libraries.py
+++ b/test/api/test_libraries.py
@@ -1,3 +1,5 @@
+import json
+
from base import api
from base.populators import (
DatasetCollectionPopulator,
@@ -95,6 +97,94 @@ def test_create_dataset(self):
assert library_dataset["peek"].find("create_test") >= 0
assert library_dataset["file_ext"] == "txt", library_dataset["file_ext"]
+ def test_fetch_upload_to_folder(self):
+ history_id, library, destination = self._setup_fetch_to_folder("flat_zip")
+ items = [{"src": "files", "dbkey": "hg19", "info": "my cool bed"}]
+ targets = [{
+ "destination": destination,
+ "items": items
+ }]
+ payload = {
+ "history_id": history_id, # TODO: Shouldn't be needed :(
+ "targets": json.dumps(targets),
+ "__files": {"files_0|file_data": open(self.test_data_resolver.get_filename("4.bed"))},
+ }
+ self.dataset_populator.fetch(payload)
+ dataset = self.library_populator.get_library_contents_with_path(library["id"], "/4.bed")
+ assert dataset["file_size"] == 61, dataset
+ assert dataset["genome_build"] == "hg19", dataset
+ assert dataset["misc_info"] == "my cool bed", dataset
+ assert dataset["file_ext"] == "bed", dataset
+
+ def test_fetch_zip_to_folder(self):
+ history_id, library, destination = self._setup_fetch_to_folder("flat_zip")
+ bed_test_data_path = self.test_data_resolver.get_filename("4.bed.zip")
+ targets = [{
+ "destination": destination,
+ "items_from": "archive", "src": "files",
+ }]
+ payload = {
+ "history_id": history_id, # TODO: Shouldn't be needed :(
+ "targets": json.dumps(targets),
+ "__files": {"files_0|file_data": open(bed_test_data_path)}
+ }
+ self.dataset_populator.fetch(payload)
+ dataset = self.library_populator.get_library_contents_with_path(library["id"], "/4.bed")
+ assert dataset["file_size"] == 61, dataset
+
+ def test_fetch_single_url_to_folder(self):
+ history_id, library, destination = self._setup_fetch_to_folder("single_url")
+ items = [{"src": "url", "url": "https://raw.githubusercontent.com/galaxyproject/galaxy/dev/test-data/4.bed"}]
+ targets = [{
+ "destination": destination,
+ "items": items
+ }]
+ payload = {
+ "history_id": history_id, # TODO: Shouldn't be needed :(
+ "targets": json.dumps(targets),
+ }
+ self.dataset_populator.fetch(payload)
+ dataset = self.library_populator.get_library_contents_with_path(library["id"], "/4.bed")
+ assert dataset["file_size"] == 61, dataset
+
+ def test_fetch_url_archive_to_folder(self):
+ history_id, library, destination = self._setup_fetch_to_folder("single_url")
+ targets = [{
+ "destination": destination,
+ "items_from": "archive",
+ "src": "url",
+ "url": "https://raw.githubusercontent.com/galaxyproject/galaxy/dev/test-data/4.bed.zip",
+ }]
+ payload = {
+ "history_id": history_id, # TODO: Shouldn't be needed :(
+ "targets": json.dumps(targets),
+ }
+ self.dataset_populator.fetch(payload)
+ dataset = self.library_populator.get_library_contents_with_path(library["id"], "/4.bed")
+ assert dataset["file_size"] == 61, dataset
+
+ def test_fetch_bagit_archive_to_folder(self):
+ history_id, library, destination = self._setup_fetch_to_folder("bagit_archive")
+ example_bag_path = self.test_data_resolver.get_filename("example-bag.zip")
+ targets = [{
+ "destination": destination,
+ "items_from": "bagit_archive", "src": "files",
+ }]
+ payload = {
+ "history_id": history_id, # TODO: Shouldn't be needed :(
+ "targets": json.dumps(targets),
+ "__files": {"files_0|file_data": open(example_bag_path)},
+ }
+ self.dataset_populator.fetch(payload)
+ dataset = self.library_populator.get_library_contents_with_path(library["id"], "/README.txt")
+ assert dataset["file_size"] == 66, dataset
+
+ dataset = self.library_populator.get_library_contents_with_path(library["id"], "/bdbag-profile.json")
+ assert dataset["file_size"] == 723, dataset
+
+ def _setup_fetch_to_folder(self, test_name):
+ return self.library_populator.setup_fetch_to_folder(test_name)
+
def test_create_dataset_in_folder(self):
library = self.library_populator.new_private_library("ForCreateDatasets")
folder_response = self._create_folder(library)
diff --git a/test/api/test_tools_upload.py b/test/api/test_tools_upload.py
index a05fe9dce49f..cb67b84507dd 100644
--- a/test/api/test_tools_upload.py
+++ b/test/api/test_tools_upload.py
@@ -1,3 +1,5 @@
+import json
+
from base import api
from base.constants import (
ONE_TO_SIX_ON_WINDOWS,
@@ -33,19 +35,28 @@ def test_upload1_paste_bad_datatype(self):
self._assert_has_keys(create, 'err_msg')
assert file_type in create['err_msg']
- def test_upload_posix_newline_fixes(self):
+    # upload1 rewrites content with POSIX line endings by default, but this can be disabled by
+    # setting to_posix_lines=None in the request. The newer fetch API does not do this by default,
+    # preferring to keep content unaltered where possible, but it can be enabled with a JSON
+    # boolean switch of the same name (to_posix_lines).
+ def test_upload_posix_newline_fixes_by_default(self):
windows_content = ONE_TO_SIX_ON_WINDOWS
result_content = self._upload_and_get_content(windows_content)
self.assertEquals(result_content, ONE_TO_SIX_WITH_TABS)
+ def test_fetch_posix_unaltered(self):
+ windows_content = ONE_TO_SIX_ON_WINDOWS
+ result_content = self._upload_and_get_content(windows_content, api="fetch")
+ self.assertEquals(result_content, ONE_TO_SIX_ON_WINDOWS)
+
def test_upload_disable_posix_fix(self):
windows_content = ONE_TO_SIX_ON_WINDOWS
result_content = self._upload_and_get_content(windows_content, to_posix_lines=None)
self.assertEquals(result_content, windows_content)
- def test_upload_tab_to_space(self):
- table = ONE_TO_SIX_WITH_SPACES
- result_content = self._upload_and_get_content(table, space_to_tab="Yes")
+ def test_fetch_post_lines_option(self):
+ windows_content = ONE_TO_SIX_ON_WINDOWS
+ result_content = self._upload_and_get_content(windows_content, api="fetch", to_posix_lines=True)
self.assertEquals(result_content, ONE_TO_SIX_WITH_TABS)
def test_upload_tab_to_space_off_by_default(self):
@@ -53,6 +64,21 @@ def test_upload_tab_to_space_off_by_default(self):
result_content = self._upload_and_get_content(table)
self.assertEquals(result_content, table)
+ def test_fetch_tab_to_space_off_by_default(self):
+ table = ONE_TO_SIX_WITH_SPACES
+ result_content = self._upload_and_get_content(table, api='fetch')
+ self.assertEquals(result_content, table)
+
+ def test_upload_tab_to_space(self):
+ table = ONE_TO_SIX_WITH_SPACES
+ result_content = self._upload_and_get_content(table, space_to_tab="Yes")
+ self.assertEquals(result_content, ONE_TO_SIX_WITH_TABS)
+
+ def test_fetch_tab_to_space(self):
+ table = ONE_TO_SIX_WITH_SPACES
+ result_content = self._upload_and_get_content(table, api="fetch", space_to_tab=True)
+ self.assertEquals(result_content, ONE_TO_SIX_WITH_TABS)
+
@skip_without_datatype("rdata")
def test_rdata_not_decompressed(self):
# Prevent regression of https://github.com/galaxyproject/galaxy/issues/753
@@ -60,6 +86,30 @@ def test_rdata_not_decompressed(self):
rdata_metadata = self._upload_and_get_details(open(rdata_path, "rb"), file_type="auto")
self.assertEquals(rdata_metadata["file_ext"], "rdata")
+ @skip_without_datatype("csv")
+ def test_csv_upload(self):
+ csv_path = TestDataResolver().get_filename("1.csv")
+ csv_metadata = self._upload_and_get_details(open(csv_path, "rb"), file_type="csv")
+ self.assertEquals(csv_metadata["file_ext"], "csv")
+
+ @skip_without_datatype("csv")
+ def test_csv_upload_auto(self):
+ csv_path = TestDataResolver().get_filename("1.csv")
+ csv_metadata = self._upload_and_get_details(open(csv_path, "rb"), file_type="auto")
+ self.assertEquals(csv_metadata["file_ext"], "csv")
+
+ @skip_without_datatype("csv")
+ def test_csv_fetch(self):
+ csv_path = TestDataResolver().get_filename("1.csv")
+ csv_metadata = self._upload_and_get_details(open(csv_path, "rb"), api="fetch", ext="csv", to_posix_lines=True)
+ self.assertEquals(csv_metadata["file_ext"], "csv")
+
+ @skip_without_datatype("csv")
+ def test_csv_sniff_fetch(self):
+ csv_path = TestDataResolver().get_filename("1.csv")
+ csv_metadata = self._upload_and_get_details(open(csv_path, "rb"), api="fetch", ext="auto", to_posix_lines=True)
+ self.assertEquals(csv_metadata["file_ext"], "csv")
+
@skip_without_datatype("velvet")
def test_composite_datatype(self):
with self.dataset_populator.test_history() as history_id:
@@ -113,6 +163,11 @@ def test_upload_dbkey(self):
datasets = run_response.json()["outputs"]
assert datasets[0].get("genome_build") == "hg19", datasets[0]
+ def test_fetch_dbkey(self):
+ table = ONE_TO_SIX_WITH_SPACES
+ details = self._upload_and_get_details(table, api='fetch', dbkey="hg19")
+ assert details.get("genome_build") == "hg19"
+
def test_upload_multiple_files_1(self):
with self.dataset_populator.test_history() as history_id:
payload = self.dataset_populator.upload_payload(history_id, "Test123",
@@ -329,8 +384,23 @@ def _upload_and_get_details(self, content, **upload_kwds):
history_id, new_dataset = self._upload(content, **upload_kwds)
return self.dataset_populator.get_history_dataset_details(history_id, dataset=new_dataset)
- def _upload(self, content, **upload_kwds):
+ def _upload(self, content, api="upload1", **upload_kwds):
history_id = self.dataset_populator.new_history()
- new_dataset = self.dataset_populator.new_dataset(history_id, content=content, **upload_kwds)
+ if api == "upload1":
+ new_dataset = self.dataset_populator.new_dataset(history_id, content=content, **upload_kwds)
+ else:
+ assert api == "fetch"
+ element = dict(src="files", **upload_kwds)
+ target = {
+ "destination": {"type": "hdas"},
+ "elements": [element],
+ }
+ targets = json.dumps([target])
+ payload = {
+ "history_id": history_id,
+ "targets": targets,
+ "__files": {"files_0|file_data": content}
+ }
+ new_dataset = self.dataset_populator.fetch(payload).json()["outputs"][0]
self.dataset_populator.wait_for_history(history_id, assert_ok=upload_kwds.get("assert_ok", True))
return history_id, new_dataset
diff --git a/test/base/driver_util.py b/test/base/driver_util.py
index 638eb7f96250..7c31e7df7eee 100644
--- a/test/base/driver_util.py
+++ b/test/base/driver_util.py
@@ -198,6 +198,7 @@ def setup_galaxy_config(
enable_beta_tool_formats=True,
expose_dataset_path=True,
file_path=file_path,
+ ftp_upload_purge=False,
galaxy_data_manager_data_path=galaxy_data_manager_data_path,
id_secret='changethisinproductiontoo',
job_config_file=job_config_file,
diff --git a/test/base/integration_util.py b/test/base/integration_util.py
index 363b19a32bff..540d6bf1c108 100644
--- a/test/base/integration_util.py
+++ b/test/base/integration_util.py
@@ -8,6 +8,7 @@
from unittest import skip, TestCase
from galaxy.tools.deps.commands import which
+from galaxy.tools.verify.test_data import TestDataResolver
from .api import UsesApiTestCaseMixin
from .driver_util import GalaxyTestDriver
@@ -56,6 +57,7 @@ def tearDownClass(cls):
cls._app_available = False
def setUp(self):
+ self.test_data_resolver = TestDataResolver()
# Setup attributes needed for API testing...
server_wrapper = self._test_driver.server_wrappers[0]
host = server_wrapper.host
diff --git a/test/base/populators.py b/test/base/populators.py
index 9bff5a08e9fe..22d1d15d87d3 100644
--- a/test/base/populators.py
+++ b/test/base/populators.py
@@ -148,14 +148,30 @@ def new_dataset_request(self, history_id, content=None, wait=False, **kwds):
self.wait_for_tool_run(history_id, run_response, assert_ok=kwds.get('assert_ok', True))
return run_response
+ def fetch(self, payload, assert_ok=True, timeout=DEFAULT_TIMEOUT):
+ tool_response = self._post("tools/fetch", data=payload)
+ if assert_ok:
+ job = self.check_run(tool_response)
+ self.wait_for_job(job["id"], timeout=timeout)
+
+ job = tool_response.json()["jobs"][0]
+ details = self.get_job_details(job["id"]).json()
+ assert details["state"] == "ok", details
+
+ return tool_response
+
def wait_for_tool_run(self, history_id, run_response, timeout=DEFAULT_TIMEOUT, assert_ok=True):
- run = run_response.json()
- assert run_response.status_code == 200, run
- job = run["jobs"][0]
+ job = self.check_run(run_response)
self.wait_for_job(job["id"], timeout=timeout)
self.wait_for_history(history_id, assert_ok=assert_ok, timeout=timeout)
return run_response
+ def check_run(self, run_response):
+ run = run_response.json()
+ assert run_response.status_code == 200, run
+ job = run["jobs"][0]
+ return job
+
def wait_for_history(self, history_id, assert_ok=False, timeout=DEFAULT_TIMEOUT):
try:
return wait_on_state(lambda: self._get("histories/%s" % history_id), assert_ok=assert_ok, timeout=timeout)
@@ -266,8 +282,8 @@ def run_tool(self, tool_id, inputs, history_id, assert_ok=True, **kwds):
else:
return tool_response
- def tools_post(self, payload):
- tool_response = self._post("tools", data=payload)
+ def tools_post(self, payload, url="tools"):
+ tool_response = self._post(url, data=payload)
return tool_response
def get_history_dataset_content(self, history_id, wait=True, filename=None, **kwds):
@@ -463,6 +479,11 @@ class LibraryPopulator(object):
def __init__(self, galaxy_interactor):
self.galaxy_interactor = galaxy_interactor
+ self.dataset_populator = DatasetPopulator(galaxy_interactor)
+
+ def get_libraries(self):
+ get_response = self.galaxy_interactor.get("libraries")
+ return get_response.json()
def new_private_library(self, name):
library = self.new_library(name)
@@ -514,6 +535,8 @@ def create_dataset_request(self, library, **kwds):
"file_type": kwds.get("file_type", "auto"),
"db_key": kwds.get("db_key", "?"),
}
+ if kwds.get("link_data"):
+ create_data["link_data_only"] = "link_to_files"
if upload_option == "upload_file":
files = {
@@ -532,7 +555,9 @@ def new_library_dataset(self, name, **create_dataset_kwds):
library = self.new_private_library(name)
payload, files = self.create_dataset_request(library, **create_dataset_kwds)
dataset = self.raw_library_contents_create(library["id"], payload, files=files).json()[0]
+ return self.wait_on_library_dataset(library, dataset)
+ def wait_on_library_dataset(self, library, dataset):
def show():
return self.galaxy_interactor.get("libraries/%s/contents/%s" % (library["id"], dataset["id"]))
@@ -563,6 +588,24 @@ def show():
return library, library_dataset
+ def get_library_contents_with_path(self, library_id, path):
+ all_contents_response = self.galaxy_interactor.get("libraries/%s/contents" % library_id)
+ api_asserts.assert_status_code_is(all_contents_response, 200)
+ all_contents = all_contents_response.json()
+ matching = [c for c in all_contents if c["name"] == path]
+ if len(matching) == 0:
+ raise Exception("Failed to find library contents with path [%s], contents are %s" % (path, all_contents))
+ get_response = self.galaxy_interactor.get(matching[0]["url"])
+ api_asserts.assert_status_code_is(get_response, 200)
+ return get_response.json()
+
+ def setup_fetch_to_folder(self, test_name):
+ history_id = self.dataset_populator.new_history()
+ library = self.new_private_library(test_name)
+ folder_id = library["root_folder_id"][1:]
+ destination = {"type": "library_folder", "library_folder_id": folder_id}
+ return history_id, library, destination
+
class BaseDatasetCollectionPopulator(object):
diff --git a/test/functional/tools/sample_datatypes_conf.xml b/test/functional/tools/sample_datatypes_conf.xml
index a37897791eed..29917074a99c 100644
--- a/test/functional/tools/sample_datatypes_conf.xml
+++ b/test/functional/tools/sample_datatypes_conf.xml
@@ -4,6 +4,7 @@
+
@@ -14,6 +15,12 @@
+
+
+
+
+
+
diff --git a/test/integration/test_upload_configuration_options.py b/test/integration/test_upload_configuration_options.py
index 441fd32f3d17..fa9548c230ae 100644
--- a/test/integration/test_upload_configuration_options.py
+++ b/test/integration/test_upload_configuration_options.py
@@ -19,6 +19,7 @@
framework but tested here for FTP uploads.
"""
+import json
import os
import re
import shutil
@@ -54,6 +55,59 @@ def setUp(self):
self.library_populator = LibraryPopulator(self.galaxy_interactor)
self.history_id = self.dataset_populator.new_history()
+ def fetch_target(self, target, assert_ok=False, attach_test_file=False):
+ payload = {
+ "history_id": self.history_id,
+ "targets": json.dumps([target]),
+ }
+ if attach_test_file:
+ payload["__files"] = {"files_0|file_data": open(self.test_data_resolver.get_filename("4.bed"))}
+
+ response = self.dataset_populator.fetch(payload, assert_ok=assert_ok)
+ return response
+
+ @classmethod
+ def temp_config_dir(cls, name):
+ # realpath here to get around problems with symlinks being blocked.
+ return os.path.realpath(os.path.join(cls._test_driver.galaxy_test_tmp_dir, name))
+
+ def _write_file(self, dir_path, content, filename="test"):
+ """Helper for writing ftp/server dir files."""
+ self._ensure_directory(dir_path)
+ path = os.path.join(dir_path, filename)
+ with open(path, "w") as f:
+ f.write(content)
+ return path
+
+ def _ensure_directory(self, path):
+ if not os.path.exists(path):
+ os.makedirs(path)
+
+
+class InvalidFetchRequestsTestCase(BaseUploadContentConfigurationTestCase):
+
+ def test_in_place_not_allowed(self):
+ elements = [{"src": "files", "in_place": False}]
+ target = {
+ "destination": {"type": "hdca"},
+ "elements": elements,
+ "collection_type": "list",
+ }
+ response = self.fetch_target(target, attach_test_file=True)
+ self._assert_status_code_is(response, 400)
+ assert 'in_place' in response.json()["err_msg"]
+
+ def test_files_not_attached(self):
+ elements = [{"src": "files"}]
+ target = {
+ "destination": {"type": "hdca"},
+ "elements": elements,
+ "collection_type": "list",
+ }
+ response = self.fetch_target(target)
+ self._assert_status_code_is(response, 400)
+ assert 'Failed to find uploaded file matching target' in response.json()["err_msg"]
+
class NonAdminsCannotPasteFilePathTestCase(BaseUploadContentConfigurationTestCase):
@@ -72,6 +126,8 @@ def test_disallowed_for_primary_file(self):
@skip_without_datatype("velvet")
def test_disallowed_for_composite_file(self):
+ path = os.path.join(TEST_DATA_DIRECTORY, "1.txt")
+ assert os.path.exists(path)
payload = self.dataset_populator.upload_payload(
self.history_id,
"sequences content",
@@ -79,7 +135,7 @@ def test_disallowed_for_composite_file(self):
extra_inputs={
"files_1|url_paste": "roadmaps content",
"files_1|type": "upload_dataset",
- "files_2|url_paste": "file://%s/1.txt" % TEST_DATA_DIRECTORY,
+ "files_2|url_paste": "file://%s" % path,
"files_2|type": "upload_dataset",
},
)
@@ -87,12 +143,42 @@ def test_disallowed_for_composite_file(self):
# Ideally this would be 403 but the tool API endpoint isn't using
# the newer API decorator that handles those details.
assert create_response.status_code >= 400
+ assert os.path.exists(path)
def test_disallowed_for_libraries(self):
+ path = os.path.join(TEST_DATA_DIRECTORY, "1.txt")
+ assert os.path.exists(path)
library = self.library_populator.new_private_library("pathpastedisallowedlibraries")
- payload, files = self.library_populator.create_dataset_request(library, upload_option="upload_paths", paths="%s/1.txt" % TEST_DATA_DIRECTORY)
+ payload, files = self.library_populator.create_dataset_request(library, upload_option="upload_paths", paths=path)
response = self.library_populator.raw_library_contents_create(library["id"], payload, files=files)
assert response.status_code == 403, response.json()
+ assert os.path.exists(path)
+
+ def test_disallowed_for_fetch(self):
+ path = os.path.join(TEST_DATA_DIRECTORY, "1.txt")
+ assert os.path.exists(path)
+ elements = [{"src": "path", "path": path}]
+ target = {
+ "destination": {"type": "hdca"},
+ "elements": elements,
+ "collection_type": "list",
+ }
+ response = self.fetch_target(target)
+ self._assert_status_code_is(response, 403)
+ assert os.path.exists(path)
+
+ def test_disallowed_for_fetch_urls(self):
+ path = os.path.join(TEST_DATA_DIRECTORY, "1.txt")
+ assert os.path.exists(path)
+ elements = [{"src": "url", "url": "file://%s" % path}]
+ target = {
+ "destination": {"type": "hdca"},
+ "elements": elements,
+ "collection_type": "list",
+ }
+ response = self.fetch_target(target)
+ self._assert_status_code_is(response, 403)
+ assert os.path.exists(path)
class AdminsCanPasteFilePathsTestCase(BaseUploadContentConfigurationTestCase):
@@ -113,10 +199,38 @@ def test_admin_path_paste(self):
def test_admin_path_paste_libraries(self):
library = self.library_populator.new_private_library("pathpasteallowedlibraries")
- payload, files = self.library_populator.create_dataset_request(library, upload_option="upload_paths", paths="%s/1.txt" % TEST_DATA_DIRECTORY)
+ path = "%s/1.txt" % TEST_DATA_DIRECTORY
+ assert os.path.exists(path)
+ payload, files = self.library_populator.create_dataset_request(library, upload_option="upload_paths", paths=path)
response = self.library_populator.raw_library_contents_create(library["id"], payload, files=files)
# Was 403 for non-admin above.
assert response.status_code == 200
+ # Test regression where this was getting deleted in this mode.
+ assert os.path.exists(path)
+
+ def test_admin_fetch(self):
+ path = os.path.join(TEST_DATA_DIRECTORY, "1.txt")
+ elements = [{"src": "path", "path": path}]
+ target = {
+ "destination": {"type": "hdca"},
+ "elements": elements,
+ "collection_type": "list",
+ }
+ response = self.fetch_target(target)
+ self._assert_status_code_is(response, 200)
+ assert os.path.exists(path)
+
+ def test_admin_fetch_file_url(self):
+ path = os.path.join(TEST_DATA_DIRECTORY, "1.txt")
+ elements = [{"src": "url", "url": "file://%s" % path}]
+ target = {
+ "destination": {"type": "hdca"},
+ "elements": elements,
+ "collection_type": "list",
+ }
+ response = self.fetch_target(target)
+ self._assert_status_code_is(response, 200)
+ assert os.path.exists(path)
class DefaultBinaryContentFiltersTestCase(BaseUploadContentConfigurationTestCase):
@@ -212,6 +326,16 @@ def test_blocked_url_for_composite_file(self):
# the newer API decorator that handles those details.
assert create_response.status_code >= 400
+ def test_blocked_url_for_fetch(self):
+ elements = [{"src": "url", "url": "http://localhost"}]
+ target = {
+ "destination": {"type": "hdca"},
+ "elements": elements,
+ "collection_type": "list",
+ }
+ response = self.fetch_target(target)
+ self._assert_status_code_is(response, 403)
+
class BaseFtpUploadConfigurationTestCase(BaseUploadContentConfigurationTestCase):
@@ -228,7 +352,7 @@ def handle_extra_ftp_config(cls, config):
@classmethod
def ftp_dir(cls):
- return os.path.join(cls._test_driver.galaxy_test_tmp_dir, "ftp")
+ return cls.temp_config_dir("ftp")
def _check_content(self, dataset, content, ext="txt"):
dataset = self.dataset_populator.get_history_dataset_details(self.history_id, dataset=dataset)
@@ -251,6 +375,22 @@ def _ensure_directory(self, path):
if not os.path.exists(path):
os.makedirs(path)
+ def _run_purgable_upload(self):
+ # Purge setting is actually used with a fairly specific set of parameters - see:
+ # https://github.com/galaxyproject/galaxy/issues/5361
+ content = "hello world\n"
+ ftp_path = self._write_ftp_file(content)
+ ftp_files = self.dataset_populator.get_remote_files()
+ assert len(ftp_files) == 1
+ assert ftp_files[0]["path"] == "test"
+ assert os.path.exists(ftp_path)
+        # to_posix_lines currently has to be set to None to force purging of non-binary data.
+ dataset = self.dataset_populator.new_dataset(
+ self.history_id, ftp_files="test", file_type="txt", to_posix_lines=None, wait=True
+ )
+ self._check_content(dataset, content)
+ return ftp_path
+
class SimpleFtpUploadConfigurationTestCase(BaseFtpUploadConfigurationTestCase):
@@ -267,7 +407,29 @@ def test_ftp_upload(self):
self.history_id, ftp_files="test", file_type="txt", to_posix_lines=None, wait=True
)
self._check_content(dataset, content)
- assert not os.path.exists(ftp_path)
+
+ def test_ftp_fetch(self):
+ content = "hello world\n"
+ ftp_path = self._write_ftp_file(content)
+ ftp_files = self.dataset_populator.get_remote_files()
+ assert len(ftp_files) == 1, ftp_files
+ assert ftp_files[0]["path"] == "test"
+ assert os.path.exists(ftp_path)
+ elements = [{"src": "ftp_import", "ftp_path": ftp_files[0]["path"]}]
+ target = {
+ "destination": {"type": "hdca"},
+ "elements": elements,
+ "collection_type": "list",
+ "name": "cool collection",
+ }
+ response = self.fetch_target(target)
+ self._assert_status_code_is(response, 200)
+ response_object = response.json()
+ assert "output_collections" in response_object
+ output_collections = response_object["output_collections"]
+ assert len(output_collections) == 1, response_object
+ dataset = self.dataset_populator.get_history_dataset_details(self.history_id, hid=2)
+ self._check_content(dataset, content)
class ExplicitEmailAsIdentifierFtpUploadConfigurationTestCase(SimpleFtpUploadConfigurationTestCase):
@@ -305,21 +467,66 @@ def handle_extra_ftp_config(cls, config):
config["ftp_upload_purge"] = "False"
def test_ftp_uploads_not_purged(self):
- content = "hello world\n"
- ftp_path = self._write_ftp_file(content)
- ftp_files = self.dataset_populator.get_remote_files()
- assert len(ftp_files) == 1
- assert ftp_files[0]["path"] == "test"
- assert os.path.exists(ftp_path)
- # gotta set to_posix_lines to None currently to force purging of non-binary data.
- dataset = self.dataset_populator.new_dataset(
- self.history_id, ftp_files="test", file_type="txt", to_posix_lines=None, wait=True
- )
- self._check_content(dataset, content)
+ ftp_path = self._run_purgable_upload()
# Purge is disabled, this better still be here.
assert os.path.exists(ftp_path)
+class EnableFtpPurgeUploadConfigurationTestCase(BaseFtpUploadConfigurationTestCase):
+
+ @classmethod
+ def handle_extra_ftp_config(cls, config):
+ config["ftp_upload_purge"] = "True"
+
+    def test_ftp_uploads_purged(self):
+ ftp_path = self._run_purgable_upload()
+ assert not os.path.exists(ftp_path)
+
+
+class AdvancedFtpUploadFetchTestCase(BaseFtpUploadConfigurationTestCase):
+
+ def test_fetch_ftp_directory(self):
+ dir_path = self._get_user_ftp_path()
+ self._write_file(os.path.join(dir_path, "subdir"), "content 1", filename="1")
+ self._write_file(os.path.join(dir_path, "subdir"), "content 22", filename="2")
+ self._write_file(os.path.join(dir_path, "subdir"), "content 333", filename="3")
+ target = {
+ "destination": {"type": "hdca"},
+ "elements_from": "directory",
+ "src": "ftp_import",
+ "ftp_path": "subdir",
+ "collection_type": "list",
+ }
+ response = self.fetch_target(target)
+ self._assert_status_code_is(response, 200)
+ hdca = self.dataset_populator.get_history_collection_details(self.history_id, hid=1)
+ assert len(hdca["elements"]) == 3, hdca
+ element0 = hdca["elements"][0]
+ assert element0["element_identifier"] == "1"
+ assert element0["object"]["file_size"] == 9
+
+ def test_fetch_nested_elements_from(self):
+ dir_path = self._get_user_ftp_path()
+ self._write_file(os.path.join(dir_path, "subdir1"), "content 1", filename="1")
+ self._write_file(os.path.join(dir_path, "subdir1"), "content 22", filename="2")
+ self._write_file(os.path.join(dir_path, "subdir2"), "content 333", filename="3")
+ elements = [
+ {"name": "subdirel1", "src": "ftp_import", "ftp_path": "subdir1", "elements_from": "directory", "collection_type": "list"},
+ {"name": "subdirel2", "src": "ftp_import", "ftp_path": "subdir2", "elements_from": "directory", "collection_type": "list"},
+ ]
+ target = {
+ "destination": {"type": "hdca"},
+ "elements": elements,
+ "collection_type": "list:list",
+ }
+ response = self.fetch_target(target)
+ self._assert_status_code_is(response, 200)
+ hdca = self.dataset_populator.get_history_collection_details(self.history_id, hid=1)
+ assert len(hdca["elements"]) == 2, hdca
+ element0 = hdca["elements"][0]
+ assert element0["element_identifier"] == "subdirel1"
+
+
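The nested test stops at the outer element identifiers. If each outer element's `object` exposes its own `elements` list (an assumption about the response shape, not something this diff verifies), the inner collections could be checked as well:

```python
# Hypothetical follow-up assertions for test_fetch_nested_elements_from,
# assuming each outer element's "object" is a collection with an "elements" list.
subdirel1 = hdca["elements"][0]["object"]
assert len(subdirel1["elements"]) == 2, subdirel1
subdirel2 = hdca["elements"][1]["object"]
assert len(subdirel2["elements"]) == 1, subdirel2
```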
class UploadOptionsFtpUploadConfigurationTestCase(BaseFtpUploadConfigurationTestCase):
def test_upload_api_option_space_to_tab(self):
@@ -428,7 +635,7 @@ def _copy_to_user_ftp_file(self, test_data_path):
shutil.copyfile(input_path, os.path.join(target_dir, test_data_path))
def _write_user_ftp_file(self, path, content):
- return self._write_ftp_file(content, filename=path)
+ return self._write_file(os.path.join(self.ftp_dir(), TEST_USER), content, filename=path)
class ServerDirectoryOffByDefaultTestCase(BaseUploadContentConfigurationTestCase):
@@ -448,27 +655,51 @@ def test_server_dir_uploads_403_if_dir_not_set(self):
class ServerDirectoryValidUsageTestCase(BaseUploadContentConfigurationTestCase):
+    # This tests the library contents API - I think equivalent functionality is available via the
+    # library datasets API and should also be tested.
require_admin_user = True
@classmethod
def handle_galaxy_config_kwds(cls, config):
- library_import_dir = os.path.join(cls._test_driver.galaxy_test_tmp_dir, "library_import_dir")
- config["library_import_dir"] = library_import_dir
- cls.dir_to_import = 'library'
- full_dir_path = os.path.join(library_import_dir, cls.dir_to_import)
+ server_dir = cls.server_dir()
+ os.makedirs(server_dir)
+ config["library_import_dir"] = server_dir
+
+ def test_valid_server_dir_uploads_okay(self):
+ dir_to_import = 'library'
+ full_dir_path = os.path.join(self.server_dir(), dir_to_import)
os.makedirs(full_dir_path)
- cls.file_content = "create_test"
+ file_content = "hello world\n"
with tempfile.NamedTemporaryFile(dir=full_dir_path, delete=False) as fh:
- fh.write(cls.file_content)
- cls.file_to_import = fh.name
+ fh.write(file_content)
+ file_to_import = fh.name
- def test_valid_server_dir_uploads_okay(self):
- self.library_populator.new_library_dataset("serverdirupload", upload_option="upload_directory", server_dir=self.dir_to_import)
+ library_dataset = self.library_populator.new_library_dataset("serverdirupload", upload_option="upload_directory", server_dir=dir_to_import)
# Check the file is still there and was not modified
- with open(self.file_to_import) as fh:
+ with open(file_to_import) as fh:
read_content = fh.read()
- assert read_content == self.file_content
+ assert read_content == file_content
+
+ assert library_dataset["file_size"] == 12, library_dataset
+
+    def test_link_data_only(self):
+ content = "hello world\n"
+ dir_path = os.path.join(self.server_dir(), "lib1")
+ file_path = self._write_file(dir_path, content)
+ library = self.library_populator.new_private_library("serverdirupload")
+        # Link to the files under the server directory's "lib1" subdirectory instead of copying them.
+ payload, files = self.library_populator.create_dataset_request(library, upload_option="upload_directory", server_dir="lib1", link_data=True)
+ response = self.library_populator.raw_library_contents_create(library["id"], payload, files=files)
+ assert response.status_code == 200, response.json()
+ dataset = response.json()[0]
+ ok_dataset = self.library_populator.wait_on_library_dataset(library, dataset)
+ assert ok_dataset["file_size"] == 12, ok_dataset
+ assert ok_dataset["file_name"] == file_path, ok_dataset
+
+ @classmethod
+ def server_dir(cls):
+ return cls.temp_config_dir("server")
class ServerDirectoryRestrictedToAdminsUsageTestCase(BaseUploadContentConfigurationTestCase):
@@ -483,3 +714,130 @@ def test_library_import_dir_not_available_to_non_admins(self):
payload, files = self.library_populator.create_dataset_request(library, upload_option="upload_directory", server_dir="library")
response = self.library_populator.raw_library_contents_create(library["id"], payload, files=files)
assert response.status_code == 403, response.json()
+
+
+class FetchByPathTestCase(BaseUploadContentConfigurationTestCase):
+
+ require_admin_user = True
+
+ @classmethod
+ def handle_galaxy_config_kwds(cls, config):
+ config["allow_path_paste"] = True
+
+ def test_fetch_path_to_folder(self):
+ history_id, library, destination = self.library_populator.setup_fetch_to_folder("simple_fetch")
+ bed_test_data_path = self.test_data_resolver.get_filename("4.bed")
+ assert os.path.exists(bed_test_data_path)
+ items = [{"src": "path", "path": bed_test_data_path, "info": "my cool bed"}]
+ targets = [{
+ "destination": destination,
+ "items": items
+ }]
+ payload = {
+ "history_id": history_id, # TODO: Shouldn't be needed :(
+ "targets": json.dumps(targets),
+ }
+ self.dataset_populator.fetch(payload)
+ dataset = self.library_populator.get_library_contents_with_path(library["id"], "/4.bed")
+ assert dataset["file_size"] == 61, dataset
+ assert os.path.exists(bed_test_data_path)
+
+ def test_fetch_link_data_only(self):
+ history_id, library, destination = self.library_populator.setup_fetch_to_folder("fetch_and_link")
+ bed_test_data_path = self.test_data_resolver.get_filename("4.bed")
+ assert os.path.exists(bed_test_data_path)
+ items = [{"src": "path", "path": bed_test_data_path, "info": "my cool bed", "link_data_only": True}]
+ targets = [{
+ "destination": destination,
+ "items": items
+ }]
+ payload = {
+ "history_id": history_id, # TODO: Shouldn't be needed :(
+ "targets": json.dumps(targets),
+ }
+ self.dataset_populator.fetch(payload)
+ dataset = self.library_populator.get_library_contents_with_path(library["id"], "/4.bed")
+ assert dataset["file_size"] == 61, dataset
+ assert dataset["file_name"] == bed_test_data_path, dataset
+ assert os.path.exists(bed_test_data_path)
+
+ def test_fetch_recursive_archive(self):
+ history_id, library, destination = self.library_populator.setup_fetch_to_folder("recursive_archive")
+ archive_test_data_path = self.test_data_resolver.get_filename("testdir1.zip")
+ targets = [{
+ "destination": destination,
+ "items_from": "archive", "src": "path", "path": archive_test_data_path,
+ }]
+ payload = {
+ "history_id": history_id, # TODO: Shouldn't be needed :(
+ "targets": json.dumps(targets),
+ }
+ self.dataset_populator.fetch(payload)
+ dataset = self.library_populator.get_library_contents_with_path(library["id"], "/file1")
+ assert dataset["file_size"] == 6, dataset
+
+ dataset = self.library_populator.get_library_contents_with_path(library["id"], "/file2")
+ assert dataset["file_size"] == 6, dataset
+
+ dataset = self.library_populator.get_library_contents_with_path(library["id"], "/dir1/file3")
+ assert dataset["file_size"] == 11, dataset
+
+ def test_fetch_history_compressed_type(self):
+ destination = {"type": "hdas"}
+ archive = self.test_data_resolver.get_filename("1.fastqsanger.gz")
+ targets = [{
+ "destination": destination,
+ "items": [{"src": "path", "path": archive, "ext": "fastqsanger.gz"}],
+ }]
+ payload = {
+ "history_id": self.history_id, # TODO: Shouldn't be needed :(
+ "targets": json.dumps(targets),
+ }
+ fetch_response = self.dataset_populator.fetch(payload)
+ self._assert_status_code_is(fetch_response, 200)
+ outputs = fetch_response.json()["outputs"]
+ assert len(outputs) == 1
+ output = outputs[0]
+ assert output["name"] == "1.fastqsanger.gz"
+ contents_response = self.dataset_populator._get_contents_request(self.history_id)
+ assert contents_response.status_code == 200
+ contents = contents_response.json()
+ assert len(contents) == 1, contents
+ assert contents[0]["extension"] == "fastqsanger.gz", contents[0]
+ assert contents[0]["name"] == "1.fastqsanger.gz", contents[0]
+ assert contents[0]["hid"] == 1, contents[0]
+
+ def test_fetch_recursive_archive_history(self):
+ destination = {"type": "hdas"}
+ archive = self.test_data_resolver.get_filename("testdir1.zip")
+ targets = [{
+ "destination": destination,
+ "items_from": "archive", "src": "path", "path": archive,
+ }]
+ payload = {
+ "history_id": self.history_id, # TODO: Shouldn't be needed :(
+ "targets": json.dumps(targets),
+ }
+ self.dataset_populator.fetch(payload)
+ contents_response = self.dataset_populator._get_contents_request(self.history_id)
+ assert contents_response.status_code == 200
+ contents = contents_response.json()
+ assert len(contents) == 3
+
+ def test_fetch_recursive_archive_to_library(self):
+ bed_test_data_path = self.test_data_resolver.get_filename("testdir1.zip")
+ targets = [{
+ "destination": {"type": "library", "name": "My Cool Library"},
+ "items_from": "archive", "src": "path", "path": bed_test_data_path,
+ }]
+ payload = {
+ "history_id": self.history_id, # TODO: Shouldn't be needed :(
+ "targets": json.dumps(targets),
+ }
+ self.dataset_populator.fetch(payload)
+ libraries = self.library_populator.get_libraries()
+ matching = [l for l in libraries if l["name"] == "My Cool Library"]
+ assert len(matching) == 1
+ library = matching[0]
+ dataset = self.library_populator.get_library_contents_with_path(library["id"], "/file1")
+ assert dataset["file_size"] == 6, dataset
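Outside the test framework, the same payloads can be posted directly to the API. A hedged usage sketch, assuming the requests land on Galaxy's `POST /api/tools/fetch` route with the usual `key` API-key parameter (neither is shown in this diff) and that `allow_path_paste` is enabled as in `FetchByPathTestCase` above:

```python
import json

import requests

GALAXY_URL = "http://localhost:8080"
API_KEY = "<admin API key>"  # FetchByPathTestCase runs these requests as an admin user

targets = [{
    "destination": {"type": "hdas"},
    "items": [{"src": "path", "path": "/tmp/4.bed"}],
}]
payload = {
    "history_id": "<history id>",
    "targets": json.dumps(targets),
    "key": API_KEY,
}
response = requests.post("%s/api/tools/fetch" % GALAXY_URL, data=payload)
response.raise_for_status()
print(response.json()["outputs"])
```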
diff --git a/tools/data_source/upload.py b/tools/data_source/upload.py
index 59d8f46036f0..4e0cdcdb0429 100644
--- a/tools/data_source/upload.py
+++ b/tools/data_source/upload.py
@@ -18,8 +18,12 @@
from galaxy import util
from galaxy.datatypes import sniff
-from galaxy.datatypes.binary import Binary
from galaxy.datatypes.registry import Registry
+from galaxy.datatypes.upload_util import (
+ handle_sniffable_binary_check,
+ handle_unsniffable_binary_check,
+ UploadProblemException,
+)
from galaxy.util.checkers import (
check_binary,
check_bz2,
@@ -36,12 +40,6 @@
assert sys.version_info[:2] >= (2, 7)
-class UploadProblemException(Exception):
-
- def __init__(self, message):
- self.message = message
-
-
def file_err(msg, dataset, json_file):
json_file.write(dumps(dict(type='dataset',
ext='data',
@@ -83,7 +81,10 @@ def add_file(dataset, registry, json_file, output_path):
line_count = None
converted_path = None
stdout = None
- link_data_only = dataset.get('link_data_only', 'copy_files') != 'copy_files'
+ link_data_only_str = dataset.get('link_data_only', 'copy_files')
+ if link_data_only_str not in ['link_data_only', 'copy_files']:
+ raise UploadProblemException("Invalid setting for option link_data_only - upload request misconfigured.")
+ link_data_only = link_data_only_str == 'link_data_only'
# run_as_real_user is estimated from galaxy config (external chmod indicated of inputs executed)
# If this is True we always purge supplied upload inputs so they are cleaned up and we reuse their
@@ -120,26 +121,21 @@ def add_file(dataset, registry, json_file, output_path):
if dataset.type == 'url':
try:
- page = urlopen(dataset.path) # page will be .close()ed by sniff methods
- temp_name = sniff.stream_to_file(page, prefix='url_paste', source_encoding=util.get_charset_from_http_headers(page.headers))
+ dataset.path = sniff.stream_url_to_file(dataset.path)
except Exception as e:
raise UploadProblemException('Unable to fetch %s\n%s' % (dataset.path, str(e)))
- dataset.path = temp_name
+
# See if we have an empty file
if not os.path.exists(dataset.path):
raise UploadProblemException('Uploaded temporary file (%s) does not exist.' % dataset.path)
+
if not os.path.getsize(dataset.path) > 0:
raise UploadProblemException('The uploaded file is empty')
+
# Is dataset content supported sniffable binary?
is_binary = check_binary(dataset.path)
if is_binary:
- # Sniff the data type
- guessed_ext = sniff.guess_ext(dataset.path, registry.sniff_order)
- # Set data_type only if guessed_ext is a binary datatype
- datatype = registry.get_datatype_by_extension(guessed_ext)
- if isinstance(datatype, Binary):
- data_type = guessed_ext
- ext = guessed_ext
+ data_type, ext = handle_sniffable_binary_check(data_type, ext, dataset.path, registry)
if not data_type:
root_datatype = registry.get_datatype_by_extension(dataset.file_type)
if getattr(root_datatype, 'compressed', False):
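The sniffable-binary handling removed above now lives in `galaxy.datatypes.upload_util`. Reconstructed from the deleted inline code, the extracted pieces presumably look roughly like the following; treat it as a sketch of that module, not its actual contents:

```python
from galaxy.datatypes import sniff
from galaxy.datatypes.binary import Binary


class UploadProblemException(Exception):
    # Relocated here from tools/data_source/upload.py (see the removal above).
    def __init__(self, message):
        self.message = message


def handle_sniffable_binary_check(data_type, ext, path, registry):
    """Adopt the sniffed extension only if it maps to a binary datatype."""
    guessed_ext = sniff.guess_ext(path, registry.sniff_order)
    datatype = registry.get_datatype_by_extension(guessed_ext)
    if isinstance(datatype, Binary):
        data_type = guessed_ext
        ext = guessed_ext
    return data_type, ext
```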
@@ -262,18 +258,9 @@ def add_file(dataset, registry, json_file, output_path):
dataset.name = uncompressed_name
data_type = 'zip'
if not data_type:
- if is_binary or registry.is_extension_unsniffable_binary(dataset.file_type):
- # We have a binary dataset, but it is not Bam, Sff or Pdf
- data_type = 'binary'
- parts = dataset.name.split(".")
- if len(parts) > 1:
- ext = parts[-1].strip().lower()
- is_ext_unsniffable_binary = registry.is_extension_unsniffable_binary(ext)
- if check_content and not is_ext_unsniffable_binary:
- raise UploadProblemException('The uploaded binary file contains inappropriate content')
- elif is_ext_unsniffable_binary and dataset.file_type != ext:
- err_msg = "You must manually set the 'File Format' to '%s' when uploading %s files." % (ext, ext)
- raise UploadProblemException(err_msg)
+ data_type, ext = handle_unsniffable_binary_check(
+ data_type, ext, dataset.path, dataset.name, is_binary, dataset.file_type, check_content, registry
+ )
if not data_type:
# We must have a text file
if check_content and check_html(dataset.path):
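Likewise, the unsniffable-binary branch moves into the same module; continuing the sketch above (again reconstructed from the deleted inline code, so the real signature in `upload_util` may differ):

```python
def handle_unsniffable_binary_check(data_type, ext, path, name, is_binary,
                                    file_type, check_content, registry):
    """Fall back to the generic 'binary' type and try to recover an extension from the file name."""
    # path is accepted to mirror the call site above but is not needed in this sketch.
    if is_binary or registry.is_extension_unsniffable_binary(file_type):
        data_type = 'binary'
        parts = name.split(".")
        if len(parts) > 1:
            ext = parts[-1].strip().lower()
            is_ext_unsniffable_binary = registry.is_extension_unsniffable_binary(ext)
            if check_content and not is_ext_unsniffable_binary:
                raise UploadProblemException('The uploaded binary file contains inappropriate content')
            elif is_ext_unsniffable_binary and file_type != ext:
                err_msg = "You must manually set the 'File Format' to '%s' when uploading %s files." % (ext, ext)
                raise UploadProblemException(err_msg)
    return data_type, ext
```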
@@ -290,7 +277,7 @@ def add_file(dataset, registry, json_file, output_path):
else:
line_count, converted_path = sniff.convert_newlines(dataset.path, in_place=in_place, tmp_dir=tmpdir, tmp_prefix=tmp_prefix)
if dataset.file_type == 'auto':
- ext = sniff.guess_ext(dataset.path, registry.sniff_order)
+ ext = sniff.guess_ext(converted_path or dataset.path, registry.sniff_order)
else:
ext = dataset.file_type
data_type = ext
@@ -302,7 +289,7 @@ def add_file(dataset, registry, json_file, output_path):
if ext == 'auto':
ext = 'data'
datatype = registry.get_datatype_by_extension(ext)
- if dataset.type in ('server_dir', 'path_paste') and link_data_only:
+ if link_data_only:
# Never alter a file that will not be copied to Galaxy's local file store.
if datatype.dataset_content_needs_grooming(dataset.path):
err_msg = 'The uploaded files need grooming, so change your Copy data into Galaxy? selection to be ' + \