From bdb8d119ca657677df0252896ea1cc62745e1018 Mon Sep 17 00:00:00 2001 From: John Chilton Date: Thu, 17 Dec 2020 16:45:41 -0500 Subject: [PATCH] private objectstores & dataset.sharable Setup abstractions to prevent sharing transient or private per-user objects in an objectstore. --- client/src/mvc/library/library-model.js | 2 +- lib/galaxy/job_execution/output_collect.py | 2 +- lib/galaxy/jobs/__init__.py | 4 +- lib/galaxy/jobs/runners/__init__.py | 7 +- lib/galaxy/model/__init__.py | 45 ++++++- lib/galaxy/model/security.py | 54 +++++++-- lib/galaxy/objectstore/__init__.py | 90 +++++++++++--- lib/galaxy/objectstore/azure_blob.py | 1 + lib/galaxy/objectstore/cloud.py | 1 + lib/galaxy/objectstore/irods.py | 2 + lib/galaxy/objectstore/pithos.py | 2 + lib/galaxy/objectstore/s3.py | 2 + lib/galaxy/tools/actions/upload_common.py | 4 +- .../webapps/galaxy/api/history_contents.py | 7 ++ .../webapps/galaxy/controllers/dataset.py | 10 +- .../api/test_dataset_collections.py | 2 +- lib/galaxy_test/api/test_libraries.py | 9 +- lib/galaxy_test/api/test_tools.py | 2 +- lib/galaxy_test/base/populators.py | 27 ++++- .../objectstore/test_private_handling.py | 50 ++++++++ test/unit/data/test_galaxy_mapping.py | 114 ++++++++++++++++-- test/unit/objectstore/test_objectstore.py | 24 +++- 22 files changed, 403 insertions(+), 58 deletions(-) create mode 100644 test/integration/objectstore/test_private_handling.py diff --git a/client/src/mvc/library/library-model.js b/client/src/mvc/library/library-model.js index d9cccef37128..da48ab9154f0 100644 --- a/client/src/mvc/library/library-model.js +++ b/client/src/mvc/library/library-model.js @@ -140,7 +140,7 @@ var HistoryContents = Backbone.Collection.extend({ this.id = options.id; }, url: function () { - return `${this.urlRoot + this.id}/contents`; + return `${this.urlRoot + this.id}/contents?sharable=true`; }, model: HistoryItem, }); diff --git a/lib/galaxy/job_execution/output_collect.py b/lib/galaxy/job_execution/output_collect.py index ec57e16e6e2f..fb41e1686146 100644 --- a/lib/galaxy/job_execution/output_collect.py +++ b/lib/galaxy/job_execution/output_collect.py @@ -260,7 +260,7 @@ def add_library_dataset_to_folder(self, library_folder, ld): # Permissions must be the same on the LibraryDatasetDatasetAssociation and the associated LibraryDataset trans.app.security_agent.copy_library_permissions(trans, ld, ldda) # Copy the current user's DefaultUserPermissions to the new LibraryDatasetDatasetAssociation.dataset - trans.app.security_agent.set_all_dataset_permissions(ldda.dataset, trans.app.security_agent.user_get_default_permissions(trans.user)) + trans.app.security_agent.set_all_dataset_permissions(ldda.dataset, trans.app.security_agent.user_get_default_permissions(trans.user), flush=False, new=True) library_folder.add_library_dataset(ld, genome_build=ldda.dbkey) trans.sa_session.add(library_folder) trans.sa_session.flush() diff --git a/lib/galaxy/jobs/__init__.py b/lib/galaxy/jobs/__init__.py index bc679123ca62..6d1e896295f3 100644 --- a/lib/galaxy/jobs/__init__.py +++ b/lib/galaxy/jobs/__init__.py @@ -1472,6 +1472,8 @@ def _set_object_store_ids(self, job): object_store_populator = ObjectStorePopulator(self.app, job.user) object_store_id = self.get_destination_configuration("object_store_id", None) + require_sharable = job.requires_sharable_storage(self.app.security_agent) + if object_store_id: object_store_populator.object_store_id = object_store_id @@ -1483,7 +1485,7 @@ def _set_object_store_ids(self, job): # afterward. State below needs to happen the same way. for dataset_assoc in job.output_datasets + job.output_library_datasets: dataset = dataset_assoc.dataset - object_store_populator.set_object_store_id(dataset) + object_store_populator.set_object_store_id(dataset, require_sharable=require_sharable) job.object_store_id = object_store_populator.object_store_id self._setup_working_directory(job=job) diff --git a/lib/galaxy/jobs/runners/__init__.py b/lib/galaxy/jobs/runners/__init__.py index ee7b97589746..3dac81864508 100644 --- a/lib/galaxy/jobs/runners/__init__.py +++ b/lib/galaxy/jobs/runners/__init__.py @@ -148,7 +148,12 @@ def put(self, job_wrapper): """Add a job to the queue (by job identifier), indicate that the job is ready to run. """ put_timer = ExecutionTimer() - job_wrapper.enqueue() + try: + job_wrapper.enqueue() + except Exception as e: + job_wrapper.fail(str(e), exception=e) + log.debug(f"Job [{job_wrapper.job_id}] failed to queue {put_timer}") + return self.mark_as_queued(job_wrapper) log.debug(f"Job [{job_wrapper.job_id}] queued {put_timer}") diff --git a/lib/galaxy/model/__init__.py b/lib/galaxy/model/__init__.py index 403c151b6206..433686d21b22 100644 --- a/lib/galaxy/model/__init__.py +++ b/lib/galaxy/model/__init__.py @@ -87,6 +87,7 @@ JOB_METRIC_SCALE = 7 # Tags that get automatically propagated from inputs to outputs when running jobs. AUTO_PROPAGATED_TAGS = ["name"] +CANNOT_SHARE_PRIVATE_DATASET_MESSAGE = "Attempting to share a non-sharable dataset." class RepresentById: @@ -1100,6 +1101,19 @@ def remap_objects(p, k, obj): job_attrs['params'] = params_dict return job_attrs + def requires_sharable_storage(self, security_agent): + # An easy optimization would be to calculate this in galaxy.tools.actions when the + # job is created and all the output permissions are already known. Having to reload + # these permissions in the job code shouldn't strictly be needed. + + requires_sharing = False + for dataset_assoc in self.output_datasets + self.output_library_datasets: + if not security_agent.dataset_is_private_to_a_user(dataset_assoc.dataset.dataset): + requires_sharing = True + break + + return requires_sharing + def to_dict(self, view='collection', system_details=False): rval = super().to_dict(view=view) rval['tool_id'] = self.tool_id @@ -2099,7 +2113,12 @@ def __filter_contents(self, content_class, **kwds): visible = galaxy.util.string_as_bool_or_none(kwds.get('visible', None)) if visible is not None: query = query.filter(content_class.visible == visible) + if 'object_store_ids' in kwds: + if content_class == HistoryDatasetAssociation: + query = query.join(content_class.dataset).filter(Dataset.table.c.object_store_id.in_(kwds.get("object_store_ids"))) + # else ignoring object_store_ids on HDCAs... if 'ids' in kwds: + assert 'object_store_ids' not in kwds ids = kwds['ids'] max_in_filter_length = kwds.get('max_in_filter_length', MAX_IN_FILTER_LENGTH) if len(ids) < max_in_filter_length: @@ -2358,11 +2377,24 @@ def is_new(self): def in_ready_state(self): return self.state in self.ready_states + @property + def sharable(self): + """Return True if placed into an objectstore not labeled as ``private``.""" + if self.external_filename: + return True + else: + object_store = self._assert_object_store_set() + return not object_store.is_private(self) + + def ensure_sharable(self): + if not self.sharable: + raise Exception(CANNOT_SHARE_PRIVATE_DATASET_MESSAGE) + def get_file_name(self): if not self.external_filename: - assert self.object_store is not None, "Object Store has not been initialized for dataset %s" % self.id - if self.object_store.exists(self): - return self.object_store.get_filename(self) + object_store = self._assert_object_store_set() + if object_store.exists(self): + return object_store.get_filename(self) else: return '' else: @@ -2377,6 +2409,10 @@ def set_file_name(self, filename): self.external_filename = filename file_name = property(get_file_name, set_file_name) + def _assert_object_store_set(self): + assert self.object_store is not None, "Object Store has not been initialized for dataset %s" % self.id + return self.object_store + def get_extra_files_path(self): # Unlike get_file_name - external_extra_files_path is not backed by an # actual database column so if SA instantiates this object - the @@ -3229,6 +3265,9 @@ def to_library_dataset_dataset_association(self, trans, target_folder, replace_d """ Copy this HDA to a library optionally replacing an existing LDDA. """ + if not self.dataset.sharable: + raise Exception("Attempting to share a non-sharable dataset.") + if replace_dataset: # The replace_dataset param ( when not None ) refers to a LibraryDataset that # is being replaced with a new version. diff --git a/lib/galaxy/model/security.py b/lib/galaxy/model/security.py index 9e05283202ee..d01c25a713cd 100644 --- a/lib/galaxy/model/security.py +++ b/lib/galaxy/model/security.py @@ -810,16 +810,23 @@ def set_all_dataset_permissions(self, dataset, permissions={}, new=False, flush= """ # Make sure that DATASET_MANAGE_PERMISSIONS is associated with at least 1 role has_dataset_manage_permissions = False - for action, roles in permissions.items(): - if isinstance(action, Action): - if action == self.permitted_actions.DATASET_MANAGE_PERMISSIONS and roles: - has_dataset_manage_permissions = True - break - elif action == self.permitted_actions.DATASET_MANAGE_PERMISSIONS.action and roles: - has_dataset_manage_permissions = True - break + for action, roles in _walk_action_roles(permissions, self.permitted_actions.DATASET_MANAGE_PERMISSIONS): + has_dataset_manage_permissions = True + break if not has_dataset_manage_permissions: return "At least 1 role must be associated with manage permissions on this dataset." + + # If this is new, the objectstore likely hasn't been set yet - defer check until + # the job handler assigns it. + if not new and not dataset.sharable: + # ensure dataset not shared. + dataset_access_roles = [] + for action, roles in _walk_action_roles(permissions, self.permitted_actions.DATASET_ACCESS): + dataset_access_roles.extend(roles) + + if len(dataset_access_roles) != 1 or dataset_access_roles[0].type != self.model.Role.types.PRIVATE: + return galaxy.model.CANNOT_SHARE_PRIVATE_DATASET_MESSAGE + flush_needed = False # Delete all of the current permissions on the dataset if not new: @@ -847,6 +854,12 @@ def set_dataset_permission(self, dataset, permission={}): Set a specific permission on a dataset, leaving all other current permissions on the dataset alone. Permission looks like: { Action.action : [ Role, Role ] } """ + + # if modifying access - ensure it is sharable. + for action, roles in _walk_action_roles(permission, self.permitted_actions.DATASET_ACCESS): + dataset.ensure_sharable() + break + flush_needed = False for action, roles in permission.items(): if isinstance(action, Action): @@ -886,6 +899,7 @@ def copy_dataset_permissions(self, src, dst): self.set_all_dataset_permissions(dst, self.get_permissions(src)) def privately_share_dataset(self, dataset, users=[]): + dataset.ensure_sharable() intersect = None for user in users: roles = [ura.role for ura in user.roles if ura.role.type == self.model.Role.types.SHARING] @@ -1056,6 +1070,19 @@ def dataset_is_private_to_user(self, trans, dataset): else: return False + def dataset_is_private_to_a_user(self, dataset): + """ + If the Dataset object has exactly one access role and that is + the current user's private role then we consider the dataset private. + """ + access_roles = dataset.get_access_roles(self) + + if len(access_roles) != 1: + return False + else: + access_role = access_roles[0] + return access_role.type == self.model.Role.types.PRIVATE + def datasets_are_public(self, trans, datasets): ''' Given a transaction object and a list of Datasets, return @@ -1084,6 +1111,8 @@ def datasets_are_public(self, trans, datasets): def make_dataset_public(self, dataset): # A dataset is considered public if there are no "access" actions associated with it. Any # other actions ( 'manage permissions', 'edit metadata' ) are irrelevant. + dataset.ensure_sharable() + flush_needed = False for dp in dataset.actions: if dp.action == self.permitted_actions.DATASET_ACCESS.action: @@ -1463,3 +1492,12 @@ def set_dataset_permissions(self, hda, user, site): hdadaa = self.model.HistoryDatasetAssociationDisplayAtAuthorization(hda=hda, user=user, site=site) self.sa_session.add(hdadaa) self.sa_session.flush() + + +def _walk_action_roles(permissions, query_action): + for action, roles in permissions.items(): + if isinstance(action, Action): + if action == query_action and roles: + yield action, roles + elif action == query_action.action and roles: + yield action, roles diff --git a/lib/galaxy/objectstore/__init__.py b/lib/galaxy/objectstore/__init__.py index d967c0df1a80..2f6937cab1a8 100644 --- a/lib/galaxy/objectstore/__init__.py +++ b/lib/galaxy/objectstore/__init__.py @@ -18,6 +18,7 @@ from galaxy.exceptions import ObjectInvalid, ObjectNotFound from galaxy.util import ( + asbool, directory_hash_id, force_symlink, parse_xml, @@ -31,6 +32,7 @@ from galaxy.util.sleeper import Sleeper NO_SESSION_ERROR_MESSAGE = "Attempted to 'create' object store entity in configuration with no database session present." +DEFAULT_PRIVATE = False log = logging.getLogger(__name__) @@ -91,6 +93,9 @@ def create(self, obj, base_dir=None, dir_only=False, extra_dir=None, extra_dir_a This method will create a proper directory structure for the file if the directory does not already exist. + + The method returns the concrete objectstore the supplied object is stored + in. """ raise NotImplementedError() @@ -193,6 +198,17 @@ def get_concrete_store_description_markdown(self, obj): be returned for the ConcreteObjectStore corresponding to the object. """ + @abc.abstractmethod + def is_private(self, obj): + """Return True iff supplied object is stored in private ConcreteObjectStore.""" + + def object_store_ids(self, private=None): + """Return IDs of all concrete object stores - either private ones or non-private ones. + + This should just return an empty list for non-nested object stores. + """ + return [] + @abc.abstractmethod def get_store_usage_percent(self): """Return the percentage indicating how full the store is.""" @@ -233,6 +249,8 @@ def __init__(self, config, config_dict=None, **kwargs): extra_dirs.update({ e['type']: e['path'] for e in config_dict.get('extra_dirs', [])}) self.extra_dirs = extra_dirs + # Annotate this as true to prevent sharing of data. + self.private = config_dict.get("private", DEFAULT_PRIVATE) def shutdown(self): """Close any connections for this ObjectStore.""" @@ -264,10 +282,13 @@ def to_dict(self): extra_dirs = [] for extra_dir_type, extra_dir_path in self.extra_dirs.items(): extra_dirs.append({"type": extra_dir_type, "path": extra_dir_path}) + private = self.private + store_type = self.store_type return { 'config': config_to_dict(self.config), 'extra_dirs': extra_dirs, - 'type': self.store_type, + 'type': store_type, + 'private': private } def _get_object_id(self, obj): @@ -324,6 +345,16 @@ def get_store_usage_percent(self): def get_store_by(self, obj, **kwargs): return self._invoke('get_store_by', obj, **kwargs) + def is_private(self, obj): + return self._invoke('is_private', obj) + + @classmethod + def parse_private_from_config_xml(clazz, config_xml): + private = DEFAULT_PRIVATE + if config_xml is not None: + private = asbool(config_xml.attrib.get('private', DEFAULT_PRIVATE)) + return private + class ConcreteObjectStore(BaseObjectStore): """Subclass of ObjectStore for stores that don't delegate (non-nested). @@ -368,6 +399,9 @@ def _get_concrete_store_description_markdown(self, obj): def _get_store_by(self, obj): return self.store_by + def _is_private(self, obj): + return self.private + class DiskObjectStore(ConcreteObjectStore): """ @@ -380,7 +414,7 @@ class DiskObjectStore(ConcreteObjectStore): >>> file_path=tempfile.mkdtemp() >>> obj = Bunch(id=1) >>> s = DiskObjectStore(Bunch(umask=0o077, jobs_directory=file_path, new_file_path=file_path, object_store_check_old_style=False), dict(files_dir=file_path)) - >>> s.create(obj) + >>> o = s.create(obj) >>> s.exists(obj) True >>> assert s.get_filename(obj) == file_path + '/000/dataset_1.dat' @@ -426,6 +460,7 @@ def parse_xml(clazz, config_xml): extra_dirs.append({"type": e.get('type'), "path": e.get('path')}) config_dict["extra_dirs"] = extra_dirs + config_dict["private"] = BaseObjectStore.parse_private_from_config_xml(config_xml) return config_dict def to_dict(self): @@ -537,6 +572,7 @@ def _create(self, obj, **kwargs): if not dir_only: open(path, 'w').close() # Should be rb? umask_fix_perms(path, self.config.umask, 0o666) + return self def _empty(self, obj, **kwargs): """Override `ObjectStore`'s stub by checking file size on disk.""" @@ -667,7 +703,8 @@ def file_ready(self, obj, **kwargs): def _create(self, obj, **kwargs): """Create a backing file in a random backend.""" - random.choice(list(self.backends.values())).create(obj, **kwargs) + objectstore = random.choice(list(self.backends.values())) + return objectstore.create(obj, **kwargs) def _empty(self, obj, **kwargs): """For the first backend that has this `obj`, determine if it is empty.""" @@ -701,10 +738,22 @@ def _get_object_url(self, obj, **kwargs): return self._call_method('_get_object_url', obj, None, False, **kwargs) def _get_concrete_store_name(self, obj): - return self._call_method('_get_concrete_store_name', obj, None, True) + return self._call_method('_get_concrete_store_name', obj, ObjectNotFound, True) def _get_concrete_store_description_markdown(self, obj): - return self._call_method('_get_concrete_store_description_markdown', obj, None, True) + return self._call_method('_get_concrete_store_description_markdown', obj, ObjectNotFound, True) + + def _is_private(self, obj): + return self._call_method('_is_private', obj, ObjectNotFound, True) + + def object_store_ids(self, private=None): + object_store_ids = [] + for backend_id, backend in self.backends.items(): + print(backend_id) + object_store_ids.extend(backend.object_store_ids(private=private)) + if backend.private is private or private is None: + object_store_ids.append(backend_id) + return object_store_ids def _get_store_by(self, obj): return self._call_method('_get_store_by', obj, None, False) @@ -874,20 +923,24 @@ def __filesystem_monitor(self): def _create(self, obj, **kwargs): """The only method in which obj.object_store_id may be None.""" - if obj.object_store_id is None or not self._exists(obj, **kwargs): - if obj.object_store_id is None or obj.object_store_id not in self.backends: + object_store_id = obj.object_store_id + if object_store_id is None or not self._exists(obj, **kwargs): + if object_store_id is None or object_store_id not in self.backends: try: - obj.object_store_id = random.choice(self.weighted_backend_ids) + object_store_id = random.choice(self.weighted_backend_ids) + obj.object_store_id = object_store_id except IndexError: raise ObjectInvalid('objectstore.create, could not generate ' - 'obj.object_store_id: %s, kwargs: %s' + 'object_store_id: %s, kwargs: %s' % (str(obj), str(kwargs))) log.debug("Selected backend '%s' for creation of %s %s" - % (obj.object_store_id, obj.__class__.__name__, obj.id)) + % (object_store_id, obj.__class__.__name__, obj.id)) else: log.debug("Using preferred backend '%s' for creation of %s %s" - % (obj.object_store_id, obj.__class__.__name__, obj.id)) - self.backends[obj.object_store_id].create(obj, **kwargs) + % (object_store_id, obj.__class__.__name__, obj.id)) + return self.backends[object_store_id].create(obj, **kwargs) + else: + return self.backends[object_store_id] def _call_method(self, method, obj, default, default_is_exception, **kwargs): object_store_id = self.__get_store_id_for(obj, **kwargs) @@ -948,7 +1001,9 @@ def parse_xml(clazz, config_xml): backend_config_dict["type"] = store_type backends_list.append(backend_config_dict) - return {"backends": backends_list} + config_dict = {"backends": backends_list} + config_dict["private"] = BaseObjectStore.parse_private_from_config_xml(config_xml) + return config_dict def to_dict(self): as_dict = super().to_dict() @@ -968,7 +1023,7 @@ def _exists(self, obj, **kwargs): def _create(self, obj, **kwargs): """Call the primary object store.""" - self.backends[0].create(obj, **kwargs) + return self.backends[0].create(obj, **kwargs) def type_to_object_store_class(store, fsmon=False): @@ -1122,13 +1177,16 @@ def __init__(self, app, user): self.object_store_id = None self.user = user - def set_object_store_id(self, data): + def set_object_store_id(self, data, require_sharable=False): # Create an empty file immediately. The first dataset will be # created in the "default" store, all others will be created in # the same store as the first. data.dataset.object_store_id = self.object_store_id try: - self.object_store.create(data.dataset) + ensure_non_private = require_sharable + concrete_store = self.object_store.create(data.dataset, ensure_non_private=ensure_non_private) + if concrete_store.private and require_sharable: + raise Exception("Attempted to create shared output datasets in objectstore with sharing disabled") except ObjectInvalid: raise Exception('Unable to create output dataset: object store is full') self.object_store_id = data.dataset.object_store_id # these will be the same thing after the first output diff --git a/lib/galaxy/objectstore/azure_blob.py b/lib/galaxy/objectstore/azure_blob.py index e77dedaa6794..ce2457c4a65c 100644 --- a/lib/galaxy/objectstore/azure_blob.py +++ b/lib/galaxy/objectstore/azure_blob.py @@ -74,6 +74,7 @@ def parse_config_xml(config_xml): 'path': staging_path, }, 'extra_dirs': extra_dirs, + 'private': ConcreteObjectStore.parse_private_from_config_xml(config_xml), } except Exception: # Toss it back up after logging, we can't continue loading at this point. diff --git a/lib/galaxy/objectstore/cloud.py b/lib/galaxy/objectstore/cloud.py index 3cd34fbbef79..7b7ca7ac67f0 100644 --- a/lib/galaxy/objectstore/cloud.py +++ b/lib/galaxy/objectstore/cloud.py @@ -571,6 +571,7 @@ def _create(self, obj, **kwargs): rel_path = os.path.join(rel_path, alt_name if alt_name else "dataset_%s.dat" % self._get_object_id(obj)) open(os.path.join(self.staging_path, rel_path), 'w').close() self._push_to_os(rel_path, from_string='') + return self def _empty(self, obj, **kwargs): if self._exists(obj, **kwargs): diff --git a/lib/galaxy/objectstore/irods.py b/lib/galaxy/objectstore/irods.py index eed996b37b0b..7ef40a8fb4d3 100644 --- a/lib/galaxy/objectstore/irods.py +++ b/lib/galaxy/objectstore/irods.py @@ -105,6 +105,7 @@ def parse_config_xml(config_xml): 'path': staging_path, }, 'extra_dirs': extra_dirs, + 'private': DiskObjectStore.parse_private_from_config_xml(config_xml), } except Exception: # Toss it back up after logging, we can't continue loading at this point. @@ -515,6 +516,7 @@ def _create(self, obj, **kwargs): open(os.path.join(self.staging_path, rel_path), 'w').close() self._push_to_irods(rel_path, from_string='') log.debug("irods_pt _create: %s", ipt_timer) + return self def _empty(self, obj, **kwargs): if self._exists(obj, **kwargs): diff --git a/lib/galaxy/objectstore/pithos.py b/lib/galaxy/objectstore/pithos.py index 9cab7fffc020..a079739751c4 100644 --- a/lib/galaxy/objectstore/pithos.py +++ b/lib/galaxy/objectstore/pithos.py @@ -67,6 +67,7 @@ def parse_config_xml(config_xml): raise Exception(msg) r['extra_dirs'] = [ {k: e.get(k) for k in attrs} for e in extra_dirs] + r['private'] = ConcreteObjectStore.parse_private_from_config_xml(config_xml) if 'job_work' not in (d['type'] for d in r['extra_dirs']): msg = f'No value for {tag}:type="job_work" in XML tree' log.error(msg) @@ -287,6 +288,7 @@ def _create(self, obj, **kwargs): new_file = os.path.join(self.staging_path, rel_path) open(new_file, 'w').close() self.pithos.upload_from_string(rel_path, '') + return self def _empty(self, obj, **kwargs): """ diff --git a/lib/galaxy/objectstore/s3.py b/lib/galaxy/objectstore/s3.py index 5759cacb59c6..bb10037d8e02 100644 --- a/lib/galaxy/objectstore/s3.py +++ b/lib/galaxy/objectstore/s3.py @@ -96,6 +96,7 @@ def parse_config_xml(config_xml): 'path': staging_path, }, 'extra_dirs': extra_dirs, + 'private': ConcreteObjectStore.parse_private_from_config_xml(config_xml), } except Exception: # Toss it back up after logging, we can't continue loading at this point. @@ -571,6 +572,7 @@ def _create(self, obj, **kwargs): rel_path = os.path.join(rel_path, alt_name if alt_name else "dataset_%s.dat" % self._get_object_id(obj)) open(os.path.join(self.staging_path, rel_path), 'w').close() self._push_to_os(rel_path, from_string='') + return self def _empty(self, obj, **kwargs): if self._exists(obj, **kwargs): diff --git a/lib/galaxy/tools/actions/upload_common.py b/lib/galaxy/tools/actions/upload_common.py index ce763c00c6c2..990a9a3cb924 100644 --- a/lib/galaxy/tools/actions/upload_common.py +++ b/lib/galaxy/tools/actions/upload_common.py @@ -188,7 +188,7 @@ def __new_history_upload(trans, uploaded_dataset, history=None, state=None): trans.sa_session.flush() history.add_dataset(hda, genome_build=uploaded_dataset.dbkey, quota=False) permissions = trans.app.security_agent.history_get_default_permissions(history) - trans.app.security_agent.set_all_dataset_permissions(hda.dataset, permissions) + trans.app.security_agent.set_all_dataset_permissions(hda.dataset, permissions, new=True, flush=False) trans.sa_session.flush() return hda @@ -251,7 +251,7 @@ def __new_library_upload(trans, cntrller, uploaded_dataset, library_bunch, tag_h trans.app.security_agent.copy_dataset_permissions(library_bunch.replace_dataset.library_dataset_dataset_association.dataset, ldda.dataset) else: # Copy the current user's DefaultUserPermissions to the new LibraryDatasetDatasetAssociation.dataset - trans.app.security_agent.set_all_dataset_permissions(ldda.dataset, trans.app.security_agent.user_get_default_permissions(trans.user)) + trans.app.security_agent.set_all_dataset_permissions(ldda.dataset, trans.app.security_agent.user_get_default_permissions(trans.user), new=True) folder.add_library_dataset(ld, genome_build=uploaded_dataset.dbkey) trans.sa_session.add(folder) trans.sa_session.flush() diff --git a/lib/galaxy/webapps/galaxy/api/history_contents.py b/lib/galaxy/webapps/galaxy/api/history_contents.py index 2882f9351d9e..217a1038b120 100644 --- a/lib/galaxy/webapps/galaxy/api/history_contents.py +++ b/lib/galaxy/webapps/galaxy/api/history_contents.py @@ -102,6 +102,13 @@ def index(self, trans, history_id, ids=None, v=None, **kwd): else: contents_kwds['deleted'] = kwd.get('deleted', None) contents_kwds['visible'] = kwd.get('visible', None) + sharable = kwd.get('sharable', None) + if sharable is not None: + sharable = util.string_as_bool(sharable) + object_store_ids = self.app.object_store.object_store_ids(private=not sharable) + if object_store_ids: + contents_kwds['object_store_ids'] = object_store_ids + # details param allows a mixed set of summary and detailed hdas # Ever more convoluted due to backwards compat..., details # should be considered deprecated in favor of more specific diff --git a/lib/galaxy/webapps/galaxy/controllers/dataset.py b/lib/galaxy/webapps/galaxy/controllers/dataset.py index 7a46670b5f78..41465ba56a32 100644 --- a/lib/galaxy/webapps/galaxy/controllers/dataset.py +++ b/lib/galaxy/webapps/galaxy/controllers/dataset.py @@ -274,7 +274,15 @@ def get_edit(self, trans, dataset_id=None, **kwd): permission_disable = True permission_inputs = list() if trans.user: - if data.dataset.actions: + if not data.dataset.sharable: + permission_message = 'The dataset is stored on private storage to you and cannot be shared.' + permission_inputs.append({ + 'name' : 'not_sharable', + 'type' : 'hidden', + 'label' : permission_message, + 'readonly' : True + }) + elif data.dataset.actions: in_roles = {} for action, roles in trans.app.security_agent.get_permissions(data.dataset).items(): in_roles[action.action] = [trans.security.encode_id(role.id) for role in roles] diff --git a/lib/galaxy_test/api/test_dataset_collections.py b/lib/galaxy_test/api/test_dataset_collections.py index 07d26763d7c6..f4720039c620 100644 --- a/lib/galaxy_test/api/test_dataset_collections.py +++ b/lib/galaxy_test/api/test_dataset_collections.py @@ -160,7 +160,7 @@ def test_list_list_list_download(self): assert len(namelist) == 3, "Expected 3 elements in [%s]" % namelist def test_hda_security(self): - element_identifiers = self.dataset_collection_populator.pair_identifiers(self.history_id) + element_identifiers = self.dataset_collection_populator.pair_identifiers(self.history_id, wait=True) self.dataset_populator.make_private(self.history_id, element_identifiers[0]["id"]) with self._different_user(): history_id = self.dataset_populator.new_history() diff --git a/lib/galaxy_test/api/test_libraries.py b/lib/galaxy_test/api/test_libraries.py index 907b2b22f3d7..ee0fe56c9e74 100644 --- a/lib/galaxy_test/api/test_libraries.py +++ b/lib/galaxy_test/api/test_libraries.py @@ -83,7 +83,7 @@ def test_create_dataset_denied(self): self._assert_status_code_is(folder_response, 200) folder_id = folder_response.json()[0]['id'] history_id = self.dataset_populator.new_history() - hda_id = self.dataset_populator.new_dataset(history_id, content="1 2 3")['id'] + hda_id = self.dataset_populator.new_dataset(history_id, content="1 2 3", wait=True)['id'] with self._different_user(): payload = {'from_hda_id': hda_id} create_response = self._post("folders/%s/contents" % folder_id, payload) @@ -258,7 +258,7 @@ def test_create_dataset_in_folder(self): self._assert_status_code_is(folder_response, 200) folder_id = folder_response.json()[0]['id'] history_id = self.dataset_populator.new_history() - hda_id = self.dataset_populator.new_dataset(history_id, content="1 2 3")['id'] + hda_id = self.dataset_populator.new_dataset(history_id, content="1 2 3", wait=True)['id'] payload = {'from_hda_id': hda_id} create_response = self._post("folders/%s/contents" % folder_id, payload) self._assert_status_code_is(create_response, 200) @@ -274,7 +274,7 @@ def test_create_dataset_in_subfolder(self): print(subfolder_response.json()) subfolder_id = subfolder_response.json()['id'] history_id = self.dataset_populator.new_history() - hda_id = self.dataset_populator.new_dataset(history_id, content="1 2 3 sub")['id'] + hda_id = self.dataset_populator.new_dataset(history_id, content="1 2 3 sub", wait=True)['id'] payload = {'from_hda_id': hda_id} create_response = self._post("folders/%s/contents" % subfolder_id, payload) self._assert_status_code_is(create_response, 200) @@ -432,7 +432,8 @@ def _create_dataset_in_folder_in_library(self, library_name): self._assert_status_code_is(folder_response, 200) folder_id = folder_response.json()[0]['id'] history_id = self.dataset_populator.new_history() - hda_id = self.dataset_populator.new_dataset(history_id, content="1 2 3")['id'] + hda_id = self.dataset_populator.new_dataset(history_id, content="1 2 3", wait=True)['id'] payload = {'from_hda_id': hda_id, 'create_type': 'file', 'folder_id': folder_id} ld = self._post("libraries/%s/contents" % folder_id, payload) + ld.raise_for_status() return ld diff --git a/lib/galaxy_test/api/test_tools.py b/lib/galaxy_test/api/test_tools.py index 1ba097a5117a..781bf128628a 100644 --- a/lib/galaxy_test/api/test_tools.py +++ b/lib/galaxy_test/api/test_tools.py @@ -748,7 +748,7 @@ def assert_inputs(inputs, can_be_used=True): else: self._assert_dataset_permission_denied_response(response) - new_dataset = self.dataset_populator.new_dataset(history_id, content='Cat1Test') + new_dataset = self.dataset_populator.new_dataset(history_id, content='Cat1Test', wait=True) inputs = dict( input1=dataset_to_param(new_dataset), ) diff --git a/lib/galaxy_test/base/populators.py b/lib/galaxy_test/base/populators.py index 4c29b96d3526..da3a1dcc25df 100644 --- a/lib/galaxy_test/base/populators.py +++ b/lib/galaxy_test/base/populators.py @@ -575,10 +575,23 @@ def make_private(self, history_id, dataset_id): "access": json.dumps([role_id]), "manage": json.dumps([role_id]), } + response = self.update_permissions_raw(history_id, dataset_id, payload) + response.raise_for_status() + return response.json() + + def make_public_raw(self, history_id, dataset_id): + role_id = self.user_private_role_id() + payload = { + "access": json.dumps([]), + "manage": json.dumps([role_id]), + } + response = self.update_permissions_raw(history_id, dataset_id, payload) + return response + + def update_permissions_raw(self, history_id, dataset_id, payload): url = f"histories/{history_id}/contents/{dataset_id}/permissions" update_response = self.galaxy_interactor._put(url, payload, admin=True) - assert update_response.status_code == 200, update_response.content - return update_response.json() + return update_response def validate_dataset(self, history_id, dataset_id): url = f"histories/{history_id}/contents/{dataset_id}/validate" @@ -1354,8 +1367,8 @@ def __create_payload_collection(self, history_id, identifiers_func, collection_t ) return payload - def pair_identifiers(self, history_id, contents=None): - hda1, hda2 = self.__datasets(history_id, count=2, contents=contents) + def pair_identifiers(self, history_id, contents=None, wait=False): + hda1, hda2 = self.__datasets(history_id, count=2, contents=contents, wait=wait) element_identifiers = [ dict(name="forward", src="hda", id=hda1["id"]), @@ -1389,10 +1402,12 @@ def __create(self, payload): else: return self.dataset_populator.fetch(payload) - def __datasets(self, history_id, count, contents=None): + def __datasets(self, history_id, count, contents=None, wait=False): datasets = [] for i in range(count): - new_kwds = {} + new_kwds = { + 'wait': wait, + } if contents: new_kwds["content"] = contents[i] datasets.append(self.dataset_populator.new_dataset(history_id, **new_kwds)) diff --git a/test/integration/objectstore/test_private_handling.py b/test/integration/objectstore/test_private_handling.py new file mode 100644 index 000000000000..efc0eb84be2d --- /dev/null +++ b/test/integration/objectstore/test_private_handling.py @@ -0,0 +1,50 @@ +"""Integration tests for mixing store_by.""" + +import string + +from galaxy_test.base import api_asserts +from ._base import BaseObjectStoreIntegrationTestCase + +PRIVATE_OBJECT_STORE_CONFIG_TEMPLATE = string.Template(""" + + + + + +""") + +TEST_INPUT_FILES_CONTENT = "1 2 3" + + +class PrivatePreventsSharingObjectStoreIntegrationTestCase(BaseObjectStoreIntegrationTestCase): + + @classmethod + def handle_galaxy_config_kwds(cls, config): + config["new_user_dataset_access_role_default_private"] = True + cls._configure_object_store(PRIVATE_OBJECT_STORE_CONFIG_TEMPLATE, config) + + def test_both_types(self): + """Test each object store configures files correctly. + """ + with self.dataset_populator.test_history() as history_id: + hda = self.dataset_populator.new_dataset(history_id, content=TEST_INPUT_FILES_CONTENT, wait=True) + content = self.dataset_populator.get_history_dataset_content(history_id, hda["id"]) + assert content.startswith(TEST_INPUT_FILES_CONTENT) + response = self.dataset_populator.make_public_raw(history_id, hda["id"]) + assert response.status_code != 200 + api_asserts.assert_error_message_contains(response, "Attempting to share a non-sharable dataset.") + + +class PrivateCannotWritePublicDataObjectStoreIntegrationTestCase(BaseObjectStoreIntegrationTestCase): + + @classmethod + def handle_galaxy_config_kwds(cls, config): + config["new_user_dataset_access_role_default_private"] = False + cls._configure_object_store(PRIVATE_OBJECT_STORE_CONFIG_TEMPLATE, config) + + def test_both_types(self): + with self.dataset_populator.test_history() as history_id: + response = self.dataset_populator.new_dataset_request(history_id, content=TEST_INPUT_FILES_CONTENT, wait=True, assert_ok=False) + job = response.json()["jobs"][0] + final_state = self.dataset_populator.wait_for_job(job["id"]) + assert final_state == "error" diff --git a/test/unit/data/test_galaxy_mapping.py b/test/unit/data/test_galaxy_mapping.py index 2b0b5ecc301d..7c8af04c0e5b 100644 --- a/test/unit/data/test_galaxy_mapping.py +++ b/test/unit/data/test_galaxy_mapping.py @@ -23,6 +23,7 @@ not os.environ.get('GALAXY_TEST_UNIT_MAPPING_URI_POSTGRES_BASE'), reason="GALAXY_TEST_UNIT_MAPPING_URI_POSTGRES_BASE not set" ) +PRIVATE_OBJECT_STORE_ID = "my_private_data" class BaseModelTestCase(unittest.TestCase): @@ -204,11 +205,13 @@ def assert_display_name_converts_to_unicode(item, name): assert history.get_display_name() == 'Hello₩◎ґʟⅾ' def test_hda_to_library_dataset_dataset_association(self): + model = self.model u = self.model.User(email="mary@example.com", password="password") - hda = self.model.HistoryDatasetAssociation(name='hda_name') + h1 = model.History(name="History 1", user=u) + hda = model.HistoryDatasetAssociation(name='hda_name', create_dataset=True, history=h1, sa_session=model.session) self.persist(hda) trans = collections.namedtuple('trans', 'user') - target_folder = self.model.LibraryFolder(name='library_folder') + target_folder = model.LibraryFolder(name='library_folder') ldda = hda.to_library_dataset_dataset_association( trans=trans(user=u), target_folder=target_folder, @@ -233,6 +236,21 @@ def test_hda_to_library_dataset_dataset_association(self): assert new_ldda.library_dataset.expired_datasets[0] == ldda assert target_folder.item_count == 1 + def test_hda_to_library_dataset_dataset_association_fails_if_private(self): + model = self.model + u = model.User(email="mary2@example.com", password="password") + h1 = model.History(name="History 1", user=u) + hda = model.HistoryDatasetAssociation(name='hda_name', create_dataset=True, history=h1, sa_session=model.session) + hda.dataset.object_store_id = PRIVATE_OBJECT_STORE_ID + self.persist(hda) + trans = collections.namedtuple('trans', 'user') + target_folder = model.LibraryFolder(name='library_folder') + with pytest.raises(Exception): + hda.to_library_dataset_dataset_association( + trans=trans(user=u), + target_folder=target_folder, + ) + def test_tags(self): model = self.model @@ -433,8 +451,8 @@ def test_history_contents(self): self.persist(u, h1, expunge=False) d1 = self.new_hda(h1, name="1") - d2 = self.new_hda(h1, name="2", visible=False) - d3 = self.new_hda(h1, name="3", deleted=True) + d2 = self.new_hda(h1, name="2", visible=False, object_store_id="foobar") + d3 = self.new_hda(h1, name="3", deleted=True, object_store_id="three_store") d4 = self.new_hda(h1, name="4", visible=False, deleted=True) self.session().flush() @@ -448,8 +466,11 @@ def contents_iter_names(**kwds): self.assertEqual(contents_iter_names(), ["1", "2", "3", "4"]) assert contents_iter_names(deleted=False) == ["1", "2"] assert contents_iter_names(visible=True) == ["1", "3"] + assert contents_iter_names(visible=True, object_store_ids=["three_store"]) == ["3"] assert contents_iter_names(visible=False) == ["2", "4"] assert contents_iter_names(deleted=True, visible=False) == ["4"] + assert contents_iter_names(deleted=False, object_store_ids=["foobar"]) == ["2"] + assert contents_iter_names(deleted=False, object_store_ids=["foobar2"]) == [] assert contents_iter_names(ids=[d1.id, d2.id, d3.id, d4.id]) == ["1", "2", "3", "4"] assert contents_iter_names(ids=[d1.id, d2.id, d3.id, d4.id], max_in_filter_length=1) == ["1", "2", "3", "4"] @@ -738,6 +759,69 @@ def test_can_manage_private_dataset(self): assert security_agent.can_manage_dataset(u_from.all_roles(), d1.dataset) assert not security_agent.can_manage_dataset(u_other.all_roles(), d1.dataset) + def test_cannot_make_private_objectstore_dataset_public(self): + security_agent = GalaxyRBACAgent(self.model) + u_from, u_to, _ = self._three_users("cannot_make_private_public") + + h = self.model.History(name="History for Prevent Sharing", user=u_from) + d1 = self.model.HistoryDatasetAssociation(extension="txt", history=h, create_dataset=True, sa_session=self.model.session) + self.persist(h, d1) + + d1.dataset.object_store_id = PRIVATE_OBJECT_STORE_ID + self._make_private(security_agent, u_from, d1) + + with pytest.raises(Exception) as exec_info: + self._make_owned(security_agent, u_from, d1) + assert galaxy.model.CANNOT_SHARE_PRIVATE_DATASET_MESSAGE in str(exec_info.value) + + def test_cannot_make_private_objectstore_dataset_shared(self): + security_agent = GalaxyRBACAgent(self.model) + u_from, u_to, _ = self._three_users("cannot_make_private_shared") + + h = self.model.History(name="History for Prevent Sharing", user=u_from) + d1 = self.model.HistoryDatasetAssociation(extension="txt", history=h, create_dataset=True, sa_session=self.model.session) + self.persist(h, d1) + + d1.dataset.object_store_id = PRIVATE_OBJECT_STORE_ID + self._make_private(security_agent, u_from, d1) + + with pytest.raises(Exception) as exec_info: + security_agent.privately_share_dataset(d1.dataset, [u_to]) + assert galaxy.model.CANNOT_SHARE_PRIVATE_DATASET_MESSAGE in str(exec_info.value) + + def test_cannot_set_dataset_permisson_on_private(self): + security_agent = GalaxyRBACAgent(self.model) + u_from, u_to, _ = self._three_users("cannot_set_permissions_on_private") + + h = self.model.History(name="History for Prevent Sharing", user=u_from) + d1 = self.model.HistoryDatasetAssociation(extension="txt", history=h, create_dataset=True, sa_session=self.model.session) + self.persist(h, d1) + + d1.dataset.object_store_id = PRIVATE_OBJECT_STORE_ID + self._make_private(security_agent, u_from, d1) + + role = security_agent.get_private_user_role(u_to, auto_create=True) + access_action = security_agent.permitted_actions.DATASET_ACCESS.action + + with pytest.raises(Exception) as exec_info: + security_agent.set_dataset_permission(d1.dataset, {access_action: [role]}) + assert galaxy.model.CANNOT_SHARE_PRIVATE_DATASET_MESSAGE in str(exec_info.value) + + def test_cannot_make_private_dataset_public(self): + security_agent = GalaxyRBACAgent(self.model) + u_from, u_to, u_other = self._three_users("cannot_make_private_dataset_public") + + h = self.model.History(name="History for Annotation", user=u_from) + d1 = self.model.HistoryDatasetAssociation(extension="txt", history=h, create_dataset=True, sa_session=self.model.session) + self.persist(h, d1) + + d1.dataset.object_store_id = PRIVATE_OBJECT_STORE_ID + self._make_private(security_agent, u_from, d1) + + with pytest.raises(Exception) as exec_info: + security_agent.make_dataset_public(d1.dataset) + assert galaxy.model.CANNOT_SHARE_PRIVATE_DATASET_MESSAGE in str(exec_info.value) + def _three_users(self, suffix): email_from = f"user_{suffix}e1@example.com" email_to = f"user_{suffix}e2@example.com" @@ -754,16 +838,26 @@ def _make_private(self, security_agent, user, hda): access_action = security_agent.permitted_actions.DATASET_ACCESS.action manage_action = security_agent.permitted_actions.DATASET_MANAGE_PERMISSIONS.action permissions = {access_action: [role], manage_action: [role]} - security_agent.set_all_dataset_permissions(hda.dataset, permissions) + self._set_permissions(security_agent, hda.dataset, permissions) def _make_owned(self, security_agent, user, hda): role = security_agent.get_private_user_role(user, auto_create=True) manage_action = security_agent.permitted_actions.DATASET_MANAGE_PERMISSIONS.action permissions = {manage_action: [role]} - security_agent.set_all_dataset_permissions(hda.dataset, permissions) + self._set_permissions(security_agent, hda.dataset, permissions) + + def _set_permissions(self, security_agent, dataset, permissions): + # TODO: refactor set_all_dataset_permissions to actually throw an exception :| + error = security_agent.set_all_dataset_permissions(dataset, permissions) + if error: + raise Exception(error) def new_hda(self, history, **kwds): - return history.add_dataset(self.model.HistoryDatasetAssociation(create_dataset=True, sa_session=self.model.session, **kwds)) + object_store_id = kwds.pop('object_store_id', None) + hda = self.model.HistoryDatasetAssociation(create_dataset=True, sa_session=self.model.session, **kwds) + if object_store_id is not None: + hda.dataset.object_store_id = object_store_id + return history.add_dataset(hda) @skip_if_not_postgres_base @@ -795,6 +889,12 @@ def get_filename(self, *args, **kwds): def get_store_by(self, *args, **kwds): return 'id' + def is_private(self, object): + if object.object_store_id == PRIVATE_OBJECT_STORE_ID: + return True + else: + return False + def get_suite(): suite = unittest.TestSuite() diff --git a/test/unit/objectstore/test_objectstore.py b/test/unit/objectstore/test_objectstore.py index 7102dac0abf4..b500ab304289 100644 --- a/test/unit/objectstore/test_objectstore.py +++ b/test/unit/objectstore/test_objectstore.py @@ -273,15 +273,15 @@ def test_hierarchical_store(): _assert_key_has_value(as_dict, "type", "hierarchical") -MIXED_STORE_BY_HIERARCHICAL_TEST_CONFIG = """ - +MIXED_STORE_BY_DISTRIBUTED_TEST_CONFIG = """ + - + @@ -292,11 +292,23 @@ def test_hierarchical_store(): def test_mixed_store_by(): - with TestConfig(MIXED_STORE_BY_HIERARCHICAL_TEST_CONFIG) as (directory, object_store): + with TestConfig(MIXED_STORE_BY_DISTRIBUTED_TEST_CONFIG) as (directory, object_store): as_dict = object_store.to_dict() assert as_dict["backends"][0]["store_by"] == "id" assert as_dict["backends"][1]["store_by"] == "uuid" + ids = object_store.object_store_ids() + print(ids) + assert len(ids) == 2 + + ids = object_store.object_store_ids(private=True) + assert len(ids) == 1 + assert ids[0] == "files2" + + ids = object_store.object_store_ids(private=False) + assert len(ids) == 1 + assert ids[0] == "files1" + DISTRIBUTED_TEST_CONFIG = """ @@ -456,7 +468,7 @@ def test_config_parse_pithos(): assert len(extra_dirs) == 2 -S3_TEST_CONFIG = """ +S3_TEST_CONFIG = """ @@ -468,6 +480,7 @@ def test_config_parse_pithos(): S3_TEST_CONFIG_YAML = """ type: s3 +private: true auth: access_key: access_moo secret_key: secret_cow @@ -491,6 +504,7 @@ def test_config_parse_pithos(): def test_config_parse_s3(): for config_str in [S3_TEST_CONFIG, S3_TEST_CONFIG_YAML]: with TestConfig(config_str, clazz=UnitializeS3ObjectStore) as (directory, object_store): + assert object_store.private assert object_store.access_key == "access_moo" assert object_store.secret_key == "secret_cow"