From 77b8810b5ed927afed0d53ac82ad282ccc9a10d0 Mon Sep 17 00:00:00 2001
From: John Chilton
Date: Thu, 9 Jun 2022 09:41:34 -0400
Subject: [PATCH] private objectstores & dataset.sharable

Set up abstractions to prevent sharing transient or private per-user
objects in an objectstore.
---
 .../DatasetStorage/DatasetStorage.test.js | 8 +-
 .../Dataset/DatasetStorage/DatasetStorage.vue | 17 ++-
 .../ObjectStoreRestrictionSpan.test.js | 27 +++++
 .../ObjectStoreRestrictionSpan.vue | 38 ++++++
 client/src/mvc/library/library-model.js | 2 +-
 lib/galaxy/app.py | 2 +
 lib/galaxy/job_execution/output_collect.py | 2 +-
 lib/galaxy/jobs/__init__.py | 4 +-
 lib/galaxy/jobs/runners/__init__.py | 7 +-
 lib/galaxy/model/__init__.py | 45 ++++++-
 lib/galaxy/model/security.py | 54 +++++++--
 lib/galaxy/objectstore/__init__.py | 100 ++++++++++++---
 lib/galaxy/objectstore/azure_blob.py | 1 +
 lib/galaxy/objectstore/cloud.py | 1 +
 lib/galaxy/objectstore/irods.py | 2 +
 lib/galaxy/objectstore/pithos.py | 2 +
 lib/galaxy/objectstore/s3.py | 2 +
 lib/galaxy/tools/actions/upload_common.py | 4 +-
 lib/galaxy/webapps/galaxy/api/datasets.py | 1 +
 .../webapps/galaxy/api/history_contents.py | 19 +++
 .../webapps/galaxy/controllers/dataset.py | 10 +-
 .../api/test_dataset_collections.py | 2 +-
 lib/galaxy_test/api/test_libraries.py | 5 +-
 lib/galaxy_test/api/test_tools.py | 2 +-
 lib/galaxy_test/base/populators.py | 26 +++-
 .../objectstore/test_private_handling.py | 50 ++++++++
 test/unit/data/test_galaxy_mapping.py | 114 ++++++++++++++++--
 test/unit/objectstore/test_objectstore.py | 57 ++++++++-
 28 files changed, 548 insertions(+), 56 deletions(-)
 create mode 100644 client/src/components/Dataset/DatasetStorage/ObjectStoreRestrictionSpan.test.js
 create mode 100644 client/src/components/Dataset/DatasetStorage/ObjectStoreRestrictionSpan.vue
 create mode 100644 test/integration/objectstore/test_private_handling.py

diff --git a/client/src/components/Dataset/DatasetStorage/DatasetStorage.test.js b/client/src/components/Dataset/DatasetStorage/DatasetStorage.test.js
index 6b34492d590a..229c7e170c0c 100644
--- a/client/src/components/Dataset/DatasetStorage/DatasetStorage.test.js
+++ b/client/src/components/Dataset/DatasetStorage/DatasetStorage.test.js
@@ -10,14 +10,17 @@ const localVue = getLocalVue();
 
 const TEST_STORAGE_API_RESPONSE_WITHOUT_ID = {
     object_store_id: null,
+    private: false,
 };
 const TEST_STORAGE_API_RESPONSE_WITH_ID = {
     object_store_id: "foobar",
+    private: false,
 };
 const TEST_STORAGE_API_RESPONSE_WITH_NAME = {
     object_store_id: "foobar",
     name: "my cool storage",
     description: "My cool **markdown**",
+    private: true,
 };
 const TEST_DATASET_ID = "1";
 const TEST_STORAGE_URL = `/api/datasets/${TEST_DATASET_ID}/storage`;
@@ -46,9 +49,6 @@ describe("Dataset Storage", () => {
         wrapper = shallowMount(DatasetStorage, {
             propsData: { datasetId: TEST_DATASET_ID },
             localVue,
-            stubs: {
-                "loading-span": true,
-            },
         });
     }
 
@@ -102,6 +102,7 @@ describe("Dataset Storage", () => {
         expect(byIdSpan.length).toBe(1);
         const byNameSpan = wrapper.findAll(".display-os-by-name");
         expect(byNameSpan.length).toBe(0);
+        expect(wrapper.find("object-store-restriction-span-stub").props("isPrivate")).toBeFalsy();
     });
 
     it("test dataset storage with object store name", async () => {
@@ -116,6 +117,7 @@ describe("Dataset Storage", () => {
         expect(byIdSpan.length).toBe(0);
         const byNameSpan = wrapper.findAll(".display-os-by-name");
         expect(byNameSpan.length).toBe(1);
+        expect(wrapper.find("object-store-restriction-span-stub").props("isPrivate")).toBeTruthy();
     });
 
     afterEach(() => {
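For context, the fixtures above mirror the `/api/datasets/{id}/storage` payload that `DatasetStorage.vue` consumes; the only field new in this patch is `private`. A minimal sketch of such a response, limited to the fields these tests exercise (values illustrative, not from a real server):

```python
# Illustrative storage payload; field names follow the test fixtures above,
# values are made up for the example.
storage_response = {
    "object_store_id": "foobar",
    "name": "my cool storage",
    "description": "My cool **markdown**",  # rendered as markdown client-side
    "private": True,  # new flag: drives the restriction badge in the UI
}
assert storage_response["private"] is True
```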
diff --git a/client/src/components/Dataset/DatasetStorage/DatasetStorage.vue b/client/src/components/Dataset/DatasetStorage/DatasetStorage.vue index 37f661fe0a18..4aebe7321fb7 100644 --- a/client/src/components/Dataset/DatasetStorage/DatasetStorage.vue +++ b/client/src/components/Dataset/DatasetStorage/DatasetStorage.vue @@ -7,12 +7,16 @@

This dataset is stored in - a Galaxy object store named {{ storageInfo.name }} + a Galaxy object store named + {{ storageInfo.name }} - a Galaxy object store with id {{ storageInfo.object_store_id }} + a Galaxy object store with id + {{ storageInfo.object_store_id }} - the default configured Galaxy object store . + + the default configured Galaxy object store .

@@ -25,10 +29,12 @@ import { getAppRoot } from "onload/loadConfig"; import LoadingSpan from "components/LoadingSpan"; import MarkdownIt from "markdown-it"; import { errorMessageAsString } from "utils/simple-error"; +import ObjectStoreRestrictionSpan from "./ObjectStoreRestrictionSpan"; export default { components: { LoadingSpan, + ObjectStoreRestrictionSpan, }, props: { datasetId: { @@ -60,6 +66,11 @@ export default { this.errorMessage = errorMessageAsString(errorMessage); }); }, + computed: { + isPrivate() { + return this.storageInfo.private; + }, + }, methods: { handleResponse(response) { const storageInfo = response.data; diff --git a/client/src/components/Dataset/DatasetStorage/ObjectStoreRestrictionSpan.test.js b/client/src/components/Dataset/DatasetStorage/ObjectStoreRestrictionSpan.test.js new file mode 100644 index 000000000000..0f199a2a8f51 --- /dev/null +++ b/client/src/components/Dataset/DatasetStorage/ObjectStoreRestrictionSpan.test.js @@ -0,0 +1,27 @@ +import { shallowMount } from "@vue/test-utils"; +import { getLocalVue } from "jest/helpers"; +import ObjectStoreRestrictionSpan from "./ObjectStoreRestrictionSpan"; + +const localVue = getLocalVue(); + +describe("ObjectStoreRestrictionSpan", () => { + let wrapper; + + it("should render info about private storage if isPrivate", () => { + wrapper = shallowMount(ObjectStoreRestrictionSpan, { + propsData: { isPrivate: true }, + localVue, + }); + expect(wrapper.find(".stored-how").text()).toBe("private"); + expect(wrapper.find(".stored-how").attributes("title")).toBeTruthy(); + }); + + it("should render info about unrestricted storage if not isPrivate", () => { + wrapper = shallowMount(ObjectStoreRestrictionSpan, { + propsData: { isPrivate: false }, + localVue, + }); + expect(wrapper.find(".stored-how").text()).toBe("unrestricted"); + expect(wrapper.find(".stored-how").attributes("title")).toBeTruthy(); + }); +}); diff --git a/client/src/components/Dataset/DatasetStorage/ObjectStoreRestrictionSpan.vue b/client/src/components/Dataset/DatasetStorage/ObjectStoreRestrictionSpan.vue new file mode 100644 index 000000000000..29d313a72142 --- /dev/null +++ b/client/src/components/Dataset/DatasetStorage/ObjectStoreRestrictionSpan.vue @@ -0,0 +1,38 @@ + + + + + diff --git a/client/src/mvc/library/library-model.js b/client/src/mvc/library/library-model.js index d409e1ed9dab..3824003ab801 100644 --- a/client/src/mvc/library/library-model.js +++ b/client/src/mvc/library/library-model.js @@ -144,7 +144,7 @@ var HistoryContents = Backbone.Collection.extend({ this.id = options.id; }, url: function () { - return `${this.urlRoot + this.id}/contents`; + return `${this.urlRoot + this.id}/contents?sharable=true`; }, model: HistoryItem, }); diff --git a/lib/galaxy/app.py b/lib/galaxy/app.py index db4b507609ed..774b5d209148 100644 --- a/lib/galaxy/app.py +++ b/lib/galaxy/app.py @@ -39,6 +39,7 @@ from galaxy.model.database_heartbeat import DatabaseHeartbeat from galaxy.model.mapping import GalaxyModelMapping from galaxy.model.tags import GalaxyTagHandler +from galaxy.objectstore import ObjectStore from galaxy.queue_worker import ( GalaxyQueueWorker, send_local_control_task, @@ -146,6 +147,7 @@ def __init__(self, fsmon=False, configure_logging=True, **kwargs) -> None: if configure_logging: config.configure_logging(self.config) self._configure_object_store(fsmon=True) + self._register_singleton(ObjectStore, self.object_store) config_file = kwargs.get('global_conf', {}).get('__file__', None) if config_file: log.debug('Using "galaxy.ini" config file: %s', 
config_file) diff --git a/lib/galaxy/job_execution/output_collect.py b/lib/galaxy/job_execution/output_collect.py index e06dea082f3b..c22da0055a2e 100644 --- a/lib/galaxy/job_execution/output_collect.py +++ b/lib/galaxy/job_execution/output_collect.py @@ -279,7 +279,7 @@ def add_library_dataset_to_folder(self, library_folder, ld): # Permissions must be the same on the LibraryDatasetDatasetAssociation and the associated LibraryDataset trans.app.security_agent.copy_library_permissions(trans, ld, ldda) # Copy the current user's DefaultUserPermissions to the new LibraryDatasetDatasetAssociation.dataset - trans.app.security_agent.set_all_dataset_permissions(ldda.dataset, trans.app.security_agent.user_get_default_permissions(trans.user)) + trans.app.security_agent.set_all_dataset_permissions(ldda.dataset, trans.app.security_agent.user_get_default_permissions(trans.user), flush=False, new=True) library_folder.add_library_dataset(ld, genome_build=ldda.dbkey) trans.sa_session.add(library_folder) trans.sa_session.flush() diff --git a/lib/galaxy/jobs/__init__.py b/lib/galaxy/jobs/__init__.py index 82614ed7b420..36593e6ff2b5 100644 --- a/lib/galaxy/jobs/__init__.py +++ b/lib/galaxy/jobs/__init__.py @@ -1514,6 +1514,8 @@ def _set_object_store_ids(self, job): object_store_populator = ObjectStorePopulator(self.app, job.user) object_store_id = self.get_destination_configuration("object_store_id", None) + require_sharable = job.requires_sharable_storage(self.app.security_agent) + if object_store_id: object_store_populator.object_store_id = object_store_id @@ -1525,7 +1527,7 @@ def _set_object_store_ids(self, job): # afterward. State below needs to happen the same way. for dataset_assoc in job.output_datasets + job.output_library_datasets: dataset = dataset_assoc.dataset - object_store_populator.set_object_store_id(dataset) + object_store_populator.set_object_store_id(dataset, require_sharable=require_sharable) job.object_store_id = object_store_populator.object_store_id self._setup_working_directory(job=job) diff --git a/lib/galaxy/jobs/runners/__init__.py b/lib/galaxy/jobs/runners/__init__.py index 0408c5ea7703..a9220023c7ab 100644 --- a/lib/galaxy/jobs/runners/__init__.py +++ b/lib/galaxy/jobs/runners/__init__.py @@ -156,7 +156,12 @@ def put(self, job_wrapper): """Add a job to the queue (by job identifier), indicate that the job is ready to run. """ put_timer = ExecutionTimer() - job_wrapper.enqueue() + try: + job_wrapper.enqueue() + except Exception as e: + job_wrapper.fail(str(e), exception=e) + log.debug(f"Job [{job_wrapper.job_id}] failed to queue {put_timer}") + return self.mark_as_queued(job_wrapper) log.debug(f"Job [{job_wrapper.job_id}] queued {put_timer}") diff --git a/lib/galaxy/model/__init__.py b/lib/galaxy/model/__init__.py index 328bb841a55e..e0ffbb25e5b0 100644 --- a/lib/galaxy/model/__init__.py +++ b/lib/galaxy/model/__init__.py @@ -136,6 +136,7 @@ # Tags that get automatically propagated from inputs to outputs when running jobs. AUTO_PROPAGATED_TAGS = ["name"] YIELD_PER_ROWS = 100 +CANNOT_SHARE_PRIVATE_DATASET_MESSAGE = "Attempting to share a non-sharable dataset." if TYPE_CHECKING: @@ -1368,6 +1369,19 @@ def remap_objects(p, k, obj): job_attrs['params'] = params_dict return job_attrs + def requires_sharable_storage(self, security_agent): + # An easy optimization would be to calculate this in galaxy.tools.actions when the + # job is created and all the output permissions are already known. Having to reload + # these permissions in the job code shouldn't strictly be needed. 
+ + requires_sharing = False + for dataset_assoc in self.output_datasets + self.output_library_datasets: + if not security_agent.dataset_is_private_to_a_user(dataset_assoc.dataset.dataset): + requires_sharing = True + break + + return requires_sharing + def to_dict(self, view='collection', system_details=False): if view == 'admin_job_list': rval = super().to_dict(view='collection') @@ -2790,7 +2804,12 @@ def __filter_contents(self, content_class, **kwds): visible = galaxy.util.string_as_bool_or_none(kwds.get('visible', None)) if visible is not None: query = query.filter(content_class.visible == visible) + if 'object_store_ids' in kwds: + if content_class == HistoryDatasetAssociation: + query = query.join(content_class.dataset).filter(Dataset.table.c.object_store_id.in_(kwds.get("object_store_ids"))) + # else ignoring object_store_ids on HDCAs... if 'ids' in kwds: + assert 'object_store_ids' not in kwds ids = kwds['ids'] max_in_filter_length = kwds.get('max_in_filter_length', MAX_IN_FILTER_LENGTH) if len(ids) < max_in_filter_length: @@ -3226,11 +3245,24 @@ def is_new(self): def in_ready_state(self): return self.state in self.ready_states + @property + def sharable(self): + """Return True if placed into an objectstore not labeled as ``private``.""" + if self.external_filename: + return True + else: + object_store = self._assert_object_store_set() + return not object_store.is_private(self) + + def ensure_sharable(self): + if not self.sharable: + raise Exception(CANNOT_SHARE_PRIVATE_DATASET_MESSAGE) + def get_file_name(self): if not self.external_filename: - assert self.object_store is not None, f"Object Store has not been initialized for dataset {self.id}" - if self.object_store.exists(self): - return self.object_store.get_filename(self) + object_store = self._assert_object_store_set() + if object_store.exists(self): + return object_store.get_filename(self) else: return '' else: @@ -3245,6 +3277,10 @@ def set_file_name(self, filename): self.external_filename = filename file_name = property(get_file_name, set_file_name) + def _assert_object_store_set(self): + assert self.object_store is not None, f"Object Store has not been initialized for dataset {self.id}" + return self.object_store + def get_extra_files_path(self): # Unlike get_file_name - external_extra_files_path is not backed by an # actual database column so if SA instantiates this object - the @@ -4134,6 +4170,9 @@ def to_library_dataset_dataset_association(self, trans, target_folder, replace_d """ Copy this HDA to a library optionally replacing an existing LDDA. """ + if not self.dataset.sharable: + raise Exception("Attempting to share a non-sharable dataset.") + if replace_dataset: # The replace_dataset param ( when not None ) refers to a LibraryDataset that # is being replaced with a new version. 
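The `sharable` property and `ensure_sharable` guard added to `Dataset` above are the heart of this patch; the rest of the diff threads the flag through jobs, security, and the API. A standalone sketch of that contract, with hypothetical stub classes standing in for Galaxy's real `Dataset` and `ObjectStore`:

```python
# Standalone sketch of the Dataset.sharable contract; StubObjectStore and
# StubDataset are hypothetical stand-ins, not Galaxy classes.
CANNOT_SHARE_PRIVATE_DATASET_MESSAGE = "Attempting to share a non-sharable dataset."


class StubObjectStore:
    def is_private(self, obj):
        return True  # pretend the object landed in a private backend


class StubDataset:
    external_filename = None
    object_store = StubObjectStore()

    @property
    def sharable(self):
        # Externally backed files are always sharable; otherwise defer to
        # the objectstore's private flag, mirroring the patch.
        if self.external_filename:
            return True
        return not self.object_store.is_private(self)

    def ensure_sharable(self):
        if not self.sharable:
            raise Exception(CANNOT_SHARE_PRIVATE_DATASET_MESSAGE)


try:
    StubDataset().ensure_sharable()
except Exception as e:
    assert CANNOT_SHARE_PRIVATE_DATASET_MESSAGE in str(e)
```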
diff --git a/lib/galaxy/model/security.py b/lib/galaxy/model/security.py
index ddad0310c0e1..7a65a5b81c01 100644
--- a/lib/galaxy/model/security.py
+++ b/lib/galaxy/model/security.py
@@ -814,16 +814,23 @@ def set_all_dataset_permissions(self, dataset, permissions=None, new=False, flus
         # Make sure that DATASET_MANAGE_PERMISSIONS is associated with at least 1 role
         has_dataset_manage_permissions = False
         permissions = permissions or {}
-        for action, roles in permissions.items():
-            if isinstance(action, Action):
-                if action == self.permitted_actions.DATASET_MANAGE_PERMISSIONS and roles:
-                    has_dataset_manage_permissions = True
-                    break
-            elif action == self.permitted_actions.DATASET_MANAGE_PERMISSIONS.action and roles:
-                has_dataset_manage_permissions = True
-                break
+        for _ in _walk_action_roles(permissions, self.permitted_actions.DATASET_MANAGE_PERMISSIONS):
+            has_dataset_manage_permissions = True
+            break
         if not has_dataset_manage_permissions:
             return "At least 1 role must be associated with manage permissions on this dataset."
+
+        # If this is new, the objectstore likely hasn't been set yet - defer check until
+        # the job handler assigns it.
+        if not new and not dataset.sharable:
+            # Ensure the dataset is not being shared beyond the owner's private role.
+            dataset_access_roles = []
+            for _, roles in _walk_action_roles(permissions, self.permitted_actions.DATASET_ACCESS):
+                dataset_access_roles.extend(roles)
+
+            if len(dataset_access_roles) != 1 or dataset_access_roles[0].type != self.model.Role.types.PRIVATE:
+                return galaxy.model.CANNOT_SHARE_PRIVATE_DATASET_MESSAGE
+
         flush_needed = False
         # Delete all of the current permissions on the dataset
         if not new:
@@ -852,6 +859,12 @@ def set_dataset_permission(self, dataset, permission=None):
         Permission looks like: { Action.action : [ Role, Role ] }
         """
         permission = permission or {}
+
+        # If modifying access, ensure the dataset is sharable.
+        for _ in _walk_action_roles(permission, self.permitted_actions.DATASET_ACCESS):
+            dataset.ensure_sharable()
+            break
+
         flush_needed = False
         for action, roles in permission.items():
             if isinstance(action, Action):
@@ -891,6 +904,7 @@ def copy_dataset_permissions(self, src, dst):
         self.set_all_dataset_permissions(dst, self.get_permissions(src))
 
     def privately_share_dataset(self, dataset, users=None):
+        dataset.ensure_sharable()
         intersect = None
         users = users or []
         for user in users:
@@ -1064,6 +1078,19 @@ def dataset_is_private_to_user(self, trans, dataset):
         else:
             return False
 
+    def dataset_is_private_to_a_user(self, dataset):
+        """
+        If the Dataset object has exactly one access role and that role is a
+        single user's private role, we consider the dataset private to that user.
+        """
+        access_roles = dataset.get_access_roles(self)
+
+        if len(access_roles) != 1:
+            return False
+        else:
+            access_role = access_roles[0]
+            return access_role.type == self.model.Role.types.PRIVATE
+
     def datasets_are_public(self, trans, datasets):
         '''
         Given a transaction object and a list of Datasets, return
@@ -1092,6 +1119,8 @@ def datasets_are_public(self, trans, datasets):
     def make_dataset_public(self, dataset):
         # A dataset is considered public if there are no "access" actions associated with it.  Any
         # other actions ( 'manage permissions', 'edit metadata' ) are irrelevant.
+ dataset.ensure_sharable() + flush_needed = False for dp in dataset.actions: if dp.action == self.permitted_actions.DATASET_ACCESS.action: @@ -1481,3 +1510,12 @@ def set_dataset_permissions(self, hda, user, site): hdadaa = self.model.HistoryDatasetAssociationDisplayAtAuthorization(hda=hda, user=user, site=site) self.sa_session.add(hdadaa) self.sa_session.flush() + + +def _walk_action_roles(permissions, query_action): + for action, roles in permissions.items(): + if isinstance(action, Action): + if action == query_action and roles: + yield action, roles + elif action == query_action.action and roles: + yield action, roles diff --git a/lib/galaxy/objectstore/__init__.py b/lib/galaxy/objectstore/__init__.py index 14638c305263..a87d6407308b 100644 --- a/lib/galaxy/objectstore/__init__.py +++ b/lib/galaxy/objectstore/__init__.py @@ -17,6 +17,7 @@ from galaxy.exceptions import ObjectInvalid, ObjectNotFound from galaxy.util import ( + asbool, directory_hash_id, force_symlink, parse_xml, @@ -30,6 +31,7 @@ from galaxy.util.sleeper import Sleeper NO_SESSION_ERROR_MESSAGE = "Attempted to 'create' object store entity in configuration with no database session present." +DEFAULT_PRIVATE = False log = logging.getLogger(__name__) @@ -90,6 +92,9 @@ def create(self, obj, base_dir=None, dir_only=False, extra_dir=None, extra_dir_a This method will create a proper directory structure for the file if the directory does not already exist. + + The method returns the concrete objectstore the supplied object is stored + in. """ raise NotImplementedError() @@ -198,6 +203,19 @@ def get_concrete_store_description_markdown(self, obj): yet been set, this may return ``None``. """ + @abc.abstractmethod + def is_private(self, obj): + """Return True iff supplied object is stored in private ConcreteObjectStore.""" + + def object_store_ids(self, private=None): + """Return IDs of all concrete object stores - either private ones or non-private ones. + + This should just return an empty list for non-DistributedObjectStore object stores, + i.e. concrete objectstores and the HierarchicalObjectStore since these do not + use the object_store_id column for objects (Galaxy Datasets). + """ + return [] + @abc.abstractmethod def get_store_usage_percent(self): """Return the percentage indicating how full the store is.""" @@ -269,10 +287,11 @@ def to_dict(self): extra_dirs = [] for extra_dir_type, extra_dir_path in self.extra_dirs.items(): extra_dirs.append({"type": extra_dir_type, "path": extra_dir_path}) + store_type = self.store_type return { 'config': config_to_dict(self.config), 'extra_dirs': extra_dirs, - 'type': self.store_type, + 'type': store_type, } def _get_object_id(self, obj): @@ -329,6 +348,16 @@ def get_store_usage_percent(self): def get_store_by(self, obj, **kwargs): return self._invoke('get_store_by', obj, **kwargs) + def is_private(self, obj): + return self._invoke('is_private', obj) + + @classmethod + def parse_private_from_config_xml(clazz, config_xml): + private = DEFAULT_PRIVATE + if config_xml is not None: + private = asbool(config_xml.attrib.get('private', DEFAULT_PRIVATE)) + return private + class ConcreteObjectStore(BaseObjectStore): """Subclass of ObjectStore for stores that don't delegate (non-nested). 
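`parse_private_from_config_xml` above is the single point where the new XML attribute is read; it defaults to `DEFAULT_PRIVATE` (False) when the attribute is absent. A self-contained approximation of that rule, with `galaxy.util.asbool` replaced by a simple truthy-string check:

```python
from xml.etree.ElementTree import fromstring


# Approximation of BaseObjectStore.parse_private_from_config_xml;
# galaxy.util.asbool is replaced here by a simple truthy-string check.
def parse_private_from_config_xml(config_xml, default=False):
    if config_xml is None:
        return default
    return str(config_xml.attrib.get("private", default)).lower() in ("true", "yes", "1")


assert parse_private_from_config_xml(fromstring('<object_store type="disk" private="true"/>'))
assert not parse_private_from_config_xml(fromstring('<object_store type="disk"/>'))
assert not parse_private_from_config_xml(None)
```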
@@ -356,9 +385,12 @@ def __init__(self, config, config_dict=None, **kwargs): self.store_by = config_dict.get("store_by", None) or getattr(config, "object_store_store_by", "id") self.name = config_dict.get("name", None) self.description = config_dict.get("description", None) + # Annotate this as true to prevent sharing of data. + self.private = config_dict.get("private", DEFAULT_PRIVATE) def to_dict(self): rval = super().to_dict() + rval['private'] = self.private rval["store_by"] = self.store_by rval["name"] = self.name rval["description"] = self.description @@ -373,6 +405,9 @@ def _get_concrete_store_description_markdown(self, obj): def _get_store_by(self, obj): return self.store_by + def _is_private(self, obj): + return self.private + class DiskObjectStore(ConcreteObjectStore): """ @@ -385,7 +420,7 @@ class DiskObjectStore(ConcreteObjectStore): >>> file_path=tempfile.mkdtemp() >>> obj = Bunch(id=1) >>> s = DiskObjectStore(Bunch(umask=0o077, jobs_directory=file_path, new_file_path=file_path, object_store_check_old_style=False), dict(files_dir=file_path)) - >>> s.create(obj) + >>> o = s.create(obj) >>> s.exists(obj) True >>> assert s.get_filename(obj) == file_path + '/000/dataset_1.dat' @@ -431,6 +466,7 @@ def parse_xml(clazz, config_xml): extra_dirs.append({"type": e.get('type'), "path": e.get('path')}) config_dict["extra_dirs"] = extra_dirs + config_dict["private"] = BaseObjectStore.parse_private_from_config_xml(config_xml) return config_dict def to_dict(self): @@ -542,6 +578,7 @@ def _create(self, obj, **kwargs): if not dir_only: open(path, 'w').close() # Should be rb? umask_fix_perms(path, self.config.umask, 0o666) + return self def _empty(self, obj, **kwargs): """Override `ObjectStore`'s stub by checking file size on disk.""" @@ -676,7 +713,8 @@ def file_ready(self, obj, **kwargs): def _create(self, obj, **kwargs): """Create a backing file in a random backend.""" - random.choice(list(self.backends.values())).create(obj, **kwargs) + objectstore = random.choice(list(self.backends.values())) + return objectstore.create(obj, **kwargs) def _empty(self, obj, **kwargs): """For the first backend that has this `obj`, determine if it is empty.""" @@ -715,6 +753,9 @@ def _get_concrete_store_name(self, obj): def _get_concrete_store_description_markdown(self, obj): return self._call_method('_get_concrete_store_description_markdown', obj, None, False) + def _is_private(self, obj): + return self._call_method('_is_private', obj, ObjectNotFound, True) + def _get_store_by(self, obj): return self._call_method('_get_store_by', obj, None, False) @@ -883,20 +924,24 @@ def __filesystem_monitor(self): def _create(self, obj, **kwargs): """The only method in which obj.object_store_id may be None.""" - if obj.object_store_id is None or not self._exists(obj, **kwargs): - if obj.object_store_id is None or obj.object_store_id not in self.backends: + object_store_id = obj.object_store_id + if object_store_id is None or not self._exists(obj, **kwargs): + if object_store_id is None or object_store_id not in self.backends: try: - obj.object_store_id = random.choice(self.weighted_backend_ids) + object_store_id = random.choice(self.weighted_backend_ids) + obj.object_store_id = object_store_id except IndexError: raise ObjectInvalid('objectstore.create, could not generate ' - 'obj.object_store_id: %s, kwargs: %s' + 'object_store_id: %s, kwargs: %s' % (str(obj), str(kwargs))) log.debug("Selected backend '%s' for creation of %s %s" - % (obj.object_store_id, obj.__class__.__name__, obj.id)) + % (object_store_id, 
obj.__class__.__name__, obj.id))
             else:
                 log.debug("Using preferred backend '%s' for creation of %s %s"
-                          % (obj.object_store_id, obj.__class__.__name__, obj.id))
-        self.backends[obj.object_store_id].create(obj, **kwargs)
+                          % (object_store_id, obj.__class__.__name__, obj.id))
+            return self.backends[object_store_id].create(obj, **kwargs)
+        else:
+            return self.backends[object_store_id]
 
     def _call_method(self, method, obj, default, default_is_exception, **kwargs):
         object_store_id = self.__get_store_id_for(obj, **kwargs)
@@ -926,6 +971,14 @@ def __get_store_id_for(self, obj, **kwargs):
                     return id
         return None
 
+    def object_store_ids(self, private=None):
+        object_store_ids = []
+        for backend_id, backend in self.backends.items():
+            object_store_ids.extend(backend.object_store_ids(private=private))
+            if backend.private is private or private is None:
+                object_store_ids.append(backend_id)
+        return object_store_ids
+
 
 class HierarchicalObjectStore(NestedObjectStore):
 
@@ -942,22 +995,31 @@ def __init__(self, config, config_dict, fsmon=False):
         super().__init__(config, config_dict)
 
         backends = {}
+        is_private = config_dict.get("private", DEFAULT_PRIVATE)
         for order, backend_def in enumerate(config_dict["backends"]):
+            backend_is_private = backend_def.get("private")
+            if backend_is_private is not None:
+                assert is_private == backend_is_private, "The private attribute must be defined on the HierarchicalObjectStore and not on the contained concrete objectstores."
             backends[order] = build_object_store_from_config(config, config_dict=backend_def, fsmon=fsmon)
 
         self.backends = backends
+        self.private = is_private
 
     @classmethod
     def parse_xml(clazz, config_xml):
         backends_list = []
+        is_private = BaseObjectStore.parse_private_from_config_xml(config_xml)
         for b in sorted(config_xml.find('backends'), key=lambda b: int(b.get('order'))):
             store_type = b.get("type")
             objectstore_class, _ = type_to_object_store_class(store_type)
             backend_config_dict = objectstore_class.parse_xml(b)
+            backend_config_dict["private"] = is_private
             backend_config_dict["type"] = store_type
             backends_list.append(backend_config_dict)
-        return {"backends": backends_list}
+        config_dict = {"backends": backends_list}
+        config_dict["private"] = is_private
+        return config_dict
 
     def to_dict(self):
         as_dict = super().to_dict()
@@ -966,6 +1028,7 @@ def to_dict(self):
             backend_as_dict = backend.to_dict()
             backends.append(backend_as_dict)
         as_dict["backends"] = backends
+        as_dict["private"] = self.private
         return as_dict
 
     def _exists(self, obj, **kwargs):
@@ -977,7 +1040,13 @@ def _exists(self, obj, **kwargs):
 
     def _create(self, obj, **kwargs):
         """Call the primary object store."""
-        self.backends[0].create(obj, **kwargs)
+        return self.backends[0].create(obj, **kwargs)
+
+    def _is_private(self, obj):
+        # Unlike the DistributedObjectStore, the HierarchicalObjectStore does not use
+        # object_store_id - so all the contained object stores need to define is_private
+        # the same way.
+        return self.private
 
 
 def type_to_object_store_class(store, fsmon=False):
@@ -1131,13 +1200,16 @@ def __init__(self, app, user):
         self.object_store_id = None
         self.user = user
 
-    def set_object_store_id(self, data):
+    def set_object_store_id(self, data, require_sharable=False):
         # Create an empty file immediately.  The first dataset will be
         # created in the "default" store, all others will be created in
         # the same store as the first.
data.dataset.object_store_id = self.object_store_id try: - self.object_store.create(data.dataset) + ensure_non_private = require_sharable + concrete_store = self.object_store.create(data.dataset, ensure_non_private=ensure_non_private) + if concrete_store.private and require_sharable: + raise Exception("Attempted to create shared output datasets in objectstore with sharing disabled") except ObjectInvalid: raise Exception('Unable to create output dataset: object store is full') self.object_store_id = data.dataset.object_store_id # these will be the same thing after the first output diff --git a/lib/galaxy/objectstore/azure_blob.py b/lib/galaxy/objectstore/azure_blob.py index 51405dc4fa71..f1f87cbc12ee 100644 --- a/lib/galaxy/objectstore/azure_blob.py +++ b/lib/galaxy/objectstore/azure_blob.py @@ -75,6 +75,7 @@ def parse_config_xml(config_xml): 'path': staging_path, }, 'extra_dirs': extra_dirs, + 'private': ConcreteObjectStore.parse_private_from_config_xml(config_xml), } except Exception: # Toss it back up after logging, we can't continue loading at this point. diff --git a/lib/galaxy/objectstore/cloud.py b/lib/galaxy/objectstore/cloud.py index f53efbfcefac..43bb809bd1d1 100644 --- a/lib/galaxy/objectstore/cloud.py +++ b/lib/galaxy/objectstore/cloud.py @@ -572,6 +572,7 @@ def _create(self, obj, **kwargs): rel_path = os.path.join(rel_path, alt_name if alt_name else f"dataset_{self._get_object_id(obj)}.dat") open(os.path.join(self.staging_path, rel_path), 'w').close() self._push_to_os(rel_path, from_string='') + return self def _empty(self, obj, **kwargs): if self._exists(obj, **kwargs): diff --git a/lib/galaxy/objectstore/irods.py b/lib/galaxy/objectstore/irods.py index 9b9bfa4bf5c0..43d5d7f03fb6 100644 --- a/lib/galaxy/objectstore/irods.py +++ b/lib/galaxy/objectstore/irods.py @@ -103,6 +103,7 @@ def parse_config_xml(config_xml): 'path': staging_path, }, 'extra_dirs': extra_dirs, + 'private': DiskObjectStore.parse_private_from_config_xml(config_xml), } except Exception: # Toss it back up after logging, we can't continue loading at this point. 
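Each backend's `parse_config_xml` (azure_blob and irods above, pithos and s3 below) now threads the same flag through, so an admin opts a concrete backend in via its XML definition. A hypothetical disk backend marked private, using the attribute introduced in this patch (paths are placeholders, not Galaxy defaults):

```python
# Hypothetical backend definition using the new "private" attribute;
# all paths below are placeholders for illustration.
PRIVATE_DISK_STORE_XML = """
<object_store type="disk" private="true">
    <files_dir path="/data/private/files"/>
    <extra_dir type="temp" path="/data/private/tmp"/>
    <extra_dir type="job_work" path="/data/private/job_work"/>
</object_store>
"""
```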
@@ -505,6 +506,7 @@ def _create(self, obj, **kwargs): open(os.path.join(self.staging_path, rel_path), 'w').close() self._push_to_irods(rel_path, from_string='') log.debug("irods_pt _create: %s", ipt_timer) + return self def _empty(self, obj, **kwargs): if self._exists(obj, **kwargs): diff --git a/lib/galaxy/objectstore/pithos.py b/lib/galaxy/objectstore/pithos.py index 501e21afd17a..f29454ffc041 100644 --- a/lib/galaxy/objectstore/pithos.py +++ b/lib/galaxy/objectstore/pithos.py @@ -66,6 +66,7 @@ def parse_config_xml(config_xml): raise Exception(msg) r['extra_dirs'] = [ {k: e.get(k) for k in attrs} for e in extra_dirs] + r['private'] = ConcreteObjectStore.parse_private_from_config_xml(config_xml) if 'job_work' not in (d['type'] for d in r['extra_dirs']): msg = f'No value for {tag}:type="job_work" in XML tree' log.error(msg) @@ -285,6 +286,7 @@ def _create(self, obj, **kwargs): new_file = os.path.join(self.staging_path, rel_path) open(new_file, 'w').close() self.pithos.upload_from_string(rel_path, '') + return self def _empty(self, obj, **kwargs): """ diff --git a/lib/galaxy/objectstore/s3.py b/lib/galaxy/objectstore/s3.py index 0f7079fe78f4..060f98d0fda0 100644 --- a/lib/galaxy/objectstore/s3.py +++ b/lib/galaxy/objectstore/s3.py @@ -97,6 +97,7 @@ def parse_config_xml(config_xml): 'path': staging_path, }, 'extra_dirs': extra_dirs, + 'private': ConcreteObjectStore.parse_private_from_config_xml(config_xml), } except Exception: # Toss it back up after logging, we can't continue loading at this point. @@ -577,6 +578,7 @@ def _create(self, obj, **kwargs): rel_path = os.path.join(rel_path, alt_name if alt_name else f"dataset_{self._get_object_id(obj)}.dat") open(os.path.join(self.staging_path, rel_path), 'w').close() self._push_to_os(rel_path, from_string='') + return self def _empty(self, obj, **kwargs): if self._exists(obj, **kwargs): diff --git a/lib/galaxy/tools/actions/upload_common.py b/lib/galaxy/tools/actions/upload_common.py index 08401661eb4f..bffc3f9446cd 100644 --- a/lib/galaxy/tools/actions/upload_common.py +++ b/lib/galaxy/tools/actions/upload_common.py @@ -190,7 +190,7 @@ def __new_history_upload(trans, uploaded_dataset, history=None, state=None): trans.sa_session.flush() history.add_dataset(hda, genome_build=uploaded_dataset.dbkey, quota=False) permissions = trans.app.security_agent.history_get_default_permissions(history) - trans.app.security_agent.set_all_dataset_permissions(hda.dataset, permissions) + trans.app.security_agent.set_all_dataset_permissions(hda.dataset, permissions, new=True, flush=False) trans.sa_session.flush() return hda @@ -253,7 +253,7 @@ def __new_library_upload(trans, cntrller, uploaded_dataset, library_bunch, tag_h trans.app.security_agent.copy_dataset_permissions(library_bunch.replace_dataset.library_dataset_dataset_association.dataset, ldda.dataset) else: # Copy the current user's DefaultUserPermissions to the new LibraryDatasetDatasetAssociation.dataset - trans.app.security_agent.set_all_dataset_permissions(ldda.dataset, trans.app.security_agent.user_get_default_permissions(trans.user)) + trans.app.security_agent.set_all_dataset_permissions(ldda.dataset, trans.app.security_agent.user_get_default_permissions(trans.user), new=True) folder.add_library_dataset(ld, genome_build=uploaded_dataset.dbkey) trans.sa_session.add(folder) trans.sa_session.flush() diff --git a/lib/galaxy/webapps/galaxy/api/datasets.py b/lib/galaxy/webapps/galaxy/api/datasets.py index 70af10fbb0b0..4b33781f202b 100644 --- a/lib/galaxy/webapps/galaxy/api/datasets.py +++ 
b/lib/galaxy/webapps/galaxy/api/datasets.py
@@ -184,6 +184,7 @@ def show_storage(self, trans, dataset_id, hda_ldda='hda', **kwd):
 
         return {
             'object_store_id': object_store_id,
+            'sharable': dataset.sharable,
             'name': name,
             'description': description,
             'percent_used': percent_used,
diff --git a/lib/galaxy/webapps/galaxy/api/history_contents.py b/lib/galaxy/webapps/galaxy/api/history_contents.py
index 6a49c36d29af..13a14a899e45 100644
--- a/lib/galaxy/webapps/galaxy/api/history_contents.py
+++ b/lib/galaxy/webapps/galaxy/api/history_contents.py
@@ -48,6 +48,7 @@
     LibraryDataset,
 )
 from galaxy.model.security import GalaxyRBACAgent
+from galaxy.objectstore import ObjectStore
 from galaxy.schema import FilterQueryParams
 from galaxy.schema.fields import (
     EncodedDatabaseIdField,
@@ -159,6 +160,11 @@ class HistoryContentsIndexLegacyParams(HistoryContentsIndexLegacyParamsBase):
             "instead of the *summary* default information."
         ),
     )
+    sharable: Optional[bool] = Field(
+        default=None,
+        title="Sharable",
+        description="Whether to return only sharable or not sharable datasets. Leave unset for both.",
+    )
 
 
 class ParsedHistoryContentsLegacyParams(HistoryContentsIndexLegacyParamsBase):
@@ -180,6 +186,11 @@ class ParsedHistoryContentsLegacyParams(HistoryContentsIndexLegacyParamsBase):
             "information or the value `all` to return (full) details for all datasets."
         ),
     )
+    object_store_ids: Optional[List[str]] = Field(
+        default=None,
+        title="Object store IDs",
+        description="A list of object store ids to filter this down to.",
+    )
 
 
 class CreateHistoryContentPayloadBase(Model):
@@ -287,6 +298,7 @@ class HistoriesContentsService(ServiceBase):
     def __init__(
         self,
         security: IdEncodingHelper,
+        object_store: ObjectStore,
         history_manager: histories.HistoryManager,
         history_contents_manager: history_contents.HistoryContentsManager,
         hda_manager: hdas.HDAManager,
@@ -299,6 +311,7 @@ def __init__(
         history_contents_filters: history_contents.HistoryContentsFilters,
     ):
         super().__init__(security)
+        self.object_store = object_store
         self.history_manager = history_manager
         self.history_contents_manager = history_contents_manager
         self.hda_manager = hda_manager
@@ -1082,12 +1095,18 @@ def _parse_legacy_contents_params(self, params: HistoryContentsIndexLegacyParams
         if details and details != 'all':
             details = set(util.listify(details))
 
+        object_store_ids = None
+        sharable = params.sharable
+        if sharable is not None:
+            object_store_ids = self.object_store.object_store_ids(private=not sharable)
+
         return ParsedHistoryContentsLegacyParams(
             types=types,
             ids=ids,
             deleted=params.deleted,
             visible=params.visible,
             dataset_details=details,
+            object_store_ids=object_store_ids,
         ).dict(exclude_defaults=True)
 
     def __collection_dict(self, trans, dataset_collection_instance, **kwds):
diff --git a/lib/galaxy/webapps/galaxy/controllers/dataset.py b/lib/galaxy/webapps/galaxy/controllers/dataset.py
index 9d43d140650b..0e5e2b677f54 100644
--- a/lib/galaxy/webapps/galaxy/controllers/dataset.py
+++ b/lib/galaxy/webapps/galaxy/controllers/dataset.py
@@ -278,7 +278,15 @@ def get_edit(self, trans, dataset_id=None, **kwd):
         permission_disable = True
         permission_inputs = list()
         if trans.user:
-            if data.dataset.actions:
+            if not data.dataset.sharable:
+                permission_message = 'This dataset is stored on storage private to you and cannot be shared.'
+ permission_inputs.append({ + 'name': 'not_sharable', + 'type': 'hidden', + 'label': permission_message, + 'readonly': True + }) + elif data.dataset.actions: in_roles = {} for action, roles in trans.app.security_agent.get_permissions(data.dataset).items(): in_roles[action.action] = [trans.security.encode_id(role.id) for role in roles] diff --git a/lib/galaxy_test/api/test_dataset_collections.py b/lib/galaxy_test/api/test_dataset_collections.py index ace5cdb0661b..38dcd8dde879 100644 --- a/lib/galaxy_test/api/test_dataset_collections.py +++ b/lib/galaxy_test/api/test_dataset_collections.py @@ -160,7 +160,7 @@ def test_list_list_list_download(self): assert len(namelist) == 3, f"Expected 3 elements in [{namelist}]" def test_hda_security(self): - element_identifiers = self.dataset_collection_populator.pair_identifiers(self.history_id) + element_identifiers = self.dataset_collection_populator.pair_identifiers(self.history_id, wait=True) self.dataset_populator.make_private(self.history_id, element_identifiers[0]["id"]) with self._different_user(): history_id = self.dataset_populator.new_history() diff --git a/lib/galaxy_test/api/test_libraries.py b/lib/galaxy_test/api/test_libraries.py index f965733b9a02..ec53fb7eb050 100644 --- a/lib/galaxy_test/api/test_libraries.py +++ b/lib/galaxy_test/api/test_libraries.py @@ -342,7 +342,7 @@ def test_create_dataset_in_folder(self): self._assert_status_code_is(folder_response, 200) folder_id = folder_response.json()[0]['id'] history_id = self.dataset_populator.new_history() - hda_id = self.dataset_populator.new_dataset(history_id, content="1 2 3")['id'] + hda_id = self.dataset_populator.new_dataset(history_id, content="1 2 3", wait=True)['id'] payload = {'from_hda_id': hda_id} create_response = self._post(f"folders/{folder_id}/contents", payload) self._assert_status_code_is(create_response, 200) @@ -358,7 +358,7 @@ def test_create_dataset_in_subfolder(self): print(subfolder_response.json()) subfolder_id = subfolder_response.json()['id'] history_id = self.dataset_populator.new_history() - hda_id = self.dataset_populator.new_dataset(history_id, content="1 2 3 sub")['id'] + hda_id = self.dataset_populator.new_dataset(history_id, content="1 2 3 sub", wait=True)['id'] payload = {'from_hda_id': hda_id} create_response = self._post(f"folders/{subfolder_id}/contents", payload) self._assert_status_code_is(create_response, 200) @@ -518,4 +518,5 @@ def _create_dataset_in_folder_in_library(self, library_name, content="1 2 3", wa hda_id = self.dataset_populator.new_dataset(history_id, content=content, wait=wait)['id'] payload = {'from_hda_id': hda_id, 'create_type': 'file', 'folder_id': folder_id} ld = self._post(f"libraries/{folder_id}/contents", payload) + ld.raise_for_status() return ld diff --git a/lib/galaxy_test/api/test_tools.py b/lib/galaxy_test/api/test_tools.py index 28e9953a4f26..feec5575d9cf 100644 --- a/lib/galaxy_test/api/test_tools.py +++ b/lib/galaxy_test/api/test_tools.py @@ -893,7 +893,7 @@ def assert_inputs(inputs, can_be_used=True): else: self._assert_dataset_permission_denied_response(response) - new_dataset = self.dataset_populator.new_dataset(history_id, content='Cat1Test') + new_dataset = self.dataset_populator.new_dataset(history_id, content='Cat1Test', wait=True) inputs = dict( input1=dataset_to_param(new_dataset), ) diff --git a/lib/galaxy_test/base/populators.py b/lib/galaxy_test/base/populators.py index 8fa7b04e8bdf..ad2cc10fbb11 100644 --- a/lib/galaxy_test/base/populators.py +++ b/lib/galaxy_test/base/populators.py @@ -651,9 +651,23 @@ 
def make_private(self, history_id: str, dataset_id: str) -> dict: "access": [role_id], "manage": [role_id], } + response = self.update_permissions_raw(history_id, dataset_id, payload) + response.raise_for_status() + return response.json() + + def make_public_raw(self, history_id, dataset_id): + role_id = self.user_private_role_id() + payload = { + "access": json.dumps([]), + "manage": json.dumps([role_id]), + } + response = self.update_permissions_raw(history_id, dataset_id, payload) + return response + + def update_permissions_raw(self, history_id, dataset_id, payload): url = f"histories/{history_id}/contents/{dataset_id}/permissions" update_response = self._put(url, payload, admin=True, json=True) - assert update_response.status_code == 200, update_response.content + update_response.raise_for_status() return update_response.json() def validate_dataset(self, history_id, dataset_id): @@ -1585,8 +1599,8 @@ def __create_payload_collection(self, history_id, identifiers_func, collection_t ) return payload - def pair_identifiers(self, history_id, contents=None): - hda1, hda2 = self.__datasets(history_id, count=2, contents=contents) + def pair_identifiers(self, history_id, contents=None, wait=False): + hda1, hda2 = self.__datasets(history_id, count=2, contents=contents, wait=wait) element_identifiers = [ dict(name="forward", src="hda", id=hda1["id"]), @@ -1620,10 +1634,12 @@ def __create(self, payload): else: return self.dataset_populator.fetch(payload) - def __datasets(self, history_id, count, contents=None): + def __datasets(self, history_id, count, contents=None, wait=False): datasets = [] for i in range(count): - new_kwds = {} + new_kwds = { + 'wait': wait, + } if contents: new_kwds["content"] = contents[i] datasets.append(self.dataset_populator.new_dataset(history_id, **new_kwds)) diff --git a/test/integration/objectstore/test_private_handling.py b/test/integration/objectstore/test_private_handling.py new file mode 100644 index 000000000000..efc0eb84be2d --- /dev/null +++ b/test/integration/objectstore/test_private_handling.py @@ -0,0 +1,50 @@ +"""Integration tests for mixing store_by.""" + +import string + +from galaxy_test.base import api_asserts +from ._base import BaseObjectStoreIntegrationTestCase + +PRIVATE_OBJECT_STORE_CONFIG_TEMPLATE = string.Template(""" + + + + + +""") + +TEST_INPUT_FILES_CONTENT = "1 2 3" + + +class PrivatePreventsSharingObjectStoreIntegrationTestCase(BaseObjectStoreIntegrationTestCase): + + @classmethod + def handle_galaxy_config_kwds(cls, config): + config["new_user_dataset_access_role_default_private"] = True + cls._configure_object_store(PRIVATE_OBJECT_STORE_CONFIG_TEMPLATE, config) + + def test_both_types(self): + """Test each object store configures files correctly. 
+ """ + with self.dataset_populator.test_history() as history_id: + hda = self.dataset_populator.new_dataset(history_id, content=TEST_INPUT_FILES_CONTENT, wait=True) + content = self.dataset_populator.get_history_dataset_content(history_id, hda["id"]) + assert content.startswith(TEST_INPUT_FILES_CONTENT) + response = self.dataset_populator.make_public_raw(history_id, hda["id"]) + assert response.status_code != 200 + api_asserts.assert_error_message_contains(response, "Attempting to share a non-sharable dataset.") + + +class PrivateCannotWritePublicDataObjectStoreIntegrationTestCase(BaseObjectStoreIntegrationTestCase): + + @classmethod + def handle_galaxy_config_kwds(cls, config): + config["new_user_dataset_access_role_default_private"] = False + cls._configure_object_store(PRIVATE_OBJECT_STORE_CONFIG_TEMPLATE, config) + + def test_both_types(self): + with self.dataset_populator.test_history() as history_id: + response = self.dataset_populator.new_dataset_request(history_id, content=TEST_INPUT_FILES_CONTENT, wait=True, assert_ok=False) + job = response.json()["jobs"][0] + final_state = self.dataset_populator.wait_for_job(job["id"]) + assert final_state == "error" diff --git a/test/unit/data/test_galaxy_mapping.py b/test/unit/data/test_galaxy_mapping.py index b054cea93bf7..e418f8aff466 100644 --- a/test/unit/data/test_galaxy_mapping.py +++ b/test/unit/data/test_galaxy_mapping.py @@ -26,6 +26,7 @@ not os.environ.get('GALAXY_TEST_UNIT_MAPPING_URI_POSTGRES_BASE'), reason="GALAXY_TEST_UNIT_MAPPING_URI_POSTGRES_BASE not set" ) +PRIVATE_OBJECT_STORE_ID = "my_private_data" class BaseModelTestCase(unittest.TestCase): @@ -205,11 +206,13 @@ def assert_display_name_converts_to_unicode(item, name): assert history.get_display_name() == 'Hello₩◎ґʟⅾ' def test_hda_to_library_dataset_dataset_association(self): + model = self.model u = self.model.User(email="mary@example.com", password="password") - hda = self.model.HistoryDatasetAssociation(name='hda_name') + h1 = model.History(name="History 1", user=u) + hda = model.HistoryDatasetAssociation(name='hda_name', create_dataset=True, history=h1, sa_session=model.session) self.persist(hda) trans = collections.namedtuple('trans', 'user') - target_folder = self.model.LibraryFolder(name='library_folder') + target_folder = model.LibraryFolder(name='library_folder') ldda = hda.to_library_dataset_dataset_association( trans=trans(user=u), target_folder=target_folder, @@ -234,6 +237,21 @@ def test_hda_to_library_dataset_dataset_association(self): assert new_ldda.library_dataset.expired_datasets[0] == ldda assert target_folder.item_count == 1 + def test_hda_to_library_dataset_dataset_association_fails_if_private(self): + model = self.model + u = model.User(email="mary2@example.com", password="password") + h1 = model.History(name="History 1", user=u) + hda = model.HistoryDatasetAssociation(name='hda_name', create_dataset=True, history=h1, sa_session=model.session) + hda.dataset.object_store_id = PRIVATE_OBJECT_STORE_ID + self.persist(hda) + trans = collections.namedtuple('trans', 'user') + target_folder = model.LibraryFolder(name='library_folder') + with pytest.raises(Exception): + hda.to_library_dataset_dataset_association( + trans=trans(user=u), + target_folder=target_folder, + ) + def test_tags(self): model = self.model @@ -601,8 +619,8 @@ def test_history_contents(self): self.persist(u, h1, expunge=False) d1 = self.new_hda(h1, name="1") - d2 = self.new_hda(h1, name="2", visible=False) - d3 = self.new_hda(h1, name="3", deleted=True) + d2 = self.new_hda(h1, name="2", 
visible=False, object_store_id="foobar")
+        d3 = self.new_hda(h1, name="3", deleted=True, object_store_id="three_store")
         d4 = self.new_hda(h1, name="4", visible=False, deleted=True)
 
         self.session().flush()
@@ -616,8 +634,11 @@ def contents_iter_names(**kwds):
         self.assertEqual(contents_iter_names(), ["1", "2", "3", "4"])
         assert contents_iter_names(deleted=False) == ["1", "2"]
         assert contents_iter_names(visible=True) == ["1", "3"]
+        assert contents_iter_names(visible=True, object_store_ids=["three_store"]) == ["3"]
         assert contents_iter_names(visible=False) == ["2", "4"]
         assert contents_iter_names(deleted=True, visible=False) == ["4"]
+        assert contents_iter_names(deleted=False, object_store_ids=["foobar"]) == ["2"]
+        assert contents_iter_names(deleted=False, object_store_ids=["foobar2"]) == []
         assert contents_iter_names(ids=[d1.id, d2.id, d3.id, d4.id]) == ["1", "2", "3", "4"]
         assert contents_iter_names(ids=[d1.id, d2.id, d3.id, d4.id], max_in_filter_length=1) == ["1", "2", "3", "4"]
@@ -946,6 +967,69 @@ def test_can_manage_private_dataset(self):
         assert security_agent.can_manage_dataset(u_from.all_roles(), d1.dataset)
         assert not security_agent.can_manage_dataset(u_other.all_roles(), d1.dataset)
 
+    def test_cannot_make_private_objectstore_dataset_public(self):
+        security_agent = GalaxyRBACAgent(self.model)
+        u_from, u_to, _ = self._three_users("cannot_make_private_public")
+
+        h = self.model.History(name="History for Prevent Sharing", user=u_from)
+        d1 = self.model.HistoryDatasetAssociation(extension="txt", history=h, create_dataset=True, sa_session=self.model.session)
+        self.persist(h, d1)
+
+        d1.dataset.object_store_id = PRIVATE_OBJECT_STORE_ID
+        self._make_private(security_agent, u_from, d1)
+
+        with pytest.raises(Exception) as exec_info:
+            self._make_owned(security_agent, u_from, d1)
+        assert galaxy.model.CANNOT_SHARE_PRIVATE_DATASET_MESSAGE in str(exec_info.value)
+
+    def test_cannot_make_private_objectstore_dataset_shared(self):
+        security_agent = GalaxyRBACAgent(self.model)
+        u_from, u_to, _ = self._three_users("cannot_make_private_shared")
+
+        h = self.model.History(name="History for Prevent Sharing", user=u_from)
+        d1 = self.model.HistoryDatasetAssociation(extension="txt", history=h, create_dataset=True, sa_session=self.model.session)
+        self.persist(h, d1)
+
+        d1.dataset.object_store_id = PRIVATE_OBJECT_STORE_ID
+        self._make_private(security_agent, u_from, d1)
+
+        with pytest.raises(Exception) as exec_info:
+            security_agent.privately_share_dataset(d1.dataset, [u_to])
+        assert galaxy.model.CANNOT_SHARE_PRIVATE_DATASET_MESSAGE in str(exec_info.value)
+
+    def test_cannot_set_dataset_permission_on_private(self):
+        security_agent = GalaxyRBACAgent(self.model)
+        u_from, u_to, _ = self._three_users("cannot_set_permissions_on_private")
+
+        h = self.model.History(name="History for Prevent Sharing", user=u_from)
+        d1 = self.model.HistoryDatasetAssociation(extension="txt", history=h, create_dataset=True, sa_session=self.model.session)
+        self.persist(h, d1)
+
+        d1.dataset.object_store_id = PRIVATE_OBJECT_STORE_ID
+        self._make_private(security_agent, u_from, d1)
+
+        role = security_agent.get_private_user_role(u_to, auto_create=True)
+        access_action = security_agent.permitted_actions.DATASET_ACCESS.action
+
+        with pytest.raises(Exception) as exec_info:
+            security_agent.set_dataset_permission(d1.dataset, {access_action: [role]})
+        assert galaxy.model.CANNOT_SHARE_PRIVATE_DATASET_MESSAGE in str(exec_info.value)
+
+    def test_cannot_make_private_dataset_public(self):
+        security_agent =
GalaxyRBACAgent(self.model) + u_from, u_to, u_other = self._three_users("cannot_make_private_dataset_public") + + h = self.model.History(name="History for Annotation", user=u_from) + d1 = self.model.HistoryDatasetAssociation(extension="txt", history=h, create_dataset=True, sa_session=self.model.session) + self.persist(h, d1) + + d1.dataset.object_store_id = PRIVATE_OBJECT_STORE_ID + self._make_private(security_agent, u_from, d1) + + with pytest.raises(Exception) as exec_info: + security_agent.make_dataset_public(d1.dataset) + assert galaxy.model.CANNOT_SHARE_PRIVATE_DATASET_MESSAGE in str(exec_info.value) + def _three_users(self, suffix): email_from = f"user_{suffix}e1@example.com" email_to = f"user_{suffix}e2@example.com" @@ -962,16 +1046,26 @@ def _make_private(self, security_agent, user, hda): access_action = security_agent.permitted_actions.DATASET_ACCESS.action manage_action = security_agent.permitted_actions.DATASET_MANAGE_PERMISSIONS.action permissions = {access_action: [role], manage_action: [role]} - security_agent.set_all_dataset_permissions(hda.dataset, permissions) + self._set_permissions(security_agent, hda.dataset, permissions) def _make_owned(self, security_agent, user, hda): role = security_agent.get_private_user_role(user, auto_create=True) manage_action = security_agent.permitted_actions.DATASET_MANAGE_PERMISSIONS.action permissions = {manage_action: [role]} - security_agent.set_all_dataset_permissions(hda.dataset, permissions) + self._set_permissions(security_agent, hda.dataset, permissions) + + def _set_permissions(self, security_agent, dataset, permissions): + # TODO: refactor set_all_dataset_permissions to actually throw an exception :| + error = security_agent.set_all_dataset_permissions(dataset, permissions) + if error: + raise Exception(error) def new_hda(self, history, **kwds): - return history.add_dataset(self.model.HistoryDatasetAssociation(create_dataset=True, sa_session=self.model.session, **kwds)) + object_store_id = kwds.pop('object_store_id', None) + hda = self.model.HistoryDatasetAssociation(create_dataset=True, sa_session=self.model.session, **kwds) + if object_store_id is not None: + hda.dataset.object_store_id = object_store_id + return history.add_dataset(hda) @skip_if_not_postgres_base @@ -1006,6 +1100,12 @@ def get_store_by(self, *args, **kwds): def update_from_file(self, *arg, **kwds): pass + def is_private(self, object): + if object.object_store_id == PRIVATE_OBJECT_STORE_ID: + return True + else: + return False + def get_suite(): suite = unittest.TestSuite() diff --git a/test/unit/objectstore/test_objectstore.py b/test/unit/objectstore/test_objectstore.py index 13d7104d748d..6b8d74018392 100644 --- a/test/unit/objectstore/test_objectstore.py +++ b/test/unit/objectstore/test_objectstore.py @@ -301,8 +301,26 @@ def test_concrete_name_without_objectstore_id(): assert files1_name is None +MIXED_STORE_BY_DISTRIBUTED_TEST_CONFIG = """ + + + + + + + + + + + + + + +""" + + MIXED_STORE_BY_HIERARCHICAL_TEST_CONFIG = """ - + @@ -320,12 +338,45 @@ def test_concrete_name_without_objectstore_id(): def test_mixed_store_by(): + with TestConfig(MIXED_STORE_BY_DISTRIBUTED_TEST_CONFIG) as (directory, object_store): + as_dict = object_store.to_dict() + assert as_dict["backends"][0]["store_by"] == "id" + assert as_dict["backends"][1]["store_by"] == "uuid" + with TestConfig(MIXED_STORE_BY_HIERARCHICAL_TEST_CONFIG) as (directory, object_store): as_dict = object_store.to_dict() assert as_dict["backends"][0]["store_by"] == "id" assert as_dict["backends"][1]["store_by"] == 
"uuid" +def test_mixed_private(): + # Distributed object store can combine private and non-private concrete objectstores + with TestConfig(MIXED_STORE_BY_DISTRIBUTED_TEST_CONFIG) as (directory, object_store): + ids = object_store.object_store_ids() + print(ids) + assert len(ids) == 2 + + ids = object_store.object_store_ids(private=True) + assert len(ids) == 1 + assert ids[0] == "files2" + + ids = object_store.object_store_ids(private=False) + assert len(ids) == 1 + assert ids[0] == "files1" + + as_dict = object_store.to_dict() + assert not as_dict["backends"][0]["private"] + assert as_dict["backends"][1]["private"] + + with TestConfig(MIXED_STORE_BY_HIERARCHICAL_TEST_CONFIG) as (directory, object_store): + as_dict = object_store.to_dict() + assert as_dict["backends"][0]["private"] + assert as_dict["backends"][1]["private"] + + assert object_store.private + assert as_dict["private"] is True + + DISTRIBUTED_TEST_CONFIG = """ @@ -484,7 +535,7 @@ def test_config_parse_pithos(): assert len(extra_dirs) == 2 -S3_TEST_CONFIG = """ +S3_TEST_CONFIG = """ @@ -496,6 +547,7 @@ def test_config_parse_pithos(): S3_TEST_CONFIG_YAML = """ type: s3 +private: true auth: access_key: access_moo secret_key: secret_cow @@ -519,6 +571,7 @@ def test_config_parse_pithos(): def test_config_parse_s3(): for config_str in [S3_TEST_CONFIG, S3_TEST_CONFIG_YAML]: with TestConfig(config_str, clazz=UnitializeS3ObjectStore) as (directory, object_store): + assert object_store.private assert object_store.access_key == "access_moo" assert object_store.secret_key == "secret_cow"