diff --git a/lib/galaxy/authnz/__init__.py b/lib/galaxy/authnz/__init__.py index c2c5685e3180..071b09784bfe 100644 --- a/lib/galaxy/authnz/__init__.py +++ b/lib/galaxy/authnz/__init__.py @@ -43,7 +43,7 @@ def __init__(self, provider, config, backend_config): def refresh(self, trans, token): raise NotImplementedError() - def authenticate(self, provider, trans): + def authenticate(self, trans, idphint=None): """Runs for authentication process. Checks the database if a valid identity exists in the database; if yes, then the user is authenticated, if not, it generates a provider-specific @@ -72,7 +72,7 @@ def callback(self, state_token: str, authz_code: str, trans, login_redirect_url) """ raise NotImplementedError() - def disconnect(self, provider, trans, disconnect_redirect_url=None): + def disconnect(self, provider, trans, disconnect_redirect_url=None, email=None, association_id=None): raise NotImplementedError() def logout(self, trans, post_user_logout_href=None): diff --git a/lib/galaxy/authnz/custos_authnz.py b/lib/galaxy/authnz/custos_authnz.py index 806f6e1cafba..d6fdf22c8b31 100644 --- a/lib/galaxy/authnz/custos_authnz.py +++ b/lib/galaxy/authnz/custos_authnz.py @@ -343,7 +343,7 @@ def create_user(self, token, trans, login_redirect_url): trans.sa_session.commit() return login_redirect_url, user - def disconnect(self, provider, trans, email=None, disconnect_redirect_url=None): + def disconnect(self, provider, trans, disconnect_redirect_url=None, email=None, association_id=None): try: user = trans.user index = 0 diff --git a/lib/galaxy/authnz/managers.py b/lib/galaxy/authnz/managers.py index 93fc3c010ac3..7ec758fd3cac 100644 --- a/lib/galaxy/authnz/managers.py +++ b/lib/galaxy/authnz/managers.py @@ -432,7 +432,7 @@ def disconnect(self, provider, trans, email=None, disconnect_redirect_url=None, if success is False: return False, message, None elif provider in KEYCLOAK_BACKENDS: - return backend.disconnect(provider, trans, email, disconnect_redirect_url) + return backend.disconnect(provider, trans, disconnect_redirect_url, email=email) return backend.disconnect(provider, trans, disconnect_redirect_url) except Exception: msg = f"An error occurred when disconnecting authentication with `{provider}` identity provider for user `{trans.user.username}`" diff --git a/lib/galaxy/authnz/psa_authnz.py b/lib/galaxy/authnz/psa_authnz.py index b8a34b68e710..c222e76ebe09 100644 --- a/lib/galaxy/authnz/psa_authnz.py +++ b/lib/galaxy/authnz/psa_authnz.py @@ -193,7 +193,7 @@ def refresh(self, trans, user_authnz_token): return True return False - def authenticate(self, trans): + def authenticate(self, trans, idphint=None): on_the_fly_config(trans.sa_session) strategy = Strategy(trans.request, trans.session, Storage, self.config) backend = self._load_backend(strategy, self.config["redirect_uri"]) @@ -224,7 +224,7 @@ def callback(self, state_token, authz_code, trans, login_redirect_url): return redirect_url, self.config.get("user", None) - def disconnect(self, provider, trans, disconnect_redirect_url=None, association_id=None): + def disconnect(self, provider, trans, disconnect_redirect_url=None, email=None, association_id=None): on_the_fly_config(trans.sa_session) self.config[setting_name("DISCONNECT_REDIRECT_URL")] = ( disconnect_redirect_url if disconnect_redirect_url is not None else () diff --git a/lib/galaxy/job_execution/setup.py b/lib/galaxy/job_execution/setup.py index 99385986993a..433350ab3aed 100644 --- a/lib/galaxy/job_execution/setup.py +++ b/lib/galaxy/job_execution/setup.py @@ -31,7 +31,7 @@ MetadataFile, ) from galaxy.util import safe_makedirs -from galaxy.util.dictifiable import Dictifiable +from galaxy.util.dictifiable import UsesDictVisibleKeys TOOL_PROVIDED_JOB_METADATA_FILE = "galaxy.json" TOOL_PROVIDED_JOB_METADATA_KEYS = ["name", "info", "dbkey", "created_from_basename"] @@ -62,7 +62,7 @@ def set_job_outputs(self, job_outputs: List[JobOutput]) -> None: self.output_hdas_and_paths = {t.output_name: (t.dataset, t.dataset_path) for t in job_outputs} -class JobIO(Dictifiable): +class JobIO(UsesDictVisibleKeys): dict_collection_visible_keys = ( "job_id", "working_directory", @@ -168,7 +168,7 @@ def from_dict(cls, io_dict, sa_session): return cls(sa_session=sa_session, **io_dict) def to_dict(self): - io_dict = super().to_dict() + io_dict = super()._dictify_view_keys() # dict_for will always add `model_class`, we don't need or want it io_dict.pop("model_class") io_dict["user_context"] = self.user_context.to_dict() diff --git a/lib/galaxy/jobs/__init__.py b/lib/galaxy/jobs/__init__.py index df9a6fafa687..61a32ce8b802 100644 --- a/lib/galaxy/jobs/__init__.py +++ b/lib/galaxy/jobs/__init__.py @@ -1319,7 +1319,7 @@ def working_directory(self): ) return self.__working_directory - def working_directory_exists(self): + def working_directory_exists(self) -> bool: job = self.get_job() return self.app.object_store.exists(job, base_dir="job_work", dir_only=True, obj_dir=True) @@ -2117,7 +2117,7 @@ def check_tool_output(self, tool_stdout, tool_stderr, tool_exit_code, job, job_s return state - def cleanup(self, delete_files=True): + def cleanup(self, delete_files: bool = True) -> None: # At least one of these tool cleanup actions (job import), is needed # for the tool to work properly, that is why one might want to run # cleanup but not delete files. @@ -2130,9 +2130,7 @@ def cleanup(self, delete_files=True): if e.errno != errno.ENOENT: raise if delete_files: - self.object_store.delete( - self.get_job(), base_dir="job_work", entire_dir=True, dir_only=True, obj_dir=True - ) + self.object_store.delete(self.get_job(), base_dir="job_work", entire_dir=True, obj_dir=True) except Exception: log.exception("Unable to cleanup job %d", self.job_id) diff --git a/lib/galaxy/jobs/runners/aws.py b/lib/galaxy/jobs/runners/aws.py index 5f301b078975..6003fb01d5e1 100644 --- a/lib/galaxy/jobs/runners/aws.py +++ b/lib/galaxy/jobs/runners/aws.py @@ -401,11 +401,13 @@ def recover(self, job, job_wrapper): msg = "(name!r/runner!r) is still in {state!s} state, adding to the runner monitor queue" job_id = job.get_job_runner_external_id() job_name = self.JOB_NAME_PREFIX + job_wrapper.get_id_tag() - ajs = AsynchronousJobState(files_dir=job_wrapper.working_directory, job_wrapper=job_wrapper) - ajs.job_id = str(job_id) - ajs.job_name = job_name - ajs.job_wrapper = job_wrapper - ajs.job_destination = job_wrapper.job_destination + ajs = AsynchronousJobState( + files_dir=job_wrapper.working_directory, + job_wrapper=job_wrapper, + job_id=str(job_id), + job_name=job_name, + job_destination=job_wrapper.job_destination, + ) if job.state in (model.Job.states.RUNNING, model.Job.states.STOPPED): log.debug(msg.format(name=job.id, runner=job.job_runner_name, state=job.state)) ajs.old_state = model.Job.states.RUNNING @@ -417,14 +419,20 @@ def recover(self, job, job_wrapper): ajs.running = False self.monitor_queue.put(ajs) - def fail_job(self, job_state, exception=False): + def fail_job(self, job_state: JobState, exception=False, message="Job failed", full_status=None): if getattr(job_state, "stop_job", True): self.stop_job(job_state.job_wrapper) job_state.job_wrapper.reclaim_ownership() self._handle_runner_state("failure", job_state) if not job_state.runner_state_handled: - job_state.job_wrapper.fail(getattr(job_state, "fail_message", "Job failed"), exception=exception) - self._finish_or_resubmit_job(job_state, "", job_state.fail_message, job_id=job_state.job_id) + full_status = full_status or {} + tool_stdout = full_status.get("stdout") + tool_stderr = full_status.get("stderr") + fail_message = getattr(job_state, "fail_message", message) + job_state.job_wrapper.fail( + fail_message, tool_stdout=tool_stdout, tool_stderr=tool_stderr, exception=exception + ) + self._finish_or_resubmit_job(job_state, "", fail_message) if job_state.job_wrapper.cleanup_job == "always": job_state.cleanup() diff --git a/lib/galaxy/managers/base.py b/lib/galaxy/managers/base.py index 6629e6911267..3d4c39e1cd4b 100644 --- a/lib/galaxy/managers/base.py +++ b/lib/galaxy/managers/base.py @@ -697,7 +697,7 @@ def serialize(self, item, keys, **context): try: returned[key] = self.serializers[key](item, key, **context) except SkipAttribute: - # dont add this key if the deserializer threw this + # don't add this key if the serializer threw this pass elif key in self.serializable_keyset: returned[key] = self.default_serializer(item, key, **context) diff --git a/lib/galaxy/managers/configuration.py b/lib/galaxy/managers/configuration.py index 7a133b3f0fb2..d0ccfa8d53c2 100644 --- a/lib/galaxy/managers/configuration.py +++ b/lib/galaxy/managers/configuration.py @@ -104,8 +104,8 @@ def __init__(self, app): self.default_view = "all" self.add_view("all", list(self.serializers.keys())) - def default_serializer(self, config, key): - return getattr(config, key, None) + def default_serializer(self, item, key, **context): + return getattr(item, key, None) def add_serializers(self): def _defaults_to(default) -> base.Serializer: diff --git a/lib/galaxy/managers/datasets.py b/lib/galaxy/managers/datasets.py index 3a23a25e19a1..132a2eacd3aa 100644 --- a/lib/galaxy/managers/datasets.py +++ b/lib/galaxy/managers/datasets.py @@ -81,26 +81,26 @@ def create(self, manage_roles=None, access_roles=None, flush=True, **kwargs): session.commit() return dataset - def copy(self, dataset, **kwargs): + def copy(self, item, **kwargs): raise exceptions.NotImplemented("Datasets cannot be copied") - def purge(self, dataset, flush=True): + def purge(self, item, flush=True, **kwargs): """ Remove the object_store/file for this dataset from storage and mark as purged. :raises exceptions.ConfigDoesNotAllowException: if the instance doesn't allow """ - self.error_unless_dataset_purge_allowed(dataset) + self.error_unless_dataset_purge_allowed(item) # the following also marks dataset as purged and deleted - dataset.full_delete() - self.session().add(dataset) + item.full_delete() + self.session().add(item) if flush: session = self.session() with transaction(session): session.commit() - return dataset + return item def purge_datasets(self, request: PurgeDatasetsTaskRequest): """ @@ -376,7 +376,7 @@ def delete(self, item, flush: bool = True, stop_job: bool = False, **kwargs): self.stop_creating_job(item, flush=flush) return item - def purge(self, dataset_assoc, flush=True): + def purge(self, item, flush=True, **kwargs): """ Purge this DatasetInstance and the dataset underlying it. """ @@ -388,15 +388,15 @@ def purge(self, dataset_assoc, flush=True): # so that job cleanup associated with stop_creating_job will see # the dataset as purged. flush_required = not self.app.config.track_jobs_in_database - super().purge(dataset_assoc, flush=flush or flush_required) + super().purge(item, flush=flush or flush_required, **kwargs) - # stop any jobs outputing the dataset_assoc - self.stop_creating_job(dataset_assoc, flush=True) + # stop any jobs outputing the dataset association + self.stop_creating_job(item, flush=True) # more importantly, purge underlying dataset as well - if dataset_assoc.dataset.user_can_purge: - self.dataset_manager.purge(dataset_assoc.dataset) - return dataset_assoc + if item.dataset.user_can_purge: + self.dataset_manager.purge(item.dataset, flush=flush, **kwargs) + return item def by_user(self, user): raise exceptions.NotImplemented("Abstract Method") @@ -782,7 +782,7 @@ def add_serializers(self): # remove the single nesting key here del self.serializers["metadata"] - def serialize(self, dataset_assoc, keys, **context): + def serialize(self, item, keys, **context): """ Override to add metadata as flattened keys on the serialized DatasetInstance. """ @@ -790,11 +790,11 @@ def serialize(self, dataset_assoc, keys, **context): # TODO: remove these when metadata is sub-object KEYS_HANDLED_SEPARATELY = ("metadata",) left_to_handle = self._pluck_from_list(keys, KEYS_HANDLED_SEPARATELY) - serialized = super().serialize(dataset_assoc, keys, **context) + serialized = super().serialize(item, keys, **context) # add metadata directly to the dict instead of as a sub-object if "metadata" in left_to_handle: - metadata = self._prefixed_metadata(dataset_assoc) + metadata = self._prefixed_metadata(item) serialized.update(metadata) return serialized diff --git a/lib/galaxy/managers/hdas.py b/lib/galaxy/managers/hdas.py index f69a20c6cc4c..08ffca33fbb6 100644 --- a/lib/galaxy/managers/hdas.py +++ b/lib/galaxy/managers/hdas.py @@ -228,14 +228,14 @@ def copy( return copy # .... deletion and purging - def purge(self, hda, flush=True, **kwargs): + def purge(self, item, flush=True, **kwargs): if self.app.config.enable_celery_tasks: from galaxy.celery.tasks import purge_hda user = kwargs.get("user") - return purge_hda.delay(hda_id=hda.id, task_user_id=getattr(user, "id", None)) + return purge_hda.delay(hda_id=item.id, task_user_id=getattr(user, "id", None)) else: - self._purge(hda, flush=flush) + self._purge(item, flush=flush) def _purge(self, hda, flush=True): """ diff --git a/lib/galaxy/managers/histories.py b/lib/galaxy/managers/histories.py index 84db5dd38c2f..1cc3bf6e8937 100644 --- a/lib/galaxy/managers/histories.py +++ b/lib/galaxy/managers/histories.py @@ -289,19 +289,19 @@ def most_recent(self, user, filters=None, current_history=None): return self.session().scalars(stmt).first() # .... purgable - def purge(self, history, flush=True, **kwargs): + def purge(self, item, flush=True, **kwargs): """ Purge this history and all HDAs, Collections, and Datasets inside this history. """ - self.error_unless_mutable(history) + self.error_unless_mutable(item) self.hda_manager.dataset_manager.error_unless_dataset_purge_allowed() # First purge all the datasets - for hda in history.datasets: + for hda in item.datasets: if not hda.purged: self.hda_manager.purge(hda, flush=True, **kwargs) # Now mark the history as purged - super().purge(history, flush=flush, **kwargs) + super().purge(item, flush=flush, **kwargs) # .... current # TODO: make something to bypass the anon user + current history permissions issue diff --git a/lib/galaxy/managers/rbac_secured.py b/lib/galaxy/managers/rbac_secured.py index a63cd036447b..41f24da24809 100644 --- a/lib/galaxy/managers/rbac_secured.py +++ b/lib/galaxy/managers/rbac_secured.py @@ -42,10 +42,10 @@ def error_unless_permitted(self, item, user, trans=None): error_info = dict(model_class=item.__class__, id=getattr(item, "id", None)) raise self.permission_failed_error_class(**error_info) - def grant(self, item, user, flush=True): + def grant(self, item, user, flush: bool = True): raise NotImplementedError("abstract parent class") - def revoke(self, item, user, flush=True): + def revoke(self, item, user, flush: bool = True): raise NotImplementedError("abstract parent class") def _role_is_permitted(self, item, role): @@ -197,13 +197,13 @@ def is_permitted(self, dataset, user, trans=None): return True return False - def grant(self, dataset, user, flush=True): + def grant(self, item, user, flush: bool = True): private_role = self._user_private_role(user) - return self._grant_role(dataset, private_role, flush=flush) + return self._grant_role(item, private_role, flush=flush) - def revoke(self, dataset, user, flush=True): + def revoke(self, item, user, flush: bool = True): private_role = self._user_private_role(user) - return self._revoke_role(dataset, private_role, flush=flush) + return self._revoke_role(item, private_role, flush=flush) # ---- private def _role_is_permitted(self, dataset, role): @@ -253,13 +253,13 @@ def is_permitted(self, dataset, user, trans=None): or self._user_has_all_roles(user, current_roles) ) - def grant(self, item, user): + def grant(self, item, user, flush: bool = True): pass # not so easy # need to check for a sharing role # then add the new user to it - def revoke(self, item, user): + def revoke(self, item, user, flush: bool = True): pass # not so easy diff --git a/lib/galaxy/model/__init__.py b/lib/galaxy/model/__init__.py index 7f3159b131e7..eaf78862fd44 100644 --- a/lib/galaxy/model/__init__.py +++ b/lib/galaxy/model/__init__.py @@ -29,6 +29,7 @@ from typing import ( Any, cast, + ClassVar, Dict, Generic, Iterable, @@ -159,7 +160,6 @@ ) from galaxy.model.orm.now import now from galaxy.model.orm.util import add_object_to_object_session -from galaxy.objectstore import ObjectStorePopulator from galaxy.objectstore.templates import ( ObjectStoreConfiguration, ObjectStoreTemplate, @@ -219,6 +219,10 @@ from galaxy.util.sanitize_html import sanitize_html if TYPE_CHECKING: + from galaxy.objectstore import ( + BaseObjectStore, + ObjectStorePopulator, + ) from galaxy.schema.invocation import InvocationMessageUnion log = logging.getLogger(__name__) @@ -2708,11 +2712,12 @@ def dataset(self): class FakeDatasetAssociation: fake_dataset_association = True - def __init__(self, dataset=None): + def __init__(self, dataset: Optional["Dataset"] = None) -> None: self.dataset = dataset - self.metadata = {} + self.metadata: Dict = {} - def get_file_name(self, sync_cache=True): + def get_file_name(self, sync_cache: bool = True) -> str: + assert self.dataset return self.dataset.get_file_name(sync_cache) def __eq__(self, other): @@ -4041,7 +4046,7 @@ def flush(self): sa_session.commit() -def setup_global_object_store_for_models(object_store): +def setup_global_object_store_for_models(object_store: "BaseObjectStore") -> None: Dataset.object_store = object_store @@ -4133,7 +4138,9 @@ class conversion_messages(str, Enum): permitted_actions = get_permitted_actions(filter="DATASET") file_path = "/tmp/" - object_store = None # This get initialized in mapping.py (method init) by app.py + object_store: ClassVar[Optional["BaseObjectStore"]] = ( + None # This get initialized in mapping.py (method init) by app.py + ) engine = None def __init__( @@ -4167,7 +4174,7 @@ def in_ready_state(self): return self.state in self.ready_states @property - def shareable(self): + def shareable(self) -> bool: """Return True if placed into an objectstore not labeled as ``private``.""" if self.external_filename: return True @@ -4179,7 +4186,7 @@ def ensure_shareable(self): if not self.shareable: raise Exception(CANNOT_SHARE_PRIVATE_DATASET_MESSAGE) - def get_file_name(self, sync_cache=True): + def get_file_name(self, sync_cache: bool = True) -> str: if self.purged: log.warning(f"Attempt to get file name of purged dataset {self.id}") return "" @@ -4225,20 +4232,19 @@ def set_file_name(self, filename): else: self.external_filename = filename - def _assert_object_store_set(self): - assert self.object_store is not None, f"Object Store has not been initialized for dataset {self.id}" + def _assert_object_store_set(self) -> "BaseObjectStore": + assert self.object_store is not None, "Object Store has not been initialized" return self.object_store - def get_extra_files_path(self): + def get_extra_files_path(self) -> str: # Unlike get_file_name - external_extra_files_path is not backed by an # actual database column so if SA instantiates this object - the # attribute won't exist yet. if not getattr(self, "external_extra_files_path", None): - if self.object_store.exists(self, dir_only=True, extra_dir=self._extra_files_rel_path): - return self.object_store.get_filename(self, dir_only=True, extra_dir=self._extra_files_rel_path) - return self.object_store.construct_path( - self, dir_only=True, extra_dir=self._extra_files_rel_path, in_cache=True - ) + object_store = self._assert_object_store_set() + if object_store.exists(self, dir_only=True, extra_dir=self._extra_files_rel_path): + return object_store.get_filename(self, dir_only=True, extra_dir=self._extra_files_rel_path) + return object_store.construct_path(self, dir_only=True, extra_dir=self._extra_files_rel_path, in_cache=True) else: return os.path.abspath(self.external_extra_files_path) @@ -4283,7 +4289,7 @@ def _calculate_size(self) -> int: except OSError: return 0 assert self.object_store - return self.object_store.size(self) # type:ignore[unreachable] + return self.object_store.size(self) @overload def get_size(self, nice_size: Literal[False], calculate_size: bool = True) -> int: ... @@ -4663,7 +4669,7 @@ def get_quota_source_label(self): quota_source_label = property(get_quota_source_label) - def set_skipped(self, object_store_populator: ObjectStorePopulator): + def set_skipped(self, object_store_populator: "ObjectStorePopulator") -> None: assert self.dataset object_store_populator.set_object_store_id(self) self.extension = "expression.json" @@ -4674,7 +4680,7 @@ def set_skipped(self, object_store_populator: ObjectStorePopulator): out.write(json.dumps(None)) self.set_total_size() - def get_file_name(self, sync_cache=True) -> str: + def get_file_name(self, sync_cache: bool = True) -> str: if self.dataset.purged: return "" return self.dataset.get_file_name(sync_cache=sync_cache) @@ -9692,16 +9698,17 @@ def update_from_file(self, file_name): alt_name=os.path.basename(self.get_file_name()), ) - def get_file_name(self, sync_cache=True): + def get_file_name(self, sync_cache: bool = True) -> str: # Ensure the directory structure and the metadata file object exist try: da = self.history_dataset or self.library_dataset - if self.object_store_id is None and da is not None: + assert da is not None + if self.object_store_id is None: self.object_store_id = da.dataset.object_store_id object_store = da.dataset.object_store store_by = object_store.get_store_by(da.dataset) if store_by == "id" and self.id is None: - self.flush() + self.flush() # type:ignore[unreachable] identifier = getattr(self, store_by) alt_name = f"metadata_{identifier}.dat" if not object_store.exists(self, extra_dir="_metadata_files", extra_dir_at_root=True, alt_name=alt_name): @@ -9710,7 +9717,7 @@ def get_file_name(self, sync_cache=True): self, extra_dir="_metadata_files", extra_dir_at_root=True, alt_name=alt_name, sync_cache=sync_cache ) return path - except AttributeError: + except (AssertionError, AttributeError): assert ( self.id is not None ), "ID must be set before MetadataFile used without an HDA/LDDA (commit the object)" diff --git a/lib/galaxy/model/deferred.py b/lib/galaxy/model/deferred.py index d73517fb756a..a241d1418c1c 100644 --- a/lib/galaxy/model/deferred.py +++ b/lib/galaxy/model/deferred.py @@ -177,7 +177,10 @@ def ensure_materialized( return materialized_dataset_instance def _stream_source(self, target_source: DatasetSource, datatype) -> str: - path = stream_url_to_file(target_source.source_uri, file_sources=self._file_sources) + source_uri = target_source.source_uri + if source_uri is None: + raise Exception("Cannot stream from dataset source without specified source_uri") + path = stream_url_to_file(source_uri, file_sources=self._file_sources) transform = target_source.transform or [] to_posix_lines = False spaces_to_tabs = False diff --git a/lib/galaxy/model/mapping.py b/lib/galaxy/model/mapping.py index 053e177edd9f..e1d975e5be5a 100644 --- a/lib/galaxy/model/mapping.py +++ b/lib/galaxy/model/mapping.py @@ -3,6 +3,7 @@ from typing import ( Optional, Type, + TYPE_CHECKING, ) from galaxy import model @@ -16,6 +17,9 @@ from galaxy.model.security import GalaxyRBACAgent from galaxy.model.triggers.update_audit_table import install as install_timestamp_triggers +if TYPE_CHECKING: + from galaxy.objectstore import BaseObjectStore + log = logging.getLogger(__name__) metadata = mapper_registry.metadata @@ -99,8 +103,11 @@ def _build_model_mapping(engine, map_install_models, thread_local_log) -> Galaxy def init_models_from_config( - config: GalaxyAppConfiguration, map_install_models=False, object_store=None, trace_logger=None -): + config: GalaxyAppConfiguration, + map_install_models: bool = False, + object_store: Optional["BaseObjectStore"] = None, + trace_logger=None, +) -> GalaxyModelMapping: model = init( config.file_path, config.database_connection, diff --git a/lib/galaxy/objectstore/__init__.py b/lib/galaxy/objectstore/__init__.py index 4a4d639bf927..f38860b7c2c2 100644 --- a/lib/galaxy/objectstore/__init__.py +++ b/lib/galaxy/objectstore/__init__.py @@ -143,19 +143,43 @@ class ObjectStore(metaclass=abc.ABCMeta): """ @abc.abstractmethod - def exists(self, obj, base_dir=None, dir_only=False, extra_dir=None, extra_dir_at_root=False, alt_name=None): + def exists( + self, + obj, + base_dir=None, + dir_only=False, + extra_dir=None, + extra_dir_at_root=False, + alt_name=None, + obj_dir: bool = False, + ) -> bool: """Return True if the object identified by `obj` exists, False otherwise.""" raise NotImplementedError() @abc.abstractmethod def construct_path( - self, obj, base_dir=None, dir_only=False, extra_dir=None, extra_dir_at_root=False, alt_name=None - ): + self, + obj, + base_dir=None, + dir_only=False, + extra_dir=None, + extra_dir_at_root=False, + alt_name=None, + obj_dir: bool = False, + in_cache: bool = False, + ) -> str: raise NotImplementedError() @abc.abstractmethod def create( - self, obj, base_dir=None, dir_only=False, extra_dir=None, extra_dir_at_root=False, alt_name=None, obj_dir=False + self, + obj, + base_dir=None, + dir_only=False, + extra_dir=None, + extra_dir_at_root=False, + alt_name=None, + obj_dir: bool = False, ): """ Mark the object (`obj`) as existing in the store, but with no content. @@ -169,7 +193,9 @@ def create( raise NotImplementedError() @abc.abstractmethod - def empty(self, obj, base_dir=None, extra_dir=None, extra_dir_at_root=False, alt_name=None, obj_dir=False): + def empty( + self, obj, base_dir=None, extra_dir=None, extra_dir_at_root=False, alt_name=None, obj_dir: bool = False + ) -> bool: """ Test if the object identified by `obj` has content. @@ -178,7 +204,7 @@ def empty(self, obj, base_dir=None, extra_dir=None, extra_dir_at_root=False, alt raise NotImplementedError() @abc.abstractmethod - def size(self, obj, extra_dir=None, extra_dir_at_root=False, alt_name=None, obj_dir=False) -> int: + def size(self, obj, extra_dir=None, extra_dir_at_root=False, alt_name=None, obj_dir: bool = False) -> int: """ Return size of the object identified by `obj`. @@ -190,13 +216,13 @@ def size(self, obj, extra_dir=None, extra_dir_at_root=False, alt_name=None, obj_ def delete( self, obj, - entire_dir=False, + entire_dir: bool = False, base_dir=None, extra_dir=None, extra_dir_at_root=False, alt_name=None, - obj_dir=False, - ): + obj_dir: bool = False, + ) -> bool: """ Delete the object identified by `obj`. @@ -218,7 +244,7 @@ def get_data( extra_dir=None, extra_dir_at_root=False, alt_name=None, - obj_dir=False, + obj_dir: bool = False, ): """ Fetch `count` bytes of data offset by `start` bytes using `obj.id`. @@ -235,8 +261,16 @@ def get_data( @abc.abstractmethod def get_filename( - self, obj, base_dir=None, dir_only=False, extra_dir=None, extra_dir_at_root=False, alt_name=None, obj_dir=False - ): + self, + obj, + base_dir=None, + dir_only=False, + extra_dir=None, + extra_dir_at_root=False, + alt_name=None, + obj_dir: bool = False, + sync_cache: bool = True, + ) -> str: """ Get the expected filename with absolute path for object with id `obj.id`. @@ -252,11 +286,11 @@ def update_from_file( extra_dir=None, extra_dir_at_root=False, alt_name=None, - obj_dir=False, + obj_dir: bool = False, file_name=None, - create=False, - preserve_symlinks=False, - ): + create: bool = False, + preserve_symlinks: bool = False, + ) -> None: """ Inform the store that the file associated with `obj.id` has been updated. @@ -275,7 +309,7 @@ def update_from_file( raise NotImplementedError() @abc.abstractmethod - def get_object_url(self, obj, extra_dir=None, extra_dir_at_root=False, alt_name=None, obj_dir=False): + def get_object_url(self, obj, extra_dir=None, extra_dir_at_root=False, alt_name=None, obj_dir: bool = False): """ Return the URL for direct access if supported, otherwise return None. @@ -310,7 +344,7 @@ def get_concrete_store_badges(self, obj) -> List[BadgeDict]: """Return a list of dictified badges summarizing the object store configuration.""" @abc.abstractmethod - def is_private(self, obj): + def is_private(self, obj) -> bool: """Return True iff supplied object is stored in private ConcreteObjectStore.""" def object_store_ids(self, private=None): @@ -450,35 +484,190 @@ def _get_object_id(self, obj): def _invoke(self, delegate, obj=None, **kwargs): return self.__getattribute__(f"_{delegate}")(obj=obj, **kwargs) - def exists(self, obj, **kwargs): - return self._invoke("exists", obj, **kwargs) + def exists( + self, + obj, + base_dir=None, + dir_only=False, + extra_dir=None, + extra_dir_at_root=False, + alt_name=None, + obj_dir: bool = False, + ) -> bool: + return self._invoke( + "exists", + obj, + base_dir=base_dir, + dir_only=dir_only, + extra_dir=extra_dir, + extra_dir_at_root=extra_dir_at_root, + alt_name=alt_name, + obj_dir=obj_dir, + ) - def construct_path(self, obj, **kwargs): - return self._invoke("construct_path", obj, **kwargs) + def construct_path( + self, + obj, + base_dir=None, + dir_only=False, + extra_dir=None, + extra_dir_at_root=False, + alt_name=None, + obj_dir: bool = False, + in_cache: bool = False, + ) -> str: + return self._invoke( + "construct_path", + obj, + base_dir=base_dir, + dir_only=dir_only, + extra_dir=extra_dir, + extra_dir_at_root=extra_dir_at_root, + alt_name=alt_name, + obj_dir=obj_dir, + in_cache=in_cache, + ) - def create(self, obj, **kwargs): - return self._invoke("create", obj, **kwargs) + def create( + self, + obj, + base_dir=None, + dir_only=False, + extra_dir=None, + extra_dir_at_root=False, + alt_name=None, + obj_dir: bool = False, + ): + return self._invoke( + "create", + obj, + base_dir=base_dir, + dir_only=dir_only, + extra_dir=extra_dir, + extra_dir_at_root=extra_dir_at_root, + alt_name=alt_name, + obj_dir=obj_dir, + ) - def empty(self, obj, **kwargs): - return self._invoke("empty", obj, **kwargs) + def empty( + self, obj, base_dir=None, extra_dir=None, extra_dir_at_root=False, alt_name=None, obj_dir: bool = False + ) -> bool: + return self._invoke( + "empty", + obj, + base_dir=base_dir, + extra_dir=extra_dir, + extra_dir_at_root=extra_dir_at_root, + alt_name=alt_name, + obj_dir=obj_dir, + ) - def size(self, obj, **kwargs): - return self._invoke("size", obj, **kwargs) + def size(self, obj, extra_dir=None, extra_dir_at_root=False, alt_name=None, obj_dir: bool = False) -> int: + return self._invoke( + "size", obj, extra_dir=extra_dir, extra_dir_at_root=extra_dir_at_root, alt_name=alt_name, obj_dir=obj_dir + ) - def delete(self, obj, **kwargs): - return self._invoke("delete", obj, **kwargs) + def delete( + self, + obj, + entire_dir: bool = False, + base_dir=None, + extra_dir=None, + extra_dir_at_root=False, + alt_name=None, + obj_dir: bool = False, + ) -> bool: + return self._invoke( + "delete", + obj, + entire_dir=entire_dir, + base_dir=base_dir, + extra_dir=extra_dir, + extra_dir_at_root=extra_dir_at_root, + alt_name=alt_name, + obj_dir=obj_dir, + ) - def get_data(self, obj, **kwargs): - return self._invoke("get_data", obj, **kwargs) + def get_data( + self, + obj, + start=0, + count=-1, + base_dir=None, + extra_dir=None, + extra_dir_at_root=False, + alt_name=None, + obj_dir: bool = False, + ): + return self._invoke( + "get_data", + obj, + start=start, + count=count, + base_dir=base_dir, + extra_dir=extra_dir, + extra_dir_at_root=extra_dir_at_root, + alt_name=alt_name, + obj_dir=obj_dir, + ) - def get_filename(self, obj, **kwargs): - return self._invoke("get_filename", obj, **kwargs) + def get_filename( + self, + obj, + base_dir=None, + dir_only=False, + extra_dir=None, + extra_dir_at_root=False, + alt_name=None, + obj_dir: bool = False, + sync_cache: bool = True, + ) -> str: + return self._invoke( + "get_filename", + obj, + base_dir=base_dir, + dir_only=dir_only, + extra_dir=extra_dir, + extra_dir_at_root=extra_dir_at_root, + alt_name=alt_name, + obj_dir=obj_dir, + sync_cache=sync_cache, + ) - def update_from_file(self, obj, **kwargs): - return self._invoke("update_from_file", obj, **kwargs) + def update_from_file( + self, + obj, + base_dir=None, + extra_dir=None, + extra_dir_at_root=False, + alt_name=None, + obj_dir: bool = False, + file_name=None, + create: bool = False, + preserve_symlinks: bool = False, + ) -> None: + return self._invoke( + "update_from_file", + obj, + base_dir=base_dir, + extra_dir=extra_dir, + extra_dir_at_root=extra_dir_at_root, + alt_name=alt_name, + obj_dir=obj_dir, + file_name=file_name, + create=create, + preserve_symlinks=preserve_symlinks, + ) - def get_object_url(self, obj, **kwargs): - return self._invoke("get_object_url", obj, **kwargs) + def get_object_url(self, obj, extra_dir=None, extra_dir_at_root=False, alt_name=None, obj_dir: bool = False): + return self._invoke( + "get_object_url", + obj, + extra_dir=extra_dir, + extra_dir_at_root=extra_dir_at_root, + alt_name=alt_name, + obj_dir=obj_dir, + ) def get_concrete_store_name(self, obj): return self._invoke("get_concrete_store_name", obj) @@ -495,7 +684,7 @@ def get_store_usage_percent(self): def get_store_by(self, obj, **kwargs): return self._invoke("get_store_by", obj, **kwargs) - def is_private(self, obj): + def is_private(self, obj) -> bool: return self._invoke("is_private", obj) def cache_targets(self) -> List[CacheTarget]: @@ -609,7 +798,7 @@ def _get_concrete_store_description_markdown(self, obj): def _get_store_by(self, obj): return self.store_by - def _is_private(self, obj): + def _is_private(self, obj) -> bool: return self.private @property @@ -707,8 +896,15 @@ def to_dict(self): return as_dict def __get_filename( - self, obj, base_dir=None, dir_only=False, extra_dir=None, extra_dir_at_root=False, alt_name=None, obj_dir=False - ): + self, + obj, + base_dir=None, + dir_only=False, + extra_dir=None, + extra_dir_at_root=False, + alt_name=None, + obj_dir: bool = False, + ) -> str: """ Return the absolute path for the file corresponding to the `obj.id`. @@ -726,30 +922,32 @@ def __get_filename( ) # For backward compatibility: check the old style root path first; # otherwise construct hashed path. - if not os.path.exists(path): - return self._construct_path( - obj, - base_dir=base_dir, - dir_only=dir_only, - extra_dir=extra_dir, - extra_dir_at_root=extra_dir_at_root, - alt_name=alt_name, - ) + if os.path.exists(path): + return path + return self._construct_path( + obj, + base_dir=base_dir, + dir_only=dir_only, + extra_dir=extra_dir, + extra_dir_at_root=extra_dir_at_root, + alt_name=alt_name, + obj_dir=obj_dir, + ) # TODO: rename to _disk_path or something like that to avoid conflicts with # children that'll use the local_extra_dirs decorator, e.g. S3 def _construct_path( self, obj, - old_style=False, base_dir=None, dir_only=False, extra_dir=None, extra_dir_at_root=False, alt_name=None, - obj_dir=False, - **kwargs, - ): + obj_dir: bool = False, + in_cache: bool = False, + old_style=False, + ) -> str: """ Construct the absolute path for accessing the object identified by `obj.id`. @@ -816,7 +1014,7 @@ def _construct_path( path = os.path.join(path, alt_name if alt_name else f"dataset_{obj_id}.dat") return os.path.abspath(path) - def _exists(self, obj, **kwargs): + def _exists(self, obj, **kwargs) -> bool: """Override `ObjectStore`'s stub and check on disk.""" if self.check_old_style: path = self._construct_path(obj, old_style=True, **kwargs) @@ -841,9 +1039,9 @@ def _create(self, obj, **kwargs): umask_fix_perms(path, self.config.umask, 0o666) return self - def _empty(self, obj, **kwargs): + def _empty(self, obj, **kwargs) -> bool: """Override `ObjectStore`'s stub by checking file size on disk.""" - return self.size(obj, **kwargs) == 0 + return self._size(obj, **kwargs) == 0 def _size(self, obj, **kwargs) -> int: """Override `ObjectStore`'s stub by return file size on disk. @@ -865,7 +1063,7 @@ def _size(self, obj, **kwargs) -> int: else: return 0 - def _delete(self, obj, entire_dir=False, **kwargs): + def _delete(self, obj, entire_dir: bool = False, **kwargs) -> bool: """Override `ObjectStore`'s stub; delete the file or folder on disk.""" path = self._get_filename(obj, **kwargs) extra_dir = kwargs.get("extra_dir", None) @@ -884,7 +1082,7 @@ def _delete(self, obj, entire_dir=False, **kwargs): # and another process writes files into that directory. # If the path doesn't exist anymore, another rmtree call was successful. path = self.__get_filename(obj, **kwargs) - if path is None: + if not os.path.exists(path): return True else: log.critical(f"{path} delete error {ex}", exc_info=True) @@ -898,7 +1096,7 @@ def _get_data(self, obj, start=0, count=-1, **kwargs): data_file.close() return content - def _get_filename(self, obj, **kwargs): + def _get_filename(self, obj, sync_cache: bool = True, **kwargs) -> str: """ Override `ObjectStore`'s stub. @@ -916,9 +1114,10 @@ def _get_filename(self, obj, **kwargs): raise ObjectNotFound return path - def _update_from_file(self, obj, file_name=None, create=False, **kwargs): + def _update_from_file( + self, obj, file_name=None, create: bool = False, preserve_symlinks: bool = False, **kwargs + ) -> None: """`create` parameter is not used in this implementation.""" - preserve_symlinks = kwargs.pop("preserve_symlinks", False) # FIXME: symlinks and the object store model may not play well together # these should be handled better, e.g. registering the symlink'd file # as an object @@ -961,6 +1160,8 @@ class NestedObjectStore(BaseObjectStore): Example: DistributedObjectStore, HierarchicalObjectStore """ + backends: Dict + def __init__(self, config, config_xml=None): """Extend `ObjectStore`'s constructor.""" super().__init__(config) @@ -972,7 +1173,7 @@ def shutdown(self): store.shutdown() super().shutdown() - def _exists(self, obj, **kwargs): + def _exists(self, obj, **kwargs) -> bool: """Determine if the `obj` exists in any of the backends.""" return self._call_method("_exists", obj, False, False, **kwargs) @@ -988,15 +1189,15 @@ def cache_targets(self) -> List[CacheTarget]: # TODO: merge more intelligently - de-duplicate paths and handle conflicting sizes/percents return cache_targets - def _empty(self, obj, **kwargs): + def _empty(self, obj, **kwargs) -> bool: """For the first backend that has this `obj`, determine if it is empty.""" return self._call_method("_empty", obj, True, False, **kwargs) - def _size(self, obj, **kwargs): + def _size(self, obj, **kwargs) -> int: """For the first backend that has this `obj`, return its size.""" return self._call_method("_size", obj, 0, False, **kwargs) - def _delete(self, obj, **kwargs): + def _delete(self, obj, **kwargs) -> bool: """For the first backend that has this `obj`, delete it.""" return self._call_method("_delete", obj, False, False, **kwargs) @@ -1004,16 +1205,46 @@ def _get_data(self, obj, **kwargs): """For the first backend that has this `obj`, get data from it.""" return self._call_method("_get_data", obj, ObjectNotFound, True, **kwargs) - def _get_filename(self, obj, **kwargs): + def _get_filename(self, obj, **kwargs) -> str: """For the first backend that has this `obj`, get its filename.""" return self._call_method("_get_filename", obj, ObjectNotFound, True, **kwargs) - def _update_from_file(self, obj, **kwargs): + def _update_from_file( + self, + obj, + base_dir=None, + extra_dir=None, + extra_dir_at_root=False, + alt_name=None, + obj_dir: bool = False, + file_name=None, + create: bool = False, + preserve_symlinks: bool = False, + ) -> None: """For the first backend that has this `obj`, update it from the given file.""" - if kwargs.get("create", False): - self._create(obj, **kwargs) - kwargs["create"] = False - return self._call_method("_update_from_file", obj, ObjectNotFound, True, **kwargs) + if create: + self._create( + obj, + base_dir=base_dir, + extra_dir=extra_dir, + extra_dir_at_root=extra_dir_at_root, + alt_name=alt_name, + obj_dir=obj_dir, + ) + return self._call_method( + "_update_from_file", + obj, + ObjectNotFound, + True, + base_dir=base_dir, + extra_dir=extra_dir, + extra_dir_at_root=extra_dir_at_root, + alt_name=alt_name, + obj_dir=obj_dir, + file_name=file_name, + create=False, + preserve_symlinks=preserve_symlinks, + ) def _get_object_url(self, obj, **kwargs): """For the first backend that has this `obj`, get its URL.""" @@ -1028,7 +1259,7 @@ def _get_concrete_store_description_markdown(self, obj): def _get_concrete_store_badges(self, obj) -> List[BadgeDict]: return self._call_method("_get_concrete_store_badges", obj, [], False) - def _is_private(self, obj): + def _is_private(self, obj) -> bool: return self._call_method("_is_private", obj, False, False) def _get_store_by(self, obj): @@ -1044,8 +1275,22 @@ def _repr_object_for_exception(self, obj): def _call_method(self, method, obj, default, default_is_exception, **kwargs): """Check all children object stores for the first one with the dataset.""" + base_dir = kwargs.get("base_dir", None) + dir_only = kwargs.get("dir_only", False) + extra_dir = kwargs.get("extra_dir", None) + extra_dir_at_root = kwargs.get("extra_dir_at_root", False) + alt_name = kwargs.get("alt_name", None) + obj_dir = kwargs.get("obj_dir", False) for store in self.backends.values(): - if store.exists(obj, **kwargs): + if store.exists( + obj, + base_dir=base_dir, + dir_only=dir_only, + extra_dir=extra_dir, + extra_dir_at_root=extra_dir_at_root, + alt_name=alt_name, + obj_dir=obj_dir, + ): return store.__getattribute__(method)(obj, **kwargs) if default_is_exception: raise default( @@ -1075,6 +1320,7 @@ class DistributedObjectStore(NestedObjectStore): with weighting. """ + backends: Dict[str, Any] # BaseObjectStore or ConcreteObjectStore? store_type = "distributed" _quota_source_map: Optional["QuotaSourceMap"] _device_source_map: Optional["DeviceSourceMap"] @@ -1099,7 +1345,6 @@ def __init__( super().__init__(config, config_dict) self._quota_source_map = None self._device_source_map = None - self.backends = {} self.weighted_backend_ids = [] self.original_weighted_backend_ids = [] self.max_percent_full = {} @@ -1109,22 +1354,22 @@ def __init__( user_selection_allowed = [] for backend_def in config_dict["backends"]: - backened_id = backend_def["id"] + backend_id = backend_def["id"] maxpctfull = backend_def.get("max_percent_full", 0) weight = backend_def["weight"] allow_selection = backend_def.get("allow_selection") if allow_selection: - user_selection_allowed.append(backened_id) + user_selection_allowed.append(backend_id) backend = build_object_store_from_config(config, config_dict=backend_def, fsmon=fsmon) - self.backends[backened_id] = backend - self.max_percent_full[backened_id] = maxpctfull + self.backends[backend_id] = backend + self.max_percent_full[backend_id] = maxpctfull for _ in range(0, weight): # The simplest way to do weighting: add backend ids to a # sequence the number of times equalling weight, then randomly # choose a backend from that sequence at creation - self.weighted_backend_ids.append(backened_id) + self.weighted_backend_ids.append(backend_id) self.original_weighted_backend_ids = self.weighted_backend_ids self.user_object_store_resolver = user_object_store_resolver @@ -1244,7 +1489,7 @@ def __filesystem_monitor(self, sleeper: Sleeper): self.weighted_backend_ids = new_weighted_backend_ids sleeper.sleep(120) # Test free space every 2 minutes - def _construct_path(self, obj, **kwargs): + def _construct_path(self, obj, **kwargs) -> str: return self._resolve_backend(obj.object_store_id).construct_path(obj, **kwargs) def _create(self, obj, **kwargs): @@ -1339,8 +1584,22 @@ def __get_store_id_for(self, obj, **kwargs): # if this instance has been switched from a non-distributed to a # distributed object store, or if the object's store id is invalid, # try to locate the object + base_dir = kwargs.get("base_dir", None) + dir_only = kwargs.get("dir_only", False) + extra_dir = kwargs.get("extra_dir", None) + extra_dir_at_root = kwargs.get("extra_dir_at_root", False) + alt_name = kwargs.get("alt_name", None) + obj_dir = kwargs.get("obj_dir", False) for id, store in self.backends.items(): - if store.exists(obj, **kwargs): + if store.exists( + obj, + base_dir=base_dir, + dir_only=dir_only, + extra_dir=extra_dir, + extra_dir_at_root=extra_dir_at_root, + alt_name=alt_name, + obj_dir=obj_dir, + ): log.warning( f"{obj.__class__.__name__} object with ID {obj.id} found in backend object store with ID {id}" ) @@ -1395,13 +1654,13 @@ class HierarchicalObjectStore(NestedObjectStore): When creating objects only the first store is used. """ + backends: Dict[int, BaseObjectStore] store_type = "hierarchical" def __init__(self, config, config_dict, fsmon=False): """The default constructor. Extends `NestedObjectStore`.""" super().__init__(config, config_dict) - backends: Dict[int, ObjectStore] = {} is_private = config_dict.get("private", DEFAULT_PRIVATE) for order, backend_def in enumerate(config_dict["backends"]): backend_is_private = backend_def.get("private") @@ -1416,9 +1675,8 @@ def __init__(self, config, config_dict, fsmon=False): assert backend_quota.get("source", DEFAULT_QUOTA_SOURCE) == DEFAULT_QUOTA_SOURCE assert backend_quota.get("enabled", DEFAULT_QUOTA_ENABLED) == DEFAULT_QUOTA_ENABLED - backends[order] = build_object_store_from_config(config, config_dict=backend_def, fsmon=fsmon) + self.backends[order] = build_object_store_from_config(config, config_dict=backend_def, fsmon=fsmon) - self.backends = backends self.private = is_private quota_config = config_dict.get("quota", {}) self.quota_source = quota_config.get("source", DEFAULT_QUOTA_SOURCE) @@ -1450,21 +1708,21 @@ def to_dict(self): as_dict["private"] = self.private return as_dict - def _exists(self, obj, **kwargs): + def _exists(self, obj, **kwargs) -> bool: """Check all child object stores.""" for store in self.backends.values(): if store.exists(obj, **kwargs): return True return False - def _construct_path(self, obj, **kwargs): + def _construct_path(self, obj, **kwargs) -> str: return self.backends[0].construct_path(obj, **kwargs) def _create(self, obj, **kwargs): """Call the primary object store.""" return self.backends[0].create(obj, **kwargs) - def _is_private(self, obj): + def _is_private(self, obj) -> bool: # Unlink the DistributedObjectStore - the HierarchicalObjectStore does not use # object_store_id - so all the contained object stores need to define is_private # the same way. @@ -1840,17 +2098,16 @@ def __init__(self, has_object_store, user): self.object_store_id = None self.user = user - def set_object_store_id(self, data, require_shareable=False): + def set_object_store_id(self, data: "DatasetInstance", require_shareable: bool = False) -> None: self.set_dataset_object_store_id(data.dataset, require_shareable=require_shareable) - def set_dataset_object_store_id(self, dataset, require_shareable=True): + def set_dataset_object_store_id(self, dataset: "Dataset", require_shareable: bool = True) -> None: # Create an empty file immediately. The first dataset will be # created in the "default" store, all others will be created in # the same store as the first. dataset.object_store_id = self.object_store_id try: - ensure_non_private = require_shareable - concrete_store = self.object_store.create(dataset, ensure_non_private=ensure_non_private) + concrete_store = self.object_store.create(dataset) if concrete_store.private and require_shareable: raise ObjectCreationProblemSharingDisabled() except ObjectInvalid: diff --git a/lib/galaxy/objectstore/_caching_base.py b/lib/galaxy/objectstore/_caching_base.py index 066fc9077e34..7beb8c63437c 100644 --- a/lib/galaxy/objectstore/_caching_base.py +++ b/lib/galaxy/objectstore/_caching_base.py @@ -57,10 +57,9 @@ def _construct_path( extra_dir=None, extra_dir_at_root=False, alt_name=None, - obj_dir=False, - in_cache=False, - **kwargs, - ): + obj_dir: bool = False, + in_cache: bool = False, + ) -> str: # extra_dir should never be constructed from provided data but just # make sure there are no shenannigans afoot if extra_dir and extra_dir != os.path.normpath(extra_dir): @@ -112,7 +111,7 @@ def _in_cache(self, rel_path: str) -> bool: cache_path = self._get_cache_path(rel_path) return os.path.exists(cache_path) - def _pull_into_cache(self, rel_path) -> bool: + def _pull_into_cache(self, rel_path, **kwargs) -> bool: # Ensure the cache directory structure exists (e.g., dataset_#_files/) rel_path_dir = os.path.dirname(rel_path) if not os.path.exists(self._get_cache_path(rel_path_dir)): @@ -129,7 +128,7 @@ def _get_data(self, obj, start=0, count=-1, **kwargs): rel_path = self._construct_path(obj, **kwargs) # Check cache first and get file if not there if not self._in_cache(rel_path): - self._pull_into_cache(rel_path) + self._pull_into_cache(rel_path, **kwargs) # Read the file content from cache data_file = open(self._get_cache_path(rel_path)) data_file.seek(start) @@ -137,7 +136,7 @@ def _get_data(self, obj, start=0, count=-1, **kwargs): data_file.close() return content - def _exists(self, obj, **kwargs): + def _exists(self, obj, **kwargs) -> bool: in_cache = exists_remotely = False rel_path = self._construct_path(obj, **kwargs) dir_only = kwargs.get("dir_only", False) @@ -248,7 +247,7 @@ def _push_to_storage(self, rel_path, source_file=None, from_string=None): ) return success - def _empty(self, obj, **kwargs): + def _empty(self, obj, **kwargs) -> bool: if self._exists(obj, **kwargs): return self._size(obj, **kwargs) == 0 else: @@ -257,7 +256,7 @@ def _empty(self, obj, **kwargs): def _get_size_in_cache(self, rel_path): return os.path.getsize(self._get_cache_path(rel_path)) - def _size(self, obj, **kwargs): + def _size(self, obj, **kwargs) -> int: rel_path = self._construct_path(obj, **kwargs) if self._in_cache(rel_path): try: @@ -269,11 +268,10 @@ def _size(self, obj, **kwargs): log.warning("Did not find dataset '%s', returning 0 for size", rel_path) return 0 - def _get_filename(self, obj, **kwargs): + def _get_filename(self, obj, sync_cache: bool = True, **kwargs) -> str: base_dir = kwargs.get("base_dir", None) dir_only = kwargs.get("dir_only", False) obj_dir = kwargs.get("obj_dir", False) - sync_cache = kwargs.get("sync_cache", True) rel_path = self._construct_path(obj, **kwargs) @@ -298,7 +296,7 @@ def _get_filename(self, obj, **kwargs): self._download_directory_into_cache(rel_path, cache_path) return cache_path else: - if self._pull_into_cache(rel_path): + if self._pull_into_cache(rel_path, **kwargs): return cache_path raise ObjectNotFound(f"objectstore.get_filename, no cache_path: {obj}, kwargs: {kwargs}") @@ -313,7 +311,7 @@ def _download_directory_into_cache(self, rel_path, cache_path): # object stores should definitely override this. pass - def _delete(self, obj, entire_dir=False, **kwargs): + def _delete(self, obj, entire_dir: bool = False, **kwargs) -> bool: rel_path = self._construct_path(obj, **kwargs) extra_dir = kwargs.get("extra_dir", None) base_dir = kwargs.get("base_dir", None) @@ -342,7 +340,9 @@ def _delete(self, obj, entire_dir=False, **kwargs): log.exception("%s delete error", self._get_filename(obj, **kwargs)) return False - def _update_from_file(self, obj, file_name=None, create=False, **kwargs): + def _update_from_file( + self, obj, file_name=None, create: bool = False, preserve_symlinks: bool = False, **kwargs + ) -> None: if create: self._create(obj, **kwargs) diff --git a/lib/galaxy/objectstore/irods.py b/lib/galaxy/objectstore/irods.py index 9241c1efe75c..727bf9c87b14 100644 --- a/lib/galaxy/objectstore/irods.py +++ b/lib/galaxy/objectstore/irods.py @@ -450,7 +450,7 @@ def _push_to_storage(self, rel_path, source_file=None, from_string=None): finally: log.debug("irods_pt _push_to_storage: %s", ipt_timer) - def _delete(self, obj, entire_dir=False, **kwargs): + def _delete(self, obj, entire_dir: bool = False, **kwargs) -> bool: ipt_timer = ExecutionTimer() rel_path = self._construct_path(obj, **kwargs) extra_dir = kwargs.get("extra_dir", None) diff --git a/lib/galaxy/objectstore/pithos.py b/lib/galaxy/objectstore/pithos.py index 43697062d9d0..45e95f4fa573 100644 --- a/lib/galaxy/objectstore/pithos.py +++ b/lib/galaxy/objectstore/pithos.py @@ -149,7 +149,7 @@ def _download(self, rel_path): # No need to overwrite "shutdown" - def _exists(self, obj, **kwargs): + def _exists(self, obj, **kwargs) -> bool: """Check if file exists, fix if file in cache and not on Pithos+ :returns: weather the file exists remotely or in cache """ diff --git a/lib/galaxy/objectstore/pulsar.py b/lib/galaxy/objectstore/pulsar.py index 04eab33d5fa4..7444cc49d82f 100644 --- a/lib/galaxy/objectstore/pulsar.py +++ b/lib/galaxy/objectstore/pulsar.py @@ -24,7 +24,7 @@ class PulsarObjectStore(BaseObjectStore): def __init__(self, config, config_xml): self.pulsar_client = self.__build_pulsar_client(config_xml) - def _exists(self, obj, **kwds): + def _exists(self, obj, **kwds) -> bool: return self.pulsar_client.exists(**self.__build_kwds(obj, **kwds)) def file_ready(self, obj, **kwds): @@ -33,23 +33,23 @@ def file_ready(self, obj, **kwds): def _create(self, obj, **kwds): return self.pulsar_client.create(**self.__build_kwds(obj, **kwds)) - def _empty(self, obj, **kwds): + def _empty(self, obj, **kwds) -> bool: return self.pulsar_client.empty(**self.__build_kwds(obj, **kwds)) - def _size(self, obj, **kwds): + def _size(self, obj, **kwds) -> int: return self.pulsar_client.size(**self.__build_kwds(obj, **kwds)) - def _delete(self, obj, **kwds): + def _delete(self, obj, **kwds) -> bool: return self.pulsar_client.delete(**self.__build_kwds(obj, **kwds)) # TODO: Optimize get_data. def _get_data(self, obj, **kwds): return self.pulsar_client.get_data(**self.__build_kwds(obj, **kwds)) - def _get_filename(self, obj, **kwds): + def _get_filename(self, obj, **kwds) -> str: return self.pulsar_client.get_filename(**self.__build_kwds(obj, **kwds)) - def _update_from_file(self, obj, **kwds): + def _update_from_file(self, obj, **kwds) -> None: return self.pulsar_client.update_from_file(**self.__build_kwds(obj, **kwds)) def _get_store_usage_percent(self): diff --git a/lib/galaxy/objectstore/rucio.py b/lib/galaxy/objectstore/rucio.py index 4bb6540a34de..bbf8fff01395 100644 --- a/lib/galaxy/objectstore/rucio.py +++ b/lib/galaxy/objectstore/rucio.py @@ -2,6 +2,7 @@ import logging import os import shutil +from typing import Optional try: import rucio.common @@ -249,12 +250,12 @@ def data_object_exists(self, key): except Exception: return False - def get_size(self, key): + def get_size(self, key) -> int: key = _encode_key(key) dids = [{"scope": self.scope, "name": key}] try: repl = next(self.get_rucio_client().list_replicas(dids)) - return repl["bytes"] + return int(repl["bytes"]) except Exception: return 0 @@ -306,7 +307,7 @@ def _initialize(self): self._ensure_staging_path_writable() self._start_cache_monitor_if_needed() - def _pull_into_cache(self, rel_path, auth_token): + def _pull_into_cache(self, rel_path, **kwargs) -> bool: log.debug("rucio _pull_into_cache: %s", rel_path) # Ensure the cache directory structure exists (e.g., dataset_#_files/) rel_path_dir = os.path.dirname(rel_path) @@ -314,6 +315,7 @@ def _pull_into_cache(self, rel_path, auth_token): os.makedirs(self._get_cache_path(rel_path_dir), exist_ok=True) # Now pull in the file dest = self._get_cache_path(rel_path) + auth_token = self._get_token(**kwargs) file_ok = self.rucio_broker.download(rel_path, dest, auth_token) self._fix_permissions(self._get_cache_path(rel_path_dir)) return file_ok @@ -334,7 +336,7 @@ def _fix_permissions(self, rel_path): # "interfaces to implement" - def _exists(self, obj, **kwargs): + def _exists(self, obj, **kwargs) -> bool: rel_path = self._construct_path(obj, **kwargs) log.debug("rucio _exists: %s", rel_path) @@ -387,26 +389,27 @@ def _create(self, obj, **kwargs): log.debug("rucio _create: %s", rel_path) return self - def _size(self, obj, **kwargs): + def _size(self, obj, **kwargs) -> int: rel_path = self._construct_path(obj, **kwargs) log.debug("rucio _size: %s", rel_path) if self._in_cache(rel_path): + size: Optional[int] = None try: size = os.path.getsize(self._get_cache_path(rel_path)) except OSError as ex: log.info("Could not get size of file '%s' in local cache, will try iRODS. Error: %s", rel_path, ex) - if size != 0: + if size is not None: return size if self._exists(obj, **kwargs): return self._get_remote_size(rel_path) log.warning("Did not find dataset '%s', returning 0 for size", rel_path) return 0 - def _get_remote_size(self, rel_path): + def _get_remote_size(self, rel_path) -> int: return self.rucio_broker.get_size(rel_path) - def _delete(self, obj, entire_dir=False, **kwargs): + def _delete(self, obj, entire_dir: bool = False, **kwargs) -> bool: rel_path = self._construct_path(obj, **kwargs) extra_dir = kwargs.get("extra_dir", None) base_dir = kwargs.get("base_dir", None) @@ -454,12 +457,11 @@ def _get_token(self, **kwargs): log.debug("Failed to get auth token: %s", e) return None - def _get_filename(self, obj, **kwargs): + def _get_filename(self, obj, sync_cache: bool = True, **kwargs) -> str: base_dir = kwargs.get("base_dir", None) dir_only = kwargs.get("dir_only", False) auth_token = self._get_token(**kwargs) rel_path = self._construct_path(obj, **kwargs) - sync_cache = kwargs.get("sync_cache", True) log.debug("rucio _get_filename: %s", rel_path) @@ -491,7 +493,7 @@ def _get_filename(self, obj, **kwargs): if dir_only: # Directories do not get pulled into cache return cache_path else: - if self._pull_into_cache(rel_path, auth_token): + if self._pull_into_cache(rel_path, auth_token=auth_token): return cache_path raise ObjectNotFound(f"objectstore.get_filename, no cache_path: {obj}, kwargs: {kwargs}") @@ -508,7 +510,9 @@ def _register_file(self, rel_path, file_name): log.debug("rucio _register_file: %s", file_name) return - def _update_from_file(self, obj, file_name=None, create=False, **kwargs): + def _update_from_file( + self, obj, file_name=None, create: bool = False, preserve_symlinks: bool = False, **kwargs + ) -> None: rel_path = self._construct_path(obj, **kwargs) log.debug("rucio _update_from_file: %s", rel_path) diff --git a/lib/galaxy/selenium/cli.py b/lib/galaxy/selenium/cli.py index f52f3b3a7332..109343ea5961 100644 --- a/lib/galaxy/selenium/cli.py +++ b/lib/galaxy/selenium/cli.py @@ -73,7 +73,7 @@ def __init__(self, args): self.driver = driver self.target_url = args.galaxy_url - def build_url(self, url=""): + def build_url(self, url="", for_selenium: bool = True): return urljoin(self.target_url, url) def screenshot(self, label: str) -> None: diff --git a/lib/galaxy/tool_shed/galaxy_install/metadata/installed_repository_metadata_manager.py b/lib/galaxy/tool_shed/galaxy_install/metadata/installed_repository_metadata_manager.py index e2ed787bdba2..02272ddfe06f 100644 --- a/lib/galaxy/tool_shed/galaxy_install/metadata/installed_repository_metadata_manager.py +++ b/lib/galaxy/tool_shed/galaxy_install/metadata/installed_repository_metadata_manager.py @@ -192,7 +192,9 @@ def reset_metadata_on_selected_repositories(self, user, **kwd): status = "error" return message, status - def set_repository(self, repository): + def set_repository( + self, repository, relative_install_dir: Optional[str] = None, changeset_revision: Optional[str] = None + ): super().set_repository(repository) self.repository_clone_url = common_util.generate_clone_url_for_installed_repository(self.app, repository) diff --git a/lib/galaxy/tool_util/deps/resolvers/tool_shed_packages.py b/lib/galaxy/tool_util/deps/resolvers/tool_shed_packages.py index 30a8a1bcc398..137edf56a6e9 100644 --- a/lib/galaxy/tool_util/deps/resolvers/tool_shed_packages.py +++ b/lib/galaxy/tool_util/deps/resolvers/tool_shed_packages.py @@ -31,7 +31,7 @@ def _find_dep_versioned(self, name, version, type="package", **kwds): else: return NullDependency(version=version, name=name) - def _find_dep_default(self, name, type="package", **kwds): + def _find_dep_default(self, name, type="package", exact=True, **kwds): if type == "set_environment" and kwds.get("installed_tool_dependencies", None): installed_tool_dependency = self._get_installed_dependency(name, type, version=None, **kwds) if installed_tool_dependency: diff --git a/lib/galaxy/tool_util/toolbox/base.py b/lib/galaxy/tool_util/toolbox/base.py index 3f50c28cecea..f6ab7bfa98f1 100644 --- a/lib/galaxy/tool_util/toolbox/base.py +++ b/lib/galaxy/tool_util/toolbox/base.py @@ -33,7 +33,6 @@ unicodify, ) from galaxy.util.bunch import Bunch -from galaxy.util.dictifiable import Dictifiable from .filters import FilterFactory from .integrated_panel import ManagesIntegratedToolPanelMixin from .lineages import LineageMap @@ -134,7 +133,7 @@ def handle_tags(self, tool_id, tool_definition_source) -> None: return None -class AbstractToolBox(Dictifiable, ManagesIntegratedToolPanelMixin): +class AbstractToolBox(ManagesIntegratedToolPanelMixin): """ Abstract container for managing a ToolPanel - containing tools and workflows optionally in labelled sections. @@ -246,7 +245,7 @@ def _default_panel_view(self, trans): config_value = getattr(config, "default_panel_view", None) return config_value or self.__default_panel_view - def create_tool(self, config_file, tool_shed_repository=None, guid=None, **kwds): + def create_tool(self, config_file, tool_cache_data_dir=None, **kwds): raise NotImplementedError() def create_dynamic_tool(self, dynamic_tool): @@ -1106,10 +1105,10 @@ def load_tool( if not tool or guid and guid != tool.guid: try: tool = self.create_tool( - config_file=config_file, + config_file, + tool_cache_data_dir=tool_cache_data_dir, tool_shed_repository=tool_shed_repository, guid=guid, - tool_cache_data_dir=tool_cache_data_dir, **kwds, ) except Exception: diff --git a/lib/galaxy/tool_util/toolbox/panel.py b/lib/galaxy/tool_util/toolbox/panel.py index 0dd2fbbb3525..85ebdaa06d0d 100644 --- a/lib/galaxy/tool_util/toolbox/panel.py +++ b/lib/galaxy/tool_util/toolbox/panel.py @@ -6,7 +6,7 @@ Tuple, ) -from galaxy.util.dictifiable import Dictifiable +from galaxy.util.dictifiable import UsesDictVisibleKeys from galaxy.util.odict import odict from .parser import ensure_tool_conf_item @@ -44,7 +44,7 @@ def panel_items_iter(self): yield (panel_key, panel_type, panel_value) -class ToolSection(Dictifiable, HasPanelItems): +class ToolSection(UsesDictVisibleKeys, HasPanelItems): """ A group of tools with similar type/purpose that will be displayed as a group in the user interface. @@ -107,7 +107,7 @@ def to_dict(self, trans, link_details=False, tool_help=False, toolbox=None, only `section.elems` """ - section_dict = super().to_dict() + section_dict = super()._dictify_view_keys() section_elts = [] kwargs = dict(trans=trans, link_details=link_details, tool_help=tool_help) for elt in self.elems.values(): @@ -131,7 +131,7 @@ def panel_items(self): return self.elems -class ToolSectionLabel(Dictifiable): +class ToolSectionLabel(UsesDictVisibleKeys): """ A label for a set of tools that can be displayed above groups of tools and sections in the user interface @@ -151,7 +151,7 @@ def __init__(self, item): self.links = item.get("links", None) def to_dict(self, **kwds): - return super().to_dict() + return super()._dictify_view_keys() class ToolPanelElements(odict, HasPanelItems): diff --git a/lib/galaxy/tool_util/unittest_utils/parameters.py b/lib/galaxy/tool_util/unittest_utils/parameters.py index d3be68b7cca2..71738b5f4694 100644 --- a/lib/galaxy/tool_util/unittest_utils/parameters.py +++ b/lib/galaxy/tool_util/unittest_utils/parameters.py @@ -1,5 +1,4 @@ import os -from typing import List from galaxy.tool_util.parameters import ( from_input_source, @@ -11,7 +10,6 @@ class ParameterBundle(ToolParameterBundle): - input_models: List[ToolParameterT] def __init__(self, parameter: ToolParameterT): self.input_models = [parameter] diff --git a/lib/galaxy/tools/__init__.py b/lib/galaxy/tools/__init__.py index fdb530888a5a..18bd68c369d9 100644 --- a/lib/galaxy/tools/__init__.py +++ b/lib/galaxy/tools/__init__.py @@ -164,7 +164,7 @@ ) from galaxy.util.bunch import Bunch from galaxy.util.compression_utils import get_fileobj_raw -from galaxy.util.dictifiable import Dictifiable +from galaxy.util.dictifiable import UsesDictVisibleKeys from galaxy.util.expressions import ExpressionContext from galaxy.util.form_builder import SelectField from galaxy.util.json import ( @@ -730,7 +730,7 @@ class _Options(Bunch): refresh: str -class Tool(Dictifiable): +class Tool(UsesDictVisibleKeys): """ Represents a computational tool that can be executed through Galaxy. """ @@ -2397,7 +2397,7 @@ def to_dict(self, trans, link_details=False, io_details=False, tool_help=False): """Returns dict of tool.""" # Basic information - tool_dict = super().to_dict() + tool_dict = self._dictify_view_keys() tool_dict["edam_operations"] = self.edam_operations tool_dict["edam_topics"] = self.edam_topics diff --git a/lib/galaxy/tools/error_reports/plugins/__init__.py b/lib/galaxy/tools/error_reports/plugins/__init__.py index 97de6441cb30..08e03097775e 100644 --- a/lib/galaxy/tools/error_reports/plugins/__init__.py +++ b/lib/galaxy/tools/error_reports/plugins/__init__.py @@ -17,6 +17,6 @@ class ErrorPlugin(metaclass=ABCMeta): def plugin_type(self): """Short string providing labelling this plugin""" - def submit_report(self, dataset, job, tool, user_submission=False, **kwargs): + def submit_report(self, dataset, job, tool, **kwargs): """Submit the bug report and render a string to be displayed to the user.""" return None diff --git a/lib/galaxy/tools/error_reports/plugins/slack.py b/lib/galaxy/tools/error_reports/plugins/slack.py index 53affb80bc1a..3c4f273ed034 100644 --- a/lib/galaxy/tools/error_reports/plugins/slack.py +++ b/lib/galaxy/tools/error_reports/plugins/slack.py @@ -17,7 +17,7 @@ class SlackPlugin(BaseGitPlugin): - """Send error report to Sentry.""" + """Send error report to Slack.""" plugin_type = "slack" diff --git a/lib/galaxy/tools/parameters/basic.py b/lib/galaxy/tools/parameters/basic.py index 5e8c355b3989..5e84b7782fc6 100644 --- a/lib/galaxy/tools/parameters/basic.py +++ b/lib/galaxy/tools/parameters/basic.py @@ -55,7 +55,7 @@ unicodify, XML, ) -from galaxy.util.dictifiable import Dictifiable +from galaxy.util.dictifiable import UsesDictVisibleKeys from galaxy.util.expressions import ExpressionContext from galaxy.util.hash_util import HASH_NAMES from galaxy.util.rules_dsl import RuleSet @@ -161,7 +161,7 @@ def to_dict(self): return as_dict -class ToolParameter(Dictifiable): +class ToolParameter(UsesDictVisibleKeys): """ Describes a parameter accepted by a tool. This is just a simple stub at the moment but in the future should encapsulate more complex parameters (lists @@ -318,7 +318,7 @@ def validate(self, value, trans=None) -> None: def to_dict(self, trans, other_values=None): """to_dict tool parameter. This can be overridden by subclasses.""" other_values = other_values or {} - tool_dict = super().to_dict() + tool_dict = self._dictify_view_keys() tool_dict["model_class"] = self.__class__.__name__ tool_dict["optional"] = self.optional tool_dict["hidden"] = self.hidden @@ -464,7 +464,7 @@ def __init__(self, tool, input_source): if self.min is not None or self.max is not None: self.validators.append(validation.InRangeValidator(None, self.min, self.max)) - def from_json(self, value, trans, other_values=None): + def from_json(self, value, trans=None, other_values=None): other_values = other_values or {} try: return int(value) @@ -536,7 +536,7 @@ def __init__(self, tool, input_source): if self.min is not None or self.max is not None: self.validators.append(validation.InRangeValidator(None, self.min, self.max)) - def from_json(self, value, trans, other_values=None): + def from_json(self, value, trans=None, other_values=None): other_values = other_values or {} try: return float(value) @@ -992,7 +992,10 @@ def get_legal_names(self, trans, other_values): """ return {n: v for n, v, _ in self.get_options(trans, other_values)} - def from_json(self, value, trans, other_values=None, require_legal_value=True): + def from_json(self, value, trans=None, other_values=None): + return self._select_from_json(value, trans, other_values=other_values, require_legal_value=True) + + def _select_from_json(self, value, trans, other_values=None, require_legal_value=True): other_values = other_values or {} try: legal_values = self.get_legal_values(trans, other_values, value) @@ -1268,7 +1271,7 @@ def __init__(self, tool, input_source): self.default_value = input_source.get("value", None) self.is_dynamic = True - def from_json(self, value, trans, other_values=None): + def from_json(self, value, trans=None, other_values=None): other_values = other_values or {} if self.multiple: tag_list = [] @@ -1290,7 +1293,7 @@ def from_json(self, value, trans, other_values=None): value = None # We skip requiring legal values -- this is similar to optional, but allows only subset of datasets to be positive # TODO: May not actually be required for (nested) collection input ? - return super().from_json(value, trans, other_values, require_legal_value=False) + return super()._select_from_json(value, trans, other_values, require_legal_value=False) def get_tag_list(self, other_values): """ @@ -1395,7 +1398,7 @@ def to_json(self, value, app, use_security): return value.strip() return value - def from_json(self, value, trans, other_values=None): + def from_json(self, value, trans=None, other_values=None): """ Label convention prepends column number with a 'c', but tool uses the integer. This removes the 'c' when entered into a workflow. @@ -1665,9 +1668,9 @@ def recurse_option_elems(cur_options, option_elems): elif not self.dynamic_options: recurse_option_elems(self.options, elem.find("options").findall("option")) - def _get_options_from_code(self, trans=None, value=None, other_values=None): + def _get_options_from_code(self, trans=None, other_values=None): assert self.dynamic_options, Exception("dynamic_options was not specifed") - call_other_values = ExpressionContext({"__trans__": trans, "__value__": value}) + call_other_values = ExpressionContext({"__trans__": trans, "__value__": None}) if other_values: call_other_values.parent = other_values.parent call_other_values.update(other_values.dict) @@ -1676,11 +1679,11 @@ def _get_options_from_code(self, trans=None, value=None, other_values=None): except Exception: return [] - def get_options(self, trans=None, value=None, other_values=None): + def get_options(self, trans=None, other_values=None): other_values = other_values or {} if self.is_dynamic: if self.dynamic_options: - options = self._get_options_from_code(trans=trans, value=value, other_values=other_values) + options = self._get_options_from_code(trans=trans, other_values=other_values) else: options = [] for filter_key, filter_value in self.filtered.items(): @@ -1710,7 +1713,7 @@ def recurse_options(legal_values, options): recurse_options(legal_values, self.get_options(trans=trans, other_values=other_values)) return legal_values - def from_json(self, value, trans, other_values=None): + def from_json(self, value, trans=None, other_values=None): other_values = other_values or {} legal_values = self.get_legal_values(trans, other_values, value) if not legal_values and trans.workflow_building_mode: @@ -2111,7 +2114,7 @@ def __init__(self, tool, input_source, trans=None): ) self.conversions.append((name, conv_extension, [conv_type])) - def from_json(self, value, trans, other_values=None): + def from_json(self, value, trans=None, other_values=None): session = trans.sa_session other_values = other_values or {} @@ -2468,7 +2471,7 @@ def match_multirun_collections(self, trans, history, dataset_collection_matcher) if match: yield history_dataset_collection, match.implicit_conversion - def from_json(self, value, trans, other_values=None): + def from_json(self, value, trans=None, other_values=None): session = trans.sa_session other_values = other_values or {} diff --git a/lib/galaxy/tools/parameters/grouping.py b/lib/galaxy/tools/parameters/grouping.py index ebf64542dd2e..9962e61cf624 100644 --- a/lib/galaxy/tools/parameters/grouping.py +++ b/lib/galaxy/tools/parameters/grouping.py @@ -29,7 +29,7 @@ sanitize_for_filename, ) from galaxy.util.bunch import Bunch -from galaxy.util.dictifiable import Dictifiable +from galaxy.util.dictifiable import UsesDictVisibleKeys from galaxy.util.expressions import ExpressionContext if TYPE_CHECKING: @@ -55,7 +55,7 @@ ] -class Group(Dictifiable): +class Group(UsesDictVisibleKeys): dict_collection_visible_keys = ["name", "type"] type: str @@ -87,7 +87,7 @@ def get_initial_value(self, trans, context): raise TypeError("Not implemented") def to_dict(self, trans): - group_dict = super().to_dict() + group_dict = self._dictify_view_keys() return group_dict @@ -822,7 +822,7 @@ def nested_to_dict(input): return cond_dict -class ConditionalWhen(Dictifiable): +class ConditionalWhen(UsesDictVisibleKeys): dict_collection_visible_keys = ["value"] def __init__(self): @@ -832,7 +832,7 @@ def __init__(self): def to_dict(self, trans): if self.inputs is None: raise Exception("Must set 'inputs' attribute to use.") - when_dict = super().to_dict() + when_dict = self._dictify_view_keys() def input_to_dict(input): return input.to_dict(trans) diff --git a/lib/galaxy/util/__init__.py b/lib/galaxy/util/__init__.py index 607207eb872c..24ea05f697da 100644 --- a/lib/galaxy/util/__init__.py +++ b/lib/galaxy/util/__init__.py @@ -1134,7 +1134,7 @@ def commaify(amount): @overload -def unicodify( # type: ignore[overload-overlap] +def unicodify( # type: ignore[overload-overlap] # ignore can be removed in mypy >=1.11.0 value: Literal[None], encoding: str = DEFAULT_ENCODING, error: str = "replace", diff --git a/lib/galaxy/util/dictifiable.py b/lib/galaxy/util/dictifiable.py index 4d099884424b..d40c7f8f1f93 100644 --- a/lib/galaxy/util/dictifiable.py +++ b/lib/galaxy/util/dictifiable.py @@ -7,18 +7,26 @@ Optional, ) +ValueMapperT = Dict[str, Callable] + def dict_for(obj, **kwds): # Create dict to represent item. return dict(model_class=obj.__class__.__name__, **kwds) -class Dictifiable: - """Mixin that enables objects to be converted to dictionaries. This is useful - when for sharing objects across boundaries, such as the API, tool scripts, - and JavaScript code.""" +class UsesDictVisibleKeys: + """Mixin used to implement to_dict methods that consume dict_{view}_visible_keys to produce dicts. - def to_dict(self, view: str = "collection", value_mapper: Optional[Dict[str, Callable]] = None) -> Dict[str, Any]: + For typical to_dict methods that just consume a view and value mapper use the Dictifable mixin instead + of this more low level mixin, but if you want to consume other things in your to_dict method that + are incompatible (such as required arguments) - inherit this lower level mixin and implement a custom + to_dict with whatever signature makes sense for the class. + """ + + def _dictify_view_keys( + self, view: str = "collection", value_mapper: Optional[ValueMapperT] = None + ) -> Dict[str, Any]: """ Return item dictionary. """ @@ -70,3 +78,15 @@ def get_value(key, item): rval[key] = None return rval + + +class Dictifiable(UsesDictVisibleKeys): + """Mixin that enables objects to be converted to dictionaries. This is useful + when for sharing objects across boundaries, such as the API, tool scripts, + and JavaScript code.""" + + def to_dict(self, view: str = "collection", value_mapper: Optional[ValueMapperT] = None) -> Dict[str, Any]: + """ + Return item dictionary. + """ + return self._dictify_view_keys(view=view, value_mapper=value_mapper) diff --git a/lib/galaxy/util/odict.py b/lib/galaxy/util/odict.py index aeb66cd20039..4b3fdbc2dc7f 100644 --- a/lib/galaxy/util/odict.py +++ b/lib/galaxy/util/odict.py @@ -71,10 +71,6 @@ def setdefault(self, key, failobj=None): self._keys.append(key) return UserDict.setdefault(self, key, failobj) - def update(self, dict): - for key, val in dict.items(): - self.__setitem__(key, val) - def values(self): return map(self.get, self._keys) diff --git a/lib/galaxy/visualization/data_providers/basic.py b/lib/galaxy/visualization/data_providers/basic.py index 356bcebcc9c3..686b5e870c24 100644 --- a/lib/galaxy/visualization/data_providers/basic.py +++ b/lib/galaxy/visualization/data_providers/basic.py @@ -1,6 +1,4 @@ -import sys from json import loads -from typing import Iterator from galaxy.datatypes.tabular import Tabular from galaxy.model import DatasetInstance @@ -30,42 +28,6 @@ def __init__( self.dependencies = dependencies self.error_max_vals = error_max_vals - def has_data(self, **kwargs): - """ - Returns true if dataset has data in the specified genome window, false - otherwise. - """ - raise Exception("Unimplemented Function") - - def get_iterator(self, data_file, chrom, start, end, **kwargs) -> Iterator[str]: - """ - Returns an iterator that provides data in the region chrom:start-end - """ - raise Exception("Unimplemented Function") - - def process_data(self, iterator, start_val=0, max_vals=None, **kwargs): - """ - Process data from an iterator to a format that can be provided to client. - """ - raise Exception("Unimplemented Function") - - def get_data(self, chrom, start, end, start_val=0, max_vals=sys.maxsize, **kwargs): - """ - Returns data as specified by kwargs. start_val is the first element to - return and max_vals indicates the number of values to return. - - Return value must be a dictionary with the following attributes: - dataset_type, data - """ - iterator = self.get_iterator(chrom, start, end) - return self.process_data(iterator, start_val, max_vals, **kwargs) - - def write_data_to_file(self, filename, **kwargs): - """ - Write data in region defined by chrom, start, and end to a file. - """ - raise Exception("Unimplemented Function") - class ColumnDataProvider(BaseDataProvider): """Data provider for columnar data""" diff --git a/lib/galaxy/visualization/data_providers/genome.py b/lib/galaxy/visualization/data_providers/genome.py index 9735167c3843..f241ceebf286 100644 --- a/lib/galaxy/visualization/data_providers/genome.py +++ b/lib/galaxy/visualization/data_providers/genome.py @@ -44,6 +44,8 @@ from galaxy.visualization.data_providers.basic import BaseDataProvider from galaxy.visualization.data_providers.cigar import get_ref_based_read_seq_and_cigar +IntWebParam = Union[str, int] + # # Utility functions. # @@ -98,7 +100,10 @@ def _chrom_naming_matches(chrom1, chrom2): ) -class FeatureLocationIndexDataProvider(BaseDataProvider): +# Was previously a BaseDataProvider but it doesn't have the same interface or use any of +# the parent methods. It is also only used explicitly, constructed by name, and access with +# get_data directly using a specified query. +class FeatureLocationIndexDataProvider: """ Reads/writes/queries feature location index (FLI) datasets. """ @@ -185,19 +190,13 @@ def __init__( error_max_vals=error_max_vals, ) - def write_data_to_file(self, regions, filename): - """ - Write data in region defined by chrom, start, and end to a file. - """ - raise Exception("Unimplemented Function") - def valid_chroms(self): """ Returns chroms/contigs that the dataset contains """ return None # by default - def has_data(self, chrom, start, end, **kwargs): + def has_data(self, chrom): """ Returns true if dataset has data in the specified genome window, false otherwise. @@ -217,13 +216,13 @@ def get_iterator(self, data_file, chrom, start, end, **kwargs) -> Iterator[str]: """ raise Exception("Unimplemented Function") - def process_data(self, iterator, start_val=0, max_vals=None, **kwargs): + def process_data(self, iterator, start_val=0, max_vals=sys.maxsize, **kwargs): """ Process data from an iterator to a format that can be provided to client. """ raise Exception("Unimplemented Function") - def get_data(self, chrom=None, low=None, high=None, start_val=0, max_vals=sys.maxsize, **kwargs): + def get_data(self, chrom: str, start: IntWebParam, end: IntWebParam, start_val=0, max_vals=sys.maxsize, **kwargs): """ Returns data in region defined by chrom, start, and end. start_val and max_vals are used to denote the data to return: start_val is the first element to @@ -232,7 +231,7 @@ def get_data(self, chrom=None, low=None, high=None, start_val=0, max_vals=sys.ma Return value must be a dictionary with the following attributes: dataset_type, data """ - start, end = int(low), int(high) + start, end = int(start), int(end) with self.open_data_file() as data_file: iterator = self.get_iterator(data_file, chrom, start, end, **kwargs) data = self.process_data(iterator, start_val, max_vals, start=start, end=end, **kwargs) @@ -384,14 +383,6 @@ def get_iterator(self, data_file, chrom, start, end, **kwargs) -> Iterator[str]: return iterator - def write_data_to_file(self, regions, filename): - with self.open_data_file() as data_file, open(filename, "w") as out: - for region in regions: - # Write data in region. - iterator = self.get_iterator(data_file, region.chrom, region.start, region.end) - for line in iterator: - out.write(f"{line}\n") - # # -- Interval data providers -- @@ -410,7 +401,7 @@ class IntervalDataProvider(GenomeDataProvider): def get_iterator(self, data_file, chrom, start, end, **kwargs): raise Exception("Unimplemented Function") - def process_data(self, iterator, start_val=0, max_vals=None, **kwargs): + def process_data(self, iterator, start_val=0, max_vals=sys.maxsize, **kwargs): """ Provides """ @@ -468,9 +459,6 @@ def col_fn(col): return {"data": rval, "message": message} - def write_data_to_file(self, regions, filename): - raise Exception("Unimplemented Function") - class IntervalTabixDataProvider(TabixDataProvider, IntervalDataProvider): """ @@ -495,7 +483,7 @@ class BedDataProvider(GenomeDataProvider): def get_iterator(self, data_file, chrom, start, end, **kwargs): raise Exception("Unimplemented Method") - def process_data(self, iterator, start_val=0, max_vals=None, **kwargs): + def process_data(self, iterator, start_val=0, max_vals=sys.maxsize, **kwargs): """ Provides """ @@ -561,18 +549,6 @@ def process_data(self, iterator, start_val=0, max_vals=None, **kwargs): return {"data": rval, "dataset_type": self.dataset_type, "message": message} - def write_data_to_file(self, regions, filename): - with open(filename, "w") as out: - for region in regions: - # Write data in region. - chrom = region.chrom - start = region.start - end = region.end - with self.open_data_file() as data_file: - iterator = self.get_iterator(data_file, chrom, start, end) - for line in iterator: - out.write(f"{line}\n") - class BedTabixDataProvider(TabixDataProvider, BedDataProvider): """ @@ -645,7 +621,7 @@ class VcfDataProvider(GenomeDataProvider): dataset_type = "variant" - def process_data(self, iterator, start_val=0, max_vals=None, **kwargs): + def process_data(self, iterator, start_val=0, max_vals=sys.maxsize, **kwargs): """ Returns a dict with the following attributes:: @@ -757,15 +733,6 @@ def get_mapping(ref, alt): return {"data": data, "message": message} - def write_data_to_file(self, regions, filename): - out = open(filename, "w") - with self.open_data_file() as data_file: - for region in regions: - # Write data in region. - iterator = self.get_iterator(data_file, region.chrom, region.start, region.end) - for line in iterator: - out.write(f"{line}\n") - class VcfTabixDataProvider(TabixDataProvider, VcfDataProvider): """ @@ -844,43 +811,6 @@ def get_filters(self): filters.append({"name": "Mapping Quality", "type": "number", "index": filter_col}) return filters - def write_data_to_file(self, regions, filename): - """ - Write reads in regions to file. - """ - - # Open current BAM file using index. - bamfile = pysam.AlignmentFile( - self.original_dataset.get_file_name(), mode="rb", index_filename=self.converted_dataset.get_file_name() - ) - - # TODO: write headers as well? - new_bamfile = pysam.AlignmentFile(filename, template=bamfile, mode="wb") - - for region in regions: - # Write data from region. - chrom = region.chrom - start = region.start - end = region.end - - try: - data = bamfile.fetch(start=start, end=end, reference=chrom) - except ValueError: - # Try alternative chrom naming. - chrom = _convert_between_ucsc_and_ensemble_naming(chrom) - try: - data = bamfile.fetch(start=start, end=end, reference=chrom) - except ValueError: - return None - - # Write reads in region. - for read in data: - new_bamfile.write(read) - - # Cleanup. - new_bamfile.close() - bamfile.close() - @contextmanager def open_data_file(self): # Attempt to open the BAM file with index @@ -913,7 +843,7 @@ def process_data( self, iterator, start_val=0, - max_vals=None, + max_vals=sys.maxsize, ref_seq=None, iterator_type="nth", mean_depth=None, @@ -1189,7 +1119,7 @@ def has_data(self, chrom): f.close() return all_dat is not None - def get_data(self, chrom, start, end, start_val=0, max_vals=None, num_samples=1000, **kwargs): + def get_data(self, chrom: str, start, end, start_val=0, max_vals=sys.maxsize, **kwargs): start = int(start) end = int(end) @@ -1261,7 +1191,7 @@ def summarize_region(bbi, chrom, start, end, num_points): return result # Approach is different depending on region size. - num_samples = int(num_samples) + num_samples = int(kwargs.get("num_samples", 100)) if end - start < num_samples: # Get values for individual bases in region, including start and end. # To do this, need to increase end to next base and request number of points. @@ -1328,27 +1258,6 @@ class IntervalIndexDataProvider(GenomeDataProvider, FilterableMixin): dataset_type = "interval_index" - def write_data_to_file(self, regions, filename): - index = Indexes(self.converted_dataset.get_file_name()) - with open(self.original_dataset.get_file_name()) as source, open(filename, "w") as out: - for region in regions: - # Write data from region. - chrom = region.chrom - start = region.start - end = region.end - for _start, _end, offset in index.find(chrom, start, end): - source.seek(offset) - - # HACK: write differently depending on original dataset format. - if self.original_dataset.ext not in ["gff", "gff3", "gtf"]: - line = source.readline() - out.write(line) - else: - reader = GFFReaderWrapper(source, fix_strand=True) - feature = next(reader) - for interval in feature.intervals: - out.write("\t".join(interval.fields) + "\n") - @contextmanager def open_data_file(self): i = Indexes(self.converted_dataset.get_file_name()) @@ -1364,7 +1273,7 @@ def get_iterator(self, data_file, chrom, start, end, **kwargs) -> Iterator[str]: return data_file.find(chrom, start, end) - def process_data(self, iterator, start_val=0, max_vals=None, **kwargs): + def process_data(self, iterator, start_val=0, max_vals=sys.maxsize, **kwargs): results = [] message = None with open(self.original_dataset.get_file_name()) as source: @@ -1438,7 +1347,7 @@ def features_in_region_iter(): return features_in_region_iter() - def process_data(self, iterator, start_val=0, max_vals=None, **kwargs): + def process_data(self, iterator, start_val=0, max_vals=sys.maxsize, **kwargs): """ Process data from an iterator to a format that can be provided to client. """ @@ -1466,7 +1375,7 @@ class GtfTabixDataProvider(TabixDataProvider): Returns data from GTF datasets that are indexed via tabix. """ - def process_data(self, iterator, start_val=0, max_vals=None, **kwargs): + def process_data(self, iterator, start_val=0, max_vals=sys.maxsize, **kwargs): # Loop through lines and group by transcript_id; each group is a feature. # TODO: extend this code or use code in gff_util to process GFF/3 as well @@ -1521,7 +1430,7 @@ class ENCODEPeakDataProvider(GenomeDataProvider): def get_iterator(self, data_file, chrom, start, end, **kwargs): raise Exception("Unimplemented Method") - def process_data(self, iterator, start_val=0, max_vals=None, **kwargs): + def process_data(self, iterator, start_val=0, max_vals=sys.maxsize, **kwargs): """ Provides """ @@ -1621,7 +1530,7 @@ def get_filters(self): class ChromatinInteractionsDataProvider(GenomeDataProvider): - def process_data(self, iterator, start_val=0, max_vals=None, **kwargs): + def process_data(self, iterator, start_val=0, max_vals=sys.maxsize, **kwargs): """ Provides """ diff --git a/lib/galaxy_test/driver/driver_util.py b/lib/galaxy_test/driver/driver_util.py index a2ee29f5f774..43f096a963e0 100644 --- a/lib/galaxy_test/driver/driver_util.py +++ b/lib/galaxy_test/driver/driver_util.py @@ -799,7 +799,7 @@ def __init__(self): self.server_wrappers: List[ServerWrapper] = [] self.temp_directories: List[str] = [] - def setup(self, config_object=None) -> None: + def setup(self) -> None: """Called before tests are built.""" def tear_down(self) -> None: diff --git a/lib/galaxy_test/selenium/test_workflow_editor.py b/lib/galaxy_test/selenium/test_workflow_editor.py index be5433ef3f18..ececfacac313 100644 --- a/lib/galaxy_test/selenium/test_workflow_editor.py +++ b/lib/galaxy_test/selenium/test_workflow_editor.py @@ -1481,12 +1481,6 @@ def workflow_index_open_with_name(self, name): self.workflow_index_search_for(name) self.components.workflows.edit_button.wait_for_and_click() - def workflow_upload_yaml_with_random_name(self, content): - workflow_populator = self.workflow_populator - name = self._get_random_name() - workflow_populator.upload_yaml_workflow(content, name=name) - return name - @retry_assertion_during_transitions def assert_wf_name_is(self, expected_name): edit_name_element = self.components.workflow_editor.edit_name.wait_for_visible() diff --git a/lib/tool_shed/metadata/repository_metadata_manager.py b/lib/tool_shed/metadata/repository_metadata_manager.py index 31a1fc45cac0..1f9b817e0230 100644 --- a/lib/tool_shed/metadata/repository_metadata_manager.py +++ b/lib/tool_shed/metadata/repository_metadata_manager.py @@ -957,9 +957,11 @@ def reset_metadata_on_selected_repositories(self, **kwd): status = "error" return message, status - def set_repository(self, repository, repository_clone_url=None): + def set_repository( + self, repository, relative_install_dir: Optional[str] = None, changeset_revision: Optional[str] = None + ): super().set_repository(repository) - self.repository_clone_url = repository_clone_url or common_util.generate_clone_url_for(self.trans, repository) + self.repository_clone_url = relative_install_dir or common_util.generate_clone_url_for(self.trans, repository) def set_repository_metadata(self, host, content_alert_str="", **kwd): """ diff --git a/lib/tool_shed/test/base/driver.py b/lib/tool_shed/test/base/driver.py index a4fa67efdfb5..5fe5280d786c 100644 --- a/lib/tool_shed/test/base/driver.py +++ b/lib/tool_shed/test/base/driver.py @@ -66,7 +66,7 @@ def build_shed_app(simple_kwargs): class ToolShedTestDriver(driver_util.TestDriver): """Instantiate a Galaxy-style TestDriver for testing the tool shed.""" - def setup(self): + def setup(self) -> None: """Entry point for test driver script.""" self.external_shed = bool(os.environ.get("TOOL_SHED_TEST_EXTERNAL", None)) if not self.external_shed: diff --git a/test/unit/app/authnz/test_custos_authnz.py b/test/unit/app/authnz/test_custos_authnz.py index 0a940f5695d7..70398ec71ec1 100644 --- a/test/unit/app/authnz/test_custos_authnz.py +++ b/test/unit/app/authnz/test_custos_authnz.py @@ -631,7 +631,12 @@ def test_disconnect(self): provider = custos_authnz_token.provider email = custos_authnz_token.user.email - success, message, redirect_uri = self.custos_authnz.disconnect(provider, self.trans, email, "/") + success, message, redirect_uri = self.custos_authnz.disconnect( + provider, + self.trans, + disconnect_redirect_url="/", + email=email, + ) assert 1 == len(self.trans.sa_session.deleted) deleted_token = self.trans.sa_session.deleted[0] diff --git a/test/unit/app/tools/test_metadata.py b/test/unit/app/tools/test_metadata.py index 7e1784a84caa..8f83c6b90ef6 100644 --- a/test/unit/app/tools/test_metadata.py +++ b/test/unit/app/tools/test_metadata.py @@ -18,7 +18,7 @@ class TestMetadata(TestCase, tools_support.UsesTools): def setUp(self): super().setUp() self.setup_app() - model.Dataset.object_store = self.app.object_store # type: ignore[assignment] + model.Dataset.object_store = self.app.object_store job = model.Job() sa_session = self.app.model.session sa_session.add(job) diff --git a/test/unit/tool_util/test_parameter_specification.py b/test/unit/tool_util/test_parameter_specification.py index 1ad8332230fc..81ebc33b3017 100644 --- a/test/unit/tool_util/test_parameter_specification.py +++ b/test/unit/tool_util/test_parameter_specification.py @@ -14,13 +14,13 @@ encode, RequestInternalToolState, RequestToolState, - ToolParameterModel, validate_internal_job, validate_internal_request, validate_request, validate_test_case, ) from galaxy.tool_util.parameters.json import to_json_schema_string +from galaxy.tool_util.parameters.models import ToolParameterT from galaxy.tool_util.unittest_utils.parameters import ( parameter_bundle, parameter_bundle_for_file, @@ -85,12 +85,12 @@ def _test_file(file: str, specification=None): _assert_internal_requests_invalid(tool_parameter_model, combos["request_invalid"]) -def _for_each(test: Callable, parameter: ToolParameterModel, requests: List[Dict[str, Any]]) -> None: +def _for_each(test: Callable, parameter: ToolParameterT, requests: List[Dict[str, Any]]) -> None: for request in requests: test(parameter, request) -def _assert_request_validates(parameter, request) -> None: +def _assert_request_validates(parameter: ToolParameterT, request: Dict[str, Any]) -> None: try: validate_request(parameter_bundle(parameter), request) except RequestParameterInvalidException as e: diff --git a/test/unit/webapps/test_service_base.py b/test/unit/webapps/test_service_base.py index 993b841c133e..4a8f351af080 100644 --- a/test/unit/webapps/test_service_base.py +++ b/test/unit/webapps/test_service_base.py @@ -13,7 +13,7 @@ def __init__(self, expected_filename: str, expected_mime_type: str) -> None: self.expected_filename = expected_filename self.expected_mime_type = expected_mime_type - def new_target(self, filename, mime_type): + def new_target(self, filename, mime_type, duration=None, security=None): assert filename == self.expected_filename assert mime_type == self.expected_mime_type