Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Drop job integrity checks #18675

Draft
wants to merge 1 commit into
base: dev
Choose a base branch
from
Draft
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
33 changes: 0 additions & 33 deletions doc/source/admin/galaxy_options.rst
Original file line number Diff line number Diff line change
Expand Up @@ -1167,39 +1167,6 @@
:Type: str


~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
``check_job_script_integrity``
~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~

:Description:
Set to false to disable various checks Galaxy will do to ensure it
can run job scripts before attempting to execute or submit them.
:Default: ``true``
:Type: bool


~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
``check_job_script_integrity_count``
~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~

:Description:
Number of checks to execute if check_job_script_integrity is
enabled.
:Default: ``35``
:Type: int


~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
``check_job_script_integrity_sleep``
~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~

:Description:
Time to sleep between checks if check_job_script_integrity is
enabled (in seconds).
:Default: ``0.25``
:Type: float


~~~~~~~~~~~~~~~~~~~~~
``default_job_shell``
~~~~~~~~~~~~~~~~~~~~~
Expand Down
3 changes: 0 additions & 3 deletions lib/galaxy/app_unittest_utils/galaxy_mock.py
Original file line number Diff line number Diff line change
Expand Up @@ -261,9 +261,6 @@ def __init__(self, **kwargs):
self.enable_tool_document_cache = False
self.tool_cache_data_dir = os.path.join(self.root, "tool_cache")
self.external_chown_script = None
self.check_job_script_integrity = False
self.check_job_script_integrity_count = 0
self.check_job_script_integrity_sleep = 0

self.default_panel_view = "default"
self.panel_views_dir = ""
Expand Down
12 changes: 0 additions & 12 deletions lib/galaxy/config/sample/galaxy.yml.sample
Original file line number Diff line number Diff line change
Expand Up @@ -913,18 +913,6 @@ galaxy:
# <cache_dir>.
#template_cache_path: compiled_templates

# Set to false to disable various checks Galaxy will do to ensure it
# can run job scripts before attempting to execute or submit them.
#check_job_script_integrity: true

# Number of checks to execute if check_job_script_integrity is
# enabled.
#check_job_script_integrity_count: 35

# Time to sleep between checks if check_job_script_integrity is
# enabled (in seconds).
#check_job_script_integrity_sleep: 0.25

# Set the default shell used by non-containerized jobs Galaxy-wide.
# This defaults to bash for all jobs and can be overridden at the
# destination level for heterogeneous clusters. conda job resolution
Expand Down
3 changes: 3 additions & 0 deletions lib/galaxy/config/sample/tool_shed.yml.sample
Original file line number Diff line number Diff line change
Expand Up @@ -54,6 +54,9 @@ tool_shed:
# options.
#whoosh_index_dir: database/toolshed_whoosh_indexes

# Cache directory for Pydantic model objects.
#model_cache_dir: database/model_cache

# For searching repositories at /api/repositories:
#repo_name_boost: 0.9

Expand Down
22 changes: 0 additions & 22 deletions lib/galaxy/config/schemas/config_schema.yml
Original file line number Diff line number Diff line change
Expand Up @@ -848,28 +848,6 @@ mapping:
Mako templates are compiled as needed and cached for reuse, this directory is
used for the cache

check_job_script_integrity:
type: bool
default: true
required: false
desc: |
Set to false to disable various checks Galaxy will do to ensure it
can run job scripts before attempting to execute or submit them.

check_job_script_integrity_count:
type: int
default: 35
required: false
desc: |
Number of checks to execute if check_job_script_integrity is enabled.

check_job_script_integrity_sleep:
type: float
default: .25
required: false
desc: |
Time to sleep between checks if check_job_script_integrity is enabled (in seconds).

default_job_shell:
type: str
default: /bin/bash
Expand Down
9 changes: 0 additions & 9 deletions lib/galaxy/job_execution/setup.py
Original file line number Diff line number Diff line change
Expand Up @@ -79,9 +79,6 @@ class JobIO(UsesDictVisibleKeys):
"len_file_path",
"builds_file_path",
"file_sources_dict",
"check_job_script_integrity",
"check_job_script_integrity_count",
"check_job_script_integrity_sleep",
"tool_source",
"tool_source_class",
"tool_dir",
Expand All @@ -105,9 +102,6 @@ def __init__(
new_file_path: str,
len_file_path: str,
builds_file_path: str,
check_job_script_integrity: bool,
check_job_script_integrity_count: int,
check_job_script_integrity_sleep: float,
file_sources_dict: Dict[str, Any],
user_context: Union[FileSourcesUserContext, Dict[str, Any]],
tool_source: Optional[str] = None,
Expand Down Expand Up @@ -137,9 +131,6 @@ def __init__(
self.new_file_path = new_file_path
self.len_file_path = len_file_path
self.builds_file_path = builds_file_path
self.check_job_script_integrity = check_job_script_integrity
self.check_job_script_integrity_count = check_job_script_integrity_count
self.check_job_script_integrity_sleep = check_job_script_integrity_sleep
self.tool_dir = tool_dir
self.is_task = is_task
self.tool_source = tool_source
Expand Down
3 changes: 0 additions & 3 deletions lib/galaxy/jobs/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -1083,9 +1083,6 @@ def job_io(self):
len_file_path=self.app.config.len_file_path,
file_sources_dict=self.app.file_sources.to_dict(for_serialization=True, user_context=user_context),
user_context=user_context,
check_job_script_integrity=self.app.config.check_job_script_integrity,
check_job_script_integrity_count=self.app.config.check_job_script_integrity_count,
check_job_script_integrity_sleep=self.app.config.check_job_script_integrity_sleep,
tool_source=tool_source,
tool_source_class=type(self.tool.tool_source).__name__ if self.tool else None,
tool_dir=self.tool and self.tool.tool_dir,
Expand Down
11 changes: 2 additions & 9 deletions lib/galaxy/jobs/command_factory.py
Original file line number Diff line number Diff line change
Expand Up @@ -13,11 +13,7 @@

from galaxy import util
from galaxy.job_execution.output_collect import default_exit_code_file
from galaxy.jobs.runners.util.job_script import (
INTEGRITY_INJECTION,
ScriptIntegrityChecks,
write_script,
)
from galaxy.jobs.runners.util.job_script import write_script
from galaxy.tool_util.deps.container_classes import (
Container,
TRAP_KILL_CONTAINER,
Expand Down Expand Up @@ -154,7 +150,7 @@ def build_command(
relocate_contents = (
"from galaxy_ext.cwl.handle_outputs import relocate_dynamic_outputs; relocate_dynamic_outputs()"
)
write_script(relocate_script_file, relocate_contents, ScriptIntegrityChecks(check_job_script_integrity=False))
write_script(relocate_script_file, relocate_contents)
commands_builder.append_command(SETUP_GALAXY_FOR_METADATA)
commands_builder.append_command(f"python '{relocate_script_file}'")

Expand Down Expand Up @@ -184,8 +180,6 @@ def __externalize_commands(
# for instance.
if shell and shell.lower() == "none":
return tool_commands
if job_wrapper.job_io.check_job_script_integrity:
integrity_injection = INTEGRITY_INJECTION
set_e = ""
if job_wrapper.strict_shell:
set_e = "set -e\n"
Expand All @@ -196,7 +190,6 @@ def __externalize_commands(
write_script(
local_container_script,
script_contents,
job_io=job_wrapper.job_io,
)
commands = f"{shell} {local_container_script}"
# TODO: Cleanup for_pulsar hack.
Expand Down
5 changes: 2 additions & 3 deletions lib/galaxy/jobs/runners/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -31,7 +31,6 @@
from galaxy.jobs.runners.util import runner_states
from galaxy.jobs.runners.util.env import env_to_statement
from galaxy.jobs.runners.util.job_script import (
DescribesScriptIntegrityChecks,
job_script,
write_script,
)
Expand Down Expand Up @@ -520,8 +519,8 @@ def get_job_file(self, job_wrapper: "MinimalJobWrapper", **kwds) -> str:
options.update(**kwds)
return job_script(**options)

def write_executable_script(self, path: str, contents: str, job_io: DescribesScriptIntegrityChecks) -> None:
write_script(path, contents, job_io)
def write_executable_script(self, path: str, contents: str) -> None:
write_script(path, contents)

def _find_container(
self,
Expand Down
2 changes: 1 addition & 1 deletion lib/galaxy/jobs/runners/aws.py
Original file line number Diff line number Diff line change
Expand Up @@ -605,5 +605,5 @@ def write_command(self, job_wrapper: "MinimalJobWrapper") -> str:
"galaxy_virtual_env": None,
}
job_file_contents = self.get_job_file(job_wrapper, **job_script_props)
self.write_executable_script(job_file, job_file_contents, job_io=job_wrapper.job_io)
self.write_executable_script(job_file, job_file_contents)
return job_file
2 changes: 1 addition & 1 deletion lib/galaxy/jobs/runners/local.py
Original file line number Diff line number Diff line change
Expand Up @@ -80,7 +80,7 @@ def __command_line(self, job_wrapper: "MinimalJobWrapper") -> Tuple[str, str]:
"shell": job_wrapper.shell,
}
job_file_contents = self.get_job_file(job_wrapper, **job_script_props)
self.write_executable_script(job_file, job_file_contents, job_io=job_wrapper.job_io)
self.write_executable_script(job_file, job_file_contents)
return job_file, exit_code_path

def queue_job(self, job_wrapper):
Expand Down
80 changes: 6 additions & 74 deletions lib/galaxy/jobs/runners/util/job_script/__init__.py
Original file line number Diff line number Diff line change
@@ -1,17 +1,11 @@
import logging
import os
import subprocess
import time
from dataclasses import dataclass
from string import Template
from typing import (
Any,
Dict,
Optional,
)

from typing_extensions import Protocol

from galaxy.util import (
RWXR_XR_X,
unicodify,
Expand All @@ -32,19 +26,6 @@
GALAXY_SLOTS="1"
"""

INTEGRITY_INJECTION = """
# The following block can be used by the job system
# to ensure this script is runnable before actually attempting
# to run it.
if [ -n "$ABC_TEST_JOB_SCRIPT_INTEGRITY_XYZ" ]; then
exit 42
fi
"""

INTEGRITY_SYNC_COMMAND = "/bin/sync"
DEFAULT_INTEGRITY_CHECK = True
DEFAULT_INTEGRITY_COUNT = 35
DEFAULT_INTEGRITY_SLEEP = 0.25
REQUIRED_TEMPLATE_PARAMS = ["working_directory", "command"]
OPTIONAL_TEMPLATE_PARAMS: Dict[str, Any] = {
"galaxy_lib": None,
Expand All @@ -54,7 +35,6 @@
"slots_statement": SLOTS_STATEMENT_CLUSTER_DEFAULT,
"instrument_pre_commands": "",
"instrument_post_commands": "",
"integrity_injection": INTEGRITY_INJECTION,
"shell": DEFAULT_SHELL,
"preserve_python_environment": True,
"tmp_dir_creation_statement": '""',
Expand Down Expand Up @@ -113,67 +93,19 @@ def job_script(template=DEFAULT_JOB_FILE_TEMPLATE, **kwds):
return template.safe_substitute(template_params)


class DescribesScriptIntegrityChecks(Protocol):
check_job_script_integrity: bool
check_job_script_integrity_count: Optional[int]
check_job_script_integrity_sleep: Optional[float]


@dataclass
class ScriptIntegrityChecks:
"""Minimal class implementing the DescribesScriptIntegrityChecks protocol"""

check_job_script_integrity: bool
check_job_script_integrity_count: Optional[int] = None
check_job_script_integrity_sleep: Optional[float] = None


def write_script(path: str, contents, job_io: DescribesScriptIntegrityChecks, mode: int = RWXR_XR_X) -> None:
def write_script(path: str, contents, mode: int = RWXR_XR_X, use_fork_safe_write=False) -> None:
dir = os.path.dirname(path)
if not os.path.exists(dir):
os.makedirs(dir)
fork_safe_write(path, contents)
if use_fork_safe_write:
fork_safe_write(path, contents)
else:
with open(path, "w", encoding="utf-8") as f:
f.write(unicodify(contents))
os.chmod(path, mode)
if job_io.check_job_script_integrity:
assert job_io.check_job_script_integrity_count is not None
assert job_io.check_job_script_integrity_sleep is not None
_handle_script_integrity(path, job_io.check_job_script_integrity_count, job_io.check_job_script_integrity_sleep)


def _handle_script_integrity(
path: str, check_job_script_integrity_count: int, check_job_script_integrity_sleep: float
) -> None:
script_integrity_verified = False
for _ in range(check_job_script_integrity_count):
try:
returncode = subprocess.call([path], env={"ABC_TEST_JOB_SCRIPT_INTEGRITY_XYZ": "1"})
if returncode == 42:
script_integrity_verified = True
break

log.debug("Script integrity error for file '%s': returncode was %d", path, returncode)

# Else we will sync and wait to see if the script becomes
# executable.
try:
# sync file system to avoid "Text file busy" problems.
# These have occurred both in Docker containers and on EC2 clusters
# under high load.
subprocess.check_call(INTEGRITY_SYNC_COMMAND)
except Exception as e:
log.debug("Error syncing the filesystem: %s", unicodify(e))

except Exception as exc:
log.debug("Script not available yet: %s", unicodify(exc))

time.sleep(check_job_script_integrity_sleep)

if not script_integrity_verified:
raise Exception(f"Failed to write job script '{path}', could not verify job script integrity.")


__all__ = (
"job_script",
"write_script",
"INTEGRITY_INJECTION",
)
6 changes: 0 additions & 6 deletions test/unit/app/jobs/test_command_factory.py
Original file line number Diff line number Diff line change
Expand Up @@ -220,11 +220,6 @@ def __init__(self, job_dir):
self.working_directory = job_dir
self.prepare_input_files_cmds = None
self.commands_in_new_shell = False
self.app = Bunch(
config=Bunch(
check_job_script_integrity=False,
)
)
self.shell = "/bin/sh"
self.use_metadata_binary = False
self.job_id = 1
Expand All @@ -248,7 +243,6 @@ def setup_external_metadata(self, *args, **kwds):
def job_io(self):
return Bunch(
get_output_fnames=lambda: ["output1"],
check_job_script_integrity=False,
version_path=None,
)

Expand Down
4 changes: 1 addition & 3 deletions test/unit/app/jobs/test_runner_local.py
Original file line number Diff line number Diff line change
Expand Up @@ -189,9 +189,7 @@ def change_state(self, state, job=None):

@property
def job_io(self):
return bunch.Bunch(
get_output_fnames=lambda: [], check_job_script_integrity=False, version_path="/tmp/version_path"
)
return bunch.Bunch(get_output_fnames=lambda: [], version_path="/tmp/version_path")

def get_job(self):
return self.job
Expand Down
3 changes: 0 additions & 3 deletions test/unit/job_execution/test_job_io.py
Original file line number Diff line number Diff line change
Expand Up @@ -70,9 +70,6 @@ def job_io(app: FileSourcesMockApp, job: Job) -> JobIO:
builds_file_path=WORKING_DIRECTORY,
user_context=USER_CONTEXT,
file_sources_dict=app.file_sources.to_dict(for_serialization=True, user_context=user_context),
check_job_script_integrity=False,
check_job_script_integrity_count=1,
check_job_script_integrity_sleep=1,
tool_dir=WORKING_DIRECTORY,
is_task=False,
)
Expand Down
Loading