From 39e5bc4ce24f4f740f4496965459119429e96768 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Jos=C3=A9=20Manuel=20Dom=C3=ADnguez?= Date: Fri, 28 Jun 2024 12:31:26 +0200 Subject: [PATCH] squash kysrpex:interactivetoolssqlalchemy for usegalaxy branch --- doc/source/admin/galaxy_options.rst | 23 ++- .../admin/special_topics/interactivetools.rst | 13 ++ lib/galaxy/config/__init__.py | 46 ++++- lib/galaxy/config/sample/galaxy.yml.sample | 17 +- lib/galaxy/config/schemas/config_schema.yml | 16 +- lib/galaxy/managers/interactivetool.py | 173 ++++++++---------- test/unit/config/test_config_values.py | 38 ++++ 7 files changed, 217 insertions(+), 109 deletions(-) diff --git a/doc/source/admin/galaxy_options.rst b/doc/source/admin/galaxy_options.rst index cdf82ab688fd..d2d2994999b7 100644 --- a/doc/source/admin/galaxy_options.rst +++ b/doc/source/admin/galaxy_options.rst @@ -2128,13 +2128,34 @@ ~~~~~~~~~~~~~~~~~~~~~~~~ :Description: - Map for interactivetool proxy. + Map for the interactivetool proxy. Mappings are stored in a SQLite + database file located on this path. As an alternative, you may + also store them in any other RDBMS supported by SQLAlchemy using + the option ``interactivetoolsproxy_map``, which overrides this + one. The value of this option will be resolved with respect to . :Default: ``interactivetools_map.sqlite`` :Type: str +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ +``interactivetoolsproxy_map`` +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ + +:Description: + Use a database supported by SQLAlchemy as map for the + interactivetool proxy. When this option is set, the value of + ``interactivetools_map`` is ignored. The value of this option must + be a `SQLAlchemy database URL + `_. + Mappings are written to the table "gxitproxy" within the database. + This value cannot match ``database_connection`` nor + ``install_database_connection``. +:Default: ``None`` +:Type: str + + ~~~~~~~~~~~~~~~~~~~~~~~~~~~ ``interactivetools_prefix`` ~~~~~~~~~~~~~~~~~~~~~~~~~~~ diff --git a/doc/source/admin/special_topics/interactivetools.rst b/doc/source/admin/special_topics/interactivetools.rst index 6d032c02d05c..f66d895e8725 100644 --- a/doc/source/admin/special_topics/interactivetools.rst +++ b/doc/source/admin/special_topics/interactivetools.rst @@ -102,6 +102,19 @@ The ``gx-it-proxy`` config relates to an important service in the InteractiveToo proxy. ``gx-it-proxy`` runs as a separate process listening at port 4002 (by default). HTTP requests are decoded based on the URL and headers, then somewhat massaged, and finally forwarded to the correct entry point port of the target InteractiveTool. +.. note:: + + Entry point mappings used by the proxy are stored on a SQLite database file located at ``interactivetools_map``. In + `some situations `_, + SQLite may not be the best choice. A common case is a high-availability production setup, meaning that multiple + copies of Galaxy are running on different servers behind a load balancer. + + For these situations, there exists an optional |configuration option interactivetoolsproxy_map|_ that allows using + any database supported by SQLAlchemy (it overrides ``interactivetools_map``). + +.. |configuration option interactivetoolsproxy_map| replace:: configuration option ``interactivetoolsproxy_map`` +.. _configuration option interactivetoolsproxy_map: ../config.html#interactivetoolsproxy-map + .. note:: A previous config option ``interactivetools_shorten_url`` was removed in commit `#73100de `_ diff --git a/lib/galaxy/config/__init__.py b/lib/galaxy/config/__init__.py index ce189b894fdc..ecf8f8aebcfb 100644 --- a/lib/galaxy/config/__init__.py +++ b/lib/galaxy/config/__init__.py @@ -1103,10 +1103,46 @@ def _process_config(self, kwargs: Dict[str, Any]) -> None: self.proxy_session_map = self.dynamic_proxy_session_map self.manage_dynamic_proxy = self.dynamic_proxy_manage # Set to false if being launched externally - # InteractiveTools propagator mapping file - self.interactivetools_map = self._in_root_dir( - kwargs.get("interactivetools_map", self._in_data_dir("interactivetools_map.sqlite")) - ) + # Interactive tools proxy mapping + if self.interactivetoolsproxy_map is None: + self.interactivetools_map = "sqlite:///" + self._in_root_dir( + kwargs.get("interactivetools_map", self._in_data_dir("interactivetools_map.sqlite")) + ) + else: + self.interactivetools_map = None # overridden by `self.interactivetoolsproxy_map` + + # ensure the database URL for the SQLAlchemy map does not match that of a Galaxy DB + urls = { + setting: urlparse(value) + for setting, value in ( + ("interactivetoolsproxy_map", self.interactivetoolsproxy_map), + ("database_connection", self.database_connection), + ("install_database_connection", self.install_database_connection), + ) + if value is not None + } + + def is_in_conflict(url1, url2): + return all( + ( + url1.scheme == url2.scheme, + url1.hostname == url2.hostname, + url1.port == url2.port, + url1.path == url2.path, + ) + ) + + conflicting_settings = { + setting + for setting, url in tuple(urls.items())[1:] # exclude "interactivetoolsproxy_map" + if is_in_conflict(url, list(urls.values())[0]) # compare with "interactivetoolsproxy_map" + } + + if conflicting_settings: + raise ConfigurationError( + f"Option `{tuple(urls)[0]}` cannot take the same value as: %s" + % ", ".join(f"`{setting}`" for setting in conflicting_settings) + ) # Compliance/Policy variables self.redact_username_during_deletion = False @@ -1227,6 +1263,8 @@ def try_parsing(value, name): try_parsing(self.database_connection, "database_connection") try_parsing(self.install_database_connection, "install_database_connection") + if self.interactivetoolsproxy_map is not None: + try_parsing(self.interactivetoolsproxy_map, "interactivetoolsproxy_map") try_parsing(self.amqp_internal_connection, "amqp_internal_connection") def _configure_dataset_storage(self): diff --git a/lib/galaxy/config/sample/galaxy.yml.sample b/lib/galaxy/config/sample/galaxy.yml.sample index bb4c6fc32d4f..00ea42a13080 100644 --- a/lib/galaxy/config/sample/galaxy.yml.sample +++ b/lib/galaxy/config/sample/galaxy.yml.sample @@ -191,7 +191,7 @@ gravity: # Routes file to monitor. # Should be set to the same path as ``interactivetools_map`` in the ``galaxy:`` section. This is ignored if - # ``interactivetools_map is set``. + # ``interactivetools_map is set. # sessions: database/interactivetools_map.sqlite # Include verbose messages in gx-it-proxy @@ -1357,11 +1357,24 @@ galaxy: # subdomain. Defaults to "/". #interactivetools_base_path: / - # Map for interactivetool proxy. + # Map for the interactivetool proxy. Mappings are stored in a SQLite + # database file located on this path. As an alternative, you may also + # store them in any other RDBMS supported by SQLAlchemy using the + # option ``interactivetoolsproxy_map``, which overrides this one. # The value of this option will be resolved with respect to # . #interactivetools_map: interactivetools_map.sqlite + # Use a database supported by SQLAlchemy as map for the + # interactivetool proxy. When this option is set, the value of + # ``interactivetools_map`` is ignored. The value of this option must + # be a `SQLAlchemy database URL + # `_. + # Mappings are written to the table "gxitproxy" within the database. + # This value cannot match ``database_connection`` nor + # ``install_database_connection``. + #interactivetoolsproxy_map: null + # Prefix to use in the formation of the subdomain or path for # interactive tools #interactivetools_prefix: interactivetool diff --git a/lib/galaxy/config/schemas/config_schema.yml b/lib/galaxy/config/schemas/config_schema.yml index aa7f4891936d..b16b2d3fd8fb 100644 --- a/lib/galaxy/config/schemas/config_schema.yml +++ b/lib/galaxy/config/schemas/config_schema.yml @@ -1520,7 +1520,21 @@ mapping: default: interactivetools_map.sqlite path_resolves_to: data_dir desc: | - Map for interactivetool proxy. + Map for the interactivetool proxy. Mappings are stored in a SQLite database file + located on this path. As an alternative, you may also store them in any other RDBMS + supported by SQLAlchemy using the option ``interactivetoolsproxy_map``, which + overrides this one. + + interactivetoolsproxy_map: + type: str + required: false + desc: | + Use a database supported by SQLAlchemy as map for the interactivetool proxy. + When this option is set, the value of ``interactivetools_map`` is ignored. The + value of this option must be a + `SQLAlchemy database URL `_. + Mappings are written to the table "gxitproxy" within the database. This value cannot match + ``database_connection`` nor ``install_database_connection``. interactivetools_prefix: type: str diff --git a/lib/galaxy/managers/interactivetool.py b/lib/galaxy/managers/interactivetool.py index 8cec876ba561..f96fac35d96b 100644 --- a/lib/galaxy/managers/interactivetool.py +++ b/lib/galaxy/managers/interactivetool.py @@ -1,14 +1,22 @@ import json import logging -import sqlite3 from urllib.parse import ( urlsplit, urlunsplit, ) from sqlalchemy import ( + Column, + create_engine, + delete, + insert, + Integer, + MetaData, or_, select, + String, + Table, + Text, ) from galaxy import exceptions @@ -18,94 +26,67 @@ ) from galaxy.model.base import transaction from galaxy.security.idencoding import IdAsLowercaseAlphanumEncodingHelper -from galaxy.util.filelock import FileLock log = logging.getLogger(__name__) -DATABASE_TABLE_NAME = "gxitproxy" - - -class InteractiveToolSqlite: - def __init__(self, sqlite_filename, encode_id): - self.sqlite_filename = sqlite_filename - self.encode_id = encode_id - - def get(self, key, key_type): - with FileLock(self.sqlite_filename): - conn = sqlite3.connect(self.sqlite_filename) - try: - c = conn.cursor() - select = f"""SELECT token, host, port, info - FROM {DATABASE_TABLE_NAME} - WHERE key=? and key_type=?""" - c.execute( - select, - ( - key, - key_type, - ), - ) - try: - token, host, port, info = c.fetchone() - except TypeError: - log.warning("get(): invalid key: %s key_type %s", key, key_type) - return None - return dict(key=key, key_type=key_type, token=token, host=host, port=port, info=info) - finally: - conn.close() +gxitproxy = Table( + "gxitproxy", + MetaData(), + Column("key", String(16), primary_key=True), + Column("key_type", Text(), primary_key=True), + Column("token", String(32)), + Column("host", Text()), + Column("port", Integer()), + Column("info", Text()), +) + + +class InteractiveToolPropagatorSQLAlchemy: + """ + Propagator for InteractiveToolManager implemented using SQLAlchemy. + """ + + def __init__(self, database_url, encode_id): + """ + Constructor that sets up the propagator using a SQLAlchemy database URL. + + :param database_url: SQLAlchemy database URL, read more on the SQLAlchemy documentation + https://docs.sqlalchemy.org/en/20/core/engines.html#database-urls. + :param encode_id: A helper class that can encode ids as lowercase alphanumeric strings and vice versa. + """ + self._engine = create_engine(database_url) + self._encode_id = encode_id def save(self, key, key_type, token, host, port, info=None): """ - Writeout a key, key_type, token, value store that is can be used for coordinating - with external resources. + Write out a key, key_type, token, value store that is can be used for coordinating with external resources. """ assert key, ValueError("A non-zero length key is required.") assert key_type, ValueError("A non-zero length key_type is required.") assert token, ValueError("A non-zero length token is required.") - with FileLock(self.sqlite_filename): - conn = sqlite3.connect(self.sqlite_filename) - try: - c = conn.cursor() - try: - # Create table - c.execute( - f"""CREATE TABLE {DATABASE_TABLE_NAME} - (key text, - key_type text, - token text, - host text, - port integer, - info text, - PRIMARY KEY (key, key_type) - )""" - ) - except Exception: - pass - delete = f"""DELETE FROM {DATABASE_TABLE_NAME} WHERE key=? and key_type=?""" - c.execute( - delete, - ( - key, - key_type, - ), - ) - insert = f"""INSERT INTO {DATABASE_TABLE_NAME} - (key, key_type, token, host, port, info) - VALUES (?, ?, ?, ?, ?, ?)""" - c.execute( - insert, - ( - key, - key_type, - token, - host, - port, - info, - ), - ) - conn.commit() - finally: - conn.close() + with self._engine.connect() as conn: + # create database table if not exists + gxitproxy.create(conn, checkfirst=True) + + # delete existing data with same key + stmt_delete = delete(gxitproxy).where( + gxitproxy.c.key == key, + gxitproxy.c.key_type == key_type, + ) + conn.execute(stmt_delete) + + # save data + stmt_insert = insert(gxitproxy).values( + key=key, + key_type=key_type, + token=token, + host=host, + port=port, + info=info, + ) + conn.execute(stmt_insert) + + conn.commit() def remove(self, **kwd): """ @@ -113,30 +94,17 @@ def remove(self, **kwd): with external resources. Remove entries that match all provided key=values """ assert kwd, ValueError("You must provide some values to key upon") - delete = f"DELETE FROM {DATABASE_TABLE_NAME} WHERE" - value_list = [] - for i, (key, value) in enumerate(kwd.items()): - if i != 0: - delete += " and" - delete += f" {key}=?" - value_list.append(value) - with FileLock(self.sqlite_filename): - conn = sqlite3.connect(self.sqlite_filename) - try: - c = conn.cursor() - try: - # Delete entry - c.execute(delete, tuple(value_list)) - except Exception as e: - log.debug("Error removing entry (%s): %s", delete, e) - conn.commit() - finally: - conn.close() + with self._engine.connect() as conn: + stmt = delete(gxitproxy).where( + *(gxitproxy.c[key] == value for key, value in kwd.items()), + ) + conn.execute(stmt) + conn.commit() def save_entry_point(self, entry_point): """Convenience method to easily save an entry_point.""" return self.save( - self.encode_id(entry_point.id), + self._encode_id(entry_point.id), entry_point.__class__.__name__.lower(), entry_point.token, entry_point.host, @@ -151,7 +119,7 @@ def save_entry_point(self, entry_point): def remove_entry_point(self, entry_point): """Convenience method to easily remove an entry_point.""" - return self.remove(key=self.encode_id(entry_point.id), key_type=entry_point.__class__.__name__.lower()) + return self.remove(key=self._encode_id(entry_point.id), key_type=entry_point.__class__.__name__.lower()) class InteractiveToolManager: @@ -166,7 +134,10 @@ def __init__(self, app): self.sa_session = app.model.context self.job_manager = app.job_manager self.encoder = IdAsLowercaseAlphanumEncodingHelper(app.security) - self.propagator = InteractiveToolSqlite(app.config.interactivetools_map, self.encoder.encode_id) + self.propagator = InteractiveToolPropagatorSQLAlchemy( + app.config.interactivetoolsproxy_map or app.config.interactivetools_map, + self.encoder.encode_id, + ) def create_entry_points(self, job, tool, entry_points=None, flush=True): entry_points = entry_points or tool.ports diff --git a/test/unit/config/test_config_values.py b/test/unit/config/test_config_values.py index 2f00824839d0..753fbf7b1f2d 100644 --- a/test/unit/config/test_config_values.py +++ b/test/unit/config/test_config_values.py @@ -67,6 +67,44 @@ def test_error_if_database_connection_contains_brackets(bracket): config.GalaxyAppConfiguration(override_tempdir=False, amqp_internal_connection=uri) +def test_error_if_interactivetoolsproxy_map_matches_other_database_connections(): + """ + The setting `interactivetoolsproxy_map` allows storing the session map in a + database supported by SQLAlchemy. This database must be different from the Galaxy database + and the tool shed database. + + Motivation for this constraint: + https://github.com/galaxyproject/galaxy/pull/18481#issuecomment-2218493956 + """ + database_connection = "dbscheme://user:password@host/db" + install_database_connection = "dbscheme://user:password@host/install_db" + settings = dict( + override_tempdir=False, + database_connection=database_connection, + install_database_connection=install_database_connection, + ) + + with pytest.raises(ConfigurationError): + # interactivetoolsproxy_map matches database_connection + config.GalaxyAppConfiguration( + **settings, + interactivetoolsproxy_map=database_connection, + ) + + with pytest.raises(ConfigurationError): + # interactivetoolsproxy_map matches install_database_connection + config.GalaxyAppConfiguration( + **settings, + interactivetoolsproxy_map=install_database_connection, + ) + + # interactivetoolsproxy_map differs from database_connection, install_database_connection + config.GalaxyAppConfiguration( + **settings, + interactivetoolsproxy_map="dbscheme://user:password@host/gxitproxy", + ) + + class TestIsFetchWithCeleryEnabled: def test_disabled_if_celery_disabled(self, appconfig): appconfig.enable_celery_tasks = False