From 3ffd598a1c559f9d9bb3b7e43cd8a24454c3f3de Mon Sep 17 00:00:00 2001 From: Alexander Kukushkin Date: Wed, 8 Nov 2023 14:08:02 +0100 Subject: [PATCH 01/33] Do a real http request when performing name uniqueness check (#2942) When running in containers it is possible that the traffic is routed using `docker-proxy`, which listens on the port and accepting incoming connections. This commit effectively sticks to the original solution from #2878 --- patroni/__main__.py | 16 +++++++--------- patroni/log.py | 29 +++++++++++++++++++++-------- tests/test_patroni.py | 8 ++++---- 3 files changed, 32 insertions(+), 21 deletions(-) diff --git a/patroni/__main__.py b/patroni/__main__.py index 02ba56da9..229ccfb9b 100644 --- a/patroni/__main__.py +++ b/patroni/__main__.py @@ -107,8 +107,6 @@ def load_dynamic_configuration(self) -> None: def ensure_unique_name(self) -> None: """A helper method to prevent splitbrain from operator naming error.""" - from urllib.parse import urlparse - from urllib3.connection import HTTPConnection from patroni.dcs import Member cluster = self.dcs.get_cluster() @@ -118,14 +116,14 @@ def ensure_unique_name(self) -> None: if not isinstance(member, Member): return try: - parts = urlparse(member.api_url) - if isinstance(parts.hostname, str): - connection = HTTPConnection(parts.hostname, port=parts.port or 80, timeout=3) - connection.connect() - logger.fatal("Can't start; there is already a node named '%s' running", self.config['name']) - sys.exit(1) + # Silence annoying WARNING: Retrying (...) messages when Patroni is quickly restarted. + # At this moment we don't have custom log levels configured and hence shouldn't lose anything useful. 
+ self.logger.update_loggers({'urllib3.connectionpool': 'ERROR'}) + _ = self.request(member, endpoint="/liveness", timeout=3) + logger.fatal("Can't start; there is already a node named '%s' running", self.config['name']) + sys.exit(1) except Exception: - return + self.logger.update_loggers({}) def _get_tags(self) -> Dict[str, Any]: """Get tags configured for this node, if any. diff --git a/patroni/log.py b/patroni/log.py index 09d738830..6ac67a17d 100644 --- a/patroni/log.py +++ b/patroni/log.py @@ -202,24 +202,37 @@ def __init__(self) -> None: self._proxy_handler = ProxyHandler(self) self._root_logger.addHandler(self._proxy_handler) - def update_loggers(self) -> None: - """Configure loggers' log level as defined in ``log.loggers`` section of Patroni configuration. + def update_loggers(self, config: Dict[str, Any]) -> None: + """Configure custom loggers' log levels. .. note:: It creates logger objects that are not defined yet in the log manager. + + :param config: :class:`dict` object with custom loggers configuration, is set either from: + + * ``log.loggers`` section of Patroni configuration; or + + * from the method that is trying to make sure that the node name + isn't duplicated (to silence annoying ``urllib3`` WARNING's). + + :Example: + + .. code-block:: python + + update_loggers({'urllib3.connectionpool': 'WARNING'}) """ - loggers = deepcopy((self._config or {}).get('loggers') or {}) + loggers = deepcopy(config) for name, logger in self._root_logger.manager.loggerDict.items(): # ``Placeholder`` is a node in the log manager for which no logger has been defined. We are interested only # in the ones that were defined if not isinstance(logger, logging.PlaceHolder): - # if this logger is present in ``log.loggers`` Patroni configuration, use the configured level, - # otherwise use ``logging.NOTSET``, which means it will inherit the level from any parent node up to - # the root for which log level is defined. 
+ # if this logger is present in *config*, use the configured level, otherwise + # use ``logging.NOTSET``, which means it will inherit the level + # from any parent node up to the root for which log level is defined. level = loggers.pop(name, logging.NOTSET) logger.setLevel(level) - # define loggers that do not exist yet and set level as configured in ``log.loggers`` section of configuration. + # define loggers that do not exist yet and set level as configured in the *config* for name, level in loggers.items(): logger = self._root_logger.manager.getLogger(name) logger.setLevel(level) @@ -274,7 +287,7 @@ def reload_config(self, config: Dict[str, Any]) -> None: self.log_handler = new_handler self._config = config.copy() - self.update_loggers() + self.update_loggers(config.get('loggers') or {}) def _close_old_handlers(self) -> None: """Close old log handlers. diff --git a/tests/test_patroni.py b/tests/test_patroni.py index 19497ab54..bf9e28712 100644 --- a/tests/test_patroni.py +++ b/tests/test_patroni.py @@ -45,7 +45,7 @@ class MockFrozenImporter(object): @patch('time.sleep', Mock()) @patch('subprocess.call', Mock(return_value=0)) @patch('patroni.psycopg.connect', psycopg_connect) -@patch('urllib3.connection.HTTPConnection.connect', Mock(side_effect=Exception)) +@patch('urllib3.PoolManager.request', Mock(side_effect=Exception)) @patch.object(ConfigHandler, 'append_pg_hba', Mock()) @patch.object(ConfigHandler, 'write_postgresql_conf', Mock()) @patch.object(ConfigHandler, 'write_recovery_conf', Mock()) @@ -69,7 +69,7 @@ def test_validate_config(self): self.assertRaises(SystemExit, _main) @patch('pkgutil.iter_importers', Mock(return_value=[MockFrozenImporter()])) - @patch('urllib3.connection.HTTPConnection.connect', Mock(side_effect=Exception)) + @patch('urllib3.PoolManager.request', Mock(side_effect=Exception)) @patch('sys.frozen', Mock(return_value=True), create=True) @patch.object(HTTPServer, '__init__', Mock()) @patch.object(etcd.Client, 'read', etcd_read) @@ -273,8 
+273,8 @@ def test_ensure_unique_name(self): ) with patch('patroni.dcs.AbstractDCS.get_cluster', Mock(return_value=bad_cluster)): # If the api of the running node cannot be reached, this implies unique name - with patch('urllib3.connection.HTTPConnection.connect', Mock(side_effect=ConnectionError)): + with patch('urllib3.PoolManager.request', Mock(side_effect=ConnectionError)): self.assertIsNone(self.p.ensure_unique_name()) # Only if the api of the running node is reachable do we throw an error - with patch('urllib3.connection.HTTPConnection.connect', Mock()): + with patch('urllib3.PoolManager.request', Mock()): self.assertRaises(SystemExit, self.p.ensure_unique_name) From 1b96ae9c0ad47fc1c6ee64c8c184b53061bb50ad Mon Sep 17 00:00:00 2001 From: Alexander Kukushkin Date: Thu, 9 Nov 2023 11:09:38 +0100 Subject: [PATCH 02/33] Fix Etcd v2 with Citus (#2943) When deploying a new Citus cluster with Etcd v2 Patroni was failing to start with the following exception: ```python 2023-11-09 10:51:41,246 INFO: Selected new etcd server http://localhost:2379 Traceback (most recent call last): File "/home/akukushkin/git/patroni/./patroni.py", line 6, in main() File "/home/akukushkin/git/patroni/patroni/__main__.py", line 343, in main return patroni_main(args.configfile) File "/home/akukushkin/git/patroni/patroni/__main__.py", line 237, in patroni_main abstract_main(Patroni, configfile) File "/home/akukushkin/git/patroni/patroni/daemon.py", line 172, in abstract_main controller = cls(config) File "/home/akukushkin/git/patroni/patroni/__main__.py", line 66, in __init__ self.ensure_unique_name() File "/home/akukushkin/git/patroni/patroni/__main__.py", line 112, in ensure_unique_name cluster = self.dcs.get_cluster() File "/home/akukushkin/git/patroni/patroni/dcs/__init__.py", line 1654, in get_cluster cluster = self._get_citus_cluster() if self.is_citus_coordinator() else self.__get_patroni_cluster() File "/home/akukushkin/git/patroni/patroni/dcs/__init__.py", line 1638, in 
_get_citus_cluster cluster = groups.pop(CITUS_COORDINATOR_GROUP_ID, Cluster.empty()) AttributeError: 'Cluster' object has no attribute 'pop' ``` It is broken since #2909. In addition to that fix `_citus_cluster_loader()` interface by allowing it to return only dict obj. --- patroni/dcs/__init__.py | 2 +- patroni/dcs/consul.py | 4 +--- patroni/dcs/etcd.py | 13 +++++++++---- tests/test_etcd.py | 2 ++ 4 files changed, 13 insertions(+), 8 deletions(-) diff --git a/patroni/dcs/__init__.py b/patroni/dcs/__init__.py index 389cae868..9c2f73357 100644 --- a/patroni/dcs/__init__.py +++ b/patroni/dcs/__init__.py @@ -1564,7 +1564,7 @@ def _cluster_loader(self, path: Any) -> Cluster: """ @abc.abstractmethod - def _citus_cluster_loader(self, path: Any) -> Union[Cluster, Dict[int, Cluster]]: + def _citus_cluster_loader(self, path: Any) -> Dict[int, Cluster]: """Load and build all Patroni clusters from a single Citus cluster. :param path: the path in DCS where to load Cluster(s) from. diff --git a/patroni/dcs/consul.py b/patroni/dcs/consul.py index 58e200301..1324e6065 100644 --- a/patroni/dcs/consul.py +++ b/patroni/dcs/consul.py @@ -422,7 +422,7 @@ def _consistency(self) -> str: def _cluster_loader(self, path: str) -> Cluster: _, results = self.retry(self._client.kv.get, path, recurse=True, consistency=self._consistency) if results is None: - raise NotFound + return Cluster.empty() nodes = {} for node in results: node['Value'] = (node['Value'] or b'').decode('utf-8') @@ -445,8 +445,6 @@ def _load_cluster( ) -> Union[Cluster, Dict[int, Cluster]]: try: return loader(path) - except NotFound: - return Cluster.empty() except Exception: logger.exception('get_cluster') raise ConsulError('Consul is not responding properly') diff --git a/patroni/dcs/etcd.py b/patroni/dcs/etcd.py index f242a6b25..3be699a64 100644 --- a/patroni/dcs/etcd.py +++ b/patroni/dcs/etcd.py @@ -710,13 +710,20 @@ def _cluster_from_nodes(self, etcd_index: int, nodes: Dict[str, etcd.EtcdResult] return 
Cluster(initialize, config, leader, status, members, failover, sync, history, failsafe) def _cluster_loader(self, path: str) -> Cluster: - result = self.retry(self._client.read, path, recursive=True, quorum=self._ctl) + try: + result = self.retry(self._client.read, path, recursive=True, quorum=self._ctl) + except etcd.EtcdKeyNotFound: + return Cluster.empty() nodes = {node.key[len(result.key):].lstrip('/'): node for node in result.leaves} return self._cluster_from_nodes(result.etcd_index, nodes) def _citus_cluster_loader(self, path: str) -> Dict[int, Cluster]: + try: + result = self.retry(self._client.read, path, recursive=True, quorum=self._ctl) + except etcd.EtcdKeyNotFound: + return {} + clusters: Dict[int, Dict[str, etcd.EtcdResult]] = defaultdict(dict) - result = self.retry(self._client.read, path, recursive=True, quorum=self._ctl) for node in result.leaves: key = node.key[len(result.key):].lstrip('/').split('/', 1) if len(key) == 2 and citus_group_re.match(key[0]): @@ -729,8 +736,6 @@ def _load_cluster( cluster = None try: cluster = loader(path) - except etcd.EtcdKeyNotFound: - cluster = Cluster.empty() except Exception as e: self._handle_exception(e, 'get_cluster', raise_ex=EtcdError('Etcd is not responding properly')) self._has_failed = False diff --git a/tests/test_etcd.py b/tests/test_etcd.py index 90402b5f9..874aac5cc 100644 --- a/tests/test_etcd.py +++ b/tests/test_etcd.py @@ -274,6 +274,8 @@ def test__get_citus_cluster(self): cluster = self.etcd.get_cluster() self.assertIsInstance(cluster, Cluster) self.assertIsInstance(cluster.workers[1], Cluster) + self.etcd._base_path = '/service/nocluster' + self.assertTrue(self.etcd.get_cluster().is_empty()) def test_touch_member(self): self.assertFalse(self.etcd.touch_member('')) From 7370f70f13f2db968ed860ea9e8137d275dee860 Mon Sep 17 00:00:00 2001 From: Alexander Kukushkin Date: Fri, 10 Nov 2023 09:23:45 +0100 Subject: [PATCH 03/33] Fix pg_rewind behavior with Postgres v16+ (#2944) The error message format was 
changed in https://github.com/postgres/postgres/commit/4ac30ba4f29d4b586b131404b0d514f16501272a, what caused `pg_rewind` being called by Patroni even when it was not necessary. --- patroni/postgresql/rewind.py | 41 ++++++++++++++++++++++++++---------- tests/test_rewind.py | 10 +++++++++ 2 files changed, 40 insertions(+), 11 deletions(-) diff --git a/patroni/postgresql/rewind.py b/patroni/postgresql/rewind.py index 6e1aab884..4a5283f7c 100644 --- a/patroni/postgresql/rewind.py +++ b/patroni/postgresql/rewind.py @@ -101,12 +101,26 @@ def check_leader_has_run_checkpoint(conn_kwargs: Dict[str, Any]) -> Optional[str return 'not accessible or not healty' def _get_checkpoint_end(self, timeline: int, lsn: int) -> int: - """The checkpoint record size in WAL depends on postgres major version and platform (memory alignment). - Hence, the only reliable way to figure out where it ends, read the record from file with the help of pg_waldump - and parse the output. We are trying to read two records, and expect that it will fail to read the second one: - `pg_waldump: fatal: error in WAL record at 0/182E220: invalid record length at 0/182E298: wanted 24, got 0` - The error message contains information about LSN of the next record, which is exactly where checkpoint ends.""" + """Get the end of checkpoint record from WAL. + .. note:: + The checkpoint record size in WAL depends on postgres major version and platform (memory alignment). + Hence, the only reliable way to figure out where it ends, is to read the record from file with the + help of ``pg_waldump`` and parse the output. 
+ + We are trying to read two records, and expect that it will fail to read the second record with message: + + fatal: error in WAL record at 0/182E220: invalid record length at 0/182E298: wanted 24, got 0; or + + fatal: error in WAL record at 0/182E220: invalid record length at 0/182E298: expected at least 24, got 0 + + The error message contains information about LSN of the next record, which is exactly where checkpoint ends. + + :param timeline: the checkpoint *timeline* from ``pg_controldata``. + :param lsn: the checkpoint *location* as :class:`int` from ``pg_controldata``. + + :returns: the end of checkpoint record as :class:`int` or ``0`` if failed to parse ``pg_waldump`` output. + """ lsn8 = format_lsn(lsn, True) lsn_str = format_lsn(lsn) out, err = self._postgresql.waldump(timeline, lsn_str, 2) @@ -117,12 +131,17 @@ def _get_checkpoint_end(self, timeline: int, lsn: int) -> int: if len(out) == 1 and len(err) == 1 and ', lsn: {0}, prev '.format(lsn8) in out[0] and pattern in err[0]: i = err[0].find(pattern) + len(pattern) - j = err[0].find(": wanted ", i) - if j > -1: - try: - return parse_lsn(err[0][i:j]) - except Exception as e: - logger.error('Failed to parse lsn %s: %r', err[0][i:j], e) + # Message format depends on the major version: + # * expected at least -- starting from v16 + # * wanted -- before v16 + # We will simply check all possible combinations. 
+ for pattern in (': expected at least ', ': wanted '): + j = err[0].find(pattern, i) + if j > -1: + try: + return parse_lsn(err[0][i:j]) + except Exception as e: + logger.error('Failed to parse lsn %s: %r', err[0][i:j], e) logger.error('Failed to parse pg_%sdump output', self._postgresql.wal_name) logger.error(' stdout=%s', '\n'.join(out)) logger.error(' stderr=%s', '\n'.join(err)) diff --git a/tests/test_rewind.py b/tests/test_rewind.py index a54c27e99..af8e7d974 100644 --- a/tests/test_rewind.py +++ b/tests/test_rewind.py @@ -180,6 +180,16 @@ def test__check_timeline_and_lsn(self, mock_check_leader_is_not_in_recovery, moc self.r.trigger_check_diverged_lsn() mock_get_local_timeline_lsn.return_value = (False, 2, 67197377) self.assertTrue(self.r.rewind_or_reinitialize_needed_and_possible(self.leader)) + + mock_popen.return_value.communicate.return_value = ( + b'0, lsn: 0/040159C1, prev 0/\n', + b'pg_waldump: fatal: error in WAL record at 0/40159C1: invalid record ' + b'length at 0/402DD98: expected at least 24, got 0\n' + ) + self.r.reset_state() + self.r.trigger_check_diverged_lsn() + self.assertFalse(self.r.rewind_or_reinitialize_needed_and_possible(self.leader)) + self.r.reset_state() self.r.trigger_check_diverged_lsn() mock_popen.side_effect = Exception From 1870dcd8f954822201f6de46402dd2671163b1f9 Mon Sep 17 00:00:00 2001 From: Alexander Kukushkin Date: Mon, 13 Nov 2023 15:01:57 +0100 Subject: [PATCH 04/33] Fix bug with custom bootstrap (#2948) Patroni was falsely applying `--command` argument. 
Close https://github.com/zalando/patroni/issues/2947 --- patroni/postgresql/bootstrap.py | 7 +++---- tests/test_bootstrap.py | 9 ++++++++- 2 files changed, 11 insertions(+), 5 deletions(-) diff --git a/patroni/postgresql/bootstrap.py b/patroni/postgresql/bootstrap.py index 751c0797f..a544bd735 100644 --- a/patroni/postgresql/bootstrap.py +++ b/patroni/postgresql/bootstrap.py @@ -188,10 +188,9 @@ def _custom_bootstrap(self, config: Any) -> bool: params = [] if config.get('no_params') else ['--scope=' + self._postgresql.scope, '--datadir=' + self._postgresql.data_dir] # Add custom parameters specified by the user - reserved_args = {'no_params', 'keep_existing_recovery_conf', 'recovery_conf', 'scope', 'datadir'} - for arg, val in config.items(): - if arg not in reserved_args: - params.append(f"--{arg}={val}") + reserved_args = {'command', 'no_params', 'keep_existing_recovery_conf', 'recovery_conf', 'scope', 'datadir'} + params += [f"--{arg}={val}" for arg, val in config.items() if arg not in reserved_args] + try: logger.info('Running custom bootstrap script: %s', config['command']) if self._postgresql.cancellable.call(shlex.split(config['command']) + params) != 0: diff --git a/tests/test_bootstrap.py b/tests/test_bootstrap.py index 4c2d1c982..8724b03cb 100644 --- a/tests/test_bootstrap.py +++ b/tests/test_bootstrap.py @@ -179,10 +179,17 @@ def test_bootstrap(self): @patch.object(Postgresql, 'controldata', Mock(return_value={'Database cluster state': 'in production'})) def test_custom_bootstrap(self, mock_cancellable_subprocess_call): self.p.config._config.pop('pg_hba') - config = {'method': 'foo', 'foo': {'command': 'bar'}} + config = {'method': 'foo', 'foo': {'command': 'bar --arg1=val1'}} mock_cancellable_subprocess_call.return_value = 1 self.assertFalse(self.b.bootstrap(config)) + self.assertEqual(mock_cancellable_subprocess_call.call_args_list[0][0][0], + ['bar', '--arg1=val1', '--scope=batman', '--datadir=' + os.path.join('data', 'test0')]) + + 
mock_cancellable_subprocess_call.reset_mock() + config['foo']['no_params'] = 1 + self.assertFalse(self.b.bootstrap(config)) + self.assertEqual(mock_cancellable_subprocess_call.call_args_list[0][0][0], ['bar', '--arg1=val1']) mock_cancellable_subprocess_call.return_value = 0 with patch('multiprocessing.Process', Mock(side_effect=Exception("42"))), \ From ecf158bce38095fb711e9c598c6090d3428a02b9 Mon Sep 17 00:00:00 2001 From: Alexander Kukushkin Date: Tue, 14 Nov 2023 13:44:54 +0100 Subject: [PATCH 05/33] Get rid of pass_obj() in most of patronictl commands (#2945) The `obj` could be easily obtained with the help of `click.get_current_context().obj`. Introduced function `is_citus_cluster()` will simplify future refactoring to add support of other MPP databases. In addition to that refactor ctl.py unit tests by moving most of mocks to the global scope., --- patroni/ctl.py | 236 +++++++++++-------------- tests/test_ctl.py | 435 ++++++++++++++++++++-------------------------- 2 files changed, 293 insertions(+), 378 deletions(-) diff --git a/patroni/ctl.py b/patroni/ctl.py index b4c57873d..2d92457a4 100644 --- a/patroni/ctl.py +++ b/patroni/ctl.py @@ -255,15 +255,23 @@ def load_config(path: str, dcs_url: Optional[str]) -> Dict[str, Any]: return config +def _get_configuration() -> Dict[str, Any]: + """Get configuration object. + + :returns: configuration object from the current context. 
+ """ + return click.get_current_context().obj['__config'] + + option_format = click.option('--format', '-f', 'fmt', help='Output format', default='pretty', type=click.Choice(['pretty', 'tsv', 'json', 'yaml', 'yml'])) option_watchrefresh = click.option('-w', '--watch', type=float, help='Auto update the screen every X seconds') option_watch = click.option('-W', is_flag=True, help='Auto update the screen every 2 seconds') option_force = click.option('--force', is_flag=True, help='Do not ask for confirmation at any point') arg_cluster_name = click.argument('cluster_name', required=False, - default=lambda: click.get_current_context().obj.get('scope')) + default=lambda: _get_configuration().get('scope')) option_default_citus_group = click.option('--group', required=False, type=int, help='Citus group', - default=lambda: click.get_current_context().obj.get('citus', {}).get('group')) + default=lambda: _get_configuration().get('citus', {}).get('group')) option_citus_group = click.option('--group', required=False, type=int, help='Citus group') role_choice = click.Choice(['leader', 'primary', 'standby-leader', 'replica', 'standby', 'any', 'master']) @@ -301,15 +309,23 @@ def ctl(ctx: click.Context, config_file: str, dcs_url: Optional[str], insecure: level = os.environ.get(name, level) logging.basicConfig(format='%(asctime)s - %(levelname)s - %(message)s', level=level) logging.captureWarnings(True) # Capture eventual SSL warning - ctx.obj = load_config(config_file, dcs_url) + config = load_config(config_file, dcs_url) # backward compatibility for configuration file where ctl section is not defined - ctx.obj.setdefault('ctl', {})['insecure'] = ctx.obj.get('ctl', {}).get('insecure') or insecure + config.setdefault('ctl', {})['insecure'] = config.get('ctl', {}).get('insecure') or insecure + ctx.obj = {'__config': config} + +def is_citus_cluster() -> bool: + """Check if we are working with Citus cluster. 
-def get_dcs(config: Dict[str, Any], scope: str, group: Optional[int]) -> AbstractDCS: + :returns: ``True`` if configuration has ``citus`` section, otherwise ``False``. + """ + return bool(_get_configuration().get('citus')) + + +def get_dcs(scope: str, group: Optional[int]) -> AbstractDCS: """Get the DCS object. - :param config: Patroni configuration. :param scope: cluster name. :param group: if *group* is defined, use it to select which alternative Citus group this DCS refers to. If *group* is ``None`` and a Citus configuration exists, assume this is the coordinator. Coordinator has the group ``0``. @@ -320,13 +336,14 @@ def get_dcs(config: Dict[str, Any], scope: str, group: Optional[int]) -> Abstrac :raises: :class:`PatroniCtlException`: if not suitable DCS configuration could be found. """ + config = _get_configuration() config.update({'scope': scope, 'patronictl': True}) if group is not None: config['citus'] = {'group': group} config.setdefault('name', scope) try: dcs = _get_dcs(config) - if config.get('citus') and group is None: + if is_citus_cluster() and group is None: dcs.is_citus_coordinator = lambda: True return dcs except PatroniException as e: @@ -347,7 +364,7 @@ def request_patroni(member: Member, method: str = 'GET', ctx = click.get_current_context() # the current click context request_executor = ctx.obj.get('__request_patroni') if not request_executor: - request_executor = ctx.obj['__request_patroni'] = PatroniRequest(ctx.obj) + request_executor = ctx.obj['__request_patroni'] = PatroniRequest(_get_configuration()) return request_executor(member, method, endpoint, data) @@ -452,11 +469,9 @@ def watching(w: bool, watch: Optional[int], max_count: Optional[int] = None, cle yield 0 -def get_all_members(obj: Dict[str, Any], cluster: Cluster, - group: Optional[int], role: str = 'leader') -> Iterator[Member]: +def get_all_members(cluster: Cluster, group: Optional[int], role: str = 'leader') -> Iterator[Member]: """Get all cluster members that have the given 
*role*. - :param obj: the Patroni configuration. :param cluster: the Patroni cluster. :param group: filter which Citus group we should get members from. If ``None`` get from all groups. :param role: role to filter members. Can be one among: @@ -470,7 +485,7 @@ def get_all_members(obj: Dict[str, Any], cluster: Cluster, :yields: members that have the given *role*. """ clusters = {0: cluster} - if obj.get('citus') and group is None: + if is_citus_cluster() and group is None: clusters.update(cluster.workers) if role in ('leader', 'master', 'primary', 'standby-leader'): # In the DCS the members' role can be one among: ``primary``, ``master``, ``replica`` or ``standby_leader``. @@ -492,11 +507,10 @@ def get_all_members(obj: Dict[str, Any], cluster: Cluster, yield m -def get_any_member(obj: Dict[str, Any], cluster: Cluster, group: Optional[int], +def get_any_member(cluster: Cluster, group: Optional[int], role: Optional[str] = None, member: Optional[str] = None) -> Optional[Member]: """Get the first found cluster member that has the given *role*. - :param obj: the Patroni configuration. :param cluster: the Patroni cluster. :param group: filter which Citus group we should get members from. If ``None`` get from all groups. :param role: role to filter members. See :func:`get_all_members` for available options. 
@@ -514,7 +528,7 @@ def get_any_member(obj: Dict[str, Any], cluster: Cluster, group: Optional[int], elif role is None: role = 'leader' - for m in get_all_members(obj, cluster, group, role): + for m in get_all_members(cluster, group, role): if member is None or m.name == member: return m @@ -535,7 +549,7 @@ def get_all_members_leader_first(cluster: Cluster) -> Iterator[Member]: yield member -def get_cursor(obj: Dict[str, Any], cluster: Cluster, group: Optional[int], connect_parameters: Dict[str, Any], +def get_cursor(cluster: Cluster, group: Optional[int], connect_parameters: Dict[str, Any], role: Optional[str] = None, member_name: Optional[str] = None) -> Union['cursor', 'Cursor[Any]', None]: """Get a cursor object to execute queries against a member that has the given *role* or *member_name*. @@ -544,7 +558,6 @@ def get_cursor(obj: Dict[str, Any], cluster: Cluster, group: Optional[int], conn * ``fallback_application_name``: as ``Patroni ctl``; * ``connect_timeout``: as ``5``. - :param obj: the Patroni configuration. :param cluster: the Patroni cluster. :param group: filter which Citus group we should get members to create a cursor against. If ``None`` consider members from all groups. @@ -559,7 +572,7 @@ def get_cursor(obj: Dict[str, Any], cluster: Cluster, group: Optional[int], conn * A :class:`psycopg2.extensions.cursor` if using :mod:`psycopg2`; * ``None`` if not able to get a cursor that attendees *role* and *member_name*. 
""" - member = get_any_member(obj, cluster, group, role=role, member=member_name) + member = get_any_member(cluster, group, role=role, member=member_name) if member is None: return None @@ -594,7 +607,7 @@ def get_cursor(obj: Dict[str, Any], cluster: Cluster, group: Optional[int], conn return None -def get_members(obj: Dict[str, Any], cluster: Cluster, cluster_name: str, member_names: List[str], role: str, +def get_members(cluster: Cluster, cluster_name: str, member_names: List[str], role: str, force: bool, action: str, ask_confirmation: bool = True, group: Optional[int] = None) -> List[Member]: """Get the list of members based on the given filters. @@ -618,7 +631,6 @@ def get_members(obj: Dict[str, Any], cluster: Cluster, cluster_name: str, member ``ask_confirmation=False``, and later call :func:`confirm_members_action` manually in the caller method. That way the workflow won't look broken to the user that is interacting with ``patronictl``. - :param obj: Patroni configuration. :param cluster: Patroni cluster. :param cluster_name: name of the Patroni cluster. :param member_names: used to filter which members should take the *action* based on their names. Each item is the @@ -647,13 +659,13 @@ def get_members(obj: Dict[str, Any], cluster: Cluster, cluster_name: str, member * Cluster does not have members that match the given *member_names*; or * No member with given *role* is found among the specified *member_names*. 
""" - members = list(get_all_members(obj, cluster, group, role)) + members = list(get_all_members(cluster, group, role)) candidates = {m.name for m in members} if not force or role: if not member_names and not candidates: raise PatroniCtlException('{0} cluster doesn\'t have any members'.format(cluster_name)) - output_members(obj, cluster, cluster_name, group=group) + output_members(cluster, cluster_name, group=group) if member_names: member_names = list(set(member_names) & candidates) @@ -713,9 +725,7 @@ def confirm_members_action(members: List[Member], force: bool, action: str, @click.option('--member', '-m', help='Generate a dsn for this member', type=str) @arg_cluster_name @option_citus_group -@click.pass_obj -def dsn(obj: Dict[str, Any], cluster_name: str, group: Optional[int], - role: Optional[str], member: Optional[str]) -> None: +def dsn(cluster_name: str, group: Optional[int], role: Optional[str], member: Optional[str]) -> None: """Process ``dsn`` command of ``patronictl`` utility. Get DSN to connect to *member*. @@ -723,7 +733,6 @@ def dsn(obj: Dict[str, Any], cluster_name: str, group: Optional[int], .. note:: If no *role* nor *member* is given assume *role* as ``leader``. - :param obj: Patroni configuration. :param cluster_name: name of the Patroni cluster. :param group: filter which Citus group we should get members to get DSN from. Refer to the module note for more details. @@ -736,8 +745,8 @@ def dsn(obj: Dict[str, Any], cluster_name: str, group: Optional[int], * both *role* and *member* are provided; or * No member matches requested *member* or *role*. 
""" - cluster = get_dcs(obj, cluster_name, group).get_cluster() - m = get_any_member(obj, cluster, group, role=role, member=member) + cluster = get_dcs(cluster_name, group).get_cluster() + m = get_any_member(cluster, group, role=role, member=member) if m is None: raise PatroniCtlException('Can not find a suitable member') @@ -759,9 +768,7 @@ def dsn(obj: Dict[str, Any], cluster_name: str, group: Optional[int], @click.option('--delimiter', help='The column delimiter', default='\t') @click.option('--command', '-c', help='The SQL commands to execute') @click.option('-d', '--dbname', help='database name to connect to', type=str) -@click.pass_obj def query( - obj: Dict[str, Any], cluster_name: str, group: Optional[int], role: Optional[str], @@ -780,7 +787,6 @@ def query( Perform a Postgres query in a Patroni node. - :param obj: Patroni configuration. :param cluster_name: name of the Patroni cluster. :param group: filter which Citus group we should get members from to perform the query. Refer to the module note for more details. 
@@ -820,24 +826,22 @@ def query( if dbname: connect_parameters['dbname'] = dbname - dcs = get_dcs(obj, cluster_name, group) + dcs = get_dcs(cluster_name, group) cluster = cursor = None for _ in watching(w, watch, clear=False): if cluster is None: cluster = dcs.get_cluster() -# cursor = get_cursor(obj, cluster, group, connect_parameters, role=role, member=member) - output, header = query_member(obj, cluster, group, cursor, member, role, sql, connect_parameters) + output, header = query_member(cluster, group, cursor, member, role, sql, connect_parameters) print_output(header, output, fmt=fmt, delimiter=delimiter) -def query_member(obj: Dict[str, Any], cluster: Cluster, group: Optional[int], - cursor: Union['cursor', 'Cursor[Any]', None], member: Optional[str], role: Optional[str], - command: str, connect_parameters: Dict[str, Any]) -> Tuple[List[List[Any]], Optional[List[Any]]]: +def query_member(cluster: Cluster, group: Optional[int], cursor: Union['cursor', 'Cursor[Any]', None], + member: Optional[str], role: Optional[str], command: str, + connect_parameters: Dict[str, Any]) -> Tuple[List[List[Any]], Optional[List[Any]]]: """Execute SQL *command* against a member. - :param obj: Patroni configuration. :param cluster: the Patroni cluster. :param group: filter which Citus group we should get members from to perform the query. Refer to the module note for more details. @@ -866,7 +870,7 @@ def query_member(obj: Dict[str, Any], cluster: Cluster, group: Optional[int], from . 
import psycopg try: if cursor is None: - cursor = get_cursor(obj, cluster, group, connect_parameters, role=role, member_name=member) + cursor = get_cursor(cluster, group, connect_parameters, role=role, member_name=member) if cursor is None: if member is not None: @@ -893,13 +897,11 @@ def query_member(obj: Dict[str, Any], cluster: Cluster, group: Optional[int], @click.argument('cluster_name') @option_citus_group @option_format -@click.pass_obj -def remove(obj: Dict[str, Any], cluster_name: str, group: Optional[int], fmt: str) -> None: +def remove(cluster_name: str, group: Optional[int], fmt: str) -> None: """Process ``remove`` command of ``patronictl`` utility. Remove cluster *cluster_name* from the DCS. - :param obj: Patroni configuration. :param cluster_name: name of the cluster which information will be wiped out of the DCS. :param group: which Citus group should have its information wiped out of the DCS. Refer to the module note for more details. @@ -913,12 +915,12 @@ def remove(obj: Dict[str, Any], cluster_name: str, group: Optional[int], fmt: st * use did not type the correct leader name when requesting removal of a healthy cluster. 
""" - dcs = get_dcs(obj, cluster_name, group) + dcs = get_dcs(cluster_name, group) cluster = dcs.get_cluster() - if obj.get('citus') and group is None: + if is_citus_cluster() and group is None: raise PatroniCtlException('For Citus clusters the --group must me specified') - output_members(obj, cluster, cluster_name, fmt=fmt) + output_members(cluster, cluster_name, fmt=fmt) confirm = click.prompt('Please confirm the cluster name to remove', type=str) if confirm != cluster_name: @@ -1003,24 +1005,21 @@ def parse_scheduled(scheduled: Optional[str]) -> Optional[datetime.datetime]: @option_citus_group @click.option('--role', '-r', help='Reload only members with this role', type=role_choice, default='any') @option_force -@click.pass_obj -def reload(obj: Dict[str, Any], cluster_name: str, member_names: List[str], - group: Optional[int], force: bool, role: str) -> None: +def reload(cluster_name: str, member_names: List[str], group: Optional[int], force: bool, role: str) -> None: """Process ``reload`` command of ``patronictl`` utility. Reload configuration of cluster members based on given filters. - :param obj: Patroni configuration. :param cluster_name: name of the Patroni cluster. :param member_names: name of the members which configuration should be reloaded. :param group: filter which Citus group we should reload members. Refer to the module note for more details. :param force: perform the reload without asking for confirmations. :param role: role to filter members. See :func:`get_all_members` for available options. 
""" - dcs = get_dcs(obj, cluster_name, group) + dcs = get_dcs(cluster_name, group) cluster = dcs.get_cluster() - members = get_members(obj, cluster, cluster_name, member_names, role, force, 'reload', group=group) + members = get_members(cluster, cluster_name, member_names, role, force, 'reload', group=group) for member in members: r = request_patroni(member, 'post', 'reload') @@ -1050,15 +1049,13 @@ def reload(obj: Dict[str, Any], cluster_name: str, member_names: List[str], @click.option('--pending', help='Restart if pending', is_flag=True) @click.option('--timeout', help='Return error and fail over if necessary when restarting takes longer than this.') @option_force -@click.pass_obj -def restart(obj: Dict[str, Any], cluster_name: str, group: Optional[int], member_names: List[str], +def restart(cluster_name: str, group: Optional[int], member_names: List[str], force: bool, role: str, p_any: bool, scheduled: Optional[str], version: Optional[str], pending: bool, timeout: Optional[str]) -> None: """Process ``restart`` command of ``patronictl`` utility. Restart Postgres on cluster members based on given filters. - :param obj: Patroni configuration. :param cluster_name: name of the Patroni cluster. :param group: filter which Citus group we should restart members. Refer to the module note for more details. :param member_names: name of the members that should be restarted. @@ -1076,9 +1073,9 @@ def restart(obj: Dict[str, Any], cluster_name: str, group: Optional[int], member * *version* could not be parsed; or * a restart is attempted against a cluster that is in maintenance mode. 
""" - cluster = get_dcs(obj, cluster_name, group).get_cluster() + cluster = get_dcs(cluster_name, group).get_cluster() - members = get_members(obj, cluster, cluster_name, member_names, role, force, 'restart', False, group=group) + members = get_members(cluster, cluster_name, member_names, role, force, 'restart', False, group=group) if scheduled is None and not force: next_hour = (datetime.datetime.now() + datetime.timedelta(hours=1)).strftime('%Y-%m-%dT%H:%M') scheduled = click.prompt('When should the restart take place (e.g. ' + next_hour + ') ', @@ -1140,9 +1137,7 @@ def restart(obj: Dict[str, Any], cluster_name: str, group: Optional[int], member @click.argument('member_names', nargs=-1) @option_force @click.option('--wait', help='Wait until reinitialization completes', is_flag=True) -@click.pass_obj -def reinit(obj: Dict[str, Any], cluster_name: str, group: Optional[int], - member_names: List[str], force: bool, wait: bool) -> None: +def reinit(cluster_name: str, group: Optional[int], member_names: List[str], force: bool, wait: bool) -> None: """Process ``reinit`` command of ``patronictl`` utility. Reinitialize cluster members based on given filters. @@ -1150,15 +1145,14 @@ def reinit(obj: Dict[str, Any], cluster_name: str, group: Optional[int], .. note:: Only reinitialize replica members, not a leader. - :param obj: Patroni configuration. :param cluster_name: name of the Patroni cluster. :param group: filter which Citus group we should reinit members. Refer to the module note for more details. :param member_names: name of the members that should be reinitialized. :param force: perform the restart without asking for confirmations. :param wait: wait for the operation to complete. 
""" - cluster = get_dcs(obj, cluster_name, group).get_cluster() - members = get_members(obj, cluster, cluster_name, member_names, 'replica', force, 'reinitialize', group=group) + cluster = get_dcs(cluster_name, group).get_cluster() + members = get_members(cluster, cluster_name, member_names, 'replica', force, 'reinitialize', group=group) wait_on_members: List[Member] = [] for member in members: @@ -1189,8 +1183,8 @@ def reinit(obj: Dict[str, Any], cluster_name: str, group: Optional[int], wait_on_members.remove(member) -def _do_failover_or_switchover(obj: Dict[str, Any], action: str, cluster_name: str, - group: Optional[int], leader: Optional[str], candidate: Optional[str], +def _do_failover_or_switchover(action: str, cluster_name: str, group: Optional[int], + leader: Optional[str], candidate: Optional[str], force: bool, scheduled: Optional[str] = None) -> None: """Perform a failover or a switchover operation in the cluster. @@ -1200,7 +1194,6 @@ def _do_failover_or_switchover(obj: Dict[str, Any], action: str, cluster_name: s .. note:: If not able to perform the operation through the REST API, write directly to the DCS as a fall back. - :param obj: Patroni configuration. :param action: action to be taken -- ``failover`` or ``switchover``. :param cluster_name: name of the Patroni cluster. :param group: filter Citus group within we should perform a failover or switchover. If ``None``, user will be @@ -1222,17 +1215,17 @@ def _do_failover_or_switchover(obj: Dict[str, Any], action: str, cluster_name: s * trying to schedule a switchover in a cluster that is in maintenance mode; or * user aborts the operation. 
""" - dcs = get_dcs(obj, cluster_name, group) + dcs = get_dcs(cluster_name, group) cluster = dcs.get_cluster() click.echo('Current cluster topology') - output_members(obj, cluster, cluster_name, group=group) + output_members(cluster, cluster_name, group=group) - if obj.get('citus') and group is None: + if is_citus_cluster() and group is None: if force: raise PatroniCtlException('For Citus clusters the --group must me specified') else: group = click.prompt('Citus group', type=int) - dcs = get_dcs(obj, cluster_name, group) + dcs = get_dcs(cluster_name, group) cluster = dcs.get_cluster() global_config = get_global_config(cluster) @@ -1342,7 +1335,7 @@ def _do_failover_or_switchover(obj: Dict[str, Any], action: str, cluster_name: s click.echo('{0} Could not {1} using Patroni api, falling back to DCS'.format(timestamp(), action)) dcs.manual_failover(leader, candidate, scheduled_at=scheduled_at) - output_members(obj, cluster, cluster_name, group=group) + output_members(cluster, cluster_name, group=group) @ctl.command('failover', help='Failover to a replica') @@ -1351,8 +1344,7 @@ def _do_failover_or_switchover(obj: Dict[str, Any], action: str, cluster_name: s @click.option('--leader', '--primary', '--master', 'leader', help='The name of the current leader', default=None) @click.option('--candidate', help='The name of the candidate', default=None) @option_force -@click.pass_obj -def failover(obj: Dict[str, Any], cluster_name: str, group: Optional[int], +def failover(cluster_name: str, group: Optional[int], leader: Optional[str], candidate: Optional[str], force: bool) -> None: """Process ``failover`` command of ``patronictl`` utility. @@ -1366,7 +1358,6 @@ def failover(obj: Dict[str, Any], cluster_name: str, group: Optional[int], .. seealso:: Refer to :func:`_do_failover_or_switchover` for details. - :param obj: Patroni configuration. :param cluster_name: name of the Patroni cluster. :param group: filter Citus group within we should perform a failover or switchover. 
If ``None``, user will be prompted for filling it -- unless *force* is ``True``, in which case an exception is raised by @@ -1381,7 +1372,7 @@ def failover(obj: Dict[str, Any], cluster_name: str, group: Optional[int], click.echo(click.style( 'Supplying a leader name using this command is deprecated and will be removed in a future version of' ' Patroni, change your scripts to use `switchover` instead.\nExecuting switchover!', fg='red')) - _do_failover_or_switchover(obj, action, cluster_name, group, leader, candidate, force) + _do_failover_or_switchover(action, cluster_name, group, leader, candidate, force) @ctl.command('switchover', help='Switchover to a replica') @@ -1392,9 +1383,8 @@ def failover(obj: Dict[str, Any], cluster_name: str, group: Optional[int], @click.option('--scheduled', help='Timestamp of a scheduled switchover in unambiguous format (e.g. ISO 8601)', default=None) @option_force -@click.pass_obj -def switchover(obj: Dict[str, Any], cluster_name: str, group: Optional[int], - leader: Optional[str], candidate: Optional[str], force: bool, scheduled: Optional[str]) -> None: +def switchover(cluster_name: str, group: Optional[int], leader: Optional[str], + candidate: Optional[str], force: bool, scheduled: Optional[str]) -> None: """Process ``switchover`` command of ``patronictl`` utility. Perform a switchover operation in the cluster. @@ -1402,7 +1392,6 @@ def switchover(obj: Dict[str, Any], cluster_name: str, group: Optional[int], .. seealso:: Refer to :func:`_do_failover_or_switchover` for details. - :param obj: Patroni configuration. :param cluster_name: name of the Patroni cluster. :param group: filter Citus group within we should perform a switchover. If ``None``, user will be prompted for filling it -- unless *force* is ``True``, in which case an exception is raised by @@ -1412,7 +1401,7 @@ def switchover(obj: Dict[str, Any], cluster_name: str, group: Optional[int], :param force: perform the switchover without asking for confirmations. 
:param scheduled: timestamp when the switchover should be scheduled to occur. If ``now`` perform immediately. """ - _do_failover_or_switchover(obj, 'switchover', cluster_name, group, leader, candidate, force, scheduled) + _do_failover_or_switchover('switchover', cluster_name, group, leader, candidate, force, scheduled) def generate_topology(level: int, member: Dict[str, Any], @@ -1514,8 +1503,8 @@ def get_cluster_service_info(cluster: Dict[str, Any]) -> List[str]: return service_info -def output_members(obj: Dict[str, Any], cluster: Cluster, name: str, - extended: bool = False, fmt: str = 'pretty', group: Optional[int] = None) -> None: +def output_members(cluster: Cluster, name: str, extended: bool = False, + fmt: str = 'pretty', group: Optional[int] = None) -> None: """Print information about the Patroni cluster and its members. Information is printed to console through :func:`print_output`, and contains: @@ -1540,7 +1529,6 @@ def output_members(obj: Dict[str, Any], cluster: Cluster, name: str, The 3 extended columns are always included if *extended*, even if the member has no value for a given column. If not *extended*, these columns may still be shown if any of the members has any information for them. - :param obj: Patroni configuration. :param cluster: Patroni cluster. :param name: name of the Patroni cluster. 
:param extended: if extended information (pending restarts, scheduled restarts, node tags) should be printed, if @@ -1558,8 +1546,7 @@ def output_members(obj: Dict[str, Any], cluster: Cluster, name: str, clusters = {group or 0: cluster_as_json(cluster)} - is_citus_cluster = obj.get('citus') - if is_citus_cluster: + if is_citus_cluster(): columns.insert(1, 'Group') if group is None: clusters.update({g: cluster_as_json(c) for g, c in cluster.workers.items()}) @@ -1597,10 +1584,12 @@ def output_members(obj: Dict[str, Any], cluster: Cluster, name: str, rows.append([member.get(n.lower().replace(' ', '_'), '') for n in columns]) - title = 'Citus cluster' if is_citus_cluster else 'Cluster' - title_details = f' ({initialize})' - if is_citus_cluster: + if is_citus_cluster(): + title = 'Citus cluster' title_details = '' if group is None else f' (group: {group}, {initialize})' + else: + title = 'Cluster' + title_details = f' ({initialize})' title = f' {title}: {name}{title_details} ' print_output(columns, rows, {'Group': 'r', 'Lag in MB': 'r', 'TL': 'r'}, fmt, title) @@ -1611,7 +1600,7 @@ def output_members(obj: Dict[str, Any], cluster: Cluster, name: str, for g, c in sorted(clusters.items()): service_info = get_cluster_service_info(c) if service_info: - if is_citus_cluster and group is None: + if is_citus_cluster() and group is None: click.echo('Citus group: {0}'.format(g)) click.echo(' ' + '\n '.join(service_info)) @@ -1624,16 +1613,14 @@ def output_members(obj: Dict[str, Any], cluster: Cluster, name: str, @option_format @option_watch @option_watchrefresh -@click.pass_obj -def members(obj: Dict[str, Any], cluster_names: List[str], group: Optional[int], - fmt: str, watch: Optional[int], w: bool, extended: bool, ts: bool) -> None: +def members(cluster_names: List[str], group: Optional[int], fmt: str, + watch: Optional[int], w: bool, extended: bool, ts: bool) -> None: """Process ``list`` command of ``patronictl`` utility. 
Print information about the Patroni cluster through :func:`output_members`. - :param obj: Patroni configuration. :param cluster_names: name of clusters that should be printed. If ``None`` consider only the cluster present in - ``scope`` key of *obj*. + ``scope`` key of the configuration. :param group: filter which Citus group we should get members from. Refer to the module note for more details. :param fmt: the output table printing format. See :func:`print_output` for available options. :param watch: if given print output every *watch* seconds. @@ -1642,9 +1629,10 @@ def members(obj: Dict[str, Any], cluster_names: List[str], group: Optional[int], more details. :param ts: if timestamp should be included in the output. """ + config = _get_configuration() if not cluster_names: - if 'scope' in obj: - cluster_names = [obj['scope']] + if 'scope' in config: + cluster_names = [config['scope']] if not cluster_names: return logging.warning('Listing members: No cluster names were provided') @@ -1653,10 +1641,10 @@ def members(obj: Dict[str, Any], cluster_names: List[str], group: Optional[int], click.echo(timestamp(0)) for cluster_name in cluster_names: - dcs = get_dcs(obj, cluster_name, group) + dcs = get_dcs(cluster_name, group) cluster = dcs.get_cluster() - output_members(obj, cluster, cluster_name, extended, fmt, group) + output_members(cluster, cluster_name, extended, fmt, group) @ctl.command('topology', help='Prints ASCII topology for given cluster') @@ -1698,14 +1686,12 @@ def timestamp(precision: int = 6) -> str: @click.argument('target', type=click.Choice(['restart', 'switchover'])) @click.option('--role', '-r', help='Flush only members with this role', type=role_choice, default='any') @option_force -@click.pass_obj -def flush(obj: Dict[str, Any], cluster_name: str, group: Optional[int], +def flush(cluster_name: str, group: Optional[int], member_names: List[str], force: bool, role: str, target: str) -> None: """Process ``flush`` command of ``patronictl`` utility. 
Discard scheduled restart or switchover events. - :param obj: Patroni configuration. :param cluster_name: name of the Patroni cluster. :param group: filter which Citus group we should flush an event. Refer to the module note for more details. :param member_names: name of the members which events should be flushed. @@ -1713,11 +1699,11 @@ def flush(obj: Dict[str, Any], cluster_name: str, group: Optional[int], :param role: role to filter members. See :func:`get_all_members` for available options. :param target: the event that should be flushed -- ``restart`` or ``switchover``. """ - dcs = get_dcs(obj, cluster_name, group) + dcs = get_dcs(cluster_name, group) cluster = dcs.get_cluster() if target == 'restart': - for member in get_members(obj, cluster, cluster_name, member_names, role, force, 'flush', group=group): + for member in get_members(cluster, cluster_name, member_names, role, force, 'flush', group=group): if member.data.get('scheduled_restart'): r = request_patroni(member, 'delete', 'restart') check_response(r, member.name, 'flush scheduled restart') @@ -1775,10 +1761,9 @@ def wait_until_pause_is_applied(dcs: AbstractDCS, paused: bool, old_cluster: Clu return click.echo('Success: cluster management is {0}'.format(paused and 'paused' or 'resumed')) -def toggle_pause(config: Dict[str, Any], cluster_name: str, group: Optional[int], paused: bool, wait: bool) -> None: +def toggle_pause(cluster_name: str, group: Optional[int], paused: bool, wait: bool) -> None: """Toggle the ``pause`` state in the cluster members. - :param config: Patroni configuration. :param cluster_name: name of the Patroni cluster. :param group: filter which Citus group we should toggle the pause state of. Refer to the module note for more details. @@ -1790,7 +1775,7 @@ def toggle_pause(config: Dict[str, Any], cluster_name: str, group: Optional[int] * ``pause`` state is already *paused*; or * cluster contains no accessible members. 
""" - dcs = get_dcs(config, cluster_name, group) + dcs = get_dcs(cluster_name, group) cluster = dcs.get_cluster() if get_global_config(cluster).is_paused == paused: raise PatroniCtlException('Cluster is {0} paused'.format(paused and 'already' or 'not')) @@ -1819,37 +1804,33 @@ def toggle_pause(config: Dict[str, Any], cluster_name: str, group: Optional[int] @ctl.command('pause', help='Disable auto failover') @arg_cluster_name @option_default_citus_group -@click.pass_obj @click.option('--wait', help='Wait until pause is applied on all nodes', is_flag=True) -def pause(obj: Dict[str, Any], cluster_name: str, group: Optional[int], wait: bool) -> None: +def pause(cluster_name: str, group: Optional[int], wait: bool) -> None: """Process ``pause`` command of ``patronictl`` utility. Put the cluster in maintenance mode. - :param obj: Patroni configuration. :param cluster_name: name of the Patroni cluster. :param group: filter which Citus group we should pause. Refer to the module note for more details. :param wait: ``True`` if it should block until the operation is finished or ``false`` for returning immediately. """ - return toggle_pause(obj, cluster_name, group, True, wait) + return toggle_pause(cluster_name, group, True, wait) @ctl.command('resume', help='Resume auto failover') @arg_cluster_name @option_default_citus_group @click.option('--wait', help='Wait until pause is cleared on all nodes', is_flag=True) -@click.pass_obj -def resume(obj: Dict[str, Any], cluster_name: str, group: Optional[int], wait: bool) -> None: +def resume(cluster_name: str, group: Optional[int], wait: bool) -> None: """Process ``unpause`` command of ``patronictl`` utility. Put the cluster out of maintenance mode. - :param obj: Patroni configuration. :param cluster_name: name of the Patroni cluster. :param group: filter which Citus group we should unpause. Refer to the module note for more details. 
:param wait: ``True`` if it should block until the operation is finished or ``false`` for returning immediately. """ - return toggle_pause(obj, cluster_name, group, False, wait) + return toggle_pause(cluster_name, group, False, wait) @contextmanager @@ -2081,15 +2062,12 @@ def invoke_editor(before_editing: str, cluster_name: str) -> Tuple[str, Dict[str @click.option('--replace', 'replace_filename', help='Apply configuration from file, replacing existing configuration.' ' Use - for stdin.') @option_force -@click.pass_obj -def edit_config(obj: Dict[str, Any], cluster_name: str, group: Optional[int], - force: bool, quiet: bool, kvpairs: List[str], pgkvpairs: List[str], - apply_filename: Optional[str], replace_filename: Optional[str]) -> None: +def edit_config(cluster_name: str, group: Optional[int], force: bool, quiet: bool, kvpairs: List[str], + pgkvpairs: List[str], apply_filename: Optional[str], replace_filename: Optional[str]) -> None: """Process ``edit-config`` command of ``patronictl`` utility. Update or replace Patroni configuration in the DCS. - :param obj: Patroni configuration. :param cluster_name: name of the Patroni cluster. :param group: filter which Citus group configuration we should edit. Refer to the module note for more details. :param force: if ``True`` apply config changes without asking for confirmations. @@ -2106,7 +2084,7 @@ def edit_config(obj: Dict[str, Any], cluster_name: str, group: Optional[int], * Configuration is absent from DCS; or * Detected a concurrent modification of the configuration in the DCS. 
""" - dcs = get_dcs(obj, cluster_name, group) + dcs = get_dcs(cluster_name, group) cluster = dcs.get_cluster() if not cluster.config: @@ -2152,17 +2130,15 @@ def edit_config(obj: Dict[str, Any], cluster_name: str, group: Optional[int], @ctl.command('show-config', help="Show cluster configuration") @arg_cluster_name @option_default_citus_group -@click.pass_obj -def show_config(obj: Dict[str, Any], cluster_name: str, group: Optional[int]) -> None: +def show_config(cluster_name: str, group: Optional[int]) -> None: """Process ``show-config`` command of ``patronictl`` utility. Show Patroni configuration stored in the DCS. - :param obj: Patroni configuration. :param cluster_name: name of the Patroni cluster. :param group: filter which Citus group configuration we should show. Refer to the module note for more details. """ - cluster = get_dcs(obj, cluster_name, group).get_cluster() + cluster = get_dcs(cluster_name, group).get_cluster() if cluster.config: click.echo(format_config_for_editing(cluster.config.data)) @@ -2171,8 +2147,7 @@ def show_config(obj: Dict[str, Any], cluster_name: str, group: Optional[int]) -> @click.argument('cluster_name', required=False) @click.argument('member_names', nargs=-1) @option_citus_group -@click.pass_obj -def version(obj: Dict[str, Any], cluster_name: str, group: Optional[int], member_names: List[str]) -> None: +def version(cluster_name: str, group: Optional[int], member_names: List[str]) -> None: """Process ``version`` command of ``patronictl`` utility. Show version of: @@ -2180,7 +2155,6 @@ def version(obj: Dict[str, Any], cluster_name: str, group: Optional[int], member * ``patroni`` on all members of the cluster; * ``PostgreSQL`` on all members of the cluster. - :param obj: Patroni configuration. :param cluster_name: name of the Patroni cluster. :param group: filter which Citus group we should get members from. Refer to the module note for more details. :param member_names: filter which members we should get version information from. 
@@ -2191,8 +2165,8 @@ def version(obj: Dict[str, Any], cluster_name: str, group: Optional[int], member return click.echo("") - cluster = get_dcs(obj, cluster_name, group).get_cluster() - for m in get_all_members(obj, cluster, group, 'any'): + cluster = get_dcs(cluster_name, group).get_cluster() + for m in get_all_members(cluster, group, 'any'): if m.api_url: if not member_names or m.name in member_names: try: @@ -2210,8 +2184,7 @@ def version(obj: Dict[str, Any], cluster_name: str, group: Optional[int], member @arg_cluster_name @option_default_citus_group @option_format -@click.pass_obj -def history(obj: Dict[str, Any], cluster_name: str, group: Optional[int], fmt: str) -> None: +def history(cluster_name: str, group: Optional[int], fmt: str) -> None: """Process ``history`` command of ``patronictl`` utility. Show the history of failover/switchover events in the cluster. @@ -2223,12 +2196,11 @@ def history(obj: Dict[str, Any], cluster_name: str, group: Optional[int], fmt: s * ``Timestamp``: timestamp when the event occurred; * ``New Leader``: the Postgres node that was promoted during the event. - :param obj: Patroni configuration. :param cluster_name: name of the Patroni cluster. :param group: filter which Citus group we should get events from. Refer to the module note for more details. :param fmt: the output table printing format. See :func:`print_output` for available options. 
""" - cluster = get_dcs(obj, cluster_name, group).get_cluster() + cluster = get_dcs(cluster_name, group).get_cluster() cluster_history = cluster.history.lines if cluster.history else [] history: List[List[Any]] = list(map(list, cluster_history)) table_header_row = ['TL', 'LSN', 'Reason', 'Timestamp', 'New Leader'] diff --git a/tests/test_ctl.py b/tests/test_ctl.py index 856918123..bacd2d609 100644 --- a/tests/test_ctl.py +++ b/tests/test_ctl.py @@ -1,3 +1,4 @@ +import click import etcd import mock import os @@ -9,7 +10,7 @@ from patroni.ctl import ctl, load_config, output_members, get_dcs, parse_dcs, \ get_all_members, get_any_member, get_cursor, query_member, PatroniCtlException, apply_config_changes, \ format_config_for_editing, show_diff, invoke_editor, format_pg_version, CONFIG_FILE_PATH, PatronictlPrettyTable -from patroni.dcs.etcd import AbstractEtcdClientWithFailover, Cluster, Failover +from patroni.dcs import Cluster, Failover from patroni.psycopg import OperationalError from patroni.utils import tzutc from prettytable import PrettyTable, ALL @@ -21,26 +22,26 @@ get_cluster_initialized_with_only_leader, get_cluster_not_initialized_without_leader, get_cluster, Member -DEFAULT_CONFIG = { - 'scope': 'alpha', - 'restapi': {'listen': '::', 'certfile': 'a'}, - 'ctl': {'certfile': 'a'}, - 'etcd': {'host': 'localhost:2379'}, - 'citus': {'database': 'citus', 'group': 0}, - 'postgresql': {'data_dir': '.', 'pgpass': './pgpass', 'parameters': {}, 'retry_timeout': 5} -} +def get_default_config(*args): + return { + 'scope': 'alpha', + 'restapi': {'listen': '::', 'certfile': 'a'}, + 'ctl': {'certfile': 'a'}, + 'etcd': {'host': 'localhost:2379', 'retry_timeout': 10, 'ttl': 30}, + 'citus': {'database': 'citus', 'group': 0}, + 'postgresql': {'data_dir': '.', 'pgpass': './pgpass', 'parameters': {}, 'retry_timeout': 5} + } -@patch('patroni.ctl.load_config', Mock(return_value=DEFAULT_CONFIG)) +@patch.object(PoolManager, 'request', Mock(return_value=MockResponse())) 
+@patch('patroni.ctl.load_config', get_default_config) +@patch('patroni.dcs.AbstractDCS.get_cluster', Mock(return_value=get_cluster_initialized_with_leader())) class TestCtl(unittest.TestCase): TEST_ROLES = ('master', 'primary', 'leader') @patch('socket.getaddrinfo', socket_getaddrinfo) - @patch.object(AbstractEtcdClientWithFailover, '_get_machines_list', Mock(return_value=['http://remotehost:2379'])) def setUp(self): self.runner = CliRunner() - self.e = get_dcs({'etcd': {'ttl': 30, 'host': 'ok:2379', 'retry_timeout': 10}, - 'citus': {'group': 0}}, 'foo', None) @patch('patroni.ctl.logging.debug') def test_load_config(self, mock_logger_debug): @@ -66,29 +67,31 @@ def test_load_config(self, mock_logger_debug): @patch('patroni.psycopg.connect', psycopg_connect) def test_get_cursor(self): - for role in self.TEST_ROLES: - self.assertIsNone(get_cursor({}, get_cluster_initialized_without_leader(), None, {}, role=role)) - self.assertIsNotNone(get_cursor({}, get_cluster_initialized_with_leader(), None, {}, role=role)) + with click.Context(click.Command('query')) as ctx: + ctx.obj = {'__config': {}} + for role in self.TEST_ROLES: + self.assertIsNone(get_cursor(get_cluster_initialized_without_leader(), None, {}, role=role)) + self.assertIsNotNone(get_cursor(get_cluster_initialized_with_leader(), None, {}, role=role)) - # MockCursor returns pg_is_in_recovery as false - self.assertIsNone(get_cursor({}, get_cluster_initialized_with_leader(), None, {}, role='replica')) + # MockCursor returns pg_is_in_recovery as false + self.assertIsNone(get_cursor(get_cluster_initialized_with_leader(), None, {}, role='replica')) - self.assertIsNotNone(get_cursor({}, get_cluster_initialized_with_leader(), None, {'dbname': 'foo'}, role='any')) + self.assertIsNotNone(get_cursor(get_cluster_initialized_with_leader(), None, {'dbname': 'foo'}, role='any')) - # Mutually exclusive options - with self.assertRaises(PatroniCtlException) as e: - get_cursor({}, get_cluster_initialized_with_leader(), None, 
{'dbname': 'foo'}, member_name='other', - role='replica') + # Mutually exclusive options + with self.assertRaises(PatroniCtlException) as e: + get_cursor(get_cluster_initialized_with_leader(), None, {'dbname': 'foo'}, member_name='other', + role='replica') - self.assertEqual(str(e.exception), '--role and --member are mutually exclusive options') + self.assertEqual(str(e.exception), '--role and --member are mutually exclusive options') - # Invalid member provided - self.assertIsNone(get_cursor({}, get_cluster_initialized_with_leader(), None, {'dbname': 'foo'}, - member_name='invalid')) + # Invalid member provided + self.assertIsNone(get_cursor(get_cluster_initialized_with_leader(), None, {'dbname': 'foo'}, + member_name='invalid')) - # Valid member provided - self.assertIsNotNone(get_cursor({}, get_cluster_initialized_with_leader(), None, {'dbname': 'foo'}, - member_name='other')) + # Valid member provided + self.assertIsNotNone(get_cursor(get_cluster_initialized_with_leader(), None, {'dbname': 'foo'}, + member_name='other')) def test_parse_dcs(self): assert parse_dcs(None) is None @@ -102,23 +105,20 @@ def test_parse_dcs(self): self.assertRaises(PatroniCtlException, parse_dcs, 'invalid://test') def test_output_members(self): - scheduled_at = datetime.now(tzutc) + timedelta(seconds=600) - cluster = get_cluster_initialized_with_leader(Failover(1, 'foo', 'bar', scheduled_at)) - del cluster.members[1].data['conn_url'] - for fmt in ('pretty', 'json', 'yaml', 'topology'): - self.assertIsNone(output_members({}, cluster, name='abc', fmt=fmt)) - - with patch('click.echo') as mock_echo: - self.assertIsNone(output_members({}, cluster, name='abc', fmt='tsv')) - self.assertEqual(mock_echo.call_args[0][0], 'abc\tother\t\tReplica\trunning\t\tunknown') - - @patch('patroni.ctl.get_dcs') - @patch.object(PoolManager, 'request', Mock(return_value=MockResponse())) - def test_switchover(self, mock_get_dcs): - mock_get_dcs.return_value = self.e - mock_get_dcs.return_value.get_cluster = 
get_cluster_initialized_with_leader - mock_get_dcs.return_value.set_failover_value = Mock() - + with click.Context(click.Command('list')) as ctx: + ctx.obj = {'__config': {}} + scheduled_at = datetime.now(tzutc) + timedelta(seconds=600) + cluster = get_cluster_initialized_with_leader(Failover(1, 'foo', 'bar', scheduled_at)) + del cluster.members[1].data['conn_url'] + for fmt in ('pretty', 'json', 'yaml', 'topology'): + self.assertIsNone(output_members(cluster, name='abc', fmt=fmt)) + + with patch('click.echo') as mock_echo: + self.assertIsNone(output_members(cluster, name='abc', fmt='tsv')) + self.assertEqual(mock_echo.call_args[0][0], 'abc\tother\t\tReplica\trunning\t\tunknown') + + @patch('patroni.dcs.AbstractDCS.set_failover_value', Mock()) + def test_switchover(self): # Confirm result = self.runner.invoke(ctl, ['switchover', 'dummy', '--group', '0'], input='leader\nother\n\ny') self.assertEqual(result.exit_code, 0) @@ -180,12 +180,12 @@ def test_switchover(self, mock_get_dcs): self.assertIn('Member dummy is not the leader of cluster dummy', result.output) # Errors while sending Patroni REST API request - with patch.object(PoolManager, 'request', Mock(side_effect=Exception)): + with patch('patroni.ctl.request_patroni', Mock(side_effect=Exception)): result = self.runner.invoke(ctl, ['switchover', 'dummy', '--group', '0'], input='leader\nother\n2300-01-01T12:23:00\ny') self.assertIn('falling back to DCS', result.output) - with patch.object(PoolManager, 'request') as mock_api_request: + with patch('patroni.ctl.request_patroni') as mock_api_request: mock_api_request.return_value.status = 500 result = self.runner.invoke(ctl, ['switchover', 'dummy', '--group', '0'], input='leader\nother\n\ny') self.assertIn('Switchover failed', result.output) @@ -196,64 +196,58 @@ def test_switchover(self, mock_get_dcs): self.assertIn('Switchover failed', result.output) # No members available - mock_get_dcs.return_value.get_cluster = get_cluster_initialized_with_only_leader - result = 
self.runner.invoke(ctl, ['switchover', 'dummy', '--group', '0'], input='leader\nother\n\ny') - self.assertEqual(result.exit_code, 1) - self.assertIn('No candidates found to switchover to', result.output) + with patch('patroni.dcs.AbstractDCS.get_cluster', + Mock(return_value=get_cluster_initialized_with_only_leader())): + result = self.runner.invoke(ctl, ['switchover', 'dummy', '--group', '0'], input='leader\nother\n\ny') + self.assertEqual(result.exit_code, 1) + self.assertIn('No candidates found to switchover to', result.output) # No leader available - mock_get_dcs.return_value.get_cluster = get_cluster_initialized_without_leader - result = self.runner.invoke(ctl, ['switchover', 'dummy', '--group', '0'], input='leader\nother\n\ny') - self.assertEqual(result.exit_code, 1) - self.assertIn('This cluster has no leader', result.output) + with patch('patroni.dcs.AbstractDCS.get_cluster', Mock(return_value=get_cluster_initialized_without_leader())): + result = self.runner.invoke(ctl, ['switchover', 'dummy', '--group', '0'], input='leader\nother\n\ny') + self.assertEqual(result.exit_code, 1) + self.assertIn('This cluster has no leader', result.output) # Citus cluster, no group number specified result = self.runner.invoke(ctl, ['switchover', 'dummy', '--force'], input='\n') self.assertEqual(result.exit_code, 1) self.assertIn('For Citus clusters the --group must me specified', result.output) - @patch('patroni.ctl.get_dcs') - @patch.object(PoolManager, 'request', Mock(return_value=MockResponse())) - @patch('patroni.ctl.request_patroni', Mock(return_value=MockResponse())) - def test_failover(self, mock_get_dcs): - mock_get_dcs.return_value.set_failover_value = Mock() - + @patch('patroni.dcs.AbstractDCS.set_failover_value', Mock()) + def test_failover(self): # No candidate specified - mock_get_dcs.return_value.get_cluster = get_cluster_initialized_with_leader result = self.runner.invoke(ctl, ['failover', 'dummy'], input='0\n') self.assertIn('Failover could be performed only 
to a specific candidate', result.output) - cluster = get_cluster_initialized_with_leader(sync=('leader', 'other')) - # Temp test to check a fallback to switchover if leader is specified with patch('patroni.ctl._do_failover_or_switchover') as failover_func_mock: result = self.runner.invoke(ctl, ['failover', '--leader', 'leader', 'dummy'], input='0\n') self.assertIn('Supplying a leader name using this command is deprecated', result.output) - failover_func_mock.assert_called_once_with( - DEFAULT_CONFIG, 'switchover', 'dummy', None, 'leader', None, False) + failover_func_mock.assert_called_once_with('switchover', 'dummy', None, 'leader', None, False) # Failover to an async member in sync mode (confirm) + cluster = get_cluster_initialized_with_leader(sync=('leader', 'other')) cluster.members.append(Member(0, 'async', 28, {'api_url': 'http://127.0.0.1:8012/patroni'})) cluster.config.data['synchronous_mode'] = True - mock_get_dcs.return_value.get_cluster = Mock(return_value=cluster) - result = self.runner.invoke(ctl, ['failover', 'dummy', '--group', '0', '--candidate', 'async'], input='y\ny') - self.assertIn('Are you sure you want to failover to the asynchronous node async', result.output) + with patch('patroni.dcs.AbstractDCS.get_cluster', Mock(return_value=cluster)): + result = self.runner.invoke(ctl, + ['failover', 'dummy', '--group', '0', '--candidate', 'async'], input='y\ny') + self.assertIn('Are you sure you want to failover to the asynchronous node async', result.output) # Failover to an async member in sync mode (abort) - mock_get_dcs.return_value.get_cluster = Mock(return_value=cluster) result = self.runner.invoke(ctl, ['failover', 'dummy', '--group', '0', '--candidate', 'async'], input='N') self.assertEqual(result.exit_code, 1) @patch('patroni.dcs.dcs_modules', Mock(return_value=['patroni.dcs.dummy', 'patroni.dcs.etcd'])) def test_get_dcs(self): - self.assertRaises(PatroniCtlException, get_dcs, {'dummy': {}}, 'dummy', 0) + with 
click.Context(click.Command('list')) as ctx: + ctx.obj = {'__config': {'dummy': {}}} + self.assertRaises(PatroniCtlException, get_dcs, 'dummy', 0) @patch('patroni.psycopg.connect', psycopg_connect) @patch('patroni.ctl.query_member', Mock(return_value=([['mock column']], None))) - @patch('patroni.ctl.get_dcs') @patch.object(etcd.Client, 'read', etcd_read) - def test_query(self, mock_get_dcs): - mock_get_dcs.return_value = self.e + def test_query(self): # Mutually exclusive for role in self.TEST_ROLES: result = self.runner.invoke(ctl, ['query', 'alpha', '--member', 'abc', '--role', role]) @@ -286,31 +280,29 @@ def test_query(self, mock_get_dcs): def test_query_member(self): with patch('patroni.ctl.get_cursor', Mock(return_value=MockConnect().cursor())): for role in self.TEST_ROLES: - rows = query_member({}, None, None, None, None, role, 'SELECT pg_catalog.pg_is_in_recovery()', {}) + rows = query_member(None, None, None, None, role, 'SELECT pg_catalog.pg_is_in_recovery()', {}) self.assertTrue('False' in str(rows)) with patch.object(MockCursor, 'execute', Mock(side_effect=OperationalError('bla'))): - rows = query_member({}, None, None, None, None, 'replica', 'SELECT pg_catalog.pg_is_in_recovery()', {}) + rows = query_member(None, None, None, None, 'replica', 'SELECT pg_catalog.pg_is_in_recovery()', {}) with patch('patroni.ctl.get_cursor', Mock(return_value=None)): # No role nor member given -- generic message - rows = query_member({}, None, None, None, None, None, 'SELECT pg_catalog.pg_is_in_recovery()', {}) + rows = query_member(None, None, None, None, None, 'SELECT pg_catalog.pg_is_in_recovery()', {}) self.assertTrue('No connection is available' in str(rows)) # Member given -- message pointing to member - rows = query_member({}, None, None, None, 'foo', None, 'SELECT pg_catalog.pg_is_in_recovery()', {}) + rows = query_member(None, None, None, 'foo', None, 'SELECT pg_catalog.pg_is_in_recovery()', {}) self.assertTrue('No connection to member foo' in str(rows)) # Role 
given -- message pointing to role - rows = query_member({}, None, None, None, None, 'replica', 'SELECT pg_catalog.pg_is_in_recovery()', {}) + rows = query_member(None, None, None, None, 'replica', 'SELECT pg_catalog.pg_is_in_recovery()', {}) self.assertTrue('No connection to role replica' in str(rows)) with patch('patroni.ctl.get_cursor', Mock(side_effect=OperationalError('bla'))): - rows = query_member({}, None, None, None, None, 'replica', 'SELECT pg_catalog.pg_is_in_recovery()', {}) + rows = query_member(None, None, None, None, 'replica', 'SELECT pg_catalog.pg_is_in_recovery()', {}) - @patch('patroni.ctl.get_dcs') - def test_dsn(self, mock_get_dcs): - mock_get_dcs.return_value.get_cluster = get_cluster_initialized_with_leader + def test_dsn(self): result = self.runner.invoke(ctl, ['dsn', 'alpha']) assert 'host=127.0.0.1 port=5435' in result.output @@ -323,11 +315,8 @@ def test_dsn(self, mock_get_dcs): result = self.runner.invoke(ctl, ['dsn', 'alpha', '--member', 'dummy']) assert result.exit_code == 1 - @patch.object(PoolManager, 'request') - @patch('patroni.ctl.get_dcs') - def test_reload(self, mock_get_dcs, mock_post): - mock_get_dcs.return_value.get_cluster = get_cluster_initialized_with_leader - + @patch('patroni.ctl.request_patroni') + def test_reload(self, mock_post): result = self.runner.invoke(ctl, ['reload', 'alpha'], input='y') assert 'Failed: reload for member' in result.output @@ -339,10 +328,8 @@ def test_reload(self, mock_get_dcs, mock_post): result = self.runner.invoke(ctl, ['reload', 'alpha'], input='y') assert 'Reload request received for member' in result.output - @patch.object(PoolManager, 'request') - @patch('patroni.ctl.get_dcs') - def test_restart_reinit(self, mock_get_dcs, mock_post): - mock_get_dcs.return_value.get_cluster = get_cluster_initialized_with_leader + @patch('patroni.ctl.request_patroni') + def test_restart_reinit(self, mock_post): mock_post.return_value.status = 503 result = self.runner.invoke(ctl, ['restart', 'alpha'], 
input='now\ny\n') assert 'Failed: restart for' in result.output @@ -417,12 +404,10 @@ def test_restart_reinit(self, mock_get_dcs, mock_post): assert 'Failed: another restart is already' in result.output assert result.exit_code == 0 - @patch('patroni.ctl.get_dcs') - def test_remove(self, mock_get_dcs): - mock_get_dcs.return_value.get_cluster = get_cluster_initialized_with_leader + def test_remove(self): result = self.runner.invoke(ctl, ['remove', 'dummy'], input='\n') assert 'For Citus clusters the --group must me specified' in result.output - result = self.runner.invoke(ctl, ['-k', 'remove', 'alpha', '--group', '0'], input='alpha\nstandby') + result = self.runner.invoke(ctl, ['remove', 'alpha', '--group', '0'], input='alpha\nstandby') assert 'Please confirm' in result.output assert 'You are about to remove all' in result.output # Not typing an exact confirmation @@ -440,37 +425,36 @@ def test_remove(self, mock_get_dcs): assert result.exit_code == 0 def test_ctl(self): - self.runner.invoke(ctl, ['list']) - result = self.runner.invoke(ctl, ['--help']) assert 'Usage:' in result.output def test_get_any_member(self): - for role in self.TEST_ROLES: - self.assertIsNone(get_any_member({}, get_cluster_initialized_without_leader(), None, role=role)) + with click.Context(click.Command('list')) as ctx: + ctx.obj = {'__config': {}} + for role in self.TEST_ROLES: + self.assertIsNone(get_any_member(get_cluster_initialized_without_leader(), None, role=role)) - m = get_any_member({}, get_cluster_initialized_with_leader(), None, role=role) - self.assertEqual(m.name, 'leader') + m = get_any_member(get_cluster_initialized_with_leader(), None, role=role) + self.assertEqual(m.name, 'leader') def test_get_all_members(self): - for role in self.TEST_ROLES: - self.assertEqual(list(get_all_members({}, get_cluster_initialized_without_leader(), None, role=role)), []) - - r = list(get_all_members({}, get_cluster_initialized_with_leader(), None, role=role)) - self.assertEqual(len(r), 1) - 
self.assertEqual(r[0].name, 'leader') + with click.Context(click.Command('list')) as ctx: + ctx.obj = {'__config': {}} + for role in self.TEST_ROLES: + self.assertEqual(list(get_all_members(get_cluster_initialized_without_leader(), None, role=role)), []) - r = list(get_all_members({}, get_cluster_initialized_with_leader(), None, role='replica')) - self.assertEqual(len(r), 1) - self.assertEqual(r[0].name, 'other') + r = list(get_all_members(get_cluster_initialized_with_leader(), None, role=role)) + self.assertEqual(len(r), 1) + self.assertEqual(r[0].name, 'leader') - self.assertEqual(len(list(get_all_members({}, get_cluster_initialized_without_leader(), - None, role='replica'))), 2) + r = list(get_all_members(get_cluster_initialized_with_leader(), None, role='replica')) + self.assertEqual(len(r), 1) + self.assertEqual(r[0].name, 'other') - @patch('patroni.ctl.get_dcs') - def test_members(self, mock_get_dcs): - mock_get_dcs.return_value.get_cluster = get_cluster_initialized_with_leader + self.assertEqual(len(list(get_all_members(get_cluster_initialized_without_leader(), + None, role='replica'))), 2) + def test_members(self): result = self.runner.invoke(ctl, ['list']) assert '127.0.0.1' in result.output assert result.exit_code == 0 @@ -479,121 +463,94 @@ def test_members(self, mock_get_dcs): result = self.runner.invoke(ctl, ['list', '--group', '0']) assert 'Citus cluster: alpha (group: 0, 12345678901) -' in result.output - with patch('patroni.ctl.load_config', Mock(return_value={'scope': 'alpha'})): + config = get_default_config() + del config['citus'] + with patch('patroni.ctl.load_config', Mock(return_value=config)): result = self.runner.invoke(ctl, ['list']) assert 'Cluster: alpha (12345678901) -' in result.output with patch('patroni.ctl.load_config', Mock(return_value={})): self.runner.invoke(ctl, ['list']) - @patch('patroni.ctl.get_dcs') - def test_list_extended(self, mock_get_dcs): - mock_get_dcs.return_value = self.e - cluster = 
get_cluster_initialized_with_leader(sync=('leader', 'other')) - mock_get_dcs.return_value.get_cluster = Mock(return_value=cluster) - + def test_list_extended(self): result = self.runner.invoke(ctl, ['list', 'dummy', '--extended', '--timestamp']) assert '2100' in result.output assert 'Scheduled restart' in result.output - @patch('patroni.ctl.get_dcs') - def test_topology(self, mock_get_dcs): - mock_get_dcs.return_value = self.e + def test_topology(self): cluster = get_cluster_initialized_with_leader() - cascade_member = Member(0, 'cascade', 28, {'conn_url': 'postgres://replicator:rep-pass@127.0.0.1:5437/postgres', - 'api_url': 'http://127.0.0.1:8012/patroni', - 'state': 'running', - 'tags': {'replicatefrom': 'other'}, - }) - cascade_member_wrong_tags = Member(0, 'wrong_cascade', 28, - {'conn_url': 'postgres://replicator:rep-pass@127.0.0.1:5438/postgres', - 'api_url': 'http://127.0.0.1:8013/patroni', - 'state': 'running', - 'tags': {'replicatefrom': 'nonexistinghost'}, - }) - cluster.members.append(cascade_member) - cluster.members.append(cascade_member_wrong_tags) - mock_get_dcs.return_value.get_cluster = Mock(return_value=cluster) - result = self.runner.invoke(ctl, ['topology', 'dummy']) - assert '+\n| 0 | leader | 127.0.0.1:5435 | Leader |' in result.output - assert '|\n| 0 | + other | 127.0.0.1:5436 | Replica |' in result.output - assert '|\n| 0 | + cascade | 127.0.0.1:5437 | Replica |' in result.output - assert '|\n| 0 | + wrong_cascade | 127.0.0.1:5438 | Replica |' in result.output - - cluster = get_cluster_initialized_without_leader() - mock_get_dcs.return_value.get_cluster = Mock(return_value=cluster) - result = self.runner.invoke(ctl, ['topology', 'dummy']) - assert '+\n| 0 | + leader | 127.0.0.1:5435 | Replica |' in result.output - assert '|\n| 0 | + other | 127.0.0.1:5436 | Replica |' in result.output - - @patch('patroni.ctl.get_dcs') - @patch.object(PoolManager, 'request', Mock(return_value=MockResponse())) - def test_flush_restart(self, mock_get_dcs): - 
mock_get_dcs.return_value = self.e - mock_get_dcs.return_value.get_cluster = get_cluster_initialized_with_leader - + cluster.members.append(Member(0, 'cascade', 28, + {'conn_url': 'postgres://replicator:rep-pass@127.0.0.1:5437/postgres', + 'api_url': 'http://127.0.0.1:8012/patroni', 'state': 'running', + 'tags': {'replicatefrom': 'other'}})) + cluster.members.append(Member(0, 'wrong_cascade', 28, + {'conn_url': 'postgres://replicator:rep-pass@127.0.0.1:5438/postgres', + 'api_url': 'http://127.0.0.1:8013/patroni', 'state': 'running', + 'tags': {'replicatefrom': 'nonexistinghost'}})) + with patch('patroni.dcs.AbstractDCS.get_cluster', Mock(return_value=cluster)): + result = self.runner.invoke(ctl, ['topology', 'dummy']) + assert '+\n| 0 | leader | 127.0.0.1:5435 | Leader |' in result.output + assert '|\n| 0 | + other | 127.0.0.1:5436 | Replica |' in result.output + assert '|\n| 0 | + cascade | 127.0.0.1:5437 | Replica |' in result.output + assert '|\n| 0 | + wrong_cascade | 127.0.0.1:5438 | Replica |' in result.output + + with patch('patroni.dcs.AbstractDCS.get_cluster', Mock(return_value=get_cluster_initialized_without_leader())): + result = self.runner.invoke(ctl, ['topology', 'dummy']) + assert '+\n| 0 | + leader | 127.0.0.1:5435 | Replica |' in result.output + assert '|\n| 0 | + other | 127.0.0.1:5436 | Replica |' in result.output + + @patch('patroni.dcs.AbstractDCS.get_cluster', Mock(return_value=get_cluster_initialized_with_leader())) + def test_flush_restart(self): for role in self.TEST_ROLES: - result = self.runner.invoke(ctl, ['-k', 'flush', 'dummy', 'restart', '-r', role], input='y') + result = self.runner.invoke(ctl, ['flush', 'dummy', 'restart', '-r', role], input='y') assert 'No scheduled restart' in result.output result = self.runner.invoke(ctl, ['flush', 'dummy', 'restart', '--force']) assert 'Success: flush scheduled restart' in result.output - with patch.object(PoolManager, 'request', return_value=MockResponse(404)): + with 
patch('patroni.ctl.request_patroni', Mock(return_value=MockResponse(404))): result = self.runner.invoke(ctl, ['flush', 'dummy', 'restart', '--force']) assert 'Failed: flush scheduled restart' in result.output - @patch('patroni.ctl.get_dcs') - @patch.object(PoolManager, 'request', Mock(return_value=MockResponse())) - def test_flush_switchover(self, mock_get_dcs): - mock_get_dcs.return_value = self.e - - mock_get_dcs.return_value.get_cluster = get_cluster_initialized_with_leader - result = self.runner.invoke(ctl, ['flush', 'dummy', 'switchover']) - assert 'No pending scheduled switchover' in result.output + def test_flush_switchover(self): + with patch('patroni.dcs.AbstractDCS.get_cluster', Mock(return_value=get_cluster_initialized_with_leader())): + result = self.runner.invoke(ctl, ['flush', 'dummy', 'switchover']) + assert 'No pending scheduled switchover' in result.output scheduled_at = datetime.now(tzutc) + timedelta(seconds=600) - mock_get_dcs.return_value.get_cluster = Mock( - return_value=get_cluster_initialized_with_leader(Failover(1, 'a', 'b', scheduled_at))) - result = self.runner.invoke(ctl, ['flush', 'dummy', 'switchover']) - assert result.output.startswith('Success: ') + with patch('patroni.dcs.AbstractDCS.get_cluster', + Mock(return_value=get_cluster_initialized_with_leader(Failover(1, 'a', 'b', scheduled_at)))): + result = self.runner.invoke(ctl, ['-k', 'flush', 'dummy', 'switchover']) + assert result.output.startswith('Success: ') - mock_get_dcs.return_value.manual_failover = Mock() - with patch.object(PoolManager, 'request', side_effect=[MockResponse(409), Exception]): - result = self.runner.invoke(ctl, ['flush', 'dummy', 'switchover']) - assert 'Could not find any accessible member of cluster' in result.output + with patch('patroni.ctl.request_patroni', side_effect=[MockResponse(409), Exception]), \ + patch('patroni.dcs.AbstractDCS.manual_failover', Mock()): + result = self.runner.invoke(ctl, ['flush', 'dummy', 'switchover']) + assert 'Could not 
find any accessible member of cluster' in result.output - @patch.object(PoolManager, 'request') - @patch('patroni.ctl.get_dcs') @patch('patroni.ctl.polling_loop', Mock(return_value=[1])) - def test_pause_cluster(self, mock_get_dcs, mock_post): - mock_get_dcs.return_value = self.e - mock_get_dcs.return_value.get_cluster = get_cluster_initialized_with_leader - - mock_post.return_value.status = 500 - result = self.runner.invoke(ctl, ['pause', 'dummy']) - assert 'Failed' in result.output + def test_pause_cluster(self): + with patch('patroni.ctl.request_patroni', Mock(return_value=MockResponse(500))): + result = self.runner.invoke(ctl, ['pause', 'dummy']) + assert 'Failed' in result.output - mock_post.return_value.status = 200 with patch('patroni.config.GlobalConfig.is_paused', PropertyMock(return_value=True)): result = self.runner.invoke(ctl, ['pause', 'dummy']) assert 'Cluster is already paused' in result.output result = self.runner.invoke(ctl, ['pause', 'dummy', '--wait']) assert "'pause' request sent" in result.output - mock_get_dcs.return_value.get_cluster = Mock(side_effect=[get_cluster_initialized_with_leader(), - get_cluster(None, None, [], None, None)]) - self.runner.invoke(ctl, ['pause', 'dummy', '--wait']) - member = Member(1, 'other', 28, {}) - mock_get_dcs.return_value.get_cluster = Mock(side_effect=[get_cluster_initialized_with_leader(), - get_cluster(None, None, [member], None, None)]) - self.runner.invoke(ctl, ['pause', 'dummy', '--wait']) - - @patch.object(PoolManager, 'request') - @patch('patroni.ctl.get_dcs') - def test_resume_cluster(self, mock_get_dcs, mock_post): - mock_get_dcs.return_value = self.e - mock_get_dcs.return_value.get_cluster = get_cluster_initialized_with_leader + with patch('patroni.dcs.AbstractDCS.get_cluster', + Mock(side_effect=[get_cluster_initialized_with_leader(), get_cluster(None, None, [], None, None)])): + self.runner.invoke(ctl, ['pause', 'dummy', '--wait']) + with patch('patroni.dcs.AbstractDCS.get_cluster', + 
Mock(side_effect=[get_cluster_initialized_with_leader(), + get_cluster(None, None, [Member(1, 'other', 28, {})], None, None)])): + self.runner.invoke(ctl, ['pause', 'dummy', '--wait']) + + @patch('patroni.ctl.request_patroni') + @patch('patroni.dcs.AbstractDCS.get_cluster', Mock(return_value=get_cluster_initialized_with_leader())) + def test_resume_cluster(self, mock_post): mock_post.return_value.status = 200 with patch('patroni.config.GlobalConfig.is_paused', PropertyMock(return_value=False)): result = self.runner.invoke(ctl, ['resume', 'dummy']) @@ -701,67 +658,53 @@ def test_invoke_editor(self, mock_subprocess_call): with patch('shutil.which', Mock(return_value=e)): self.assertRaises(PatroniCtlException, invoke_editor, 'foo: bar\n', 'test') - @patch('patroni.ctl.get_dcs') - def test_show_config(self, mock_get_dcs): - mock_get_dcs.return_value = self.e - mock_get_dcs.return_value.get_cluster = get_cluster_initialized_with_leader + def test_show_config(self): self.runner.invoke(ctl, ['show-config', 'dummy']) - @patch('patroni.ctl.get_dcs') @patch('subprocess.call', Mock(return_value=0)) - def test_edit_config(self, mock_get_dcs): - mock_get_dcs.return_value = self.e - mock_get_dcs.return_value.get_cluster = get_cluster_initialized_with_leader - mock_get_dcs.return_value.set_config_value = Mock(return_value=False) + def test_edit_config(self): os.environ['EDITOR'] = 'true' self.runner.invoke(ctl, ['edit-config', 'dummy']) self.runner.invoke(ctl, ['edit-config', 'dummy', '-s', 'foo=bar']) self.runner.invoke(ctl, ['edit-config', 'dummy', '--replace', 'postgres0.yml']) self.runner.invoke(ctl, ['edit-config', 'dummy', '--apply', '-'], input='foo: bar') self.runner.invoke(ctl, ['edit-config', 'dummy', '--force', '--apply', '-'], input='foo: bar') - mock_get_dcs.return_value.set_config_value.return_value = True - self.runner.invoke(ctl, ['edit-config', 'dummy', '--force', '--apply', '-'], input='foo: bar') - mock_get_dcs.return_value.get_cluster = 
Mock(return_value=Cluster.empty()) - result = self.runner.invoke(ctl, ['edit-config', 'dummy']) - assert result.exit_code == 1 - assert 'The config key does not exist in the cluster dummy' in result.output - - @patch('patroni.ctl.get_dcs') - def test_version(self, mock_get_dcs): - mock_get_dcs.return_value = self.e - mock_get_dcs.return_value.get_cluster = get_cluster_initialized_with_leader - with patch.object(PoolManager, 'request') as mocked: - result = self.runner.invoke(ctl, ['version']) - assert 'patronictl version' in result.output - mocked.return_value.data = b'{"patroni":{"version":"1.2.3"},"server_version": 100001}' - result = self.runner.invoke(ctl, ['version', 'dummy']) - assert '1.2.3' in result.output - with patch.object(PoolManager, 'request', Mock(side_effect=Exception)): - result = self.runner.invoke(ctl, ['version', 'dummy']) - assert 'failed to get version' in result.output - - @patch('patroni.ctl.get_dcs') - def test_history(self, mock_get_dcs): - mock_get_dcs.return_value.get_cluster = Mock() - mock_get_dcs.return_value.get_cluster.return_value.history.lines = [[1, 67176, 'no recovery target specified']] - result = self.runner.invoke(ctl, ['history']) - assert 'Reason' in result.output + with patch('patroni.dcs.etcd.Etcd.set_config_value', Mock(return_value=True)): + self.runner.invoke(ctl, ['edit-config', 'dummy', '--force', '--apply', '-'], input='foo: bar') + with patch('patroni.dcs.AbstractDCS.get_cluster', Mock(return_value=Cluster.empty())): + result = self.runner.invoke(ctl, ['edit-config', 'dummy']) + assert result.exit_code == 1 + assert 'The config key does not exist in the cluster dummy' in result.output + + @patch('patroni.ctl.request_patroni') + def test_version(self, mock_request): + result = self.runner.invoke(ctl, ['version']) + assert 'patronictl version' in result.output + mock_request.return_value.data = b'{"patroni":{"version":"1.2.3"},"server_version": 100001}' + result = self.runner.invoke(ctl, ['version', 'dummy']) + 
assert '1.2.3' in result.output + mock_request.side_effect = Exception + result = self.runner.invoke(ctl, ['version', 'dummy']) + assert 'failed to get version' in result.output + + def test_history(self): + with patch('patroni.dcs.AbstractDCS.get_cluster') as mock_get_cluster: + mock_get_cluster.return_value.history.lines = [[1, 67176, 'no recovery target specified']] + result = self.runner.invoke(ctl, ['history']) + assert 'Reason' in result.output def test_format_pg_version(self): self.assertEqual(format_pg_version(100001), '10.1') self.assertEqual(format_pg_version(90605), '9.6.5') - @patch('patroni.ctl.get_dcs') - def test_get_members(self, mock_get_dcs): - mock_get_dcs.return_value = self.e - mock_get_dcs.return_value.get_cluster = get_cluster_not_initialized_without_leader - result = self.runner.invoke(ctl, ['reinit', 'dummy']) - assert "cluster doesn\'t have any members" in result.output + def test_get_members(self): + with patch('patroni.dcs.AbstractDCS.get_cluster', + Mock(return_value=get_cluster_not_initialized_without_leader())): + result = self.runner.invoke(ctl, ['reinit', 'dummy']) + assert "cluster doesn\'t have any members" in result.output @patch('time.sleep', Mock()) - @patch('patroni.ctl.get_dcs') - def test_reinit_wait(self, mock_get_dcs): - mock_get_dcs.return_value.get_cluster = get_cluster_initialized_with_leader + def test_reinit_wait(self): with patch.object(PoolManager, 'request') as mocked: mocked.side_effect = [Mock(data=s, status=200) for s in [b"reinitialize", b'{"state":"creating replica"}', b'{"state":"running"}']] From 5dab73553457d486474dd8ab53cf27fabdad6d7a Mon Sep 17 00:00:00 2001 From: Alexander Kukushkin Date: Wed, 15 Nov 2023 11:25:46 +0100 Subject: [PATCH 06/33] Compatibility with antient mock (#2951) Just in case is someone still uses ubuntu 18.04 --- tests/test_barman_recover.py | 32 +++++++++++++++++--------------- 1 file changed, 17 insertions(+), 15 deletions(-) diff --git a/tests/test_barman_recover.py 
b/tests/test_barman_recover.py index 4334d6ae9..c0efda838 100644 --- a/tests/test_barman_recover.py +++ b/tests/test_barman_recover.py @@ -1,5 +1,6 @@ import logging -from mock import MagicMock, Mock, call, patch +import mock +from mock import MagicMock, Mock, patch import unittest from urllib3.exceptions import MaxRetryError @@ -156,18 +157,19 @@ def test__create_recovery_operation(self, mock_post_request, mock_sleep, mock_lo "Maximum number of retries exceeded for method BarmanRecover._create_recovery_operation.") self.assertEqual(mock_sleep.call_count, self.br.max_retries) - mock_sleep.assert_has_calls([call(self.br.retry_wait)] * self.br.max_retries) + + mock_sleep.assert_has_calls([mock.call(self.br.retry_wait)] * self.br.max_retries) self.assertEqual(mock_logging.call_count, self.br.max_retries) for i in range(mock_logging.call_count): - call_args = mock_logging.mock_calls[i].args + call_args = mock_logging.call_args_list[i][0] self.assertEqual(len(call_args), 5) self.assertEqual(call_args[0], "Attempt %d of %d on method %s failed with %r.") self.assertEqual(call_args[1], i + 1) self.assertEqual(call_args[2], self.br.max_retries) self.assertEqual(call_args[3], "BarmanRecover._create_recovery_operation") self.assertIsInstance(call_args[4], KeyError) - self.assertEqual(repr(call_args[4]), "KeyError('operation_id')") + self.assertEqual(call_args[4].args, ('operation_id',)) @patch("logging.warning") @patch("time.sleep") @@ -190,18 +192,18 @@ def test__get_recovery_operation_status(self, mock_get_request, mock_sleep, mock "Maximum number of retries exceeded for method BarmanRecover._get_recovery_operation_status.") self.assertEqual(mock_sleep.call_count, self.br.max_retries) - mock_sleep.assert_has_calls([call(self.br.retry_wait)] * self.br.max_retries) + mock_sleep.assert_has_calls([mock.call(self.br.retry_wait)] * self.br.max_retries) self.assertEqual(mock_logging.call_count, self.br.max_retries) for i in range(mock_logging.call_count): - call_args = 
mock_logging.mock_calls[i].args + call_args = mock_logging.call_args_list[i][0] self.assertEqual(len(call_args), 5) self.assertEqual(call_args[0], "Attempt %d of %d on method %s failed with %r.") self.assertEqual(call_args[1], i + 1) self.assertEqual(call_args[2], self.br.max_retries) self.assertEqual(call_args[3], "BarmanRecover._get_recovery_operation_status") self.assertIsInstance(call_args[4], KeyError) - self.assertEqual(repr(call_args[4]), "KeyError('status')") + self.assertEqual(call_args[4].args, ('status',)) @patch.object(BarmanRecover, "_get_recovery_operation_status") @patch("time.sleep") @@ -232,16 +234,16 @@ def test_restore_backup(self, mock_create_op, mock_log_critical, mock_log_info, mock_create_op.assert_called_once() self.assertEqual(mock_get_status.call_count, 21) - mock_get_status.assert_has_calls([call("some_id")] * 21) + mock_get_status.assert_has_calls([mock.call("some_id")] * 21) self.assertEqual(mock_log_info.call_count, 21) - mock_log_info.assert_has_calls([call("Created the recovery operation with ID %s", "some_id")] - + [call("Recovery operation %s is still in progress", "some_id")] * 20) + mock_log_info.assert_has_calls([mock.call("Created the recovery operation with ID %s", "some_id")] + + [mock.call("Recovery operation %s is still in progress", "some_id")] * 20) mock_log_critical.assert_not_called() self.assertEqual(mock_sleep.call_count, 20) - mock_sleep.assert_has_calls([call(LOOP_WAIT)] * 20) + mock_sleep.assert_has_calls([mock.call(LOOP_WAIT)] * 20) # failed fast restore mock_create_op.reset_mock() @@ -271,16 +273,16 @@ def test_restore_backup(self, mock_create_op, mock_log_critical, mock_log_info, mock_create_op.assert_called_once() self.assertEqual(mock_get_status.call_count, 21) - mock_get_status.assert_has_calls([call("some_id")] * 21) + mock_get_status.assert_has_calls([mock.call("some_id")] * 21) self.assertEqual(mock_log_info.call_count, 21) - mock_log_info.assert_has_calls([call("Created the recovery operation with ID %s", 
"some_id")] - + [call("Recovery operation %s is still in progress", "some_id")] * 20) + mock_log_info.assert_has_calls([mock.call("Created the recovery operation with ID %s", "some_id")] + + [mock.call("Recovery operation %s is still in progress", "some_id")] * 20) mock_log_critical.assert_not_called() self.assertEqual(mock_sleep.call_count, 20) - mock_sleep.assert_has_calls([call(LOOP_WAIT)] * 20) + mock_sleep.assert_has_calls([mock.call(LOOP_WAIT)] * 20) # create retries exceeded mock_log_info.reset_mock() From 70b0991e6a7153a67375ca52c67696739a107506 Mon Sep 17 00:00:00 2001 From: Alexander Kukushkin Date: Mon, 20 Nov 2023 10:22:52 +0100 Subject: [PATCH 07/33] Bump pyright to 1.1.336 (#2952) and fix newly reported issues --- .github/workflows/tests.yaml | 2 +- patroni/validator.py | 30 +++++++++++++++++++----------- typings/urllib3/_collections.pyi | 2 +- 3 files changed, 21 insertions(+), 13 deletions(-) diff --git a/.github/workflows/tests.yaml b/.github/workflows/tests.yaml index 81119f258..df911e542 100644 --- a/.github/workflows/tests.yaml +++ b/.github/workflows/tests.yaml @@ -174,7 +174,7 @@ jobs: - uses: jakebailey/pyright-action@v1 with: - version: 1.1.333 + version: 1.1.336 docs: runs-on: ubuntu-latest diff --git a/patroni/validator.py b/patroni/validator.py index 016f4b2e7..d0b168be6 100644 --- a/patroni/validator.py +++ b/patroni/validator.py @@ -9,7 +9,7 @@ import shutil import socket -from typing import Any, Dict, Union, Iterator, List, Optional as OptionalType, Tuple +from typing import Any, Dict, Union, Iterator, List, Optional as OptionalType, Tuple, TYPE_CHECKING from .collections import CaseInsensitiveSet @@ -200,6 +200,8 @@ def get_bin_name(bin_name: str) -> str: :returns: value of ``postgresql.bin_name[*bin_name*]``, if present, otherwise *bin_name*. 
""" + if TYPE_CHECKING: # pragma: no cover + assert isinstance(schema.data, dict) return (schema.data.get('postgresql', {}).get('bin_name', {}) or {}).get(bin_name, bin_name) @@ -239,6 +241,8 @@ def validate_data_dir(data_dir: str) -> bool: if not os.path.isdir(os.path.join(data_dir, waldir)): raise ConfigParseError("data dir for the cluster is not empty, but doesn't contain" " \"{}\" directory".format(waldir)) + if TYPE_CHECKING: # pragma: no cover + assert isinstance(schema.data, dict) bin_dir = schema.data.get("postgresql", {}).get("bin_dir", None) major_version = get_major_version(bin_dir, get_bin_name('postgres')) if pgversion != major_version: @@ -274,6 +278,8 @@ def validate_binary_name(bin_name: str) -> bool: """ if not bin_name: raise ConfigParseError("is an empty string") + if TYPE_CHECKING: # pragma: no cover + assert isinstance(schema.data, dict) bin_dir = schema.data.get('postgresql', {}).get('bin_dir', None) if not shutil.which(bin_name, path=bin_dir): raise ConfigParseError(f"does not contain '{bin_name}' in '{bin_dir or '$PATH'}'") @@ -523,7 +529,7 @@ class Schema(object): * :class:`dict`: dictionary representing the YAML configuration tree. """ - def __init__(self, validator: Any) -> None: + def __init__(self, validator: Union[Dict[Any, Any], List[Any], Any]) -> None: """Create a :class:`Schema` object. .. note:: @@ -614,7 +620,7 @@ def __call__(self, data: Any) -> List[str]: errors.append(str(i)) return errors - def validate(self, data: Any) -> Iterator[Result]: + def validate(self, data: Union[Dict[Any, Any], Any]) -> Iterator[Result]: """Perform all validations from the schema against the given configuration. It first checks that *data* argument type is compliant with the type of ``validator`` attribute. @@ -638,11 +644,8 @@ def validate(self, data: Any) -> Iterator[Result]: # iterable objects in the structure, until we eventually reach a leaf node to validate its value. 
if isinstance(self.validator, str): yield Result(isinstance(self.data, str), "is not a string", level=1, data=self.data) - elif issubclass(type(self.validator), type): - validator = self.validator - if self.validator == str: - validator = str - yield Result(isinstance(self.data, validator), + elif isinstance(self.validator, type): + yield Result(isinstance(self.data, self.validator), "is not {}".format(_get_type_name(self.validator)), level=1, data=self.data) elif callable(self.validator): if hasattr(self.validator, "expected_type"): @@ -689,7 +692,7 @@ def iter(self) -> Iterator[Result]: for v in Schema(self.validator[0]).validate(value): yield Result(v.status, v.error, path=(str(key) + ("." + v.path if v.path else "")), level=v.level, data=value) - elif isinstance(self.validator, Directory): + elif isinstance(self.validator, Directory) and isinstance(self.data, str): yield from self.validator.validate(self.data) elif isinstance(self.validator, Or): yield from self.iter_or() @@ -701,6 +704,9 @@ def iter_dict(self) -> Iterator[Result]: """ # One key in `validator` attribute (`key` variable) can be mapped to one or more keys in `data` attribute (`d` # variable), depending on the `key` type. + if TYPE_CHECKING: # pragma: no cover + assert isinstance(self.validator, dict) + assert isinstance(self.data, dict) for key in self.validator.keys(): if isinstance(key, AtMostOne) and len(list(self._data_key(key))) > 1: yield Result(False, f"Multiple of {key.args} provided") @@ -730,6 +736,8 @@ def iter_or(self) -> Iterator[Result]: :yields: objects with the error message related to the failure, if any check fails. 
""" + if TYPE_CHECKING: # pragma: no cover + assert isinstance(self.validator, Or) results: List[Result] = [] for a in self.validator.args: r: List[Result] = [] @@ -766,7 +774,7 @@ def _data_key(self, key: Union[str, Optional, Or, AtMostOne]) -> Iterator[str]: yield key.name # If the key was defined as an `Or` object in `validator` attribute, then each of its values are the keys to # access the `data` dictionary. - elif isinstance(key, Or): + elif isinstance(key, Or) and isinstance(self.data, dict): # At least one of the `Or` entries should be available in the `data` dictionary. If we find at least one of # them in `data`, then we return all found entries so the caller method can validate them all. if any([item in self.data for item in key.args]): @@ -780,7 +788,7 @@ def _data_key(self, key: Union[str, Optional, Or, AtMostOne]) -> Iterator[str]: yield item # If the key was defined as a `AtMostOne` object in `validator` attribute, then each of its values # are the keys to access the `data` dictionary. - elif isinstance(key, AtMostOne): + elif isinstance(key, AtMostOne) and isinstance(self.data, dict): # Yield back all of the entries from the `data` dictionary, each will be validated and then counted # to inform us if we've provided too many for item in key.args: diff --git a/typings/urllib3/_collections.pyi b/typings/urllib3/_collections.pyi index b7a8cebe7..52b9f577e 100644 --- a/typings/urllib3/_collections.pyi +++ b/typings/urllib3/_collections.pyi @@ -1,4 +1,4 @@ -from typing import Any +from typing import Any, MutableMapping class HTTPHeaderDict(MutableMapping[str, str]): def __init__(self, headers=None, **kwargs) -> None: ... def __setitem__(self, key, val) -> None: ... 
From ac6f6ae1c2075b5f9e47b2be5bf9c2c4a9f46afd Mon Sep 17 00:00:00 2001 From: Ali Mehraji Date: Wed, 22 Nov 2023 11:25:51 +0330 Subject: [PATCH 08/33] Add ETCDCTL_API=3 env to Dockerfiles and update docker/README.md (#2946) --- Dockerfile | 1 + Dockerfile.citus | 1 + docker-compose-citus.yml | 2 - docker/README.md | 334 +++++++++++++++++++-------------------- 4 files changed, 169 insertions(+), 169 deletions(-) diff --git a/Dockerfile b/Dockerfile index c5b927ee6..3e638518d 100644 --- a/Dockerfile +++ b/Dockerfile @@ -143,6 +143,7 @@ ARG PGBIN=/usr/lib/postgresql/$PG_MAJOR/bin ENV LC_ALL=$LC_ALL LANG=$LANG EDITOR=/usr/bin/editor ENV PGDATA=$PGDATA PATH=$PATH:$PGBIN +ENV ETCDCTL_API=3 COPY patroni /patroni/ COPY extras/confd/conf.d/haproxy.toml /etc/confd/conf.d/ diff --git a/Dockerfile.citus b/Dockerfile.citus index 7e6ec18c0..f52a36e7f 100644 --- a/Dockerfile.citus +++ b/Dockerfile.citus @@ -164,6 +164,7 @@ ARG PGBIN=/usr/lib/postgresql/$PG_MAJOR/bin ENV LC_ALL=$LC_ALL LANG=$LANG EDITOR=/usr/bin/editor ENV PGDATA=$PGDATA PATH=$PATH:$PGBIN +ENV ETCDCTL_API=3 COPY patroni /patroni/ COPY extras/confd/conf.d/haproxy.toml /etc/confd/conf.d/ diff --git a/docker-compose-citus.yml b/docker-compose-citus.yml index 7ff2a2c5c..cd63d5833 100644 --- a/docker-compose-citus.yml +++ b/docker-compose-citus.yml @@ -19,7 +19,6 @@ services: image: ${PATRONI_TEST_IMAGE:-patroni-citus} networks: [ demo ] environment: - ETCDCTL_API: 3 ETCD_LISTEN_PEER_URLS: http://0.0.0.0:2380 ETCD_LISTEN_CLIENT_URLS: http://0.0.0.0:2379 ETCD_INITIAL_CLUSTER: etcd1=http://etcd1:2380,etcd2=http://etcd2:2380,etcd3=http://etcd3:2380 @@ -53,7 +52,6 @@ services: - "5001:5001" # Load-balancing across workers primaries command: haproxy environment: &haproxy_env - ETCDCTL_API: 3 ETCDCTL_ENDPOINTS: http://etcd1:2379,http://etcd2:2379,http://etcd3:2379 PATRONI_ETCD3_HOSTS: "'etcd1:2379','etcd2:2379','etcd3:2379'" PATRONI_SCOPE: demo diff --git a/docker/README.md b/docker/README.md index da87842f5..0b7f3d584 100644 
--- a/docker/README.md +++ b/docker/README.md @@ -19,102 +19,97 @@ The haproxy listens on ports 5000 (connects to the primary) and 5001 (does load- Example session: - $ docker-compose up -d - Creating demo-haproxy ... - Creating demo-patroni2 ... - Creating demo-patroni1 ... - Creating demo-patroni3 ... - Creating demo-etcd2 ... - Creating demo-etcd1 ... - Creating demo-etcd3 ... - Creating demo-haproxy - Creating demo-patroni2 - Creating demo-patroni1 - Creating demo-patroni3 - Creating demo-etcd1 - Creating demo-etcd2 - Creating demo-etcd2 ... done + $ docker compose up -d + ✔ Network patroni_demo Created + ✔ Container demo-etcd1 Started + ✔ Container demo-haproxy Started + ✔ Container demo-patroni1 Started + ✔ Container demo-patroni2 Started + ✔ Container demo-patroni3 Started + ✔ Container demo-etcd2 Started + ✔ Container demo-etcd3 Started $ docker ps - CONTAINER ID IMAGE COMMAND CREATED STATUS PORTS NAMES - 5b7a90b4cfbf patroni "/bin/sh /entrypoint…" 29 seconds ago Up 27 seconds demo-etcd2 - e30eea5222f2 patroni "/bin/sh /entrypoint…" 29 seconds ago Up 27 seconds demo-etcd1 - 83bcf3cb208f patroni "/bin/sh /entrypoint…" 29 seconds ago Up 27 seconds demo-etcd3 - 922532c56e7d patroni "/bin/sh /entrypoint…" 29 seconds ago Up 28 seconds demo-patroni3 - 14f875e445f3 patroni "/bin/sh /entrypoint…" 29 seconds ago Up 28 seconds demo-patroni2 - 110d1073b383 patroni "/bin/sh /entrypoint…" 29 seconds ago Up 28 seconds demo-patroni1 - 5af5e6e36028 patroni "/bin/sh /entrypoint…" 29 seconds ago Up 28 seconds 0.0.0.0:5000-5001->5000-5001/tcp demo-haproxy + CONTAINER ID IMAGE COMMAND CREATED STATUS PORTS NAMES + a37bcec56726 patroni "/bin/sh /entrypoint…" 15 minutes ago Up 15 minutes demo-etcd3 + 034ab73868a8 patroni "/bin/sh /entrypoint…" 15 minutes ago Up 15 minutes demo-patroni2 + 03837736f710 patroni "/bin/sh /entrypoint…" 15 minutes ago Up 15 minutes demo-patroni3 + 22815c3d85b3 patroni "/bin/sh /entrypoint…" 15 minutes ago Up 15 minutes demo-etcd2 + 814b4304d132 patroni 
"/bin/sh /entrypoint…" 15 minutes ago Up 15 minutes 0.0.0.0:5000-5001->5000-5001/tcp, :::5000-5001->5000-5001/tcp demo-haproxy + 6375b0ba2d0a patroni "/bin/sh /entrypoint…" 15 minutes ago Up 15 minutes demo-patroni1 + aef8bf3ee91f patroni "/bin/sh /entrypoint…" 15 minutes ago Up 15 minutes demo-etcd1 $ docker logs demo-patroni1 - 2019-02-20 08:19:32,714 INFO: Failed to import patroni.dcs.consul - 2019-02-20 08:19:32,737 INFO: Selected new etcd server http://etcd3:2379 - 2019-02-20 08:19:35,140 INFO: Lock owner: None; I am patroni1 - 2019-02-20 08:19:35,174 INFO: trying to bootstrap a new cluster + 2023-11-21 09:04:33,547 INFO: Selected new etcd server http://172.29.0.3:2379 + 2023-11-21 09:04:33,605 INFO: Lock owner: None; I am patroni1 + 2023-11-21 09:04:33,693 INFO: trying to bootstrap a new cluster ... - 2019-02-20 08:19:39,310 INFO: postmaster pid=37 - 2019-02-20 08:19:39.314 UTC [37] LOG: listening on IPv4 address "0.0.0.0", port 5432 - 2019-02-20 08:19:39.321 UTC [37] LOG: listening on Unix socket "/var/run/postgresql/.s.PGSQL.5432" - 2019-02-20 08:19:39.353 UTC [39] LOG: database system was shut down at 2019-02-20 08:19:36 UTC - 2019-02-20 08:19:39.354 UTC [40] FATAL: the database system is starting up - localhost:5432 - rejecting connections - 2019-02-20 08:19:39.369 UTC [37] LOG: database system is ready to accept connections + 2023-11-21 09:04:34.920 UTC [43] LOG: starting PostgreSQL 15.5 (Debian 15.5-1.pgdg120+1) on x86_64-pc-linux-gnu, compiled by gcc (Debian 12.2.0-14) 12.2.0, 64-bit + 2023-11-21 09:04:34.921 UTC [43] LOG: listening on IPv4 address "0.0.0.0", port 5432 + 2023-11-21 09:04:34,922 INFO: postmaster pid=43 + 2023-11-21 09:04:34.922 UTC [43] LOG: listening on Unix socket "/var/run/postgresql/.s.PGSQL.5432" + 2023-11-21 09:04:34.925 UTC [47] LOG: database system was shut down at 2023-11-21 09:04:34 UTC + 2023-11-21 09:04:34.928 UTC [43] LOG: database system is ready to accept connections localhost:5432 - accepting connections - 2019-02-20 
08:19:39,383 INFO: establishing a new patroni connection to the postgres cluster - 2019-02-20 08:19:39,408 INFO: running post_bootstrap - 2019-02-20 08:19:39,432 WARNING: Could not activate Linux watchdog device: "Can't open watchdog device: [Errno 2] No such file or directory: '/dev/watchdog'" - 2019-02-20 08:19:39,515 INFO: initialized a new cluster - 2019-02-20 08:19:49,424 INFO: Lock owner: patroni1; I am patroni1 - 2019-02-20 08:19:49,447 INFO: Lock owner: patroni1; I am patroni1 - 2019-02-20 08:19:49,480 INFO: no action. i am the leader with the lock - 2019-02-20 08:19:59,422 INFO: Lock owner: patroni1; I am patroni1 + localhost:5432 - accepting connections + 2023-11-21 09:04:34,938 INFO: establishing a new patroni heartbeat connection to postgres + 2023-11-21 09:04:34,992 INFO: running post_bootstrap + 2023-11-21 09:04:35,004 WARNING: User creation via "bootstrap.users" will be removed in v4.0.0 + 2023-11-21 09:04:35,009 WARNING: Could not activate Linux watchdog device: Can't open watchdog device: [Errno 2] No such file or directory: '/dev/watchdog' + 2023-11-21 09:04:35,189 INFO: initialized a new cluster + 2023-11-21 09:04:35,328 INFO: no action. I am (patroni1), the leader with the lock + 2023-11-21 09:04:43,824 INFO: establishing a new patroni restapi connection to postgres + 2023-11-21 09:04:45,322 INFO: no action. I am (patroni1), the leader with the lock + 2023-11-21 09:04:55,320 INFO: no action. I am (patroni1), the leader with the lock + ... 
$ docker exec -ti demo-patroni1 bash postgres@patroni1:~$ patronictl list - +---------+----------+------------+--------+---------+----+-----------+ - | Cluster | Member | Host | Role | State | TL | Lag in MB | - +---------+----------+------------+--------+---------+----+-----------+ - | demo | patroni1 | 172.22.0.3 | Leader | running | 1 | 0 | - | demo | patroni2 | 172.22.0.7 | | running | 1 | 0 | - | demo | patroni3 | 172.22.0.4 | | running | 1 | 0 | - +---------+----------+------------+--------+---------+----+-----------+ + + Cluster: demo (7303838734793224214) --------+----+-----------+ + | Member | Host | Role | State | TL | Lag in MB | + +----------+------------+---------+-----------+----+-----------+ + | patroni1 | 172.29.0.2 | Leader | running | 1 | | + | patroni2 | 172.29.0.6 | Replica | streaming | 1 | 0 | + | patroni3 | 172.29.0.5 | Replica | streaming | 1 | 0 | + +----------+------------+---------+-----------+----+-----------+ postgres@patroni1:~$ etcdctl get --keys-only --prefix /service/demo /service/demo/config /service/demo/initialize /service/demo/leader - /service/demo/members/ /service/demo/members/patroni1 /service/demo/members/patroni2 /service/demo/members/patroni3 - /service/demo/optime/ - /service/demo/optime/leader + /service/demo/status postgres@patroni1:~$ etcdctl member list - 1bab629f01fa9065: name=etcd3 peerURLs=http://etcd3:2380 clientURLs=http://etcd3:2379 isLeader=false - 8ecb6af518d241cc: name=etcd2 peerURLs=http://etcd2:2380 clientURLs=http://etcd2:2379 isLeader=true - b2e169fcb8a34028: name=etcd1 peerURLs=http://etcd1:2380 clientURLs=http://etcd1:2379 isLeader=false + 2bf3e2ceda5d5960, started, etcd2, http://etcd2:2380, http://172.29.0.3:2379 + 55b3264e129c7005, started, etcd3, http://etcd3:2380, http://172.29.0.7:2379 + acce7233f8ec127e, started, etcd1, http://etcd1:2380, http://172.29.0.8:2379 + + postgres@patroni1:~$ exit $ docker exec -ti demo-haproxy bash postgres@haproxy:~$ psql -h localhost -p 5000 -U postgres -W Password: 
postgres - psql (11.2 (Ubuntu 11.2-1.pgdg18.04+1), server 10.7 (Debian 10.7-1.pgdg90+1)) + psql (15.5 (Debian 15.5-1.pgdg120+1)) Type "help" for help. - localhost/postgres=# select pg_is_in_recovery(); + postgres=# SELECT pg_is_in_recovery(); pg_is_in_recovery ─────────────────── f (1 row) - localhost/postgres=# \q + postgres=# \q - $postgres@haproxy:~ psql -h localhost -p 5001 -U postgres -W + postgres@haproxy:~$ psql -h localhost -p 5001 -U postgres -W Password: postgres - psql (11.2 (Ubuntu 11.2-1.pgdg18.04+1), server 10.7 (Debian 10.7-1.pgdg90+1)) + psql (15.5 (Debian 15.5-1.pgdg120+1)) Type "help" for help. - localhost/postgres=# select pg_is_in_recovery(); + postgres=# SELECT pg_is_in_recovery(); pg_is_in_recovery ─────────────────── t @@ -127,81 +122,86 @@ The haproxy listens on ports 5000 (connects to the coordinator primary) and 5001 Example session: - $ docker-compose -f docker-compose-citus.yml up -d - Creating demo-work2-1 ... done - Creating demo-work1-1 ... done - Creating demo-etcd2 ... done - Creating demo-etcd1 ... done - Creating demo-coord3 ... done - Creating demo-etcd3 ... done - Creating demo-coord1 ... done - Creating demo-haproxy ... done - Creating demo-work2-2 ... done - Creating demo-coord2 ... done - Creating demo-work1-2 ... 
done + $ docker compose -f docker-compose-citus.yml up -d + ✔ Network patroni_demo Created + ✔ Container demo-coord2 Started + ✔ Container demo-work2-2 Started + ✔ Container demo-etcd1 Started + ✔ Container demo-haproxy Started + ✔ Container demo-work1-1 Started + ✔ Container demo-work2-1 Started + ✔ Container demo-work1-2 Started + ✔ Container demo-coord1 Started + ✔ Container demo-etcd3 Started + ✔ Container demo-coord3 Started + ✔ Container demo-etcd2 Started + $ docker ps - CONTAINER ID IMAGE COMMAND CREATED STATUS PORTS NAMES - 852d8885a612 patroni-citus "/bin/sh /entrypoint…" 6 seconds ago Up 3 seconds demo-coord3 - cdd692f947ab patroni-citus "/bin/sh /entrypoint…" 6 seconds ago Up 3 seconds demo-work1-2 - 9f4e340b36da patroni-citus "/bin/sh /entrypoint…" 6 seconds ago Up 3 seconds demo-etcd3 - d69c129a960a patroni-citus "/bin/sh /entrypoint…" 6 seconds ago Up 4 seconds demo-etcd1 - c5849689b8cd patroni-citus "/bin/sh /entrypoint…" 6 seconds ago Up 4 seconds demo-coord1 - c9d72bd6217d patroni-citus "/bin/sh /entrypoint…" 6 seconds ago Up 3 seconds demo-work2-1 - 24b1b43efa05 patroni-citus "/bin/sh /entrypoint…" 6 seconds ago Up 4 seconds demo-coord2 - cb0cc2b4ca0a patroni-citus "/bin/sh /entrypoint…" 6 seconds ago Up 3 seconds demo-work2-2 - 9796c6b8aad5 patroni-citus "/bin/sh /entrypoint…" 6 seconds ago Up 5 seconds demo-work1-1 - 8baccd74dcae patroni-citus "/bin/sh /entrypoint…" 6 seconds ago Up 4 seconds demo-etcd2 - 353ec62a0187 patroni-citus "/bin/sh /entrypoint…" 6 seconds ago Up 4 seconds 0.0.0.0:5000-5001->5000-5001/tcp demo-haproxy + CONTAINER ID IMAGE COMMAND CREATED STATUS PORTS NAMES + 79c95492fac9 patroni-citus "/bin/sh /entrypoint…" 11 minutes ago Up 11 minutes demo-etcd3 + 77eb82d0f0c1 patroni-citus "/bin/sh /entrypoint…" 11 minutes ago Up 11 minutes demo-work2-1 + 03dacd7267ef patroni-citus "/bin/sh /entrypoint…" 11 minutes ago Up 11 minutes demo-etcd1 + db9206c66f85 patroni-citus "/bin/sh /entrypoint…" 11 minutes ago Up 11 minutes demo-etcd2 
+ 9a0fef7b7dd4 patroni-citus "/bin/sh /entrypoint…" 11 minutes ago Up 11 minutes demo-work1-2 + f06b031d99dc patroni-citus "/bin/sh /entrypoint…" 11 minutes ago Up 11 minutes demo-work2-2 + f7c58545f314 patroni-citus "/bin/sh /entrypoint…" 11 minutes ago Up 11 minutes demo-coord2 + 383f9e7e188a patroni-citus "/bin/sh /entrypoint…" 11 minutes ago Up 11 minutes demo-work1-1 + f02e96dcc9d6 patroni-citus "/bin/sh /entrypoint…" 11 minutes ago Up 11 minutes demo-coord3 + 6945834b7056 patroni-citus "/bin/sh /entrypoint…" 11 minutes ago Up 11 minutes demo-coord1 + b96ca42f785d patroni-citus "/bin/sh /entrypoint…" 11 minutes ago Up 11 minutes 0.0.0.0:5000-5001->5000-5001/tcp, :::5000-5001->5000-5001/tcp demo-haproxy + $ docker logs demo-coord1 - 2023-01-05 15:09:31,295 INFO: Selected new etcd server http://172.27.0.4:2379 - 2023-01-05 15:09:31,388 INFO: Lock owner: None; I am coord1 - 2023-01-05 15:09:31,501 INFO: trying to bootstrap a new cluster + 2023-11-21 09:36:14,293 INFO: Selected new etcd server http://172.30.0.4:2379 + 2023-11-21 09:36:14,390 INFO: Lock owner: None; I am coord1 + 2023-11-21 09:36:14,478 INFO: trying to bootstrap a new cluster ... 
- 2023-01-05 15:09:45,096 INFO: postmaster pid=39 + 2023-11-21 09:36:16,475 INFO: postmaster pid=52 localhost:5432 - no response - 2023-01-05 15:09:45.137 UTC [39] LOG: starting PostgreSQL 15.1 (Debian 15.1-1.pgdg110+1) on x86_64-pc-linux-gnu, compiled by gcc (Debian 10.2.1-6) 10.2.1 20210110, 64-bit - 2023-01-05 15:09:45.137 UTC [39] LOG: listening on IPv4 address "0.0.0.0", port 5432 - 2023-01-05 15:09:45.152 UTC [39] LOG: listening on Unix socket "/var/run/postgresql/.s.PGSQL.5432" - 2023-01-05 15:09:45.177 UTC [43] LOG: database system was shut down at 2023-01-05 15:09:32 UTC - 2023-01-05 15:09:45.193 UTC [39] LOG: database system is ready to accept connections + 2023-11-21 09:36:16.495 UTC [52] LOG: starting PostgreSQL 15.5 (Debian 15.5-1.pgdg120+1) on x86_64-pc-linux-gnu, compiled by gcc (Debian 12.2.0-14) 12.2.0, 64-bit + 2023-11-21 09:36:16.495 UTC [52] LOG: listening on IPv4 address "0.0.0.0", port 5432 + 2023-11-21 09:36:16.496 UTC [52] LOG: listening on Unix socket "/var/run/postgresql/.s.PGSQL.5432" + 2023-11-21 09:36:16.498 UTC [56] LOG: database system was shut down at 2023-11-21 09:36:15 UTC + 2023-11-21 09:36:16.501 UTC [52] LOG: database system is ready to accept connections localhost:5432 - accepting connections localhost:5432 - accepting connections - 2023-01-05 15:09:46,139 INFO: establishing a new patroni connection to the postgres cluster - 2023-01-05 15:09:46,208 INFO: running post_bootstrap - 2023-01-05 15:09:47.209 UTC [55] LOG: starting maintenance daemon on database 16386 user 10 - 2023-01-05 15:09:47.209 UTC [55] CONTEXT: Citus maintenance daemon for database 16386 user 10 - 2023-01-05 15:09:47,215 WARNING: Could not activate Linux watchdog device: "Can't open watchdog device: [Errno 2] No such file or directory: '/dev/watchdog'" - 2023-01-05 15:09:47.446 UTC [41] LOG: checkpoint starting: immediate force wait - 2023-01-05 15:09:47,466 INFO: initialized a new cluster - 2023-01-05 15:09:47,594 DEBUG: query(SELECT nodeid, groupid, 
nodename, nodeport, noderole FROM pg_catalog.pg_dist_node WHERE noderole = 'primary', ()) - 2023-01-05 15:09:47,594 INFO: establishing a new patroni connection to the postgres cluster - 2023-01-05 15:09:47,467 INFO: Lock owner: coord1; I am coord1 - 2023-01-05 15:09:47,613 DEBUG: query(SELECT pg_catalog.citus_set_coordinator_host(%s, %s, 'primary', 'default'), ('172.27.0.6', 5432)) - 2023-01-05 15:09:47,924 INFO: no action. I am (coord1), the leader with the lock - 2023-01-05 15:09:51.282 UTC [41] LOG: checkpoint complete: wrote 1086 buffers (53.0%); 0 WAL file(s) added, 0 removed, 0 recycled; write=0.029 s, sync=3.746 s, total=3.837 s; sync files=280, longest=0.028 s, average=0.014 s; distance=8965 kB, estimate=8965 kB - 2023-01-05 15:09:51.283 UTC [41] LOG: checkpoint starting: immediate force wait - 2023-01-05 15:09:51.495 UTC [41] LOG: checkpoint complete: wrote 18 buffers (0.9%); 0 WAL file(s) added, 0 removed, 0 recycled; write=0.044 s, sync=0.091 s, total=0.212 s; sync files=15, longest=0.015 s, average=0.007 s; distance=67 kB, estimate=8076 kB - 2023-01-05 15:09:57,467 INFO: Lock owner: coord1; I am coord1 - 2023-01-05 15:09:57,569 INFO: Assigning synchronous standby status to ['coord3'] + 2023-11-21 09:36:17,509 INFO: establishing a new patroni heartbeat connection to postgres + 2023-11-21 09:36:17,569 INFO: running post_bootstrap + 2023-11-21 09:36:17,593 WARNING: User creation via "bootstrap.users" will be removed in v4.0.0 + 2023-11-21 09:36:17,783 INFO: establishing a new patroni restapi connection to postgres + 2023-11-21 09:36:17,969 WARNING: Could not activate Linux watchdog device: Can't open watchdog device: [Errno 2] No such file or directory: '/dev/watchdog' + 2023-11-21 09:36:17.969 UTC [70] LOG: starting maintenance daemon on database 16386 user 10 + 2023-11-21 09:36:17.969 UTC [70] CONTEXT: Citus maintenance daemon for database 16386 user 10 + 2023-11-21 09:36:18.159 UTC [54] LOG: checkpoint starting: immediate force wait + 2023-11-21 
09:36:18,162 INFO: initialized a new cluster + 2023-11-21 09:36:18,164 INFO: Lock owner: coord1; I am coord1 + 2023-11-21 09:36:18,297 INFO: Enabled synchronous replication + 2023-11-21 09:36:18,298 DEBUG: Adding the new task: PgDistNode(nodeid=None,group=0,host=172.30.0.3,port=5432,event=after_promote) + 2023-11-21 09:36:18,298 DEBUG: Adding the new task: PgDistNode(nodeid=None,group=1,host=172.30.0.7,port=5432,event=after_promote) + 2023-11-21 09:36:18,298 DEBUG: Adding the new task: PgDistNode(nodeid=None,group=2,host=172.30.0.8,port=5432,event=after_promote) + 2023-11-21 09:36:18,299 DEBUG: query(SELECT nodeid, groupid, nodename, nodeport, noderole FROM pg_catalog.pg_dist_node WHERE noderole = 'primary', ()) + 2023-11-21 09:36:18,299 INFO: establishing a new patroni citus connection to postgres + 2023-11-21 09:36:18,323 DEBUG: query(SELECT pg_catalog.citus_add_node(%s, %s, %s, 'primary', 'default'), ('172.30.0.7', 5432, 1)) + 2023-11-21 09:36:18,361 INFO: no action. I am (coord1), the leader with the lock + 2023-11-21 09:36:18,393 DEBUG: query(SELECT pg_catalog.citus_add_node(%s, %s, %s, 'primary', 'default'), ('172.30.0.8', 5432, 2)) + 2023-11-21 09:36:28,164 INFO: Lock owner: coord1; I am coord1 + 2023-11-21 09:36:28,251 INFO: Assigning synchronous standby status to ['coord3'] server signaled - 2023-01-05 15:09:57.574 UTC [39] LOG: received SIGHUP, reloading configuration files - 2023-01-05 15:09:57.580 UTC [39] LOG: parameter "synchronous_standby_names" changed to "coord3" - 2023-01-05 15:09:59,637 INFO: Synchronous standby status assigned to ['coord3'] - 2023-01-05 15:09:59,638 DEBUG: query(SELECT pg_catalog.citus_add_node(%s, %s, %s, 'primary', 'default'), ('172.27.0.2', 5432, 1)) - 2023-01-05 15:09:59.690 UTC [67] LOG: standby "coord3" is now a synchronous standby with priority 1 - 2023-01-05 15:09:59.690 UTC [67] STATEMENT: START_REPLICATION SLOT "coord3" 0/3000000 TIMELINE 1 - 2023-01-05 15:09:59,694 INFO: no action. 
I am (coord1), the leader with the lock - 2023-01-05 15:09:59,704 DEBUG: query(SELECT pg_catalog.citus_add_node(%s, %s, %s, 'primary', 'default'), ('172.27.0.8', 5432, 2)) - 2023-01-05 15:10:07,625 INFO: no action. I am (coord1), the leader with the lock - 2023-01-05 15:10:17,579 INFO: no action. I am (coord1), the leader with the lock + 2023-11-21 09:36:28.435 UTC [52] LOG: received SIGHUP, reloading configuration files + 2023-11-21 09:36:28.436 UTC [52] LOG: parameter "synchronous_standby_names" changed to "coord3" + 2023-11-21 09:36:28.641 UTC [83] LOG: standby "coord3" is now a synchronous standby with priority 1 + 2023-11-21 09:36:28.641 UTC [83] STATEMENT: START_REPLICATION SLOT "coord3" 0/3000000 TIMELINE 1 + 2023-11-21 09:36:30,582 INFO: Synchronous standby status assigned to ['coord3'] + 2023-11-21 09:36:30,626 INFO: no action. I am (coord1), the leader with the lock + 2023-11-21 09:36:38,250 INFO: no action. I am (coord1), the leader with the lock + ... $ docker exec -ti demo-haproxy bash postgres@haproxy:~$ etcdctl member list - 1bab629f01fa9065, started, etcd3, http://etcd3:2380, http://172.27.0.10:2379 - 8ecb6af518d241cc, started, etcd2, http://etcd2:2380, http://172.27.0.4:2379 - b2e169fcb8a34028, started, etcd1, http://etcd1:2380, http://172.27.0.7:2379 + 2b28411e74c0c281, started, etcd3, http://etcd3:2380, http://172.30.0.4:2379 + 6c70137d27cfa6c1, started, etcd2, http://etcd2:2380, http://172.30.0.5:2379 + a28f9a70ebf21304, started, etcd1, http://etcd1:2380, http://172.30.0.6:2379 postgres@haproxy:~$ etcdctl get --keys-only --prefix /service/demo /service/demo/0/config @@ -229,7 +229,7 @@ Example session: postgres@haproxy:~$ psql -h localhost -p 5000 -U postgres -d citus Password for user postgres: postgres - psql (15.1 (Debian 15.1-1.pgdg110+1)) + psql (15.5 (Debian 15.5-1.pgdg120+1)) SSL connection (protocol: TLSv1.3, cipher: TLS_AES_256_GCM_SHA384, compression: off) Type "help" for help. 
@@ -240,67 +240,67 @@ Example session: (1 row) citus=# table pg_dist_node; - nodeid | groupid | nodename | nodeport | noderack | hasmetadata | isactive | noderole | nodecluster | metadatasynced | shouldhaveshards + nodeid | groupid | nodename | nodeport | noderack | hasmetadata | isactive | noderole | nodecluster | metadatasynced | shouldhaveshards --------+---------+------------+----------+----------+-------------+----------+----------+-------------+----------------+------------------ - 1 | 0 | 172.27.0.6 | 5432 | default | t | t | primary | default | t | f - 2 | 1 | 172.27.0.2 | 5432 | default | t | t | primary | default | t | t - 3 | 2 | 172.27.0.8 | 5432 | default | t | t | primary | default | t | t + 1 | 0 | 172.30.0.3 | 5432 | default | t | t | primary | default | t | f + 2 | 1 | 172.30.0.7 | 5432 | default | t | t | primary | default | t | t + 3 | 2 | 172.30.0.8 | 5432 | default | t | t | primary | default | t | t (3 rows) citus=# \q postgres@haproxy:~$ patronictl list - + Citus cluster: demo ----------+--------------+---------+----+-----------+ - | Group | Member | Host | Role | State | TL | Lag in MB | - +-------+---------+-------------+--------------+---------+----+-----------+ - | 0 | coord1 | 172.27.0.6 | Leader | running | 1 | | - | 0 | coord2 | 172.27.0.5 | Replica | running | 1 | 0 | - | 0 | coord3 | 172.27.0.9 | Sync Standby | running | 1 | 0 | - | 1 | work1-1 | 172.27.0.2 | Leader | running | 1 | | - | 1 | work1-2 | 172.27.0.12 | Sync Standby | running | 1 | 0 | - | 2 | work2-1 | 172.27.0.11 | Sync Standby | running | 1 | 0 | - | 2 | work2-2 | 172.27.0.8 | Leader | running | 1 | | - +-------+---------+-------------+--------------+---------+----+-----------+ + + Citus cluster: demo ----------+--------------+-----------+----+-----------+ + | Group | Member | Host | Role | State | TL | Lag in MB | + +-------+---------+-------------+--------------+-----------+----+-----------+ + | 0 | coord1 | 172.30.0.3 | Leader | running | 1 | | + | 0 | coord2 | 
172.30.0.12 | Replica | streaming | 1 | 0 | + | 0 | coord3 | 172.30.0.2 | Sync Standby | streaming | 1 | 0 | + | 1 | work1-1 | 172.30.0.7 | Leader | running | 1 | | + | 1 | work1-2 | 172.30.0.10 | Sync Standby | streaming | 1 | 0 | + | 2 | work2-1 | 172.30.0.8 | Leader | running | 1 | | + | 2 | work2-2 | 172.30.0.11 | Sync Standby | streaming | 1 | 0 | + +-------+---------+-------------+--------------+-----------+----+-----------+ + postgres@haproxy:~$ patronictl switchover --group 2 --force Current cluster topology - + Citus cluster: demo (group: 2, 7185185529556963355) +-----------+ - | Member | Host | Role | State | TL | Lag in MB | - +---------+-------------+--------------+---------+----+-----------+ - | work2-1 | 172.27.0.11 | Sync Standby | running | 1 | 0 | - | work2-2 | 172.27.0.8 | Leader | running | 1 | | - +---------+-------------+--------------+---------+----+-----------+ - 2023-01-05 15:29:29.54204 Successfully switched over to "work2-1" - + Citus cluster: demo (group: 2, 7185185529556963355) -------+ + + Citus cluster: demo (group: 2, 7303846899271086103) --+-----------+ + | Member | Host | Role | State | TL | Lag in MB | + +---------+-------------+--------------+-----------+----+-----------+ + | work2-1 | 172.30.0.8 | Leader | running | 1 | | + | work2-2 | 172.30.0.11 | Sync Standby | streaming | 1 | 0 | + +---------+-------------+--------------+-----------+----+-----------+ + 2023-11-21 09:44:15.83849 Successfully switched over to "work2-2" + + Citus cluster: demo (group: 2, 7303846899271086103) -------+ | Member | Host | Role | State | TL | Lag in MB | +---------+-------------+---------+---------+----+-----------+ - | work2-1 | 172.27.0.11 | Leader | running | 1 | | - | work2-2 | 172.27.0.8 | Replica | stopped | | unknown | + | work2-1 | 172.30.0.8 | Replica | stopped | | unknown | + | work2-2 | 172.30.0.11 | Leader | running | 1 | | +---------+-------------+---------+---------+----+-----------+ postgres@haproxy:~$ patronictl list - + Citus 
cluster: demo ----------+--------------+---------+----+-----------+ - | Group | Member | Host | Role | State | TL | Lag in MB | - +-------+---------+-------------+--------------+---------+----+-----------+ - | 0 | coord1 | 172.27.0.6 | Leader | running | 1 | | - | 0 | coord2 | 172.27.0.5 | Replica | running | 1 | 0 | - | 0 | coord3 | 172.27.0.9 | Sync Standby | running | 1 | 0 | - | 1 | work1-1 | 172.27.0.2 | Leader | running | 1 | | - | 1 | work1-2 | 172.27.0.12 | Sync Standby | running | 1 | 0 | - | 2 | work2-1 | 172.27.0.11 | Leader | running | 2 | | - | 2 | work2-2 | 172.27.0.8 | Sync Standby | running | 2 | 0 | - +-------+---------+-------------+--------------+---------+----+-----------+ + + Citus cluster: demo ----------+--------------+-----------+----+-----------+ + | Group | Member | Host | Role | State | TL | Lag in MB | + +-------+---------+-------------+--------------+-----------+----+-----------+ + | 0 | coord1 | 172.30.0.3 | Leader | running | 1 | | + | 0 | coord2 | 172.30.0.12 | Replica | streaming | 1 | 0 | + | 0 | coord3 | 172.30.0.2 | Sync Standby | streaming | 1 | 0 | + | 1 | work1-1 | 172.30.0.7 | Leader | running | 1 | | + | 1 | work1-2 | 172.30.0.10 | Sync Standby | streaming | 1 | 0 | + | 2 | work2-1 | 172.30.0.8 | Sync Standby | streaming | 2 | 0 | + | 2 | work2-2 | 172.30.0.11 | Leader | running | 2 | | + +-------+---------+-------------+--------------+-----------+----+-----------+ postgres@haproxy:~$ psql -h localhost -p 5000 -U postgres -d citus - Password for user postgres: postgres - psql (15.1 (Debian 15.1-1.pgdg110+1)) + psql (15.5 (Debian 15.5-1.pgdg120+1)) SSL connection (protocol: TLSv1.3, cipher: TLS_AES_256_GCM_SHA384, compression: off) Type "help" for help. 
citus=# table pg_dist_node; - nodeid | groupid | nodename | nodeport | noderack | hasmetadata | isactive | noderole | nodecluster | metadatasynced | shouldhaveshards + nodeid | groupid | nodename | nodeport | noderack | hasmetadata | isactive | noderole | nodecluster | metadatasynced | shouldhaveshards --------+---------+-------------+----------+----------+-------------+----------+----------+-------------+----------------+------------------ - 1 | 0 | 172.27.0.6 | 5432 | default | t | t | primary | default | t | f - 3 | 2 | 172.27.0.11 | 5432 | default | t | t | primary | default | t | t - 2 | 1 | 172.27.0.2 | 5432 | default | t | t | primary | default | t | t + 1 | 0 | 172.30.0.3 | 5432 | default | t | t | primary | default | t | f + 3 | 2 | 172.30.0.11 | 5432 | default | t | t | primary | default | t | t + 2 | 1 | 172.30.0.7 | 5432 | default | t | t | primary | default | t | t (3 rows) From 91327f943c92faa4bde478494e8c7338138badbc Mon Sep 17 00:00:00 2001 From: Alexander Kukushkin Date: Thu, 23 Nov 2023 17:04:23 +0100 Subject: [PATCH 09/33] Factor out dynamic class finder/loader to a dedicated file (#2954) It could be reused to do the same for MPP modules/classes. 
Ref: #2940 and #2950 --- patroni/dcs/__init__.py | 72 ++++------------------------- patroni/dynamic_loader.py | 96 +++++++++++++++++++++++++++++++++++++++ tests/test_ctl.py | 2 +- 3 files changed, 107 insertions(+), 63 deletions(-) create mode 100644 patroni/dynamic_loader.py diff --git a/patroni/dcs/__init__.py b/patroni/dcs/__init__.py index 9c2f73357..b2538e1a6 100644 --- a/patroni/dcs/__init__.py +++ b/patroni/dcs/__init__.py @@ -1,26 +1,21 @@ """Abstract classes for Distributed Configuration Store.""" import abc import datetime -import importlib -import inspect import json import logging -import os -import pkgutil import re -import sys import time from collections import defaultdict from copy import deepcopy from random import randint from threading import Event, Lock -from types import ModuleType -from typing import Any, Callable, Collection, Dict, List, NamedTuple, Optional, Set, Tuple, Union, TYPE_CHECKING, \ - Type, Iterator +from typing import Any, Callable, Collection, Dict, Iterator, List, \ + NamedTuple, Optional, Tuple, Type, TYPE_CHECKING, Union from urllib.parse import urlparse, urlunparse, parse_qsl import dateutil.parser +from ..dynamic_loader import iter_classes, iter_modules from ..exceptions import PatroniFatalException from ..utils import deep_compare, uri from ..tags import Tags @@ -87,28 +82,9 @@ def parse_connection_string(value: str) -> Tuple[str, Union[str, None]]: def dcs_modules() -> List[str]: """Get names of DCS modules, depending on execution environment. - .. note:: - If being packaged with PyInstaller, modules aren't discoverable dynamically by scanning source directory because - :class:`importlib.machinery.FrozenImporter` doesn't implement :func:`iter_modules`. But it is still possible to - find all potential DCS modules by iterating through ``toc``, which contains list of all "frozen" resources. - :returns: list of known module names with absolute python module path namespace, e.g. ``patroni.dcs.etcd``. 
""" - dcs_dirname = os.path.dirname(__file__) - module_prefix = __package__ + '.' - - if getattr(sys, 'frozen', False): - toc: Set[str] = set() - # dcs_dirname may contain a dot, which causes pkgutil.iter_importers() - # to misinterpret the path as a package name. This can be avoided - # altogether by not passing a path at all, because PyInstaller's - # FrozenImporter is a singleton and registered as top-level finder. - for importer in pkgutil.iter_importers(): - if hasattr(importer, 'toc'): - toc |= getattr(importer, 'toc') - return [module for module in toc if module.startswith(module_prefix) and module.count('.') == 2] - - return [module_prefix + name for _, name, is_pkg in pkgutil.iter_modules([dcs_dirname]) if not is_pkg] + return iter_modules(__package__) def iter_dcs_classes( @@ -122,44 +98,16 @@ def iter_dcs_classes( :param config: configuration information with possible DCS names as keys. If given, only attempt to import DCS modules defined in the configuration. Else, if ``None``, attempt to import any supported DCS module. - :yields: a tuple containing the module ``name`` and the imported DCS class object. - """ - for mod_name in dcs_modules(): - name = mod_name.rpartition('.')[2] - if config is None or name in config: - - try: - module = importlib.import_module(mod_name) - dcs_module = find_dcs_class_in_module(module) - if dcs_module: - yield name, dcs_module - - except ImportError: - logger.log(logging.DEBUG if config is not None else logging.INFO, - 'Failed to import %s', mod_name) - - -def find_dcs_class_in_module(module: ModuleType) -> Optional[Type['AbstractDCS']]: - """Try to find the implementation of :class:`AbstractDCS` interface in *module* matching the *module* name. - - :param module: Imported DCS module. - - :returns: class with a name matching the name of *module* that implements :class:`AbstractDCS` or ``None`` if not - found. + :returns: an iterator of tuples, each containing the module ``name`` and the imported DCS class object. 
""" - module_name = module.__name__.rpartition('.')[2] - return next( - (obj for obj_name, obj in module.__dict__.items() - if (obj_name.lower() == module_name - and inspect.isclass(obj) and issubclass(obj, AbstractDCS))), - None) + return iter_classes(__package__, AbstractDCS, config) def get_dcs(config: Union['Config', Dict[str, Any]]) -> 'AbstractDCS': """Attempt to load a Distributed Configuration Store from known available implementations. .. note:: - Using the list of available DCS classes returned by :func:`iter_dcs_classes` attempt to dynamically + Using the list of available DCS classes returned by :func:`iter_classes` attempt to dynamically instantiate the class that implements a DCS using the abstract class :class:`AbstractDCS`. Basic top-level configuration parameters retrieved from *config* are propagated to the DCS specific config @@ -185,9 +133,9 @@ def get_dcs(config: Union['Config', Dict[str, Any]]) -> 'AbstractDCS': config[name].update(config['citus']) return dcs_class(config[name]) - raise PatroniFatalException( - f"Can not find suitable configuration of distributed configuration store\n" - f"Available implementations: {', '.join(sorted([n for n, _ in iter_dcs_classes()]))}") + available_implementations = ', '.join(sorted([n for n, _ in iter_dcs_classes()])) + raise PatroniFatalException("Can not find suitable configuration of distributed configuration store\n" + f"Available implementations: {available_implementations}") _Version = Union[int, str] diff --git a/patroni/dynamic_loader.py b/patroni/dynamic_loader.py new file mode 100644 index 000000000..6c207349e --- /dev/null +++ b/patroni/dynamic_loader.py @@ -0,0 +1,96 @@ +"""Helper functions to search for implementations of specific abstract interface in a package.""" +import importlib +import inspect +import logging +import os +import pkgutil +import sys +from types import ModuleType + +from typing import Any, Dict, Iterator, List, Optional, Set, Tuple, TYPE_CHECKING, Type, TypeVar, Union + +if 
TYPE_CHECKING: # pragma: no cover + from .config import Config + +logger = logging.getLogger(__name__) + + +def iter_modules(package: str) -> List[str]: + """Get names of modules from *package*, depending on execution environment. + + .. note:: + If being packaged with PyInstaller, modules aren't discoverable dynamically by scanning source directory because + :class:`importlib.machinery.FrozenImporter` doesn't implement :func:`iter_modules`. But it is still possible to + find all potential modules by iterating through ``toc``, which contains list of all "frozen" resources. + + :param package: a package name to search modules in, e.g. ``patroni.dcs``. + + :returns: list of known module names with absolute python module path namespace, e.g. ``patroni.dcs.etcd``. + """ + module_prefix = package + '.' + + if getattr(sys, 'frozen', False): + toc: Set[str] = set() + # dirname may contain a few dots, which causes pkgutil.iter_importers() + # to misinterpret the path as a package name. This can be avoided + # altogether by not passing a path at all, because PyInstaller's + # FrozenImporter is a singleton and registered as top-level finder. 
+ for importer in pkgutil.iter_importers(): + if hasattr(importer, 'toc'): + toc |= getattr(importer, 'toc') + dots = module_prefix.count('.') # search for modules only on the same level + return [module for module in toc if module.startswith(module_prefix) and module.count('.') == dots] + + # here we are making an assumption that the package which is calling this function is already imported + pkg_file = sys.modules[package].__file__ + if TYPE_CHECKING: # pragma: no cover + assert isinstance(pkg_file, str) + return [name for _, name, is_pkg in pkgutil.iter_modules([os.path.dirname(pkg_file)], module_prefix) if not is_pkg] + + +ClassType = TypeVar("ClassType") + + +def find_class_in_module(module: ModuleType, cls_type: Type[ClassType]) -> Optional[Type[ClassType]]: + """Try to find the implementation of *cls_type* class interface in *module* matching the *module* name. + + :param module: imported module. + :param cls_type: a class type we are looking for. + + :returns: class with a name matching the name of *module* that implements *cls_type* or ``None`` if not found. + """ + module_name = module.__name__.rpartition('.')[2] + return next( + (obj for obj_name, obj in module.__dict__.items() + if (obj_name.lower() == module_name + and inspect.isclass(obj) and issubclass(obj, cls_type))), + None) + + +def iter_classes( + package: str, cls_type: Type[ClassType], + config: Optional[Union['Config', Dict[str, Any]]] = None +) -> Iterator[Tuple[str, Type[ClassType]]]: + """Attempt to import modules and find implementations of *cls_type* that are present in the given configuration. + + .. note:: + If a module successfully imports we can assume that all its requirements are installed. + + :param package: a package name to search modules in, e.g. ``patroni.dcs``. + :param cls_type: a class type we are looking for. + :param config: configuration information with possible module names as keys. If given, only attempt to import + modules defined in the configuration. 
Else, if ``None``, attempt to import any supported module. + + :yields: a tuple containing the module ``name`` and the imported class object. + """ + for mod_name in iter_modules(package): + name = mod_name.rpartition('.')[2] + if config is None or name in config: + try: + module = importlib.import_module(mod_name) + module_cls = find_class_in_module(module, cls_type) + if module_cls: + yield name, module_cls + except ImportError: + logger.log(logging.DEBUG if config is not None else logging.INFO, + 'Failed to import %s', mod_name) diff --git a/tests/test_ctl.py b/tests/test_ctl.py index bacd2d609..96c36c16a 100644 --- a/tests/test_ctl.py +++ b/tests/test_ctl.py @@ -238,7 +238,7 @@ def test_failover(self): result = self.runner.invoke(ctl, ['failover', 'dummy', '--group', '0', '--candidate', 'async'], input='N') self.assertEqual(result.exit_code, 1) - @patch('patroni.dcs.dcs_modules', Mock(return_value=['patroni.dcs.dummy', 'patroni.dcs.etcd'])) + @patch('patroni.dynamic_loader.iter_modules', Mock(return_value=['patroni.dcs.dummy', 'patroni.dcs.etcd'])) def test_get_dcs(self): with click.Context(click.Command('list')) as ctx: ctx.obj = {'__config': {'dummy': {}}} From 193c73f6b80c42b316d790f9b40321da2ee55a31 Mon Sep 17 00:00:00 2001 From: Alexander Kukushkin Date: Fri, 24 Nov 2023 09:26:05 +0100 Subject: [PATCH 10/33] Make GlobalConfig really global (#2935) 1. extract `GlobalConfig` class to its own module 2. make the module instantiate the `GlobalConfig` object on load and replace sys.modules with the this instance 3. don't pass `GlobalConfig` object around, but use `patroni.global_config` module everywhere. 4. move `ignore_slots_matchers`, `max_timelines_history`, and `permanent_slots` from `ClusterConfig` to `GlobalConfig`. 5. add `use_slots` property to global_config and remove duplicated code from `Cluster` and `Postgresql.ConfigHandler`. Besides that improve readability of couple of checks in ha.py and formatting of `/config` key when saved from patronictl. 
--- patroni/api.py | 27 ++-- patroni/config.py | 162 +---------------------- patroni/ctl.py | 21 +-- patroni/dcs/__init__.py | 62 ++------- patroni/global_config.py | 227 +++++++++++++++++++++++++++++++++ patroni/ha.py | 48 ++++--- patroni/postgresql/__init__.py | 25 +--- patroni/postgresql/config.py | 7 +- patroni/postgresql/slots.py | 10 +- patroni/postgresql/sync.py | 8 +- patroni/utils.py | 15 +-- tests/test_api.py | 29 ++--- tests/test_config.py | 10 +- tests/test_ctl.py | 11 +- tests/test_ha.py | 32 +++-- tests/test_patroni.py | 1 + tests/test_postgresql.py | 14 +- tests/test_slots.py | 10 +- tests/test_sync.py | 7 +- 19 files changed, 372 insertions(+), 354 deletions(-) create mode 100644 patroni/global_config.py diff --git a/patroni/api.py b/patroni/api.py index 6a89b1bae..2adc5f985 100644 --- a/patroni/api.py +++ b/patroni/api.py @@ -26,7 +26,7 @@ from typing import Any, Callable, Dict, Iterator, List, Optional, Tuple, TYPE_CHECKING, Union -from . import psycopg +from . import global_config, psycopg from .__main__ import Patroni from .dcs import Cluster from .exceptions import PostgresConnectionException, PostgresException @@ -290,7 +290,7 @@ def do_GET(self, write_status_code_only: bool = False) -> None: patroni = self.server.patroni cluster = patroni.dcs.cluster - global_config = patroni.config.get_global_config(cluster) + config = global_config.from_cluster(cluster) leader_optime = cluster and cluster.last_lsn or 0 replayed_location = response.get('xlog', {}).get('replayed_location', 0) @@ -308,7 +308,7 @@ def do_GET(self, write_status_code_only: bool = False) -> None: standby_leader_status_code = 200 if response.get('role') == 'standby_leader' else 503 elif patroni.ha.is_leader(): leader_status_code = 200 - if global_config.is_standby_cluster: + if config.is_standby_cluster: primary_status_code = replica_status_code = 503 standby_leader_status_code = 200 if response.get('role') in ('replica', 'standby_leader') else 503 else: @@ -452,9 +452,8 @@ def 
do_GET_cluster(self) -> None: HTTP status ``200`` and the JSON representation of the cluster topology. """ cluster = self.server.patroni.dcs.get_cluster() - global_config = self.server.patroni.config.get_global_config(cluster) - response = cluster_as_json(cluster, global_config) + response = cluster_as_json(cluster) response['scope'] = self.server.patroni.postgresql.scope self._write_json_response(200, response) @@ -864,7 +863,7 @@ def do_POST_restart(self) -> None: if request: logger.debug("received restart request: {0}".format(request)) - if self.server.patroni.config.get_global_config(cluster).is_paused and 'schedule' in request: + if global_config.from_cluster(cluster).is_paused and 'schedule' in request: self.write_response(status_code, "Can't schedule restart in the paused state") return @@ -1033,7 +1032,7 @@ def is_failover_possible(self, cluster: Cluster, leader: Optional[str], candidat :returns: a string with the error message or ``None`` if good nodes are found. """ - is_synchronous_mode = self.server.patroni.config.get_global_config(cluster).is_synchronous_mode + is_synchronous_mode = global_config.from_cluster(cluster).is_synchronous_mode if leader and (not cluster.leader or cluster.leader.name != leader): return 'leader name does not match' if candidate: @@ -1091,7 +1090,7 @@ def do_POST_failover(self, action: str = 'failover') -> None: candidate = request.get('candidate') or request.get('member') scheduled_at = request.get('scheduled_at') cluster = self.server.patroni.dcs.get_cluster() - global_config = self.server.patroni.config.get_global_config(cluster) + config = global_config.from_cluster(cluster) logger.info("received %s request with leader=%s candidate=%s scheduled_at=%s", action, leader, candidate, scheduled_at) @@ -1104,12 +1103,12 @@ def do_POST_failover(self, action: str = 'failover') -> None: if not data and scheduled_at: if action == 'failover': data = "Failover can't be scheduled" - elif global_config.is_paused: + elif config.is_paused: 
data = "Can't schedule switchover in the paused state" else: (status_code, data, scheduled_at) = self.parse_schedule(scheduled_at, action) - if not data and global_config.is_paused and not candidate: + if not data and config.is_paused and not candidate: data = 'Switchover is possible only to a specific candidate in a paused state' if action == 'failover' and leader: @@ -1260,7 +1259,7 @@ def get_postgresql_status(self, retry: bool = False) -> Dict[str, Any]: """ postgresql = self.server.patroni.postgresql cluster = self.server.patroni.dcs.cluster - global_config = self.server.patroni.config.get_global_config(cluster) + config = global_config.from_cluster(cluster) try: if postgresql.state not in ('running', 'restarting', 'starting'): @@ -1291,10 +1290,10 @@ def get_postgresql_status(self, retry: bool = False) -> Dict[str, Any]: }) } - if result['role'] == 'replica' and global_config.is_standby_cluster: + if result['role'] == 'replica' and config.is_standby_cluster: result['role'] = postgresql.role - if result['role'] == 'replica' and global_config.is_synchronous_mode\ + if result['role'] == 'replica' and config.is_synchronous_mode\ and cluster and cluster.sync.matches(postgresql.name): result['sync_standby'] = True @@ -1319,7 +1318,7 @@ def get_postgresql_status(self, retry: bool = False) -> Dict[str, Any]: state = 'unknown' result: Dict[str, Any] = {'state': state, 'role': postgresql.role} - if global_config.is_paused: + if config.is_paused: result['pause'] = True if not cluster or cluster.is_unlocked(): result['cluster_unlocked'] = True diff --git a/patroni/config.py b/patroni/config.py index 00dba9d90..e523bc080 100644 --- a/patroni/config.py +++ b/patroni/config.py @@ -12,7 +12,7 @@ from . 
import PATRONI_ENV_PREFIX from .collections import CaseInsensitiveDict -from .dcs import ClusterConfig, Cluster +from .dcs import ClusterConfig from .exceptions import ConfigParseError from .file_perm import pg_perm from .postgresql.config import ConfigHandler @@ -54,154 +54,6 @@ def default_validator(conf: Dict[str, Any]) -> List[str]: return [] -class GlobalConfig(object): - """A class that wraps global configuration and provides convenient methods to access/check values. - - It is instantiated either by calling :func:`get_global_config` or :meth:`Config.get_global_config`, which picks - either a configuration from provided :class:`Cluster` object (the most up-to-date) or from the - local cache if :class:`ClusterConfig` is not initialized or doesn't have a valid config. - """ - - def __init__(self, config: Dict[str, Any]) -> None: - """Initialize :class:`GlobalConfig` object with given *config*. - - :param config: current configuration either from - :class:`ClusterConfig` or from :func:`Config.dynamic_configuration`. - """ - self.__config = config - - def get(self, name: str) -> Any: - """Gets global configuration value by *name*. - - :param name: parameter name. - - :returns: configuration value or ``None`` if it is missing. - """ - return self.__config.get(name) - - def check_mode(self, mode: str) -> bool: - """Checks whether the certain parameter is enabled. - - :param mode: parameter name, e.g. ``synchronous_mode``, ``failsafe_mode``, ``pause``, ``check_timeline``, and - so on. - - :returns: ``True`` if parameter *mode* is enabled in the global configuration. 
- """ - return bool(parse_bool(self.__config.get(mode))) - - @property - def is_paused(self) -> bool: - """``True`` if cluster is in maintenance mode.""" - return self.check_mode('pause') - - @property - def is_synchronous_mode(self) -> bool: - """``True`` if synchronous replication is requested and it is not a standby cluster config.""" - return self.check_mode('synchronous_mode') and not self.is_standby_cluster - - @property - def is_synchronous_mode_strict(self) -> bool: - """``True`` if at least one synchronous node is required.""" - return self.check_mode('synchronous_mode_strict') - - def get_standby_cluster_config(self) -> Union[Dict[str, Any], Any]: - """Get ``standby_cluster`` configuration. - - :returns: a copy of ``standby_cluster`` configuration. - """ - return deepcopy(self.get('standby_cluster')) - - @property - def is_standby_cluster(self) -> bool: - """``True`` if global configuration has a valid ``standby_cluster`` section.""" - config = self.get_standby_cluster_config() - return isinstance(config, dict) and\ - bool(config.get('host') or config.get('port') or config.get('restore_command')) - - def get_int(self, name: str, default: int = 0) -> int: - """Gets current value of *name* from the global configuration and try to return it as :class:`int`. - - :param name: name of the parameter. - :param default: default value if *name* is not in the configuration or invalid. - - :returns: currently configured value of *name* from the global configuration or *default* if it is not set or - invalid. - """ - ret = parse_int(self.get(name)) - return default if ret is None else ret - - @property - def min_synchronous_nodes(self) -> int: - """The minimal number of synchronous nodes based on whether ``synchronous_mode_strict`` is enabled or not.""" - return 1 if self.is_synchronous_mode_strict else 0 - - @property - def synchronous_node_count(self) -> int: - """Currently configured value of ``synchronous_node_count`` from the global configuration. 
- - Assume ``1`` if it is not set or invalid. - """ - return max(self.get_int('synchronous_node_count', 1), self.min_synchronous_nodes) - - @property - def maximum_lag_on_failover(self) -> int: - """Currently configured value of ``maximum_lag_on_failover`` from the global configuration. - - Assume ``1048576`` if it is not set or invalid. - """ - return self.get_int('maximum_lag_on_failover', 1048576) - - @property - def maximum_lag_on_syncnode(self) -> int: - """Currently configured value of ``maximum_lag_on_syncnode`` from the global configuration. - - Assume ``-1`` if it is not set or invalid. - """ - return self.get_int('maximum_lag_on_syncnode', -1) - - @property - def primary_start_timeout(self) -> int: - """Currently configured value of ``primary_start_timeout`` from the global configuration. - - Assume ``300`` if it is not set or invalid. - - .. note:: - ``master_start_timeout`` is still supported to keep backward compatibility. - """ - default = 300 - return self.get_int('primary_start_timeout', default)\ - if 'primary_start_timeout' in self.__config else self.get_int('master_start_timeout', default) - - @property - def primary_stop_timeout(self) -> int: - """Currently configured value of ``primary_stop_timeout`` from the global configuration. - - Assume ``0`` if it is not set or invalid. - - .. note:: - ``master_stop_timeout`` is still supported to keep backward compatibility. - """ - default = 0 - return self.get_int('primary_stop_timeout', default)\ - if 'primary_stop_timeout' in self.__config else self.get_int('master_stop_timeout', default) - - -def get_global_config(cluster: Optional[Cluster], default: Optional[Dict[str, Any]] = None) -> GlobalConfig: - """Instantiates :class:`GlobalConfig` based on the input. - - :param cluster: the currently known cluster state from DCS. - :param default: default configuration, which will be used if there is no valid *cluster.config*. - - :returns: :class:`GlobalConfig` object. 
- """ - # Try to protect from the case when DCS was wiped out - if cluster and cluster.config and cluster.config.modify_version: - config = cluster.config.data - else: - config = default or {} - return GlobalConfig(deepcopy(config)) - - class Config(object): """Handle Patroni configuration. @@ -949,18 +801,6 @@ def copy(self) -> Dict[str, Any]: """ return deepcopy(self.__effective_configuration) - def get_global_config(self, cluster: Optional[Cluster]) -> GlobalConfig: - """Instantiate :class:`GlobalConfig` based on input. - - Use the configuration from provided *cluster* (the most up-to-date) or from the - local cache if *cluster.config* is not initialized or doesn't have a valid config. - - :param cluster: the currently known cluster state from DCS. - - :returns: :class:`GlobalConfig` object. - """ - return get_global_config(cluster, self._dynamic_configuration) - def _validate_failover_tags(self) -> None: """Check ``nofailover``/``failover_priority`` config and warn user if it's contradictory. diff --git a/patroni/ctl.py b/patroni/ctl.py index 2d92457a4..13959896f 100644 --- a/patroni/ctl.py +++ b/patroni/ctl.py @@ -46,7 +46,8 @@ except ImportError: # pragma: no cover from cdiff import markup_to_pager, PatchStream # pyright: ignore [reportMissingModuleSource] -from .config import Config, get_global_config +from . 
import global_config +from .config import Config from .dcs import get_dcs as _get_dcs, AbstractDCS, Cluster, Member from .exceptions import PatroniException from .postgresql.misc import postgres_version_to_int @@ -1026,7 +1027,7 @@ def reload(cluster_name: str, member_names: List[str], group: Optional[int], for if r.status == 200: click.echo('No changes to apply on member {0}'.format(member.name)) elif r.status == 202: - config = get_global_config(cluster) + config = global_config.from_cluster(cluster) click.echo('Reload request received for member {0} and will be processed within {1} seconds'.format( member.name, config.get('loop_wait') or dcs.loop_wait) ) @@ -1105,7 +1106,7 @@ def restart(cluster_name: str, group: Optional[int], member_names: List[str], content['postgres_version'] = version if scheduled_at: - if get_global_config(cluster).is_paused: + if global_config.from_cluster(cluster).is_paused: raise PatroniCtlException("Can't schedule restart in the paused state") content['schedule'] = scheduled_at.isoformat() @@ -1228,7 +1229,7 @@ def _do_failover_or_switchover(action: str, cluster_name: str, group: Optional[i dcs = get_dcs(cluster_name, group) cluster = dcs.get_cluster() - global_config = get_global_config(cluster) + config = global_config.from_cluster(cluster) # leader has to be be defined for switchover only if action == 'switchover': @@ -1239,7 +1240,7 @@ def _do_failover_or_switchover(action: str, cluster_name: str, group: Optional[i if force: leader = cluster.leader.name else: - prompt = 'Standby Leader' if global_config.is_standby_cluster else 'Primary' + prompt = 'Standby Leader' if config.is_standby_cluster else 'Primary' leader = click.prompt(prompt, type=str, default=(cluster.leader and cluster.leader.name)) if cluster.leader.name != leader: @@ -1268,7 +1269,7 @@ def _do_failover_or_switchover(action: str, cluster_name: str, group: Optional[i if all((not force, action == 'failover', - global_config.is_synchronous_mode, + 
config.is_synchronous_mode, not cluster.sync.is_empty, not cluster.sync.matches(candidate, True))): if click.confirm(f'Are you sure you want to failover to the asynchronous node {candidate}'): @@ -1285,7 +1286,7 @@ def _do_failover_or_switchover(action: str, cluster_name: str, group: Optional[i scheduled_at = parse_scheduled(scheduled) if scheduled_at: - if global_config.is_paused: + if config.is_paused: raise PatroniCtlException("Can't schedule switchover in the paused state") scheduled_at_str = scheduled_at.isoformat() @@ -1739,7 +1740,7 @@ def wait_until_pause_is_applied(dcs: AbstractDCS, paused: bool, old_cluster: Clu :param old_cluster: original cluster information before pause or unpause has been requested. Used to report which nodes are still pending to have ``pause`` equal *paused* at a given point in time. """ - config = get_global_config(old_cluster) + config = global_config.from_cluster(old_cluster) click.echo("'{0}' request sent, waiting until it is recognized by all nodes".format(paused and 'pause' or 'resume')) old = {m.name: m.version for m in old_cluster.members if m.api_url} @@ -1777,7 +1778,7 @@ def toggle_pause(cluster_name: str, group: Optional[int], paused: bool, wait: bo """ dcs = get_dcs(cluster_name, group) cluster = dcs.get_cluster() - if get_global_config(cluster).is_paused == paused: + if global_config.from_cluster(cluster).is_paused == paused: raise PatroniCtlException('Cluster is {0} paused'.format(paused and 'already' or 'not')) for member in get_all_members_leader_first(cluster): @@ -2122,7 +2123,7 @@ def edit_config(cluster_name: str, group: Optional[int], force: bool, quiet: boo return if force or click.confirm('Apply these changes?'): - if not dcs.set_config_value(json.dumps(changed_data), cluster.config.version): + if not dcs.set_config_value(json.dumps(changed_data, separators=(',', ':')), cluster.config.version): raise PatroniCtlException("Config modification aborted due to concurrent changes") click.echo("Configuration 
changed") diff --git a/patroni/dcs/__init__.py b/patroni/dcs/__init__.py index b2538e1a6..9298e3fce 100644 --- a/patroni/dcs/__init__.py +++ b/patroni/dcs/__init__.py @@ -15,6 +15,7 @@ import dateutil.parser +from .. import global_config from ..dynamic_loader import iter_classes, iter_modules from ..exceptions import PatroniFatalException from ..utils import deep_compare, uri @@ -538,24 +539,6 @@ def from_node(version: _Version, value: str, modify_version: Optional[_Version] modify_version = 0 return ClusterConfig(version, data, version if modify_version is None else modify_version) - @property - def permanent_slots(self) -> Dict[str, Any]: - """Dictionary of permanent slots information looked up from :attr:`~ClusterConfig.data`.""" - return (self.data.get('permanent_replication_slots') - or self.data.get('permanent_slots') - or self.data.get('slots') - or {}) - - @property - def ignore_slots_matchers(self) -> List[Dict[str, Any]]: - """The value for ``ignore_slots`` from :attr:`~ClusterConfig.data` if defined or an empty list.""" - return self.data.get('ignore_slots') or [] - - @property - def max_timelines_history(self) -> int: - """The value for ``max_timelines_history`` from :attr:`~ClusterConfig.data` if defined or ``0``.""" - return self.data.get('max_timelines_history', 0) - class SyncState(NamedTuple): """Immutable object (namedtuple) which represents last observed synchronous replication state. 
@@ -944,7 +927,7 @@ def is_logical_slot(value: Union[Any, Dict[str, Any]]) -> bool: @property def __permanent_slots(self) -> Dict[str, Union[Dict[str, Any], Any]]: """Dictionary of permanent replication slots with their known LSN.""" - ret: Dict[str, Union[Dict[str, Any], Any]] = deepcopy(self.config.permanent_slots if self.config else {}) + ret: Dict[str, Union[Dict[str, Any], Any]] = global_config.permanent_slots members: Dict[str, int] = {slot_name_from_member_name(m.name): m.lsn or 0 for m in self.members} slots: Dict[str, int] = {k: parse_int(v) or 0 for k, v in (self.slots or {}).items()} @@ -973,13 +956,8 @@ def __permanent_logical_slots(self) -> Dict[str, Any]: """Dictionary of permanent ``logical`` replication slots.""" return {name: value for name, value in self.__permanent_slots.items() if self.is_logical_slot(value)} - @property - def use_slots(self) -> bool: - """``True`` if cluster is configured to use replication slots.""" - return bool(self.config and (self.config.data.get('postgresql') or {}).get('use_slots', True)) - def get_replication_slots(self, my_name: str, role: str, nofailover: bool, major_version: int, *, - is_standby_cluster: bool = False, show_error: bool = False) -> Dict[str, Dict[str, Any]]: + show_error: bool = False) -> Dict[str, Dict[str, Any]]: """Lookup configured slot names in the DCS, report issues found and merge with permanent slots. Will log an error if: @@ -990,15 +968,12 @@ def get_replication_slots(self, my_name: str, role: str, nofailover: bool, major :param role: role of this node. :param nofailover: ``True`` if this node is tagged to not be a failover candidate. :param major_version: postgresql major version. - :param is_standby_cluster: ``True`` if it is known that this is a standby cluster. We pass the value from - the outside because we want to protect from the ``/config`` key removal. :param show_error: if ``True`` report error if any disabled logical slots or conflicting slot names are found. 
:returns: final dictionary of slot names, after merging with permanent slots and performing sanity checks. """ slots: Dict[str, Dict[str, str]] = self._get_members_slots(my_name, role) - permanent_slots: Dict[str, Any] = self._get_permanent_slots(is_standby_cluster=is_standby_cluster, - role=role, nofailover=nofailover, + permanent_slots: Dict[str, Any] = self._get_permanent_slots(role=role, nofailover=nofailover, major_version=major_version) disabled_permanent_logical_slots: List[str] = self._merge_permanent_slots( @@ -1058,8 +1033,7 @@ def _merge_permanent_slots(self, slots: Dict[str, Dict[str, str]], permanent_slo logger.error("Bad value for slot '%s' in permanent_slots: %s", name, permanent_slots[name]) return disabled_permanent_logical_slots - def _get_permanent_slots(self, *, is_standby_cluster: bool, role: str, - nofailover: bool, major_version: int) -> Dict[str, Any]: + def _get_permanent_slots(self, *, role: str, nofailover: bool, major_version: int) -> Dict[str, Any]: """Get configured permanent replication slots. .. note:: @@ -1071,18 +1045,16 @@ def _get_permanent_slots(self, *, is_standby_cluster: bool, role: str, The returned dictionary for a non-standby cluster always contains permanent logical replication slots in order to show a warning if they are not supported by PostgreSQL before v11. - :param is_standby_cluster: ``True`` if it is known that this is a standby cluster. We pass the value from - the outside because we want to protect from the ``/config`` key removal. :param role: role of this node -- ``primary``, ``standby_leader`` or ``replica``. :param nofailover: ``True`` if this node is tagged to not be a failover candidate. :param major_version: postgresql major version. :returns: dictionary of permanent slot names mapped to attributes. 
""" - if not self.use_slots or nofailover: + if not global_config.use_slots or nofailover: return {} - if is_standby_cluster: + if global_config.is_standby_cluster: return self.__permanent_physical_slots \ if major_version >= SLOT_ADVANCE_AVAILABLE_VERSION or role == 'standby_leader' else {} @@ -1108,7 +1080,7 @@ def _get_members_slots(self, my_name: str, role: str) -> Dict[str, Dict[str, str :returns: dictionary of physical replication slots that should exist on a given node. """ - if not self.use_slots: + if not global_config.use_slots: return {} # we always want to exclude the member with our name from the list @@ -1132,13 +1104,11 @@ def _get_members_slots(self, my_name: str, role: str) -> Dict[str, Dict[str, str for k, v in slot_conflicts.items() if len(v) > 1)) return slots - def has_permanent_slots(self, my_name: str, *, is_standby_cluster: bool = False, nofailover: bool = False, + def has_permanent_slots(self, my_name: str, *, nofailover: bool = False, major_version: int = SLOT_ADVANCE_AVAILABLE_VERSION) -> bool: """Check if the given member node has permanent replication slots configured. :param my_name: name of the member node to check. - :param is_standby_cluster: ``True`` if it is known that this is a standby cluster. We pass the value from - the outside because we want to protect from the ``/config`` key removal. :param nofailover: ``True`` if this node is tagged to not be a failover candidate. :param major_version: postgresql major version. 
@@ -1146,20 +1116,16 @@ def has_permanent_slots(self, my_name: str, *, is_standby_cluster: bool = False, """ role = 'replica' members_slots: Dict[str, Dict[str, str]] = self._get_members_slots(my_name, role) - permanent_slots: Dict[str, Any] = self._get_permanent_slots(is_standby_cluster=is_standby_cluster, - role=role, nofailover=nofailover, + permanent_slots: Dict[str, Any] = self._get_permanent_slots(role=role, nofailover=nofailover, major_version=major_version) slots = deepcopy(members_slots) self._merge_permanent_slots(slots, permanent_slots, my_name, major_version) return len(slots) > len(members_slots) or any(self.is_physical_slot(v) for v in permanent_slots.values()) - def filter_permanent_slots(self, slots: Dict[str, int], is_standby_cluster: bool, - major_version: int) -> Dict[str, int]: + def filter_permanent_slots(self, slots: Dict[str, int], major_version: int) -> Dict[str, int]: """Filter out all non-permanent slots from provided *slots* dict. :param slots: slot names with LSN values - :param is_standby_cluster: ``True`` if it is known that this is a standby cluster. We pass the value from - the outside because we want to protect from the ``/config`` key removal. :param major_version: postgresql major version. :returns: a :class:`dict` object that contains only slots that are known to be permanent. 
@@ -1167,9 +1133,7 @@ def filter_permanent_slots(self, slots: Dict[str, int], is_standby_cluster: bool
         if major_version < SLOT_ADVANCE_AVAILABLE_VERSION:
             return {}  # for legacy PostgreSQL we don't support permanent slots on standby nodes
 
-        permanent_slots: Dict[str, Any] = self._get_permanent_slots(is_standby_cluster=is_standby_cluster,
-                                                                    role='replica',
-                                                                    nofailover=False,
+        permanent_slots: Dict[str, Any] = self._get_permanent_slots(role='replica', nofailover=False,
                                                                     major_version=major_version)
 
         members_slots = {slot_name_from_member_name(m.name) for m in self.members}
@@ -1203,7 +1167,7 @@ def should_enforce_hot_standby_feedback(self, my_name: str, nofailover: bool) ->
         if self._has_permanent_logical_slots(my_name, nofailover):
             return True
 
-        if self.use_slots:
+        if global_config.use_slots:
             members = [m for m in self.members if m.replicatefrom == my_name and m.name != self.leader_name]
             return any(self.should_enforce_hot_standby_feedback(m.name, m.nofailover) for m in members)
         return False
diff --git a/patroni/global_config.py b/patroni/global_config.py
new file mode 100644
index 000000000..7731cf59f
--- /dev/null
+++ b/patroni/global_config.py
@@ -0,0 +1,227 @@
+"""Implements *global_config* facilities.
+
+The :class:`GlobalConfig` object is instantiated on import and replaces
+``patroni.global_config`` module in :data:`sys.modules`, which allows using
+its properties and methods as if they were module variables and functions.
+"""
+import sys
+import types
+
+from copy import deepcopy
+from typing import Any, Dict, List, Optional, Union, TYPE_CHECKING
+
+from .utils import parse_bool, parse_int
+
+if TYPE_CHECKING:  # pragma: no cover
+    from .dcs import Cluster
+
+
+def __getattr__(mod: types.ModuleType, name: str) -> Any:
+    """This function exists just to make pyright happy.
+
+    Without it pyright complains about access to unknown members of global_config module.
+ """ + return getattr(sys.modules[__name__], name) # pragma: no cover + + +class GlobalConfig(types.ModuleType): + """A class that wraps global configuration and provides convenient methods to access/check values.""" + + __file__ = __file__ # just to make unittest and pytest happy + + def __init__(self) -> None: + """Initialize :class:`GlobalConfig` object.""" + super().__init__(__name__) + self.__config = {} + + @staticmethod + def _cluster_has_valid_config(cluster: Optional['Cluster']) -> bool: + """Check if provided *cluster* object has a valid global configuration. + + :param cluster: the currently known cluster state from DCS. + + :returns: ``True`` if provided *cluster* object has a valid global configuration, otherwise ``False``. + """ + return bool(cluster and cluster.config and cluster.config.modify_version) + + def update(self, cluster: Optional['Cluster']) -> None: + """Update with the new global configuration from the :class:`Cluster` object view. + + .. note:: + Global configuration is updated only when configuration in the *cluster* view is valid. + + Update happens in-place and is executed only from the main heartbeat thread. + + :param cluster: the currently known cluster state from DCS. + """ + # Try to protect from the case when DCS was wiped out + if self._cluster_has_valid_config(cluster): + self.__config = cluster.config.data # pyright: ignore [reportOptionalMemberAccess] + + def from_cluster(self, cluster: Optional['Cluster']) -> 'GlobalConfig': + """Return :class:`GlobalConfig` instance from the provided :class:`Cluster` object view. + + .. note:: + If the provided *cluster* object doesn't have a valid global configuration we return + the last known valid state of the :class:`GlobalConfig` object. + + This method is used when we need to have the most up-to-date values in the global configuration, + but we don't want to update the global object. + + :param cluster: the currently known cluster state from DCS. 
+
+        :returns: :class:`GlobalConfig` object.
+        """
+        if not self._cluster_has_valid_config(cluster):
+            return self
+
+        ret = GlobalConfig()
+        ret.update(cluster)
+        return ret
+
+    def get(self, name: str) -> Any:
+        """Gets global configuration value by *name*.
+
+        :param name: parameter name.
+
+        :returns: configuration value or ``None`` if it is missing.
+        """
+        return self.__config.get(name)
+
+    def check_mode(self, mode: str) -> bool:
+        """Checks whether a certain parameter is enabled.
+
+        :param mode: parameter name, e.g. ``synchronous_mode``, ``failsafe_mode``, ``pause``, ``check_timeline``, and
+            so on.
+
+        :returns: ``True`` if parameter *mode* is enabled in the global configuration.
+        """
+        return bool(parse_bool(self.__config.get(mode)))
+
+    @property
+    def is_paused(self) -> bool:
+        """``True`` if cluster is in maintenance mode."""
+        return self.check_mode('pause')
+
+    @property
+    def is_synchronous_mode(self) -> bool:
+        """``True`` if synchronous replication is requested and it is not a standby cluster config."""
+        return self.check_mode('synchronous_mode') and not self.is_standby_cluster
+
+    @property
+    def is_synchronous_mode_strict(self) -> bool:
+        """``True`` if at least one synchronous node is required."""
+        return self.check_mode('synchronous_mode_strict')
+
+    def get_standby_cluster_config(self) -> Union[Dict[str, Any], Any]:
+        """Get ``standby_cluster`` configuration.
+
+        :returns: a copy of ``standby_cluster`` configuration.
+        """
+        return deepcopy(self.get('standby_cluster'))
+
+    @property
+    def is_standby_cluster(self) -> bool:
+        """``True`` if global configuration has a valid ``standby_cluster`` section."""
+        config = self.get_standby_cluster_config()
+        return isinstance(config, dict) and\
+            bool(config.get('host') or config.get('port') or config.get('restore_command'))
+
+    def get_int(self, name: str, default: int = 0) -> int:
+        """Gets current value of *name* from the global configuration and tries to return it as :class:`int`.
+ + :param name: name of the parameter. + :param default: default value if *name* is not in the configuration or invalid. + + :returns: currently configured value of *name* from the global configuration or *default* if it is not set or + invalid. + """ + ret = parse_int(self.get(name)) + return default if ret is None else ret + + @property + def min_synchronous_nodes(self) -> int: + """The minimum number of synchronous nodes based on whether ``synchronous_mode_strict`` is enabled or not.""" + return 1 if self.is_synchronous_mode_strict else 0 + + @property + def synchronous_node_count(self) -> int: + """Currently configured value of ``synchronous_node_count`` from the global configuration. + + Assume ``1`` if it is not set or invalid. + """ + return max(self.get_int('synchronous_node_count', 1), self.min_synchronous_nodes) + + @property + def maximum_lag_on_failover(self) -> int: + """Currently configured value of ``maximum_lag_on_failover`` from the global configuration. + + Assume ``1048576`` if it is not set or invalid. + """ + return self.get_int('maximum_lag_on_failover', 1048576) + + @property + def maximum_lag_on_syncnode(self) -> int: + """Currently configured value of ``maximum_lag_on_syncnode`` from the global configuration. + + Assume ``-1`` if it is not set or invalid. + """ + return self.get_int('maximum_lag_on_syncnode', -1) + + @property + def primary_start_timeout(self) -> int: + """Currently configured value of ``primary_start_timeout`` from the global configuration. + + Assume ``300`` if it is not set or invalid. + + .. note:: + ``master_start_timeout`` is still supported to keep backward compatibility. + """ + default = 300 + return self.get_int('primary_start_timeout', default)\ + if 'primary_start_timeout' in self.__config else self.get_int('master_start_timeout', default) + + @property + def primary_stop_timeout(self) -> int: + """Currently configured value of ``primary_stop_timeout`` from the global configuration. 
+ + Assume ``0`` if it is not set or invalid. + + .. note:: + ``master_stop_timeout`` is still supported to keep backward compatibility. + """ + default = 0 + return self.get_int('primary_stop_timeout', default)\ + if 'primary_stop_timeout' in self.__config else self.get_int('master_stop_timeout', default) + + @property + def ignore_slots_matchers(self) -> List[Dict[str, Any]]: + """Currently configured value of ``ignore_slots`` from the global configuration. + + Assume an empty :class:`list` if not set. + """ + return self.get('ignore_slots') or [] + + @property + def max_timelines_history(self) -> int: + """Currently configured value of ``max_timelines_history`` from the global configuration. + + Assume ``0`` if not set or invalid. + """ + return self.get_int('max_timelines_history', 0) + + @property + def use_slots(self) -> bool: + """``True`` if cluster is configured to use replication slots.""" + return bool(parse_bool((self.get('postgresql') or {}).get('use_slots', True))) + + @property + def permanent_slots(self) -> Dict[str, Any]: + """Dictionary of permanent slots information from the global configuration.""" + return deepcopy(self.get('permanent_replication_slots') + or self.get('permanent_slots') + or self.get('slots') + or {}) + + +sys.modules[__name__] = GlobalConfig() diff --git a/patroni/ha.py b/patroni/ha.py index 4a21972aa..12927526f 100644 --- a/patroni/ha.py +++ b/patroni/ha.py @@ -10,7 +10,7 @@ from threading import RLock from typing import Any, Callable, Collection, Dict, List, NamedTuple, Optional, Union, Tuple, TYPE_CHECKING -from . import psycopg +from . 
import global_config, psycopg from .__main__ import Patroni from .async_executor import AsyncExecutor, CriticalTask from .collections import CaseInsensitiveSet @@ -156,7 +156,6 @@ def __init__(self, patroni: Patroni): self._rewind = Rewind(self.state_handler) self.dcs = patroni.dcs self.cluster = Cluster.empty() - self.global_config = self.patroni.config.get_global_config(None) self.old_cluster = Cluster.empty() self._leader_expiry = 0 self._leader_expiry_lock = RLock() @@ -188,20 +187,20 @@ def __init__(self, patroni: Patroni): def primary_stop_timeout(self) -> Union[int, None]: """:returns: "primary_stop_timeout" from the global configuration or `None` when not in synchronous mode.""" - ret = self.global_config.primary_stop_timeout + ret = global_config.primary_stop_timeout return ret if ret > 0 and self.is_synchronous_mode() else None def is_paused(self) -> bool: """:returns: `True` if in maintenance mode.""" - return self.global_config.is_paused + return global_config.is_paused def check_timeline(self) -> bool: """:returns: `True` if should check whether the timeline is latest during the leader race.""" - return self.global_config.check_mode('check_timeline') + return global_config.check_mode('check_timeline') def is_standby_cluster(self) -> bool: """:returns: `True` if global configuration has a valid "standby_cluster" section.""" - return self.global_config.is_standby_cluster + return global_config.is_standby_cluster def is_leader(self) -> bool: """:returns: `True` if the current node is the leader, based on expiration set when it last held the key.""" @@ -296,7 +295,6 @@ def update_lock(self, update_status: bool = False) -> bool: last_lsn = self.state_handler.last_operation() slots = self.cluster.filter_permanent_slots( {**self.state_handler.slots(), slot_name_from_member_name(self.state_handler.name): last_lsn}, - self.is_standby_cluster(), self.state_handler.major_version) except Exception: logger.exception('Exception when called 
state_handler.last_operation()') @@ -450,7 +448,7 @@ def bootstrap(self) -> str: return ret or 'trying to bootstrap {0}'.format(msg) # no leader, but configuration may allowed replica creation using backup tools - create_replica_methods = self.global_config.get_standby_cluster_config().get('create_replica_methods', []) \ + create_replica_methods = global_config.get_standby_cluster_config().get('create_replica_methods', []) \ if self.is_standby_cluster() else None can_bootstrap = self.state_handler.can_create_replica_without_replication_connection(create_replica_methods) concurrent_bootstrap = self.cluster.initialize == "" @@ -525,7 +523,7 @@ def recover(self) -> str: :returns: action message, describing what was performed. """ if self.has_lock() and self.update_lock(): - timeout = self.global_config.primary_start_timeout + timeout = global_config.primary_start_timeout if timeout == 0: # We are requested to prefer failing over to restarting primary. But see first if there # is anyone to fail over to. @@ -622,7 +620,7 @@ def _get_node_to_follow(self, cluster: Cluster) -> Union[Leader, Member, None]: for param in params: # It is highly unlikely to happen, but we want to protect from the case node_to_follow.data.pop(param, None) # when above-mentioned params came from outside. 
if self.is_standby_cluster(): - standby_config = self.global_config.get_standby_cluster_config() + standby_config = global_config.get_standby_cluster_config() node_to_follow.data.update({p: standby_config[p] for p in params if standby_config.get(p)}) return node_to_follow @@ -684,11 +682,11 @@ def follow(self, demote_reason: str, follow_reason: str, refresh: bool = True) - def is_synchronous_mode(self) -> bool: """:returns: `True` if synchronous replication is requested.""" - return self.global_config.is_synchronous_mode + return global_config.is_synchronous_mode def is_failsafe_mode(self) -> bool: """:returns: `True` if failsafe_mode is enabled in global configuration.""" - return self.global_config.check_mode('failsafe_mode') + return global_config.check_mode('failsafe_mode') def process_sync_replication(self) -> None: """Process synchronous standby beahvior. @@ -732,7 +730,7 @@ def process_sync_replication(self) -> None: return logger.info('Synchronous replication key updated by someone else.') # When strict mode and no suitable replication connections put "*" to synchronous_standby_names - if self.global_config.is_synchronous_mode_strict and not picked: + if global_config.is_synchronous_mode_strict and not picked: picked = CaseInsensitiveSet('*') logger.warning("No standbys available!") @@ -805,7 +803,7 @@ def update_cluster_history(self) -> None: cluster_history_dict: Dict[int, List[Any]] = {line[0]: list(line) for line in cluster_history} history: List[List[Any]] = list(map(list, self.state_handler.get_history(primary_timeline))) if self.cluster.config: - history = history[-self.cluster.config.max_timelines_history:] + history = history[-global_config.max_timelines_history:] for line in history: # enrich current history with promotion timestamps stored in DCS cluster_history_line = cluster_history_dict.get(line[0], []) @@ -863,7 +861,7 @@ def enforce_primary_role(self, message: str, promote_message: str) -> str: # promotion until next cycle. 
TODO: trigger immediate retry of run_cycle return 'Postponing promotion because synchronous replication state was updated by somebody else' self.state_handler.sync_handler.set_synchronous_standby_names( - CaseInsensitiveSet('*') if self.global_config.is_synchronous_mode_strict else CaseInsensitiveSet()) + CaseInsensitiveSet('*') if global_config.is_synchronous_mode_strict else CaseInsensitiveSet()) if self.state_handler.role not in ('master', 'promoted', 'primary'): # reset failsafe state when promote self._failsafe.set_is_active(0) @@ -974,7 +972,7 @@ def is_lagging(self, wal_position: int) -> bool: :returns True when node is lagging """ lag = (self.cluster.last_lsn or 0) - wal_position - return lag > self.global_config.maximum_lag_on_failover + return lag > global_config.maximum_lag_on_failover def _is_healthiest_node(self, members: Collection[Member], check_replication_lag: bool = True) -> bool: """This method tries to determine whether I am healthy enough to became a new leader candidate or not.""" @@ -1541,7 +1539,7 @@ def restart(self, restart_data: Dict[str, Any], run_async: bool = False) -> Tupl # Now that restart is scheduled we can set timeout for startup, it will get reset # once async executor runs and main loop notices PostgreSQL as up. 
- timeout = restart_data.get('timeout', self.global_config.primary_start_timeout) + timeout = restart_data.get('timeout', global_config.primary_start_timeout) self.set_start_timeout(timeout) def before_shutdown() -> None: @@ -1605,7 +1603,7 @@ def handle_long_action_in_progress(self) -> str: """Figure out what to do with the task AsyncExecutor is performing.""" if self.has_lock() and self.update_lock(): if self._async_executor.scheduled_action == 'doing crash recovery in a single user mode': - time_left = self.global_config.primary_start_timeout - (time.time() - self._crash_recovery_started) + time_left = global_config.primary_start_timeout - (time.time() - self._crash_recovery_started) if time_left <= 0 and self.is_failover_possible(): logger.info("Demoting self because crash recovery is taking too long") self.state_handler.cancellable.cancel(True) @@ -1690,7 +1688,7 @@ def post_bootstrap(self) -> str: self.set_is_leader(True) if self.is_synchronous_mode(): self.state_handler.sync_handler.set_synchronous_standby_names( - CaseInsensitiveSet('*') if self.global_config.is_synchronous_mode_strict else CaseInsensitiveSet()) + CaseInsensitiveSet('*') if global_config.is_synchronous_mode_strict else CaseInsensitiveSet()) self.state_handler.call_nowait(CallbackAction.ON_START) self.load_cluster_from_dcs() @@ -1713,7 +1711,7 @@ def handle_starting_instance(self) -> Optional[str]: self.demote('immediate-nolock') return 'stopped PostgreSQL while starting up because leader key was lost' - timeout = self._start_timeout or self.global_config.primary_start_timeout + timeout = self._start_timeout or global_config.primary_start_timeout time_left = timeout - self.state_handler.time_in_state() if time_left <= 0: @@ -1746,8 +1744,8 @@ def _run_cycle(self) -> str: try: try: self.load_cluster_from_dcs() - self.global_config = self.patroni.config.get_global_config(self.cluster) - self.state_handler.reset_cluster_info_state(self.cluster, self.patroni.nofailover, self.global_config) + 
global_config.update(self.cluster) + self.state_handler.reset_cluster_info_state(self.cluster, self.patroni.nofailover) except Exception: self.state_handler.reset_cluster_info_state(None) raise @@ -1767,10 +1765,10 @@ def _run_cycle(self) -> str: self.touch_member() # cluster has leader key but not initialize key - if not (self.cluster.is_unlocked() or self.sysid_valid(self.cluster.initialize)) and self.has_lock(): + if self.has_lock(False) and not self.sysid_valid(self.cluster.initialize): self.dcs.initialize(create_new=(self.cluster.initialize is None), sysid=self.state_handler.sysid) - if not (self.cluster.is_unlocked() or self.cluster.config and self.cluster.config.data) and self.has_lock(): + if self.has_lock(False) and not (self.cluster.config and self.cluster.config.data): self.dcs.set_config_value(json.dumps(self.patroni.config.dynamic_configuration, separators=(',', ':'))) self.cluster = self.dcs.get_cluster() @@ -2047,7 +2045,7 @@ def get_remote_member(self, member: Union[Leader, Member, None] = None) -> Remot config or cluster.config.data. """ data: Dict[str, Any] = {} - cluster_params = self.global_config.get_standby_cluster_config() + cluster_params = global_config.get_standby_cluster_config() if cluster_params: data.update({k: v for k, v in cluster_params.items() if k in RemoteMember.ALLOWED_KEYS}) diff --git a/patroni/postgresql/__init__.py b/patroni/postgresql/__init__.py index fdfc26c1c..c1027dad0 100644 --- a/patroni/postgresql/__init__.py +++ b/patroni/postgresql/__init__.py @@ -24,7 +24,7 @@ from .postmaster import PostmasterProcess from .slots import SlotsHandler from .sync import SyncHandler -from .. import psycopg +from .. 
import global_config, psycopg from ..async_executor import CriticalTask from ..collections import CaseInsensitiveSet from ..dcs import Cluster, Leader, Member, SLOT_ADVANCE_AVAILABLE_VERSION @@ -34,7 +34,6 @@ if TYPE_CHECKING: # pragma: no cover from psycopg import Connection as Connection3, Cursor from psycopg2 import connection as connection3, cursor - from ..config import GlobalConfig logger = logging.getLogger(__name__) @@ -73,7 +72,6 @@ def __init__(self, config: Dict[str, Any]) -> None: self.connection_string: str self.proxy_url: Optional[str] self._major_version = self.get_major_version() - self._global_config = None self._state_lock = Lock() self.set_state('stopped') @@ -217,7 +215,7 @@ def cluster_info_query(self) -> str: "FROM pg_catalog.pg_stat_get_wal_senders() w," " pg_catalog.pg_stat_get_activity(w.pid)" " WHERE w.state = 'streaming') r)").format(self.wal_name, self.lsn_name) - if (not self.global_config or self.global_config.is_synchronous_mode) + if global_config.is_synchronous_mode and self.role in ('master', 'primary', 'promoted') else "'on', '', NULL") if self._major_version >= 90600: @@ -426,12 +424,7 @@ def set_enforce_hot_standby_feedback(self, value: bool) -> None: self.config.write_postgresql_conf() self.reload() - @property - def global_config(self) -> Optional['GlobalConfig']: - return self._global_config - - def reset_cluster_info_state(self, cluster: Union[Cluster, None], nofailover: bool = False, - global_config: Optional['GlobalConfig'] = None) -> None: + def reset_cluster_info_state(self, cluster: Union[Cluster, None], nofailover: bool = False) -> None: """Reset monitoring query cache. It happens in the beginning of heart-beat loop and on change of `synchronous_standby_names`. @@ -440,30 +433,22 @@ def reset_cluster_info_state(self, cluster: Union[Cluster, None], nofailover: bo :param nofailover: whether this node could become a new primary. 
Important when there are logical permanent replication slots because "nofailover" node could do cascading replication and should enable `hot_standby_feedback` - :param global_config: last known :class:`GlobalConfig` object """ self._cluster_info_state = {} - if global_config: - self._global_config = global_config - - if not self._global_config: - return - - if self._global_config.is_standby_cluster: + if global_config.is_standby_cluster: # Standby cluster can't have logical replication slots, and we don't need to enforce hot_standby_feedback self.set_enforce_hot_standby_feedback(False) if cluster and cluster.config and cluster.config.modify_version: # We want to enable hot_standby_feedback if the replica is supposed # to have a logical slot or in case if it is the cascading replica. - self.set_enforce_hot_standby_feedback(not self._global_config.is_standby_cluster and self.can_advance_slots + self.set_enforce_hot_standby_feedback(not global_config.is_standby_cluster and self.can_advance_slots and cluster.should_enforce_hot_standby_feedback(self.name, nofailover)) self._has_permanent_slots = cluster.has_permanent_slots( my_name=self.name, - is_standby_cluster=self._global_config.is_standby_cluster, nofailover=nofailover, major_version=self.major_version) diff --git a/patroni/postgresql/config.py b/patroni/postgresql/config.py index b15f7f234..271bbdfec 100644 --- a/patroni/postgresql/config.py +++ b/patroni/postgresql/config.py @@ -12,6 +12,7 @@ from typing import Any, Collection, Dict, Iterator, List, Optional, Union, Tuple, Type, TYPE_CHECKING from .validator import recovery_parameters, transform_postgresql_parameter_value, transform_recovery_parameter_value +from .. 
import global_config from ..collections import CaseInsensitiveDict, CaseInsensitiveSet from ..dcs import Leader, Member, RemoteMember, slot_name_from_member_name from ..exceptions import PatroniFatalException, PostgresConnectionException @@ -595,7 +596,7 @@ def build_recovery_params(self, member: Union[Leader, Member, None]) -> CaseInse is_remote_member = isinstance(member, RemoteMember) primary_conninfo = self.primary_conninfo_params(member) if primary_conninfo: - use_slots = self.get('use_slots', True) and self._postgresql.major_version >= 90400 + use_slots = global_config.use_slots and self._postgresql.major_version >= 90400 if use_slots and not (is_remote_member and member.no_replication_slot): primary_slot_name = member.primary_slot_name if is_remote_member else self._postgresql.name recovery_params['primary_slot_name'] = slot_name_from_member_name(primary_slot_name) @@ -930,10 +931,10 @@ def get_server_parameters(self, config: Dict[str, Any]) -> CaseInsensitiveDict: parameters = config['parameters'].copy() listen_addresses, port = split_host_port(config['listen'], 5432) parameters.update(cluster_name=self._postgresql.scope, listen_addresses=listen_addresses, port=str(port)) - if not self._postgresql.global_config or self._postgresql.global_config.is_synchronous_mode: + if global_config.is_synchronous_mode: synchronous_standby_names = self._server_parameters.get('synchronous_standby_names') if synchronous_standby_names is None: - if self._postgresql.global_config and self._postgresql.global_config.is_synchronous_mode_strict\ + if global_config.is_synchronous_mode_strict\ and self._postgresql.role in ('master', 'primary', 'promoted'): parameters['synchronous_standby_names'] = '*' else: diff --git a/patroni/postgresql/slots.py b/patroni/postgresql/slots.py index 48b275e49..66bb1f008 100644 --- a/patroni/postgresql/slots.py +++ b/patroni/postgresql/slots.py @@ -13,6 +13,7 @@ from .connection import get_connection_cursor from .misc import format_lsn, fsync_dir 
+from .. import global_config from ..dcs import Cluster, Leader from ..file_perm import pg_perm from ..psycopg import OperationalError @@ -293,7 +294,7 @@ def ignore_replication_slot(self, cluster: Cluster, name: str) -> bool: """ slot = self._replication_slots[name] if cluster.config: - for matcher in cluster.config.ignore_slots_matchers: + for matcher in global_config.ignore_slots_matchers: if ( (matcher.get("name") is None or matcher["name"] == name) and all(not matcher.get(a) or matcher[a] == slot.get(a) @@ -510,13 +511,12 @@ def sync_replication_slots(self, cluster: Cluster, nofailover: bool, :returns: list of logical replication slots names that should be copied from the primary. """ ret = [] - if self._postgresql.major_version >= 90400 and self._postgresql.global_config and cluster.config: + if self._postgresql.major_version >= 90400 and cluster.config: try: self.load_replication_slots() - slots = cluster.get_replication_slots( - self._postgresql.name, self._postgresql.role, nofailover, self._postgresql.major_version, - is_standby_cluster=self._postgresql.global_config.is_standby_cluster, show_error=True) + slots = cluster.get_replication_slots(self._postgresql.name, self._postgresql.role, + nofailover, self._postgresql.major_version, show_error=True) self._drop_incorrect_slots(cluster, slots, paused) diff --git a/patroni/postgresql/sync.py b/patroni/postgresql/sync.py index 9cff04e02..577422b5d 100644 --- a/patroni/postgresql/sync.py +++ b/patroni/postgresql/sync.py @@ -5,6 +5,7 @@ from copy import deepcopy from typing import Collection, List, NamedTuple, Tuple, TYPE_CHECKING +from .. 
import global_config from ..collections import CaseInsensitiveDict, CaseInsensitiveSet from ..dcs import Cluster from ..psycopg import quote_ident as _quote_ident @@ -303,11 +304,8 @@ def current_state(self, cluster: Cluster) -> Tuple[CaseInsensitiveSet, CaseInsen replica_list = _ReplicaList(self._postgresql, cluster) self._process_replica_readiness(cluster, replica_list) - if TYPE_CHECKING: # pragma: no cover - assert self._postgresql.global_config is not None - sync_node_count = self._postgresql.global_config.synchronous_node_count\ - if self._postgresql.supports_multiple_sync else 1 - sync_node_maxlag = self._postgresql.global_config.maximum_lag_on_syncnode + sync_node_count = global_config.synchronous_node_count if self._postgresql.supports_multiple_sync else 1 + sync_node_maxlag = global_config.maximum_lag_on_syncnode candidates = CaseInsensitiveSet() sync_nodes = CaseInsensitiveSet() diff --git a/patroni/utils.py b/patroni/utils.py index 6957369fd..23f419e5c 100644 --- a/patroni/utils.py +++ b/patroni/utils.py @@ -33,7 +33,6 @@ if TYPE_CHECKING: # pragma: no cover from .dcs import Cluster - from .config import GlobalConfig tzutc = tz.tzutc() @@ -759,12 +758,10 @@ def iter_response_objects(response: HTTPResponse) -> Iterator[Dict[str, Any]]: prev = chunk[idx:] -def cluster_as_json(cluster: 'Cluster', global_config: Optional['GlobalConfig'] = None) -> Dict[str, Any]: +def cluster_as_json(cluster: 'Cluster') -> Dict[str, Any]: """Get a JSON representation of *cluster*. :param cluster: the :class:`~patroni.dcs.Cluster` object to be parsed as JSON. - :param global_config: optional :class:`~patroni.config.GlobalConfig` object to check the cluster state. - if not provided will be instantiated from the `Cluster.config`. :returns: JSON representation of *cluster*. @@ -793,16 +790,16 @@ def cluster_as_json(cluster: 'Cluster', global_config: Optional['GlobalConfig'] * ``from``: name of the member to be demoted; * ``to``: name of the member to be promoted. 
""" - if not global_config: - from patroni.config import get_global_config - global_config = get_global_config(cluster) + from . import global_config + + config = global_config.from_cluster(cluster) leader_name = cluster.leader.name if cluster.leader else None cluster_lsn = cluster.last_lsn or 0 ret: Dict[str, Any] = {'members': []} for m in cluster.members: if m.name == leader_name: - role = 'standby_leader' if global_config.is_standby_cluster else 'leader' + role = 'standby_leader' if config.is_standby_cluster else 'leader' elif cluster.sync.matches(m.name): role = 'sync_standby' else: @@ -832,7 +829,7 @@ def cluster_as_json(cluster: 'Cluster', global_config: Optional['GlobalConfig'] # sort members by name for consistency cmp: Callable[[Dict[str, Any]], bool] = lambda m: m['name'] ret['members'].sort(key=cmp) - if global_config.is_paused: + if config.is_paused: ret['pause'] = True if cluster.failover and cluster.failover.scheduled_at: ret['scheduled_switchover'] = {'at': cluster.failover.scheduled_at.isoformat()} diff --git a/tests/test_api.py b/tests/test_api.py index 71c566cd4..234a6824f 100644 --- a/tests/test_api.py +++ b/tests/test_api.py @@ -8,8 +8,8 @@ from mock import Mock, PropertyMock, patch from socketserver import ThreadingMixIn +from patroni import global_config from patroni.api import RestApiHandler, RestApiServer -from patroni.config import GlobalConfig from patroni.dcs import ClusterConfig, Member from patroni.exceptions import PostgresConnectionException from patroni.ha import _MemberStatus @@ -148,16 +148,9 @@ class MockLogger(object): records_lost = 1 -class MockConfig(object): - - def get_global_config(self, _): - return GlobalConfig({}) - - class MockPatroni(object): ha = MockHa() - config = MockConfig() postgresql = ha.state_handler dcs = Mock() logger = MockLogger() @@ -211,7 +204,7 @@ class TestRestApiHandler(unittest.TestCase): def test_do_GET(self): MockPatroni.dcs.cluster.last_lsn = 20 MockPatroni.dcs.cluster.sync.members = 
[MockPostgresql.name] - with patch.object(GlobalConfig, 'is_synchronous_mode', PropertyMock(return_value=True)): + with patch.object(global_config.__class__, 'is_synchronous_mode', PropertyMock(return_value=True)): MockRestApiServer(RestApiHandler, 'GET /replica') MockRestApiServer(RestApiHandler, 'GET /replica?lag=1M') MockRestApiServer(RestApiHandler, 'GET /replica?lag=10MB') @@ -234,7 +227,7 @@ def test_do_GET(self): with patch.object(MockHa, 'is_leader', Mock(return_value=True)): MockRestApiServer(RestApiHandler, 'GET /replica') MockRestApiServer(RestApiHandler, 'GET /read-only-sync') - with patch.object(GlobalConfig, 'is_standby_cluster', Mock(return_value=True)): + with patch.object(global_config.__class__, 'is_standby_cluster', Mock(return_value=True)): MockRestApiServer(RestApiHandler, 'GET /standby_leader') MockPatroni.dcs.cluster = None with patch.object(RestApiHandler, 'get_postgresql_status', Mock(return_value={'role': 'primary'})): @@ -244,8 +237,8 @@ def test_do_GET(self): self.assertIsNotNone(MockRestApiServer(RestApiHandler, 'GET /primary')) with patch.object(RestApiServer, 'query', Mock(return_value=[('', 1, '', '', '', '', False, None, None, '')])): self.assertIsNotNone(MockRestApiServer(RestApiHandler, 'GET /patroni')) - with patch.object(GlobalConfig, 'is_standby_cluster', Mock(return_value=True)), \ - patch.object(GlobalConfig, 'is_paused', Mock(return_value=True)): + with patch.object(global_config.__class__, 'is_standby_cluster', Mock(return_value=True)), \ + patch.object(global_config.__class__, 'is_paused', Mock(return_value=True)): MockRestApiServer(RestApiHandler, 'GET /standby_leader') # test tags @@ -475,7 +468,7 @@ def make_request(request=None, **kwargs): request = make_request(role='primary', postgres_version='9.5.2') MockRestApiServer(RestApiHandler, request) - with patch.object(GlobalConfig, 'is_paused', PropertyMock(return_value=True)): + with patch.object(global_config.__class__, 'is_paused', PropertyMock(return_value=True)): 
MockRestApiServer(RestApiHandler, make_request(schedule='2016-08-42 12:45TZ+1', role='primary')) # Valid timeout MockRestApiServer(RestApiHandler, make_request(timeout='60s')) @@ -537,7 +530,7 @@ def test_do_POST_switchover(self, dcs): # Switchover in pause mode with patch.object(RestApiHandler, 'write_response') as response_mock, \ - patch.object(GlobalConfig, 'is_paused', PropertyMock(return_value=True)): + patch.object(global_config.__class__, 'is_paused', PropertyMock(return_value=True)): MockRestApiServer(RestApiHandler, request) response_mock.assert_called_with( 400, 'Switchover is possible only to a specific candidate in a paused state') @@ -546,7 +539,8 @@ def test_do_POST_switchover(self, dcs): for is_synchronous_mode, response in ( (True, 'switchover is not possible: can not find sync_standby'), (False, 'switchover is not possible: cluster does not have members except leader')): - with patch.object(GlobalConfig, 'is_synchronous_mode', PropertyMock(return_value=is_synchronous_mode)), \ + with patch.object(global_config.__class__, 'is_synchronous_mode', + PropertyMock(return_value=is_synchronous_mode)), \ patch.object(RestApiHandler, 'write_response') as response_mock: MockRestApiServer(RestApiHandler, request) response_mock.assert_called_with(412, response) @@ -571,7 +565,8 @@ def test_do_POST_switchover(self, dcs): cluster.sync.matches.return_value = False for is_synchronous_mode, response in ( (True, 'candidate name does not match with sync_standby'), (False, 'candidate does not exists')): - with patch.object(GlobalConfig, 'is_synchronous_mode', PropertyMock(return_value=is_synchronous_mode)), \ + with patch.object(global_config.__class__, 'is_synchronous_mode', + PropertyMock(return_value=is_synchronous_mode)), \ patch.object(RestApiHandler, 'write_response') as response_mock: MockRestApiServer(RestApiHandler, request) response_mock.assert_called_with(412, response) @@ -632,7 +627,7 @@ def test_do_POST_switchover(self, dcs): # Schedule in paused mode 
with patch.object(RestApiHandler, 'write_response') as response_mock, \ - patch.object(GlobalConfig, 'is_paused', PropertyMock(return_value=True)): + patch.object(global_config.__class__, 'is_paused', PropertyMock(return_value=True)): dcs.manual_failover.return_value = False MockRestApiServer(RestApiHandler, request) response_mock.assert_called_with(400, "Can't schedule switchover in the paused state") diff --git a/tests/test_config.py b/tests/test_config.py index dfb3b6e34..7bf01f564 100644 --- a/tests/test_config.py +++ b/tests/test_config.py @@ -5,7 +5,11 @@ from copy import deepcopy from mock import MagicMock, Mock, patch -from patroni.config import Config, ConfigParseError, GlobalConfig + +from patroni import global_config +from patroni.config import ClusterConfig, Config, ConfigParseError + +from .test_ha import get_cluster_initialized_with_only_leader class TestConfig(unittest.TestCase): @@ -248,4 +252,6 @@ def test__validate_and_adjust_timeouts(self): def test_global_config_is_synchronous_mode(self): # we should ignore synchronous_mode setting in a standby cluster config = {'standby_cluster': {'host': 'some_host'}, 'synchronous_mode': True} - self.assertFalse(GlobalConfig(config).is_synchronous_mode) + cluster = get_cluster_initialized_with_only_leader(cluster_config=ClusterConfig(1, config, 1)) + test_config = global_config.from_cluster(cluster) + self.assertFalse(test_config.is_synchronous_mode) diff --git a/tests/test_ctl.py b/tests/test_ctl.py index 96c36c16a..a174b03d9 100644 --- a/tests/test_ctl.py +++ b/tests/test_ctl.py @@ -7,6 +7,7 @@ from click.testing import CliRunner from datetime import datetime, timedelta from mock import patch, Mock, PropertyMock +from patroni import global_config from patroni.ctl import ctl, load_config, output_members, get_dcs, parse_dcs, \ get_all_members, get_any_member, get_cursor, query_member, PatroniCtlException, apply_config_changes, \ format_config_for_editing, show_diff, invoke_editor, format_pg_version, 
CONFIG_FILE_PATH, PatronictlPrettyTable @@ -147,7 +148,7 @@ def test_switchover(self): self.assertEqual(result.exit_code, 0) # Scheduled in pause mode - with patch('patroni.config.GlobalConfig.is_paused', PropertyMock(return_value=True)): + with patch.object(global_config.__class__, 'is_paused', PropertyMock(return_value=True)): result = self.runner.invoke(ctl, ['switchover', 'dummy', '--group', '0', '--force', '--scheduled', '2015-01-01T12:00:00']) self.assertEqual(result.exit_code, 1) @@ -369,7 +370,7 @@ def test_restart_reinit(self, mock_post): result = self.runner.invoke(ctl, ['restart', 'alpha', 'other', '--force', '--scheduled', '2300-10-01T14:30']) assert 'Failed: flush scheduled restart' in result.output - with patch('patroni.config.GlobalConfig.is_paused', PropertyMock(return_value=True)): + with patch.object(global_config.__class__, 'is_paused', PropertyMock(return_value=True)): result = self.runner.invoke(ctl, ['restart', 'alpha', 'other', '--force', '--scheduled', '2300-10-01T14:30']) assert result.exit_code == 1 @@ -533,7 +534,7 @@ def test_pause_cluster(self): result = self.runner.invoke(ctl, ['pause', 'dummy']) assert 'Failed' in result.output - with patch('patroni.config.GlobalConfig.is_paused', PropertyMock(return_value=True)): + with patch.object(global_config.__class__, 'is_paused', PropertyMock(return_value=True)): result = self.runner.invoke(ctl, ['pause', 'dummy']) assert 'Cluster is already paused' in result.output @@ -552,11 +553,11 @@ def test_pause_cluster(self): @patch('patroni.dcs.AbstractDCS.get_cluster', Mock(return_value=get_cluster_initialized_with_leader())) def test_resume_cluster(self, mock_post): mock_post.return_value.status = 200 - with patch('patroni.config.GlobalConfig.is_paused', PropertyMock(return_value=False)): + with patch.object(global_config.__class__, 'is_paused', PropertyMock(return_value=False)): result = self.runner.invoke(ctl, ['resume', 'dummy']) assert 'Cluster is not paused' in result.output - with 
patch('patroni.config.GlobalConfig.is_paused', PropertyMock(return_value=True)): + with patch.object(global_config.__class__, 'is_paused', PropertyMock(return_value=True)): result = self.runner.invoke(ctl, ['resume', 'dummy']) assert 'Success' in result.output diff --git a/tests/test_ha.py b/tests/test_ha.py index e197d28bc..400630184 100644 --- a/tests/test_ha.py +++ b/tests/test_ha.py @@ -4,6 +4,7 @@ import sys from mock import Mock, MagicMock, PropertyMock, patch, mock_open +from patroni import global_config from patroni.collections import CaseInsensitiveSet from patroni.config import Config from patroni.dcs import Cluster, ClusterConfig, Failover, Leader, Member, get_dcs, Status, SyncState, TimelineHistory @@ -217,6 +218,7 @@ def setUp(self): self.ha = Ha(MockPatroni(self.p, self.e)) self.ha.old_cluster = self.e.get_cluster() self.ha.cluster = get_cluster_initialized_without_leader() + global_config.update(self.ha.cluster) self.ha.load_cluster_from_dcs = Mock() def test_update_lock(self): @@ -251,8 +253,10 @@ def test_start_as_replica(self): @patch('patroni.dcs.etcd.Etcd.initialize', return_value=True) def test_bootstrap_as_standby_leader(self, initialize): self.p.data_directory_empty = true + self.ha.cluster = get_cluster_not_initialized_without_leader( + cluster_config=ClusterConfig(1, {"standby_cluster": {"port": 5432}}, 1)) + global_config.update(self.ha.cluster) self.ha.cluster = get_cluster_not_initialized_without_leader(cluster_config=ClusterConfig(0, {}, 0)) - self.ha.patroni.config._dynamic_configuration = {"standby_cluster": {"port": 5432}} self.assertEqual(self.ha.run_cycle(), 'trying to bootstrap a new standby leader') def test_bootstrap_waiting_for_standby_leader(self): @@ -318,7 +322,7 @@ def test_crash_recovery(self): self.ha.state_handler.cancellable._process = Mock() self.ha._crash_recovery_started -= 600 self.ha.cluster.config.data.update({'maximum_lag_on_failover': 10}) - self.ha.global_config = 
self.ha.patroni.config.get_global_config(self.ha.cluster) + global_config.update(self.ha.cluster) self.assertEqual(self.ha.run_cycle(), 'terminated crash recovery because of startup timeout') @patch.object(Rewind, 'ensure_clean_shutdown', Mock()) @@ -509,7 +513,7 @@ def test_no_dcs_connection_primary_demote(self): def test_check_failsafe_topology(self): self.ha.load_cluster_from_dcs = Mock(side_effect=DCSError('Etcd is not responding properly')) self.ha.cluster = get_cluster_initialized_with_leader_and_failsafe() - self.ha.global_config = self.ha.patroni.config.get_global_config(self.ha.cluster) + global_config.update(self.ha.cluster) self.ha.dcs._last_failsafe = self.ha.cluster.failsafe self.assertEqual(self.ha.run_cycle(), 'demoting self because DCS is not accessible and I was a leader') self.ha.state_handler.name = self.ha.cluster.leader.name @@ -529,7 +533,7 @@ def test_check_failsafe_topology(self): def test_no_dcs_connection_primary_failsafe(self): self.ha.load_cluster_from_dcs = Mock(side_effect=DCSError('Etcd is not responding properly')) self.ha.cluster = get_cluster_initialized_with_leader_and_failsafe() - self.ha.global_config = self.ha.patroni.config.get_global_config(self.ha.cluster) + global_config.update(self.ha.cluster) self.ha.dcs._last_failsafe = self.ha.cluster.failsafe self.ha.state_handler.name = self.ha.cluster.leader.name self.assertEqual(self.ha.run_cycle(), @@ -546,7 +550,7 @@ def test_readonly_dcs_primary_failsafe(self): def test_no_dcs_connection_replica_failsafe(self): self.ha.load_cluster_from_dcs = Mock(side_effect=DCSError('Etcd is not responding properly')) self.ha.cluster = get_cluster_initialized_with_leader_and_failsafe() - self.ha.global_config = self.ha.patroni.config.get_global_config(self.ha.cluster) + global_config.update(self.ha.cluster) self.ha.update_failsafe({'name': 'leader', 'api_url': 'http://127.0.0.1:8008/patroni', 'conn_url': 'postgres://127.0.0.1:5432/postgres', 'slots': {'foo': 1000}}) self.p.is_primary = false @@ 
-766,7 +770,7 @@ def test_manual_switchover_from_leader(self): with patch('patroni.ha.logger.info') as mock_info: self.ha.fetch_node_status = get_node_status(wal_position=1) self.ha.cluster.config.data.update({'maximum_lag_on_failover': 5}) - self.ha.global_config = self.ha.patroni.config.get_global_config(self.ha.cluster) + global_config.update(self.ha.cluster) self.assertEqual(self.ha.run_cycle(), 'no action. I am (postgresql0), the leader with the lock') self.assertEqual(mock_info.call_args_list[0][0], ('Member %s exceeds maximum replication lag', 'leader')) @@ -1032,7 +1036,7 @@ def test_is_healthiest_node(self): def test__is_healthiest_node(self): self.p.is_primary = false self.ha.cluster = get_cluster_initialized_without_leader(sync=('postgresql1', self.p.name)) - self.ha.global_config = self.ha.patroni.config.get_global_config(self.ha.cluster) + global_config.update(self.ha.cluster) self.assertTrue(self.ha._is_healthiest_node(self.ha.old_cluster.members)) self.ha.fetch_node_status = get_node_status() # accessible, in_recovery self.assertTrue(self.ha._is_healthiest_node(self.ha.old_cluster.members)) @@ -1049,7 +1053,7 @@ def test__is_healthiest_node(self): with patch.object(Ha, 'is_synchronous_mode', Mock(return_value=True)): self.assertTrue(self.ha._is_healthiest_node(self.ha.old_cluster.members)) self.ha.cluster.config.data.update({'maximum_lag_on_failover': 5}) - self.ha.global_config = self.ha.patroni.config.get_global_config(self.ha.cluster) + global_config.update(self.ha.cluster) with patch('patroni.postgresql.Postgresql.last_operation', return_value=1): self.assertFalse(self.ha._is_healthiest_node(self.ha.old_cluster.members)) with patch('patroni.postgresql.Postgresql.replica_cached_timeline', return_value=None): @@ -1272,7 +1276,7 @@ def test_failover_immediately_on_zero_primary_start_timeout(self, demote): self.p.is_running = false self.ha.cluster = get_cluster_initialized_with_leader(sync=(self.p.name, 'other')) 
self.ha.cluster.config.data.update({'synchronous_mode': True, 'primary_start_timeout': 0}) - self.ha.global_config = self.ha.patroni.config.get_global_config(self.ha.cluster) + global_config.update(self.ha.cluster) self.ha.has_lock = true self.ha.update_lock = true self.ha.fetch_node_status = get_node_status() # accessible, in_recovery @@ -1282,13 +1286,13 @@ def test_failover_immediately_on_zero_primary_start_timeout(self, demote): def test_primary_stop_timeout(self): self.assertEqual(self.ha.primary_stop_timeout(), None) self.ha.cluster.config.data.update({'primary_stop_timeout': 30}) - self.ha.global_config = self.ha.patroni.config.get_global_config(self.ha.cluster) + global_config.update(self.ha.cluster) with patch.object(Ha, 'is_synchronous_mode', Mock(return_value=True)): self.assertEqual(self.ha.primary_stop_timeout(), 30) with patch.object(Ha, 'is_synchronous_mode', Mock(return_value=False)): self.assertEqual(self.ha.primary_stop_timeout(), None) self.ha.cluster.config.data['primary_stop_timeout'] = None - self.ha.global_config = self.ha.patroni.config.get_global_config(self.ha.cluster) + global_config.update(self.ha.cluster) self.assertEqual(self.ha.primary_stop_timeout(), None) @patch('patroni.postgresql.Postgresql.follow') @@ -1380,8 +1384,9 @@ def test_process_sync_replication(self): # Test sync set to '*' when synchronous_mode_strict is enabled mock_set_sync.reset_mock() self.p.sync_handler.current_state = Mock(return_value=(CaseInsensitiveSet(), CaseInsensitiveSet())) - with patch('patroni.config.GlobalConfig.is_synchronous_mode_strict', PropertyMock(return_value=True)): - self.ha.run_cycle() + self.ha.cluster.config.data['synchronous_mode_strict'] = True + global_config.update(self.ha.cluster) + self.ha.run_cycle() mock_set_sync.assert_called_once_with(CaseInsensitiveSet('*')) def test_sync_replication_become_primary(self): @@ -1514,7 +1519,6 @@ def test_effective_tags(self): @patch('patroni.postgresql.mtime', Mock(return_value=1588316884)) 
@patch('builtins.open', Mock(side_effect=Exception)) - @patch.object(Cluster, 'is_unlocked', Mock(return_value=False)) def test_restore_cluster_config(self): self.ha.cluster.config.data.clear() self.ha.has_lock = true diff --git a/tests/test_patroni.py b/tests/test_patroni.py index bf9e28712..8e08d4069 100644 --- a/tests/test_patroni.py +++ b/tests/test_patroni.py @@ -154,6 +154,7 @@ def test_run(self): self.p.api.start = Mock() self.p.logger.start = Mock() self.p.config._dynamic_configuration = {} + self.assertRaises(SleepException, self.p.run) with patch('patroni.dcs.Cluster.is_unlocked', Mock(return_value=True)): self.assertRaises(SleepException, self.p.run) with patch('patroni.config.Config.reload_local_configuration', Mock(return_value=False)): diff --git a/tests/test_postgresql.py b/tests/test_postgresql.py index cdd0c160b..31454479e 100644 --- a/tests/test_postgresql.py +++ b/tests/test_postgresql.py @@ -9,9 +9,9 @@ import patroni.psycopg as psycopg +from patroni import global_config from patroni.async_executor import CriticalTask from patroni.collections import CaseInsensitiveSet -from patroni.config import GlobalConfig from patroni.dcs import RemoteMember from patroni.exceptions import PostgresConnectionException, PatroniException from patroni.postgresql import Postgresql, STATE_REJECT, STATE_NO_RESPONSE @@ -692,12 +692,12 @@ def time_in_state(*args): def test_get_server_parameters(self): config = {'parameters': {'wal_level': 'hot_standby', 'max_prepared_transactions': 100}, 'listen': '0'} - self.p._global_config = GlobalConfig({'synchronous_mode': True}) - self.p.config.get_server_parameters(config) - self.p._global_config = GlobalConfig({'synchronous_mode': True, 'synchronous_mode_strict': True}) - self.p.config.get_server_parameters(config) - self.p.config.set_synchronous_standby_names('foo') - self.assertTrue(str(self.p.config.get_server_parameters(config)).startswith(' Date: Fri, 24 Nov 2023 22:17:20 +0800 Subject: [PATCH 11/33] fix typo and add 
gitignore entries (#2959) Split unrelated changes from #2940 Signed-off-by: Zhao Junwang --- .gitignore | 3 ++- patroni/dcs/__init__.py | 2 +- 2 files changed, 3 insertions(+), 2 deletions(-) diff --git a/.gitignore b/.gitignore index c902c6eb3..07e227ee3 100644 --- a/.gitignore +++ b/.gitignore @@ -27,7 +27,7 @@ lib64 pip-log.txt # Unit test / coverage reports -.coverage +.coverage* .tox nosetests.xml coverage.xml @@ -35,6 +35,7 @@ htmlcov junit.xml features/output* dummy +result.json # Translations *.mo diff --git a/patroni/dcs/__init__.py b/patroni/dcs/__init__.py index 9298e3fce..4a9f998f4 100644 --- a/patroni/dcs/__init__.py +++ b/patroni/dcs/__init__.py @@ -557,7 +557,7 @@ def from_node(version: Optional[_Version], value: Union[str, Dict[str, Any], Non """Factory method to parse *value* as synchronisation state information. :param version: optional *version* number for the object. - :param value: (optionally JSON serialised) sychronisation state information + :param value: (optionally JSON serialised) synchronisation state information :returns: constructed :class:`SyncState` object. From bb804074f7a1d625c2776cb91a829df187795059 Mon Sep 17 00:00:00 2001 From: zhjwpku Date: Mon, 27 Nov 2023 15:28:46 +0800 Subject: [PATCH 12/33] [doc]: fix typos (#2961) --- patroni/ctl.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/patroni/ctl.py b/patroni/ctl.py index 13959896f..9345d18e4 100644 --- a/patroni/ctl.py +++ b/patroni/ctl.py @@ -432,9 +432,9 @@ def print_output(columns: Optional[List[str]], rows: List[List[Any]], alignment: def watching(w: bool, watch: Optional[int], max_count: Optional[int] = None, clear: bool = True) -> Iterator[int]: - """Yield a value every ``x`` seconds. + """Yield a value every ``watch`` seconds. - Used to run a command with a watch-based aproach. + Used to run a command with a watch-based approach. :param w: if ``True`` and *watch* is ``None``, then *watch* assumes the value ``2``. 
:param watch: amount of seconds to wait before yielding another value. From 36e3dfbe41cc636ca384e69e6541db6e587ed28e Mon Sep 17 00:00:00 2001 From: Konstantin Demin Date: Mon, 27 Nov 2023 11:38:03 +0300 Subject: [PATCH 13/33] update Dockerfiles (#2937) - better cleanup for vim - introduce dumb-init for patroni containers --- Dockerfile | 6 +++--- Dockerfile.citus | 6 +++--- docker/entrypoint.sh | 4 +++- 3 files changed, 9 insertions(+), 7 deletions(-) diff --git a/Dockerfile b/Dockerfile index 3e638518d..b74cdf240 100644 --- a/Dockerfile +++ b/Dockerfile @@ -94,9 +94,9 @@ RUN set -ex \ /usr/share/locale/??_?? \ /usr/share/postgresql/*/man \ /usr/share/postgresql-common/pg_wrapper \ - /usr/share/vim/vim80/doc \ - /usr/share/vim/vim80/lang \ - /usr/share/vim/vim80/tutor \ + /usr/share/vim/vim*/doc \ + /usr/share/vim/vim*/lang \ + /usr/share/vim/vim*/tutor \ # /var/lib/dpkg/info/* \ && find /usr/bin -xtype l -delete \ && find /var/log -type f -exec truncate --size 0 {} \; \ diff --git a/Dockerfile.citus b/Dockerfile.citus index f52a36e7f..5f0164b4b 100644 --- a/Dockerfile.citus +++ b/Dockerfile.citus @@ -113,9 +113,9 @@ RUN set -ex \ /usr/share/locale/??_?? 
\ /usr/share/postgresql/*/man \ /usr/share/postgresql-common/pg_wrapper \ - /usr/share/vim/vim80/doc \ - /usr/share/vim/vim80/lang \ - /usr/share/vim/vim80/tutor \ + /usr/share/vim/vim*/doc \ + /usr/share/vim/vim*/lang \ + /usr/share/vim/vim*/tutor \ # /var/lib/dpkg/info/* \ && find /usr/bin -xtype l -delete \ && find /var/log -type f -exec truncate --size 0 {} \; \ diff --git a/docker/entrypoint.sh b/docker/entrypoint.sh index fb30bee71..1e6e91b54 100755 --- a/docker/entrypoint.sh +++ b/docker/entrypoint.sh @@ -13,6 +13,8 @@ readonly PATRONI_NAMESPACE="${PATRONI_NAMESPACE%/}" DOCKER_IP=$(hostname --ip-address) readonly DOCKER_IP +export DUMB_INIT_SETSID=0 + case "$1" in haproxy) haproxy -f /etc/haproxy/haproxy.cfg -p /var/run/haproxy.pid -D @@ -72,4 +74,4 @@ export PATRONI_SUPERUSER_SSLKEY="${PATRONI_SUPERUSER_SSLKEY:-$PGSSLKEY}" export PATRONI_SUPERUSER_SSLCERT="${PATRONI_SUPERUSER_SSLCERT:-$PGSSLCERT}" export PATRONI_SUPERUSER_SSLROOTCERT="${PATRONI_SUPERUSER_SSLROOTCERT:-$PGSSLROOTCERT}" -exec python3 /patroni.py postgres0.yml +exec dumb-init python3 /patroni.py postgres0.yml From 9afaf6eb51e6e4f91e0c1a3856e9dabf8d145aef Mon Sep 17 00:00:00 2001 From: Alexander Kukushkin Date: Tue, 28 Nov 2023 08:37:22 +0100 Subject: [PATCH 14/33] Don't pass around is_paused to sync_replication_slots (#2963) Oversight of #2935 --- patroni/ha.py | 3 +-- patroni/postgresql/slots.py | 10 ++++------ tests/test_slots.py | 3 ++- 3 files changed, 7 insertions(+), 9 deletions(-) diff --git a/patroni/ha.py b/patroni/ha.py index 12927526f..3988cb750 100644 --- a/patroni/ha.py +++ b/patroni/ha.py @@ -1960,8 +1960,7 @@ def _sync_replication_slots(self, dcs_failed: bool) -> List[str]: if cluster: slots = self.state_handler.slots_handler.sync_replication_slots(cluster, self.patroni.nofailover, - self.patroni.replicatefrom, - self.is_paused()) + self.patroni.replicatefrom) # Don't copy replication slots if failsafe_mode is active return [] if self.failsafe_is_active() else slots diff --git 
a/patroni/postgresql/slots.py b/patroni/postgresql/slots.py index 66bb1f008..025a88a98 100644 --- a/patroni/postgresql/slots.py +++ b/patroni/postgresql/slots.py @@ -320,7 +320,7 @@ def drop_replication_slot(self, name: str) -> Tuple[bool, bool]: ' FULL OUTER JOIN dropped ON true'), name) return (rows[0][0], rows[0][1]) if rows else (False, False) - def _drop_incorrect_slots(self, cluster: Cluster, slots: Dict[str, Any], paused: bool) -> None: + def _drop_incorrect_slots(self, cluster: Cluster, slots: Dict[str, Any]) -> None: """Compare required slots and configured as permanent slots with those found, dropping extraneous ones. .. note:: @@ -331,11 +331,10 @@ def _drop_incorrect_slots(self, cluster: Cluster, slots: Dict[str, Any], paused: :param cluster: cluster state information object. :param slots: dictionary of desired slot names as keys with slot attributes as a dictionary value, if known. - :param paused: ``True`` if the patroni cluster is currently in a paused state. """ # drop old replication slots which are not presented in desired slots. for name in set(self._replication_slots) - set(slots): - if not paused and not self.ignore_replication_slot(cluster, name): + if not global_config.is_paused and not self.ignore_replication_slot(cluster, name): active, dropped = self.drop_replication_slot(name) if dropped: logger.info("Dropped unknown replication slot '%s'", name) @@ -494,7 +493,7 @@ class instance. Slots that exist are also advanced if their ``confirmed_flush_ls return create_slots + copy_slots def sync_replication_slots(self, cluster: Cluster, nofailover: bool, - replicatefrom: Optional[str] = None, paused: bool = False) -> List[str]: + replicatefrom: Optional[str] = None) -> List[str]: """During the HA loop read, check and alter replication slots found in the cluster. Read physical and logical slots from ``pg_replication_slots``, then compare to those configured in the DCS. 
@@ -506,7 +505,6 @@ def sync_replication_slots(self, cluster: Cluster, nofailover: bool, :param cluster: object containing stateful information for the cluster. :param nofailover: ``True`` if this node has been tagged to not be a failover candidate. :param replicatefrom: the tag containing the node to replicate from. - :param paused: ``True`` if the cluster is in maintenance mode. :returns: list of logical replication slots names that should be copied from the primary. """ @@ -518,7 +516,7 @@ def sync_replication_slots(self, cluster: Cluster, nofailover: bool, slots = cluster.get_replication_slots(self._postgresql.name, self._postgresql.role, nofailover, self._postgresql.major_version, show_error=True) - self._drop_incorrect_slots(cluster, slots, paused) + self._drop_incorrect_slots(cluster, slots) self._ensure_physical_slots(slots) diff --git a/tests/test_slots.py b/tests/test_slots.py index 215c9b849..ee1c4ee9b 100644 --- a/tests/test_slots.py +++ b/tests/test_slots.py @@ -52,9 +52,10 @@ def test_sync_replication_slots(self): mock_debug.assert_called_once() self.p.set_role('replica') with patch.object(Postgresql, 'is_primary', Mock(return_value=False)), \ + patch.object(global_config.__class__, 'is_paused', PropertyMock(return_value=True)), \ patch.object(SlotsHandler, 'drop_replication_slot') as mock_drop: config.data['slots'].pop('ls') - self.s.sync_replication_slots(cluster, False, paused=True) + self.s.sync_replication_slots(cluster, False) mock_drop.assert_not_called() self.p.set_role('primary') with mock.patch('patroni.postgresql.Postgresql.role', new_callable=PropertyMock(return_value='replica')): From 76e19ecfe2b62ff03f5b59bb2d39c14888f1238d Mon Sep 17 00:00:00 2001 From: Laotree Date: Wed, 29 Nov 2023 15:43:07 +0800 Subject: [PATCH 15/33] Update README.rst (#2965) fix setting.rst link 404, from #2661 --- README.rst | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/README.rst b/README.rst index c2fc165ed..adade6b4a 100644 --- a/README.rst 
+++ b/README.rst @@ -151,7 +151,7 @@ run: YAML Configuration ================== -Go `here `__ for comprehensive information about settings for etcd, consul, and ZooKeeper. And for an example, see `postgres0.yml `__. +Go `here `__ for comprehensive information about settings for etcd, consul, and ZooKeeper. And for an example, see `postgres0.yml `__. ========================= Environment Configuration From 7c3ce7823125658e7f997385ca6cbe635d6def15 Mon Sep 17 00:00:00 2001 From: Alexander Kukushkin Date: Wed, 29 Nov 2023 08:44:35 +0100 Subject: [PATCH 16/33] Fix Citus transaction rollback condition check (#2964) It seems that sometimes we get an exact match, what makes behave tests to fail. --- features/steps/citus.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/features/steps/citus.py b/features/steps/citus.py index 4dd2ffa66..7277cccce 100644 --- a/features/steps/citus.py +++ b/features/steps/citus.py @@ -131,5 +131,5 @@ def check_transaction(context, name, time_limit): @step("a transaction finishes in {timeout:d} seconds") def check_transaction_timeout(context, timeout): - assert (datetime.now(tzutc) - context.xact_start).seconds > timeout, \ + assert (datetime.now(tzutc) - context.xact_start).seconds >= timeout, \ "a transaction finished earlier than in {0} seconds".format(timeout) From 92f4aa2ef9a9284d1bcda8a293f1df6a09ab54b4 Mon Sep 17 00:00:00 2001 From: Alexander Kukushkin Date: Wed, 29 Nov 2023 14:22:49 +0100 Subject: [PATCH 17/33] Simplify methods related to replication slots in the Cluster class (#2958) Instead of passing around names, specific tags, and Postgres version just pass Postgresql object and objects implementing Tags interface. 
It should simplify implementation of #2842 --- patroni/dcs/__init__.py | 138 +++++++++++++++++---------------- patroni/ha.py | 12 ++- patroni/postgresql/__init__.py | 22 +++--- patroni/postgresql/slots.py | 23 +++--- tests/test_slots.py | 55 +++++++------ 5 files changed, 128 insertions(+), 122 deletions(-) diff --git a/patroni/dcs/__init__.py b/patroni/dcs/__init__.py index 4a9f998f4..28c3734fc 100644 --- a/patroni/dcs/__init__.py +++ b/patroni/dcs/__init__.py @@ -24,6 +24,7 @@ if TYPE_CHECKING: # pragma: no cover from ..config import Config + from ..postgresql import Postgresql SLOT_ADVANCE_AVAILABLE_VERSION = 110000 CITUS_COORDINATOR_GROUP_ID = 0 @@ -956,28 +957,29 @@ def __permanent_logical_slots(self) -> Dict[str, Any]: """Dictionary of permanent ``logical`` replication slots.""" return {name: value for name, value in self.__permanent_slots.items() if self.is_logical_slot(value)} - def get_replication_slots(self, my_name: str, role: str, nofailover: bool, major_version: int, *, - show_error: bool = False) -> Dict[str, Dict[str, Any]]: + def get_replication_slots(self, postgresql: 'Postgresql', member: Tags, *, + role: Optional[str] = None, show_error: bool = False) -> Dict[str, Dict[str, Any]]: """Lookup configured slot names in the DCS, report issues found and merge with permanent slots. Will log an error if: * Any logical slots are disabled, due to version compatibility, and *show_error* is ``True``. - :param my_name: name of this node. - :param role: role of this node. - :param nofailover: ``True`` if this node is tagged to not be a failover candidate. - :param major_version: postgresql major version. + :param postgresql: reference to :class:`Postgresql` object. + :param member: reference to an object implementing :class:`Tags` interface. + :param role: role of the node, if not set will be taken from *postgresql*. :param show_error: if ``True`` report error if any disabled logical slots or conflicting slot names are found. 
:returns: final dictionary of slot names, after merging with permanent slots and performing sanity checks. """ - slots: Dict[str, Dict[str, str]] = self._get_members_slots(my_name, role) - permanent_slots: Dict[str, Any] = self._get_permanent_slots(role=role, nofailover=nofailover, - major_version=major_version) + name = member.name if isinstance(member, Member) else postgresql.name + role = role or postgresql.role + + slots: Dict[str, Dict[str, str]] = self._get_members_slots(name, role) + permanent_slots: Dict[str, Any] = self._get_permanent_slots(postgresql, member, role) disabled_permanent_logical_slots: List[str] = self._merge_permanent_slots( - slots, permanent_slots, my_name, major_version) + slots, permanent_slots, name, postgresql.major_version) if disabled_permanent_logical_slots and show_error: logger.error("Permanent logical replication slots supported by Patroni only starting from PostgreSQL 11. " @@ -985,7 +987,7 @@ def get_replication_slots(self, my_name: str, role: str, nofailover: bool, major return slots - def _merge_permanent_slots(self, slots: Dict[str, Dict[str, str]], permanent_slots: Dict[str, Any], my_name: str, + def _merge_permanent_slots(self, slots: Dict[str, Dict[str, str]], permanent_slots: Dict[str, Any], name: str, major_version: int) -> List[str]: """Merge replication *slots* for members with *permanent_slots*. @@ -995,7 +997,7 @@ def _merge_permanent_slots(self, slots: Dict[str, Dict[str, str]], permanent_slo Type is assumed to be ``physical`` if there are no attributes stored as the slot value. :param slots: Slot names with existing attributes if known. - :param my_name: name of this node. + :param name: name of this node. :param permanent_slots: dictionary containing slot name key and slot information values. :param major_version: postgresql major version. 
@@ -1003,9 +1005,9 @@ def _merge_permanent_slots(self, slots: Dict[str, Dict[str, str]], permanent_slo """ disabled_permanent_logical_slots: List[str] = [] - for name, value in permanent_slots.items(): - if not slot_name_re.match(name): - logger.error("Invalid permanent replication slot name '%s'", name) + for slot_name, value in permanent_slots.items(): + if not slot_name_re.match(slot_name): + logger.error("Invalid permanent replication slot name '%s'", slot_name) logger.error("Slot name may only contain lower case letters, numbers, and the underscore chars") continue @@ -1016,24 +1018,24 @@ def _merge_permanent_slots(self, slots: Dict[str, Dict[str, str]], permanent_slo if value['type'] == 'physical': # Don't try to create permanent physical replication slot for yourself - if name != slot_name_from_member_name(my_name): - slots[name] = value + if slot_name != slot_name_from_member_name(name): + slots[slot_name] = value continue if self.is_logical_slot(value): if major_version < SLOT_ADVANCE_AVAILABLE_VERSION: - disabled_permanent_logical_slots.append(name) - elif name in slots: + disabled_permanent_logical_slots.append(slot_name) + elif slot_name in slots: logger.error("Permanent logical replication slot {'%s': %s} is conflicting with" - " physical replication slot for cluster member", name, value) + " physical replication slot for cluster member", slot_name, value) else: - slots[name] = value + slots[slot_name] = value continue - logger.error("Bad value for slot '%s' in permanent_slots: %s", name, permanent_slots[name]) + logger.error("Bad value for slot '%s' in permanent_slots: %s", slot_name, permanent_slots[slot_name]) return disabled_permanent_logical_slots - def _get_permanent_slots(self, *, role: str, nofailover: bool, major_version: int) -> Dict[str, Any]: + def _get_permanent_slots(self, postgresql: 'Postgresql', tags: Tags, role: str) -> Dict[str, Any]: """Get configured permanent replication slots. .. 
note:: @@ -1045,23 +1047,23 @@ def _get_permanent_slots(self, *, role: str, nofailover: bool, major_version: in The returned dictionary for a non-standby cluster always contains permanent logical replication slots in order to show a warning if they are not supported by PostgreSQL before v11. - :param role: role of this node -- ``primary``, ``standby_leader`` or ``replica``. - :param nofailover: ``True`` if this node is tagged to not be a failover candidate. - :param major_version: postgresql major version. + :param postgresql: reference to :class:`Postgresql` object. + :param tags: reference to an object implementing :class:`Tags` interface. + :param role: role of the node -- ``primary``, ``standby_leader`` or ``replica``. :returns: dictionary of permanent slot names mapped to attributes. """ - if not global_config.use_slots or nofailover: + if not global_config.use_slots or tags.nofailover: return {} if global_config.is_standby_cluster: return self.__permanent_physical_slots \ - if major_version >= SLOT_ADVANCE_AVAILABLE_VERSION or role == 'standby_leader' else {} + if postgresql.major_version >= SLOT_ADVANCE_AVAILABLE_VERSION or role == 'standby_leader' else {} - return self.__permanent_slots if major_version >= SLOT_ADVANCE_AVAILABLE_VERSION\ + return self.__permanent_slots if postgresql.major_version >= SLOT_ADVANCE_AVAILABLE_VERSION\ or role in ('master', 'primary') else self.__permanent_logical_slots - def _get_members_slots(self, my_name: str, role: str) -> Dict[str, Dict[str, str]]: + def _get_members_slots(self, name: str, role: str) -> Dict[str, Dict[str, str]]: """Get physical replication slots configuration for members that sourcing from this node. If the ``replicatefrom`` tag is set on the member - we should not create the replication slot for it on @@ -1073,7 +1075,7 @@ def _get_members_slots(self, my_name: str, role: str) -> Dict[str, Dict[str, str * Conflicting slot names between members are found - :param my_name: name of this node. 
+ :param name: name of this node. :param role: role of this node, if this is a ``primary`` or ``standby_leader`` return list of members replicating from this node. If not then return a list of members replicating as cascaded replicas from this node. @@ -1084,14 +1086,14 @@ def _get_members_slots(self, my_name: str, role: str) -> Dict[str, Dict[str, str return {} # we always want to exclude the member with our name from the list - members = filter(lambda m: m.name != my_name, self.members) + members = filter(lambda m: m.name != name, self.members) if role in ('master', 'primary', 'standby_leader'): members = [m for m in members if m.replicatefrom is None - or m.replicatefrom == my_name or not self.has_member(m.replicatefrom)] + or m.replicatefrom == name or not self.has_member(m.replicatefrom)] else: # only manage slots for replicas that replicate from this one, except for the leader among them - members = [m for m in members if m.replicatefrom == my_name and m.name != self.leader_name] + members = [m for m in members if m.replicatefrom == name and m.name != self.leader_name] slots = {slot_name_from_member_name(m.name): {'type': 'physical'} for m in members} if len(slots) < len(members): @@ -1104,76 +1106,76 @@ def _get_members_slots(self, my_name: str, role: str) -> Dict[str, Dict[str, str for k, v in slot_conflicts.items() if len(v) > 1)) return slots - def has_permanent_slots(self, my_name: str, *, nofailover: bool = False, - major_version: int = SLOT_ADVANCE_AVAILABLE_VERSION) -> bool: - """Check if the given member node has permanent replication slots configured. + def has_permanent_slots(self, postgresql: 'Postgresql', member: Tags) -> bool: + """Check if our node has permanent replication slots configured. - :param my_name: name of the member node to check. - :param nofailover: ``True`` if this node is tagged to not be a failover candidate. - :param major_version: postgresql major version. + :param postgresql: reference to :class:`Postgresql` object. 
+ :param member: reference to an object implementing :class:`Tags` interface for + the node that we are checking permanent logical replication slots for. :returns: ``True`` if there are permanent replication slots configured, otherwise ``False``. """ role = 'replica' - members_slots: Dict[str, Dict[str, str]] = self._get_members_slots(my_name, role) - permanent_slots: Dict[str, Any] = self._get_permanent_slots(role=role, nofailover=nofailover, - major_version=major_version) + members_slots: Dict[str, Dict[str, str]] = self._get_members_slots(postgresql.name, role) + permanent_slots: Dict[str, Any] = self._get_permanent_slots(postgresql, member, role) slots = deepcopy(members_slots) - self._merge_permanent_slots(slots, permanent_slots, my_name, major_version) + self._merge_permanent_slots(slots, permanent_slots, postgresql.name, postgresql.major_version) return len(slots) > len(members_slots) or any(self.is_physical_slot(v) for v in permanent_slots.values()) - def filter_permanent_slots(self, slots: Dict[str, int], major_version: int) -> Dict[str, int]: + def filter_permanent_slots(self, postgresql: 'Postgresql', slots: Dict[str, int]) -> Dict[str, int]: """Filter out all non-permanent slots from provided *slots* dict. - :param slots: slot names with LSN values - :param major_version: postgresql major version. + :param postgresql: reference to :class:`Postgresql` object. + :param slots: slot names with LSN values. :returns: a :class:`dict` object that contains only slots that are known to be permanent. 
""" - if major_version < SLOT_ADVANCE_AVAILABLE_VERSION: + if postgresql.major_version < SLOT_ADVANCE_AVAILABLE_VERSION: return {} # for legacy PostgreSQL we don't support permanent slots on standby nodes - permanent_slots: Dict[str, Any] = self._get_permanent_slots(role='replica', nofailover=False, - major_version=major_version) + permanent_slots: Dict[str, Any] = self._get_permanent_slots(postgresql, RemoteMember('', {}), 'replica') members_slots = {slot_name_from_member_name(m.name) for m in self.members} return {name: value for name, value in slots.items() if name in permanent_slots and (self.is_physical_slot(permanent_slots[name]) or self.is_logical_slot(permanent_slots[name]) and name not in members_slots)} - def _has_permanent_logical_slots(self, my_name: str, nofailover: bool) -> bool: + def _has_permanent_logical_slots(self, postgresql: 'Postgresql', member: Tags) -> bool: """Check if the given member node has permanent ``logical`` replication slots configured. - :param my_name: name of the member node to check. - :param nofailover: ``True`` if this node is tagged to not be a failover candidate. + :param postgresql: reference to a :class:`Postgresql` object. + :param member: reference to an object implementing :class:`Tags` interface for + the node that we are checking permanent logical replication slots for. :returns: ``True`` if any detected replications slots are ``logical``, otherwise ``False``. """ - slots = self.get_replication_slots(my_name, 'replica', nofailover, SLOT_ADVANCE_AVAILABLE_VERSION).values() + slots = self.get_replication_slots(postgresql, member, role='replica').values() return any(v for v in slots if v.get("type") == "logical") - def should_enforce_hot_standby_feedback(self, my_name: str, nofailover: bool) -> bool: + def should_enforce_hot_standby_feedback(self, postgresql: 'Postgresql', member: Tags) -> bool: """Determine whether ``hot_standby_feedback`` should be enabled for the given member. 
The ``hot_standby_feedback`` must be enabled if the current replica has ``logical`` slots, or it is working as a cascading replica for the other node that has ``logical`` slots. - :param my_name: name of the member node to check. - :param nofailover: ``True`` if this node is tagged to not be a failover candidate. + :param postgresql: reference to a :class:`Postgresql` object. + :param member: reference to an object implementing :class:`Tags` interface for + the node that we are checking permanent logical replication slots for. :returns: ``True`` if this node or any member replicating from this node has permanent logical slots, otherwise ``False``. """ - if self._has_permanent_logical_slots(my_name, nofailover): + if self._has_permanent_logical_slots(postgresql, member): return True if global_config.use_slots: - members = [m for m in self.members if m.replicatefrom == my_name and m.name != self.leader_name] - return any(self.should_enforce_hot_standby_feedback(m.name, m.nofailover) for m in members) + name = member.name if isinstance(member, Member) else postgresql.name + members = [m for m in self.members if m.replicatefrom == name and m.name != self.leader_name] + return any(self.should_enforce_hot_standby_feedback(postgresql, m) for m in members) return False - def get_my_slot_name_on_primary(self, my_name: str, replicatefrom: Optional[str]) -> str: - """Canonical slot name for physical replication. + def get_slot_name_on_primary(self, name: str, tags: Tags) -> str: + """Get the name of physical replication slot for this node on the primary. .. note:: P <-- I <-- L @@ -1181,14 +1183,14 @@ def get_my_slot_name_on_primary(self, my_name: str, replicatefrom: Optional[str] In case of cascading replication we have to check not our physical slot, but slot of the replica that connects us to the primary. - :param my_name: the member node name that is replicating. 
- :param replicatefrom: the Intermediate member name that is configured to replicate for cascading replication. + :param name: name of the member node to check. + :param tags: reference to an object implementing :class:`Tags` interface. - :returns: The slot name that is in use for physical replication on this no`de. + :returns: the slot name on the primary that is in use for physical replication on this node. """ - m = self.get_member(replicatefrom, False) if replicatefrom else None - return self.get_my_slot_name_on_primary(m.name, m.replicatefrom) \ - if isinstance(m, Member) else slot_name_from_member_name(my_name) + replicatefrom = self.get_member(tags.replicatefrom, False) if tags.replicatefrom else None + return self.get_slot_name_on_primary(replicatefrom.name, replicatefrom) \ + if isinstance(replicatefrom, Member) else slot_name_from_member_name(name) @property def timeline(self) -> int: diff --git a/patroni/ha.py b/patroni/ha.py index 3988cb750..ab1bc433c 100644 --- a/patroni/ha.py +++ b/patroni/ha.py @@ -294,8 +294,8 @@ def update_lock(self, update_status: bool = False) -> bool: try: last_lsn = self.state_handler.last_operation() slots = self.cluster.filter_permanent_slots( - {**self.state_handler.slots(), slot_name_from_member_name(self.state_handler.name): last_lsn}, - self.state_handler.major_version) + self.state_handler, + {**self.state_handler.slots(), slot_name_from_member_name(self.state_handler.name): last_lsn}) except Exception: logger.exception('Exception when called state_handler.last_operation()') if TYPE_CHECKING: # pragma: no cover @@ -1745,7 +1745,7 @@ def _run_cycle(self) -> str: try: self.load_cluster_from_dcs() global_config.update(self.cluster) - self.state_handler.reset_cluster_info_state(self.cluster, self.patroni.nofailover) + self.state_handler.reset_cluster_info_state(self.cluster, self.patroni) except Exception: self.state_handler.reset_cluster_info_state(None) raise @@ -1902,7 +1902,7 @@ def _run_cycle(self) -> str: if not 
is_promoting and create_slots and self.cluster.leader: err = self._async_executor.try_run_async('copy_logical_slots', self.state_handler.slots_handler.copy_logical_slots, - args=(self.cluster, create_slots)) + args=(self.cluster, self.patroni, create_slots)) if not err: ret = 'Copying logical slots {0} from the primary'.format(create_slots) return ret @@ -1958,9 +1958,7 @@ def _sync_replication_slots(self, dcs_failed: bool) -> List[str]: cluster = self._failsafe.update_cluster(self.cluster)\ if self.is_failsafe_mode() and not self.is_leader() else self.cluster if cluster: - slots = self.state_handler.slots_handler.sync_replication_slots(cluster, - self.patroni.nofailover, - self.patroni.replicatefrom) + slots = self.state_handler.slots_handler.sync_replication_slots(cluster, self.patroni) # Don't copy replication slots if failsafe_mode is active return [] if self.failsafe_is_active() else slots diff --git a/patroni/postgresql/__init__.py b/patroni/postgresql/__init__.py index c1027dad0..e373bd3ca 100644 --- a/patroni/postgresql/__init__.py +++ b/patroni/postgresql/__init__.py @@ -30,6 +30,7 @@ from ..dcs import Cluster, Leader, Member, SLOT_ADVANCE_AVAILABLE_VERSION from ..exceptions import PostgresConnectionException from ..utils import Retry, RetryFailedError, polling_loop, data_directory_is_empty, parse_int +from ..tags import Tags if TYPE_CHECKING: # pragma: no cover from psycopg import Connection as Connection3, Cursor @@ -424,18 +425,20 @@ def set_enforce_hot_standby_feedback(self, value: bool) -> None: self.config.write_postgresql_conf() self.reload() - def reset_cluster_info_state(self, cluster: Union[Cluster, None], nofailover: bool = False) -> None: + def reset_cluster_info_state(self, cluster: Optional[Cluster], tags: Optional[Tags] = None) -> None: """Reset monitoring query cache. - It happens in the beginning of heart-beat loop and on change of `synchronous_standby_names`. + .. 
note:: + It happens in the beginning of heart-beat loop and on change of `synchronous_standby_names`. :param cluster: currently known cluster state from DCS - :param nofailover: whether this node could become a new primary. - Important when there are logical permanent replication slots because "nofailover" - node could do cascading replication and should enable `hot_standby_feedback` + :param tags: reference to an object implementing :class:`Tags` interface. """ self._cluster_info_state = {} + if not tags: + return + if global_config.is_standby_cluster: # Standby cluster can't have logical replication slots, and we don't need to enforce hot_standby_feedback self.set_enforce_hot_standby_feedback(False) @@ -444,13 +447,8 @@ def reset_cluster_info_state(self, cluster: Union[Cluster, None], nofailover: bo # We want to enable hot_standby_feedback if the replica is supposed # to have a logical slot or in case if it is the cascading replica. self.set_enforce_hot_standby_feedback(not global_config.is_standby_cluster and self.can_advance_slots - and cluster.should_enforce_hot_standby_feedback(self.name, - nofailover)) - - self._has_permanent_slots = cluster.has_permanent_slots( - my_name=self.name, - nofailover=nofailover, - major_version=self.major_version) + and cluster.should_enforce_hot_standby_feedback(self, tags)) + self._has_permanent_slots = cluster.has_permanent_slots(self, tags) def _cluster_info_state_get(self, name: str) -> Optional[Any]: if not self._cluster_info_state: diff --git a/patroni/postgresql/slots.py b/patroni/postgresql/slots.py index 025a88a98..fb9448cd6 100644 --- a/patroni/postgresql/slots.py +++ b/patroni/postgresql/slots.py @@ -17,6 +17,7 @@ from ..dcs import Cluster, Leader from ..file_perm import pg_perm from ..psycopg import OperationalError +from ..tags import Tags if TYPE_CHECKING: # pragma: no cover from psycopg import Cursor @@ -492,8 +493,7 @@ class instance. 
Slots that exist are also advanced if their ``confirmed_flush_ls self._schedule_load_slots = True return create_slots + copy_slots - def sync_replication_slots(self, cluster: Cluster, nofailover: bool, - replicatefrom: Optional[str] = None) -> List[str]: + def sync_replication_slots(self, cluster: Cluster, tags: Tags) -> List[str]: """During the HA loop read, check and alter replication slots found in the cluster. Read physical and logical slots from ``pg_replication_slots``, then compare to those configured in the DCS. @@ -503,8 +503,7 @@ def sync_replication_slots(self, cluster: Cluster, nofailover: bool, them on replica nodes by copying slot files from the primary. :param cluster: object containing stateful information for the cluster. - :param nofailover: ``True`` if this node has been tagged to not be a failover candidate. - :param replicatefrom: the tag containing the node to replicate from. + :param tags: reference to an object implementing :class:`Tags` interface. :returns: list of logical replication slots names that should be copied from the primary. 
""" @@ -513,8 +512,7 @@ def sync_replication_slots(self, cluster: Cluster, nofailover: bool, try: self.load_replication_slots() - slots = cluster.get_replication_slots(self._postgresql.name, self._postgresql.role, - nofailover, self._postgresql.major_version, show_error=True) + slots = cluster.get_replication_slots(self._postgresql, tags, show_error=True) self._drop_incorrect_slots(cluster, slots) @@ -524,7 +522,7 @@ def sync_replication_slots(self, cluster: Cluster, nofailover: bool, self._logical_slots_processing_queue.clear() self._ensure_logical_slots_primary(slots) else: - self.check_logical_slots_readiness(cluster, replicatefrom) + self.check_logical_slots_readiness(cluster, tags) ret = self._ensure_logical_slots_replica(slots) self._replication_slots = slots @@ -550,7 +548,7 @@ def _get_leader_connection_cursor(self, leader: Leader) -> Iterator[Union['curso with get_connection_cursor(connect_timeout=3, options="-c statement_timeout=2000", **conn_kwargs) as cur: yield cur - def check_logical_slots_readiness(self, cluster: Cluster, replicatefrom: Optional[str]) -> bool: + def check_logical_slots_readiness(self, cluster: Cluster, tags: Tags) -> bool: """Determine whether all known logical slots are synchronised from the leader. 1) Retrieve the current ``catalog_xmin`` value for the physical slot from the cluster leader, and @@ -559,13 +557,13 @@ def check_logical_slots_readiness(self, cluster: Cluster, replicatefrom: Optiona 3) store logical slot ``catalog_xmin`` when the physical slot ``catalog_xmin`` becomes valid. :param cluster: object containing stateful information for the cluster. - :param replicatefrom: name of the member that should be used to replicate from. + :param tags: reference to an object implementing :class:`Tags` interface. :returns: ``False`` if any issue while checking logical slots readiness, ``True`` otherwise. 
""" catalog_xmin = None if self._logical_slots_processing_queue and cluster.leader: - slot_name = cluster.get_my_slot_name_on_primary(self._postgresql.name, replicatefrom) + slot_name = cluster.get_slot_name_on_primary(self._postgresql.name, tags) try: with self._get_leader_connection_cursor(cluster.leader) as cur: cur.execute("SELECT slot_name, catalog_xmin FROM pg_catalog.pg_get_replication_slots()" @@ -643,16 +641,17 @@ def _ready_logical_slots(self, primary_physical_catalog_xmin: Optional[int] = No if standby_logical_slot: logger.info('Logical slot %s is safe to be used after a failover', name) - def copy_logical_slots(self, cluster: Cluster, create_slots: List[str]) -> None: + def copy_logical_slots(self, cluster: Cluster, tags: Tags, create_slots: List[str]) -> None: """Create logical replication slots on standby nodes. :param cluster: object containing stateful information for the cluster. + :param tags: reference to an object implementing :class:`Tags` interface. :param create_slots: list of slot names to copy from the primary. """ leader = cluster.leader if not leader: return - slots = cluster.get_replication_slots(self._postgresql.name, 'replica', False, self._postgresql.major_version) + slots = cluster.get_replication_slots(self._postgresql, tags, role='replica') copy_slots: Dict[str, Dict[str, Any]] = {} with self._get_leader_connection_cursor(leader) as cur: try: diff --git a/tests/test_slots.py b/tests/test_slots.py index ee1c4ee9b..3be2bbd77 100644 --- a/tests/test_slots.py +++ b/tests/test_slots.py @@ -11,10 +11,18 @@ from patroni.postgresql import Postgresql from patroni.postgresql.misc import fsync_dir from patroni.postgresql.slots import SlotsAdvanceThread, SlotsHandler +from patroni.tags import Tags from . 
import BaseTestPostgresql, psycopg_connect, MockCursor +class TestTags(Tags): + + @property + def tags(self): + return {} + + @patch('subprocess.call', Mock(return_value=0)) @patch('patroni.psycopg.connect', psycopg_connect) @patch.object(Thread, 'start', Mock()) @@ -34,6 +42,7 @@ def setUp(self): self.cluster = Cluster(True, config, self.leader, Status(0, {'ls': 12345, 'ls2': 12345}), [self.me, self.other, self.leadermem], None, SyncState.empty(), None, None) global_config.update(self.cluster) + self.tags = TestTags() def test_sync_replication_slots(self): config = ClusterConfig(1, {'slots': {'test_3': {'database': 'a', 'plugin': 'b'}, @@ -43,36 +52,36 @@ def test_sync_replication_slots(self): [self.me, self.other, self.leadermem], None, SyncState.empty(), None, None) global_config.update(cluster) with mock.patch('patroni.postgresql.Postgresql._query', Mock(side_effect=psycopg.OperationalError)): - self.s.sync_replication_slots(cluster, False) + self.s.sync_replication_slots(cluster, self.tags) self.p.set_role('standby_leader') with patch.object(SlotsHandler, 'drop_replication_slot', Mock(return_value=(True, False))), \ patch.object(global_config.__class__, 'is_standby_cluster', PropertyMock(return_value=True)), \ patch('patroni.postgresql.slots.logger.debug') as mock_debug: - self.s.sync_replication_slots(cluster, False) + self.s.sync_replication_slots(cluster, self.tags) mock_debug.assert_called_once() self.p.set_role('replica') with patch.object(Postgresql, 'is_primary', Mock(return_value=False)), \ patch.object(global_config.__class__, 'is_paused', PropertyMock(return_value=True)), \ patch.object(SlotsHandler, 'drop_replication_slot') as mock_drop: config.data['slots'].pop('ls') - self.s.sync_replication_slots(cluster, False) + self.s.sync_replication_slots(cluster, self.tags) mock_drop.assert_not_called() self.p.set_role('primary') with mock.patch('patroni.postgresql.Postgresql.role', new_callable=PropertyMock(return_value='replica')): - 
self.s.sync_replication_slots(cluster, False) + self.s.sync_replication_slots(cluster, self.tags) with patch('patroni.dcs.logger.error', new_callable=Mock()) as errorlog_mock: alias1 = Member(0, 'test-3', 28, {'conn_url': 'postgres://replicator:rep-pass@127.0.0.1:5436/postgres'}) alias2 = Member(0, 'test.3', 28, {'conn_url': 'postgres://replicator:rep-pass@127.0.0.1:5436/postgres'}) cluster.members.extend([alias1, alias2]) - self.s.sync_replication_slots(cluster, False) + self.s.sync_replication_slots(cluster, self.tags) self.assertEqual(errorlog_mock.call_count, 5) ca = errorlog_mock.call_args_list[0][0][1] self.assertTrue("test-3" in ca, "non matching {0}".format(ca)) self.assertTrue("test.3" in ca, "non matching {0}".format(ca)) with patch.object(Postgresql, 'major_version', PropertyMock(return_value=90618)): - self.s.sync_replication_slots(cluster, False) + self.s.sync_replication_slots(cluster, self.tags) self.p.set_role('replica') - self.s.sync_replication_slots(cluster, False) + self.s.sync_replication_slots(cluster, self.tags) def test_cascading_replica_sync_replication_slots(self): """Test sync with a cascading replica so physical slots are present on a replica.""" @@ -87,7 +96,7 @@ def test_cascading_replica_sync_replication_slots(self): with patch.object(Postgresql, '_query') as mock_query, \ patch.object(Postgresql, 'is_primary', Mock(return_value=False)): mock_query.return_value = [('ls', 'logical', 104, 'b', 'a', 5, 12345, 105)] - ret = self.s.sync_replication_slots(cluster, False) + ret = self.s.sync_replication_slots(cluster, self.tags) self.assertEqual(ret, []) def test_process_permanent_slots(self): @@ -97,7 +106,7 @@ def test_process_permanent_slots(self): None, SyncState.empty(), None, None) global_config.update(cluster) - self.s.sync_replication_slots(cluster, False) + self.s.sync_replication_slots(cluster, self.tags) with patch.object(Postgresql, '_query') as mock_query: self.p.reset_cluster_info_state(None) mock_query.return_value = [( @@ 
-120,48 +129,48 @@ def test__ensure_logical_slots_replica(self): self.p.set_role('replica') self.cluster.slots['ls'] = 12346 with patch.object(SlotsHandler, 'check_logical_slots_readiness', Mock(return_value=False)): - self.assertEqual(self.s.sync_replication_slots(self.cluster, False), []) + self.assertEqual(self.s.sync_replication_slots(self.cluster, self.tags), []) with patch.object(SlotsHandler, '_query', Mock(return_value=[('ls', 'logical', 499, 'b', 'a', 5, 100, 500)])), \ patch.object(MockCursor, 'execute', Mock(side_effect=psycopg.OperationalError)), \ patch.object(SlotsAdvanceThread, 'schedule', Mock(return_value=(True, ['ls']))), \ patch.object(psycopg.OperationalError, 'diag') as mock_diag: type(mock_diag).sqlstate = PropertyMock(return_value='58P01') - self.assertEqual(self.s.sync_replication_slots(self.cluster, False), ['ls']) + self.assertEqual(self.s.sync_replication_slots(self.cluster, self.tags), ['ls']) self.cluster.slots['ls'] = 'a' - self.assertEqual(self.s.sync_replication_slots(self.cluster, False), []) + self.assertEqual(self.s.sync_replication_slots(self.cluster, self.tags), []) self.cluster.config.data['slots']['ls']['database'] = 'b' self.cluster.slots['ls'] = '500' with patch.object(MockCursor, 'rowcount', PropertyMock(return_value=1), create=True): - self.assertEqual(self.s.sync_replication_slots(self.cluster, False), ['ls']) + self.assertEqual(self.s.sync_replication_slots(self.cluster, self.tags), ['ls']) def test_copy_logical_slots(self): self.cluster.config.data['slots']['ls']['database'] = 'b' - self.s.copy_logical_slots(self.cluster, ['ls']) + self.s.copy_logical_slots(self.cluster, self.tags, ['ls']) with patch.object(MockCursor, 'execute', Mock(side_effect=psycopg.OperationalError)): - self.s.copy_logical_slots(self.cluster, ['foo']) + self.s.copy_logical_slots(self.cluster, self.tags, ['foo']) with patch.object(Cluster, 'leader', PropertyMock(return_value=None)): - self.s.copy_logical_slots(self.cluster, ['foo']) + 
self.s.copy_logical_slots(self.cluster, self.tags, ['foo']) @patch.object(Postgresql, 'stop', Mock(return_value=True)) @patch.object(Postgresql, 'start', Mock(return_value=True)) @patch.object(Postgresql, 'is_primary', Mock(return_value=False)) def test_check_logical_slots_readiness(self): - self.s.copy_logical_slots(self.cluster, ['ls']) + self.s.copy_logical_slots(self.cluster, self.tags, ['ls']) with patch.object(MockCursor, '__iter__', Mock(return_value=iter([('postgresql0', None)]))), \ patch.object(MockCursor, 'fetchall', Mock(side_effect=Exception)): - self.assertFalse(self.s.check_logical_slots_readiness(self.cluster, None)) + self.assertFalse(self.s.check_logical_slots_readiness(self.cluster, self.tags)) with patch.object(MockCursor, '__iter__', Mock(return_value=iter([('postgresql0', None)]))), \ patch.object(MockCursor, 'fetchall', Mock(return_value=[(False,)])): - self.assertFalse(self.s.check_logical_slots_readiness(self.cluster, None)) + self.assertFalse(self.s.check_logical_slots_readiness(self.cluster, self.tags)) with patch.object(MockCursor, '__iter__', Mock(return_value=iter([('ls', 100)]))): - self.s.check_logical_slots_readiness(self.cluster, None) + self.s.check_logical_slots_readiness(self.cluster, self.tags) @patch.object(Postgresql, 'stop', Mock(return_value=True)) @patch.object(Postgresql, 'start', Mock(return_value=True)) @patch.object(Postgresql, 'is_primary', Mock(return_value=False)) def test_on_promote(self): self.s.schedule_advance_slots({'foo': {'bar': 100}}) - self.s.copy_logical_slots(self.cluster, ['ls']) + self.s.copy_logical_slots(self.cluster, self.tags, ['ls']) self.s.on_promote() @unittest.skipIf(os.name == 'nt', "Windows not supported") @@ -192,11 +201,11 @@ def test_advance_physical_slots(self): cluster = Cluster(True, config, self.leader, Status(0, {'blabla': 12346}), [self.me, self.other, self.leadermem], None, SyncState.empty(), None, None) global_config.update(cluster) - self.s.sync_replication_slots(cluster, False) + 
self.s.sync_replication_slots(cluster, self.tags) with patch.object(SlotsHandler, '_query', Mock(side_effect=[[('blabla', 'physical', 12345, None, None, None, None, None)], Exception])) as mock_query, \ patch('patroni.postgresql.slots.logger.error') as mock_error: - self.s.sync_replication_slots(cluster, False) + self.s.sync_replication_slots(cluster, self.tags) self.assertEqual(mock_query.call_args[0], ("SELECT pg_catalog.pg_replication_slot_advance(%s, %s)", "blabla", '0/303A')) self.assertEqual(mock_error.call_args[0][0], From 5a77cbb08751a58e1f8a2e9473ef56a89974c1c0 Mon Sep 17 00:00:00 2001 From: Ali Mehraji Date: Thu, 30 Nov 2023 12:15:07 +0330 Subject: [PATCH 18/33] Update: etcd flags in command in docker-compose.yml and docker-compose-citus.yml (#2966) --- docker-compose-citus.yml | 6 +++--- docker-compose.yml | 6 +++--- 2 files changed, 6 insertions(+), 6 deletions(-) diff --git a/docker-compose-citus.yml b/docker-compose-citus.yml index cd63d5833..da71c50a3 100644 --- a/docker-compose-citus.yml +++ b/docker-compose-citus.yml @@ -27,19 +27,19 @@ services: ETCD_UNSUPPORTED_ARCH: arm64 container_name: demo-etcd1 hostname: etcd1 - command: etcd -name etcd1 -initial-advertise-peer-urls http://etcd1:2380 + command: etcd --name etcd1 --initial-advertise-peer-urls http://etcd1:2380 etcd2: <<: *etcd container_name: demo-etcd2 hostname: etcd2 - command: etcd -name etcd2 -initial-advertise-peer-urls http://etcd2:2380 + command: etcd --name etcd2 --initial-advertise-peer-urls http://etcd2:2380 etcd3: <<: *etcd container_name: demo-etcd3 hostname: etcd3 - command: etcd -name etcd3 -initial-advertise-peer-urls http://etcd3:2380 + command: etcd --name etcd3 --initial-advertise-peer-urls http://etcd3:2380 haproxy: image: ${PATRONI_TEST_IMAGE:-patroni-citus} diff --git a/docker-compose.yml b/docker-compose.yml index 996c2c829..6b7d7a929 100644 --- a/docker-compose.yml +++ b/docker-compose.yml @@ -25,19 +25,19 @@ services: ETCD_UNSUPPORTED_ARCH: arm64 container_name: 
demo-etcd1 hostname: etcd1 - command: etcd -name etcd1 -initial-advertise-peer-urls http://etcd1:2380 + command: etcd --name etcd1 --initial-advertise-peer-urls http://etcd1:2380 etcd2: <<: *etcd container_name: demo-etcd2 hostname: etcd2 - command: etcd -name etcd2 -initial-advertise-peer-urls http://etcd2:2380 + command: etcd --name etcd2 --initial-advertise-peer-urls http://etcd2:2380 etcd3: <<: *etcd container_name: demo-etcd3 hostname: etcd3 - command: etcd -name etcd3 -initial-advertise-peer-urls http://etcd3:2380 + command: etcd --name etcd3 --initial-advertise-peer-urls http://etcd3:2380 haproxy: image: ${PATRONI_TEST_IMAGE:-patroni} From 47cadc9f63e087ebdfd598f18b02d831a8238ead Mon Sep 17 00:00:00 2001 From: Sophia Ruan <104968314+XiuhuaRuan@users.noreply.github.com> Date: Thu, 30 Nov 2023 17:02:19 +0800 Subject: [PATCH 19/33] Fix the issue that REST API returns unknown after postgres restart (#2956) Close #2955 --- patroni/postgresql/connection.py | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/patroni/postgresql/connection.py b/patroni/postgresql/connection.py index 2a50dbb5b..040dcf78d 100644 --- a/patroni/postgresql/connection.py +++ b/patroni/postgresql/connection.py @@ -147,7 +147,8 @@ def get(self, name: str, kwargs_override: Optional[Dict[str, Any]] = None) -> Na def close(self) -> None: """Close all named connections from Patroni to PostgreSQL registered in the pool.""" with self._lock: - if any(conn.close(True) for conn in self._connections.values()): + closed_connections = [conn.close(True) for conn in self._connections.values()] + if any(closed_connections): logger.info("closed patroni connections to postgres") From ef5f320602ccf05415444ee19c1497ed6b07de73 Mon Sep 17 00:00:00 2001 From: Waynerv Date: Thu, 30 Nov 2023 19:02:42 +0800 Subject: [PATCH 20/33] Cache `postgres --describe-config` output results (#2967) We don't expect GUCs list to change for the same major version and don't expect major version to change while 
Patroni is running. --- patroni/postgresql/__init__.py | 6 +++++- 1 file changed, 5 insertions(+), 1 deletion(-) diff --git a/patroni/postgresql/__init__.py b/patroni/postgresql/__init__.py index e373bd3ca..c87c29fab 100644 --- a/patroni/postgresql/__init__.py +++ b/patroni/postgresql/__init__.py @@ -118,6 +118,8 @@ def __init__(self, config: Dict[str, Any]) -> None: # Last known running process self._postmaster_proc = None + self._available_gucs = None + if self.is_running(): # If we found postmaster process we need to figure out whether postgres is accepting connections self.set_state('starting') @@ -240,7 +242,9 @@ def cluster_info_query(self) -> str: @property def available_gucs(self) -> CaseInsensitiveSet: """GUCs available in this Postgres server.""" - return self._get_gucs() + if not self._available_gucs: + self._available_gucs = self._get_gucs() + return self._available_gucs def _version_file_exists(self) -> bool: return not self.data_directory_empty() and os.path.isfile(self._version_file) From 6976939f09fae574e3e7d251d61f1ba8cb0c49a5 Mon Sep 17 00:00:00 2001 From: Alexander Kukushkin Date: Thu, 30 Nov 2023 16:50:42 +0100 Subject: [PATCH 21/33] Release/v3.2.1 (#2968) - bump version - bump pyright - update release notes --- .github/workflows/tests.yaml | 2 +- docs/releases.rst | 38 ++++++++++++++++++++++++++++++++++++ patroni/api.py | 2 +- patroni/ctl.py | 4 ++-- patroni/dcs/consul.py | 2 +- patroni/postgresql/citus.py | 3 ++- patroni/version.py | 2 +- 7 files changed, 46 insertions(+), 7 deletions(-) diff --git a/.github/workflows/tests.yaml b/.github/workflows/tests.yaml index df911e542..aafe6b56f 100644 --- a/.github/workflows/tests.yaml +++ b/.github/workflows/tests.yaml @@ -174,7 +174,7 @@ jobs: - uses: jakebailey/pyright-action@v1 with: - version: 1.1.336 + version: 1.1.338 docs: runs-on: ubuntu-latest diff --git a/docs/releases.rst b/docs/releases.rst index cd2714b69..c68977cd7 100644 --- a/docs/releases.rst +++ b/docs/releases.rst @@ -3,6 +3,44 @@ 
Release notes ============= +Version 3.2.1 +------------- + +**Bugfixes** + +- Limit accepted values for ``--format`` argument in ``patronictl`` (Alexander Kukushkin) + + It used to accept any arbitrary string and produce no output if the value wasn't recognized. + +- Verify that replica nodes received checkpoint LSN on shutdown before releasing the leader key (Alexander Kukushkin) + + Previously in some cases, we were using LSN of the SWITCH record that is followed by CHECKPOINT (if archiving mode is enabled). As a result the former primary sometimes had to do ``pg_rewind``, but there would be no data loss involved. + +- Do a real HTTP request when performing node name uniqueness check (Alexander Kukushkin) + + When running Patroni in containers it is possible that the traffic is routed using ``docker-proxy``, which listens on the port and accepts incoming connections. It was causing false positives. + +- Fixed Citus support with Etcd v2 (Alexander Kukushkin) + + Patroni was failing to deploy a new Citus cluster with Etcd v2. + +- Fixed ``pg_rewind`` behavior with Postgres v16+ (Alexander Kukushkin) + + The error message format of ``pg_waldump`` changed in v16 which caused ``pg_rewind`` to be called by Patroni even when it was not necessary. + +- Fixed bug with custom bootstrap (Alexander Kukushkin) + + Patroni was falsely applying ``--command`` argument, which is a bootstrap command itself. + +- Fixed the issue with REST API health check endpoints (Sophia Ruan) + + There were chances that after Postgres restart it could return ``unknown`` state for Postgres because connections were not properly closed. + +- Cache ``postgres --describe-config`` output results (Waynerv) + + They are used to figure out which GUCs are available to validate PostgreSQL configuration and we don't expect this list to change while Patroni is running. 
+ + Version 3.2.0 ------------- diff --git a/patroni/api.py b/patroni/api.py index 2adc5f985..5761d359e 100644 --- a/patroni/api.py +++ b/patroni/api.py @@ -37,7 +37,7 @@ logger = logging.getLogger(__name__) -def check_access(func: Callable[['RestApiHandler'], None]) -> Callable[..., None]: +def check_access(func: Callable[..., None]) -> Callable[..., None]: """Check the source ip, authorization header, or client certificates. .. note:: diff --git a/patroni/ctl.py b/patroni/ctl.py index 9345d18e4..3e981b459 100644 --- a/patroni/ctl.py +++ b/patroni/ctl.py @@ -819,7 +819,7 @@ def query( raise PatroniCtlException('You need to specify either --command or --file') sql = command - connect_parameters = {} + connect_parameters: Dict[str, str] = {} if username: connect_parameters['username'] = username if password: @@ -1093,7 +1093,7 @@ def restart(cluster_name: str, group: Optional[int], member_names: List[str], version = click.prompt('Restart if the PostgreSQL version is less than provided (e.g. 
9.5.2) ', type=str, default='') - content = {} + content: Dict[str, Any] = {} if pending: content['restart_pending'] = True diff --git a/patroni/dcs/consul.py b/patroni/dcs/consul.py index 1324e6065..27cab7783 100644 --- a/patroni/dcs/consul.py +++ b/patroni/dcs/consul.py @@ -423,7 +423,7 @@ def _cluster_loader(self, path: str) -> Cluster: _, results = self.retry(self._client.kv.get, path, recurse=True, consistency=self._consistency) if results is None: return Cluster.empty() - nodes = {} + nodes: Dict[str, Dict[str, Any]] = {} for node in results: node['Value'] = (node['Value'] or b'').decode('utf-8') nodes[node['Key'][len(path):]] = node diff --git a/patroni/postgresql/citus.py b/patroni/postgresql/citus.py index 26923f374..b50dc1d09 100644 --- a/patroni/postgresql/citus.py +++ b/patroni/postgresql/citus.py @@ -100,7 +100,8 @@ def schedule_cache_rebuild(self) -> None: def on_demote(self) -> None: with self._condition: self._pg_dist_node.clear() - self._tasks[:] = [] + empty_tasks: List[PgDistNode] = [] + self._tasks[:] = empty_tasks self._in_flight = None def query(self, sql: str, *params: Any) -> List[Tuple[Any, ...]]: diff --git a/patroni/version.py b/patroni/version.py index e5bcac2dd..96592c7da 100644 --- a/patroni/version.py +++ b/patroni/version.py @@ -2,4 +2,4 @@ :var __version__: the current Patroni version. """ -__version__ = '3.2.0' +__version__ = '3.2.1' From 0e6a2ff3a9e004396d56103a86e278423e392140 Mon Sep 17 00:00:00 2001 From: Alexander Kukushkin Date: Tue, 5 Dec 2023 08:30:20 +0100 Subject: [PATCH 22/33] Don't let replica restore initialize key when DCS was wiped (#2970) It was happening from the branch where Patroni was supposed to complain about converting a standalone PG cluster to be governed by Patroni and exit.
--- patroni/ha.py | 5 ++--- tests/test_ha.py | 5 +++++ 2 files changed, 7 insertions(+), 3 deletions(-) diff --git a/patroni/ha.py b/patroni/ha.py index ab1bc433c..0d3e05a41 100644 --- a/patroni/ha.py +++ b/patroni/ha.py @@ -1849,10 +1849,9 @@ def _run_cycle(self) -> str: logger.fatal('system ID mismatch, node %s belongs to a different cluster: %s != %s', self.state_handler.name, self.cluster.initialize, data_sysid) sys.exit(1) - elif self.cluster.is_unlocked() and not self.is_paused(): + elif self.cluster.is_unlocked() and not self.is_paused() and not self.state_handler.cb_called: # "bootstrap", but data directory is not empty - if not self.state_handler.cb_called and self.state_handler.is_running() \ - and not self.state_handler.is_primary(): + if self.state_handler.is_running() and not self.state_handler.is_primary(): self._join_aborted = True logger.error('No initialize key in DCS and PostgreSQL is running as replica, aborting start') logger.error('Please first start Patroni on the node running as primary') diff --git a/tests/test_ha.py b/tests/test_ha.py index 400630184..5b1d4562d 100644 --- a/tests/test_ha.py +++ b/tests/test_ha.py @@ -1585,6 +1585,11 @@ def test_abort_join(self, exit_mock): self.p.is_primary = false self.ha.run_cycle() exit_mock.assert_called_once_with(1) + self.p.set_role('replica') + self.ha.dcs.initialize = Mock() + with patch.object(Postgresql, 'cb_called', PropertyMock(return_value=True)): + self.assertEqual(self.ha.run_cycle(), 'promoted self to leader by acquiring session lock') + self.ha.dcs.initialize.assert_not_called() @patch.object(Cluster, 'is_unlocked', Mock(return_value=False)) def test_after_pause(self): From a4e0a2220dd8dceffc38e23dc44e3ee12a048a8a Mon Sep 17 00:00:00 2001 From: Alexander Kukushkin Date: Wed, 6 Dec 2023 15:28:03 +0100 Subject: [PATCH 23/33] Disable SSL for MacOS GH action runners (#2976) Latest runners release (20231127.1) somehow broke our tests. 
Connections to postgres are somehow failing with a strange error: ``` could not accept SSL connection: Socket operation on non-socket ``` --- features/environment.py | 2 ++ 1 file changed, 2 insertions(+) diff --git a/features/environment.py b/features/environment.py index a0367657c..db10c93d6 100644 --- a/features/environment.py +++ b/features/environment.py @@ -1073,6 +1073,8 @@ def before_all(context): context.keyfile = os.path.join(context.pctl.output_dir, 'patroni.key') context.certfile = os.path.join(context.pctl.output_dir, 'patroni.crt') try: + if sys.platform == 'darwin' and 'GITHUB_ACTIONS' in os.environ: + raise Exception with open(os.devnull, 'w') as null: ret = subprocess.call(['openssl', 'req', '-nodes', '-new', '-x509', '-subj', '/CN=batman.patroni', '-addext', 'subjectAltName=IP:127.0.0.1', '-keyout', context.keyfile, From bbddca6a76bac41ccd5fe142bc198fa7d42fa1ed Mon Sep 17 00:00:00 2001 From: Alexander Kukushkin Date: Wed, 6 Dec 2023 15:55:51 +0100 Subject: [PATCH 24/33] Use consistent read when fetching just updated sync key (#2974) Consul doesn't provide any interface to immediately get `ModifyIndex` for the key that we just updated, therefore we have to perform an explicit read operation. By default stale reads are allowed and sometimes we may read stale data. As a result the write_sync_state() call was considered failed. To mitigate the problem we switch to `consistent` reads when it is executed after an update of the `/sync` key.
Close #2972 --- patroni/dcs/consul.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/patroni/dcs/consul.py b/patroni/dcs/consul.py index 27cab7783..fe66c6d81 100644 --- a/patroni/dcs/consul.py +++ b/patroni/dcs/consul.py @@ -666,7 +666,7 @@ def set_sync_state_value(self, value: str, version: Optional[int] = None) -> Uni if ret: # We have no other choise, only read after write :( if not retry.ensure_deadline(0.5): return False - _, ret = self.retry(self._client.kv.get, self.sync_path) + _, ret = self.retry(self._client.kv.get, self.sync_path, consistency='consistent') if ret and (ret.get('Value') or b'').decode('utf-8') == value: return ret['ModifyIndex'] return False From efdedc7049527117b35849dc5c01525d7f1133e8 Mon Sep 17 00:00:00 2001 From: Polina Bungina <27892524+hughcapet@users.noreply.github.com> Date: Wed, 6 Dec 2023 15:57:05 +0100 Subject: [PATCH 25/33] Reload postgres config if a server param was reset (#2975) Fix the case when a parameter value was changed and then reset back to the initial value without restart - before this fix, the second change was not reflected in the Postgres config. This commit also includes the related unit test refactoring. 
--- patroni/postgresql/config.py | 6 ++ patroni/utils.py | 21 ++++--- tests/__init__.py | 56 +++++++++++------ tests/test_postgresql.py | 116 ++++++++++++++++++++++++++++------- 4 files changed, 150 insertions(+), 49 deletions(-) diff --git a/patroni/postgresql/config.py b/patroni/postgresql/config.py index 271bbdfec..7dfa89344 100644 --- a/patroni/postgresql/config.py +++ b/patroni/postgresql/config.py @@ -1098,6 +1098,12 @@ def reload_config(self, config: Dict[str, Any], sighup: bool = False) -> None: local_connection_address_changed = True else: logger.info('Changed %s from %s to %s', r[0], r[1], new_value) + elif r[0] in self._server_parameters \ + and not compare_values(r[3], r[2], r[1], self._server_parameters[r[0]]): + # Check if any parameter was set back to the current pg_settings value + # We can use pg_settings value here, as it is proved to be equal to new_value + logger.info('Changed %s from %s to %s', r[0], self._server_parameters[r[0]], r[1]) + conf_changed = True for param, value in changes.items(): if '.' in param: # Check that user-defined-paramters have changed (parameters with period in name) diff --git a/patroni/utils.py b/patroni/utils.py index 23f419e5c..2b1ded5b8 100644 --- a/patroni/utils.py +++ b/patroni/utils.py @@ -400,22 +400,23 @@ def parse_real(value: Any, base_unit: Optional[str] = None) -> Optional[float]: return convert_to_base_unit(val, unit, base_unit) -def compare_values(vartype: str, unit: Optional[str], old_value: Any, new_value: Any) -> bool: - """Check if *old_value* and *new_value* are equivalent after parsing them as *vartype*. +def compare_values(vartype: str, unit: Optional[str], settings_value: Any, config_value: Any) -> bool: + """Check if the value from ``pg_settings`` and from Patroni config are equivalent after parsing them as *vartype*. - :param vartpe: the target type to parse *old_value* and *new_value* before comparing them. 
Accepts any among of the - following (case sensitive): + :param vartype: the target type to parse *settings_value* and *config_value* before comparing them. + Accepts any among of the following (case sensitive): * ``bool``: parse values using :func:`parse_bool`; or * ``integer``: parse values using :func:`parse_int`; or * ``real``: parse values using :func:`parse_real`; or * ``enum``: parse values as lowercase strings; or * ``string``: parse values as strings. This one is used by default if no valid value is passed as *vartype*. - :param unit: base unit to be used as argument when calling :func:`parse_int` or :func:`parse_real` for *new_value*. - :param old_value: value to be compared with *new_value*. - :param new_value: value to be compared with *old_value*. + :param unit: base unit to be used as argument when calling :func:`parse_int` or :func:`parse_real` + for *config_value*. + :param settings_value: value to be compared with *config_value*. + :param config_value: value to be compared with *settings_value*. - :returns: ``True`` if *old_value* is equivalent to *new_value* when both are parsed as *vartype*. + :returns: ``True`` if *settings_value* is equivalent to *config_value* when both are parsed as *vartype*. 
:Example: @@ -455,8 +456,8 @@ def compare_values(vartype: str, unit: Optional[str], old_value: Any, new_value: } converter = converters.get(vartype) or converters['string'] - old_converted = converter(old_value, None) - new_converted = converter(new_value, unit) + old_converted = converter(settings_value, None) + new_converted = converter(config_value, unit) return old_converted is not None and new_converted is not None and old_converted == new_converted diff --git a/tests/__init__.py b/tests/__init__.py index bd70ba3d8..b013e4e16 100644 --- a/tests/__init__.py +++ b/tests/__init__.py @@ -25,8 +25,41 @@ class SleepException(Exception): 'max_wal_senders', 'max_worker_processes', 'port', 'search_path', 'shared_preload_libraries', 'stats_temp_directory', 'synchronous_standby_names', 'track_commit_timestamp', 'unix_socket_directories', 'vacuum_cost_delay', 'vacuum_cost_limit', 'wal_keep_size', 'wal_level', 'wal_log_hints', 'zero_damaged_pages', + 'autovacuum', 'wal_segment_size', 'wal_block_size', 'shared_buffers', 'wal_buffers', }) +GET_PG_SETTINGS_RESULT = [ + ('wal_segment_size', '2048', '8kB', 'integer', 'internal'), + ('wal_block_size', '8192', None, 'integer', 'internal'), + ('shared_buffers', '16384', '8kB', 'integer', 'postmaster'), + ('wal_buffers', '-1', '8kB', 'integer', 'postmaster'), + ('max_connections', '100', None, 'integer', 'postmaster'), + ('max_prepared_transactions', '200', None, 'integer', 'postmaster'), + ('max_worker_processes', '8', None, 'integer', 'postmaster'), + ('max_locks_per_transaction', '64', None, 'integer', 'postmaster'), + ('max_wal_senders', '5', None, 'integer', 'postmaster'), + ('search_path', 'public', None, 'string', 'user'), + ('port', '5432', None, 'integer', 'postmaster'), + ('listen_addresses', '127.0.0.2, 127.0.0.3', None, 'string', 'postmaster'), + ('autovacuum', 'on', None, 'bool', 'sighup'), + ('unix_socket_directories', '/tmp', None, 'string', 'postmaster'), + ('shared_preload_libraries', 'citus', None, 'string', 
'postmaster'), + ('wal_keep_size', '128', 'MB', 'integer', 'sighup'), + ('cluster_name', 'batman', None, 'string', 'postmaster'), + ('vacuum_cost_delay', '200', 'ms', 'real', 'user'), + ('vacuum_cost_limit', '-1', None, 'integer', 'user'), + ('max_stack_depth', '2048', 'kB', 'integer', 'superuser'), + ('constraint_exclusion', '', None, 'enum', 'user'), + ('force_parallel_mode', '1', None, 'enum', 'user'), + ('zero_damaged_pages', 'off', None, 'bool', 'superuser'), + ('stats_temp_directory', '/tmp', None, 'string', 'sighup'), + ('track_commit_timestamp', 'off', None, 'bool', 'postmaster'), + ('wal_log_hints', 'on', None, 'bool', 'superuser'), + ('hot_standby', 'on', None, 'bool', 'superuser'), + ('max_replication_slots', '5', None, 'integer', 'superuser'), + ('wal_level', 'logical', None, 'enum', 'superuser'), +] + class MockResponse(object): @@ -133,22 +166,9 @@ def execute(self, sql, *params): ('archive_command', 'my archive command'), ('cluster_name', 'my_cluster')] elif sql.startswith('SELECT name, setting'): - self.results = [('wal_segment_size', '2048', '8kB', 'integer', 'internal'), - ('wal_block_size', '8192', None, 'integer', 'internal'), - ('shared_buffers', '16384', '8kB', 'integer', 'postmaster'), - ('wal_buffers', '-1', '8kB', 'integer', 'postmaster'), - ('max_connections', '100', None, 'integer', 'postmaster'), - ('max_prepared_transactions', '0', None, 'integer', 'postmaster'), - ('max_worker_processes', '8', None, 'integer', 'postmaster'), - ('max_locks_per_transaction', '64', None, 'integer', 'postmaster'), - ('max_wal_senders', '5', None, 'integer', 'postmaster'), - ('search_path', 'public', None, 'string', 'user'), - ('port', '5433', None, 'integer', 'postmaster'), - ('listen_addresses', '*', None, 'string', 'postmaster'), - ('autovacuum', 'on', None, 'bool', 'sighup'), - ('unix_socket_directories', '/tmp', None, 'string', 'postmaster')] + self.results = GET_PG_SETTINGS_RESULT elif sql.startswith('SELECT COUNT(*) FROM pg_catalog.pg_settings'): - 
self.results = [(1,)] + self.results = [(0,)] elif sql.startswith('IDENTIFY_SYSTEM'): self.results = [('1', 3, '0/402EEC0', '')] elif sql.startswith('TIMELINE_HISTORY '): @@ -218,11 +238,11 @@ class PostgresInit(unittest.TestCase): _PARAMETERS = {'wal_level': 'hot_standby', 'max_replication_slots': 5, 'f.oo': 'bar', 'search_path': 'public', 'hot_standby': 'on', 'max_wal_senders': 5, 'wal_keep_segments': 8, 'wal_log_hints': 'on', 'max_locks_per_transaction': 64, - 'max_worker_processes': 8, 'max_connections': 100, 'max_prepared_transactions': 0, + 'max_worker_processes': 8, 'max_connections': 100, 'max_prepared_transactions': 200, 'track_commit_timestamp': 'off', 'unix_socket_directories': '/tmp', - 'trigger_file': 'bla', 'stats_temp_directory': '/tmp', 'zero_damaged_pages': '', + 'trigger_file': 'bla', 'stats_temp_directory': '/tmp', 'zero_damaged_pages': 'off', 'force_parallel_mode': '1', 'constraint_exclusion': '', - 'max_stack_depth': 'Z', 'vacuum_cost_limit': -1, 'vacuum_cost_delay': 200} + 'max_stack_depth': 2048, 'vacuum_cost_limit': -1, 'vacuum_cost_delay': 200} @patch('patroni.psycopg._connect', psycopg_connect) @patch('patroni.postgresql.CallbackExecutor', Mock()) diff --git a/tests/test_postgresql.py b/tests/test_postgresql.py index 31454479e..8a5c491b8 100644 --- a/tests/test_postgresql.py +++ b/tests/test_postgresql.py @@ -5,6 +5,7 @@ import subprocess import time +from copy import deepcopy from mock import Mock, MagicMock, PropertyMock, patch, mock_open import patroni.psycopg as psycopg @@ -25,7 +26,8 @@ from patroni.utils import RetryFailedError from threading import Thread, current_thread -from . import BaseTestPostgresql, MockCursor, MockPostmaster, psycopg_connect, mock_available_gucs +from . 
import (BaseTestPostgresql, MockCursor, MockPostmaster, psycopg_connect, mock_available_gucs, + GET_PG_SETTINGS_RESULT) mtime_ret = {} @@ -559,31 +561,103 @@ def test_replica_method_can_work_without_replication_connection(self): @patch('time.sleep', Mock()) @patch.object(Postgresql, 'is_running', Mock(return_value=True)) - def test_reload_config(self): - parameters = self._PARAMETERS.copy() - parameters.pop('f.oo') - parameters['wal_buffers'] = '512' - config = {'pg_hba': [''], 'pg_ident': [''], 'use_unix_socket': True, 'use_unix_socket_repl': True, - 'authentication': {}, - 'retry_timeout': 10, 'listen': '*', 'krbsrvname': 'postgres', 'parameters': parameters} + @patch('patroni.postgresql.config.logger.info') + @patch('patroni.postgresql.config.logger.warning') + def test_reload_config(self, mock_warning, mock_info): + config = deepcopy(self.p.config._config) + + # Nothing changed + self.p.reload_config(config) + mock_info.assert_called_once_with('No PostgreSQL configuration items changed, nothing to reload.') + mock_warning.assert_not_called() + self.assertEqual(self.p.pending_restart, False) + + mock_info.reset_mock() + + # Handle wal_buffers + self.p.config._config['parameters']['wal_buffers'] = '512' self.p.reload_config(config) - parameters['b.ar'] = 'bar' - with patch.object(MockCursor, 'fetchall', - Mock(side_effect=[[('wal_block_size', '8191', None, 'integer', 'internal'), - ('wal_segment_size', '2048', '8kB', 'integer', 'internal'), - ('shared_buffers', '16384', '8kB', 'integer', 'postmaster'), - ('wal_buffers', '-1', '8kB', 'integer', 'postmaster'), - ('port', '5433', None, 'integer', 'postmaster')], Exception])): + mock_info.assert_called_once_with('No PostgreSQL configuration items changed, nothing to reload.') + self.assertEqual(self.p.pending_restart, False) + + mock_info.reset_mock() + config = deepcopy(self.p.config._config) + + # hba/ident_changed + config['pg_hba'] = [''] + config['pg_ident'] = [''] + self.p.reload_config(config) + 
mock_info.assert_called_once_with('Reloading PostgreSQL configuration.') + self.assertEqual(self.p.pending_restart, False) + + mock_info.reset_mock() + + # Postmaster parameter change (pending_restart) + init_max_worker_processes = config['parameters']['max_worker_processes'] + config['parameters']['max_worker_processes'] *= 2 + with patch('patroni.postgresql.Postgresql._query', Mock(side_effect=[GET_PG_SETTINGS_RESULT, [(1,)]])): self.p.reload_config(config) - parameters['autovacuum'] = 'on' + self.assertEqual(mock_info.call_args_list[0][0], ('Changed %s from %s to %s (restart might be required)', + 'max_worker_processes', str(init_max_worker_processes), + config['parameters']['max_worker_processes'])) + self.assertEqual(mock_info.call_args_list[1][0], ('Reloading PostgreSQL configuration.',)) + self.assertEqual(self.p.pending_restart, True) + + mock_info.reset_mock() + + # Reset to the initial value without restart + config['parameters']['max_worker_processes'] = init_max_worker_processes + self.p.reload_config(config) + self.assertEqual(mock_info.call_args_list[0][0], ('Changed %s from %s to %s', 'max_worker_processes', + init_max_worker_processes * 2, + str(config['parameters']['max_worker_processes']))) + self.assertEqual(mock_info.call_args_list[1][0], ('Reloading PostgreSQL configuration.',)) + self.assertEqual(self.p.pending_restart, False) + + mock_info.reset_mock() + + # User-defined parameter changed (removed) + config['parameters'].pop('f.oo') self.p.reload_config(config) - parameters['autovacuum'] = 'off' - parameters.pop('search_path') - config['listen'] = '*:5433' + self.assertEqual(mock_info.call_args_list[0][0], ('Changed %s from %s to %s', 'f.oo', 'bar', None)) + self.assertEqual(mock_info.call_args_list[1][0], ('Reloading PostgreSQL configuration.',)) + self.assertEqual(self.p.pending_restart, False) + + mock_info.reset_mock() + + # Non-postmaster parameter change + config['parameters']['autovacuum'] = 'off' self.p.reload_config(config) - 
parameters['unix_socket_directories'] = '.' + self.assertEqual(mock_info.call_args_list[0][0], ("Changed %s from %s to %s", 'autovacuum', 'on', 'off')) + self.assertEqual(mock_info.call_args_list[1][0], ('Reloading PostgreSQL configuration.',)) + self.assertEqual(self.p.pending_restart, False) + + config['parameters']['autovacuum'] = 'on' + mock_info.reset_mock() + + # Remove invalid parameter + config['parameters']['invalid'] = 'value' self.p.reload_config(config) - self.p.config.resolve_connection_addresses() + self.assertEqual(mock_warning.call_args_list[0][0], + ('Removing invalid parameter `%s` from postgresql.parameters', 'invalid')) + config['parameters'].pop('invalid') + + mock_warning.reset_mock() + mock_info.reset_mock() + + # Non-empty result (outside changes) and exception while querying pending_restart parameters + with patch('patroni.postgresql.Postgresql._query', + Mock(side_effect=[GET_PG_SETTINGS_RESULT, [(1,)], GET_PG_SETTINGS_RESULT, Exception])): + self.p.reload_config(config, True) + self.assertEqual(mock_info.call_args_list[0][0], ('Reloading PostgreSQL configuration.',)) + self.assertEqual(self.p.pending_restart, True) + + # Invalid values, just to increase silly coverage in postgresql.validator. + # One day we will have proper tests there. 
+ config['parameters']['autovacuum'] = 'of' # Bool.transform() + config['parameters']['vacuum_cost_limit'] = 'smth' # Number.transform() + self.p.reload_config(config, True) + self.assertEqual(mock_warning.call_args_list[-1][0][0], 'Exception %r when running query') def test_resolve_connection_addresses(self): self.p.config._config['use_unix_socket'] = self.p.config._config['use_unix_socket_repl'] = True From f0719d148c54dfc6a73ef848025e9d162d9c2d39 Mon Sep 17 00:00:00 2001 From: Polina Bungina <27892524+hughcapet@users.noreply.github.com> Date: Wed, 13 Dec 2023 08:40:47 +0100 Subject: [PATCH 26/33] Actually allow failover to an async candidate in sync mode (#2980) --- patroni/ctl.py | 2 +- tests/test_ctl.py | 10 ++++++---- 2 files changed, 7 insertions(+), 5 deletions(-) diff --git a/patroni/ctl.py b/patroni/ctl.py index 3e981b459..3a0d2a177 100644 --- a/patroni/ctl.py +++ b/patroni/ctl.py @@ -1272,7 +1272,7 @@ def _do_failover_or_switchover(action: str, cluster_name: str, group: Optional[i config.is_synchronous_mode, not cluster.sync.is_empty, not cluster.sync.matches(candidate, True))): - if click.confirm(f'Are you sure you want to failover to the asynchronous node {candidate}'): + if not click.confirm(f'Are you sure you want to failover to the asynchronous node {candidate}?'): raise PatroniCtlException('Aborting ' + action) scheduled_at_str = None diff --git a/tests/test_ctl.py b/tests/test_ctl.py index a174b03d9..badbeca1a 100644 --- a/tests/test_ctl.py +++ b/tests/test_ctl.py @@ -226,18 +226,20 @@ def test_failover(self): self.assertIn('Supplying a leader name using this command is deprecated', result.output) failover_func_mock.assert_called_once_with('switchover', 'dummy', None, 'leader', None, False) - # Failover to an async member in sync mode (confirm) cluster = get_cluster_initialized_with_leader(sync=('leader', 'other')) cluster.members.append(Member(0, 'async', 28, {'api_url': 'http://127.0.0.1:8012/patroni'})) cluster.config.data['synchronous_mode'] = 
True with patch('patroni.dcs.AbstractDCS.get_cluster', Mock(return_value=cluster)): + # Failover to an async member in sync mode (confirm) result = self.runner.invoke(ctl, ['failover', 'dummy', '--group', '0', '--candidate', 'async'], input='y\ny') self.assertIn('Are you sure you want to failover to the asynchronous node async', result.output) + self.assertEqual(result.exit_code, 0) - # Failover to an async member in sync mode (abort) - result = self.runner.invoke(ctl, ['failover', 'dummy', '--group', '0', '--candidate', 'async'], input='N') - self.assertEqual(result.exit_code, 1) + # Failover to an async member in sync mode (abort) + result = self.runner.invoke(ctl, ['failover', 'dummy', '--group', '0', '--candidate', 'async'], input='N') + self.assertEqual(result.exit_code, 1) + self.assertIn('Aborting failover', result.output) @patch('patroni.dynamic_loader.iter_modules', Mock(return_value=['patroni.dcs.dummy', 'patroni.dcs.etcd'])) def test_get_dcs(self): From c1ee99d81da8b35e370350a433c5f5c4889baff5 Mon Sep 17 00:00:00 2001 From: Polina Bungina <27892524+hughcapet@users.noreply.github.com> Date: Mon, 18 Dec 2023 10:44:05 +0100 Subject: [PATCH 27/33] Update PG version in a couple of places (#2986) * All dockerfiles to use PG16 by default * PGVERSION env in the test pipelines to 16.1-1 by default * 11->14 in the dcs-pg mapping for test pipelines * Code comments fixes --- .github/workflows/install_deps.py | 2 +- .github/workflows/mapping.py | 2 +- .github/workflows/run_tests.py | 2 +- .github/workflows/tests.yaml | 2 +- Dockerfile | 2 +- Dockerfile.citus | 4 ++-- kubernetes/Dockerfile | 2 +- kubernetes/Dockerfile.citus | 8 ++++---- patroni/postgresql/postmaster.py | 2 +- 9 files changed, 13 insertions(+), 13 deletions(-) diff --git a/.github/workflows/install_deps.py b/.github/workflows/install_deps.py index 6480f66a3..b089bde07 100644 --- a/.github/workflows/install_deps.py +++ b/.github/workflows/install_deps.py @@ -110,7 +110,7 @@ def install_etcd(): def 
install_postgres(): - version = os.environ.get('PGVERSION', '15.1-1') + version = os.environ.get('PGVERSION', '16.1-1') platform = {'darwin': 'osx', 'win32': 'windows-x64', 'cygwin': 'windows-x64'}[sys.platform] if platform == 'osx': return subprocess.call(['brew', 'install', 'expect', 'postgresql@{0}'.format(version.split('.')[0])]) diff --git a/.github/workflows/mapping.py b/.github/workflows/mapping.py index f75efec43..279438b04 100644 --- a/.github/workflows/mapping.py +++ b/.github/workflows/mapping.py @@ -1 +1 @@ -versions = {'etcd': '9.6', 'etcd3': '16', 'consul': '13', 'exhibitor': '12', 'raft': '11', 'kubernetes': '15'} +versions = {'etcd': '9.6', 'etcd3': '16', 'consul': '13', 'exhibitor': '12', 'raft': '14', 'kubernetes': '15'} diff --git a/.github/workflows/run_tests.py b/.github/workflows/run_tests.py index 9a078e4f7..cece186f4 100644 --- a/.github/workflows/run_tests.py +++ b/.github/workflows/run_tests.py @@ -30,7 +30,7 @@ def main(): unbuffer = ['timeout', '900', 'unbuffer'] else: if sys.platform == 'darwin': - version = os.environ.get('PGVERSION', '15.1-1') + version = os.environ.get('PGVERSION', '16.1-1') path = '/usr/local/opt/postgresql@{0}/bin:.'.format(version.split('.')[0]) unbuffer = ['unbuffer'] else: diff --git a/.github/workflows/tests.yaml b/.github/workflows/tests.yaml index aafe6b56f..f1f55e2a5 100644 --- a/.github/workflows/tests.yaml +++ b/.github/workflows/tests.yaml @@ -85,7 +85,7 @@ jobs: env: DCS: ${{ matrix.dcs }} ETCDVERSION: 3.4.23 - PGVERSION: 15.1-1 # for windows and macos + PGVERSION: 16.1-1 # for windows and macos strategy: fail-fast: false matrix: diff --git a/Dockerfile b/Dockerfile index b74cdf240..d4fddfea5 100644 --- a/Dockerfile +++ b/Dockerfile @@ -1,6 +1,6 @@ ## This Dockerfile is meant to aid in the building and debugging patroni whilst developing on your local machine ## It has all the necessary components to play/debug with a single node appliance, running etcd -ARG PG_MAJOR=15 +ARG PG_MAJOR=16 ARG 
COMPRESS=false ARG PGHOME=/home/postgres ARG PGDATA=$PGHOME/data diff --git a/Dockerfile.citus b/Dockerfile.citus index 5f0164b4b..6f02215b3 100644 --- a/Dockerfile.citus +++ b/Dockerfile.citus @@ -1,6 +1,6 @@ ## This Dockerfile is meant to aid in the building and debugging patroni whilst developing on your local machine ## It has all the necessary components to play/debug with a single node appliance, running etcd -ARG PG_MAJOR=15 +ARG PG_MAJOR=16 ARG COMPRESS=false ARG PGHOME=/home/postgres ARG PGDATA=$PGHOME/data @@ -40,7 +40,7 @@ RUN set -ex \ echo "deb [signed-by=/etc/apt/trusted.gpg.d/citusdata_community.gpg] https://packagecloud.io/citusdata/community/debian/ $(lsb_release -cs) main" > /etc/apt/sources.list.d/citusdata_community.list \ && curl -sL https://packagecloud.io/citusdata/community/gpgkey | gpg --dearmor > /etc/apt/trusted.gpg.d/citusdata_community.gpg \ && apt-get update -y \ - && apt-get -y install postgresql-$PG_MAJOR-citus-11.3; \ + && apt-get -y install postgresql-$PG_MAJOR-citus-12.1; \ fi \ \ # Cleanup all locales but en_US.UTF-8 diff --git a/kubernetes/Dockerfile b/kubernetes/Dockerfile index 29a683bd5..e41bf1cd9 100644 --- a/kubernetes/Dockerfile +++ b/kubernetes/Dockerfile @@ -1,4 +1,4 @@ -FROM postgres:15 +FROM postgres:16 LABEL maintainer="Alexander Kukushkin " RUN export DEBIAN_FRONTEND=noninteractive \ diff --git a/kubernetes/Dockerfile.citus b/kubernetes/Dockerfile.citus index f9564521d..7af9e5ae9 100644 --- a/kubernetes/Dockerfile.citus +++ b/kubernetes/Dockerfile.citus @@ -1,4 +1,4 @@ -FROM postgres:15 +FROM postgres:16 LABEL maintainer="Alexander Kukushkin " RUN export DEBIAN_FRONTEND=noninteractive \ @@ -11,7 +11,7 @@ RUN export DEBIAN_FRONTEND=noninteractive \ ## Make sure we have a en_US.UTF-8 locale available && localedef -i en_US -c -f UTF-8 -A /usr/share/locale/locale.alias en_US.UTF-8 \ && if [ $(dpkg --print-architecture) = 'arm64' ]; then \ - apt-get install -y postgresql-server-dev-15 \ + apt-get install -y 
postgresql-server-dev-16 \ gcc make autoconf \ libc6-dev flex libcurl4-gnutls-dev \ libicu-dev libkrb5-dev liblz4-dev \ @@ -24,7 +24,7 @@ RUN export DEBIAN_FRONTEND=noninteractive \ echo "deb [signed-by=/etc/apt/trusted.gpg.d/citusdata_community.gpg] https://packagecloud.io/citusdata/community/debian/ $(lsb_release -cs) main" > /etc/apt/sources.list.d/citusdata_community.list \ && curl -sL https://packagecloud.io/citusdata/community/gpgkey | gpg --dearmor > /etc/apt/trusted.gpg.d/citusdata_community.gpg \ && apt-get update -y \ - && apt-get -y install postgresql-15-citus-12.0; \ + && apt-get -y install postgresql-16-citus-12.1; \ fi \ && pip3 install --break-system-packages setuptools \ && pip3 install --break-system-packages 'git+https://github.com/zalando/patroni.git#egg=patroni[kubernetes]' \ @@ -38,7 +38,7 @@ RUN export DEBIAN_FRONTEND=noninteractive \ && chmod 664 /etc/passwd \ # Clean up && apt-get remove -y git python3-pip python3-wheel \ - postgresql-server-dev-15 gcc make autoconf \ + postgresql-server-dev-16 gcc make autoconf \ libc6-dev flex libicu-dev libkrb5-dev liblz4-dev \ libpam0g-dev libreadline-dev libselinux1-dev libssl-dev libxslt1-dev libzstd-dev uuid-dev \ && apt-get autoremove -y \ diff --git a/patroni/postgresql/postmaster.py b/patroni/postgresql/postmaster.py index 4505e7f7d..97eb10e4d 100644 --- a/patroni/postgresql/postmaster.py +++ b/patroni/postgresql/postmaster.py @@ -176,7 +176,7 @@ def pg_ctl_kill(self, mode: str, pg_ctl: str) -> Optional[bool]: return not self.is_running() def wait_for_user_backends_to_close(self, stop_timeout: Optional[float]) -> None: - # These regexps are cross checked against versions PostgreSQL 9.1 .. 15 + # These regexps are cross checked against versions PostgreSQL 9.1 .. 16 aux_proc_re = re.compile("(?:postgres:)( .*:)? 
(?:(?:archiver|startup|autovacuum launcher|autovacuum worker|" "checkpointer|logger|stats collector|wal receiver|wal writer|writer)(?: process )?|" "walreceiver|wal sender process|walsender|walwriter|background writer|" From 206ee91b07eed4c405cfabb724105df974038a99 Mon Sep 17 00:00:00 2001 From: Polina Bungina <27892524+hughcapet@users.noreply.github.com> Date: Wed, 20 Dec 2023 09:54:04 +0100 Subject: [PATCH 28/33] Exclude leader from failover candidates in ctl (#2983) Exclude actual leader (not the passed leader argument) from the candidates list in the `patronictl failover` prompt. Abort `patronictl failover` execution if candidate specified is the same as the current cluster leader --- patroni/ctl.py | 36 +++++++++++++++++++----------------- tests/test_ctl.py | 8 +++++++- 2 files changed, 26 insertions(+), 18 deletions(-) diff --git a/patroni/ctl.py b/patroni/ctl.py index 3a0d2a177..de1fe892c 100644 --- a/patroni/ctl.py +++ b/patroni/ctl.py @@ -1185,7 +1185,7 @@ def reinit(cluster_name: str, group: Optional[int], member_names: List[str], for def _do_failover_or_switchover(action: str, cluster_name: str, group: Optional[int], - leader: Optional[str], candidate: Optional[str], + switchover_leader: Optional[str], candidate: Optional[str], force: bool, scheduled: Optional[str] = None) -> None: """Perform a failover or a switchover operation in the cluster. @@ -1199,7 +1199,7 @@ def _do_failover_or_switchover(action: str, cluster_name: str, group: Optional[i :param cluster_name: name of the Patroni cluster. :param group: filter Citus group within we should perform a failover or switchover. If ``None``, user will be prompted for filling it -- unless *force* is ``True``, in which case an exception is raised. - :param leader: name of the current leader member. + :param switchover_leader: name of the leader member passed as switchover option. :param candidate: name of a standby member to be promoted. Nodes that are tagged with ``nofailover`` cannot be used. 
:param force: perform the failover or switchover without asking for confirmations. :param scheduled: timestamp when the switchover should be scheduled to occur. If ``now`` perform immediately. @@ -1208,10 +1208,11 @@ def _do_failover_or_switchover(action: str, cluster_name: str, group: Optional[i :class:`PatroniCtlException`: if: * Patroni is running on a Citus cluster, but no *group* was specified; or * a switchover was requested by the cluster has no leader; or - * *leader* does not match the current leader of the cluster; or + * *switchover_leader* does not match the current leader of the cluster; or * cluster has no candidates available for the operation; or * no *candidate* is given for a failover operation; or - * *leader* and *candidate* are the same; or + * current leader and *candidate* are the same; or + * *candidate* is tagged as nofailover; or * *candidate* is not a member of the cluster; or * trying to schedule a switchover in a cluster that is in maintenance mode; or * user aborts the operation. 
@@ -1231,23 +1232,24 @@ def _do_failover_or_switchover(action: str, cluster_name: str, group: Optional[i config = global_config.from_cluster(cluster) + cluster_leader = cluster.leader and cluster.leader.name # leader has to be be defined for switchover only if action == 'switchover': - if cluster.leader is None or not cluster.leader.name: + if not cluster_leader: raise PatroniCtlException('This cluster has no leader') - if leader is None: + if switchover_leader is None: if force: - leader = cluster.leader.name + switchover_leader = cluster_leader else: prompt = 'Standby Leader' if config.is_standby_cluster else 'Primary' - leader = click.prompt(prompt, type=str, default=(cluster.leader and cluster.leader.name)) + switchover_leader = click.prompt(prompt, type=str, default=cluster_leader) - if cluster.leader.name != leader: - raise PatroniCtlException(f'Member {leader} is not the leader of cluster {cluster_name}') + if cluster_leader != switchover_leader: + raise PatroniCtlException(f'Member {switchover_leader} is not the leader of cluster {cluster_name}') # excluding members with nofailover tag - candidate_names = [str(m.name) for m in cluster.members if m.name != leader and not m.nofailover] + candidate_names = [str(m.name) for m in cluster.members if m.name != cluster_leader and not m.nofailover] # We sort the names for consistent output to the client candidate_names.sort() @@ -1260,10 +1262,10 @@ def _do_failover_or_switchover(action: str, cluster_name: str, group: Optional[i if action == 'failover' and not candidate: raise PatroniCtlException('Failover could be performed only to a specific candidate') - if candidate == leader: - raise PatroniCtlException(action.title() + ' target and source are the same.') - if candidate and candidate not in candidate_names: + if candidate == cluster_leader: + raise PatroniCtlException( + f'Member {candidate} is already the leader of cluster {cluster_name}') raise PatroniCtlException( f'Member {candidate} does not exist in 
cluster {cluster_name} or is tagged as nofailover') @@ -1292,7 +1294,7 @@ def _do_failover_or_switchover(action: str, cluster_name: str, group: Optional[i failover_value = {'candidate': candidate} if action == 'switchover': - failover_value['leader'] = leader + failover_value['leader'] = switchover_leader if scheduled_at_str: failover_value['scheduled_at'] = scheduled_at_str @@ -1300,7 +1302,7 @@ def _do_failover_or_switchover(action: str, cluster_name: str, group: Optional[i # By now we have established that the leader exists and the candidate exists if not force: - demote_msg = f', demoting current leader {cluster.leader.name}' if cluster.leader else '' + demote_msg = f', demoting current leader {cluster_leader}' if cluster_leader else '' if scheduled_at_str: # only switchover can be scheduled if not click.confirm(f'Are you sure you want to schedule switchover of cluster ' @@ -1334,7 +1336,7 @@ def _do_failover_or_switchover(action: str, cluster_name: str, group: Optional[i logging.exception(r) logging.warning('Failing over to DCS') click.echo('{0} Could not {1} using Patroni api, falling back to DCS'.format(timestamp(), action)) - dcs.manual_failover(leader, candidate, scheduled_at=scheduled_at) + dcs.manual_failover(switchover_leader, candidate, scheduled_at=scheduled_at) output_members(cluster, cluster_name, group=group) diff --git a/tests/test_ctl.py b/tests/test_ctl.py index badbeca1a..f9ee62ced 100644 --- a/tests/test_ctl.py +++ b/tests/test_ctl.py @@ -157,7 +157,8 @@ def test_switchover(self): # Target and source are equal result = self.runner.invoke(ctl, ['switchover', 'dummy', '--group', '0'], input='leader\nleader\n\ny') self.assertEqual(result.exit_code, 1) - self.assertIn('Switchover target and source are the same', result.output) + self.assertIn("Candidate ['other']", result.output) + self.assertIn('Member leader is already the leader of cluster dummy', result.output) # Candidate is not a member of the cluster result = self.runner.invoke(ctl, 
['switchover', 'dummy', '--group', '0'], input='leader\nReality\n\ny') @@ -220,6 +221,11 @@ def test_failover(self): result = self.runner.invoke(ctl, ['failover', 'dummy'], input='0\n') self.assertIn('Failover could be performed only to a specific candidate', result.output) + # Candidate is the same as the leader + result = self.runner.invoke(ctl, ['failover', 'dummy', '--group', '0'], input='leader\n') + self.assertIn("Candidate ['other']", result.output) + self.assertIn('Member leader is already the leader of cluster dummy', result.output) + # Temp test to check a fallback to switchover if leader is specified with patch('patroni.ctl._do_failover_or_switchover') as failover_func_mock: result = self.runner.invoke(ctl, ['failover', '--leader', 'leader', 'dummy'], input='0\n') From 5c3e1a693e2217e990ae42cd27c1d9795df38d68 Mon Sep 17 00:00:00 2001 From: Alexander Kukushkin Date: Wed, 20 Dec 2023 10:49:33 +0100 Subject: [PATCH 29/33] Implement validation of the `log` section (#2989) Somehow it was always forgotten. 
--- patroni/validator.py | 12 ++++++++++++ tests/test_validator.py | 14 ++++++++++++++ 2 files changed, 26 insertions(+) diff --git a/patroni/validator.py b/patroni/validator.py index d0b168be6..bd69cc0e7 100644 --- a/patroni/validator.py +++ b/patroni/validator.py @@ -937,6 +937,18 @@ def validate_watchdog_mode(value: Any) -> None: schema = Schema({ "name": str, "scope": str, + Optional("log"): { + Optional("level"): EnumValidator(('DEBUG', 'INFO', 'WARN', 'WARNING', 'ERROR', 'FATAL', 'CRITICAL'), + case_sensitive=True, raise_assert=True), + Optional("traceback_level"): EnumValidator(('DEBUG', 'ERROR'), raise_assert=True), + Optional("format"): str, + Optional("dateformat"): str, + Optional("max_queue_size"): int, + Optional("dir"): str, + Optional("file_num"): int, + Optional("file_size"): int, + Optional("loggers"): dict + }, Optional("ctl"): { Optional("insecure"): bool, Optional("cacert"): str, diff --git a/tests/test_validator.py b/tests/test_validator.py index 1f647dbe6..d5e82992d 100644 --- a/tests/test_validator.py +++ b/tests/test_validator.py @@ -13,6 +13,20 @@ config = { "name": "string", "scope": "string", + "log": { + "level": "DEBUG", + "traceback_level": "DEBUG", + "format": "%(asctime)s %(levelname)s: %(message)s", + "dateformat": "%Y-%m-%d %H:%M:%S", + "max_queue_size": 100, + "dir": "/tmp", + "file_num": 10, + "file_size": 1000000, + "loggers": { + "patroni.postmaster": "WARNING", + "urllib3": "DEBUG" + } + }, "restapi": { "listen": "127.0.0.2:800", "connect_address": "127.0.0.2:800", From bcfd8438a50a1f36b65be76ee2d17d1da3b903e3 Mon Sep 17 00:00:00 2001 From: Alexander Kukushkin Date: Thu, 21 Dec 2023 08:58:26 +0100 Subject: [PATCH 30/33] Abstract CitusHandler and decouple it from configuration (#2950) the main issue was that the configuration for Citus handler and for DCS existed in two places, while ideally AbstractDCS should not know many details about what kind of MPP is in use. 
To solve the problem we first dynamically create an object implementing AbstractMPP interfaces, which is a configuration for DCS. Later this object is used to instantiate the class implementing AbstractMPPHandler interface. This is just a starting point, which does some heavy lifting. As a next steps all kind of variables named after Citus in files different from patroni/postgres/mpp/citus.py should be renamed. In other words this commit takes over the most complex part of #2940, which was never implemented. Co-authored-by: zhjwpku --- patroni/__main__.py | 2 +- patroni/api.py | 4 +- patroni/ctl.py | 8 +- patroni/dcs/__init__.py | 33 +-- patroni/dcs/consul.py | 9 +- patroni/dcs/etcd.py | 13 +- patroni/dcs/etcd3.py | 10 +- patroni/dcs/exhibitor.py | 5 +- patroni/dcs/kubernetes.py | 20 +- patroni/dcs/raft.py | 10 +- patroni/dcs/zookeeper.py | 10 +- patroni/ha.py | 4 +- patroni/postgresql/__init__.py | 6 +- patroni/postgresql/mpp/__init__.py | 296 ++++++++++++++++++++++++++ patroni/postgresql/{ => mpp}/citus.py | 103 ++++++--- patroni/postgresql/slots.py | 2 +- tests/__init__.py | 36 ++-- tests/test_citus.py | 34 ++- tests/test_consul.py | 22 +- tests/test_ctl.py | 11 +- tests/test_etcd.py | 11 +- tests/test_etcd3.py | 10 +- tests/test_exhibitor.py | 6 +- tests/test_ha.py | 22 +- tests/test_kubernetes.py | 17 +- tests/test_mpp.py | 52 +++++ tests/test_raft.py | 21 +- tests/test_zookeeper.py | 10 +- 28 files changed, 602 insertions(+), 185 deletions(-) create mode 100644 patroni/postgresql/mpp/__init__.py rename patroni/postgresql/{ => mpp}/citus.py (84%) create mode 100644 tests/test_mpp.py diff --git a/patroni/__main__.py b/patroni/__main__.py index 229ccfb9b..2c253da44 100644 --- a/patroni/__main__.py +++ b/patroni/__main__.py @@ -68,7 +68,7 @@ def __init__(self, config: 'Config') -> None: self.watchdog = Watchdog(self.config) self.load_dynamic_configuration() - self.postgresql = Postgresql(self.config['postgresql']) + self.postgresql = 
Postgresql(self.config['postgresql'], self.dcs.mpp) self.api = RestApiServer(self, self.config['restapi']) self.ha = Ha(self) diff --git a/patroni/api.py b/patroni/api.py index 5761d359e..5c9d66030 100644 --- a/patroni/api.py +++ b/patroni/api.py @@ -1153,8 +1153,8 @@ def do_POST_switchover(self) -> None: def do_POST_citus(self) -> None: """Handle a ``POST`` request to ``/citus`` path. - Call :func:`~patroni.postgresql.CitusHandler.handle_event` to handle the request, then write a response with - HTTP status code ``200``. + Call :func:`~patroni.postgresql.mpp.AbstractMPPHandler.handle_event` to handle the request, + then write a response with HTTP status code ``200``. .. note:: If unable to parse the request body, then the request is silently discarded. diff --git a/patroni/ctl.py b/patroni/ctl.py index de1fe892c..6a5fe0e66 100644 --- a/patroni/ctl.py +++ b/patroni/ctl.py @@ -51,6 +51,7 @@ from .dcs import get_dcs as _get_dcs, AbstractDCS, Cluster, Member from .exceptions import PatroniException from .postgresql.misc import postgres_version_to_int +from .postgresql.mpp import get_mpp from .utils import cluster_as_json, patch_config, polling_loop from .request import PatroniRequest from .version import __version__ @@ -313,7 +314,7 @@ def ctl(ctx: click.Context, config_file: str, dcs_url: Optional[str], insecure: config = load_config(config_file, dcs_url) # backward compatibility for configuration file where ctl section is not defined config.setdefault('ctl', {})['insecure'] = config.get('ctl', {}).get('insecure') or insecure - ctx.obj = {'__config': config} + ctx.obj = {'__config': config, '__mpp': get_mpp(config)} def is_citus_cluster() -> bool: @@ -321,7 +322,7 @@ def is_citus_cluster() -> bool: :returns: ``True`` if configuration has ``citus`` section, otherwise ``False``. 
""" - return bool(_get_configuration().get('citus')) + return click.get_current_context().obj['__mpp'].is_enabled() def get_dcs(scope: str, group: Optional[int]) -> AbstractDCS: @@ -340,12 +341,13 @@ def get_dcs(scope: str, group: Optional[int]) -> AbstractDCS: config = _get_configuration() config.update({'scope': scope, 'patronictl': True}) if group is not None: - config['citus'] = {'group': group} + config['citus'] = {'group': group, 'database': 'postgres'} config.setdefault('name', scope) try: dcs = _get_dcs(config) if is_citus_cluster() and group is None: dcs.is_citus_coordinator = lambda: True + click.get_current_context().obj['__mpp'] = dcs.mpp return dcs except PatroniException as e: raise PatroniCtlException(str(e)) diff --git a/patroni/dcs/__init__.py b/patroni/dcs/__init__.py index 28c3734fc..a210615bf 100644 --- a/patroni/dcs/__init__.py +++ b/patroni/dcs/__init__.py @@ -25,10 +25,9 @@ if TYPE_CHECKING: # pragma: no cover from ..config import Config from ..postgresql import Postgresql + from ..postgresql.mpp import AbstractMPP SLOT_ADVANCE_AVAILABLE_VERSION = 110000 -CITUS_COORDINATOR_GROUP_ID = 0 -citus_group_re = re.compile('^(0|[1-9][0-9]*)$') slot_name_re = re.compile('^[a-z0-9_]{1,63}$') logger = logging.getLogger(__name__) @@ -130,10 +129,9 @@ def get_dcs(config: Union['Config', Dict[str, Any]]) -> 'AbstractDCS': p: config[p] for p in ('namespace', 'name', 'scope', 'loop_wait', 'patronictl', 'ttl', 'retry_timeout') if p in config}) - # From citus section we only need "group" parameter, but will propagate everything just in case. 
- if isinstance(config.get('citus'), dict): - config[name].update(config['citus']) - return dcs_class(config[name]) + + from patroni.postgresql.mpp import get_mpp + return dcs_class(config[name], get_mpp(config)) available_implementations = ', '.join(sorted([n for n, _ in iter_dcs_classes()])) raise PatroniFatalException("Can not find suitable configuration of distributed configuration store\n" @@ -1338,15 +1336,15 @@ class AbstractDCS(abc.ABC): _SYNC = 'sync' _FAILSAFE = 'failsafe' - def __init__(self, config: Dict[str, Any]) -> None: + def __init__(self, config: Dict[str, Any], mpp: 'AbstractMPP') -> None: """Prepare DCS paths, Citus group ID, initial values for state information and processing dependencies. :ivar config: :class:`dict`, reference to config section of selected DCS. i.e.: ``zookeeper`` for zookeeper, ``etcd`` for etcd, etc... """ + self._mpp = mpp self._name = config['name'] self._base_path = re.sub('/+', '/', '/'.join(['', config.get('namespace', 'service'), config['scope']])) - self._citus_group = str(config['group']) if isinstance(config.get('group'), int) else None self._set_loop_wait(config.get('loop_wait', 10)) self._ctl = bool(config.get('patronictl', False)) @@ -1359,6 +1357,11 @@ def __init__(self, config: Dict[str, Any]) -> None: self._last_failsafe: Optional[Dict[str, str]] = {} self.event = Event() + @property + def mpp(self) -> 'AbstractMPP': + """Get the effective underlying MPP, if any has been configured.""" + return self._mpp + def client_path(self, path: str) -> str: """Construct the absolute key name from appropriate parts for the DCS type. @@ -1367,8 +1370,8 @@ def client_path(self, path: str) -> str: :returns: absolute key name for the current Patroni cluster. 
""" components = [self._base_path] - if self._citus_group: - components.append(self._citus_group) + if self._mpp.is_enabled(): + components.append(str(self._mpp.group)) components.append(path.lstrip('/')) return '/'.join(components) @@ -1522,9 +1525,9 @@ def __get_patroni_cluster(self, path: Optional[str] = None) -> Cluster: def is_citus_coordinator(self) -> bool: """:class:`Cluster` instance has a Citus Coordinator group ID. - :returns: ``True`` if the given node is running as Citus Coordinator (``group=0``). + :returns: ``True`` if the given node is running as the MPP Coordinator. """ - return self._citus_group == str(CITUS_COORDINATOR_GROUP_ID) + return self._mpp.is_coordinator() def get_citus_coordinator(self) -> Optional[Cluster]: """Load the Patroni cluster for the Citus Coordinator. @@ -1532,10 +1535,10 @@ def get_citus_coordinator(self) -> Optional[Cluster]: .. note:: This method is only executed on the worker nodes (``group!=0``) to find the coordinator. - :returns: Select :class:`Cluster` instance associated with the Citus Coordinator group ID. + :returns: Select :class:`Cluster` instance associated with the MPP Coordinator group ID. 
""" try: - return self.__get_patroni_cluster(f'{self._base_path}/{CITUS_COORDINATOR_GROUP_ID}/') + return self.__get_patroni_cluster(f'{self._base_path}/{self._mpp.coordinator_group_id}/') except Exception as e: logger.error('Failed to load Citus coordinator cluster from %s: %r', self.__class__.__name__, e) return None @@ -1549,7 +1552,7 @@ def _get_citus_cluster(self) -> Cluster: groups = self._load_cluster(self._base_path + '/', self._citus_cluster_loader) if TYPE_CHECKING: # pragma: no cover assert isinstance(groups, dict) - cluster = groups.pop(CITUS_COORDINATOR_GROUP_ID, Cluster.empty()) + cluster = groups.pop(self._mpp.coordinator_group_id, Cluster.empty()) cluster.workers.update(groups) return cluster diff --git a/patroni/dcs/consul.py b/patroni/dcs/consul.py index fe66c6d81..19d65306a 100644 --- a/patroni/dcs/consul.py +++ b/patroni/dcs/consul.py @@ -16,8 +16,9 @@ from typing import Any, Callable, Dict, List, Mapping, NamedTuple, Optional, Union, Tuple, TYPE_CHECKING from . import AbstractDCS, Cluster, ClusterConfig, Failover, Leader, Member, Status, SyncState, \ - TimelineHistory, ReturnFalseException, catch_return_false_exception, citus_group_re + TimelineHistory, ReturnFalseException, catch_return_false_exception from ..exceptions import DCSError +from ..postgresql.mpp import AbstractMPP from ..utils import deep_compare, parse_bool, Retry, RetryFailedError, split_host_port, uri, USER_AGENT if TYPE_CHECKING: # pragma: no cover from ..config import Config @@ -232,8 +233,8 @@ def replace_char(match: Any) -> str: class Consul(AbstractDCS): - def __init__(self, config: Dict[str, Any]) -> None: - super(Consul, self).__init__(config) + def __init__(self, config: Dict[str, Any], mpp: AbstractMPP) -> None: + super(Consul, self).__init__(config, mpp) self._base_path = self._base_path[1:] self._scope = config['scope'] self._session = None @@ -435,7 +436,7 @@ def _citus_cluster_loader(self, path: str) -> Dict[int, Cluster]: clusters: Dict[int, Dict[str, Cluster]] = 
defaultdict(dict) for node in results or []: key = node['Key'][len(path):].split('/', 1) - if len(key) == 2 and citus_group_re.match(key[0]): + if len(key) == 2 and self._mpp.group_re.match(key[0]): node['Value'] = (node['Value'] or b'').decode('utf-8') clusters[int(key[0])][key[1]] = node return {group: self._cluster_from_nodes(nodes) for group, nodes in clusters.items()} diff --git a/patroni/dcs/etcd.py b/patroni/dcs/etcd.py index 3be699a64..b9d3e0abc 100644 --- a/patroni/dcs/etcd.py +++ b/patroni/dcs/etcd.py @@ -22,8 +22,9 @@ from urllib3.exceptions import HTTPError, ReadTimeoutError, ProtocolError from . import AbstractDCS, Cluster, ClusterConfig, Failover, Leader, Member, Status, SyncState, \ - TimelineHistory, ReturnFalseException, catch_return_false_exception, citus_group_re + TimelineHistory, ReturnFalseException, catch_return_false_exception from ..exceptions import DCSError +from ..postgresql.mpp import AbstractMPP from ..request import get as requests_get from ..utils import Retry, RetryFailedError, split_host_port, uri, USER_AGENT if TYPE_CHECKING: # pragma: no cover @@ -470,9 +471,9 @@ def _prepare_request(self, kwargs: Dict[str, Any], params: Optional[Dict[str, An class AbstractEtcd(AbstractDCS): - def __init__(self, config: Dict[str, Any], client_cls: Type[AbstractEtcdClientWithFailover], + def __init__(self, config: Dict[str, Any], mpp: AbstractMPP, client_cls: Type[AbstractEtcdClientWithFailover], retry_errors_cls: Union[Type[Exception], Tuple[Type[Exception], ...]]) -> None: - super(AbstractEtcd, self).__init__(config) + super(AbstractEtcd, self).__init__(config, mpp) self._retry = Retry(deadline=config['retry_timeout'], max_delay=1, max_tries=-1, retry_exceptions=retry_errors_cls) self._ttl = int(config.get('ttl') or 30) @@ -645,8 +646,8 @@ def wrapper(self: AbstractEtcd, *args: Any, **kwargs: Any) -> Any: class Etcd(AbstractEtcd): - def __init__(self, config: Dict[str, Any]) -> None: - super(Etcd, self).__init__(config, EtcdClient, 
(etcd.EtcdLeaderElectionInProgress, EtcdRaftInternal)) + def __init__(self, config: Dict[str, Any], mpp: AbstractMPP) -> None: + super(Etcd, self).__init__(config, mpp, EtcdClient, (etcd.EtcdLeaderElectionInProgress, EtcdRaftInternal)) self.__do_not_watch = False @property @@ -726,7 +727,7 @@ def _citus_cluster_loader(self, path: str) -> Dict[int, Cluster]: clusters: Dict[int, Dict[str, etcd.EtcdResult]] = defaultdict(dict) for node in result.leaves: key = node.key[len(result.key):].lstrip('/').split('/', 1) - if len(key) == 2 and citus_group_re.match(key[0]): + if len(key) == 2 and self._mpp.group_re.match(key[0]): clusters[int(key[0])][key[1]] = node return {group: self._cluster_from_nodes(result.etcd_index, nodes) for group, nodes in clusters.items()} diff --git a/patroni/dcs/etcd3.py b/patroni/dcs/etcd3.py index ea7e52f24..7cc2a1155 100644 --- a/patroni/dcs/etcd3.py +++ b/patroni/dcs/etcd3.py @@ -16,9 +16,10 @@ from typing import Any, Callable, Collection, Dict, Iterator, List, Optional, Tuple, Type, TYPE_CHECKING, Union from . 
import ClusterConfig, Cluster, Failover, Leader, Member, Status, SyncState, \ - TimelineHistory, catch_return_false_exception, citus_group_re + TimelineHistory, catch_return_false_exception from .etcd import AbstractEtcdClientWithFailover, AbstractEtcd, catch_etcd_errors, DnsCachingResolver, Retry from ..exceptions import DCSError, PatroniException +from ..postgresql.mpp import AbstractMPP from ..utils import deep_compare, enable_keepalive, iter_response_objects, RetryFailedError, USER_AGENT logger = logging.getLogger(__name__) @@ -671,8 +672,9 @@ def txn(self, compare: Dict[str, Any], success: Dict[str, Any], class Etcd3(AbstractEtcd): - def __init__(self, config: Dict[str, Any]) -> None: - super(Etcd3, self).__init__(config, PatroniEtcd3Client, (DeadlineExceeded, Unavailable, FailedPrecondition)) + def __init__(self, config: Dict[str, Any], mpp: AbstractMPP) -> None: + super(Etcd3, self).__init__(config, mpp, PatroniEtcd3Client, + (DeadlineExceeded, Unavailable, FailedPrecondition)) self.__do_not_watch = False self._lease = None self._last_lease_refresh = 0 @@ -796,7 +798,7 @@ def _citus_cluster_loader(self, path: str) -> Dict[int, Cluster]: path = self._base_path + '/' for node in self._client.get_cluster(path): key = node['key'][len(path):].split('/', 1) - if len(key) == 2 and citus_group_re.match(key[0]): + if len(key) == 2 and self._mpp.group_re.match(key[0]): clusters[int(key[0])][key[1]] = node return {group: self._cluster_from_nodes(nodes) for group, nodes in clusters.items()} diff --git a/patroni/dcs/exhibitor.py b/patroni/dcs/exhibitor.py index 2b06073be..03d235758 100644 --- a/patroni/dcs/exhibitor.py +++ b/patroni/dcs/exhibitor.py @@ -7,6 +7,7 @@ from . 
import Cluster from .zookeeper import ZooKeeper +from ..postgresql.mpp import AbstractMPP from ..request import get as requests_get from ..utils import uri @@ -66,10 +67,10 @@ def zookeeper_hosts(self) -> str: class Exhibitor(ZooKeeper): - def __init__(self, config: Dict[str, Any]) -> None: + def __init__(self, config: Dict[str, Any], mpp: AbstractMPP) -> None: interval = config.get('poll_interval', 300) self._ensemble_provider = ExhibitorEnsembleProvider(config['hosts'], config['port'], poll_interval=interval) - super(Exhibitor, self).__init__({**config, 'hosts': self._ensemble_provider.zookeeper_hosts}) + super(Exhibitor, self).__init__({**config, 'hosts': self._ensemble_provider.zookeeper_hosts}, mpp) def _load_cluster( self, path: str, loader: Callable[[str], Union[Cluster, Dict[int, Cluster]]] diff --git a/patroni/dcs/kubernetes.py b/patroni/dcs/kubernetes.py index aee87bd3d..343b496cb 100644 --- a/patroni/dcs/kubernetes.py +++ b/patroni/dcs/kubernetes.py @@ -19,9 +19,9 @@ from threading import Condition, Lock, Thread from typing import Any, Callable, Collection, Dict, List, Optional, Tuple, Type, Union, TYPE_CHECKING -from . import AbstractDCS, Cluster, ClusterConfig, Failover, Leader, Member, Status, SyncState, \ - TimelineHistory, CITUS_COORDINATOR_GROUP_ID, citus_group_re +from . 
import AbstractDCS, Cluster, ClusterConfig, Failover, Leader, Member, Status, SyncState, TimelineHistory from ..exceptions import DCSError +from ..postgresql.mpp import AbstractMPP from ..utils import deep_compare, iter_response_objects, keepalive_socket_options, \ Retry, RetryFailedError, tzutc, uri, USER_AGENT if TYPE_CHECKING: # pragma: no cover @@ -748,7 +748,7 @@ class Kubernetes(AbstractDCS): _CITUS_LABEL = 'citus-group' - def __init__(self, config: Dict[str, Any]) -> None: + def __init__(self, config: Dict[str, Any], mpp: AbstractMPP) -> None: self._labels = deepcopy(config['labels']) self._labels[config.get('scope_label', 'cluster-name')] = config['scope'] self._label_selector = ','.join('{0}={1}'.format(k, v) for k, v in self._labels.items()) @@ -759,9 +759,9 @@ def __init__(self, config: Dict[str, Any]) -> None: self._standby_leader_label_value = config.get('standby_leader_label_value', 'master') self._tmp_role_label = config.get('tmp_role_label') self._ca_certs = os.environ.get('PATRONI_KUBERNETES_CACERT', config.get('cacert')) or SERVICE_CERT_FILENAME - super(Kubernetes, self).__init__({**config, 'namespace': ''}) - if self._citus_group: - self._labels[self._CITUS_LABEL] = self._citus_group + super(Kubernetes, self).__init__({**config, 'namespace': ''}, mpp) + if self._mpp.is_enabled(): + self._labels[self._CITUS_LABEL] = str(self._mpp.group) self._retry = Retry(deadline=config['retry_timeout'], max_delay=1, max_tries=-1, retry_exceptions=KubernetesRetriableException) @@ -944,12 +944,12 @@ def _citus_cluster_loader(self, path: Dict[str, Any]) -> Dict[int, Cluster]: for name, pod in path['pods'].items(): group = pod.metadata.labels.get(self._CITUS_LABEL) - if group and citus_group_re.match(group): + if group and self._mpp.group_re.match(group): clusters[group]['pods'][name] = pod for name, kind in path['nodes'].items(): group = kind.metadata.labels.get(self._CITUS_LABEL) - if group and citus_group_re.match(group): + if group and 
self._mpp.group_re.match(group): clusters[group]['nodes'][name] = kind return {int(group): self._cluster_from_nodes(group, value['nodes'], value['pods'].values()) for group, value in clusters.items()} @@ -976,12 +976,12 @@ def __load_cluster( def _load_cluster( self, path: str, loader: Callable[[Any], Union[Cluster, Dict[int, Cluster]]] ) -> Union[Cluster, Dict[int, Cluster]]: - group = self._citus_group if path == self.client_path('') else None + group = str(self._mpp.group) if self._mpp.is_enabled() and path == self.client_path('') else None return self.__load_cluster(group, loader) def get_citus_coordinator(self) -> Optional[Cluster]: try: - ret = self.__load_cluster(str(CITUS_COORDINATOR_GROUP_ID), self._cluster_loader) + ret = self.__load_cluster(str(self._mpp.coordinator_group_id), self._cluster_loader) if TYPE_CHECKING: # pragma: no cover assert isinstance(ret, Cluster) return ret diff --git a/patroni/dcs/raft.py b/patroni/dcs/raft.py index 98c48f44e..0528cfb0a 100644 --- a/patroni/dcs/raft.py +++ b/patroni/dcs/raft.py @@ -12,9 +12,9 @@ from pysyncobj.utility import TcpUtility from typing import Any, Callable, Collection, Dict, List, Optional, Set, Union, TYPE_CHECKING -from . import AbstractDCS, ClusterConfig, Cluster, Failover, Leader, Member, Status, SyncState, \ - TimelineHistory, citus_group_re +from . 
import AbstractDCS, ClusterConfig, Cluster, Failover, Leader, Member, Status, SyncState, TimelineHistory from ..exceptions import DCSError +from ..postgresql.mpp import AbstractMPP from ..utils import validate_directory if TYPE_CHECKING: # pragma: no cover from ..config import Config @@ -285,8 +285,8 @@ def destroy(self) -> None: class Raft(AbstractDCS): - def __init__(self, config: Dict[str, Any]) -> None: - super(Raft, self).__init__(config) + def __init__(self, config: Dict[str, Any], mpp: AbstractMPP) -> None: + super(Raft, self).__init__(config, mpp) self._ttl = int(config.get('ttl') or 30) ready_event = threading.Event() @@ -387,7 +387,7 @@ def _citus_cluster_loader(self, path: str) -> Dict[int, Cluster]: response = self._sync_obj.get(path, recursive=True) for key, value in (response or {}).items(): key = key[len(path):].split('/', 1) - if len(key) == 2 and citus_group_re.match(key[0]): + if len(key) == 2 and self._mpp.group_re.match(key[0]): clusters[int(key[0])][key[1]] = value return {group: self._cluster_from_nodes(nodes) for group, nodes in clusters.items()} diff --git a/patroni/dcs/zookeeper.py b/patroni/dcs/zookeeper.py index 3704b579d..6bf77ae45 100644 --- a/patroni/dcs/zookeeper.py +++ b/patroni/dcs/zookeeper.py @@ -12,9 +12,9 @@ from kazoo.security import ACL, make_acl from typing import Any, Callable, Dict, List, Optional, Union, Tuple, TYPE_CHECKING -from . import AbstractDCS, ClusterConfig, Cluster, Failover, Leader, Member, Status, SyncState, \ - TimelineHistory, citus_group_re +from . 
import AbstractDCS, ClusterConfig, Cluster, Failover, Leader, Member, Status, SyncState, TimelineHistory from ..exceptions import DCSError +from ..postgresql.mpp import AbstractMPP from ..utils import deep_compare if TYPE_CHECKING: # pragma: no cover from ..config import Config @@ -87,8 +87,8 @@ def _call(self, request: Tuple[Any], async_object: AsyncResult) -> Optional[bool class ZooKeeper(AbstractDCS): - def __init__(self, config: Dict[str, Any]) -> None: - super(ZooKeeper, self).__init__(config) + def __init__(self, config: Dict[str, Any], mpp: AbstractMPP) -> None: + super(ZooKeeper, self).__init__(config, mpp) hosts: Union[str, List[str]] = config.get('hosts', []) if isinstance(hosts, list): @@ -261,7 +261,7 @@ def _cluster_loader(self, path: str) -> Cluster: def _citus_cluster_loader(self, path: str) -> Dict[int, Cluster]: ret: Dict[int, Cluster] = {} for node in self.get_children(path): - if citus_group_re.match(node): + if self._mpp.group_re.match(node): ret[int(node)] = self._cluster_loader(path + node + '/') return ret diff --git a/patroni/ha.py b/patroni/ha.py index 0d3e05a41..dea78163a 100644 --- a/patroni/ha.py +++ b/patroni/ha.py @@ -332,7 +332,7 @@ def notify_citus_coordinator(self, event: str) -> None: if coordinator and coordinator.leader and coordinator.leader.conn_url: try: data = {'type': event, - 'group': self.state_handler.citus_handler.group(), + 'group': self.state_handler.citus_handler.group, 'leader': self.state_handler.name, 'timeout': self.dcs.ttl, 'cooldown': self.patroni.config['retry_timeout']} @@ -847,7 +847,7 @@ def enforce_primary_role(self, message: str, promote_message: str) -> str: self.state_handler.set_role('master') self.process_sync_replication() self.update_cluster_history() - self.state_handler.citus_handler.sync_pg_dist_node(self.cluster) + self.state_handler.citus_handler.sync_meta_data(self.cluster) return message elif self.state_handler.role in ('master', 'promoted', 'primary'): self.process_sync_replication() diff 
--git a/patroni/postgresql/__init__.py b/patroni/postgresql/__init__.py index c87c29fab..f6e3ab545 100644 --- a/patroni/postgresql/__init__.py +++ b/patroni/postgresql/__init__.py @@ -19,8 +19,8 @@ from .cancellable import CancellableSubprocess from .config import ConfigHandler, mtime from .connection import ConnectionPool, get_connection_cursor -from .citus import CitusHandler from .misc import parse_history, parse_lsn, postgres_major_version_to_int +from .mpp import AbstractMPP from .postmaster import PostmasterProcess from .slots import SlotsHandler from .sync import SyncHandler @@ -63,7 +63,7 @@ class Postgresql(object): "pg_catalog.pg_{0}_{1}_diff(COALESCE(pg_catalog.pg_last_{0}_receive_{1}(), '0/0'), '0/0')::bigint, " "pg_catalog.pg_is_in_recovery() AND pg_catalog.pg_is_{0}_replay_paused()") - def __init__(self, config: Dict[str, Any]) -> None: + def __init__(self, config: Dict[str, Any], mpp: AbstractMPP) -> None: self.name: str = config['name'] self.scope: str = config['scope'] self._data_dir: str = config['data_dir'] @@ -80,7 +80,7 @@ def __init__(self, config: Dict[str, Any]) -> None: self._pending_restart = False self.connection_pool = ConnectionPool() self._connection = self.connection_pool.get('heartbeat') - self.citus_handler = CitusHandler(self, config.get('citus')) + self.citus_handler = mpp.get_handler_impl(self) self.config = ConfigHandler(self, config) self.config.check_directories() diff --git a/patroni/postgresql/mpp/__init__.py b/patroni/postgresql/mpp/__init__.py new file mode 100644 index 000000000..3494793b5 --- /dev/null +++ b/patroni/postgresql/mpp/__init__.py @@ -0,0 +1,296 @@ +"""Abstract classes for MPP handler. + +MPP stands for Massively Parallel Processing, and Citus belongs to this architecture. Currently, Citus is the only +supported MPP cluster. However, we may consider adapting other databases such as TimescaleDB, GPDB, etc. into Patroni. 
+""" +import abc + +from typing import Any, Dict, Iterator, Optional, Union, Tuple, Type, TYPE_CHECKING + +from ...dcs import Cluster +from ...dynamic_loader import iter_classes +from ...exceptions import PatroniException + +if TYPE_CHECKING: # pragma: no cover + from .. import Postgresql + from ...config import Config + + +class AbstractMPP(abc.ABC): + """An abstract class which should be passed to :class:`AbstractDCS`. + + .. note:: + We create :class:`AbstractMPP` and :class:`AbstractMPPHandler` to solve the chicken-egg initialization problem. + When initializing DCS, we dynamically create an object implementing :class:`AbstractMPP`, later this object is + used to instantiate an object implementing :class:`AbstractMPPHandler`. + """ + + group_re: Any # re.Pattern[str] + + def __init__(self, config: Dict[str, Union[str, int]]) -> None: + """Init method for :class:`AbstractMPP`. + + :param config: configuration of MPP section. + """ + self._config = config + + def is_enabled(self) -> bool: + """Check if MPP is enabled for a given MPP. + + .. note:: + We just check that the :attr:`_config` object isn't empty and expect + it to be empty only in case of :class:`Null`. + + :returns: ``True`` if MPP is enabled, otherwise ``False``. + """ + return bool(self._config) + + @staticmethod + @abc.abstractmethod + def validate_config(config: Any) -> bool: + """Check whether provided config is good for a given MPP. + + :param config: configuration of MPP section. + + :returns: ``True`` is config passes validation, otherwise ``False``. + """ + + @property + @abc.abstractmethod + def group(self) -> Any: + """The group for a given MPP implementation.""" + + @property + @abc.abstractmethod + def coordinator_group_id(self) -> Any: + """The group id of the coordinator PostgreSQL cluster.""" + + def is_coordinator(self) -> bool: + """Check whether this node is running in the coordinator PostgreSQL cluster. 
+
+        :returns: ``True`` if MPP is enabled and the group id of this node
+            matches with the :attr:`coordinator_group_id`, otherwise ``False``.
+        """
+        return self.is_enabled() and self.group == self.coordinator_group_id
+
+    def is_worker(self) -> bool:
+        """Check whether this node is running as a MPP worker PostgreSQL cluster.
+
+        :returns: ``True`` if MPP is enabled and this node is known to be not running
+            as the coordinator PostgreSQL cluster, otherwise ``False``.
+        """
+        return self.is_enabled() and not self.is_coordinator()
+
+    def _get_handler_cls(self) -> Iterator[Type['AbstractMPPHandler']]:
+        """Find Handler classes inherited from a class type of this object.
+
+        :yields: handler classes for this object.
+        """
+        for cls in self.__class__.__subclasses__():
+            if issubclass(cls, AbstractMPPHandler) and cls.__name__.startswith(self.__class__.__name__):
+                yield cls
+
+    def get_handler_impl(self, postgresql: 'Postgresql') -> 'AbstractMPPHandler':
+        """Find and instantiate Handler implementation of this object.
+
+        :param postgresql: a reference to :class:`Postgresql` object.
+
+        :raises:
+            :exc:`PatroniException`: if the Handler class hasn't been found.
+
+        :returns: an instantiated class that implements Handler for this object.
+        """
+        for cls in self._get_handler_cls():
+            return cls(postgresql, self._config)
+        raise PatroniException(f'Failed to initialize {self.__class__.__name__}Handler object')
+
+
+class AbstractMPPHandler(AbstractMPP):
+    """An abstract class which defines interfaces that should be implemented by real handlers."""
+
+    def __init__(self, postgresql: 'Postgresql', config: Dict[str, Union[str, int]]) -> None:
+        """Init method for :class:`AbstractMPPHandler`.
+
+        :param postgresql: a reference to :class:`Postgresql` object.
+        :param config: configuration of MPP section.
+ """ + super().__init__(config) + self._postgresql = postgresql + + @abc.abstractmethod + def handle_event(self, cluster: Cluster, event: Dict[str, Any]) -> None: + """Handle an event sent from a worker node. + + :param cluster: the currently known cluster state from DCS. + :param event: the event to be handled. + """ + + @abc.abstractmethod + def sync_meta_data(self, cluster: Cluster) -> None: + """Sync meta data on the coordinator. + + :param cluster: the currently known cluster state from DCS. + """ + + @abc.abstractmethod + def on_demote(self) -> None: + """On demote handler. + + Is called when the primary was demoted. + """ + + @abc.abstractmethod + def schedule_cache_rebuild(self) -> None: + """Cache rebuild handler. + + Is called to notify handler that it has to refresh its metadata cache from the database. + """ + + @abc.abstractmethod + def bootstrap(self) -> None: + """Bootstrap handler. + + Is called when the new cluster is initialized (through ``initdb`` or a custom bootstrap method). + """ + + @abc.abstractmethod + def adjust_postgres_gucs(self, parameters: Dict[str, Any]) -> None: + """Adjust GUCs in the current PostgreSQL configuration. + + :param parameters: dictionary of GUCs, with key as GUC name and the corresponding value as current GUC value. + """ + + @abc.abstractmethod + def ignore_replication_slot(self, slot: Dict[str, str]) -> bool: + """Check whether provided replication *slot* existing in the database should not be removed. + + .. note:: + MPP database may create replication slots for its own use, for example to migrate data between workers + using logical replication, and we don't want to suddenly drop them. + + :param slot: dictionary containing the replication slot settings, like ``name``, ``database``, ``type``, and + ``plugin``. + + :returns: ``True`` if the replication slots should not be removed, otherwise ``False``. 
+ """ + + +class Null(AbstractMPP): + """Dummy implementation of :class:`AbstractMPP`.""" + + def __init__(self) -> None: + """Init method for :class:`Null`.""" + super().__init__({}) + + @staticmethod + def validate_config(config: Any) -> bool: + """Check whether provided config is good for :class:`Null`. + + :returns: always ``True``. + """ + return True + + @property + def group(self) -> None: + """The group for :class:`Null`. + + :returns: always ``None``. + """ + return None + + @property + def coordinator_group_id(self) -> None: + """The group id of the coordinator PostgreSQL cluster. + + :returns: always ``None``. + """ + return None + + +class NullHandler(Null, AbstractMPPHandler): + """Dummy implementation of :class:`AbstractMPPHandler`.""" + + def __init__(self, postgresql: 'Postgresql', config: Dict[str, Union[str, int]]) -> None: + """Init method for :class:`NullHandler`. + + :param postgresql: a reference to :class:`Postgresql` object. + :param config: configuration of MPP section. + """ + AbstractMPPHandler.__init__(self, postgresql, config) + + def handle_event(self, cluster: Cluster, event: Dict[str, Any]) -> None: + """Handle an event sent from a worker node. + + :param cluster: the currently known cluster state from DCS. + :param event: the event to be handled. + """ + + def sync_meta_data(self, cluster: Cluster) -> None: + """Sync meta data on the coordinator. + + :param cluster: the currently known cluster state from DCS. + """ + + def on_demote(self) -> None: + """On demote handler. + + Is called when the primary was demoted. + """ + + def schedule_cache_rebuild(self) -> None: + """Cache rebuild handler. + + Is called to notify handler that it has to refresh its metadata cache from the database. + """ + + def bootstrap(self) -> None: + """Bootstrap handler. + + Is called when the new cluster is initialized (through ``initdb`` or a custom bootstrap method). 
+ """ + + def adjust_postgres_gucs(self, parameters: Dict[str, Any]) -> None: + """Adjust GUCs in the current PostgreSQL configuration. + + :param parameters: dictionary of GUCs, with key as GUC name and corresponding value as current GUC value. + """ + + def ignore_replication_slot(self, slot: Dict[str, str]) -> bool: + """Check whether provided replication *slot* existing in the database should not be removed. + + .. note:: + MPP database may create replication slots for its own use, for example to migrate data between workers + using logical replication, and we don't want to suddenly drop them. + + :param slot: dictionary containing the replication slot settings, like ``name``, ``database``, ``type``, and + ``plugin``. + + :returns: always ``False``. + """ + return False + + +def iter_mpp_classes( + config: Optional[Union['Config', Dict[str, Any]]] = None +) -> Iterator[Tuple[str, Type[AbstractMPP]]]: + """Attempt to import MPP modules that are present in the given configuration. + + :param config: configuration information with possible MPP names as keys. If given, only attempt to import MPP + modules defined in the configuration. Else, if ``None``, attempt to import any supported MPP module. + + :yields: tuples, each containing the module ``name`` and the imported MPP class object. + """ + yield from iter_classes(__package__, AbstractMPP, config) + + +def get_mpp(config: Union['Config', Dict[str, Any]]) -> AbstractMPP: + """Attempt to load and instantiate a MPP module from known available implementations. + + :param config: object or dictionary with Patroni configuration. + + :returns: The successfully loaded MPP or fallback to :class:`Null`. 
+ """ + for name, mpp_class in iter_mpp_classes(config): + if mpp_class.validate_config(config[name]): + return mpp_class(config[name]) + return Null() diff --git a/patroni/postgresql/citus.py b/patroni/postgresql/mpp/citus.py similarity index 84% rename from patroni/postgresql/citus.py rename to patroni/postgresql/mpp/citus.py index b50dc1d09..a17d1435b 100644 --- a/patroni/postgresql/citus.py +++ b/patroni/postgresql/mpp/citus.py @@ -6,12 +6,15 @@ from urllib.parse import urlparse from typing import Any, Dict, List, Optional, Union, Tuple, TYPE_CHECKING -from ..dcs import CITUS_COORDINATOR_GROUP_ID, Cluster -from ..psycopg import connect, quote_ident +from . import AbstractMPP, AbstractMPPHandler +from ...dcs import Cluster +from ...psycopg import connect, quote_ident +from ...utils import parse_int if TYPE_CHECKING: # pragma: no cover - from . import Postgresql + from .. import Postgresql +CITUS_COORDINATOR_GROUP_ID = 0 CITUS_SLOT_NAME_RE = re.compile(r'^citus_shard_(move|split)_slot(_[1-9][0-9]*){2,3}$') logger = logging.getLogger(__name__) @@ -63,13 +66,45 @@ def __repr__(self) -> str: return str(self) -class CitusHandler(Thread): +class Citus(AbstractMPP): - def __init__(self, postgresql: 'Postgresql', config: Optional[Dict[str, Union[str, int]]]) -> None: - super(CitusHandler, self).__init__() + group_re = re.compile('^(0|[1-9][0-9]*)$') + + @staticmethod + def validate_config(config: Union[Any, Dict[str, Union[str, int]]]) -> bool: + """Check whether provided config is good for a given MPP. + + :param config: configuration of ``citus`` MPP section. + + :returns: ``True`` is config passes validation, otherwise ``False``. 
+        """
+        return isinstance(config, dict) \
+            and isinstance(config.get('database'), str) \
+            and parse_int(config.get('group')) is not None
+
+    @property
+    def group(self) -> int:
+        """The group of this Citus node."""
+        return int(self._config['group'])
+
+    @property
+    def coordinator_group_id(self) -> int:
+        """The group id of the Citus coordinator PostgreSQL cluster."""
+        return CITUS_COORDINATOR_GROUP_ID
+
+
+class CitusHandler(Citus, AbstractMPPHandler, Thread):
+    """Define the interfaces for handling an underlying Citus cluster."""
+
+    def __init__(self, postgresql: 'Postgresql', config: Dict[str, Union[str, int]]) -> None:
+        """Initialize a new instance of :class:`CitusHandler`.
+
+        :param postgresql: the Postgres node.
+        :param config: the ``citus`` MPP config section.
+        """
+        Thread.__init__(self)
+        AbstractMPPHandler.__init__(self, postgresql, config)
         self.daemon = True
-        self._postgresql = postgresql
-        self._config = config
         if config:
             self._connection = postgresql.connection_pool.get(
                 'citus', {'dbname': config['database'],
@@ -81,19 +116,11 @@ def __init__(self, postgresql: 'Postgresql', config: Optional[Dict[str, Union[st
             self._condition = Condition()  # protects _pg_dist_node, _tasks, _in_flight, and _schedule_load_pg_dist_node
             self.schedule_cache_rebuild()
 
-    def is_enabled(self) -> bool:
-        return isinstance(self._config, dict)
-
-    def group(self) -> Optional[int]:
-        return int(self._config['group']) if isinstance(self._config, dict) else None
-
-    def is_coordinator(self) -> bool:
-        return self.is_enabled() and self.group() == CITUS_COORDINATOR_GROUP_ID
-
-    def is_worker(self) -> bool:
-        return self.is_enabled() and not self.is_coordinator()
-
     def schedule_cache_rebuild(self) -> None:
+        """Cache rebuild handler.
+
+        Is called to notify handler that it has to refresh its metadata cache from the database.
+ """ with self._condition: self._schedule_load_pg_dist_node = True @@ -134,8 +161,8 @@ def load_pg_dist_node(self) -> bool: self._pg_dist_node = {r[1]: PgDistNode(r[1], r[2], r[3], 'after_promote', r[0]) for r in rows} return True - def sync_pg_dist_node(self, cluster: Cluster) -> None: - """Maintain the `pg_dist_node` from the coordinator leader every heartbeat loop. + def sync_meta_data(self, cluster: Cluster) -> None: + """Maintain the ``pg_dist_node`` from the coordinator leader every heartbeat loop. We can't always rely on REST API calls from worker nodes in order to maintain `pg_dist_node`, therefore at least once per heartbeat @@ -296,16 +323,16 @@ def _add_task(self, task: PgDistNode) -> bool: with self._condition: i = self.find_task_by_group(task.group) - # The `PgDistNode.timeout` == None is an indicator that it was scheduled from the sync_pg_dist_node(). + # The `PgDistNode.timeout` == None is an indicator that it was scheduled from the sync_meta_data(). if task.timeout is None: # We don't want to override the already existing task created from REST API. if i is not None and self._tasks[i].timeout is not None: return False # There is a little race condition with tasks created from REST API - the call made "before" the member - # key is updated in DCS. Therefore it is possible that :func:`sync_pg_dist_node` will try to create a - # task based on the outdated values of "state"/"role". To solve it we introduce an artificial timeout. - # Only when the timeout is reached new tasks could be scheduled from sync_pg_dist_node() + # key is updated in DCS. Therefore it is possible that :func:`sync_meta_data` will try to create a task + # based on the outdated values of "state"/"role". To solve it we introduce an artificial timeout. 
+ # Only when the timeout is reached new tasks could be scheduled from sync_meta_data() if self._in_flight and self._in_flight.group == task.group and self._in_flight.timeout is not None\ and self._in_flight.deadline > time.time(): return False @@ -353,9 +380,10 @@ def handle_event(self, cluster: Cluster, event: Dict[str, Any]) -> None: task.wait() def bootstrap(self) -> None: - if not isinstance(self._config, dict): # self.is_enabled() - return + """Bootstrap handler. + Is called when the new cluster is initialized (through ``initdb`` or a custom bootstrap method). + """ conn_kwargs = {**self._postgresql.connection_pool.conn_kwargs, 'options': '-c synchronous_commit=local -c statement_timeout=0'} if self._config['database'] != self._postgresql.database: @@ -388,9 +416,10 @@ def bootstrap(self) -> None: conn.close() def adjust_postgres_gucs(self, parameters: Dict[str, Any]) -> None: - if not self.is_enabled(): - return + """Adjust GUCs in the current PostgreSQL configuration. + :param parameters: dictionary of GUCs, with key as GUC name and the corresponding value as current GUC value. + """ # citus extension must be on the first place in shared_preload_libraries shared_preload_libraries = list(filter( lambda el: el and el != 'citus', @@ -408,8 +437,18 @@ def adjust_postgres_gucs(self, parameters: Dict[str, Any]) -> None: parameters['citus.local_hostname'] = self._postgresql.connection_pool.conn_kwargs.get('host', 'localhost') def ignore_replication_slot(self, slot: Dict[str, str]) -> bool: - if isinstance(self._config, dict) and self._postgresql.is_primary() and\ - slot['type'] == 'logical' and slot['database'] == self._config['database']: + """Check whether provided replication *slot* existing in the database should not be removed. + + .. note:: + MPP database may create replication slots for its own use, for example to migrate data between workers + using logical replication, and we don't want to suddenly drop them. 
+ + :param slot: dictionary containing the replication slot settings, like ``name``, ``database``, ``type``, and + ``plugin``. + + :returns: ``True`` if the replication slots should not be removed, otherwise ``False``. + """ + if self._postgresql.is_primary() and slot['type'] == 'logical' and slot['database'] == self._config['database']: m = CITUS_SLOT_NAME_RE.match(slot['name']) return bool(m and {'move': 'pgoutput', 'split': 'citus'}.get(m.group(1)) == slot['plugin']) return False diff --git a/patroni/postgresql/slots.py b/patroni/postgresql/slots.py index fb9448cd6..7f7fd294a 100644 --- a/patroni/postgresql/slots.py +++ b/patroni/postgresql/slots.py @@ -291,7 +291,7 @@ def ignore_replication_slot(self, cluster: Cluster, name: str) -> bool: :param name: name of the slot to ignore :returns: ``True`` if slot *name* matches any slot specified in ``ignore_slots`` configuration, - otherwise will pass through and return result of :meth:`CitusHandler.ignore_replication_slot`. + otherwise will pass through and return result of :meth:`AbstractMPPHandler.ignore_replication_slot`. 
""" slot = self._replication_slots[name] if cluster.config: diff --git a/tests/__init__.py b/tests/__init__.py index b013e4e16..986bd88b0 100644 --- a/tests/__init__.py +++ b/tests/__init__.py @@ -12,6 +12,7 @@ from patroni.dcs import Leader, Member from patroni.postgresql import Postgresql from patroni.postgresql.config import ConfigHandler +from patroni.postgresql.mpp import get_mpp from patroni.utils import RetryFailedError, tzutc @@ -252,23 +253,24 @@ class PostgresInit(unittest.TestCase): @patch.object(Postgresql, 'get_postgres_role_from_data_directory', Mock(return_value='primary')) def setUp(self): data_dir = os.path.join('data', 'test0') - self.p = Postgresql({'name': 'postgresql0', 'scope': 'batman', 'data_dir': data_dir, - 'config_dir': data_dir, 'retry_timeout': 10, - 'krbsrvname': 'postgres', 'pgpass': os.path.join(data_dir, 'pgpass0'), - 'listen': '127.0.0.2, 127.0.0.3:5432', - 'connect_address': '127.0.0.2:5432', 'proxy_address': '127.0.0.2:5433', - 'authentication': {'superuser': {'username': 'foo', 'password': 'test'}, - 'replication': {'username': '', 'password': 'rep-pass'}, - 'rewind': {'username': 'rewind', 'password': 'test'}}, - 'remove_data_directory_on_rewind_failure': True, - 'use_pg_rewind': True, 'pg_ctl_timeout': 'bla', 'use_unix_socket': True, - 'parameters': self._PARAMETERS, - 'recovery_conf': {'foo': 'bar'}, - 'pg_hba': ['host all all 0.0.0.0/0 md5'], - 'pg_ident': ['krb realm postgres'], - 'callbacks': {'on_start': 'true', 'on_stop': 'true', 'on_reload': 'true', - 'on_restart': 'true', 'on_role_change': 'true'}, - 'citus': {'group': 0, 'database': 'citus'}}) + config = {'name': 'postgresql0', 'scope': 'batman', 'data_dir': data_dir, + 'config_dir': data_dir, 'retry_timeout': 10, + 'krbsrvname': 'postgres', 'pgpass': os.path.join(data_dir, 'pgpass0'), + 'listen': '127.0.0.2, 127.0.0.3:5432', + 'connect_address': '127.0.0.2:5432', 'proxy_address': '127.0.0.2:5433', + 'authentication': {'superuser': {'username': 'foo', 'password': 
'test'}, + 'replication': {'username': '', 'password': 'rep-pass'}, + 'rewind': {'username': 'rewind', 'password': 'test'}}, + 'remove_data_directory_on_rewind_failure': True, + 'use_pg_rewind': True, 'pg_ctl_timeout': 'bla', 'use_unix_socket': True, + 'parameters': self._PARAMETERS, + 'recovery_conf': {'foo': 'bar'}, + 'pg_hba': ['host all all 0.0.0.0/0 md5'], + 'pg_ident': ['krb realm postgres'], + 'callbacks': {'on_start': 'true', 'on_stop': 'true', 'on_reload': 'true', + 'on_restart': 'true', 'on_role_change': 'true'}, + 'citus': {'group': 0, 'database': 'citus'}} + self.p = Postgresql(config, get_mpp(config)) class BaseTestPostgresql(PostgresInit): diff --git a/tests/test_citus.py b/tests/test_citus.py index 7279893e0..dbf6d9cf6 100644 --- a/tests/test_citus.py +++ b/tests/test_citus.py @@ -1,12 +1,12 @@ import time from mock import Mock, patch -from patroni.postgresql.citus import CitusHandler +from patroni.postgresql.mpp.citus import CitusHandler from . import BaseTestPostgresql, MockCursor, psycopg_connect, SleepException from .test_ha import get_cluster_initialized_with_leader -@patch('patroni.postgresql.citus.Thread', Mock()) +@patch('patroni.postgresql.mpp.citus.Thread', Mock()) @patch('patroni.psycopg.connect', psycopg_connect) class TestCitus(BaseTestPostgresql): @@ -17,9 +17,9 @@ def setUp(self): self.cluster.workers[1] = self.cluster @patch('time.time', Mock(side_effect=[100, 130, 160, 190, 220, 250, 280, 310, 340, 370])) - @patch('patroni.postgresql.citus.logger.exception', Mock(side_effect=SleepException)) - @patch('patroni.postgresql.citus.logger.warning') - @patch('patroni.postgresql.citus.PgDistNode.wait', Mock()) + @patch('patroni.postgresql.mpp.citus.logger.exception', Mock(side_effect=SleepException)) + @patch('patroni.postgresql.mpp.citus.logger.warning') + @patch('patroni.postgresql.mpp.citus.PgDistNode.wait', Mock()) @patch.object(CitusHandler, 'is_alive', Mock(return_value=True)) def test_run(self, mock_logger_warning): # `before_demote` 
or `before_promote` REST API calls starting a @@ -39,10 +39,10 @@ def test_run(self, mock_logger_warning): @patch.object(CitusHandler, 'is_alive', Mock(return_value=False)) @patch.object(CitusHandler, 'start', Mock()) - def test_sync_pg_dist_node(self): + def test_sync_meta_data(self): with patch.object(CitusHandler, 'is_enabled', Mock(return_value=False)): - self.c.sync_pg_dist_node(self.cluster) - self.c.sync_pg_dist_node(self.cluster) + self.c.sync_meta_data(self.cluster) + self.c.sync_meta_data(self.cluster) def test_handle_event(self): self.c.handle_event(self.cluster, {}) @@ -51,22 +51,22 @@ def test_handle_event(self): 'leader': 'leader', 'timeout': 30, 'cooldown': 10}) def test_add_task(self): - with patch('patroni.postgresql.citus.logger.error') as mock_logger, \ - patch('patroni.postgresql.citus.urlparse', Mock(side_effect=Exception)): + with patch('patroni.postgresql.mpp.citus.logger.error') as mock_logger, \ + patch('patroni.postgresql.mpp.citus.urlparse', Mock(side_effect=Exception)): self.c.add_task('', 1, None) mock_logger.assert_called_once() - with patch('patroni.postgresql.citus.logger.debug') as mock_logger: + with patch('patroni.postgresql.mpp.citus.logger.debug') as mock_logger: self.c.add_task('before_demote', 1, 'postgres://host:5432/postgres', 30) mock_logger.assert_called_once() self.assertTrue(mock_logger.call_args[0][0].startswith('Adding the new task:')) - with patch('patroni.postgresql.citus.logger.debug') as mock_logger: + with patch('patroni.postgresql.mpp.citus.logger.debug') as mock_logger: self.c.add_task('before_promote', 1, 'postgres://host:5432/postgres', 30) mock_logger.assert_called_once() self.assertTrue(mock_logger.call_args[0][0].startswith('Overriding existing task:')) - # add_task called from sync_pg_dist_node should not override already scheduled or in flight task until deadline + # add_task called from sync_meta_data should not override already scheduled or in flight task until deadline 
self.assertIsNotNone(self.c.add_task('after_promote', 1, 'postgres://host:5432/postgres', 30)) self.assertIsNone(self.c.add_task('after_promote', 1, 'postgres://host:5432/postgres')) self.c._in_flight = self.c._tasks.pop() @@ -106,7 +106,7 @@ def test_process_tasks(self): self.c.process_tasks() self.c.add_task('after_promote', 0, 'postgres://host3:5432/postgres') - with patch('patroni.postgresql.citus.logger.error') as mock_logger, \ + with patch('patroni.postgresql.mpp.citus.logger.error') as mock_logger, \ patch.object(CitusHandler, 'query', Mock(side_effect=Exception)): self.c.process_tasks() mock_logger.assert_called_once() @@ -115,7 +115,7 @@ def test_process_tasks(self): def test_on_demote(self): self.c.on_demote() - @patch('patroni.postgresql.citus.logger.error') + @patch('patroni.postgresql.mpp.citus.logger.error') @patch.object(MockCursor, 'execute', Mock(side_effect=Exception)) def test_load_pg_dist_node(self, mock_logger): # load_pg_dist_node() triggers, query fails and exception is property handled @@ -140,10 +140,6 @@ def test_adjust_postgres_gucs(self): self.assertEqual(parameters['wal_level'], 'logical') self.assertEqual(parameters['citus.local_hostname'], '/tmp') - def test_bootstrap(self): - self.c._config = None - self.c.bootstrap() - def test_ignore_replication_slot(self): self.assertFalse(self.c.ignore_replication_slot({'name': 'foo', 'type': 'physical', 'database': 'bar', 'plugin': 'wal2json'})) diff --git a/tests/test_consul.py b/tests/test_consul.py index 83ee67d83..494d11266 100644 --- a/tests/test_consul.py +++ b/tests/test_consul.py @@ -3,8 +3,10 @@ from consul import ConsulException, NotFound from mock import Mock, PropertyMock, patch +from patroni.dcs import get_dcs from patroni.dcs.consul import AbstractDCS, Cluster, Consul, ConsulInternalError, \ ConsulError, ConsulClient, HTTPClient, InvalidSessionTTL, InvalidSession, RetryFailedError +from patroni.postgresql.mpp import get_mpp from . 
import SleepException @@ -91,13 +93,17 @@ class TestConsul(unittest.TestCase): @patch.object(consul.Consul.KV, 'get', kv_get) @patch.object(consul.Consul.KV, 'delete', Mock()) def setUp(self): - Consul({'ttl': 30, 'scope': 't', 'name': 'p', 'url': 'https://l:1', 'retry_timeout': 10, - 'verify': 'on', 'key': 'foo', 'cert': 'bar', 'cacert': 'buz', 'token': 'asd', 'dc': 'dc1', - 'register_service': True}) - Consul({'ttl': 30, 'scope': 't_', 'name': 'p', 'url': 'https://l:1', 'retry_timeout': 10, - 'verify': 'on', 'cert': 'bar', 'cacert': 'buz', 'register_service': True}) - self.c = Consul({'ttl': 30, 'scope': 'test', 'name': 'postgresql1', 'host': 'localhost:1', 'retry_timeout': 10, - 'register_service': True, 'service_check_tls_server_name': True}) + self.assertIsInstance(get_dcs({'ttl': 30, 'scope': 't', 'name': 'p', 'retry_timeout': 10, + 'consul': {'url': 'https://l:1', 'verify': 'on', + 'key': 'foo', 'cert': 'bar', 'cacert': 'buz', + 'token': 'asd', 'dc': 'dc1', 'register_service': True}}), Consul) + self.assertIsInstance(get_dcs({'ttl': 30, 'scope': 't_', 'name': 'p', 'retry_timeout': 10, + 'consul': {'url': 'https://l:1', 'verify': 'on', + 'cert': 'bar', 'cacert': 'buz', 'register_service': True}}), Consul) + self.c = get_dcs({'ttl': 30, 'scope': 'test', 'name': 'postgresql1', 'retry_timeout': 10, + 'consul': {'host': 'localhost:1', 'register_service': True, + 'service_check_tls_server_name': True}}) + self.assertIsInstance(self.c, Consul) self.c._base_path = 'service/good' self.c.get_cluster() @@ -130,7 +136,7 @@ def test_get_cluster(self): self.assertIsInstance(self.c.get_cluster(), Cluster) def test__get_citus_cluster(self): - self.c._citus_group = '0' + self.c._mpp = get_mpp({'citus': {'group': 0, 'database': 'postgres'}}) cluster = self.c.get_cluster() self.assertIsInstance(cluster, Cluster) self.assertIsInstance(cluster.workers[1], Cluster) diff --git a/tests/test_ctl.py b/tests/test_ctl.py index f9ee62ced..f5341f88a 100644 --- a/tests/test_ctl.py +++ 
b/tests/test_ctl.py @@ -12,6 +12,7 @@ get_all_members, get_any_member, get_cursor, query_member, PatroniCtlException, apply_config_changes, \ format_config_for_editing, show_diff, invoke_editor, format_pg_version, CONFIG_FILE_PATH, PatronictlPrettyTable from patroni.dcs import Cluster, Failover +from patroni.postgresql.mpp import get_mpp from patroni.psycopg import OperationalError from patroni.utils import tzutc from prettytable import PrettyTable, ALL @@ -69,7 +70,7 @@ def test_load_config(self, mock_logger_debug): @patch('patroni.psycopg.connect', psycopg_connect) def test_get_cursor(self): with click.Context(click.Command('query')) as ctx: - ctx.obj = {'__config': {}} + ctx.obj = {'__config': {}, '__mpp': get_mpp({})} for role in self.TEST_ROLES: self.assertIsNone(get_cursor(get_cluster_initialized_without_leader(), None, {}, role=role)) self.assertIsNotNone(get_cursor(get_cluster_initialized_with_leader(), None, {}, role=role)) @@ -107,7 +108,7 @@ def test_parse_dcs(self): def test_output_members(self): with click.Context(click.Command('list')) as ctx: - ctx.obj = {'__config': {}} + ctx.obj = {'__config': {}, '__mpp': get_mpp({})} scheduled_at = datetime.now(tzutc) + timedelta(seconds=600) cluster = get_cluster_initialized_with_leader(Failover(1, 'foo', 'bar', scheduled_at)) del cluster.members[1].data['conn_url'] @@ -250,7 +251,7 @@ def test_failover(self): @patch('patroni.dynamic_loader.iter_modules', Mock(return_value=['patroni.dcs.dummy', 'patroni.dcs.etcd'])) def test_get_dcs(self): with click.Context(click.Command('list')) as ctx: - ctx.obj = {'__config': {'dummy': {}}} + ctx.obj = {'__config': {'dummy': {}}, '__mpp': get_mpp({})} self.assertRaises(PatroniCtlException, get_dcs, 'dummy', 0) @patch('patroni.psycopg.connect', psycopg_connect) @@ -439,7 +440,7 @@ def test_ctl(self): def test_get_any_member(self): with click.Context(click.Command('list')) as ctx: - ctx.obj = {'__config': {}} + ctx.obj = {'__config': {}, '__mpp': get_mpp({})} for role in 
self.TEST_ROLES: self.assertIsNone(get_any_member(get_cluster_initialized_without_leader(), None, role=role)) @@ -448,7 +449,7 @@ def test_get_any_member(self): def test_get_all_members(self): with click.Context(click.Command('list')) as ctx: - ctx.obj = {'__config': {}} + ctx.obj = {'__config': {}, '__mpp': get_mpp({})} for role in self.TEST_ROLES: self.assertEqual(list(get_all_members(get_cluster_initialized_without_leader(), None, role=role)), []) diff --git a/tests/test_etcd.py b/tests/test_etcd.py index 874aac5cc..d7a423bec 100644 --- a/tests/test_etcd.py +++ b/tests/test_etcd.py @@ -5,8 +5,10 @@ from dns.exception import DNSException from mock import Mock, PropertyMock, patch +from patroni.dcs import get_dcs from patroni.dcs.etcd import AbstractDCS, EtcdClient, Cluster, Etcd, EtcdError, DnsCachingResolver from patroni.exceptions import DCSError +from patroni.postgresql.mpp import get_mpp from patroni.utils import Retry from urllib3.exceptions import ReadTimeoutError @@ -138,8 +140,9 @@ class TestClient(unittest.TestCase): @patch.object(EtcdClient, '_get_machines_list', Mock(return_value=['http://localhost:2379', 'http://localhost:4001'])) def setUp(self): - self.etcd = Etcd({'namespace': '/patroni/', 'ttl': 30, 'retry_timeout': 3, - 'srv': 'test', 'scope': 'test', 'name': 'foo'}) + self.etcd = get_dcs({'namespace': '/patroni/', 'ttl': 30, 'retry_timeout': 3, + 'etcd': {'srv': 'test'}, 'scope': 'test', 'name': 'foo'}) + self.assertIsInstance(self.etcd, Etcd) self.client = self.etcd._client self.client.http.request = http_request self.client.http.request_encode_body = http_request @@ -235,7 +238,7 @@ class TestEtcd(unittest.TestCase): Mock(return_value=['http://localhost:2379', 'http://localhost:4001'])) def setUp(self): self.etcd = Etcd({'namespace': '/patroni/', 'ttl': 30, 'retry_timeout': 10, - 'host': 'localhost:2379', 'scope': 'test', 'name': 'foo'}) + 'host': 'localhost:2379', 'scope': 'test', 'name': 'foo'}, get_mpp({})) def test_base_path(self): 
self.assertEqual(self.etcd._base_path, '/patroni/test') @@ -270,7 +273,7 @@ def test_get_cluster(self): self.assertRaises(EtcdError, self.etcd.get_cluster) def test__get_citus_cluster(self): - self.etcd._citus_group = '0' + self.etcd._mpp = get_mpp({'citus': {'group': 0, 'database': 'postgres'}}) cluster = self.etcd.get_cluster() self.assertIsInstance(cluster, Cluster) self.assertIsInstance(cluster.workers[1], Cluster) diff --git a/tests/test_etcd3.py b/tests/test_etcd3.py index 10ab1ea50..fcfd4e4b6 100644 --- a/tests/test_etcd3.py +++ b/tests/test_etcd3.py @@ -4,10 +4,12 @@ import urllib3 from mock import Mock, PropertyMock, patch +from patroni.dcs import get_dcs from patroni.dcs.etcd import DnsCachingResolver from patroni.dcs.etcd3 import PatroniEtcd3Client, Cluster, Etcd3, Etcd3Client, \ Etcd3Error, Etcd3ClientError, ReAuthenticateMode, RetryFailedError, InvalidAuthToken, Unavailable, \ Unknown, UnsupportedEtcdVersion, UserEmpty, AuthFailed, AuthOldRevision, base64_encode +from patroni.postgresql.mpp import get_mpp from threading import Thread from . 
import SleepException, MockResponse @@ -80,9 +82,9 @@ class BaseTestEtcd3(unittest.TestCase): @patch.object(Thread, 'start', Mock()) @patch.object(urllib3.PoolManager, 'urlopen', mock_urlopen) def setUp(self): - self.etcd3 = Etcd3({'namespace': '/patroni/', 'ttl': 30, 'retry_timeout': 10, - 'host': 'localhost:2378', 'scope': 'test', 'name': 'foo', - 'username': 'etcduser', 'password': 'etcdpassword'}) + self.etcd3 = get_dcs({'namespace': '/patroni/', 'ttl': 30, 'retry_timeout': 10, 'name': 'foo', 'scope': 'test', + 'etcd3': {'host': 'localhost:2378', 'username': 'etcduser', 'password': 'etcdpassword'}}) + self.assertIsInstance(self.etcd3, Etcd3) self.client = self.etcd3._client self.kv_cache = self.client._kv_cache @@ -236,7 +238,7 @@ def test_get_cluster(self): self.assertRaises(Etcd3Error, self.etcd3.get_cluster) def test__get_citus_cluster(self): - self.etcd3._citus_group = '0' + self.etcd3._mpp = get_mpp({'citus': {'group': 0, 'database': 'postgres'}}) cluster = self.etcd3.get_cluster() self.assertIsInstance(cluster, Cluster) self.assertIsInstance(cluster.workers[1], Cluster) diff --git a/tests/test_exhibitor.py b/tests/test_exhibitor.py index a908e1fc2..5a72eb21b 100644 --- a/tests/test_exhibitor.py +++ b/tests/test_exhibitor.py @@ -2,6 +2,7 @@ import urllib3 from mock import Mock, patch +from patroni.dcs import get_dcs from patroni.dcs.exhibitor import ExhibitorEnsembleProvider, Exhibitor from patroni.dcs.zookeeper import ZooKeeperError @@ -26,8 +27,9 @@ class TestExhibitor(unittest.TestCase): status=200, body=b'{"servers":["127.0.0.1","127.0.0.2","127.0.0.3"],"port":2181}'))) @patch('patroni.dcs.zookeeper.PatroniKazooClient', MockKazooClient) def setUp(self): - self.e = Exhibitor({'hosts': ['localhost', 'exhibitor'], 'port': 8181, 'scope': 'test', - 'name': 'foo', 'ttl': 30, 'retry_timeout': 10}) + self.e = get_dcs({'exhibitor': {'hosts': ['localhost', 'exhibitor'], 'port': 8181}, + 'scope': 'test', 'name': 'foo', 'ttl': 30, 'retry_timeout': 10}) + 
self.assertIsInstance(self.e, Exhibitor) @patch.object(ExhibitorEnsembleProvider, 'poll', Mock(return_value=True)) @patch.object(MockKazooClient, 'get_children', Mock(side_effect=Exception)) diff --git a/tests/test_ha.py b/tests/test_ha.py index 5b1d4562d..45f641647 100644 --- a/tests/test_ha.py +++ b/tests/test_ha.py @@ -197,7 +197,7 @@ def run_async(self, func, args=()): @patch('patroni.async_executor.AsyncExecutor.busy', PropertyMock(return_value=False)) @patch('patroni.async_executor.AsyncExecutor.run_async', run_async) @patch('patroni.postgresql.rewind.Thread', Mock()) -@patch('patroni.postgresql.citus.CitusHandler.start', Mock()) +@patch('patroni.postgresql.mpp.citus.CitusHandler.start', Mock()) @patch('subprocess.call', Mock(return_value=0)) @patch('time.sleep', Mock()) class TestHa(PostgresInit): @@ -593,8 +593,8 @@ def test_bootstrap_initialize_lock_failed(self): self.assertEqual(self.ha.bootstrap(), 'failed to acquire initialize lock') @patch('patroni.psycopg.connect', psycopg_connect) - @patch('patroni.postgresql.citus.connect', psycopg_connect) - @patch('patroni.postgresql.citus.quote_ident', Mock()) + @patch('patroni.postgresql.mpp.citus.connect', psycopg_connect) + @patch('patroni.postgresql.mpp.citus.quote_ident', Mock()) @patch.object(Postgresql, 'connection', Mock(return_value=None)) def test_bootstrap_initialized_new_cluster(self): self.ha.cluster = get_cluster_not_initialized_without_leader() @@ -615,8 +615,8 @@ def test_bootstrap_release_initialize_key_on_failure(self): self.assertRaises(PatroniFatalException, self.ha.post_bootstrap) @patch('patroni.psycopg.connect', psycopg_connect) - @patch('patroni.postgresql.citus.connect', psycopg_connect) - @patch('patroni.postgresql.citus.quote_ident', Mock()) + @patch('patroni.postgresql.mpp.citus.connect', psycopg_connect) + @patch('patroni.postgresql.mpp.citus.quote_ident', Mock()) @patch.object(Postgresql, 'connection', Mock(return_value=None)) def 
test_bootstrap_release_initialize_key_on_watchdog_failure(self): self.ha.cluster = get_cluster_not_initialized_without_leader() @@ -659,7 +659,7 @@ def test_restart(self): @patch.object(ConfigHandler, 'replace_pg_hba', Mock()) @patch.object(ConfigHandler, 'replace_pg_ident', Mock()) @patch.object(PostmasterProcess, 'start', Mock(return_value=MockPostmaster())) - @patch('patroni.postgresql.citus.CitusHandler.is_coordinator', Mock(return_value=False)) + @patch('patroni.postgresql.mpp.AbstractMPPHandler.is_coordinator', Mock(return_value=False)) def test_worker_restart(self): self.ha.has_lock = true self.ha.patroni.request = Mock() @@ -694,7 +694,7 @@ def test_restart_in_progress(self): self.ha.is_paused = true self.assertEqual(self.ha.run_cycle(), 'PAUSE: restart in progress') - @patch('patroni.postgresql.citus.CitusHandler.is_coordinator', Mock(return_value=False)) + @patch('patroni.postgresql.mpp.AbstractMPPHandler.is_coordinator', Mock(return_value=False)) def test_manual_failover_from_leader(self): self.ha.has_lock = true # I am the leader @@ -733,7 +733,7 @@ def test_manual_failover_from_leader(self): ('Member %s exceeds maximum replication lag', 'b')) self.ha.cluster.members.pop() - @patch('patroni.postgresql.citus.CitusHandler.is_coordinator', Mock(return_value=False)) + @patch('patroni.postgresql.mpp.AbstractMPPHandler.is_coordinator', Mock(return_value=False)) def test_manual_switchover_from_leader(self): self.ha.has_lock = true # I am the leader @@ -774,7 +774,7 @@ def test_manual_switchover_from_leader(self): self.assertEqual(self.ha.run_cycle(), 'no action. 
I am (postgresql0), the leader with the lock') self.assertEqual(mock_info.call_args_list[0][0], ('Member %s exceeds maximum replication lag', 'leader')) - @patch('patroni.postgresql.citus.CitusHandler.is_coordinator', Mock(return_value=False)) + @patch('patroni.postgresql.mpp.AbstractMPPHandler.is_coordinator', Mock(return_value=False)) def test_scheduled_switchover_from_leader(self): self.ha.has_lock = true # I am the leader @@ -1544,7 +1544,7 @@ def stop(*args, **kwargs): self.ha.is_failover_possible = true self.ha.shutdown() - @patch('patroni.postgresql.citus.CitusHandler.is_coordinator', Mock(return_value=False)) + @patch('patroni.postgresql.mpp.AbstractMPPHandler.is_coordinator', Mock(return_value=False)) def test_shutdown_citus_worker(self): self.ha.is_leader = true self.p.is_running = Mock(side_effect=[Mock(), False]) @@ -1656,7 +1656,7 @@ def test_acquire_lock(self): self.assertRaises(DCSError, self.ha.acquire_lock) self.assertFalse(self.ha.acquire_lock()) - @patch('patroni.postgresql.citus.CitusHandler.is_coordinator', Mock(return_value=False)) + @patch('patroni.postgresql.mpp.AbstractMPPHandler.is_coordinator', Mock(return_value=False)) def test_notify_citus_coordinator(self): self.ha.patroni.request = Mock() self.ha.notify_citus_coordinator('before_demote') diff --git a/tests/test_kubernetes.py b/tests/test_kubernetes.py index b6db7fb31..c493d7999 100644 --- a/tests/test_kubernetes.py +++ b/tests/test_kubernetes.py @@ -8,9 +8,11 @@ import urllib3 from mock import Mock, PropertyMock, mock_open, patch +from patroni.dcs import get_dcs from patroni.dcs.kubernetes import Cluster, k8s_client, k8s_config, K8sConfig, K8sConnectionFailed, \ K8sException, K8sObject, Kubernetes, KubernetesError, KubernetesRetriableException, \ Retry, RetryFailedError, SERVICE_HOST_ENV_NAME, SERVICE_PORT_ENV_NAME +from patroni.postgresql.mpp import get_mpp from threading import Thread from . 
import MockResponse, SleepException @@ -225,11 +227,12 @@ class BaseTestKubernetes(unittest.TestCase): @patch.object(k8s_client.CoreV1Api, 'list_namespaced_pod', mock_list_namespaced_pod, create=True) @patch.object(k8s_client.CoreV1Api, 'list_namespaced_config_map', mock_list_namespaced_config_map, create=True) def setUp(self, config=None): - config = config or {} - config.update(ttl=30, scope='test', name='p-0', loop_wait=10, group=0, - retry_timeout=10, labels={'f': 'b'}, bypass_api_service=True) - self.k = Kubernetes(config) - self.k._citus_group = None + config = {'ttl': 30, 'scope': 'test', 'name': 'p-0', 'loop_wait': 10, 'retry_timeout': 10, + 'kubernetes': {'labels': {'f': 'b'}, 'bypass_api_service': True, **(config or {})}, + 'citus': {'group': 0, 'database': 'postgres'}} + self.k = get_dcs(config) + self.assertIsInstance(self.k, Kubernetes) + self.k._mpp = get_mpp({}) self.assertRaises(AttributeError, self.k._pods._build_cache) self.k._pods._is_ready = True self.assertRaises(TypeError, self.k._kinds._build_cache) @@ -254,7 +257,7 @@ def test_get_cluster(self): self.assertRaises(KubernetesError, self.k.get_cluster) def test__get_citus_cluster(self): - self.k._citus_group = '0' + self.k._mpp = get_mpp({'citus': {'group': 0, 'database': 'postgres'}}) cluster = self.k.get_cluster() self.assertIsInstance(cluster, Cluster) self.assertIsInstance(cluster.workers[1], Cluster) @@ -466,7 +469,7 @@ class TestCacheBuilder(BaseTestKubernetes): @patch('patroni.dcs.kubernetes.ObjectCache._watch', mock_watch) @patch.object(urllib3.HTTPResponse, 'read_chunked') def test__build_cache(self, mock_read_chunked): - self.k._citus_group = '0' + self.k._mpp = get_mpp({'citus': {'group': 0, 'database': 'postgres'}}) mock_read_chunked.return_value = [json.dumps( {'type': 'MODIFIED', 'object': {'metadata': { 'name': self.k.config_path, 'resourceVersion': '2', 'annotations': {self.k._CONFIG: 'foo'}}}} diff --git a/tests/test_mpp.py b/tests/test_mpp.py new file mode 100644 index 
000000000..9eb876334 --- /dev/null +++ b/tests/test_mpp.py @@ -0,0 +1,52 @@ +from typing import Any +from patroni.exceptions import PatroniException +from patroni.postgresql.mpp import AbstractMPP, get_mpp, Null + +from . import BaseTestPostgresql +from .test_ha import get_cluster_initialized_with_leader + + +class TestMPP(BaseTestPostgresql): + + def setUp(self): + super(TestMPP, self).setUp() + self.cluster = get_cluster_initialized_with_leader() + + def test_get_handler_impl_exception(self): + class DummyMPP(AbstractMPP): + def __init__(self) -> None: + super().__init__({}) + + @staticmethod + def validate_config(config: Any) -> bool: + return True + + @property + def group(self) -> None: + return None + + @property + def coordinator_group_id(self) -> None: + return None + + @property + def type(self) -> str: + return "dummy" + + mpp = DummyMPP() + self.assertRaises(PatroniException, mpp.get_handler_impl, self.p) + + def test_null_handler(self): + config = {} + mpp = get_mpp(config) + self.assertIsInstance(mpp, Null) + self.assertIsNone(mpp.group) + self.assertTrue(mpp.validate_config(config)) + nullHandler = mpp.get_handler_impl(self.p) + self.assertIsNone(nullHandler.handle_event(self.cluster, {})) + self.assertIsNone(nullHandler.sync_meta_data(self.cluster)) + self.assertIsNone(nullHandler.on_demote()) + self.assertIsNone(nullHandler.schedule_cache_rebuild()) + self.assertIsNone(nullHandler.bootstrap()) + self.assertIsNone(nullHandler.adjust_postgres_gucs({})) + self.assertFalse(nullHandler.ignore_replication_slot({})) diff --git a/tests/test_raft.py b/tests/test_raft.py index 9bb109e99..387a5a568 100644 --- a/tests/test_raft.py +++ b/tests/test_raft.py @@ -4,8 +4,10 @@ import time from mock import Mock, PropertyMock, patch +from patroni.dcs import get_dcs from patroni.dcs.raft import Cluster, DynMemberSyncObj, KVStoreTTL, \ Raft, RaftError, SyncObjUtility, TCPTransport, _TCPTransport +from patroni.postgresql.mpp import get_mpp from pysyncobj import 
SyncObjConf, FAIL_REASON @@ -128,9 +130,10 @@ class TestRaft(unittest.TestCase): _TMP = tempfile.gettempdir() def test_raft(self): - raft = Raft({'ttl': 30, 'scope': 'test', 'name': 'pg', 'self_addr': '127.0.0.1:1234', - 'retry_timeout': 10, 'data_dir': self._TMP, - 'database': 'citus', 'group': 0}) + raft = get_dcs({'ttl': 30, 'scope': 'test', 'name': 'pg', 'retry_timeout': 10, + 'raft': {'self_addr': '127.0.0.1:1234', 'data_dir': self._TMP}, + 'citus': {'group': 0, 'database': 'postgres'}}) + self.assertIsInstance(raft, Raft) raft.reload_config({'retry_timeout': 20, 'ttl': 60, 'loop_wait': 10}) self.assertTrue(raft._sync_obj.set(raft.members_path + 'legacy', '{"version":"2.0.0"}')) self.assertTrue(raft.touch_member('')) @@ -139,9 +142,9 @@ def test_raft(self): self.assertTrue(raft.set_config_value('{}')) self.assertTrue(raft.write_sync_state('foo', 'bar')) self.assertFalse(raft.write_sync_state('foo', 'bar', 1)) - raft._citus_group = '1' + raft._mpp = get_mpp({'citus': {'group': 1, 'database': 'postgres'}}) self.assertTrue(raft.manual_failover('foo', 'bar')) - raft._citus_group = '0' + raft._mpp = get_mpp({'citus': {'group': 0, 'database': 'postgres'}}) self.assertTrue(raft.take_leader()) cluster = raft.get_cluster() self.assertIsInstance(cluster, Cluster) @@ -157,9 +160,9 @@ def test_raft(self): self.assertTrue(raft.delete_sync_state()) self.assertTrue(raft.set_history_value('')) self.assertTrue(raft.delete_cluster()) - raft._citus_group = '1' + raft._mpp = get_mpp({'citus': {'group': 1, 'database': 'postgres'}}) self.assertTrue(raft.delete_cluster()) - raft._citus_group = None + raft._mpp = get_mpp({}) raft.get_cluster() raft.watch(None, 0.001) raft._sync_obj.destroy() @@ -175,5 +178,5 @@ def setUp(self): def test_init(self, mock_event, mock_kvstore): mock_kvstore.return_value.applied_local_log = False mock_event.return_value.is_set.side_effect = [False, True] - self.assertIsNotNone(Raft({'ttl': 30, 'scope': 'test', 'name': 'pg', 'patronictl': True, - 
'self_addr': '1', 'data_dir': self._TMP})) + self.assertIsInstance(get_dcs({'ttl': 30, 'scope': 'test', 'name': 'pg', 'patronictl': True, + 'raft': {'self_addr': '1', 'data_dir': self._TMP}}), Raft) diff --git a/tests/test_zookeeper.py b/tests/test_zookeeper.py index 3ce3ea75e..3cd034675 100644 --- a/tests/test_zookeeper.py +++ b/tests/test_zookeeper.py @@ -7,8 +7,10 @@ from kazoo.protocol.states import KeeperState, WatchedEvent, ZnodeStat from kazoo.retry import RetryFailedError from mock import Mock, PropertyMock, patch +from patroni.dcs import get_dcs from patroni.dcs.zookeeper import Cluster, PatroniKazooClient, \ PatroniSequentialThreadingHandler, ZooKeeper, ZooKeeperError +from patroni.postgresql.mpp import get_mpp class MockKazooClient(Mock): @@ -148,9 +150,9 @@ class TestZooKeeper(unittest.TestCase): @patch('patroni.dcs.zookeeper.PatroniKazooClient', MockKazooClient) def setUp(self): - self.zk = ZooKeeper({'hosts': ['localhost:2181'], 'scope': 'test', - 'name': 'foo', 'ttl': 30, 'retry_timeout': 10, 'loop_wait': 10, - 'set_acls': {'CN=principal2': ['ALL']}}) + self.zk = get_dcs({'scope': 'test', 'name': 'foo', 'ttl': 30, 'retry_timeout': 10, 'loop_wait': 10, + 'zookeeper': {'hosts': ['localhost:2181'], 'set_acls': {'CN=principal2': ['ALL']}}}) + self.assertIsInstance(self.zk, ZooKeeper) def test_reload_config(self): self.zk.reload_config({'ttl': 20, 'retry_timeout': 10, 'loop_wait': 10}) @@ -177,7 +179,7 @@ def test_get_cluster(self): self.assertEqual(cluster.last_lsn, 500) def test__get_citus_cluster(self): - self.zk._citus_group = '0' + self.zk._mpp = get_mpp({'citus': {'group': 0, 'database': 'postgres'}}) for _ in range(0, 2): cluster = self.zk.get_cluster() self.assertIsInstance(cluster, Cluster) From dd548c49645c78d0b85cc723bf3bec500528c3da Mon Sep 17 00:00:00 2001 From: Alexander Kukushkin Date: Thu, 21 Dec 2023 09:25:51 +0100 Subject: [PATCH 31/33] Create citus database and extension idempotently (#2990) Consider a task: we want to create an 
extension _before_ citus in a database. Currently `post_bootstrap` script is executed before `CitusHandler.bootstrap()` method, which seems to allow doing that, but in fact `CitusHandler.bootstrap()` will fail to create an already existing database and as a result the whole bootstrap will fail. Changing the order of execution of `post_bootstrap` hook and `CitusHandler.bootstrap()` seems to be useless, because it will not allow creating another extension _before_ citus. Therefore the only way of solving it is making CREATE DATABASE and CREATE EXTENSION idempotent. It will allow creating the citus database and all dependencies from the `post_bootstrap` hook. --- patroni/postgresql/mpp/citus.py | 15 +++++++++++---- 1 file changed, 11 insertions(+), 4 deletions(-) diff --git a/patroni/postgresql/mpp/citus.py b/patroni/postgresql/mpp/citus.py index a17d1435b..b8c205ce8 100644 --- a/patroni/postgresql/mpp/citus.py +++ b/patroni/postgresql/mpp/citus.py @@ -8,7 +8,7 @@ from . import AbstractMPP, AbstractMPPHandler from ...dcs import Cluster -from ...psycopg import connect, quote_ident +from ...psycopg import connect, quote_ident, quote_literal from ...utils import parse_int if TYPE_CHECKING: # pragma: no cover @@ -389,9 +389,16 @@ def bootstrap(self) -> None: if self._config['database'] != self._postgresql.database: conn = connect(**conn_kwargs) try: + database = self._config['database'] + sql = """DO $$ +BEGIN + PERFORM * FROM pg_catalog.pg_database WHERE datname = {0}; + IF NOT FOUND THEN + CREATE DATABASE {1}; + END IF; +END;$$""".format(quote_literal(database), quote_ident(database, conn)) with conn.cursor() as cur: - cur.execute('CREATE DATABASE {0}'.format( - quote_ident(self._config['database'], conn)).encode('utf-8')) + cur.execute(sql.encode('utf-8')) finally: conn.close() @@ -399,7 +406,7 @@ def bootstrap(self) -> None: conn = connect(**conn_kwargs) try: with conn.cursor() as cur: - cur.execute('CREATE EXTENSION citus') + cur.execute('CREATE EXTENSION IF NOT EXISTS
citus') superuser = self._postgresql.config.superuser params = {k: superuser[k] for k in ('password', 'sslcert', 'sslkey') if k in superuser} From 8acefefc4238b9d32cda2e8670ba53e212837c7c Mon Sep 17 00:00:00 2001 From: zhjwpku Date: Fri, 29 Dec 2023 16:01:46 +0800 Subject: [PATCH 32/33] Fix Citus bootstrap - CREATE DATABASE cannot be executed from a function (#2994) This was introduced by #2990: pod cannot be started and show the following logs: ``` 2023-12-26 03:29:25.569 UTC [47] CONTEXT: SQL statement "CREATE DATABASE "citus"" PL/pgSQL function inline_code_block line 5 at SQL statement 2023-12-26 03:29:25.569 UTC [47] STATEMENT: DO $$ BEGIN PERFORM * FROM pg_catalog.pg_database WHERE datname = 'citus'; IF NOT FOUND THEN CREATE DATABASE "citus"; END IF; END;$$ 2023-12-26 03:29:25,570 ERROR: post_bootstrap Traceback (most recent call last): File "/usr/local/lib/python3.11/dist-packages/patroni/postgresql/bootstrap.py", line 474, in post_bootstrap self._postgresql.citus_handler.bootstrap() File "/usr/local/lib/python3.11/dist-packages/patroni/postgresql/mpp/citus.py", line 401, in bootstrap cur.execute(sql.encode('utf-8')) psycopg2.errors.ActiveSqlTransaction: CREATE DATABASE cannot be executed from a function CONTEXT: SQL statement "CREATE DATABASE "citus"" PL/pgSQL function inline_code_block line 5 at SQL statement ``` --------- Signed-off-by: Zhao Junwang --- patroni/postgresql/mpp/citus.py | 15 +++++---------- patroni/psycopg.py | 5 ++++- tests/__init__.py | 2 ++ tests/test_citus.py | 8 ++++++++ 4 files changed, 19 insertions(+), 11 deletions(-) diff --git a/patroni/postgresql/mpp/citus.py b/patroni/postgresql/mpp/citus.py index b8c205ce8..f3d6394c3 100644 --- a/patroni/postgresql/mpp/citus.py +++ b/patroni/postgresql/mpp/citus.py @@ -8,7 +8,7 @@ from . 
import AbstractMPP, AbstractMPPHandler from ...dcs import Cluster -from ...psycopg import connect, quote_ident, quote_literal +from ...psycopg import connect, quote_ident, DuplicateDatabase from ...utils import parse_int if TYPE_CHECKING: # pragma: no cover @@ -389,16 +389,11 @@ def bootstrap(self) -> None: if self._config['database'] != self._postgresql.database: conn = connect(**conn_kwargs) try: - database = self._config['database'] - sql = """DO $$ -BEGIN - PERFORM * FROM pg_catalog.pg_database WHERE datname = {0}; - IF NOT FOUND THEN - CREATE DATABASE {1}; - END IF; -END;$$""".format(quote_literal(database), quote_ident(database, conn)) with conn.cursor() as cur: - cur.execute(sql.encode('utf-8')) + cur.execute('CREATE DATABASE {0}'.format( + quote_ident(self._config['database'], conn)).encode('utf-8')) + except DuplicateDatabase as e: + logger.debug('Exception when creating database: %r', e) finally: conn.close() diff --git a/patroni/psycopg.py b/patroni/psycopg.py index 4a92047ca..5d47ad5c5 100644 --- a/patroni/psycopg.py +++ b/patroni/psycopg.py @@ -9,7 +9,8 @@ from psycopg import Connection from psycopg2 import connection, cursor -__all__ = ['connect', 'quote_ident', 'quote_literal', 'DatabaseError', 'Error', 'OperationalError', 'ProgrammingError'] +__all__ = ['connect', 'quote_ident', 'quote_literal', 'DatabaseError', 'Error', 'OperationalError', 'ProgrammingError', + 'DuplicateDatabase'] _legacy = False try: @@ -18,6 +19,7 @@ if parse_version(__version__) < MIN_PSYCOPG2: raise ImportError from psycopg2 import connect as _connect, Error, DatabaseError, OperationalError, ProgrammingError + from psycopg2.errors import DuplicateDatabase from psycopg2.extensions import adapt try: @@ -43,6 +45,7 @@ def quote_literal(value: Any, conn: Optional[Any] = None) -> str: return value.getquoted().decode('utf-8') except ImportError: from psycopg import connect as __connect, sql, Error, DatabaseError, OperationalError, ProgrammingError + from psycopg.errors import 
DuplicateDatabase def _connect(dsn: Optional[str] = None, **kwargs: Any) -> 'Connection[Any]': """Call :func:`psycopg.connect` with *dsn* and ``**kwargs``. diff --git a/tests/__init__.py b/tests/__init__.py index 986bd88b0..2f3730f69 100644 --- a/tests/__init__.py +++ b/tests/__init__.py @@ -129,6 +129,8 @@ def execute(self, sql, *params): sql = sql.decode('utf-8') if sql.startswith('blabla'): raise psycopg.ProgrammingError() + if sql.startswith('CREATE DATABASE'): + raise psycopg.DuplicateDatabase() elif sql == 'CHECKPOINT' or sql.startswith('SELECT pg_catalog.pg_create_'): raise psycopg.OperationalError() elif sql.startswith('RetryFailedError'): diff --git a/tests/test_citus.py b/tests/test_citus.py index dbf6d9cf6..f1d8a020f 100644 --- a/tests/test_citus.py +++ b/tests/test_citus.py @@ -157,3 +157,11 @@ def test_ignore_replication_slot(self): 'type': 'logical', 'database': 'citus', 'plugin': 'pgoutput'})) self.assertTrue(self.c.ignore_replication_slot({'name': 'citus_shard_split_slot_1_2_3', 'type': 'logical', 'database': 'citus', 'plugin': 'citus'})) + + @patch('patroni.postgresql.mpp.citus.logger.debug') + @patch('patroni.postgresql.mpp.citus.connect', psycopg_connect) + @patch('patroni.postgresql.mpp.citus.quote_ident', Mock()) + def test_bootstrap_duplicate_database(self, mock_logger): + self.c.bootstrap() + mock_logger.assert_called_once() + self.assertTrue(mock_logger.call_args[0][0].startswith('Exception when creating database')) From 71ccf91e3672be7879f5e5d7317d13b58f87220f Mon Sep 17 00:00:00 2001 From: Polina Bungina <27892524+hughcapet@users.noreply.github.com> Date: Tue, 2 Jan 2024 11:30:18 +0300 Subject: [PATCH 33/33] Don't filter out contradictory nofailover tag (#2992) * Ensure that nofailover will always be used if both nofailover and failover_priority tags are provided * Call _validate_failover_tags from reload_local_configuration() as well * Properly check values in the _validate_failover_tags(): nofailover value should be casted to boolean 
like it is done when accessed in other places --- features/priority_failover.feature | 18 ++++++++ features/steps/basic_replication.py | 2 +- features/steps/patroni_api.py | 6 +++ patroni/config.py | 13 +++--- patroni/tags.py | 10 +++-- postgres0.yml | 2 +- postgres1.yml | 2 +- postgres2.yml | 2 +- tests/test_config.py | 68 +++++++++++++---------------- tests/test_patroni.py | 16 +++++++ 10 files changed, 89 insertions(+), 50 deletions(-) diff --git a/features/priority_failover.feature b/features/priority_failover.feature index b33dd0456..acb1cb5a4 100644 --- a/features/priority_failover.feature +++ b/features/priority_failover.feature @@ -21,3 +21,21 @@ Feature: priority replication And I sleep for 5 seconds Then postgres3 role is the primary after 10 seconds And there is one of ["postgres3 has equally tolerable WAL position and priority 2, while this node has priority 1","Wal position of postgres3 is ahead of my wal position"] INFO in the postgres2 patroni log after 5 seconds + + Scenario: check conflicting configuration handling + When I set nofailover tag in postgres2 config + And I issue an empty POST request to http://127.0.0.1:8010/reload + Then I receive a response code 202 + And there is one of ["Conflicting configuration between nofailover: True and failover_priority: 1. Defaulting to nofailover: True"] WARNING in the postgres2 patroni log after 5 seconds + And "members/postgres2" key in DCS has tags={'failover_priority': '1', 'nofailover': True} after 10 seconds + When I issue a POST request to http://127.0.0.1:8010/failover with {"candidate": "postgres2"} + Then I receive a response code 412 + And I receive a response text "failover is not possible: no good candidates have been found" + When I reset nofailover tag in postgres1 config + And I issue an empty POST request to http://127.0.0.1:8009/reload + Then I receive a response code 202 + And there is one of ["Conflicting configuration between nofailover: False and failover_priority: 0. 
Defaulting to nofailover: False"] WARNING in the postgres1 patroni log after 5 seconds + And "members/postgres1" key in DCS has tags={'failover_priority': '0', 'nofailover': False} after 10 seconds + And I issue a POST request to http://127.0.0.1:8010/failover with {"candidate": "postgres1"} + Then I receive a response code 200 + And postgres1 role is the primary after 10 seconds diff --git a/features/steps/basic_replication.py b/features/steps/basic_replication.py index d70c6d0ee..f2db7110f 100644 --- a/features/steps/basic_replication.py +++ b/features/steps/basic_replication.py @@ -123,6 +123,6 @@ def check_patroni_log(context, message_list, level, node, timeout): messsages_of_level = context.pctl.read_patroni_log(node, level) if any(any(message in line for line in messsages_of_level) for message in message_list): break - time.sleep(1) + sleep(1) else: assert False, f"There were none of {message_list} {level} in the {node} patroni log after {timeout} seconds" diff --git a/features/steps/patroni_api.py b/features/steps/patroni_api.py index 2c76d32df..74a7c0da8 100644 --- a/features/steps/patroni_api.py +++ b/features/steps/patroni_api.py @@ -128,6 +128,12 @@ def scheduled_restart(context, url, in_seconds, data): context.execute_steps(u"""Given I issue a POST request to {0}/restart with {1}""".format(url, json.dumps(data))) +@step('I {action:w} {tag:w} tag in {pg_name:w} config') +def add_bool_tag_to_config(context, action, tag, pg_name): + value = action == 'set' + context.pctl.add_tag_to_config(pg_name, tag, value) + + @step('I add tag {tag:w} {value:w} to {pg_name:w} config') def add_tag_to_config(context, tag, value, pg_name): context.pctl.add_tag_to_config(pg_name, tag, value) diff --git a/patroni/config.py b/patroni/config.py index e523bc080..f7d648d5f 100644 --- a/patroni/config.py +++ b/patroni/config.py @@ -142,10 +142,10 @@ def __init__(self, configfile: str, self.__effective_configuration = self._build_effective_configuration({}, 
self._local_configuration) self._data_dir = self.__effective_configuration.get('postgresql', {}).get('data_dir', "") self._cache_file = os.path.join(self._data_dir, self.__CACHE_FILENAME) - if validator: # patronictl uses validator=None and we don't want to load anything from local cache in this case - self._load_cache() + if validator: # patronictl uses validator=None + self._load_cache() # we don't want to load anything from local cache for ctl + self._validate_failover_tags() # irrelevant for ctl self._cache_needs_saving = False - self._validate_failover_tags() @property def config_file(self) -> Optional[str]: @@ -356,6 +356,7 @@ def reload_local_configuration(self) -> Optional[bool]: new_configuration = self._build_effective_configuration(self._dynamic_configuration, configuration) self._local_configuration = configuration self.__effective_configuration = new_configuration + self._validate_failover_tags() return True else: logger.info('No local configuration items changed.') @@ -814,10 +815,12 @@ def _validate_failover_tags(self) -> None: bedrock source of truth) """ tags = self.get('tags', {}) + if 'nofailover' not in tags: + return nofailover_tag = tags.get('nofailover') failover_priority_tag = parse_int(tags.get('failover_priority')) if failover_priority_tag is not None \ - and (nofailover_tag is True and failover_priority_tag > 0 - or nofailover_tag is False and failover_priority_tag <= 0): + and (bool(nofailover_tag) is True and failover_priority_tag > 0 + or bool(nofailover_tag) is False and failover_priority_tag <= 0): logger.warning('Conflicting configuration between nofailover: %s and failover_priority: %s. 
' 'Defaulting to nofailover: %s', nofailover_tag, failover_priority_tag, nofailover_tag) diff --git a/patroni/tags.py b/patroni/tags.py index 998ff6934..eedc96742 100644 --- a/patroni/tags.py +++ b/patroni/tags.py @@ -22,14 +22,18 @@ def _filter_tags(tags: Dict[str, Any]) -> Dict[str, Any]: A custom tag is any tag added to the configuration ``tags`` section that is not one of ``clonefrom``, ``nofailover``, ``noloadbalance`` or ``nosync``. - For the Patroni predefined tags, the returning object will only contain them if they are enabled as they - all are boolean values that default to disabled. + For most of the Patroni predefined tags, the returning object will only contain them if they are enabled as + they all are boolean values that default to disabled. + However ``nofailover`` tag is always returned if ``failover_priority`` tag is defined. In this case, we need + both values to see if they are contradictory and the ``nofailover`` value should be used. :returns: a dictionary of tags set for this node. The key is the tag name, and the value is the corresponding tag value. 
""" return {tag: value for tag, value in tags.items() - if tag not in ('clonefrom', 'nofailover', 'noloadbalance', 'nosync') or value} + if any((tag not in ('clonefrom', 'nofailover', 'noloadbalance', 'nosync'), + value, + tag == 'nofailover' and 'failover_priority' in tags))} @property @abc.abstractmethod diff --git a/postgres0.yml b/postgres0.yml index 8a975156c..84796a469 100644 --- a/postgres0.yml +++ b/postgres0.yml @@ -132,7 +132,7 @@ postgresql: # safety_margin: 5 tags: - nofailover: false + # failover_priority: 1 noloadbalance: false clonefrom: false nosync: false diff --git a/postgres1.yml b/postgres1.yml index 6ca2aa646..c86e8790d 100644 --- a/postgres1.yml +++ b/postgres1.yml @@ -124,6 +124,6 @@ postgresql: #pre_promote: /path/to/pre_promote.sh tags: - nofailover: false + # failover_priority: 1 noloadbalance: false clonefrom: false diff --git a/postgres2.yml b/postgres2.yml index ee61a0232..7384568ec 100644 --- a/postgres2.yml +++ b/postgres2.yml @@ -114,7 +114,7 @@ postgresql: # krb_server_keyfile: /var/spool/keytabs/postgres unix_socket_directories: '..' 
# parent directory of data_dir tags: - nofailover: false + # failover_priority: 1 noloadbalance: false clonefrom: false # replicatefrom: postgresql1 diff --git a/tests/test_config.py b/tests/test_config.py index 7bf01f564..a02a33fda 100644 --- a/tests/test_config.py +++ b/tests/test_config.py @@ -159,48 +159,40 @@ def test_invalid_path(self): @patch('patroni.config.logger') def test__validate_failover_tags(self, mock_logger, mock_get): """Ensures that only one of `nofailover` or `failover_priority` can be provided""" - mock_logger.warning.reset_mock() config = Config("postgres0.yml") + # Providing one of `nofailover` or `failover_priority` is fine - just_nofailover = {"nofailover": True} - mock_get.side_effect = [just_nofailover] * 2 - self.assertIsNone(config._validate_failover_tags()) - mock_logger.warning.assert_not_called() - just_failover_priority = {"failover_priority": 1} - mock_get.side_effect = [just_failover_priority] * 2 - self.assertIsNone(config._validate_failover_tags()) - mock_logger.warning.assert_not_called() + for single_param in ({"nofailover": True}, {"failover_priority": 1}, {"failover_priority": 0}): + mock_get.side_effect = [single_param] * 2 + self.assertIsNone(config._validate_failover_tags()) + mock_logger.warning.assert_not_called() + # Providing both `nofailover` and `failover_priority` is fine if consistent - consistent_false = {"nofailover": False, "failover_priority": 1} - mock_get.side_effect = [consistent_false] * 2 - self.assertIsNone(config._validate_failover_tags()) - mock_logger.warning.assert_not_called() - consistent_true = {"nofailover": True, "failover_priority": 0} - mock_get.side_effect = [consistent_true] * 2 - self.assertIsNone(config._validate_failover_tags()) - mock_logger.warning.assert_not_called() + for consistent_state in ( + {"nofailover": False, "failover_priority": 1}, + {"nofailover": True, "failover_priority": 0}, + {"nofailover": "False", "failover_priority": 0} + ): + mock_get.side_effect = 
[consistent_state] * 2 + self.assertIsNone(config._validate_failover_tags()) + mock_logger.warning.assert_not_called() + # Providing both inconsistently should log a warning - inconsistent_false = {"nofailover": False, "failover_priority": 0} - mock_get.side_effect = [inconsistent_false] * 2 - self.assertIsNone(config._validate_failover_tags()) - mock_logger.warning.assert_called_once_with( - 'Conflicting configuration between nofailover: %s and failover_priority: %s.' - + ' Defaulting to nofailover: %s', - False, - 0, - False - ) - mock_logger.warning.reset_mock() - inconsistent_true = {"nofailover": True, "failover_priority": 1} - mock_get.side_effect = [inconsistent_true] * 2 - self.assertIsNone(config._validate_failover_tags()) - mock_logger.warning.assert_called_once_with( - 'Conflicting configuration between nofailover: %s and failover_priority: %s.' - + ' Defaulting to nofailover: %s', - True, - 1, - True - ) + for inconsistent_state in ( + {"nofailover": False, "failover_priority": 0}, + {"nofailover": True, "failover_priority": 1}, + {"nofailover": "False", "failover_priority": 1}, + {"nofailover": "", "failover_priority": 0} + ): + mock_get.side_effect = [inconsistent_state] * 2 + self.assertIsNone(config._validate_failover_tags()) + mock_logger.warning.assert_called_once_with( + 'Conflicting configuration between nofailover: %s and failover_priority: %s.' 
+ + ' Defaulting to nofailover: %s', + inconsistent_state['nofailover'], + inconsistent_state['failover_priority'], + inconsistent_state['nofailover']) + mock_logger.warning.reset_mock() def test__process_postgresql_parameters(self): expected_params = { diff --git a/tests/test_patroni.py b/tests/test_patroni.py index 8e08d4069..2f8428b1c 100644 --- a/tests/test_patroni.py +++ b/tests/test_patroni.py @@ -175,6 +175,20 @@ def test_schedule_next_run(self): self.p.next_run = time.time() - self.p.dcs.loop_wait - 1 self.p.schedule_next_run() + def test__filter_tags(self): + tags = {'noloadbalance': False, 'clonefrom': False, 'nosync': False, 'smth': 'random'} + self.assertEqual(self.p._filter_tags(tags), {'smth': 'random'}) + + tags['clonefrom'] = True + tags['smth'] = False + self.assertEqual(self.p._filter_tags(tags), {'clonefrom': True, 'smth': False}) + + tags = {'nofailover': False, 'failover_priority': 0} + self.assertEqual(self.p._filter_tags(tags), tags) + + tags = {'nofailover': True, 'failover_priority': 1} + self.assertEqual(self.p._filter_tags(tags), tags) + def test_noloadbalance(self): self.p.tags['noloadbalance'] = True self.assertTrue(self.p.noloadbalance) @@ -186,9 +200,11 @@ def test_nofailover(self): # Setting `nofailover: True` has precedence (True, 0, True), (True, 1, True), + ('False', 1, True), # because we use bool() for the value # Similarly, setting `nofailover: False` has precedence (False, 0, False), (False, 1, False), + ('', 0, False), # Only when we have `nofailover: None` should we got based on priority (None, 0, True), (None, 1, False),