Skip to content

Commit

Permalink
Don't filter out contradictory nofailover tag (patroni#2992)
Browse files Browse the repository at this point in the history
* Ensure that nofailover will always be used if both nofailover and
failover_priority tags are provided
* Call _validate_failover_tags from reload_local_configuration() as well
* Properly check values in the _validate_failover_tags(): nofailover value should be casted to boolean like it is done when accessed in other places
  • Loading branch information
hughcapet authored Jan 2, 2024
1 parent 8acefef commit 71ccf91
Show file tree
Hide file tree
Showing 10 changed files with 89 additions and 50 deletions.
18 changes: 18 additions & 0 deletions features/priority_failover.feature
Original file line number Diff line number Diff line change
Expand Up @@ -21,3 +21,21 @@ Feature: priority replication
And I sleep for 5 seconds
Then postgres3 role is the primary after 10 seconds
And there is one of ["postgres3 has equally tolerable WAL position and priority 2, while this node has priority 1","Wal position of postgres3 is ahead of my wal position"] INFO in the postgres2 patroni log after 5 seconds

Scenario: check conflicting configuration handling
When I set nofailover tag in postgres2 config
And I issue an empty POST request to http://127.0.0.1:8010/reload
Then I receive a response code 202
And there is one of ["Conflicting configuration between nofailover: True and failover_priority: 1. Defaulting to nofailover: True"] WARNING in the postgres2 patroni log after 5 seconds
And "members/postgres2" key in DCS has tags={'failover_priority': '1', 'nofailover': True} after 10 seconds
When I issue a POST request to http://127.0.0.1:8010/failover with {"candidate": "postgres2"}
Then I receive a response code 412
And I receive a response text "failover is not possible: no good candidates have been found"
When I reset nofailover tag in postgres1 config
And I issue an empty POST request to http://127.0.0.1:8009/reload
Then I receive a response code 202
And there is one of ["Conflicting configuration between nofailover: False and failover_priority: 0. Defaulting to nofailover: False"] WARNING in the postgres1 patroni log after 5 seconds
And "members/postgres1" key in DCS has tags={'failover_priority': '0', 'nofailover': False} after 10 seconds
And I issue a POST request to http://127.0.0.1:8010/failover with {"candidate": "postgres1"}
Then I receive a response code 200
And postgres1 role is the primary after 10 seconds
2 changes: 1 addition & 1 deletion features/steps/basic_replication.py
Original file line number Diff line number Diff line change
Expand Up @@ -123,6 +123,6 @@ def check_patroni_log(context, message_list, level, node, timeout):
messsages_of_level = context.pctl.read_patroni_log(node, level)
if any(any(message in line for line in messsages_of_level) for message in message_list):
break
time.sleep(1)
sleep(1)
else:
assert False, f"There were none of {message_list} {level} in the {node} patroni log after {timeout} seconds"
6 changes: 6 additions & 0 deletions features/steps/patroni_api.py
Original file line number Diff line number Diff line change
Expand Up @@ -128,6 +128,12 @@ def scheduled_restart(context, url, in_seconds, data):
context.execute_steps(u"""Given I issue a POST request to {0}/restart with {1}""".format(url, json.dumps(data)))


@step('I {action:w} {tag:w} tag in {pg_name:w} config')
def add_bool_tag_to_config(context, action, tag, pg_name):
value = action == 'set'
context.pctl.add_tag_to_config(pg_name, tag, value)


@step('I add tag {tag:w} {value:w} to {pg_name:w} config')
def add_tag_to_config(context, tag, value, pg_name):
context.pctl.add_tag_to_config(pg_name, tag, value)
Expand Down
13 changes: 8 additions & 5 deletions patroni/config.py
Original file line number Diff line number Diff line change
Expand Up @@ -142,10 +142,10 @@ def __init__(self, configfile: str,
self.__effective_configuration = self._build_effective_configuration({}, self._local_configuration)
self._data_dir = self.__effective_configuration.get('postgresql', {}).get('data_dir', "")
self._cache_file = os.path.join(self._data_dir, self.__CACHE_FILENAME)
if validator: # patronictl uses validator=None and we don't want to load anything from local cache in this case
self._load_cache()
if validator: # patronictl uses validator=None
self._load_cache() # we don't want to load anything from local cache for ctl
self._validate_failover_tags() # irrelevant for ctl
self._cache_needs_saving = False
self._validate_failover_tags()

@property
def config_file(self) -> Optional[str]:
Expand Down Expand Up @@ -356,6 +356,7 @@ def reload_local_configuration(self) -> Optional[bool]:
new_configuration = self._build_effective_configuration(self._dynamic_configuration, configuration)
self._local_configuration = configuration
self.__effective_configuration = new_configuration
self._validate_failover_tags()
return True
else:
logger.info('No local configuration items changed.')
Expand Down Expand Up @@ -814,10 +815,12 @@ def _validate_failover_tags(self) -> None:
bedrock source of truth)
"""
tags = self.get('tags', {})
if 'nofailover' not in tags:
return
nofailover_tag = tags.get('nofailover')
failover_priority_tag = parse_int(tags.get('failover_priority'))
if failover_priority_tag is not None \
and (nofailover_tag is True and failover_priority_tag > 0
or nofailover_tag is False and failover_priority_tag <= 0):
and (bool(nofailover_tag) is True and failover_priority_tag > 0
or bool(nofailover_tag) is False and failover_priority_tag <= 0):
logger.warning('Conflicting configuration between nofailover: %s and failover_priority: %s. '
'Defaulting to nofailover: %s', nofailover_tag, failover_priority_tag, nofailover_tag)
10 changes: 7 additions & 3 deletions patroni/tags.py
Original file line number Diff line number Diff line change
Expand Up @@ -22,14 +22,18 @@ def _filter_tags(tags: Dict[str, Any]) -> Dict[str, Any]:
A custom tag is any tag added to the configuration ``tags`` section that is not one of ``clonefrom``,
``nofailover``, ``noloadbalance`` or ``nosync``.
For the Patroni predefined tags, the returning object will only contain them if they are enabled as they
all are boolean values that default to disabled.
For most of the Patroni predefined tags, the returning object will only contain them if they are enabled as
they all are boolean values that default to disabled.
However ``nofailover`` tag is always returned if ``failover_priority`` tag is defined. In this case, we need
both values to see if they are contradictory and the ``nofailover`` value should be used.
:returns: a dictionary of tags set for this node. The key is the tag name, and the value is the corresponding
tag value.
"""
return {tag: value for tag, value in tags.items()
if tag not in ('clonefrom', 'nofailover', 'noloadbalance', 'nosync') or value}
if any((tag not in ('clonefrom', 'nofailover', 'noloadbalance', 'nosync'),
value,
tag == 'nofailover' and 'failover_priority' in tags))}

@property
@abc.abstractmethod
Expand Down
2 changes: 1 addition & 1 deletion postgres0.yml
Original file line number Diff line number Diff line change
Expand Up @@ -132,7 +132,7 @@ postgresql:
# safety_margin: 5

tags:
nofailover: false
# failover_priority: 1
noloadbalance: false
clonefrom: false
nosync: false
2 changes: 1 addition & 1 deletion postgres1.yml
Original file line number Diff line number Diff line change
Expand Up @@ -124,6 +124,6 @@ postgresql:
#pre_promote: /path/to/pre_promote.sh

tags:
nofailover: false
# failover_priority: 1
noloadbalance: false
clonefrom: false
2 changes: 1 addition & 1 deletion postgres2.yml
Original file line number Diff line number Diff line change
Expand Up @@ -114,7 +114,7 @@ postgresql:
# krb_server_keyfile: /var/spool/keytabs/postgres
unix_socket_directories: '..' # parent directory of data_dir
tags:
nofailover: false
# failover_priority: 1
noloadbalance: false
clonefrom: false
# replicatefrom: postgresql1
68 changes: 30 additions & 38 deletions tests/test_config.py
Original file line number Diff line number Diff line change
Expand Up @@ -159,48 +159,40 @@ def test_invalid_path(self):
@patch('patroni.config.logger')
def test__validate_failover_tags(self, mock_logger, mock_get):
"""Ensures that only one of `nofailover` or `failover_priority` can be provided"""
mock_logger.warning.reset_mock()
config = Config("postgres0.yml")

# Providing one of `nofailover` or `failover_priority` is fine
just_nofailover = {"nofailover": True}
mock_get.side_effect = [just_nofailover] * 2
self.assertIsNone(config._validate_failover_tags())
mock_logger.warning.assert_not_called()
just_failover_priority = {"failover_priority": 1}
mock_get.side_effect = [just_failover_priority] * 2
self.assertIsNone(config._validate_failover_tags())
mock_logger.warning.assert_not_called()
for single_param in ({"nofailover": True}, {"failover_priority": 1}, {"failover_priority": 0}):
mock_get.side_effect = [single_param] * 2
self.assertIsNone(config._validate_failover_tags())
mock_logger.warning.assert_not_called()

# Providing both `nofailover` and `failover_priority` is fine if consistent
consistent_false = {"nofailover": False, "failover_priority": 1}
mock_get.side_effect = [consistent_false] * 2
self.assertIsNone(config._validate_failover_tags())
mock_logger.warning.assert_not_called()
consistent_true = {"nofailover": True, "failover_priority": 0}
mock_get.side_effect = [consistent_true] * 2
self.assertIsNone(config._validate_failover_tags())
mock_logger.warning.assert_not_called()
for consistent_state in (
{"nofailover": False, "failover_priority": 1},
{"nofailover": True, "failover_priority": 0},
{"nofailover": "False", "failover_priority": 0}
):
mock_get.side_effect = [consistent_state] * 2
self.assertIsNone(config._validate_failover_tags())
mock_logger.warning.assert_not_called()

# Providing both inconsistently should log a warning
inconsistent_false = {"nofailover": False, "failover_priority": 0}
mock_get.side_effect = [inconsistent_false] * 2
self.assertIsNone(config._validate_failover_tags())
mock_logger.warning.assert_called_once_with(
'Conflicting configuration between nofailover: %s and failover_priority: %s.'
+ ' Defaulting to nofailover: %s',
False,
0,
False
)
mock_logger.warning.reset_mock()
inconsistent_true = {"nofailover": True, "failover_priority": 1}
mock_get.side_effect = [inconsistent_true] * 2
self.assertIsNone(config._validate_failover_tags())
mock_logger.warning.assert_called_once_with(
'Conflicting configuration between nofailover: %s and failover_priority: %s.'
+ ' Defaulting to nofailover: %s',
True,
1,
True
)
for inconsistent_state in (
{"nofailover": False, "failover_priority": 0},
{"nofailover": True, "failover_priority": 1},
{"nofailover": "False", "failover_priority": 1},
{"nofailover": "", "failover_priority": 0}
):
mock_get.side_effect = [inconsistent_state] * 2
self.assertIsNone(config._validate_failover_tags())
mock_logger.warning.assert_called_once_with(
'Conflicting configuration between nofailover: %s and failover_priority: %s.'
+ ' Defaulting to nofailover: %s',
inconsistent_state['nofailover'],
inconsistent_state['failover_priority'],
inconsistent_state['nofailover'])
mock_logger.warning.reset_mock()

def test__process_postgresql_parameters(self):
expected_params = {
Expand Down
16 changes: 16 additions & 0 deletions tests/test_patroni.py
Original file line number Diff line number Diff line change
Expand Up @@ -175,6 +175,20 @@ def test_schedule_next_run(self):
self.p.next_run = time.time() - self.p.dcs.loop_wait - 1
self.p.schedule_next_run()

def test__filter_tags(self):
tags = {'noloadbalance': False, 'clonefrom': False, 'nosync': False, 'smth': 'random'}
self.assertEqual(self.p._filter_tags(tags), {'smth': 'random'})

tags['clonefrom'] = True
tags['smth'] = False
self.assertEqual(self.p._filter_tags(tags), {'clonefrom': True, 'smth': False})

tags = {'nofailover': False, 'failover_priority': 0}
self.assertEqual(self.p._filter_tags(tags), tags)

tags = {'nofailover': True, 'failover_priority': 1}
self.assertEqual(self.p._filter_tags(tags), tags)

def test_noloadbalance(self):
self.p.tags['noloadbalance'] = True
self.assertTrue(self.p.noloadbalance)
Expand All @@ -186,9 +200,11 @@ def test_nofailover(self):
# Setting `nofailover: True` has precedence
(True, 0, True),
(True, 1, True),
('False', 1, True), # because we use bool() for the value
# Similarly, setting `nofailover: False` has precedence
(False, 0, False),
(False, 1, False),
('', 0, False),
# Only when we have `nofailover: None` should we got based on priority
(None, 0, True),
(None, 1, False),
Expand Down

0 comments on commit 71ccf91

Please sign in to comment.