Skip to content

Commit

Permalink
Shut down on join 400 (#6473)
Browse files Browse the repository at this point in the history
  • Loading branch information
achamayou authored Sep 11, 2024
1 parent 8baf2e5 commit 4093777
Show file tree
Hide file tree
Showing 4 changed files with 24 additions and 5 deletions.
1 change: 1 addition & 0 deletions CHANGELOG.md
Original file line number Diff line number Diff line change
Expand Up @@ -12,6 +12,7 @@ and this project adheres to [Semantic Versioning](http://semver.org/spec/v2.0.0.
### Changed

- The `set_jwt_issuer` governance action has been updated, and no longer accepts `key_filter` or `key_policy` arguments (#6450).
- Nodes started in `Join` mode will shut down if they receive an unrecoverable condition such as `StartupSeqnoIsOld` when attempting to join (#6471).

### Removed

Expand Down
13 changes: 12 additions & 1 deletion src/node/node_state.h
Original file line number Diff line number Diff line change
Expand Up @@ -591,7 +591,18 @@ namespace ccf
return;
}

if (status != HTTP_STATUS_OK)
if (status == HTTP_STATUS_BAD_REQUEST)
{
auto error_msg = fmt::format(
"Join request to {} returned 400 Bad Request: {}. Shutting "
"down node gracefully.",
config.join.target_rpc_address,
std::string(data.begin(), data.end()));
LOG_FAIL_FMT("{}", error_msg);
RINGBUFFER_WRITE_MESSAGE(
AdminMessage::fatal_error_msg, to_host, error_msg);
}
else if (status != HTTP_STATUS_OK)
{
const auto& location = headers.find(http::headers::LOCATION);
if (
Expand Down
5 changes: 3 additions & 2 deletions tests/infra/network.py
Original file line number Diff line number Diff line change
Expand Up @@ -905,8 +905,9 @@ def run_join_node(
)
except TimeoutError as e:
LOG.error(f"New pending node {node.node_id} failed to join the network")
has_stopped = node.remote.check_done()
if stop_on_error:
assert node.remote.check_done()
assert has_stopped, "Node should have stopped"
node.stop()
out_path, err_path = node.get_logs()
if out_path is not None and err_path is not None:
Expand All @@ -920,7 +921,7 @@ def run_join_node(
if "Quote does not contain known enclave measurement" in error:
raise CodeIdNotFound from e
if "StartupSeqnoIsOld" in error:
raise StartupSeqnoIsOld from e
raise StartupSeqnoIsOld(has_stopped) from e
if "invalid cert on handshake" in error:
raise ServiceCertificateInvalid from e
raise
Expand Down
10 changes: 8 additions & 2 deletions tests/reconfiguration.py
Original file line number Diff line number Diff line change
Expand Up @@ -864,10 +864,13 @@ def run_join_old_snapshot(args):
snapshots_dir=tmp_dir,
timeout=3,
)
except infra.network.StartupSeqnoIsOld:
except infra.network.StartupSeqnoIsOld as e:
LOG.info(
f"Node {new_node.local_node_id} started from old snapshot could not join the service, as expected"
)
assert e.args == (
True,
), "Node has stopped on receiving StartupSeqnoIsOld"
else:
raise RuntimeError(
f"Node {new_node.local_node_id} started from old snapshot unexpectedly joined the service"
Expand All @@ -883,10 +886,13 @@ def run_join_old_snapshot(args):
from_snapshot=False,
timeout=3,
)
except infra.network.StartupSeqnoIsOld:
except infra.network.StartupSeqnoIsOld as e:
LOG.info(
f"Node {new_node.local_node_id} started without snapshot could not join the service, as expected"
)
assert e.args == (
True,
), "Node has stopped on receiving StartupSeqnoIsOld"
else:
raise RuntimeError(
f"Node {new_node.local_node_id} started without snapshot unexpectedly joined the service successfully"
Expand Down

0 comments on commit 4093777

Please sign in to comment.