Skip to content

Commit

Permalink
ResetFailed and ResetFailedUnit
Browse files Browse the repository at this point in the history
This PR adds to bluechi the ability to reset all failed units on a node
or to reset a single failed unit.

Solves: #932
Signed-off-by: Artiom Divak <[email protected]>
  • Loading branch information
ArtiomDivak committed Sep 4, 2024
1 parent a890cbb commit 9f7478d
Show file tree
Hide file tree
Showing 14 changed files with 287 additions and 18 deletions.
16 changes: 16 additions & 0 deletions data/org.eclipse.bluechi.Node.xml
Original file line number Diff line number Diff line change
Expand Up @@ -226,6 +226,22 @@
<arg name="level" type="s" direction="in" />
</method>

<!--
ResetFailed:
Reset all the failed units on the node
-->
<method name="ResetFailed" />

<!--
ResetFailedUnit:
Reset the failed state of a specific unit on the node.
@name Name of the unit to reset the failed state for.
-->
<method name="ResetFailedUnit">
<arg name="name" type="s" direction="in" />
</method>


<!--
Name:
Expand Down
4 changes: 4 additions & 0 deletions data/org.eclipse.bluechi.internal.Agent.xml
Original file line number Diff line number Diff line change
Expand Up @@ -81,6 +81,10 @@
<method name="SetLogLevel">
<arg name="level" type="s" direction="in" />
</method>
<method name="ResetFailed" />
<method name="ResetFailedUnit">
<arg name="name" type="s" direction="in" />
</method>

<signal name="JobDone">
<arg name="id" type="u" />
Expand Down
2 changes: 2 additions & 0 deletions src/agent/agent.c
Original file line number Diff line number Diff line change
Expand Up @@ -1799,6 +1799,8 @@ static const sd_bus_vtable internal_agent_vtable[] = {
SD_BUS_METHOD("EnableUnitFiles", "asbb", "ba(sss)", agent_method_passthrough_to_systemd, 0),
SD_BUS_METHOD("DisableUnitFiles", "asb", "a(sss)", agent_method_passthrough_to_systemd, 0),
SD_BUS_METHOD("Reload", "", "", agent_method_passthrough_to_systemd, 0),
SD_BUS_METHOD("ResetFailed", "", "", agent_method_passthrough_to_systemd, 0),
SD_BUS_METHOD("ResetFailedUnit", "s", "", agent_method_passthrough_to_systemd, 0),
SD_BUS_VTABLE_END
};

Expand Down
17 changes: 17 additions & 0 deletions src/bindings/python/bluechi/api.py
Original file line number Diff line number Diff line change
Expand Up @@ -963,6 +963,23 @@ def reload_unit(self, name: str, mode: str) -> ObjPath:
mode,
)

def reset_failed(self) -> None:
    """
    ResetFailed:
      Clear the failed state of every failed unit on the node.
    """
    node_proxy = self.get_proxy()
    node_proxy.ResetFailed()

def reset_failed_unit(self, name: str) -> None:
    """
    ResetFailedUnit:
      Clear the failed state of a single unit on the node.
      @name Name of the unit whose failed state is reset.
    """
    node_proxy = self.get_proxy()
    node_proxy.ResetFailedUnit(name)

def restart_unit(self, name: str, mode: str) -> ObjPath:
"""
RestartUnit:
Expand Down
38 changes: 20 additions & 18 deletions src/client/main.c
Original file line number Diff line number Diff line change
Expand Up @@ -34,24 +34,26 @@ int method_version(UNUSED Command *command, UNUSED void *userdata) {
}

const Method methods[] = {
{ "help", 0, 0, OPT_NONE, method_help, usage_bluechi },
{ "list-unit-files", 0, 1, OPT_FILTER, method_list_unit_files, usage_bluechi },
{ "list-units", 0, 1, OPT_FILTER, method_list_units, usage_bluechi },
{ "start", 2, 2, OPT_NONE, method_start, usage_bluechi },
{ "stop", 2, 2, OPT_NONE, method_stop, usage_bluechi },
{ "freeze", 2, 2, OPT_NONE, method_freeze, usage_bluechi },
{ "thaw", 2, 2, OPT_NONE, method_thaw, usage_bluechi },
{ "restart", 2, 2, OPT_NONE, method_restart, usage_bluechi },
{ "reload", 2, 2, OPT_NONE, method_reload, usage_bluechi },
{ "monitor", 0, 2, OPT_NONE, method_monitor, usage_bluechi },
{ "metrics", 1, 1, OPT_NONE, method_metrics, usage_bluechi },
{ "enable", 2, ARG_ANY, OPT_FORCE | OPT_RUNTIME | OPT_NO_RELOAD, method_enable, usage_bluechi },
{ "disable", 2, ARG_ANY, OPT_NO_RELOAD, method_disable, usage_bluechi },
{ "daemon-reload", 1, 1, OPT_NONE, method_daemon_reload, usage_bluechi },
{ "status", 0, ARG_ANY, OPT_WATCH, method_status, usage_bluechi },
{ "set-loglevel", 1, 2, OPT_NONE, method_set_loglevel, usage_bluechi },
{ "version", 0, 0, OPT_NONE, method_version, usage_bluechi },
{ NULL, 0, 0, 0, NULL, NULL }
{ "help", 0, 0, OPT_NONE, method_help, usage_bluechi },
{ "list-unit-files", 0, 1, OPT_FILTER, method_list_unit_files, usage_bluechi },
{ "list-units", 0, 1, OPT_FILTER, method_list_units, usage_bluechi },
{ "start", 2, 2, OPT_NONE, method_start, usage_bluechi },
{ "stop", 2, 2, OPT_NONE, method_stop, usage_bluechi },
{ "freeze", 2, 2, OPT_NONE, method_freeze, usage_bluechi },
{ "thaw", 2, 2, OPT_NONE, method_thaw, usage_bluechi },
{ "restart", 2, 2, OPT_NONE, method_restart, usage_bluechi },
{ "reload", 2, 2, OPT_NONE, method_reload, usage_bluechi },
{ "reset-failed", 1, 1, OPT_NONE, method_reset_failed, usage_bluechi },
{ "reset-failed-unit", 2, 2, OPT_NONE, method_reset_failed_unit, usage_bluechi },
{ "monitor", 0, 2, OPT_NONE, method_monitor, usage_bluechi },
{ "metrics", 1, 1, OPT_NONE, method_metrics, usage_bluechi },
{ "enable", 2, ARG_ANY, OPT_FORCE | OPT_RUNTIME | OPT_NO_RELOAD, method_enable, usage_bluechi },
{ "disable", 2, ARG_ANY, OPT_NO_RELOAD, method_disable, usage_bluechi },
{ "daemon-reload", 1, 1, OPT_NONE, method_daemon_reload, usage_bluechi },
{ "status", 0, ARG_ANY, OPT_WATCH, method_status, usage_bluechi },
{ "set-loglevel", 1, 2, OPT_NONE, method_set_loglevel, usage_bluechi },
{ "version", 0, 0, OPT_NONE, method_version, usage_bluechi },
{ NULL, 0, 0, 0, NULL, NULL }
};

const OptionType option_types[] = {
Expand Down
4 changes: 4 additions & 0 deletions src/client/method-help.c
Original file line number Diff line number Diff line change
Expand Up @@ -28,6 +28,10 @@ void usage_bluechi() {
printf(" usage: reload nodename unitname\n");
printf(" - restart: restarts a specific systemd service (or timer, or slice) on a specific node\n");
printf(" usage: restart nodename unitname\n");
printf(" - reset-failed: reset all failed units on a node\n");
printf(" usage: reset-failed nodename\n");
/* reset-failed-unit takes two operands (node and unit), unlike reset-failed */
printf(" - reset-failed-unit: reset a failed unit on a node\n");
printf(" usage: reset-failed-unit nodename unitname\n");
printf(" - enable: enables the specified systemd files on a specific node\n");
printf(" usage: enable nodename unitfilename...\n");
printf(" - disable: disables the specified systemd files on a specific node\n");
Expand Down
63 changes: 63 additions & 0 deletions src/client/method-unit-actions.c
Original file line number Diff line number Diff line change
Expand Up @@ -248,6 +248,61 @@ static int method_daemon_reload_on(Client *client, char *node_name) {
return 0;
}

/*
 * Call "ResetFailed" for the node named node_name.
 *
 * The node's object path is assembled from NODE_OBJECT_PATH_PREFIX and node_name
 * into client->object_path, then the method is invoked on NODE_INTERFACE via
 * client->api_bus without any arguments. The reply message is not inspected.
 *
 * Returns 0 on success. If assembling the object path or the bus call fails, the
 * negative return value of that step is passed on; a failed bus call is also
 * reported on stderr.
 */
static int method_reset_failed_on(Client *client, char *node_name) {
        _cleanup_sd_bus_error_ sd_bus_error error = SD_BUS_ERROR_NULL;
        _cleanup_sd_bus_message_ sd_bus_message *result = NULL;

        int rc = assemble_object_path_string(NODE_OBJECT_PATH_PREFIX, node_name, &client->object_path);
        if (rc < 0) {
                return rc;
        }

        rc = sd_bus_call_method(
                        client->api_bus,
                        BC_INTERFACE_BASE_NAME,
                        client->object_path,
                        NODE_INTERFACE,
                        "ResetFailed",
                        &error,
                        &result,
                        "");
        if (rc >= 0) {
                return 0;
        }

        fprintf(stderr, "Failed to issue method call: %s\n", error.message);
        return rc;
}

/*
 * Call "ResetFailedUnit" for a single unit on the node named node_name.
 *
 * The node's object path is assembled from NODE_OBJECT_PATH_PREFIX and node_name
 * into client->object_path, then the method is invoked on NODE_INTERFACE via
 * client->api_bus with the unit name as its only (string) argument. The reply
 * message is not inspected.
 *
 * Returns 0 on success. If assembling the object path or the bus call fails, the
 * negative return value of that step is passed on; a failed bus call is also
 * reported on stderr.
 */
static int method_reset_failed_unit_on(Client *client, char *node_name, char *unit) {
        _cleanup_sd_bus_error_ sd_bus_error error = SD_BUS_ERROR_NULL;
        _cleanup_sd_bus_message_ sd_bus_message *result = NULL;

        int rc = assemble_object_path_string(NODE_OBJECT_PATH_PREFIX, node_name, &client->object_path);
        if (rc < 0) {
                return rc;
        }

        rc = sd_bus_call_method(
                        client->api_bus,
                        BC_INTERFACE_BASE_NAME,
                        client->object_path,
                        NODE_INTERFACE,
                        "ResetFailedUnit",
                        &error,
                        &result,
                        "s",
                        unit);
        if (rc >= 0) {
                return 0;
        }

        fprintf(stderr, "Failed to issue method call: %s\n", error.message);
        return rc;
}

static int method_freeze_unit_on(Client *client, char *node_name, char *unit) {
int r = 0;
_cleanup_sd_bus_error_ sd_bus_error error = SD_BUS_ERROR_NULL;
Expand Down Expand Up @@ -320,6 +375,14 @@ int method_restart(Command *command, void *userdata) {
return method_lifecycle_action_on(userdata, command->opargv[0], command->opargv[1], "RestartUnit");
}

/*
 * Command handler for "reset-failed": uses userdata as the client and the first
 * operand as the node name, and returns the result of method_reset_failed_on().
 */
int method_reset_failed(Command *command, void *userdata) {
        Client *client = userdata;
        char *node_name = command->opargv[0];

        return method_reset_failed_on(client, node_name);
}

/*
 * Command handler for "reset-failed-unit": uses userdata as the client, the first
 * operand as the node name and the second operand as the unit name, and returns
 * the result of method_reset_failed_unit_on().
 */
int method_reset_failed_unit(Command *command, void *userdata) {
        Client *client = userdata;
        char *node_name = command->opargv[0];
        char *unit = command->opargv[1];

        return method_reset_failed_unit_on(client, node_name, unit);
}

/*
 * Command handler for "reload": forwards userdata, the first two operands of the
 * command and the method name "ReloadUnit" to method_lifecycle_action_on() and
 * returns its result.
 */
int method_reload(Command *command, void *userdata) {
return method_lifecycle_action_on(userdata, command->opargv[0], command->opargv[1], "ReloadUnit");
}
Expand Down
2 changes: 2 additions & 0 deletions src/client/method-unit-actions.h
Original file line number Diff line number Diff line change
Expand Up @@ -10,6 +10,8 @@
int method_start(Command *command, void *userdata);
int method_stop(Command *command, void *userdata);
int method_restart(Command *command, void *userdata);
int method_reset_failed(Command *command, void *userdata);
int method_reset_failed_unit(Command *command, void *userdata);
int method_reload(Command *command, void *userdata);
int method_freeze(Command *command, void *userdata);
int method_thaw(Command *command, void *userdata);
Expand Down
2 changes: 2 additions & 0 deletions src/controller/node.c
Original file line number Diff line number Diff line change
Expand Up @@ -76,6 +76,8 @@ static const sd_bus_vtable node_vtable[] = {
SD_BUS_METHOD("ThawUnit", "s", "", node_method_passthrough_to_agent, 0),
SD_BUS_METHOD("RestartUnit", "ss", "o", node_method_restart_unit, 0),
SD_BUS_METHOD("ReloadUnit", "ss", "o", node_method_reload_unit, 0),
SD_BUS_METHOD("ResetFailed", "", "", node_method_passthrough_to_agent, 0),
SD_BUS_METHOD("ResetFailedUnit", "s", "", node_method_passthrough_to_agent, 0),
SD_BUS_METHOD("GetUnitProperties", "ss", "a{sv}", node_method_passthrough_to_agent, 0),
SD_BUS_METHOD("GetUnitProperty", "sss", "v", node_method_passthrough_to_agent, 0),
SD_BUS_METHOD("SetUnitProperties", "sba(sv)", "", node_method_set_unit_properties, 0),
Expand Down
27 changes: 27 additions & 0 deletions tests/bluechi_test/bluechictl.py
Original file line number Diff line number Diff line change
Expand Up @@ -134,6 +134,33 @@ def reload_unit(
expected_result,
)

def reset_failed(
    self,
    node_name: str,
    check_result: bool = True,
    expected_result: int = 0,
) -> Tuple[Optional[int], Union[Iterator[bytes], Any, Tuple[bytes, bytes]]]:
    """Issue the ``reset-failed`` command for ``node_name`` through ``self._run``.

    ``check_result`` and ``expected_result`` are handed to ``self._run``
    unchanged, and whatever ``self._run`` returns is returned to the caller.
    """
    description = f"ResetFailed on node {node_name}"
    command = f"reset-failed {node_name}"
    return self._run(description, command, check_result, expected_result)

def reset_failed_unit(
    self,
    node_name: str,
    unit: str,
    check_result: bool = True,
    expected_result: int = 0,
) -> Tuple[Optional[int], Union[Iterator[bytes], Any, Tuple[bytes, bytes]]]:
    """Issue the ``reset-failed-unit`` command for ``unit`` on ``node_name``.

    The command is executed through ``self._run``; ``check_result`` and
    ``expected_result`` are handed over unchanged, and whatever ``self._run``
    returns is returned to the caller.
    """
    description = f"ResetFailedUnit on node {node_name} on unit {unit}"
    command = f"reset-failed-unit {node_name} {unit}"
    return self._run(description, command, check_result, expected_result)

def stop_unit(
self,
node_name: str,
Expand Down
3 changes: 3 additions & 0 deletions tests/tests/tier0/bluechi-reset-failed-unit/main.fmf
Original file line number Diff line number Diff line change
@@ -0,0 +1,3 @@
summary: Test bluechictl reset-failed-unit. Start two units that fail and one that keeps
    running, reset one failed unit and check that only this unit becomes inactive
id: 6c67e96d-db4b-4a54-980c-287e33450abf
Original file line number Diff line number Diff line change
@@ -0,0 +1,62 @@
#
# Copyright Contributors to the Eclipse BlueChi project
#
# SPDX-License-Identifier: LGPL-2.1-or-later

import logging
from typing import Dict

from bluechi_test.config import BluechiAgentConfig, BluechiControllerConfig
from bluechi_test.machine import BluechiAgentMachine, BluechiControllerMachine
from bluechi_test.service import Option, Section, SimpleRemainingService
from bluechi_test.test import BluechiTest

LOGGER = logging.getLogger(__name__)
NODE_FOO = "node-foo"


def exec(ctrl: BluechiControllerMachine, nodes: Dict[str, BluechiAgentMachine]):
    """Reset a single failed unit and verify the other units keep their state."""
    node = nodes[NODE_FOO]

    # Two services get an ExecStart that is expected to fail ("/s"), the third
    # one runs a sleep and is expected to stay active for the whole test
    broken_1 = SimpleRemainingService(name="simple1.service")
    broken_1.set_option(Section.Service, Option.ExecStart, "/s")
    broken_2 = SimpleRemainingService(name="simple2.service")
    broken_2.set_option(Section.Service, Option.ExecStart, "/s")
    sleeper = SimpleRemainingService(name="simple3.service")
    sleeper.set_option(Section.Service, Option.ExecStart, "/bin/sleep 60")

    services = (broken_1, broken_2, sleeper)

    def assert_states(*expected_states):
        # Wait for each service, in order, to reach its expected state
        for service, state in zip(services, expected_states):
            assert node.wait_for_unit_state_to_be(service.name, state)

    for service in services:
        node.install_systemd_service(service)

    for service in services:
        ctrl.bluechictl.start_unit(NODE_FOO, service.name)

    assert_states("failed", "failed", "active")

    # Only the first broken service is reset; the second one has to stay failed
    ctrl.bluechictl.reset_failed_unit(NODE_FOO, broken_1.name)

    assert_states("inactive", "failed", "active")


def test_bluechi_reset_failed(
    bluechi_test: BluechiTest,
    bluechi_node_default_config: BluechiAgentConfig,
    bluechi_ctrl_default_config: BluechiControllerConfig,
):
    """Register one agent named NODE_FOO with the controller and run ``exec``."""
    agent_cfg = bluechi_node_default_config.deep_copy()
    agent_cfg.node_name = NODE_FOO
    bluechi_test.add_bluechi_agent_config(agent_cfg)

    # The controller config is modified in place, not copied
    ctrl_cfg = bluechi_ctrl_default_config
    ctrl_cfg.allowed_node_names = [NODE_FOO]
    bluechi_test.set_bluechi_controller_config(ctrl_cfg)

    bluechi_test.run(exec)
3 changes: 3 additions & 0 deletions tests/tests/tier0/bluechi-reset-failed/main.fmf
Original file line number Diff line number Diff line change
@@ -0,0 +1,3 @@
summary: Test bluechictl reset-failed. Start two units that fail and one that keeps
    running, reset all failed units on the node and check that both failed units become inactive
id: fc5db6a7-1367-450b-b674-35b9541d4f3e
Original file line number Diff line number Diff line change
@@ -0,0 +1,62 @@
#
# Copyright Contributors to the Eclipse BlueChi project
#
# SPDX-License-Identifier: LGPL-2.1-or-later

import logging
from typing import Dict

from bluechi_test.config import BluechiAgentConfig, BluechiControllerConfig
from bluechi_test.machine import BluechiAgentMachine, BluechiControllerMachine
from bluechi_test.service import Option, Section, SimpleRemainingService
from bluechi_test.test import BluechiTest

LOGGER = logging.getLogger(__name__)
NODE_FOO = "node-foo"


def exec(ctrl: BluechiControllerMachine, nodes: Dict[str, BluechiAgentMachine]):
    """Reset all failed units on the node and verify the resulting unit states."""
    node = nodes[NODE_FOO]

    # Two services get an ExecStart that is expected to fail ("/s"), the third
    # one runs a sleep and is expected to stay active for the whole test
    broken_1 = SimpleRemainingService(name="simple1.service")
    broken_1.set_option(Section.Service, Option.ExecStart, "/s")
    broken_2 = SimpleRemainingService(name="simple2.service")
    broken_2.set_option(Section.Service, Option.ExecStart, "/s")
    sleeper = SimpleRemainingService(name="simple3.service")
    sleeper.set_option(Section.Service, Option.ExecStart, "/bin/sleep 60")

    services = (broken_1, broken_2, sleeper)

    def assert_states(*expected_states):
        # Wait for each service, in order, to reach its expected state
        for service, state in zip(services, expected_states):
            assert node.wait_for_unit_state_to_be(service.name, state)

    for service in services:
        node.install_systemd_service(service)

    for service in services:
        ctrl.bluechictl.start_unit(NODE_FOO, service.name)

    assert_states("failed", "failed", "active")

    # Resetting the whole node must clear both failed services at once
    ctrl.bluechictl.reset_failed(NODE_FOO)

    assert_states("inactive", "inactive", "active")


def test_bluechi_reset_failed(
    bluechi_test: BluechiTest,
    bluechi_node_default_config: BluechiAgentConfig,
    bluechi_ctrl_default_config: BluechiControllerConfig,
):
    """Register one agent named NODE_FOO with the controller and run ``exec``."""
    agent_cfg = bluechi_node_default_config.deep_copy()
    agent_cfg.node_name = NODE_FOO
    bluechi_test.add_bluechi_agent_config(agent_cfg)

    # The controller config is modified in place, not copied
    ctrl_cfg = bluechi_ctrl_default_config
    ctrl_cfg.allowed_node_names = [NODE_FOO]
    bluechi_test.set_bluechi_controller_config(ctrl_cfg)

    bluechi_test.run(exec)

0 comments on commit 9f7478d

Please sign in to comment.