From 2b86418ba49a56f5063337e1e6cde2ee54abf73a Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Oto=20=C5=A0=C5=A5=C3=A1va?= Date: Tue, 22 Aug 2023 09:55:03 +0200 Subject: [PATCH 1/9] manager: add option to list PIDs This commit is the groundwork for adding support for debugging via GDB (or another compatible debugger). This way, we can retrieve the subprocesses' PIDs and pass those to the debugger. --- python/knot_resolver/client/commands/pids.py | 52 +++++++++++++++++++ python/knot_resolver/controller/interface.py | 5 ++ .../controller/supervisord/__init__.py | 8 +++ python/knot_resolver/manager/manager.py | 17 ++++-- python/knot_resolver/manager/server.py | 42 ++++++++++++--- 5 files changed, 113 insertions(+), 11 deletions(-) create mode 100644 python/knot_resolver/client/commands/pids.py diff --git a/python/knot_resolver/client/commands/pids.py b/python/knot_resolver/client/commands/pids.py new file mode 100644 index 000000000..c14c734e8 --- /dev/null +++ b/python/knot_resolver/client/commands/pids.py @@ -0,0 +1,52 @@ +import argparse +import json +import sys +from typing import Iterable, List, Optional, Tuple, Type + +from knot_resolver.client.command import Command, CommandArgs, CompWords, register_command +from knot_resolver.utils.requests import request + +PIDS_TYPE = Iterable + + +@register_command +class PidsCommand(Command): + def __init__(self, namespace: argparse.Namespace) -> None: + self.proc_type: Optional[str] = namespace.proc_type + + super().__init__(namespace) + + @staticmethod + def register_args_subparser( + subparser: "argparse._SubParsersAction[argparse.ArgumentParser]", + ) -> Tuple[argparse.ArgumentParser, "Type[Command]"]: + pids = subparser.add_parser("pids", help="list the PIDs of kresd manager subprocesses") + pids.add_argument( + "proc_type", + help="Optional, the type of process to query. May be 'kresd', 'gc', or 'all' (default).", + nargs="?", + default="all", + ) + return pids, PidsCommand + + @staticmethod + def completion(args: List[str], parser: argparse.ArgumentParser) -> CompWords: + return {} + + def run(self, args: CommandArgs) -> None: + response = request(args.socket, "GET", f"pids/{self.proc_type}") + + if response.status == 200: + pids = json.loads(response.body) + if isinstance(pids, PIDS_TYPE): + for pid in pids: + print(pid) + else: + print( + f"Unexpected response type '{type(pids).__name__}' from manager. Expected '{PIDS_TYPE.__name__}'", + file=sys.stderr, + ) + sys.exit(1) + else: + print(response, file=sys.stderr) + sys.exit(1) diff --git a/python/knot_resolver/controller/interface.py b/python/knot_resolver/controller/interface.py index 43c24257e..49808d01a 100644 --- a/python/knot_resolver/controller/interface.py +++ b/python/knot_resolver/controller/interface.py @@ -109,6 +109,7 @@ def __init__(self, config: KresConfig, kresid: KresID) -> None: self._id = kresid self._config = config self._registered_worker: bool = False + self._pid: Optional[int] = None self._config_file: Optional[Path] = None if self.type is SubprocessType.KRESD: @@ -189,6 +190,10 @@ async def _stop(self) -> None: async def _restart(self) -> None: pass + @abstractmethod + async def get_pid(self) -> int: + pass + @abstractmethod def status(self) -> SubprocessStatus: pass diff --git a/python/knot_resolver/controller/supervisord/__init__.py b/python/knot_resolver/controller/supervisord/__init__.py index 347ac1e71..ddb9b29b1 100644 --- a/python/knot_resolver/controller/supervisord/__init__.py +++ b/python/knot_resolver/controller/supervisord/__init__.py @@ -223,6 +223,14 @@ def _restart(self) -> None: fast = _create_fast_proxy(self._config) fast.startProcess(self.name) + @async_in_a_thread + def get_pid(self) -> int: + if self._pid is None: + supervisord = _create_supervisord_proxy(self._config) + info = supervisord.getProcessInfo(self.name) + self._pid = info["pid"] + return self._pid + def get_used_config(self) -> KresConfig: return self._config diff --git a/python/knot_resolver/manager/manager.py b/python/knot_resolver/manager/manager.py index f9c687087..aef7ae855 100644 --- a/python/knot_resolver/manager/manager.py +++ b/python/knot_resolver/manager/manager.py @@ -63,7 +63,7 @@ class KresManager: # pylint: disable=too-many-instance-attributes Instantiate with `KresManager.create()`, not with the usual constructor! """ - def __init__(self, shutdown_trigger: Callable[[int], None], _i_know_what_i_am_doing: bool = False): + def __init__(self, _i_know_what_i_am_doing: bool = False): if not _i_know_what_i_am_doing: logger.error( "Trying to create an instance of KresManager using normal constructor. Please use " @@ -80,19 +80,18 @@ def __init__(self, shutdown_trigger: Callable[[int], None], _i_know_what_i_am_do self._watchdog_task: Optional["asyncio.Task[None]"] = None self._fix_counter: _FixCounter = _FixCounter() self._config_store: ConfigStore - self._shutdown_trigger: Callable[[int], None] = shutdown_trigger + self._shutdown_triggers: List[Callable[[int], None]] = [] @staticmethod async def create( subprocess_controller: SubprocessController, config_store: ConfigStore, - shutdown_trigger: Callable[[int], None], ) -> "KresManager": """ Creates new instance of KresManager. """ - inst = KresManager(shutdown_trigger, _i_know_what_i_am_doing=True) + inst = KresManager(_i_know_what_i_am_doing=True) await inst._async_init(subprocess_controller, config_store) # noqa: SLF001 return inst @@ -211,6 +210,9 @@ async def _stop_gc(self) -> None: await self._gc.stop() self._gc = None + def add_shutdown_trigger(self, trigger: Callable[[int], None]) -> None: + self._shutdown_triggers.append(trigger) + async def validate_config(self, _old: KresConfig, new: KresConfig) -> Result[NoneType, str]: async with self._manager_lock: if _old.rate_limiting != new.rate_limiting: @@ -233,6 +235,10 @@ async def validate_config(self, _old: KresConfig, new: KresConfig) -> Result[Non logger.debug("Canary process test passed.") return Result.ok(None) + async def get_pids(self, proc_type: Optional[SubprocessType]) -> List[int]: + processes = await self._controller.get_all_running_instances() + return [await pr.get_pid() for pr in processes if proc_type is None or pr.type == proc_type] + async def _reload_system_state(self) -> None: async with self._manager_lock: self._workers = [] @@ -338,7 +344,8 @@ async def forced_shutdown(self) -> None: logger.warning("Collecting all remaining workers...") await self._reload_system_state() logger.warning("Terminating...") - self._shutdown_trigger(1) + for trigger in self._shutdown_triggers: + trigger(1) async def _instability_handler(self) -> None: if self._fix_counter.is_too_high(): diff --git a/python/knot_resolver/manager/server.py b/python/knot_resolver/manager/server.py index b09ff7b99..5e0e484ed 100644 --- a/python/knot_resolver/manager/server.py +++ b/python/knot_resolver/manager/server.py @@ -21,6 +21,7 @@ from knot_resolver.constants import CONFIG_FILE, USER from knot_resolver.controller import get_best_controller_implementation from knot_resolver.controller.exceptions import SubprocessControllerExecError +from knot_resolver.controller.interface import SubprocessType from knot_resolver.controller.registered_workers import command_single_registered_worker from knot_resolver.datamodel import kres_config_json_schema from knot_resolver.datamodel.cache_schema import CacheClearRPCSchema @@ -87,7 +88,7 @@ class Server: # This is top-level class containing pretty much everything. Instead of global # variables, we use instance attributes. That's why there are so many and it's # ok. - def __init__(self, store: ConfigStore, config_path: Optional[Path]): + def __init__(self, store: ConfigStore, config_path: Optional[Path], manager: KresManager): # config store & server dynamic reconfiguration self.config_store = store @@ -100,6 +101,7 @@ def __init__(self, store: ConfigStore, config_path: Optional[Path]): self._config_path: Optional[Path] = config_path self._exit_code: int = 0 self._shutdown_event = asyncio.Event() + self._manager = manager async def _reconfigure(self, config: KresConfig) -> None: await self._reconfigure_listen_address(config) @@ -323,6 +325,30 @@ async def _handler_reload(self, _request: web.Request) -> web.Response: await self._reload_config() return web.Response(text="Reloading...") + async def _handler_pids(self, request: web.Request) -> web.Response: + """ + Route handler for listing PIDs of subprocesses + """ + + proc_type: Optional[SubprocessType] = None + + if "path" in request.match_info and len(request.match_info["path"]) > 0: + ptstr = request.match_info["path"] + if ptstr == "/kresd": + proc_type = SubprocessType.KRESD + elif ptstr == "/gc": + proc_type = SubprocessType.GC + elif ptstr == "/all": + proc_type = None + else: + return web.Response(text=f"Invalid process type '{ptstr}'", status=400) + + return web.json_response( + await self._manager.get_pids(proc_type), + headers={"Access-Control-Allow-Origin": "*"}, + dumps=partial(json.dumps, indent=4), + ) + def _setup_routes(self) -> None: self.app.add_routes( [ @@ -339,6 +365,7 @@ def _setup_routes(self) -> None: web.get("/metrics/json", self._handler_metrics_json), web.get("/metrics/prometheus", self._handler_metrics_prometheus), web.post("/cache/clear", self._handler_cache_clear), + web.get("/pids{path:.*}", self._handler_pids), ] ) @@ -410,7 +437,7 @@ async def _init_config_store(config: Dict[str, Any]) -> ConfigStore: return ConfigStore(config_validated) -async def _init_manager(config_store: ConfigStore, server: Server) -> KresManager: +async def _init_manager(config_store: ConfigStore) -> KresManager: """ Called asynchronously when the application initializes. """ @@ -420,7 +447,7 @@ async def _init_manager(config_store: ConfigStore, server: Server) -> KresManage # Create KresManager. This will perform autodetection of available service managers and # select the most appropriate to use (or use the one configured directly) - manager = await KresManager.create(controller, config_store, server.trigger_shutdown) + manager = await KresManager.create(controller, config_store) logger.info("Initial configuration applied. Process manager initialized...") return manager @@ -559,11 +586,14 @@ async def start_server(config: Path = CONFIG_FILE) -> int: # noqa: PLR0915 await files.init_files_watchdog(config_store) + # After we have loaded the configuration, we can start worrying about subprocess management. + manager = await _init_manager(config_store) + # prepare instance of the server (no side effects) - server = Server(config_store, config) + server = Server(config_store, config, manager) - # After we have loaded the configuration, we can start worring about subprocess management. - manager = await _init_manager(config_store, server) + # add Server's shutdown trigger to the manager + manager.add_shutdown_trigger(server.trigger_shutdown) except SubprocessControllerExecError as e: # if we caught this exception, some component wants to perform a reexec during startup. Most likely, it would From 4545e040bf81be5837774f0462ed741c89fae024 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Oto=20=C5=A0=C5=A5=C3=A1va?= Date: Thu, 24 Aug 2023 08:05:21 +0200 Subject: [PATCH 2/9] kresctl: add command to run a debugger on subprocesses --- poe | 2 +- python/knot_resolver/client/commands/debug.py | 96 +++++++++++++++++++ python/knot_resolver/client/main.py | 17 +++- 3 files changed, 113 insertions(+), 2 deletions(-) create mode 100644 python/knot_resolver/client/commands/debug.py diff --git a/poe b/poe index d1f58894c..815428a37 100755 --- a/poe +++ b/poe @@ -1,4 +1,4 @@ #!/bin/sh script_dir="$(dirname "$(readlink -f "$0")")" -exec poetry --directory "$script_dir" run poe --root "$script_dir" "$@" +exec poetry --directory "$script_dir" run -- poe --root "$script_dir" "$@" diff --git a/python/knot_resolver/client/commands/debug.py b/python/knot_resolver/client/commands/debug.py new file mode 100644 index 000000000..0d1129277 --- /dev/null +++ b/python/knot_resolver/client/commands/debug.py @@ -0,0 +1,96 @@ +import argparse +import json +import os +import sys +from typing import List, Optional, Tuple, Type + +from knot_resolver.client.command import Command, CommandArgs, CompWords, register_command +from knot_resolver.utils import which +from knot_resolver.utils.requests import request + +PIDS_TYPE = List + + +@register_command +class DebugCommand(Command): + def __init__(self, namespace: argparse.Namespace) -> None: + self.proc_type: Optional[str] = namespace.proc_type + self.sudo: bool = namespace.sudo + self.gdb: str = namespace.gdb + self.gdb_args: List[str] = namespace.extra + super().__init__(namespace) + + @staticmethod + def register_args_subparser( + subparser: "argparse._SubParsersAction[argparse.ArgumentParser]", + ) -> Tuple[argparse.ArgumentParser, "Type[Command]"]: + debug = subparser.add_parser( + "debug", + help="Run GDB on the manager's subprocesses - runs with sudo by default to avoid ptrace_scope issues", + ) + debug.add_argument( + "proc_type", + help="Optional, the type of process to debug. May be 'kresd' (default), 'gc', or 'all'.", + type=str, + nargs="?", + default="kresd", + ) + debug.add_argument( + "--no-sudo", + dest="sudo", + help="Do not run GDB with sudo (may not work if your ptrace_scope is 1 or higher)", + action="store_false", + ) + debug.add_argument( + "--gdb", + help="GDB command (may be a command on PATH, or an absolute path)", + type=str, + default="gdb", + ) + return debug, DebugCommand + + @staticmethod + def completion(args: List[str], parser: argparse.ArgumentParser) -> CompWords: + return {} + + def run(self, args: CommandArgs) -> None: # noqa: PLR0912, PLR0915 + gdb_cmd = str(which.which(self.gdb)) + sudo_cmd = str(which.which("sudo")) + + response = request(args.socket, "GET", f"pids/{self.proc_type}") + if response.status != 200: + print(response, file=sys.stderr) + sys.exit(1) + + pids = json.loads(response.body) + if not isinstance(pids, PIDS_TYPE): + print( + f"Unexpected response type '{type(pids).__name__}' from manager. Expected '{PIDS_TYPE.__name__}'", + file=sys.stderr, + ) + sys.exit(1) + if len(pids) == 0: + print( + f"There are no processes of type '{self.proc_type}' available to debug", + file=sys.stderr, + ) + + exec_args = [] + + if self.sudo: + exec_args.extend([sudo_cmd, "--"]) + + # attach to PIDs + exec_args.extend([gdb_cmd, "--pid", str(pids[0])]) + inferior = 2 + for pid in pids[1:]: + exec_args.extend(["-init-eval-command", "add-inferior"]) + exec_args.extend(["-init-eval-command", f"inferior {inferior}"]) + exec_args.extend(["-init-eval-command", f"attach {pid}"]) + inferior += 1 + + exec_args.extend(["-init-eval-command", "inferior 1"]) + exec_args.extend(self.gdb_args) + + print(f"exec_args = {exec_args}") + os.execl(*exec_args) diff --git a/python/knot_resolver/client/main.py b/python/knot_resolver/client/main.py index 75cd6a77f..461b7fc42 100644 --- a/python/knot_resolver/client/main.py +++ b/python/knot_resolver/client/main.py @@ -1,6 +1,7 @@ import argparse import importlib import os +import sys from knot_resolver.constants import VERSION @@ -68,7 +69,21 @@ def main() -> None: parser = create_main_argument_parser() install_commands_parsers(parser) - namespace = parser.parse_args() + # TODO: This is broken with unpatched versions of poethepoet, because they drop the `--` pseudo-argument. + # Patch submitted at . + try: + pa_index = sys.argv.index("--", 1) + argv_to_parse = sys.argv[1:pa_index] + argv_extra = sys.argv[(pa_index + 1) :] + except ValueError: + argv_to_parse = sys.argv[1:] + argv_extra = [] + + namespace = parser.parse_args(argv_to_parse) + if hasattr(namespace, "extra"): + raise TypeError("'extra' is already an attribute - this is disallowed for commands") + namespace.extra = argv_extra + client = KresClient(namespace, parser) client.execute() From 1d772a780a61cc3ecde5d2e0d165f74e6cae2d71 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Oto=20=C5=A0=C5=A5=C3=A1va?= Date: Mon, 18 Sep 2023 15:53:40 +0200 Subject: [PATCH 3/9] manager: add more verbose PIDs listing --- python/knot_resolver/client/commands/debug.py | 18 ++++++------- python/knot_resolver/client/commands/pids.py | 27 ++++++++++++++----- python/knot_resolver/manager/manager.py | 11 ++++++-- python/knot_resolver/manager/server.py | 6 ++--- 4 files changed, 41 insertions(+), 21 deletions(-) diff --git a/python/knot_resolver/client/commands/debug.py b/python/knot_resolver/client/commands/debug.py index 0d1129277..44ab33ebd 100644 --- a/python/knot_resolver/client/commands/debug.py +++ b/python/knot_resolver/client/commands/debug.py @@ -8,7 +8,7 @@ from knot_resolver.utils import which from knot_resolver.utils.requests import request -PIDS_TYPE = List +PROCS_TYPE = List @register_command @@ -57,19 +57,19 @@ def run(self, args: CommandArgs) -> None: # noqa: PLR0912, PLR0915 gdb_cmd = str(which.which(self.gdb)) sudo_cmd = str(which.which("sudo")) - response = request(args.socket, "GET", f"pids/{self.proc_type}") + response = request(args.socket, "GET", f"processes/{self.proc_type}") if response.status != 200: print(response, file=sys.stderr) sys.exit(1) - pids = json.loads(response.body) - if not isinstance(pids, PIDS_TYPE): + procs = json.loads(response.body) + if not isinstance(procs, PROCS_TYPE): print( - f"Unexpected response type '{type(pids).__name__}' from manager. Expected '{PIDS_TYPE.__name__}'", + f"Unexpected response type '{type(procs).__name__}' from manager. Expected '{PROCS_TYPE.__name__}'", file=sys.stderr, ) sys.exit(1) - if len(pids) == 0: + if len(procs) == 0: print( f"There are no processes of type '{self.proc_type}' available to debug", file=sys.stderr, @@ -81,12 +81,12 @@ def run(self, args: CommandArgs) -> None: # noqa: PLR0912, PLR0915 exec_args.extend([sudo_cmd, "--"]) # attach to PIDs - exec_args.extend([gdb_cmd, "--pid", str(pids[0])]) + exec_args.extend([gdb_cmd, "--pid", str(procs[0]["pid"])]) inferior = 2 - for pid in pids[1:]: + for proc in procs[1:]: exec_args.extend(["-init-eval-command", "add-inferior"]) exec_args.extend(["-init-eval-command", f"inferior {inferior}"]) - exec_args.extend(["-init-eval-command", f"attach {pid}"]) + exec_args.extend(["-init-eval-command", f'attach {proc["pid"]}']) inferior += 1 exec_args.extend(["-init-eval-command", "inferior 1"]) diff --git a/python/knot_resolver/client/commands/pids.py b/python/knot_resolver/client/commands/pids.py index c14c734e8..2a7a5e85f 100644 --- a/python/knot_resolver/client/commands/pids.py +++ b/python/knot_resolver/client/commands/pids.py @@ -6,13 +6,14 @@ from knot_resolver.client.command import Command, CommandArgs, CompWords, register_command from knot_resolver.utils.requests import request -PIDS_TYPE = Iterable +PROCESSES_TYPE = Iterable @register_command class PidsCommand(Command): def __init__(self, namespace: argparse.Namespace) -> None: self.proc_type: Optional[str] = namespace.proc_type + self.verbose: int = namespace.verbose super().__init__(namespace) @@ -27,6 +28,13 @@ def register_args_subparser( nargs="?", default="all", ) + pids.add_argument( + "-v", + "--verbose", + help="Optional, makes the output more verbose, in a machine-readable format.", + action="count", + default=0, + ) return pids, PidsCommand @staticmethod @@ -34,16 +42,21 @@ def completion(args: List[str], parser: argparse.ArgumentParser) -> CompWords: return {} def run(self, args: CommandArgs) -> None: - response = request(args.socket, "GET", f"pids/{self.proc_type}") + response = request(args.socket, "GET", f"processes/{self.proc_type}") if response.status == 200: - pids = json.loads(response.body) - if isinstance(pids, PIDS_TYPE): - for pid in pids: - print(pid) + processes = json.loads(response.body) + if isinstance(processes, PROCESSES_TYPE): + if self.verbose < 1: + for p in processes: + print(p["pid"]) + else: + for p in processes: + print(p) + else: print( - f"Unexpected response type '{type(pids).__name__}' from manager. Expected '{PIDS_TYPE.__name__}'", + f"Unexpected response type '{type(processes).__name__}' from manager. Expected '{PROCESSES_TYPE.__name__}'", file=sys.stderr, ) sys.exit(1) diff --git a/python/knot_resolver/manager/manager.py b/python/knot_resolver/manager/manager.py index aef7ae855..a26bb8061 100644 --- a/python/knot_resolver/manager/manager.py +++ b/python/knot_resolver/manager/manager.py @@ -55,6 +55,13 @@ async def _deny_max_worker_changes(config_old: KresConfig, config_new: KresConfi return Result.ok(None) +async def _subprocess_desc(subprocess: Subprocess) -> object: + return { + "type": subprocess.type.name, + "pid": await subprocess.get_pid(), + } + + class KresManager: # pylint: disable=too-many-instance-attributes """ Core of the whole operation. Orchestrates individual instances under some @@ -235,9 +242,9 @@ async def validate_config(self, _old: KresConfig, new: KresConfig) -> Result[Non logger.debug("Canary process test passed.") return Result.ok(None) - async def get_pids(self, proc_type: Optional[SubprocessType]) -> List[int]: + async def get_processes(self, proc_type: Optional[SubprocessType]) -> List[object]: processes = await self._controller.get_all_running_instances() - return [await pr.get_pid() for pr in processes if proc_type is None or pr.type == proc_type] + return [await _subprocess_desc(pr) for pr in processes if proc_type is None or pr.type == proc_type] async def _reload_system_state(self) -> None: async with self._manager_lock: diff --git a/python/knot_resolver/manager/server.py b/python/knot_resolver/manager/server.py index 5e0e484ed..06fab0cfc 100644 --- a/python/knot_resolver/manager/server.py +++ b/python/knot_resolver/manager/server.py @@ -325,7 +325,7 @@ async def _handler_reload(self, _request: web.Request) -> web.Response: await self._reload_config() return web.Response(text="Reloading...") - async def _handler_pids(self, request: web.Request) -> web.Response: + async def _handler_processes(self, request: web.Request) -> web.Response: """ Route handler for listing PIDs of subprocesses """ @@ -344,7 +344,7 @@ async def _handler_pids(self, request: web.Request) -> web.Response: return web.Response(text=f"Invalid process type '{ptstr}'", status=400) return web.json_response( - await self._manager.get_pids(proc_type), + await self._manager.get_processes(proc_type), headers={"Access-Control-Allow-Origin": "*"}, dumps=partial(json.dumps, indent=4), ) @@ -365,7 +365,7 @@ def _setup_routes(self) -> None: web.get("/metrics/json", self._handler_metrics_json), web.get("/metrics/prometheus", self._handler_metrics_prometheus), web.post("/cache/clear", self._handler_cache_clear), - web.get("/pids{path:.*}", self._handler_pids), + web.get("/processes{path:.*}", self._handler_processes), ] ) From 055d53dbcbff81c86751da5952046211e2d51174 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Oto=20=C5=A0=C5=A5=C3=A1va?= Date: Fri, 22 Sep 2023 10:07:29 +0200 Subject: [PATCH 4/9] manager, kresctl: print status in pids --- python/knot_resolver/manager/manager.py | 1 + 1 file changed, 1 insertion(+) diff --git a/python/knot_resolver/manager/manager.py b/python/knot_resolver/manager/manager.py index a26bb8061..952c8b7d2 100644 --- a/python/knot_resolver/manager/manager.py +++ b/python/knot_resolver/manager/manager.py @@ -59,6 +59,7 @@ async def _subprocess_desc(subprocess: Subprocess) -> object: return { "type": subprocess.type.name, "pid": await subprocess.get_pid(), + "status": subprocess.status().name, } From d0c52a1c31a2b2bcef3cf90a171912942dce9461 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Oto=20=C5=A0=C5=A5=C3=A1va?= Date: Wed, 4 Oct 2023 15:01:07 +0200 Subject: [PATCH 5/9] kresctl: debug command help message --- python/knot_resolver/client/commands/debug.py | 21 ++++++++++++++++--- 1 file changed, 18 insertions(+), 3 deletions(-) diff --git a/python/knot_resolver/client/commands/debug.py b/python/knot_resolver/client/commands/debug.py index 44ab33ebd..421e0edb7 100644 --- a/python/knot_resolver/client/commands/debug.py +++ b/python/knot_resolver/client/commands/debug.py @@ -77,11 +77,15 @@ def run(self, args: CommandArgs) -> None: # noqa: PLR0912, PLR0915 exec_args = [] + # Put `sudo --` at the beginning of the command. if self.sudo: exec_args.extend([sudo_cmd, "--"]) - # attach to PIDs - exec_args.extend([gdb_cmd, "--pid", str(procs[0]["pid"])]) + # Attach GDB to processes - the first process gets passed as a regular `--pid` argument to GDB, the + # rest are attached using the `add-inferior` and `attach` GDB commands. This way, we are now debugging + # multiple processes. + exec_args.extend([gdb_cmd]) + exec_args.extend(["-init-eval-command", f'attach {procs[0]["pid"]}']) inferior = 2 for proc in procs[1:]: exec_args.extend(["-init-eval-command", "add-inferior"]) @@ -89,7 +93,18 @@ def run(self, args: CommandArgs) -> None: # noqa: PLR0912, PLR0915 exec_args.extend(["-init-eval-command", f'attach {proc["pid"]}']) inferior += 1 - exec_args.extend(["-init-eval-command", "inferior 1"]) + num_inferiors = inferior - 1 + if num_inferiors > 1: + # Now we switch back to the first process and add additional provided GDB arguments. + exec_args.extend(["-init-eval-command", "inferior 1"]) + exec_args.extend( + [ + "-init-eval-command", + "echo \\n\\nYou are now debugging multiple Knot Resolver processes. To switch between " + "them, use the 'inferior ' command, where is an integer from 1 to " + f"{num_inferiors}.\\n\\n", + ] + ) exec_args.extend(self.gdb_args) print(f"exec_args = {exec_args}") From e35e96b6384a4351c71759c25c3aedea53207618 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Oto=20=C5=A0=C5=A5=C3=A1va?= Date: Thu, 5 Oct 2023 15:49:00 +0200 Subject: [PATCH 6/9] kresctl debug: allow paths for '--gdb' and add existence checks --- python/knot_resolver/client/commands/debug.py | 35 +++++++++++++++---- 1 file changed, 29 insertions(+), 6 deletions(-) diff --git a/python/knot_resolver/client/commands/debug.py b/python/knot_resolver/client/commands/debug.py index 421e0edb7..3350fda59 100644 --- a/python/knot_resolver/client/commands/debug.py +++ b/python/knot_resolver/client/commands/debug.py @@ -2,6 +2,7 @@ import json import os import sys +from pathlib import Path from typing import List, Optional, Tuple, Type from knot_resolver.client.command import Command, CommandArgs, CompWords, register_command @@ -45,7 +46,7 @@ def register_args_subparser( "--gdb", help="GDB command (may be a command on PATH, or an absolute path)", type=str, - default="gdb", + default=None, ) return debug, DebugCommand @@ -54,8 +55,24 @@ def completion(args: List[str], parser: argparse.ArgumentParser) -> CompWords: return {} def run(self, args: CommandArgs) -> None: # noqa: PLR0912, PLR0915 - gdb_cmd = str(which.which(self.gdb)) - sudo_cmd = str(which.which("sudo")) + if self.gdb is None: + try: + gdb_cmd = str(which.which("gdb")) + except RuntimeError: + print("Could not find 'gdb' in $PATH. Is GDB installed?", file=sys.stderr) + sys.exit(1) + elif "/" not in self.gdb: + try: + gdb_cmd = str(which.which(self.gdb)) + except RuntimeError: + print(f"Could not find '{self.gdb}' in $PATH.", file=sys.stderr) + sys.exit(1) + else: + gdb_cmd_path = Path(self.gdb).absolute() + if not gdb_cmd_path.exists(): + print(f"Could not find '{self.gdb}'.", file=sys.stderr) + sys.exit(1) + gdb_cmd = str(gdb_cmd_path) response = request(args.socket, "GET", f"processes/{self.proc_type}") if response.status != 200: @@ -79,12 +96,18 @@ def run(self, args: CommandArgs) -> None: # noqa: PLR0912, PLR0915 # Put `sudo --` at the beginning of the command. if self.sudo: + try: + sudo_cmd = str(which.which("sudo")) + except RuntimeError: + print("Could not find 'sudo' in $PATH. Is sudo installed?", file=sys.stderr) + sys.exit(1) exec_args.extend([sudo_cmd, "--"]) - # Attach GDB to processes - the first process gets passed as a regular `--pid` argument to GDB, the - # rest are attached using the `add-inferior` and `attach` GDB commands. This way, we are now debugging - # multiple processes. + # Attach GDB to processes - the processes are attached using the `add-inferior` and `attach` GDB + # commands. This way, we can debug multiple processes. exec_args.extend([gdb_cmd]) + exec_args.extend(["-init-eval-command", "set detach-on-fork off"]) + exec_args.extend(["-init-eval-command", "set schedule-multiple on"]) exec_args.extend(["-init-eval-command", f'attach {procs[0]["pid"]}']) inferior = 2 for proc in procs[1:]: From 8531ba9a4bc61610e063efd5ad7460c9c1be995d Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Oto=20=C5=A0=C5=A5=C3=A1va?= Date: Fri, 15 Mar 2024 10:16:16 +0100 Subject: [PATCH 7/9] kresctl debug: adjust defaults, documentation --- doc/user/manager-client.rst | 91 +++++++++++++++++++ python/knot_resolver/client/commands/debug.py | 10 +- python/knot_resolver/client/commands/pids.py | 20 ++-- 3 files changed, 105 insertions(+), 16 deletions(-) diff --git a/doc/user/manager-client.rst b/doc/user/manager-client.rst index abeab2c93..03a2d9eb8 100644 --- a/doc/user/manager-client.rst +++ b/doc/user/manager-client.rst @@ -304,3 +304,94 @@ single ``kresctl`` command. command is run. Requires a connection to the management API. + + +.. option:: pids + + Lists the PIDs of the Manager's subprocesses, separated by newlines. + + .. option:: --json + + Makes the output more verbose, in JSON. In addition to the subprocesses' + PIDs, it also prints their types and statuses. + + .. option:: [proc_type] + + :default: ``all`` + + Optional. The type of process to query. See :ref:`Subprocess types + ` for more info. + + +.. option:: debug + + Executes a GDB-compatible debugger and attaches it to the Manager's + subprocesses. By default, the debugger is ``gdb`` and the subprocesses are + only the ``kresd`` workers. + + .. warning:: + + The ``debug`` command is a utility for Knot Resolver developers and is + not intended to be used by end-users. Running this command **will** make + your resolver unresponsive. + + .. note:: + + Modern kernels will prevent debuggers from tracing processes that are + not their descendants, which is exactly the scenario that happens with + ``kresctl debug``. There are three ways to work around this, listed in + the order in which they are preferred in terms of security: + + 1. Grant the debugger the ``cap_sys_ptrace`` capability + (**recommended**) + + * For ``gdb``, this may be achieved by using the ``setcap`` + command like so: + + .. code-block:: bash + + sudo setcap cap_sys_ptrace=eip /usr/bin/gdb + + 2. Run the debugger as root + + * You may use the ``--sudo`` option to achieve this + + 3. Set ``/proc/sys/kernel/yama/ptrace_scope`` to ``0`` + + * This will allow **all** programs in your current session to + trace each other. Handle with care! + + .. note:: + + This command will only work if executed on the same machine where Knot + Resolver is running. Remote debugging is currently not supported. + + .. option:: [proc_type] + + :default: ``kresd`` + + Optional. The type of process to debug. See :ref:`Subprocess types + ` for more info. + + .. option:: --sudo + + Run the debugger with sudo. + + .. option:: --gdb + + Use a custom GDB executable. This may be a command on ``PATH``, or an + absolute path to an executable. + + +.. _manager-client-subprocess-types: + +Subprocess types +---------------- + +Some of ``kresctl``'s commands (like :option:`pids` and :option:`debug`) take a subprocess +type value determining which subprocesses will be affected by them. The possible +values are as follows: + +* ``kresd`` -- the worker daemons +* ``gc`` -- the cache garbage collector +* ``all`` -- all of the Manager's subprocesses diff --git a/python/knot_resolver/client/commands/debug.py b/python/knot_resolver/client/commands/debug.py index 3350fda59..cd12db50d 100644 --- a/python/knot_resolver/client/commands/debug.py +++ b/python/knot_resolver/client/commands/debug.py @@ -27,7 +27,7 @@ def register_args_subparser( ) -> Tuple[argparse.ArgumentParser, "Type[Command]"]: debug = subparser.add_parser( "debug", - help="Run GDB on the manager's subprocesses - runs with sudo by default to avoid ptrace_scope issues", + help="Run GDB on the manager's subprocesses", ) debug.add_argument( "proc_type", @@ -37,14 +37,14 @@ def register_args_subparser( default="kresd", ) debug.add_argument( - "--no-sudo", + "--sudo", dest="sudo", - help="Do not run GDB with sudo (may not work if your ptrace_scope is 1 or higher)", - action="store_false", + help="Run GDB with sudo", + action="store_true", ) debug.add_argument( "--gdb", - help="GDB command (may be a command on PATH, or an absolute path)", + help="Custom GDB executable (may be a command on PATH, or an absolute path)", type=str, default=None, ) diff --git a/python/knot_resolver/client/commands/pids.py b/python/knot_resolver/client/commands/pids.py index 2a7a5e85f..a1ab5f8c8 100644 --- a/python/knot_resolver/client/commands/pids.py +++ b/python/knot_resolver/client/commands/pids.py @@ -13,7 +13,7 @@ class PidsCommand(Command): def __init__(self, namespace: argparse.Namespace) -> None: self.proc_type: Optional[str] = namespace.proc_type - self.verbose: int = namespace.verbose + self.json: int = namespace.json super().__init__(namespace) @@ -21,7 +21,7 @@ def __init__(self, namespace: argparse.Namespace) -> None: def register_args_subparser( subparser: "argparse._SubParsersAction[argparse.ArgumentParser]", ) -> Tuple[argparse.ArgumentParser, "Type[Command]"]: - pids = subparser.add_parser("pids", help="list the PIDs of kresd manager subprocesses") + pids = subparser.add_parser("pids", help="List the PIDs of the Manager's subprocesses") pids.add_argument( "proc_type", help="Optional, the type of process to query. May be 'kresd', 'gc', or 'all' (default).", @@ -29,11 +29,10 @@ def register_args_subparser( default="all", ) pids.add_argument( - "-v", - "--verbose", - help="Optional, makes the output more verbose, in a machine-readable format.", - action="count", - default=0, + "--json", + help="Optional, makes the output more verbose, in JSON.", + action="store_true", + default=False, ) return pids, PidsCommand @@ -47,12 +46,11 @@ def run(self, args: CommandArgs) -> None: if response.status == 200: processes = json.loads(response.body) if isinstance(processes, PROCESSES_TYPE): - if self.verbose < 1: - for p in processes: - print(p["pid"]) + if self.json: + print(json.dumps(processes, indent=2)) else: for p in processes: - print(p) + print(p["pid"]) else: print( From 243b30c9dbdc1500706db029f88c6c1d3bed3e08 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Oto=20=C5=A0=C5=A5=C3=A1va?= Date: Mon, 18 Mar 2024 12:21:28 +0100 Subject: [PATCH 8/9] kresctl debug: add --print-only and be silent by default --- doc/user/manager-client.rst | 5 +++++ python/knot_resolver/client/commands/debug.py | 16 +++++++++++++--- 2 files changed, 18 insertions(+), 3 deletions(-) diff --git a/doc/user/manager-client.rst b/doc/user/manager-client.rst index 03a2d9eb8..7d43a07f1 100644 --- a/doc/user/manager-client.rst +++ b/doc/user/manager-client.rst @@ -382,6 +382,11 @@ single ``kresctl`` command. Use a custom GDB executable. This may be a command on ``PATH``, or an absolute path to an executable. + .. option:: --print-only + + Prints the GDB command line into ``stderr`` as a Python array, does not + execute GDB. + .. _manager-client-subprocess-types: diff --git a/python/knot_resolver/client/commands/debug.py b/python/knot_resolver/client/commands/debug.py index cd12db50d..5d9a81df0 100644 --- a/python/knot_resolver/client/commands/debug.py +++ b/python/knot_resolver/client/commands/debug.py @@ -18,6 +18,7 @@ def __init__(self, namespace: argparse.Namespace) -> None: self.proc_type: Optional[str] = namespace.proc_type self.sudo: bool = namespace.sudo self.gdb: str = namespace.gdb + self.print_only: bool = namespace.print_only self.gdb_args: List[str] = namespace.extra super().__init__(namespace) @@ -41,6 +42,7 @@ def register_args_subparser( dest="sudo", help="Run GDB with sudo", action="store_true", + default=False, ) debug.add_argument( "--gdb", @@ -48,6 +50,12 @@ def register_args_subparser( type=str, default=None, ) + debug.add_argument( + "--print-only", + help="Prints the GDB command line into stderr as a Python array, does not execute GDB", + action="store_true", + default=False, + ) return debug, DebugCommand @staticmethod @@ -105,7 +113,7 @@ def run(self, args: CommandArgs) -> None: # noqa: PLR0912, PLR0915 # Attach GDB to processes - the processes are attached using the `add-inferior` and `attach` GDB # commands. This way, we can debug multiple processes. - exec_args.extend([gdb_cmd]) + exec_args.extend([gdb_cmd, "--"]) exec_args.extend(["-init-eval-command", "set detach-on-fork off"]) exec_args.extend(["-init-eval-command", "set schedule-multiple on"]) exec_args.extend(["-init-eval-command", f'attach {procs[0]["pid"]}']) @@ -130,5 +138,7 @@ def run(self, args: CommandArgs) -> None: # noqa: PLR0912, PLR0915 ) exec_args.extend(self.gdb_args) - print(f"exec_args = {exec_args}") - os.execl(*exec_args) + if self.print_only: + print(f"{exec_args}") + else: + os.execl(*exec_args) From cefe42bc85f2414b623112e8dc92e8b630b5ab6b Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Ale=C5=A1=20Mr=C3=A1zek?= Date: Tue, 3 Dec 2024 11:40:45 +0100 Subject: [PATCH 9/9] doc: debugging with kresctl moved to dev --- doc/dev/debugging-with-kresctl.rst | 109 +++++++++++++++++++++++++++++ doc/dev/index.rst | 7 ++ doc/user/manager-client.rst | 96 ------------------------- 3 files changed, 116 insertions(+), 96 deletions(-) create mode 100644 doc/dev/debugging-with-kresctl.rst diff --git a/doc/dev/debugging-with-kresctl.rst b/doc/dev/debugging-with-kresctl.rst new file mode 100644 index 000000000..53fcddd11 --- /dev/null +++ b/doc/dev/debugging-with-kresctl.rst @@ -0,0 +1,109 @@ +.. SPDX-License-Identifier: GPL-3.0-or-later + +.. _debugging-with-kresctl: + +********************** +Debugging with kresctl +********************** + +Knot Resolver is made up of several independent components, +so it can be difficult to debug the individual parts. +To help with this, there is an option in the kresctl utility +that can run GDB-compatible debugger on a specific component of the resolver, see the ``debug`` command. + +.. program:: kresctl + +.. option:: pids + + Lists the PIDs of the Manager's subprocesses, separated by newlines. + + .. option:: --json + + Makes the output more verbose, in JSON. In addition to the subprocesses' + PIDs, it also prints their types and statuses. + + .. option:: [proc_type] + + :default: all + + Optional. The type of process to query. See :ref:`Subprocess types + ` for more info. + + +.. option:: debug + + Executes a GDB-compatible debugger and attaches it to the Manager's + subprocesses. By default, the debugger is ``gdb`` and the subprocesses are + only the ``kresd`` workers. + + .. warning:: + + The ``debug`` command is a utility for Knot Resolver developers and is + not intended to be used by end-users. Running this command **will** make + your resolver unresponsive. + + .. note:: + + Modern kernels will prevent debuggers from tracing processes that are + not their descendants, which is exactly the scenario that happens with + ``kresctl debug``. There are three ways to work around this, listed in + the order in which they are preferred in terms of security: + + 1. Grant the debugger the ``cap_sys_ptrace`` capability + (**recommended**) + + * For ``gdb``, this may be achieved by using the ``setcap`` + command like so: + + .. code-block:: bash + + sudo setcap cap_sys_ptrace=eip /usr/bin/gdb + + 2. Run the debugger as root + + * You may use the ``--sudo`` option to achieve this + + 3. Set ``/proc/sys/kernel/yama/ptrace_scope`` to ``0`` + + * This will allow **all** programs in your current session to + trace each other. Handle with care! + + .. note:: + + This command will only work if executed on the same machine where Knot + Resolver is running. Remote debugging is currently not supported. + + .. option:: [proc_type] + + :default: kresd + + Optional. The type of process to debug. See :ref:`Subprocess types + ` for more info. + + .. option:: --sudo + + Run the debugger with sudo. + + .. option:: --gdb + + Use a custom GDB executable. This may be a command on ``PATH``, or an + absolute path to an executable. + + .. option:: --print-only + + Prints the GDB command line into ``stderr`` as a Python array, does not + execute GDB. + + +.. _debugging-with-kresctl-subprocess-types: + +Subprocess types +---------------- + +Some of ``kresctl``'s commands (like :option:`pids` and :option:`debug`) take a subprocess +type value determining which subprocesses will be affected by them. The possible +values are as follows: + +* ``kresd`` -- the worker daemons +* ``gc`` -- the cache garbage collector +* ``all`` -- all of the Manager's subprocesses diff --git a/doc/dev/index.rst b/doc/dev/index.rst index a13e3d616..f0d557630 100644 --- a/doc/dev/index.rst +++ b/doc/dev/index.rst @@ -30,6 +30,13 @@ Welcome to Knot Resolver's documentation for developers and advanced users! manager-dev-code layered-protocols +.. toctree:: + :caption: Debugging + :name: debugging-chapter + :maxdepth: 1 + + debugging-with-kresctl + .. toctree:: :caption: Lua configuration :name: configuration-lua-chapter diff --git a/doc/user/manager-client.rst b/doc/user/manager-client.rst index 7d43a07f1..abeab2c93 100644 --- a/doc/user/manager-client.rst +++ b/doc/user/manager-client.rst @@ -304,99 +304,3 @@ single ``kresctl`` command. command is run. Requires a connection to the management API. - - -.. option:: pids - - Lists the PIDs of the Manager's subprocesses, separated by newlines. - - .. option:: --json - - Makes the output more verbose, in JSON. In addition to the subprocesses' - PIDs, it also prints their types and statuses. - - .. option:: [proc_type] - - :default: ``all`` - - Optional. The type of process to query. See :ref:`Subprocess types - ` for more info. - - -.. option:: debug - - Executes a GDB-compatible debugger and attaches it to the Manager's - subprocesses. By default, the debugger is ``gdb`` and the subprocesses are - only the ``kresd`` workers. - - .. warning:: - - The ``debug`` command is a utility for Knot Resolver developers and is - not intended to be used by end-users. Running this command **will** make - your resolver unresponsive. - - .. note:: - - Modern kernels will prevent debuggers from tracing processes that are - not their descendants, which is exactly the scenario that happens with - ``kresctl debug``. There are three ways to work around this, listed in - the order in which they are preferred in terms of security: - - 1. Grant the debugger the ``cap_sys_ptrace`` capability - (**recommended**) - - * For ``gdb``, this may be achieved by using the ``setcap`` - command like so: - - .. code-block:: bash - - sudo setcap cap_sys_ptrace=eip /usr/bin/gdb - - 2. Run the debugger as root - - * You may use the ``--sudo`` option to achieve this - - 3. Set ``/proc/sys/kernel/yama/ptrace_scope`` to ``0`` - - * This will allow **all** programs in your current session to - trace each other. Handle with care! - - .. note:: - - This command will only work if executed on the same machine where Knot - Resolver is running. Remote debugging is currently not supported. - - .. option:: [proc_type] - - :default: ``kresd`` - - Optional. The type of process to debug. See :ref:`Subprocess types - ` for more info. - - .. option:: --sudo - - Run the debugger with sudo. - - .. option:: --gdb - - Use a custom GDB executable. This may be a command on ``PATH``, or an - absolute path to an executable. - - .. option:: --print-only - - Prints the GDB command line into ``stderr`` as a Python array, does not - execute GDB. - - -.. _manager-client-subprocess-types: - -Subprocess types ----------------- - -Some of ``kresctl``'s commands (like :option:`pids` and :option:`debug`) take a subprocess -type value determining which subprocesses will be affected by them. The possible -values are as follows: - -* ``kresd`` -- the worker daemons -* ``gc`` -- the cache garbage collector -* ``all`` -- all of the Manager's subprocesses