From a6ccbca487feb6fc6ea569053c6604ace572ddeb Mon Sep 17 00:00:00 2001 From: Lzhang-hub <18229858296@163.com> Date: Tue, 26 Mar 2024 18:15:24 +0800 Subject: [PATCH 1/3] resolve PDSH_SSH_ARGS_APPEND not in environment --- deepspeed/launcher/multinode_runner.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/deepspeed/launcher/multinode_runner.py b/deepspeed/launcher/multinode_runner.py index 44e694952ffe..d086c3c3a55f 100644 --- a/deepspeed/launcher/multinode_runner.py +++ b/deepspeed/launcher/multinode_runner.py @@ -73,8 +73,8 @@ def name(self): def get_cmd(self, environment, active_resources): environment['PDSH_RCMD_TYPE'] = 'ssh' - if self.args.ssh_port is not None: # only specify ssh port if it is specified - environment["PDSH_SSH_ARGS_APPEND"] += f" -p {self.args.ssh_port}" + if self.args.ssh_port is not None: # only specify ssh port if it is specified + environment["PDSH_SSH_ARGS_APPEND"] = f"{environment.get('PDSH_SSH_ARGS_APPEND', '')} -p {self.args.ssh_port}" active_workers = ",".join(active_resources.keys()) logger.info("Running on the following workers: %s" % active_workers) From 31323342f9b132f3b3a3a6476211b79d4abb9576 Mon Sep 17 00:00:00 2001 From: Lzhang-hub <18229858296@163.com> Date: Thu, 28 Mar 2024 11:31:40 +0800 Subject: [PATCH 2/3] delete space --- deepspeed/launcher/multinode_runner.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/deepspeed/launcher/multinode_runner.py b/deepspeed/launcher/multinode_runner.py index d086c3c3a55f..ddd05bdd639a 100644 --- a/deepspeed/launcher/multinode_runner.py +++ b/deepspeed/launcher/multinode_runner.py @@ -73,7 +73,7 @@ def name(self): def get_cmd(self, environment, active_resources): environment['PDSH_RCMD_TYPE'] = 'ssh' - if self.args.ssh_port is not None: # only specify ssh port if it is specified + if self.args.ssh_port is not None: # only specify ssh port if it is specified environment["PDSH_SSH_ARGS_APPEND"] = f"{environment.get('PDSH_SSH_ARGS_APPEND', '')} -p {self.args.ssh_port}" active_workers = ",".join(active_resources.keys()) From dc9ecf74b423db62c71fcfce12722080def6fcde Mon Sep 17 00:00:00 2001 From: Lzhang-hub <18229858296@163.com> Date: Fri, 29 Mar 2024 10:10:03 +0800 Subject: [PATCH 3/3] resolve format check --- deepspeed/launcher/multinode_runner.py | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/deepspeed/launcher/multinode_runner.py b/deepspeed/launcher/multinode_runner.py index ddd05bdd639a..ce58deadc281 100644 --- a/deepspeed/launcher/multinode_runner.py +++ b/deepspeed/launcher/multinode_runner.py @@ -74,7 +74,8 @@ def name(self): def get_cmd(self, environment, active_resources): environment['PDSH_RCMD_TYPE'] = 'ssh' if self.args.ssh_port is not None: # only specify ssh port if it is specified - environment["PDSH_SSH_ARGS_APPEND"] = f"{environment.get('PDSH_SSH_ARGS_APPEND', '')} -p {self.args.ssh_port}" + environment["PDSH_SSH_ARGS_APPEND"] = f"{environment.get('PDSH_SSH_ARGS_APPEND', '')} \ + -p {self.args.ssh_port}" active_workers = ",".join(active_resources.keys()) logger.info("Running on the following workers: %s" % active_workers)