diff --git a/resources/playbook/roles/bibigrid/files/slurm/delete_server.py b/resources/playbook/roles/bibigrid/files/slurm/delete_server.py
index c7b080c5..7b7fc674 100644
--- a/resources/playbook/roles/bibigrid/files/slurm/delete_server.py
+++ b/resources/playbook/roles/bibigrid/files/slurm/delete_server.py
@@ -20,10 +20,14 @@
 logging.info("delete_server.py started")
 start_time = time.time()
 
 if len(sys.argv) < 2:
     logging.warning("usage: $0 instance1_name[,instance2_name,...]")
     logging.info("Your input %s with length %s", sys.argv, len(sys.argv))
     sys.exit(1)
+
+# Log the argument only after the length check: sys.argv[1] raises IndexError when no argument is given.
+logging.info(f"Terminate parameter: {sys.argv[1]}")
+
 terminate_workers = sys.argv[1].split("\n")
 logging.info("Deleting instances %s", terminate_workers)
 
@@ -61,7 +65,8 @@
         logging.warning(f"Couldn't delete worker {terminate_worker}")
     else:
         logging.info(f"Deleted {terminate_worker}")
-logging.info("Successful delete_server.py execution!")
+
+logging.info(f"Successful delete_server.py execution ({sys.argv[1]})!")
 time_in_s = time.time() - start_time
 logging.info("--- %s minutes and %s seconds ---", math.floor(time_in_s / 60), time_in_s % 60)
 logging.info("Exit Code 0")
diff --git a/resources/playbook/roles/bibigrid/files/slurm/fail.sh b/resources/playbook/roles/bibigrid/files/slurm/fail.sh
index 682dafe7..436f8b59 100644
--- a/resources/playbook/roles/bibigrid/files/slurm/fail.sh
+++ b/resources/playbook/roles/bibigrid/files/slurm/fail.sh
@@ -39,7 +39,12 @@
 scontrol update NodeName="$1" state=RESUME reason=FailedStartup # no sudo needed
 
 hosts=$(scontrol show hostnames "$1")
+echo "Hosts $hosts used"
+
 # delete servers
 python3 /usr/local/bin/delete_server.py "${hosts}"
+# Save the status immediately: the echo below would otherwise overwrite $? with its own status.
+delete_status=$?
+echo "Finished delete_server.py execution."
 
-exit $?
+exit "$delete_status"