From b3ae80eab51184f4bc20e476c8b37e1cd3e8415c Mon Sep 17 00:00:00 2001 From: Ivan Pepelnjak Date: Fri, 3 Jan 2025 13:39:16 +0100 Subject: [PATCH] Use timeout to kill vrnetlab readiness check SSH sessions (#1734) The SSH client used to check vrnetlab readiness can be stuck for an awfully long time when the VM is in just the right phase of getting ready. This change uses the 'timeout' command (when available) to kill the check after 5 seconds, allowing the Ansible retry mechanism to work as expected. --- .../ansible/tasks/readiness-check/vm-clab-ssh-check.yml | 9 ++++++++- netsim/devices/vjunos-router.yml | 2 +- 2 files changed, 9 insertions(+), 2 deletions(-) diff --git a/netsim/ansible/tasks/readiness-check/vm-clab-ssh-check.yml b/netsim/ansible/tasks/readiness-check/vm-clab-ssh-check.yml index cd997318c..4d41a05e2 100644 --- a/netsim/ansible/tasks/readiness-check/vm-clab-ssh-check.yml +++ b/netsim/ansible/tasks/readiness-check/vm-clab-ssh-check.yml @@ -10,11 +10,18 @@ run_once: True any_errors_fatal: True +- name: Check for 'timeout' command + local_action: + module: shell + cmd: which timeout || echo "FAILED" + changed_when: False + register: have_timeout + - name: Execute local ssh command to check {{ netlab_device_type|default(inventory_hostname) }} readiness local_action: module: shell cmd: | - sshpass -p '{{ ansible_ssh_pass }}' \ + {{ 'timeout -k 3s 5s' if "FAILED" not in have_timeout.stdout else '' }} sshpass -p '{{ ansible_ssh_pass }}' \ ssh -o StrictHostKeyChecking=no {{ netlab_ssh_args|default('') }} \ -o UserKnownHostsFile=/dev/null {{ ansible_user }}@{{ ansible_host }} \ '{{ netlab_check_command | default("show version") }}' diff --git a/netsim/devices/vjunos-router.yml b/netsim/devices/vjunos-router.yml index 02df0fcb0..9920cac8b 100644 --- a/netsim/devices/vjunos-router.yml +++ b/netsim/devices/vjunos-router.yml @@ -24,7 +24,7 @@ clab: group_vars: ansible_user: admin ansible_ssh_pass: admin@123 - netlab_check_retries: 40 + netlab_check_retries: 60 
netlab_check_delay: 10 graphite.icon: router