Skip to content

Commit

Permalink
zabbix api no longer used when not set in configuration
Browse files Browse the repository at this point in the history
  • Loading branch information
XaverStiensmeier committed Jul 5, 2024
1 parent 1861480 commit 2d3d9a9
Show file tree
Hide file tree
Showing 2 changed files with 60 additions and 45 deletions.
50 changes: 30 additions & 20 deletions resources/playbook/roles/bibigrid/files/slurm/delete_server.py
Original file line number Diff line number Diff line change
Expand Up @@ -12,9 +12,9 @@
import time

import os_client_config
import requests
import yaml

from pyzabbix import ZabbixAPI
from pyzabbix import ZabbixAPI, ZabbixAPIException

LOGGER_FORMAT = "%(asctime)s [%(levelname)s] %(message)s"
logging.basicConfig(format=LOGGER_FORMAT, filename="/var/log/slurm/delete_server.log", level=logging.INFO)
Expand All @@ -29,11 +29,11 @@
logging.info("Your input %s with length %s", sys.argv, len(sys.argv))
sys.exit(1)

separator = ','
SEPERATOR = ','
if '\n' in sys.argv[1]:
separator = '\n'
SEPERATOR = '\n'

terminate_workers = sys.argv[1].split(separator)
terminate_workers = sys.argv[1].split(SEPERATOR)
logging.info("Deleting instances %s", terminate_workers)

GROUP_VARS_PATH = "/opt/playbook/group_vars"
Expand Down Expand Up @@ -61,8 +61,8 @@
for worker_group in worker_groups:
for terminate_worker in terminate_workers:
# terminate all servers that are part of the current worker group
result = subprocess.run(["scontrol", "show", "hostname", worker_group["name"]],
stdout=subprocess.PIPE, check=True) # get all workers in worker_type
result = subprocess.run(["scontrol", "show", "hostname", worker_group["name"]], stdout=subprocess.PIPE,
check=True) # get all workers in worker_type
possible_workers = result.stdout.decode("utf-8").strip().split("\n")
if terminate_worker in possible_workers:
result = connections[worker_group["cloud_identifier"]].delete_server(terminate_worker)
Expand All @@ -76,19 +76,29 @@
# -------------------------------

# connect to Zabbix API
zapi = ZabbixAPI(server='http://localhost/zabbix')
# authenticate
zapi.login("Admin",common_config["zabbix_conf"]["admin_password"])
# iterate over terminate_workers list
for terminate_worker in terminate_workers:
# get list of hosts that matches the hostname
hosts = zapi.host.get(output=["hostid","name"],filter={"name": terminate_worker})
if not hosts:
logging.warning(f"Can't remove host '{terminate_worker}' from Zabbix: Host doesn't exist.")
else:
# remove host from Zabbix
zapi.host.delete(hosts[0]["hostid"])
logging.info(f"Remove host '{terminate_worker}' from Zabbix.")
# Remove the terminated workers from Zabbix, but only when Zabbix is enabled.
# `.get` treats a configuration without an "enable_zabbix" key as "disabled"
# instead of raising KeyError (assumes common_config is a dict loaded from the
# group_vars YAML -- TODO confirm against the loading code).
if common_config.get("enable_zabbix"):
    try:
        # Connect to the Zabbix API
        zapi = ZabbixAPI(server='http://localhost/zabbix')

        # Authenticate
        zapi.login("Admin", common_config["zabbix_conf"]["admin_password"])

        # Iterate over terminate_workers list
        for terminate_worker in terminate_workers:
            try:
                # Get list of hosts that matches the hostname
                hosts = zapi.host.get(output=["hostid", "name"], filter={"name": terminate_worker})
                if not hosts:
                    logging.warning("Can't remove host '%s' from Zabbix: Host doesn't exist.", terminate_worker)
                else:
                    # Remove host from Zabbix
                    zapi.host.delete(hosts[0]["hostid"])
                    logging.info("Removed host '%s' from Zabbix.", terminate_worker)
            except ZabbixAPIException as host_error:
                # A failure for one host must not prevent cleanup of the remaining hosts.
                logging.error("Error while handling host '%s': %s", terminate_worker, host_error)
    except requests.exceptions.RequestException as connection_error:
        logging.error("Cannot connect to Zabbix server: %s", connection_error)
    except ZabbixAPIException as api_error:
        # Reached when the API call outside the per-host handler (the login) is
        # rejected. The servers are already deleted at this point, so log the
        # problem and let the script finish instead of crashing.
        logging.error("Zabbix API error, hosts were not removed from Zabbix: %s", api_error)

logging.info(f"Successful delete_server.py execution ({sys.argv[1]})!")
time_in_s = time.time() - start_time
Expand Down
55 changes: 30 additions & 25 deletions resources/playbook/roles/bibigrid/tasks/011-zabbix-agent.yaml
Original file line number Diff line number Diff line change
@@ -1,18 +1,11 @@
- name: Install zabbix python-api
pip:
name: zabbix-api

- name: Install zabbix agent
apt:
name: zabbix-agent
state: present
when: "ansible_distribution_file_variety == 'Debian'"

- name: Install zabbix agent
dnf:
name: zabbix-agent
state: present
when: "ansible_distribution_file_variety == 'RedHat'"
# Runs only on hosts that are not in the 'master' group: makes sure a 'zabbix'
# account exists with a nologin shell and without creating its home directory.
- name: Ensure zabbix user exists
  when: "'master' not in group_names"
  user:
    name: zabbix
    comment: "Zabbix Monitoring User"
    home: /var/lib/zabbix
    shell: /usr/sbin/nologin
    # Canonical boolean: yamllint's truthy rule only accepts true/false.
    createhome: false

Check failure on line 8 in resources/playbook/roles/bibigrid/tasks/011-zabbix-agent.yaml

View workflow job for this annotation

GitHub Actions / linting-job

yaml[truthy]

Truthy value should be one of \[false, true]

- name: Create zabbix_agent dropin directory
file:
Expand All @@ -35,22 +28,34 @@
mode: 0644
notify: zabbix-agent

- name: Start and Enable zabbix-agent
systemd:
name: zabbix-agent
state: started
enabled: true

- name: Install zabbix python-api
pip:
name: zabbix-api

- name: Copy Zabbix Host delete script
copy:
src: zabbix/zabbix_host_delete.py
dest: /usr/local/bin/zabbix_host_delete.py
mode: 0755

- name: Install zabbix python-api
pip:
name: zabbix-api

- name: Install zabbix agent
apt:
name: zabbix-agent
state: present
when: "ansible_distribution_file_variety == 'Debian'"

- name: Install zabbix agent
dnf:
name: zabbix-agent
state: present
when: "ansible_distribution_file_variety == 'RedHat'"

- name: Start and Enable zabbix-agent
systemd:
name: zabbix-agent
state: started
enabled: true

# --------------------------------------
# -- Add worker node as zabbix hosts --
# --------------------------------------
Expand Down

0 comments on commit 2d3d9a9

Please sign in to comment.