Skip to content

Commit

Permalink
Merge pull request #116 from juztas/sitermapi
Browse files Browse the repository at this point in the history
Autofetch overwrite config. Use SiteRM Api to send pings between hosts
  • Loading branch information
juztas authored Jun 20, 2024
2 parents 5e69ef6 + c9f314c commit ca044ae
Show file tree
Hide file tree
Showing 9 changed files with 186 additions and 9 deletions.
Original file line number Diff line number Diff line change
@@ -0,0 +1 @@
REPLACEME
1 change: 1 addition & 0 deletions autogole-api/packaging/files/etc/grid-security/hostkey.pem
Original file line number Diff line number Diff line change
@@ -0,0 +1 @@
REPLACEME
19 changes: 19 additions & 0 deletions autogole-api/packaging/files/etc/overrides.yaml
Original file line number Diff line number Diff line change
@@ -0,0 +1,19 @@
---
"urn:ogf:network:icair.org:2013":
name: "NSI_STARLIGHT"
joint_net: False
"urn:ogf:network:es.net:2013":
name: "ESNET"
joint_net: True
"urn:ogf:network:stack-fabric:2024":
name: "NSI_FABRIC"
joint_net: True
"urn:ogf:network:lsanca.pacificwave.net:2016":
name: "NSI_PACWAVE"
joint_net: False
"urn:ogf:network:uhnet.net:2021":
name: "NSI_HAWAII"
joint_net: False
"urn:ogf:network:calit2.optiputer.net:2020":
name: "NSI_UCSD"
joint_net: False
16 changes: 15 additions & 1 deletion autogole-api/packaging/files/etc/rtmon.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -3,23 +3,37 @@ sleep_timer: 30
# Work dir for temp files (api directory). Default /srv
workdir: '/srv/'

# Grafana settings (for the API)
grafana_host: 'https://autogole-grafana.nrp-nautilus.io'
grafana_api_key: 'REPLACE_ME'

# Template path and tags. If tag updated - it will force update all templates
template_path: '/etc/rtmon/templates'
template_tag: 'V0.01'

# Data sources for Prometheus (It will use grafana API to find data sources uid)
data_sources:
general: Prometheus
realtime: PrometheusRT


# Sense endpoints and their auth files.
sense_endpoints:
"sense-o.es.net": "/etc/sense-o-auth-prod.yaml"
"sense-o-dev.es.net": "/etc/sense-o-auth.yaml"

# Additional links for the templates
template_links:
- title: 'All Node Monitoring'
url: 'https://autogole-grafana.nrp-nautilus.io/d/D7xOxim4z/full-dtn-monitoring-variable?orgId=1&refresh=1m'
- title: 'All Switches Monitoring'
url: 'https://autogole-grafana.nrp-nautilus.io/d/1J9Zz1mWz/full-switch-monitoring-variable?orgId=1&refresh=1m'

# Override URL for the NSI,ESnet,Fabric mermaid diagrams. Most of those RMs report everything in a single SwitchingSubnet
# and we need to override it to show the actual topology (joint, or not). Additionally - most of those have no site name or
# realportname - so we need to override it as well. (That is the feature not yet implemented in other RMs)
override_url: "https://raw.githubusercontent.com/esnet/sense-rtmon/main/autogole-api/packaging/files/etc/overrides.yaml"

# HostCert and HostKey location (needed for SiteRM communications). Cert must be authorized in all Frontends.
hostcert: '/etc/grid-security/hostcert.pem'
hostkey: '/etc/grid-security/hostkey.pem'

3 changes: 3 additions & 0 deletions autogole-api/packaging/start-dev.sh
Original file line number Diff line number Diff line change
Expand Up @@ -2,6 +2,7 @@
echo "Dont forget to update the rtmon.yaml file with the correct parameters"
echo "Dont forget to update the sense-o-auth.yaml file with the correct parameters"
echo "Dont forget to update the sense-o-auth-prod.yaml file with the correct parameters"
echo "Dont forget to update the hostcert and hostkey file with certificates"

docker run \
-dit --name rtmon \
Expand All @@ -10,6 +11,8 @@ docker run \
-v $(pwd)/files/etc/rtmon.yaml:/etc/rtmon.yaml:ro \
-v $(pwd)/files/etc/sense-o-auth.yaml:/etc/sense-o-auth.yaml:ro \
-v $(pwd)/files/etc/sense-o-auth-prod.yaml:/etc/sense-o-auth-prod.yaml:ro \
-v $(pwd)/files/etc/grid-security/hostcert.pem:/etc/grid-security/hostcert.pem:ro \
-v $(pwd)/files/etc/grid-security/hostkey.pem:/etc/grid-security/hostkey.pem:ro \
--restart always \
--net=host \
rtmon
9 changes: 8 additions & 1 deletion autogole-api/src/python/RTMon/worker.py
Original file line number Diff line number Diff line change
Expand Up @@ -10,8 +10,9 @@
from RTMonLibs.Template import Template
from RTMonLibs.Template import Mermaid
from RTMonLibs.SiteOverride import SiteOverride
from RTMonLibs.SiteRMApi import SiteRMApi

class RTMonWorker(SenseAPI, GrafanaAPI, Template, Mermaid, SiteOverride):
class RTMonWorker(SenseAPI, GrafanaAPI, Template, SiteOverride, SiteRMApi, Mermaid):
""" RTMon Worker """
def __init__(self, **kwargs):
super().__init__(**kwargs)
Expand All @@ -33,6 +34,7 @@ def submit_exe(self, filename, fout):
self.logger.info('='*80)
self.logger.info('Submit Execution: %s, %s', filename, fout)
instance = self.s_getInstance(fout['referenceUUID'])
fout['instance'] = instance
self.logger.info(f"Here is instance for {fout['referenceUUID']}:")
self.logger.info(pformat(instance))
# 2. Get the manifest from SENSE-0
Expand All @@ -44,6 +46,7 @@ def submit_exe(self, filename, fout):
self.logger.error('Instance not in correct state: %s, %s', fout['referenceUUID'], instance['state'])
return
manifest = self.s_getManifest(instance)
fout['manifest'] = manifest
self.logger.info("Here is manifest for the following instance:")
self.logger.info(pformat(manifest))
if not manifest:
Expand All @@ -66,6 +69,8 @@ def submit_exe(self, filename, fout):
fout['state'] = 'running'
fout.setdefault('retries', 0)
self._updateState(filename, fout)
# 6. Submit SiteRM Action to issue a ping test both ways
self.sr_submit_ping(instance=instance, manifest=manifest)

def delete_exe(self, filename, fout):
"""Delete Action Execution"""
Expand Down Expand Up @@ -97,6 +102,8 @@ def running_exe(self, filename, fout):
# we need to update the dashboard with new template_tag
if self.config['template_tag'] in dashbVals['tags']:
self.logger.info('Dashboard is present in Grafana: %s', dashbName)
# Check if we need to re-issue ping test
self.sr_submit_ping(instance=fout.get('instance', {}), manifest=fout.get('manifest', {}))
return
# Need to update the dashboard with new template_tag
self.logger.info('Dashboard is present in Grafana, but with old version: %s', dashbName)
Expand Down
40 changes: 39 additions & 1 deletion autogole-api/src/python/RTMonLibs/GeneralLibs.py
Original file line number Diff line number Diff line change
Expand Up @@ -2,6 +2,8 @@
"""General Libraries for RTMon"""
import os
import json
import time
import requests
from yaml import safe_load as yload
from yaml import safe_dump as ydump

Expand Down Expand Up @@ -49,11 +51,47 @@ def dumpYaml(data, logger):
logger.error('Error in dumping yaml dict: %s', ex)
return {}

def loadYaml(data, logger):
    """Parse YAML text into Python objects.

    Args:
        data: YAML text to parse. A dict or list is treated as already
            parsed and returned unchanged.
        logger: Logger used to report parse errors. When falsy, the same
            messages are printed to stdout instead.

    Returns:
        The parsed content, or an empty dict if parsing raised.
    """
    if isinstance(data, (dict, list)):
        return data
    try:
        return yload(data)
    except Exception as ex:  # best effort: report the problem and fall back to {}
        messages = (('Error in loading yaml dict: %s', ex),
                    ('Data: %s', data),
                    ('Data type: %s', type(data)))
        for fmt, arg in messages:
            if logger:
                logger.error(fmt, arg)
            else:
                # print() does not interpolate %-style arguments, so format
                # explicitly; the 1-tuple keeps tuple-valued data intact.
                print(fmt % (arg,))
        return {}

def getConfig(logger=None):
    """Load the RTMon configuration from /etc/rtmon.yaml.

    Args:
        logger: Optional logger used to report a missing config file and
            passed on to loadYaml for parse-error reporting.

    Returns:
        Whatever loadYaml returns for the file content (the parsed YAML,
        or an empty dict if parsing fails).

    Raises:
        Exception: If /etc/rtmon.yaml does not exist.
    """
    if not os.path.isfile("/etc/rtmon.yaml"):
        if logger:
            logger.error("Config file /etc/rtmon.yaml does not exist.")
        raise Exception("Config file /etc/rtmon.yaml does not exist.")
    with open("/etc/rtmon.yaml", "r", encoding="utf-8") as fd:
        # Single return through loadYaml so parse errors are reported instead
        # of propagating; the earlier direct yload() return made this line
        # unreachable.
        return loadYaml(fd.read(), logger)

def getWebContentFromURL(url, logger, raiseEx=True):
    """GET a URL, retrying on request errors (three attempts in total).

    No HTTP status check is made here; callers must inspect the response.

    Args:
        url: URL to fetch.
        logger: Logger used to report failed attempts (skipped when falsy).
        raiseEx: When true, re-raise the error of the last failed attempt.

    Returns:
        The response object of the first attempt that does not raise. If
        every attempt fails and raiseEx is false, a dict of the form
        {'error': <message>, 'status_code': -1}.

    Raises:
        requests.exceptions.RequestException: From the last failed attempt,
            when raiseEx is true.
    """
    out = {}
    # Counts down the retries left after the current attempt: 2, 1, 0.
    for retries in range(2, -1, -1):
        try:
            return requests.get(url, timeout=60)
        except requests.exceptions.RequestException as ex:
            if logger:
                logger.error(f"Got requests.exceptions.RequestException: {ex}. Retries left: {retries}")
            if raiseEx and retries == 0:
                raise
            out = {'error': str(ex), 'status_code': -1}
            if retries:
                # Back off only when another attempt follows; sleeping after
                # the final failure would just delay the caller.
                time.sleep(1)
    return out
21 changes: 15 additions & 6 deletions autogole-api/src/python/RTMonLibs/SiteOverride.py
Original file line number Diff line number Diff line change
Expand Up @@ -2,20 +2,29 @@
"""
Class for overriding site specific settings (e.g. OpenNSA/NSI/NRM Name, ports)
"""
from RTMonLibs.GeneralLibs import loadYaml, getWebContentFromURL

class SiteOverride:
"""Site Override"""

def __init__(self, **kwargs):
    """Set up override state and load the overrides.

    Reads 'config' and 'logger' from the keyword arguments (both default to
    None when absent) and forwards all keyword arguments to the next class
    in the MRO.
    """
    super().__init__(**kwargs)
    self.config = kwargs.get('config')
    self.logger = kwargs.get('logger')
    # Starts empty and is filled by _getOverrides(); the former hard-coded
    # table was a dead store, overwritten before it could be read.
    self.override = {}
    self._getOverrides()
    # Peer mapping; starts empty.
    self.peers = {}

def _getOverrides(self):
    """Load the site override mapping from config['override_url'].

    self.override is replaced only when the download yields a truthy
    response and its body parses to a mapping. In every other case the
    current value is kept and an error is logged.
    """
    url = self.config.get('override_url')
    if not url:
        self.logger.error("No override URL set for parsing/mapping peers")
        return
    tmpoverrides = getWebContentFromURL(url, self.logger)
    # NOTE(review): relies on the response object being falsy for HTTP error
    # statuses (requests semantics) - confirm.
    if not tmpoverrides:
        self.logger.error("Failed to get overrides from URL: %s", url)
        return
    overrides = loadYaml(tmpoverrides.text, self.logger)
    if isinstance(overrides, dict):
        self.override = overrides
    else:
        # An empty or scalar body would otherwise leave self.override as
        # None/str and break later mapping lookups.
        self.logger.error("Overrides from URL %s did not parse to a mapping (got %s)",
                          url, type(overrides))

def so_mappeers(self, indata):
"""Map all peers"""
Expand Down
85 changes: 85 additions & 0 deletions autogole-api/src/python/RTMonLibs/SiteRMApi.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,85 @@
#!/usr/bin/env python3
"""
Class for interacting with SENSE SiteRMs
"""
from RTMonLibs.GeneralLibs import loadJson
from sense.client.siterm.debug_api import DebugApi



class SiteRMApi:
    """Mixin for talking to the SENSE SiteRM debug API (ping tests).

    Reads 'config' and 'logger' from the constructor keyword arguments and
    forwards all keyword arguments to the next class in the MRO.
    """

    def __init__(self, **kwargs):
        super().__init__(**kwargs)
        self.config = kwargs.get('config')
        self.logger = kwargs.get('logger')
        self.siterm_debug = DebugApi()

    @staticmethod
    def _sr_all_keys_match(action, newaction):
        """Return True if every key in newaction has the same value in action.

        Extra keys in action are ignored. An action that is not a dict
        (e.g. a missing requestdict) never matches.
        """
        if not isinstance(action, dict):
            return False
        return all(newaction.get(key) == action.get(key) for key in newaction)

    def _sr_get_all_hosts(self, **kwargs):
        """Collect host entries and their usable IPs from a manifest.

        Args:
            manifest (keyword): Manifest dict; hosts are read from
                manifest['Ports'][*]['Host']. None is treated as empty.

        Returns:
            Tuple (hosts, ips): hosts is the list of host dicts; ips maps
            'IPv4'/'IPv6' to the list of addresses with any '/prefix' removed.
        """
        manifest = kwargs.get("manifest") or {}
        allHosts, allIPs = [], {}
        for port in manifest.get("Ports", []):
            for hostdata in port.get('Host', []):
                if port.get('Vlan'):
                    # Recorded on the host entry itself (the manifest is
                    # annotated in place); sr_submit_ping uses it as interface.
                    hostdata['vlan'] = f"vlan.{port['Vlan']}"
                allHosts.append(hostdata)
                # Skip unset values and the "?ipv4?"/"?ipv6?" placeholders.
                for key, defval in [("IPv4", "?ipv4?"), ("IPv6", "?ipv6?")]:
                    # TODO: Remove split of ip once this is solved: https://github.com/sdn-sense/siterm/issues/576
                    if hostdata.get(key) and hostdata[key] != defval:
                        allIPs.setdefault(key, []).append(hostdata[key].split('/')[0])
        return allHosts, allIPs

    def sr_get_debug_actions(self, **kwargs):
        """Get all debug actions in state 'new' or 'active' for a site and hostname.

        Args:
            sitename (keyword): Site name passed to the debug API.
            hostname (keyword): Host name passed to the debug API.

        Returns:
            List of action dicts, each with 'requestdict' decoded via loadJson.
        """
        allDebugActions = []
        for key in ["new", "active"]:
            jsonOut = {}
            out = self.siterm_debug.get_all_debug_hostname(sitename=kwargs.get("sitename"),
                                                           hostname=kwargs.get("hostname"),
                                                           state=key)
            # NOTE(review): presumably out[0] is the response body - confirm
            # against DebugApi.
            if out and out[0]:
                jsonOut = loadJson(out[0], self.logger)
            for item in jsonOut:
                item["requestdict"] = loadJson(item["requestdict"], self.logger)
                allDebugActions.append(item)
        return allDebugActions

    def sr_submit_ping(self, **kwargs):
        """Submit rapid-ping tests from every host to every other host IP.

        For each host in the manifest and each address family it has
        configured, a ping towards every other collected IP of that family is
        submitted, unless an identical action is already 'new' or 'active'.

        Args:
            manifest (keyword): Manifest dict (see _sr_get_all_hosts).
            packetsize (keyword): Ping packet size, default 56.
            interval (keyword): Ping interval, default 5.
            time (keyword): Test duration, default 300.
        """
        self.logger.info("Start check for ping test if needed")
        hosts, allIPs = self._sr_get_all_hosts(**kwargs)
        for host in hosts:
            # Fetched lazily and at most once per host: the existing actions
            # are the same for the IPv4 and the IPv6 pass.
            allDebugActions = None
            # Check if IPv6 or IPv4 is defined
            for key, defval in [("IPv4", "?ipv4?"), ("IPv6", "?ipv6?")]:
                if not host.get(key) or host[key] == defval:
                    continue
                hostspl = (host.get("Name") or "").split(':')
                if len(hostspl) < 2:
                    # Site and hostname cannot be derived; skip this host
                    # instead of failing the whole run.
                    self.logger.warning("Skipping ping test, host name is not 'sitename:hostname': %s",
                                        host.get("Name"))
                    break
                if allDebugActions is None:
                    allDebugActions = self.sr_get_debug_actions(sitename=hostspl[0],
                                                                hostname=hostspl[1])
                hostip = host[key].split('/')[0]
                for ip in allIPs.get(key, []):
                    if hostip == ip:
                        # We ignore ourself. No need to ping ourself
                        continue
                    # TODO: Change time to 1hr once this is solved: https://github.com/sdn-sense/siterm/issues/574
                    newaction = {"hostname": hostspl[1], "type": "rapid-ping",
                                 "sitename": hostspl[0], "ip": ip,
                                 "packetsize": kwargs.get("packetsize", 56),
                                 "interval": kwargs.get("interval", 5),
                                 "interface": host['Interface'] if not host.get('vlan') else host['vlan'],
                                 "time": kwargs.get("time", 300)}
                    # Do not resubmit an action that is already queued/running.
                    if any(self._sr_all_keys_match(action.get('requestdict'), newaction)
                           for action in allDebugActions):
                        continue
                    self.logger.info(f"Submitting ping test for {newaction}")
                    self.siterm_debug.submit_ping(**newaction)

0 comments on commit ca044ae

Please sign in to comment.