From 18d2a35d4acd356058687824bf56b15b97c1814d Mon Sep 17 00:00:00 2001 From: mabasian <54101509+mabasian@users.noreply.github.com> Date: Tue, 3 Sep 2024 15:35:27 +0200 Subject: [PATCH] ADD: CSM Alerts and notif (#2028) * FIX: frontend csm adderd to alert boxes * ADD: the fetch csm to monitoring * ADD: backend csm func * ADD: node alrt csm fetched * ADD: CSM to the control alert --- launcher/src/backend/Monitoring.js | 195 ++++++++++++++---- launcher/src/background.js | 4 + .../node-page/components/alert/NodeAlert.vue | 93 ++++++++- .../UI/the-control/ControlAlert.vue | 77 +++++++ launcher/src/languages/en.json | 3 +- launcher/src/store/ControlService.js | 3 + launcher/src/store/services.js | 10 +- 7 files changed, 338 insertions(+), 47 deletions(-) diff --git a/launcher/src/backend/Monitoring.js b/launcher/src/backend/Monitoring.js index 527512a749..2ace566b06 100755 --- a/launcher/src/backend/Monitoring.js +++ b/launcher/src/backend/Monitoring.js @@ -436,11 +436,11 @@ export class Monitoring { var query = rpc_method.trim().indexOf("{") < 0 ? JSON.stringify({ - jsonrpc: "2.0", - method: rpc_method.trim(), - params: rpc_params, - id: 1, - }) + jsonrpc: "2.0", + method: rpc_method.trim(), + params: rpc_params, + id: 1, + }) : rpc_method; // Define default response @@ -2623,8 +2623,8 @@ export class Monitoring { const addr_type = Array.isArray(addr) ? "arr" : typeof addr === "string" && ["public", "local"].includes(addr) - ? "str" - : "invalid"; + ? "str" + : "invalid"; addr = addr_type == "str" ? addr.toLowerCase().trim() : addr; if (addr_type == "invalid") { return { @@ -2712,7 +2712,7 @@ export class Monitoring { for (let i = 0; i < serviceInfos.length; i++) { const hashDependencies = serviceInfos[i].config.dependencies.consensusClients.length || - serviceInfos[i].config.dependencies.executionClients.length + serviceInfos[i].config.dependencies.executionClients.length ? 
"yes" : "no"; easyInfos.push({ @@ -3249,8 +3249,9 @@ rm -rf diskoutput const parsedJson = JSON.parse(stdoutJson); let message = - `${parsedJson?.message || ""}${parsedJson?.message && parsedJson?.stacktraces ? "\n" : ""}${parsedJson?.stacktraces || "" - }`.trim() || output; + `${parsedJson?.message || ""}${parsedJson?.message && parsedJson?.stacktraces ? "\n" : ""}${ + parsedJson?.stacktraces || "" + }`.trim() || output; return { pubkey: pubkey, @@ -3340,6 +3341,7 @@ rm -rf diskoutput ]; } } + /** * Will gather metrics from Prometheus and evaluate. * If thresholds are exceeded, an alert will be generated and added to the retuned array. @@ -3353,100 +3355,217 @@ rm -rf diskoutput } const queries = { app_monitoring_readyz: "max((app_monitoring_readyz)) by (cluster_name, cluster_hash, cluster_peer)", - cluster_missed_attestations: "max(increase(core_tracker_failed_duties_total[10m])) by (cluster_hash, cluster_name)", - cluster_failure_rate: "floor(100 * (max(increase(core_tracker_success_duties_total[15m])) by (cluster_hash, cluster_name) / max(increase(core_tracker_expect_duties_total[15m])) by (cluster_hash, cluster_name)))", - percentage_failed_sync_message_duty: "(\n sum(increase(core_tracker_failed_duties_total[1h])) by (cluster_name,cluster_hash,cluster_peer)\n) \n/ \n(\n sum(increase(core_tracker_failed_duties_total[1h])) by (cluster_name,cluster_hash,cluster_peer) \n + \n sum(increase(core_bcast_broadcast_total[1h])) by (cluster_name,cluster_hash,cluster_peer) \n)", + cluster_missed_attestations: + "max(increase(core_tracker_failed_duties_total[10m])) by (cluster_hash, cluster_name)", + cluster_failure_rate: + "floor(100 * (max(increase(core_tracker_success_duties_total[15m])) by (cluster_hash, cluster_name) / max(increase(core_tracker_expect_duties_total[15m])) by (cluster_hash, cluster_name)))", + percentage_failed_sync_message_duty: + "(\n sum(increase(core_tracker_failed_duties_total[1h])) by (cluster_name,cluster_hash,cluster_peer)\n) \n/ \n(\n 
sum(increase(core_tracker_failed_duties_total[1h])) by (cluster_name,cluster_hash,cluster_peer) \n + \n sum(increase(core_bcast_broadcast_total[1h])) by (cluster_name,cluster_hash,cluster_peer) \n)",
         connected_relays: "group (p2p_relay_connections) by (cluster_peer)",
         peer_ping_latency: "histogram_quantile(0.90, sum(rate(p2p_ping_latency_secs_bucket[2m])) by (le,peer))",
-      }
+      };
       const queryPromises = Object.entries(queries).map(([key, query]) => {
-        return this.queryPrometheus(encodeURIComponent(query)).then(result => ({ key, result }));
+        return this.queryPrometheus(encodeURIComponent(query)).then((result) => ({ key, result }));
       });
       const results = await Promise.all(queryPromises);
-      let alerts = results.map((metric) => {
-        if (metric.result.status != "success") {
-          return;
-        }
-        if (metric.key === "peer_ping_latency") {
-          let value = Math.max(...metric.result.data.result.map((r) => r.value[1]));
+      let alerts = results
+        .map((metric) => {
+          if (metric.result.status != "success") {
+            return;
+          }
+          if (metric.key === "peer_ping_latency") {
+            let value = Math.max(...metric.result.data.result.map((r) => r.value[1]));
+            return this.parseObolCharonAlerts(metric.key, value);
+          }
+          let value = metric.result.data.result[0].value[1];
           return this.parseObolCharonAlerts(metric.key, value);
-        }
-        let value = metric.result.data.result[0].value[1];
-        return this.parseObolCharonAlerts(metric.key, value);
-      }).filter((alert) => alert);
+        })
+        .filter((alert) => alert);
       return alerts;
-
     } catch (error) {
       log.error("Fetching Obol Charon Alerts Failed:\n" + error);
-      return []
+      return [];
     }
   }
   parseObolCharonAlerts(key, value) {
-    value = 0
+    value = Number(value); // use the real sample (was hard-coded to 0); numeric so the strict switch/thresholds match
     //app_monitoring_readyz
     if (key === "app_monitoring_readyz") {
       switch (value) {
         case 0:
           return {
             name: "Cluster in Unknown Status",
-            level: "warning"
+            level: "warning",
           };
         case 2:
           return {
             name: "Beacon Node Down",
-            level: "critical"
+            level: "critical",
           };
         case 4:
           return {
             name: "Cluster Insufficient Peers",
-            level: "warning"
+
level: "warning", }; case 6: return { name: "Cluster Missing Validators", - level: "critical" + level: "critical", }; case 7: return { name: "Beacon Node Zero Peers", - level: "critical" + level: "critical", }; } } if (key === "cluster_missed_attestations" && value > 0) { return { name: "Cluster Missed Attestations", - level: "critical" + level: "critical", }; } if (key === "cluster_failure_rate" && value < 95) { return { name: "Cluster Failure Rate", - level: "critical" + level: "critical", }; } if (key === "percentage_failed_sync_message_duty" && value > 0.1) { return { name: "Failed Sync Msg Duty", - level: "critical" + level: "critical", }; } if (key === "connected_relays" && value < 1) { return { name: "Num. Connected Relays", - level: "warning" + level: "warning", }; } if (key === "peer_ping_latency" && value > 0.4) { return { name: "Peer Ping Latency", - level: "warning" + level: "warning", }; } } + + /** + * Will gather metrics from Prometheus and evaluate. + * If thresholds are exceeded, an alert will be generated and added to the retuned array. + * @returns {Object[]} Array of alerts e.g. 
[{name: "slashing event", level: "critical"}, {name: "withdrawal submitted", level: "notification"}]
+   */
+  async fetchCsmAlerts() {
+    try {
+      const serviceInfos = await this.getServiceInfos("LCOMService");
+      if (serviceInfos.length < 1) {
+        return [];
+      }
+
+      const queries = {
+        lcoms_initial_slashing_submitted: "lcoms_initial_slashing_submitted",
+        lcoms_withdrawal_submitted: "lcoms_withdrawal_submitted",
+        lcoms_stealing_penalty: "lcoms_stealing_penalty",
+        lcoms_stealing_penalty_stolenAmount: "lcoms_stealing_penalty_stolenAmount",
+        lcoms_exit_request: "lcoms_exit_request",
+        lcoms_exit_request_timestamp: "lcoms_exit_request_timestamp",
+        lcoms_fee_to_distribute: "lcoms_fee_to_distribute",
+        lcoms_node_operator_status: "lcoms_node_operator_status",
+        lcoms_current_bond: "lcoms_current_bond",
+        lcoms_required_bond: "lcoms_required_bond",
+      };
+
+      const queryPromises = Object.entries(queries).map(([key, query]) => {
+        return this.queryPrometheus(encodeURIComponent(query)).then((result) => ({ key, result }));
+      });
+
+      const results = await Promise.all(queryPromises);
+
+      let currentBond = null;
+      let requiredBond = null;
+
+      let alerts = results
+        .map((metric) => {
+          if (metric.result.status !== "success") {
+            return null;
+          }
+
+          const value = parseFloat(metric.result.data?.result?.[0]?.value?.[1]); // NaN when the series is empty
+
+          if (metric.key === "lcoms_current_bond") {
+            currentBond = value;
+          } else if (metric.key === "lcoms_required_bond") {
+            requiredBond = value;
+          }
+
+          return this.parseCsmAlerts(metric.key, value);
+        })
+        .filter((alert) => alert && !Array.isArray(alert)); // parseCsmAlerts yields [] for "no alert"
+
+      if (currentBond !== null && requiredBond !== null) {
+        const bondDifference = currentBond - requiredBond;
+        const bondAlert = this.parseCsmAlerts("bond_difference", bondDifference);
+        if (bondAlert && !Array.isArray(bondAlert)) {
+          alerts.push(bondAlert);
+        }
+      }
+
+      return alerts;
+    } catch (error) {
+      log.error("Fetching CSM Alerts Failed:\n" + error);
+      return [];
+    }
+  }
+
+  parseCsmAlerts(key, value) {
+    if (key === "lcoms_initial_slashing_submitted"
&& value > 0) { + return { + name: "slashing event", + level: "critical", + }; + } + if (key === "lcoms_withdrawal_submitted" && value > 0) { + return { + name: "withdrawal submitted", + level: "notification", + }; + } + if (key === "lcoms_stealing_penalty" && value > 0) { + return { + name: "EL stealing penalty", + level: "critical", + }; + } + if (key === "lcoms_exit_request" && value > 0) { + return { + name: "exit request", + level: "critical", + }; + } + if (key === "lcoms_fee_to_distribute" && value > 0) { + return { + name: "none-claimed rewards", + level: "notification", + }; + } + if (key === "lcoms_node_operator_status" && value < 1) { + return { + name: "node operator inactive", + level: "critical", + }; + } + if (key === "bond_difference" && value < 0) { + return { + name: "Insufficient Bond", + level: "critical", + }; + } + return []; + } } diff --git a/launcher/src/background.js b/launcher/src/background.js index 9e606f6334..8dcfcd53c8 100755 --- a/launcher/src/background.js +++ b/launcher/src/background.js @@ -745,6 +745,10 @@ ipcMain.handle("fetchObolCharonAlerts", async () => { return await monitoring.fetchObolCharonAlerts(); }); +ipcMain.handle("fetchCsmAlerts", async () => { + return await monitoring.fetchCsmAlerts(); +}); + // Scheme must be registered before the app is ready protocol.registerSchemesAsPrivileged([{ scheme: "app", privileges: { secure: true, standard: true } }]); diff --git a/launcher/src/components/UI/node-page/components/alert/NodeAlert.vue b/launcher/src/components/UI/node-page/components/alert/NodeAlert.vue index 03953c4544..04d1fca8e5 100755 --- a/launcher/src/components/UI/node-page/components/alert/NodeAlert.vue +++ b/launcher/src/components/UI/node-page/components/alert/NodeAlert.vue @@ -159,6 +159,55 @@ + + + + + + + +
{
       this.fetchObolCharonAlerts();
     }, 120000);
+    this.fetchCsm();
+    this.csmInterval = setInterval(() => {
+      this.fetchCsm();
+    }, 120000);
   },
   beforeUnmount() {
     clearInterval(this.polling);
     if (this.obolInterval) {
       clearInterval(this.obolInterval);
     }
+    if (this.csmInterval) {
+      clearInterval(this.csmInterval);
+    }
   },
   created() {
     this.storageCheck();
@@ -438,13 +497,31 @@ export default {
     async fetchObolCharonAlerts() {
       try {
         const alerts = await ControlService.fetchObolCharonAlerts();
-        console.log("Obol Charon alerts:", alerts);
         this.processAlerts(alerts);
       } catch (error) {
         console.error("Failed to fetch Obol Charon alerts:", error);
       }
     },
+    async fetchCsm() { // fetches CSM alerts via ControlService and hands them to processCsm
+      try {
+        const alerts = await ControlService.fetchCsmAlerts();
+
+        this.processCsm(alerts);
+      } catch (error) {
+        console.error("Failed to fetch CSM alerts:", error);
+      }
+    },
+    processCsm(alerts) { // stores alert names by level in criticalCsm / notifCsm
+      const criticalAlertNames = alerts.filter((alert) => alert.level === "critical").map((alert) => alert.name);
+
+      const notificationNames = alerts.filter((alert) => alert.level === "notification").map((alert) => alert.name);
+
+      this.criticalCsm = criticalAlertNames;
+
+      this.notifCsm = notificationNames;
+    },
+
     processAlerts(alerts) {
       const criticalAlertNames = alerts.filter((alert) => alert.level === "critical").map((alert) => alert.name);
@@ -463,6 +540,7 @@ export default {
         this.alertShowState.push(color);
       }
     },
+
     async checkSettings() {
       try {
         const savedConfig = await ControlService.readConfig();
@@ -843,10 +921,19 @@ export default {
   display: flex;
   width: 95%;
   height: 35%;
-  justify-content: flex-start;
-  align-items: center;
+  justify-content: center;
+  align-items: flex-end;
   font-size: 45%;
   font-weight: 700;
   text-transform: uppercase;
 }
+.val-message span {
+  display: block;
+  width: 100%;
+  height: 100%;
+  white-space: nowrap;
+  overflow: hidden;
+  text-overflow: ellipsis;
+  text-align: left;
+}
diff --git a/launcher/src/components/UI/the-control/ControlAlert.vue
b/launcher/src/components/UI/the-control/ControlAlert.vue index 15487b119d..ad071fe748 100755 --- a/launcher/src/components/UI/the-control/ControlAlert.vue +++ b/launcher/src/components/UI/the-control/ControlAlert.vue @@ -162,6 +162,54 @@ + + + + + + + +
{
       this.fetchObolCharonAlerts();
     }, 120000);
+
+    this.fetchCsm();
+    this.csmInterval = setInterval(() => {
+      this.fetchCsm();
+    }, 120000);
   },
   beforeUnmount() {
     clearInterval(this.polling);
     if (this.obolInterval) {
       clearInterval(this.obolInterval);
     }
+    if (this.csmInterval) {
+      clearInterval(this.csmInterval);
+    }
   },
   created() {
     this.storageCheck();
@@ -415,6 +474,24 @@ export default {
         this.alertShowState.push(color);
       }
     },
+    async fetchCsm() { // fetches CSM alerts via ControlService and hands them to processCsm
+      try {
+        const alerts = await ControlService.fetchCsmAlerts();
+
+        this.processCsm(alerts);
+      } catch (error) {
+        console.error("Failed to fetch CSM alerts:", error);
+      }
+    },
+    processCsm(alerts) { // stores alert names by level in criticalCsm / notifCsm
+      const criticalAlertNames = alerts.filter((alert) => alert.level === "critical").map((alert) => alert.name);
+
+      const notificationNames = alerts.filter((alert) => alert.level === "notification").map((alert) => alert.name);
+
+      this.criticalCsm = criticalAlertNames;
+
+      this.notifCsm = notificationNames;
+    },
     async checkSettings() {
       try {
         const savedConfig = await ControlService.readConfig();
diff --git a/launcher/src/languages/en.json b/launcher/src/languages/en.json
index fb1818aa44..ef7899ceac 100755
--- a/launcher/src/languages/en.json
+++ b/launcher/src/languages/en.json
@@ -499,7 +499,8 @@
     "sync": "Synchronization Error",
     "taskFail": "Task Failed",
     "stats": "STATUS: OPEN",
-    "clientService": "CLIENT / SERVICE"
+    "clientService": "CLIENT / SERVICE",
+    "csm": "csm operator monitoring"
   },
   "resyncModal": {
     "message": "YOUR CLIENT Stores blockchain data ON YOUR SERVER SO YOUR NODE CAN serve the network. this is necessary for your node TO PICK up its ATTESTATION DUTY in STAKING.
BY USING THIS OPTION YOU ARE DELETING YOUR CLIENTS's CURRENT BLOCKCHAIN DATA & USE THE SELECTED OPTION AS A SOURCE TO SYNCHRONIZE YOUR NODE ANEW.", diff --git a/launcher/src/store/ControlService.js b/launcher/src/store/ControlService.js index b08e1c1016..c07fab09c5 100755 --- a/launcher/src/store/ControlService.js +++ b/launcher/src/store/ControlService.js @@ -679,6 +679,9 @@ class ControlService extends EventEmitter { async fetchObolCharonAlerts() { return this.promiseIpc.send("fetchObolCharonAlerts"); } + async fetchCsmAlerts() { + return this.promiseIpc.send("fetchCsmAlerts"); + } } if (!instance) { instance = new ControlService(window.electron); diff --git a/launcher/src/store/services.js b/launcher/src/store/services.js index 1045e58b9f..d7d8366331 100755 --- a/launcher/src/store/services.js +++ b/launcher/src/store/services.js @@ -1400,7 +1400,7 @@ export const useServices = defineStore("services", { }, { id: 32, - name: "CMS Monitoring", + name: "CSM Monitoring", service: "LCOMService", displayPluginMenu: false, serviceIsPending: false, @@ -1410,8 +1410,8 @@ export const useServices = defineStore("services", { configPanel: false, category: "service", path: "/lcoms", - icon: "/img/icon/plugin-icons/Other/LCOM.png", - sIcon: "/img/icon/plugin-icons/Other/LCOM-s.png", + icon: "/img/icon/service-icons/Other/LCOM.png", + sIcon: "/img/icon/service-icons/Other/LCOM-s.png", linkUrl: "", docsUrl: "https://github.com/stereum-dev/lido-csm-operator-monitoring-service", headerOption: false, @@ -1441,8 +1441,8 @@ export const useServices = defineStore("services", { configPanel: false, category: "service", path: "/ipfs", - icon: "/img/icon/plugin-icons/Other/IPFS.png", - sIcon: "/img/icon/plugin-icons/Other/IPFS-s.png", + icon: "/img/icon/service-icons/Other/IPFS.png", + sIcon: "/img/icon/service-icons/Other/IPFS-s.png", linkUrl: "", docsUrl: "https://docs.ipfs.tech/", headerOption: false,