ADD: Obol Alerts #2018

Merged
15 commits merged on Sep 3, 2024
130 changes: 119 additions & 11 deletions launcher/src/backend/Monitoring.js
@@ -436,11 +436,11 @@ export class Monitoring {
var query =
rpc_method.trim().indexOf("{") < 0
? JSON.stringify({
jsonrpc: "2.0",
method: rpc_method.trim(),
params: rpc_params,
id: 1,
})
jsonrpc: "2.0",
method: rpc_method.trim(),
params: rpc_params,
id: 1,
})
: rpc_method;

// Define default response
@@ -2623,8 +2623,8 @@ export class Monitoring {
const addr_type = Array.isArray(addr)
? "arr"
: typeof addr === "string" && ["public", "local"].includes(addr)
? "str"
: "invalid";
? "str"
: "invalid";
addr = addr_type == "str" ? addr.toLowerCase().trim() : addr;
if (addr_type == "invalid") {
return {
@@ -2712,7 +2712,7 @@ export class Monitoring {
for (let i = 0; i < serviceInfos.length; i++) {
const hashDependencies =
serviceInfos[i].config.dependencies.consensusClients.length ||
serviceInfos[i].config.dependencies.executionClients.length
serviceInfos[i].config.dependencies.executionClients.length
? "yes"
: "no";
easyInfos.push({
@@ -3249,9 +3249,8 @@ rm -rf diskoutput
const parsedJson = JSON.parse(stdoutJson);

let message =
`${parsedJson?.message || ""}${parsedJson?.message && parsedJson?.stacktraces ? "\n" : ""}${
parsedJson?.stacktraces || ""
}`.trim() || output;
`${parsedJson?.message || ""}${parsedJson?.message && parsedJson?.stacktraces ? "\n" : ""}${parsedJson?.stacktraces || ""
}`.trim() || output;

return {
pubkey: pubkey,
@@ -3341,4 +3340,113 @@ rm -rf diskoutput
];
}
}
/**
* Gathers metrics from Prometheus and evaluates them against alert thresholds.
* If a threshold is exceeded, an alert is generated and added to the returned array.
* @returns {Object[]} Array of alerts e.g. [{name: "Cluster in Unknown Status", level: "warning"}, {name: "Beacon Node Down", level: "critical"}]
*/
async fetchObolCharonAlerts() {
try {
const serviceInfos = await this.getServiceInfos("CharonService");
if (serviceInfos.length < 1) {
return [];
}
const queries = {
app_monitoring_readyz: "max((app_monitoring_readyz)) by (cluster_name, cluster_hash, cluster_peer)",
cluster_missed_attestations: "max(increase(core_tracker_failed_duties_total[10m])) by (cluster_hash, cluster_name)",
cluster_failure_rate: "floor(100 * (max(increase(core_tracker_success_duties_total[15m])) by (cluster_hash, cluster_name) / max(increase(core_tracker_expect_duties_total[15m])) by (cluster_hash, cluster_name)))",
percentage_failed_sync_message_duty: "(\n sum(increase(core_tracker_failed_duties_total[1h])) by (cluster_name,cluster_hash,cluster_peer)\n) \n/ \n(\n sum(increase(core_tracker_failed_duties_total[1h])) by (cluster_name,cluster_hash,cluster_peer) \n + \n sum(increase(core_bcast_broadcast_total[1h])) by (cluster_name,cluster_hash,cluster_peer) \n)",
connected_relays: "group (p2p_relay_connections) by (cluster_peer)",
peer_ping_latency: "histogram_quantile(0.90, sum(rate(p2p_ping_latency_secs_bucket[2m])) by (le,peer))",
};

const queryPromises = Object.entries(queries).map(([key, query]) => {
return this.queryPrometheus(encodeURIComponent(query)).then(result => ({ key, result }));
});

const results = await Promise.all(queryPromises);

let alerts = results.map((metric) => {
if (metric.result.status != "success") {
return;
}
if (metric.key === "peer_ping_latency") {
let value = Math.max(...metric.result.data.result.map((r) => r.value[1]));
return this.parseObolCharonAlerts(metric.key, value);
}
let value = metric.result.data.result[0].value[1];
return this.parseObolCharonAlerts(metric.key, value);
}).filter((alert) => alert);

return alerts;

} catch (error) {
log.error("Fetching Obol Charon Alerts Failed:\n" + error);
return [];
}
}

parseObolCharonAlerts(key, value) {
value = Number(value); // Prometheus sample values arrive as strings; coerce so numeric comparisons below work
//app_monitoring_readyz
if (key === "app_monitoring_readyz") {
switch (value) {
case 0:
return {
name: "Cluster in Unknown Status",
level: "warning"
};
case 2:
return {
name: "Beacon Node Down",
level: "critical"
};
case 4:
return {
name: "Cluster Insufficient Peers",
level: "warning"
};
case 6:
return {
name: "Cluster Missing Validators",
level: "critical"
};
case 7:
return {
name: "Beacon Node Zero Peers",
level: "critical"
};
}
}
if (key === "cluster_missed_attestations" && value > 0) {
return {
name: "Cluster Missed Attestations",
level: "critical"
};
}
if (key === "cluster_failure_rate" && value < 95) {
return {
name: "Cluster Failure Rate",
level: "critical"
};
}
if (key === "percentage_failed_sync_message_duty" && value > 0.1) {
return {
name: "Failed Sync Msg Duty",
level: "critical"
};
}
if (key === "connected_relays" && value < 1) {
return {
name: "Num. Connected Relays",
level: "warning"
};
}
if (key === "peer_ping_latency" && value > 0.4) {
return {
name: "Peer Ping Latency",
level: "warning"
};
}
}
}
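
For orientation, here is a minimal usage sketch of the new helpers (not part of the diff; it assumes an already constructed Monitoring instance and a reachable Prometheus endpoint):

// Hypothetical usage, assuming `monitoring` is an existing Monitoring instance.
// parseObolCharonAlerts maps a metric key and sample value to an alert object (or undefined).
const single = monitoring.parseObolCharonAlerts("cluster_failure_rate", 90);
// -> { name: "Cluster Failure Rate", level: "critical" } because 90 < 95

// fetchObolCharonAlerts runs the PromQL queries above and aggregates the triggered alerts.
const alerts = await monitoring.fetchObolCharonAlerts();
// -> e.g. [{ name: "Beacon Node Down", level: "critical" }], or [] when no CharonService is installed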
7 changes: 5 additions & 2 deletions launcher/src/backend/SSHService.js
@@ -571,7 +571,10 @@ export class SSHService {
* @param {Client} [conn]
* @returns `void`
*/
async uploadFileSSH(localPath, remotePath, conn = this.getConnectionFromPool()) {
async uploadFileSSH(localPath, remotePath, conn) {
if (!conn) {
conn = await this.getConnectionFromPool();
}
return new Promise((resolve, reject) => {
const readStream = fs.createReadStream(localPath);
readStream.on("error", reject);
@@ -621,7 +624,7 @@
if (item.isDirectory()) {
await this.uploadDirectorySSH(localFilePath, remoteFilePath, conn);
} else {
await this.uploadFileSSH(localFilePath, remoteFilePath);
await this.uploadFileSSH(localFilePath, remoteFilePath, conn);
}
}
return true;
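
A note on this change (my reading of the diff, not stated in the PR): getConnectionFromPool() is asynchronous, so using it as a default parameter value would bind an unawaited promise to conn; moving the fallback into the method body lets it be awaited, and passing conn through from uploadDirectorySSH lets each file upload reuse the already acquired connection.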
2 changes: 1 addition & 1 deletion launcher/src/backend/ValidatorAccountManager.js
@@ -1127,7 +1127,7 @@ export class ValidatorAccountManager {
this.nodeConnection.sshService.exec(`rm -rf ${dataDir}`);
const result = await this.nodeConnection.sshService.uploadDirectorySSH(path.normalize(localPath), dataDir);
if (result) {
log.info("Obol Backup downloaded from: ", localPath);
log.info("Obol Backup uploaded from: ", localPath);
}
} catch (err) {
log.error("Error uploading Obol Backup: ", err);
4 changes: 4 additions & 0 deletions launcher/src/background.js
@@ -741,6 +741,10 @@ ipcMain.handle("readGasConfigFile", async (event, args) => {
return await tekuGasLimitConfig.readGasConfigFile(args);
});

ipcMain.handle("fetchObolCharonAlerts", async () => {
return await monitoring.fetchObolCharonAlerts();
});
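
On the renderer side this handler would typically be reached over IPC; the exact wrapper (for example a ControlService method) is not part of this diff, so the call below is only a hedged sketch:

// Hypothetical renderer-side invocation; the app's preload/ControlService wiring is assumed.
const obolAlerts = await window.ipcRenderer.invoke("fetchObolCharonAlerts");
obolAlerts.forEach((a) => console.log(`${a.level}: ${a.name}`));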

// Scheme must be registered before the app is ready
protocol.registerSchemesAsPrivileged([{ scheme: "app", privileges: { secure: true, standard: true } }]);

63 changes: 13 additions & 50 deletions launcher/src/components/UI/node-page/NodeScreen.vue
@@ -6,22 +6,16 @@
<SidebarSection />
</div>
<div class="col-start-2 col-end-17 w-full h-full">
<NodeSection
@open-expert="openExpertModal"
@open-log="openLogPage"
@export-setup="exportSetup"
/>
<NodeSection @open-expert="openExpertModal" @open-log="openLogPage" @export-setup="exportSetup" />
</div>
<div class="col-start-17 col-end-21 ml-1 grid grid-cols-2 grid-rows-9">
<NetworkStatus />
<ServiceSection @open-expert="openExpertModal" @open-logs="openLogPage" />
</div>
<div class="col-start-21 col-end-25 px-1 flex flex-col justify-between">
<div
class="h-[60px] self-center w-full flex flex-col justify-center items-center"
>
<div class="h-[60px] self-center w-full flex flex-col justify-center items-center">
<button
class="w-full h-[34px] rounded-full bg-[#264744] hover:bg-[#325e5a] px-2 py-1 text-gray-200 active:scale-95 shadow-md shadow-zinc-800 active:shadow-none transition-all duration-200 ease-in-out uppercase flex justify-center items-center"
class="info-toggle-btn w-full h-[34px] rounded-full bg-[#264744] hover:bg-[#325e5a] px-2 py-1 text-gray-200 active:scale-95 shadow-md shadow-zinc-800 active:shadow-none transition-all duration-200 ease-in-out uppercase flex justify-center items-center"
@click="alarmToggle"
@mouseenter="
footerStore.cursorLocation = nodeStore.infoAlarm
@@ -30,11 +24,7 @@
"
@mouseleave="footerStore.cursorLocation = ''"
>
<img
class="w-8"
src="/img/icon/node-page-icons/access-tutorial-icon.png"
alt="information"
/>
<img class="w-8" src="/img/icon/node-page-icons/access-tutorial-icon.png" alt="information" />
</button>
</div>
<AlertSection :info-aralm="nodeStore.infoAlarm" />
@@ -47,11 +37,7 @@
@export-all-log="updateAndExportAllLogs"
@export-customized-logs="updateAndExportAllLogs"
/>
<ExpertWindow
v-if="isExpertModeOpen"
:item="expertModeClient"
@hide-modal="closeExpertMode"
/>
<ExpertWindow v-if="isExpertModeOpen" :item="expertModeClient" @hide-modal="closeExpertMode" />
</div>

<!-- End Node main layout -->
@@ -150,8 +136,6 @@ onUnmounted(() => {

//************* Methods *************



//get all configs and services
const nodeSetupsPrepration = () => {
setupStore.allSetups.forEach((s) => (s.isActive = false));
@@ -192,10 +176,7 @@ const checkForListingKeys = async () => {
serviceStore.installedServices &&
serviceStore.installedServices.length > 0 &&
serviceStore.installedServices.some(
(s) =>
s.category === "validator" &&
s.state === "running" &&
(!s.config.keys || !s.config.keys.length > 0)
(s) => s.category === "validator" && s.state === "running" && (!s.config.keys || !s.config.keys.length > 0)
)
) {
clearInterval(pollingListingKeys);
@@ -209,11 +190,7 @@ const updateConnectionStats = async () => {
controlStore.ipAddress = stats.ipAddress;
};
const updateServiceLogs = async () => {
if (
serviceStore.installedServices &&
serviceStore.installedServices.length > 0 &&
headerStore.refresh
) {
if (serviceStore.installedServices && serviceStore.installedServices.length > 0 && headerStore.refresh) {
const data = await ControlService.getServiceLogs({ logs_tail: 150 });
nodeStore.serviceLogs = data;
}
@@ -228,13 +205,9 @@ const updateAndExportAllLogs = async (client) => {
until: nodeStore.untilDateParsDays,
});

const fileName = `${client.name}_${
nodeStore.isExportCustomizedDateLoading ? "customized" : "all"
}_logs.txt`;
const fileName = `${client.name}_${nodeStore.isExportCustomizedDateLoading ? "customized" : "all"}_logs.txt`;
const data = [...nodeStore.allLogsForExp.logs].reverse();
const lineByLine = data
.map((line, index) => `#${data.length - index}: ${line}`)
.join("\n\n");
const lineByLine = data.map((line, index) => `#${data.length - index}: ${line}`).join("\n\n");
const blob = new Blob([lineByLine], { type: "text/plain;charset=utf-8" });
saveAs(blob, fileName);

@@ -246,11 +219,7 @@

const updateServerVitals = async () => {
try {
if (
serviceStore.installedServices &&
serviceStore.installedServices.length > 0 &&
headerStore.refresh
) {
if (serviceStore.installedServices && serviceStore.installedServices.length > 0 && headerStore.refresh) {
const data = await ControlService.getServerVitals();
controlStore.cpu = data.cpu;
controlStore.availDisk = data.availDisk;
@@ -286,18 +255,12 @@ const exportLogs = async (client) => {
(service) => service.config?.serviceID === client.config?.serviceID
);

const fileName = nodeStore.exportLogs
? `${client.name}_150_logs.txt`
: `${client.name}_all_logs.txt`;
const fileName = nodeStore.exportLogs ? `${client.name}_150_logs.txt` : `${client.name}_all_logs.txt`;

// Select the data based on the condition
const data = nodeStore.exportLogs
? currentService.logs.slice(-150).reverse()
: currentService.logs.reverse();
const data = nodeStore.exportLogs ? currentService.logs.slice(-150).reverse() : currentService.logs.reverse();

const lineByLine = data
.map((line, index) => `#${data.length - index}: ${line}`)
.join("\n\n");
const lineByLine = data.map((line, index) => `#${data.length - index}: ${line}`).join("\n\n");
const blob = new Blob([lineByLine], { type: "text/plain;charset=utf-8" });
saveAs(blob, fileName);
};