From df97c36146da2bfa23c9780eddb8791635003fa0 Mon Sep 17 00:00:00 2001
From: Sohom Bhattacharjee
Date: Tue, 2 Jul 2024 11:22:14 +0530
Subject: [PATCH] more readme

---
 .vscode/settings.json |  6 +++
 README.md             | 97 +++++++++++++++++++++++++++++++++++--------
 2 files changed, 86 insertions(+), 17 deletions(-)
 create mode 100644 .vscode/settings.json

diff --git a/.vscode/settings.json b/.vscode/settings.json
new file mode 100644
index 0000000..8fc1bed
--- /dev/null
+++ b/.vscode/settings.json
@@ -0,0 +1,6 @@
+{
+  "cSpell.ignoreWords": [
+    "alertmanager",
+    "alertpipeline"
+  ]
+}
diff --git a/README.md b/README.md
index d453dd6..39cb7b7 100644
--- a/README.md
+++ b/README.md
@@ -7,7 +7,40 @@ The API is simple and extensible enough so as to enable users to extend the fram
 
 We will refer to this as the `tam`
 
-## Design
+## Quick Start
+
+Here we will quickly set up the tam with docker-compose and fire a test event at it using curl to see how it works.
+Once we have a basic example running, we can then dive into the details.
+
+_Note:_ This assumes that you have docker and docker-compose installed. Furthermore, you need a Slack webhook URL that is configured to send data to a channel.
+
+1. Get a Slack webhook and copy the secret (this is the part after `https://hooks.slack.com/services/` in the webhook URL)
+2. Put this secret in an `alertmanager/.env` file as follows
+
+```
+WEBHOOK_SECRET=<secret copied in step 1>
+```
+
+3. Run `make docker-build`
+4. Run `make sed`
+5. Run `docker compose up -d`
+6. Send `basicWebhookPayload.json` to the tam using curl
+
+```
+curl -v -H "Content-Type: application/json" -X POST localhost:8081/webhook -d @basicWebhookPayload.json
+```
+
+**NOTE** If everything is configured correctly, you should see a message in the channel that you configured. If not, look at the logs; the tam in docker-compose has debug logging enabled, which is quite verbose.
+
+Sample output
+
+```
+alert: NOOP_ALERT
+action: SendToSlack
+result of ENRICHMENT_STEP_1 enrichment(s): ARG1,ARG2
+```
+
+## Design
 
 The tam is a simple webhook server.
 
 Each alertpipeline is defined by
 
 - A list of Enrichments
 - A list of Actions.
 
-For example, a typlcal config would look like this
+For example, a typical config would look like this
 
 ```
 alert_pipelines:
   - alert_name: "KubePodCrashLooping"
     enrichments:
       - step_name: "get_pod_logs"
         enrichment_name: "LOKI_QUERY"
         enrichment_args: "url"
     actions:
       - step_name: "send_log_to_slack"
         action_name: "SEND_TO_SLACK"
         action_args: "url"
 ```
 
@@ -42,6 +75,33 @@ alert_pipelines:
         action_args: "url"
 ```
 
+We can use the `alertmanager` binary to generate a sample config, redirect the output to a file, and then modify it to our needs.
+
+```
+$ ./alertmanager config generate-template
+```
+
+```
+alert_pipelines:
+  - alert_name: NOOP_ALERT
+    enrichments:
+      - step_name: ENRICHMENT_STEP_1
+        enrichment_name: NOOP_ENRICHMENT
+        enrichment_args: ARG1,ARG2
+    actions:
+      - step_name: ACTION_STEP_1
+        action_name: NOOP_ACTION
+        action_args: ARG1,ARG2
+```
+
+We can use the built-in config validator to check whether a config file is up to spec
+
+```
+$ ./alertmanager config validate --config-file /path/to/file
+```
+
+The lists of available [enrichments](enrichment/README.md) and [actions](action/README.md) can be found in the respective docs.
+
 ## How does the TAM work?
 
 The tam accepts a JSON payload in the following format
@@ -49,25 +109,24 @@ The tam accepts a JSON payload in the following format
 ```
 {
   "version": "4",
-  "groupKey": <string>,              // key identifying the group of alerts (e.g. to deduplicate)
-  "truncatedAlerts": <int>,          // how many alerts have been truncated due to "max_alerts"
+  "groupKey": <string>,        // key identifying the group of alerts (e.g. to deduplicate)
+  "truncatedAlerts": <int>,    // how many alerts have been truncated due to "max_alerts"
   "status": "<resolved|firing>",
   "receiver": <string>,
   "groupLabels": <object>,
   "commonLabels": <object>,
   "commonAnnotations": <object>,
-  "externalURL": <string>,           // backlink to the Alertmanager.
+  "externalURL": <string>,     // backlink to the Alertmanager.
   "alerts": [
-    {
-      "status": "<resolved|firing>",
-      "labels": <object>,
-      "annotations": <object>,
-      "startsAt": "<rfc3339>",
-      "endsAt": "<rfc3339>",
-      "generatorURL": <string>,      // identifies the entity that caused the alert
-      "fingerprint": <string>        // fingerprint to identify the alert
-    },
-    ...
+    {
+      "status": "<resolved|firing>",
+      "labels": <object>,
+      "annotations": <object>,
+      "startsAt": "<rfc3339>",
+      "endsAt": "<rfc3339>",
+      "generatorURL": <string>,  // identifies the entity that caused the alert
+      "fingerprint": <string>    // fingerprint to identify the alert
+    }
   ]
 }
 ```
 
@@ -77,6 +136,7 @@ note: This is detailed in the prometheus [webhook receiver docs](https://prometh
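For reference, the Quick Start above POSTs `basicWebhookPayload.json` to this endpoint. That file's contents are not shown in this patch, but a minimal payload in the shape above might look something like the sketch below. Every value here is made up for illustration; only `labels.alertname` needs to match a configured pipeline (here the `NOOP_ALERT` pipeline from the generated sample config).

```
{
  "version": "4",
  "groupKey": "{}:{alertname=\"NOOP_ALERT\"}",
  "truncatedAlerts": 0,
  "status": "firing",
  "receiver": "tam",
  "groupLabels": { "alertname": "NOOP_ALERT" },
  "commonLabels": { "alertname": "NOOP_ALERT" },
  "commonAnnotations": { "summary": "A test alert" },
  "externalURL": "http://localhost:9093",
  "alerts": [
    {
      "status": "firing",
      "labels": { "alertname": "NOOP_ALERT" },
      "annotations": { "summary": "A test alert" },
      "startsAt": "2022-03-02T07:31:57.339Z",
      "endsAt": "0001-01-01T00:00:00Z",
      "generatorURL": "http://localhost:9090/graph",
      "fingerprint": "c847de2cb59b2a30"
    }
  ]
}
```

Sending a payload like this with curl (as in Quick Start step 6) should trigger the matching pipeline, assuming one is configured for `NOOP_ALERT`.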
 The alerts object is a list that can contain multiple `alert` objects. Each of them is of the following format
 
 ```
+
 {
   "annotations": {
     "description": "Pod customer is restarting 2.11 times / 10 minutes.",
     "runbook_url": "https://runbooks.prometheus-operator.dev/runbooks/kubernetes/kubepodcrashlooping",
     "summary": "Pod is crash looping."
   },
   "endsAt": "2022-03-02T07:36:57.339Z",
   "labels": {
     "alertname": "KubePodCrashLooping"
   },
   "startsAt": "2022-03-02T07:31:57.339Z",
   "status": "firing"
 }
+
 ```
 
 The tam uses `labels.alertname` as the primary identifier to match incoming alerts to their configured pipelines. Thus, the pipeline configured above for `KubePodCrashLooping` would match this alert and then execute its Enrichments and then its Actions.
 
 While the Enrichments and Actions can be built by the user using a certain framework, [Actions](./action/README.md) and [Enrichments](./enrichment/README.md) live in their own directories. There are some sample alerts and enrichments pre-built for ease of use.
 
-## SETUP
+## SETUP on k8s (kind)
 
 ```
 kind create cluster
 helm repo add prometheus-community https://prometheus-community.github.io/helm-charts
 helm repo update
-helm install prom-stack prometheus-community/kube-prometheus-stack -f values.yml
+make sed
+helm install prom-stack prometheus-community/kube-prometheus-stack -f deployment/kube-prometheus-stack.yml
+kubectl apply -f deployment/toy_alert_manager.yml
 ```
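A quick way to sanity-check the kind deployment is to port-forward the tam's Service and replay the same payload used in the Quick Start. This is only a sketch: the Service name, namespace, and port below are assumptions (they depend on what `deployment/toy_alert_manager.yml` actually creates), so adjust them to whatever `kubectl get svc` shows.

```
# Assumption: toy_alert_manager.yml creates a Service named "toy-alert-manager"
# listening on port 8081 in the default namespace.
kubectl port-forward svc/toy-alert-manager 8081:8081 &
curl -v -H "Content-Type: application/json" -X POST localhost:8081/webhook -d @basicWebhookPayload.json
```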