From ce83e286227d5578d021be2f0496ff4b7fd2f14d Mon Sep 17 00:00:00 2001 From: modridi Date: Thu, 5 Oct 2023 17:02:41 +0200 Subject: [PATCH] feat: monitor Thanos components --- .../thanos/templates/compactor-dashboard.yaml | 2192 +++++++++++++++++ locals.tf | 7 +- 2 files changed, 2198 insertions(+), 1 deletion(-) create mode 100644 charts/thanos/templates/compactor-dashboard.yaml diff --git a/charts/thanos/templates/compactor-dashboard.yaml b/charts/thanos/templates/compactor-dashboard.yaml new file mode 100644 index 00000000..a6740a44 --- /dev/null +++ b/charts/thanos/templates/compactor-dashboard.yaml @@ -0,0 +1,2192 @@ +apiVersion: v1 +kind: ConfigMap +metadata: + labels: + grafana_dashboard: "1" + name: grafana-dashboard +data: + thanos-compactor.json: |- + { + "annotations": { + "list": [ + { + "builtIn": 1, + "datasource": { + "type": "grafana", + "uid": "-- Grafana --" + }, + "enable": true, + "hide": true, + "iconColor": "rgba(0, 211, 255, 1)", + "name": "Annotations & Alerts", + "type": "dashboard" + } + ] + }, + "editable": true, + "fiscalYearStartMonth": 0, + "graphTooltip": 0, + "id": 28, + "links": [], + "liveNow": false, + "panels": [ + { + "collapsed": false, + "datasource": { + "type": "prometheus", + "uid": "P5DCFC7561CCDE821" + }, + "gridPos": { + "h": 1, + "w": 24, + "x": 0, + "y": 0 + }, + "id": 20, + "panels": [], + "targets": [ + { + "datasource": { + "type": "prometheus", + "uid": "P5DCFC7561CCDE821" + }, + "refId": "A" + } + ], + "title": "Group Compaction", + "type": "row" + }, + { + "aliasColors": {}, + "bars": false, + "dashLength": 10, + "dashes": false, + "datasource": { + "uid": "$datasource" + }, + "description": "Shows rate of execution for compactions against blocks that are stored in the bucket by compaction resolution.", + "fill": 10, + "fillGradient": 0, + "gridPos": { + "h": 7, + "w": 12, + "x": 0, + "y": 1 + }, + "hiddenSeries": false, + "id": 1, + "legend": { + "avg": false, + "current": false, + "max": false, + "min": false, + "show": true, + "total": false, + "values": false + }, + "lines": true, + "linewidth": 0, + "links": [], + "nullPointMode": "null as zero", + "options": { + "alertThreshold": true + }, + "percentage": false, + "pluginVersion": "10.1.1", + "pointradius": 5, + "points": false, + "renderer": "flot", + "seriesOverrides": [], + "spaceLength": 10, + "stack": true, + "steppedLine": false, + "targets": [ + { + "datasource": { + "uid": "$datasource" + }, + "expr": "sum by (job, resolution) (rate(thanos_compact_group_compactions_total{job=~\"$job\"}[$__rate_interval]))", + "format": "time_series", + "intervalFactor": 2, + "legendFormat": "compaction {{job}} {{resolution}}", + "refId": "A", + "step": 10 + } + ], + "thresholds": [], + "timeRegions": [], + "title": "Rate", + "tooltip": { + "shared": false, + "sort": 0, + "value_type": "individual" + }, + "type": "graph", + "xaxis": { + "mode": "time", + "show": true, + "values": [] + }, + "yaxes": [ + { + "format": "short", + "logBase": 1, + "min": 0, + "show": true + }, + { + "format": "short", + "logBase": 1, + "show": false + } + ], + "yaxis": { + "align": false + } + }, + { + "aliasColors": { + "error": "#E24D42" + }, + "bars": false, + "dashLength": 10, + "dashes": false, + "datasource": { + "uid": "$datasource" + }, + "description": "Shows ratio of errors compared to the total number of executed compactions against blocks that are stored in the bucket.", + "fill": 10, + "fillGradient": 0, + "gridPos": { + "h": 7, + "w": 12, + "x": 12, + "y": 1 + }, + "hiddenSeries": false, + "id": 2, + "legend": { + "avg": false, + "current": false, + "max": false, + "min": false, + "show": true, + "total": false, + "values": false + }, + "lines": true, + "linewidth": 0, + "links": [], + "nullPointMode": "null as zero", + "options": { + "alertThreshold": true + }, + "percentage": false, + "pluginVersion": "10.1.1", + "pointradius": 5, + "points": false, + "renderer": "flot", + "seriesOverrides": [], + "spaceLength": 10, + "stack": true, + "steppedLine": false, + "targets": [ + { + "datasource": { + "uid": "$datasource" + }, + "expr": "sum by (job) (rate(thanos_compact_group_compactions_failures_total{job=~\"$job\"}[$interval])) / sum by (job) (rate(thanos_compact_group_compactions_total{job=~\"$job\"}[$interval]))", + "format": "time_series", + "intervalFactor": 2, + "legendFormat": "error", + "refId": "A", + "step": 10 + } + ], + "thresholds": [], + "timeRegions": [], + "title": "Errors", + "tooltip": { + "shared": false, + "sort": 0, + "value_type": "individual" + }, + "type": "graph", + "xaxis": { + "mode": "time", + "show": true, + "values": [] + }, + "yaxes": [ + { + "format": "percentunit", + "logBase": 1, + "min": 0, + "show": true + }, + { + "format": "short", + "logBase": 1, + "show": false + } + ], + "yaxis": { + "align": false + } + }, + { + "collapsed": false, + "datasource": { + "type": "prometheus", + "uid": "P5DCFC7561CCDE821" + }, + "gridPos": { + "h": 1, + "w": 24, + "x": 0, + "y": 8 + }, + "id": 21, + "panels": [], + "targets": [ + { + "datasource": { + "type": "prometheus", + "uid": "P5DCFC7561CCDE821" + }, + "refId": "A" + } + ], + "title": "Downsample", + "type": "row" + }, + { + "aliasColors": {}, + "bars": false, + "dashLength": 10, + "dashes": false, + "datasource": { + "uid": "$datasource" + }, + "description": "Shows rate of execution for downsampling against blocks that are stored in the bucket by compaction resolution.", + "fill": 10, + "fillGradient": 0, + "gridPos": { + "h": 7, + "w": 12, + "x": 0, + "y": 9 + }, + "hiddenSeries": false, + "id": 3, + "legend": { + "avg": false, + "current": false, + "max": false, + "min": false, + "show": true, + "total": false, + "values": false + }, + "lines": true, + "linewidth": 0, + "links": [], + "nullPointMode": "null as zero", + "options": { + "alertThreshold": true + }, + "percentage": false, + "pluginVersion": "10.1.1", + "pointradius": 5, + "points": false, + "renderer": "flot", + "seriesOverrides": [], + "spaceLength": 10, + "stack": true, + "steppedLine": false, + "targets": [ + { + "datasource": { + "uid": "$datasource" + }, + "expr": "sum by (job, resolution) (rate(thanos_compact_downsample_total{job=~\"$job\"}[$__rate_interval]))", + "format": "time_series", + "intervalFactor": 2, + "legendFormat": "downsample {{job}} {{resolution}}", + "refId": "A", + "step": 10 + } + ], + "thresholds": [], + "timeRegions": [], + "title": "Rate", + "tooltip": { + "shared": false, + "sort": 0, + "value_type": "individual" + }, + "type": "graph", + "xaxis": { + "mode": "time", + "show": true, + "values": [] + }, + "yaxes": [ + { + "format": "short", + "logBase": 1, + "min": 0, + "show": true + }, + { + "format": "short", + "logBase": 1, + "show": false + } + ], + "yaxis": { + "align": false + } + }, + { + "aliasColors": { + "error": "#E24D42" + }, + "bars": false, + "dashLength": 10, + "dashes": false, + "datasource": { + "type": "prometheus", + "uid": "$datasource" + }, + "description": "Shows ratio of errors compared to the total number of executed downsampling against blocks that are stored in the bucket.", + "fill": 10, + "fillGradient": 0, + "gridPos": { + "h": 7, + "w": 12, + "x": 12, + "y": 9 + }, + "hiddenSeries": false, + "id": 4, + "legend": { + "avg": false, + "current": false, + "max": false, + "min": false, + "show": true, + "total": false, + "values": false + }, + "lines": true, + "linewidth": 0, + "links": [], + "nullPointMode": "null as zero", + "options": { + "alertThreshold": true + }, + "percentage": false, + "pluginVersion": "10.1.1", + "pointradius": 5, + "points": false, + "renderer": "flot", + "seriesOverrides": [], + "spaceLength": 10, + "stack": true, + "steppedLine": false, + "targets": [ + { + "datasource": { + "uid": "$datasource" + }, + "editorMode": "code", + "expr": "sum by (job) (rate(thanos_compact_downsample_failures_total{job=~\"$job\"}[$interval])) / sum by (job) (rate(thanos_compact_downsample_total{job=~\"$job\"}[$interval]))", + "format": "time_series", + "intervalFactor": 2, + "legendFormat": "error", + "range": true, + "refId": "A", + "step": 10 + } + ], + "thresholds": [], + "timeRegions": [], + "title": "Errors", + "tooltip": { + "shared": false, + "sort": 0, + "value_type": "individual" + }, + "type": "graph", + "xaxis": { + "mode": "time", + "show": true, + "values": [] + }, + "yaxes": [ + { + "format": "percentunit", + "logBase": 1, + "min": 0, + "show": true + }, + { + "format": "short", + "logBase": 1, + "show": false + } + ], + "yaxis": { + "align": false + } + }, + { + "collapsed": false, + "datasource": { + "type": "prometheus", + "uid": "P5DCFC7561CCDE821" + }, + "gridPos": { + "h": 1, + "w": 24, + "x": 0, + "y": 16 + }, + "id": 22, + "panels": [], + "targets": [ + { + "datasource": { + "type": "prometheus", + "uid": "P5DCFC7561CCDE821" + }, + "refId": "A" + } + ], + "title": "Garbage Collection", + "type": "row" + }, + { + "aliasColors": {}, + "bars": false, + "dashLength": 10, + "dashes": false, + "datasource": { + "uid": "$datasource" + }, + "description": "Shows rate of execution for removals of blocks if their data is available as part of a block with a higher compaction level.", + "fill": 10, + "fillGradient": 0, + "gridPos": { + "h": 7, + "w": 8, + "x": 0, + "y": 17 + }, + "hiddenSeries": false, + "id": 5, + "legend": { + "avg": false, + "current": false, + "max": false, + "min": false, + "show": true, + "total": false, + "values": false + }, + "lines": true, + "linewidth": 0, + "links": [], + "nullPointMode": "null as zero", + "options": { + "alertThreshold": true + }, + "percentage": false, + "pluginVersion": "10.1.1", + "pointradius": 5, + "points": false, + "renderer": "flot", + "seriesOverrides": [], + "spaceLength": 10, + "stack": true, + "steppedLine": false, + "targets": [ + { + "datasource": { + "uid": "$datasource" + }, + "expr": "sum by (job) (rate(thanos_compact_garbage_collection_total{job=~\"$job\"}[$__rate_interval]))", + "format": "time_series", + "intervalFactor": 2, + "legendFormat": "garbage collection {{job}}", + "refId": "A", + "step": 10 + } + ], + "thresholds": [], + "timeRegions": [], + "title": "Rate", + "tooltip": { + "shared": false, + "sort": 0, + "value_type": "individual" + }, + "type": "graph", + "xaxis": { + "mode": "time", + "show": true, + "values": [] + }, + "yaxes": [ + { + "format": "short", + "logBase": 1, + "min": 0, + "show": true + }, + { + "format": "short", + "logBase": 1, + "show": false + } + ], + "yaxis": { + "align": false + } + }, + { + "aliasColors": { + "error": "#E24D42" + }, + "bars": false, + "dashLength": 10, + "dashes": false, + "datasource": { + "uid": "$datasource" + }, + "description": "Shows ratio of errors compared to the total number of executed garbage collections.", + "fill": 10, + "fillGradient": 0, + "gridPos": { + "h": 7, + "w": 8, + "x": 8, + "y": 17 + }, + "hiddenSeries": false, + "id": 6, + "legend": { + "avg": false, + "current": false, + "max": false, + "min": false, + "show": true, + "total": false, + "values": false + }, + "lines": true, + "linewidth": 0, + "links": [], + "nullPointMode": "null as zero", + "options": { + "alertThreshold": true + }, + "percentage": false, + "pluginVersion": "10.1.1", + "pointradius": 5, + "points": false, + "renderer": "flot", + "seriesOverrides": [], + "spaceLength": 10, + "stack": true, + "steppedLine": false, + "targets": [ + { + "datasource": { + "uid": "$datasource" + }, + "expr": "sum by (job) (rate(thanos_compact_garbage_collection_failures_total{job=~\"$job\"}[$interval])) / sum by (job) (rate(thanos_compact_garbage_collection_total{job=~\"$job\"}[$interval]))", + "format": "time_series", + "intervalFactor": 2, + "legendFormat": "error", + "refId": "A", + "step": 10 + } + ], + "thresholds": [], + "timeRegions": [], + "title": "Errors", + "tooltip": { + "shared": false, + "sort": 0, + "value_type": "individual" + }, + "type": "graph", + "xaxis": { + "mode": "time", + "show": true, + "values": [] + }, + "yaxes": [ + { + "format": "percentunit", + "logBase": 1, + "min": 0, + "show": true + }, + { + "format": "short", + "logBase": 1, + "show": false + } + ], + "yaxis": { + "align": false + } + }, + { + "aliasColors": {}, + "bars": false, + "dashLength": 10, + "dashes": false, + "datasource": { + "uid": "$datasource" + }, + "description": "Shows how long has it taken to execute garbage collection in quantiles.", + "fill": 1, + "fillGradient": 0, + "gridPos": { + "h": 7, + "w": 8, + "x": 16, + "y": 17 + }, + "hiddenSeries": false, + "id": 7, + "legend": { + "avg": false, + "current": false, + "max": false, + "min": false, + "show": true, + "total": false, + "values": false + }, + "lines": true, + "linewidth": 1, + "links": [], + "nullPointMode": "null as zero", + "options": { + "alertThreshold": true + }, + "percentage": false, + "pluginVersion": "10.1.1", + "pointradius": 5, + "points": false, + "renderer": "flot", + "seriesOverrides": [ + { + "alias": "p99", + "color": "#FA6400", + "fill": 1, + "fillGradient": 1 + }, + { + "alias": "p90", + "color": "#E0B400", + "fill": 1, + "fillGradient": 1 + }, + { + "alias": "p50", + "color": "#37872D", + "fill": 10, + "fillGradient": 0 + } + ], + "spaceLength": 10, + "stack": false, + "steppedLine": false, + "targets": [ + { + "datasource": { + "uid": "$datasource" + }, + "expr": "histogram_quantile(0.50, sum by (job, le) (rate(thanos_compact_garbage_collection_duration_seconds_bucket{job=~\"$job\"}[$interval]))) * 1", + "format": "time_series", + "intervalFactor": 2, + "legendFormat": "p50 {{job}}", + "logBase": 10, + "refId": "A", + "step": 10 + }, + { + "datasource": { + "uid": "$datasource" + }, + "expr": "histogram_quantile(0.90, sum by (job, le) (rate(thanos_compact_garbage_collection_duration_seconds_bucket{job=~\"$job\"}[$interval]))) * 1", + "format": "time_series", + "intervalFactor": 2, + "legendFormat": "p90 {{job}}", + "logBase": 10, + "refId": "B", + "step": 10 + }, + { + "datasource": { + "uid": "$datasource" + }, + "expr": "histogram_quantile(0.99, sum by (job, le) (rate(thanos_compact_garbage_collection_duration_seconds_bucket{job=~\"$job\"}[$interval]))) * 1", + "format": "time_series", + "intervalFactor": 2, + "legendFormat": "p99 {{job}}", + "logBase": 10, + "refId": "C", + "step": 10 + } + ], + "thresholds": [], + "timeRegions": [], + "title": "Duration", + "tooltip": { + "shared": false, + "sort": 0, + "value_type": "individual" + }, + "type": "graph", + "xaxis": { + "mode": "time", + "show": true, + "values": [] + }, + "yaxes": [ + { + "format": "s", + "logBase": 1, + "min": 0, + "show": true + }, + { + "format": "short", + "logBase": 1, + "show": false + } + ], + "yaxis": { + "align": false + } + }, + { + "collapsed": false, + "datasource": { + "type": "prometheus", + "uid": "P5DCFC7561CCDE821" + }, + "gridPos": { + "h": 1, + "w": 24, + "x": 0, + "y": 24 + }, + "id": 23, + "panels": [], + "targets": [ + { + "datasource": { + "type": "prometheus", + "uid": "P5DCFC7561CCDE821" + }, + "refId": "A" + } + ], + "title": "Blocks deletion", + "type": "row" + }, + { + "aliasColors": {}, + "bars": false, + "dashLength": 10, + "dashes": false, + "datasource": { + "uid": "$datasource" + }, + "description": "Shows deletion rate of blocks already marked for deletion.", + "fill": 10, + "fillGradient": 0, + "gridPos": { + "h": 7, + "w": 8, + "x": 0, + "y": 25 + }, + "hiddenSeries": false, + "id": 8, + "legend": { + "avg": false, + "current": false, + "max": false, + "min": false, + "show": true, + "total": false, + "values": false + }, + "lines": true, + "linewidth": 0, + "links": [], + "nullPointMode": "null as zero", + "options": { + "alertThreshold": true + }, + "percentage": false, + "pluginVersion": "10.1.1", + "pointradius": 5, + "points": false, + "renderer": "flot", + "seriesOverrides": [], + "spaceLength": 10, + "stack": true, + "steppedLine": false, + "targets": [ + { + "datasource": { + "uid": "$datasource" + }, + "expr": "sum by (job) (rate(thanos_compact_blocks_cleaned_total{job=~\"$job\"}[$__rate_interval]))", + "format": "time_series", + "intervalFactor": 2, + "legendFormat": "Blocks cleanup {{job}}", + "refId": "A", + "step": 10 + } + ], + "thresholds": [], + "timeRegions": [], + "title": "Deletion Rate", + "tooltip": { + "shared": false, + "sort": 0, + "value_type": "individual" + }, + "type": "graph", + "xaxis": { + "mode": "time", + "show": true, + "values": [] + }, + "yaxes": [ + { + "format": "short", + "logBase": 1, + "min": 0, + "show": true + }, + { + "format": "short", + "logBase": 1, + "show": false + } + ], + "yaxis": { + "align": false + } + }, + { + "aliasColors": {}, + "bars": false, + "dashLength": 10, + "dashes": false, + "datasource": { + "uid": "$datasource" + }, + "description": "Shows deletion failures rate of blocks already marked for deletion.", + "fill": 1, + "fillGradient": 0, + "gridPos": { + "h": 7, + "w": 8, + "x": 8, + "y": 25 + }, + "hiddenSeries": false, + "id": 9, + "legend": { + "avg": false, + "current": false, + "max": false, + "min": false, + "show": true, + "total": false, + "values": false + }, + "lines": true, + "linewidth": 1, + "links": [], + "nullPointMode": "null as zero", + "options": { + "alertThreshold": true + }, + "percentage": false, + "pluginVersion": "10.1.1", + "pointradius": 5, + "points": false, + "renderer": "flot", + "seriesOverrides": [], + "spaceLength": 10, + "stack": false, + "steppedLine": false, + "targets": [ + { + "datasource": { + "uid": "$datasource" + }, + "expr": "sum by (job) (rate(thanos_compact_block_cleanup_failures_total{job=~\"$job\"}[$__rate_interval]))", + "format": "time_series", + "intervalFactor": 2, + "legendFormat": "Blocks cleanup failures {{job}}", + "refId": "A", + "step": 10 + } + ], + "thresholds": [], + "timeRegions": [], + "title": "Deletion Error Rate", + "tooltip": { + "shared": false, + "sort": 0, + "value_type": "individual" + }, + "type": "graph", + "xaxis": { + "mode": "time", + "show": true, + "values": [] + }, + "yaxes": [ + { + "format": "short", + "logBase": 1, + "min": 0, + "show": true + }, + { + "format": "short", + "logBase": 1, + "show": false + } + ], + "yaxis": { + "align": false + } + }, + { + "aliasColors": {}, + "bars": false, + "dashLength": 10, + "dashes": false, + "datasource": { + "uid": "$datasource" + }, + "description": "Shows rate at which blocks are marked for deletion (from GC and retention policy).", + "fill": 1, + "fillGradient": 0, + "gridPos": { + "h": 7, + "w": 8, + "x": 16, + "y": 25 + }, + "hiddenSeries": false, + "id": 10, + "legend": { + "avg": false, + "current": false, + "max": false, + "min": false, + "show": true, + "total": false, + "values": false + }, + "lines": true, + "linewidth": 1, + "links": [], + "nullPointMode": "null as zero", + "options": { + "alertThreshold": true + }, + "percentage": false, + "pluginVersion": "10.1.1", + "pointradius": 5, + "points": false, + "renderer": "flot", + "seriesOverrides": [], + "spaceLength": 10, + "stack": false, + "steppedLine": false, + "targets": [ + { + "datasource": { + "uid": "$datasource" + }, + "expr": "sum by (job) (rate(thanos_compact_blocks_marked_for_deletion_total{job=~\"$job\"}[$__rate_interval]))", + "format": "time_series", + "intervalFactor": 2, + "legendFormat": "Blocks marked {{job}}", + "refId": "A", + "step": 10 + } + ], + "thresholds": [], + "timeRegions": [], + "title": "Marking Rate", + "tooltip": { + "shared": false, + "sort": 0, + "value_type": "individual" + }, + "type": "graph", + "xaxis": { + "mode": "time", + "show": true, + "values": [] + }, + "yaxes": [ + { + "format": "short", + "logBase": 1, + "min": 0, + "show": true + }, + { + "format": "short", + "logBase": 1, + "show": false + } + ], + "yaxis": { + "align": false + } + }, + { + "collapsed": false, + "datasource": { + "type": "prometheus", + "uid": "P5DCFC7561CCDE821" + }, + "gridPos": { + "h": 1, + "w": 24, + "x": 0, + "y": 32 + }, + "id": 24, + "panels": [], + "targets": [ + { + "datasource": { + "type": "prometheus", + "uid": "P5DCFC7561CCDE821" + }, + "refId": "A" + } + ], + "title": "Sync Meta", + "type": "row" + }, + { + "aliasColors": {}, + "bars": false, + "dashLength": 10, + "dashes": false, + "datasource": { + "uid": "$datasource" + }, + "description": "Shows rate of execution for all meta files from blocks in the bucket into the memory.", + "fill": 10, + "fillGradient": 0, + "gridPos": { + "h": 7, + "w": 8, + "x": 0, + "y": 33 + }, + "hiddenSeries": false, + "id": 11, + "legend": { + "avg": false, + "current": false, + "max": false, + "min": false, + "show": true, + "total": false, + "values": false + }, + "lines": true, + "linewidth": 0, + "links": [], + "nullPointMode": "null as zero", + "options": { + "alertThreshold": true + }, + "percentage": false, + "pluginVersion": "10.1.1", + "pointradius": 5, + "points": false, + "renderer": "flot", + "seriesOverrides": [], + "spaceLength": 10, + "stack": true, + "steppedLine": false, + "targets": [ + { + "datasource": { + "uid": "$datasource" + }, + "expr": "sum by (job) (rate(thanos_blocks_meta_syncs_total{job=~\"$job\"}[$__rate_interval]))", + "format": "time_series", + "intervalFactor": 2, + "legendFormat": "sync {{job}}", + "refId": "A", + "step": 10 + } + ], + "thresholds": [], + "timeRegions": [], + "title": "Rate", + "tooltip": { + "shared": false, + "sort": 0, + "value_type": "individual" + }, + "type": "graph", + "xaxis": { + "mode": "time", + "show": true, + "values": [] + }, + "yaxes": [ + { + "format": "short", + "logBase": 1, + "min": 0, + "show": true + }, + { + "format": "short", + "logBase": 1, + "show": false + } + ], + "yaxis": { + "align": false + } + }, + { + "aliasColors": { + "error": "#E24D42" + }, + "bars": false, + "dashLength": 10, + "dashes": false, + "datasource": { + "uid": "$datasource" + }, + "description": "Shows ratio of errors compared to the total number of executed meta file sync.", + "fill": 10, + "fillGradient": 0, + "gridPos": { + "h": 7, + "w": 8, + "x": 8, + "y": 33 + }, + "hiddenSeries": false, + "id": 12, + "legend": { + "avg": false, + "current": false, + "max": false, + "min": false, + "show": true, + "total": false, + "values": false + }, + "lines": true, + "linewidth": 0, + "links": [], + "nullPointMode": "null as zero", + "options": { + "alertThreshold": true + }, + "percentage": false, + "pluginVersion": "10.1.1", + "pointradius": 5, + "points": false, + "renderer": "flot", + "seriesOverrides": [], + "spaceLength": 10, + "stack": true, + "steppedLine": false, + "targets": [ + { + "datasource": { + "uid": "$datasource" + }, + "expr": "sum by (job) (rate(thanos_blocks_meta_sync_failures_total{job=~\"$job\"}[$interval])) / sum by (job) (rate(thanos_blocks_meta_syncs_total{job=~\"$job\"}[$interval]))", + "format": "time_series", + "intervalFactor": 2, + "legendFormat": "error", + "refId": "A", + "step": 10 + } + ], + "thresholds": [], + "timeRegions": [], + "title": "Errors", + "tooltip": { + "shared": false, + "sort": 0, + "value_type": "individual" + }, + "type": "graph", + "xaxis": { + "mode": "time", + "show": true, + "values": [] + }, + "yaxes": [ + { + "format": "percentunit", + "logBase": 1, + "min": 0, + "show": true + }, + { + "format": "short", + "logBase": 1, + "show": false + } + ], + "yaxis": { + "align": false + } + }, + { + "aliasColors": {}, + "bars": false, + "dashLength": 10, + "dashes": false, + "datasource": { + "uid": "$datasource" + }, + "description": "Shows how long has it taken to execute meta file sync, in quantiles.", + "fill": 1, + "fillGradient": 0, + "gridPos": { + "h": 7, + "w": 8, + "x": 16, + "y": 33 + }, + "hiddenSeries": false, + "id": 13, + "legend": { + "avg": false, + "current": false, + "max": false, + "min": false, + "show": true, + "total": false, + "values": false + }, + "lines": true, + "linewidth": 1, + "links": [], + "nullPointMode": "null as zero", + "options": { + "alertThreshold": true + }, + "percentage": false, + "pluginVersion": "10.1.1", + "pointradius": 5, + "points": false, + "renderer": "flot", + "seriesOverrides": [ + { + "alias": "p99", + "color": "#FA6400", + "fill": 1, + "fillGradient": 1 + }, + { + "alias": "p90", + "color": "#E0B400", + "fill": 1, + "fillGradient": 1 + }, + { + "alias": "p50", + "color": "#37872D", + "fill": 10, + "fillGradient": 0 + } + ], + "spaceLength": 10, + "stack": false, + "steppedLine": false, + "targets": [ + { + "datasource": { + "uid": "$datasource" + }, + "expr": "histogram_quantile(0.50, sum by (job, le) (rate(thanos_blocks_meta_sync_duration_seconds_bucket{job=~\"$job\"}[$interval]))) * 1", + "format": "time_series", + "intervalFactor": 2, + "legendFormat": "p50 {{job}}", + "logBase": 10, + "refId": "A", + "step": 10 + }, + { + "datasource": { + "uid": "$datasource" + }, + "expr": "histogram_quantile(0.90, sum by (job, le) (rate(thanos_blocks_meta_sync_duration_seconds_bucket{job=~\"$job\"}[$interval]))) * 1", + "format": "time_series", + "intervalFactor": 2, + "legendFormat": "p90 {{job}}", + "logBase": 10, + "refId": "B", + "step": 10 + }, + { + "datasource": { + "uid": "$datasource" + }, + "expr": "histogram_quantile(0.99, sum by (job, le) (rate(thanos_blocks_meta_sync_duration_seconds_bucket{job=~\"$job\"}[$interval]))) * 1", + "format": "time_series", + "intervalFactor": 2, + "legendFormat": "p99 {{job}}", + "logBase": 10, + "refId": "C", + "step": 10 + } + ], + "thresholds": [], + "timeRegions": [], + "title": "Duration", + "tooltip": { + "shared": false, + "sort": 0, + "value_type": "individual" + }, + "type": "graph", + "xaxis": { + "mode": "time", + "show": true, + "values": [] + }, + "yaxes": [ + { + "format": "s", + "logBase": 1, + "min": 0, + "show": true + }, + { + "format": "short", + "logBase": 1, + "show": false + } + ], + "yaxis": { + "align": false + } + }, + { + "collapsed": false, + "datasource": { + "type": "prometheus", + "uid": "P5DCFC7561CCDE821" + }, + "gridPos": { + "h": 1, + "w": 24, + "x": 0, + "y": 40 + }, + "id": 25, + "panels": [], + "targets": [ + { + "datasource": { + "type": "prometheus", + "uid": "P5DCFC7561CCDE821" + }, + "refId": "A" + } + ], + "title": "Object Store Operations", + "type": "row" + }, + { + "aliasColors": {}, + "bars": false, + "dashLength": 10, + "dashes": false, + "datasource": { + "uid": "$datasource" + }, + "description": "Shows rate of execution for operations against the bucket.", + "fill": 10, + "fillGradient": 0, + "gridPos": { + "h": 7, + "w": 8, + "x": 0, + "y": 41 + }, + "hiddenSeries": false, + "id": 14, + "legend": { + "avg": false, + "current": false, + "max": false, + "min": false, + "show": true, + "total": false, + "values": false + }, + "lines": true, + "linewidth": 0, + "links": [], + "nullPointMode": "null as zero", + "options": { + "alertThreshold": true + }, + "percentage": false, + "pluginVersion": "10.1.1", + "pointradius": 5, + "points": false, + "renderer": "flot", + "seriesOverrides": [], + "spaceLength": 10, + "stack": true, + "steppedLine": false, + "targets": [ + { + "datasource": { + "uid": "$datasource" + }, + "expr": "sum by (job, operation) (rate(thanos_objstore_bucket_operations_total{job=~\"$job\"}[$__rate_interval]))", + "format": "time_series", + "intervalFactor": 2, + "legendFormat": "{{job}} {{operation}}", + "refId": "A", + "step": 10 + } + ], + "thresholds": [], + "timeRegions": [], + "title": "Rate", + "tooltip": { + "shared": false, + "sort": 0, + "value_type": "individual" + }, + "type": "graph", + "xaxis": { + "mode": "time", + "show": true, + "values": [] + }, + "yaxes": [ + { + "format": "short", + "logBase": 1, + "min": 0, + "show": true + }, + { + "format": "short", + "logBase": 1, + "show": false + } + ], + "yaxis": { + "align": false + } + }, + { + "aliasColors": { + "error": "#E24D42" + }, + "bars": false, + "dashLength": 10, + "dashes": false, + "datasource": { + "uid": "$datasource" + }, + "description": "Shows ratio of errors compared to the total number of executed operations against the bucket.", + "fill": 10, + "fillGradient": 0, + "gridPos": { + "h": 7, + "w": 8, + "x": 8, + "y": 41 + }, + "hiddenSeries": false, + "id": 15, + "legend": { + "avg": false, + "current": false, + "max": false, + "min": false, + "show": true, + "total": false, + "values": false + }, + "lines": true, + "linewidth": 0, + "links": [], + "nullPointMode": "null as zero", + "options": { + "alertThreshold": true + }, + "percentage": false, + "pluginVersion": "10.1.1", + "pointradius": 5, + "points": false, + "renderer": "flot", + "seriesOverrides": [], + "spaceLength": 10, + "stack": true, + "steppedLine": false, + "targets": [ + { + "datasource": { + "uid": "$datasource" + }, + "expr": "sum by (job) (rate(thanos_objstore_bucket_operation_failures_total{job=~\"$job\"}[$interval])) / sum by (job) (rate(thanos_objstore_bucket_operations_total{job=~\"$job\"}[$interval]))", + "format": "time_series", + "intervalFactor": 2, + "legendFormat": "error", + "refId": "A", + "step": 10 + } + ], + "thresholds": [], + "timeRegions": [], + "title": "Errors", + "tooltip": { + "shared": false, + "sort": 0, + "value_type": "individual" + }, + "type": "graph", + "xaxis": { + "mode": "time", + "show": true, + "values": [] + }, + "yaxes": [ + { + "format": "percentunit", + "logBase": 1, + "min": 0, + "show": true + }, + { + "format": "short", + "logBase": 1, + "show": false + } + ], + "yaxis": { + "align": false + } + }, + { + "aliasColors": {}, + "bars": false, + "dashLength": 10, + "dashes": false, + "datasource": { + "uid": "$datasource" + }, + "description": "Shows how long has it taken to execute operations against the bucket, in quantiles.", + "fill": 1, + "fillGradient": 0, + "gridPos": { + "h": 7, + "w": 8, + "x": 16, + "y": 41 + }, + "hiddenSeries": false, + "id": 16, + "legend": { + "avg": false, + "current": false, + "max": false, + "min": false, + "show": true, + "total": false, + "values": false + }, + "lines": true, + "linewidth": 1, + "links": [], + "nullPointMode": "null as zero", + "options": { + "alertThreshold": true + }, + "percentage": false, + "pluginVersion": "10.1.1", + "pointradius": 5, + "points": false, + "renderer": "flot", + "seriesOverrides": [ + { + "alias": "p99", + "color": "#FA6400", + "fill": 1, + "fillGradient": 1 + }, + { + "alias": "p90", + "color": "#E0B400", + "fill": 1, + "fillGradient": 1 + }, + { + "alias": "p50", + "color": "#37872D", + "fill": 10, + "fillGradient": 0 + } + ], + "spaceLength": 10, + "stack": false, + "steppedLine": false, + "targets": [ + { + "datasource": { + "uid": "$datasource" + }, + "expr": "histogram_quantile(0.50, sum by (job, le) (rate(thanos_objstore_bucket_operation_duration_seconds_bucket{job=~\"$job\"}[$interval]))) * 1", + "format": "time_series", + "intervalFactor": 2, + "legendFormat": "p50 {{job}}", + "logBase": 10, + "refId": "A", + "step": 10 + }, + { + "datasource": { + "uid": "$datasource" + }, + "expr": "histogram_quantile(0.90, sum by (job, le) (rate(thanos_objstore_bucket_operation_duration_seconds_bucket{job=~\"$job\"}[$interval]))) * 1", + "format": "time_series", + "intervalFactor": 2, + "legendFormat": "p90 {{job}}", + "logBase": 10, + "refId": "B", + "step": 10 + }, + { + "datasource": { + "uid": "$datasource" + }, + "expr": "histogram_quantile(0.99, sum by (job, le) (rate(thanos_objstore_bucket_operation_duration_seconds_bucket{job=~\"$job\"}[$interval]))) * 1", + "format": "time_series", + "intervalFactor": 2, + "legendFormat": "p99 {{job}}", + "logBase": 10, + "refId": "C", + "step": 10 + } + ], + "thresholds": [], + "timeRegions": [], + "title": "Duration", + "tooltip": { + "shared": false, + "sort": 0, + "value_type": "individual" + }, + "type": "graph", + "xaxis": { + "mode": "time", + "show": true, + "values": [] + }, + "yaxes": [ + { + "format": "s", + "logBase": 1, + "min": 0, + "show": true + }, + { + "format": "short", + "logBase": 1, + "show": false + } + ], + "yaxis": { + "align": false + } + }, + { + "collapsed": true, + "datasource": { + "type": "prometheus", + "uid": "P5DCFC7561CCDE821" + }, + "gridPos": { + "h": 1, + "w": 24, + "x": 0, + "y": 48 + }, + "id": 26, + "panels": [ + { + "aliasColors": {}, + "bars": false, + "dashLength": 10, + "dashes": false, + "datasource": { + "uid": "$datasource" + }, + "fill": 1, + "gridPos": { + "h": 7, + "w": 8, + "x": 0, + "y": 49 + }, + "id": 17, + "legend": { + "avg": false, + "current": false, + "max": false, + "min": false, + "show": true, + "total": false, + "values": false + }, + "lines": true, + "linewidth": 1, + "links": [], + "nullPointMode": "null as zero", + "percentage": false, + "pointradius": 5, + "points": false, + "renderer": "flot", + "seriesOverrides": [], + "spaceLength": 10, + "stack": false, + "steppedLine": false, + "targets": [ + { + "datasource": { + "uid": "$datasource" + }, + "expr": "go_memstats_alloc_bytes{job=~\"$job\"}", + "format": "time_series", + "intervalFactor": 2, + "legendFormat": "alloc all {{instance}}", + "step": 10 + }, + { + "datasource": { + "uid": "$datasource" + }, + "expr": "go_memstats_heap_alloc_bytes{job=~\"$job\"}", + "format": "time_series", + "intervalFactor": 2, + "legendFormat": "alloc heap {{instance}}", + "step": 10 + }, + { + "datasource": { + "uid": "$datasource" + }, + "expr": "rate(go_memstats_alloc_bytes_total{job=~\"$job\"}[30s])", + "format": "time_series", + "intervalFactor": 2, + "legendFormat": "alloc rate all {{instance}}", + "step": 10 + }, + { + "datasource": { + "uid": "$datasource" + }, + "expr": "rate(go_memstats_heap_alloc_bytes{job=~\"$job\"}[30s])", + "format": "time_series", + "intervalFactor": 2, + "legendFormat": "alloc rate heap {{instance}}", + "step": 10 + }, + { + "datasource": { + "uid": "$datasource" + }, + "expr": "go_memstats_stack_inuse_bytes{job=~\"$job\"}", + "format": "time_series", + "intervalFactor": 2, + "legendFormat": "inuse heap {{instance}}", + "step": 10 + }, + { + "datasource": { + "uid": "$datasource" + }, + "expr": "go_memstats_heap_inuse_bytes{job=~\"$job\"}", + "format": "time_series", + "intervalFactor": 2, + "legendFormat": "inuse stack {{instance}}", + "step": 10 + } + ], + "thresholds": [], + "title": "Memory Used", + "tooltip": { + "shared": false, + "sort": 0, + "value_type": "individual" + }, + "type": "graph", + "xaxis": { + "mode": "time", + "show": true, + "values": [] + }, + "yaxes": [ + { + "format": "bytes", + "logBase": 1, + "min": 0, + "show": true + }, + { + "format": "short", + "logBase": 1, + "show": false + } + ] + }, + { + "aliasColors": {}, + "bars": false, + "dashLength": 10, + "dashes": false, + "datasource": { + "uid": "$datasource" + }, + "fill": 1, + "gridPos": { + "h": 7, + "w": 8, + "x": 8, + "y": 49 + }, + "id": 18, + "legend": { + "avg": false, + "current": false, + "max": false, + "min": false, + "show": true, + "total": false, + "values": false + }, + "lines": true, + "linewidth": 1, + "links": [], + "nullPointMode": "null as zero", + "percentage": false, + "pointradius": 5, + "points": false, + "renderer": "flot", + "seriesOverrides": [], + "spaceLength": 10, + "stack": false, + "steppedLine": false, + "targets": [ + { + "datasource": { + "uid": "$datasource" + }, + "expr": "go_goroutines{job=~\"$job\"}", + "format": "time_series", + "intervalFactor": 2, + "legendFormat": "{{instance}}", + "step": 10 + } + ], + "thresholds": [], + "title": "Goroutines", + "tooltip": { + "shared": false, + "sort": 0, + "value_type": "individual" + }, + "type": "graph", + "xaxis": { + "mode": "time", + "show": true, + "values": [] + }, + "yaxes": [ + { + "format": "short", + "logBase": 1, + "min": 0, + "show": true + }, + { + "format": "short", + "logBase": 1, + "show": false + } + ] + }, + { + "aliasColors": {}, + "bars": false, + "dashLength": 10, + "dashes": false, + "datasource": { + "uid": "$datasource" + }, + "fill": 1, + "gridPos": { + "h": 7, + "w": 8, + "x": 16, + "y": 49 + }, + "id": 19, + "legend": { + "avg": false, + "current": false, + "max": false, + "min": false, + "show": true, + "total": false, + "values": false + }, + "lines": true, + "linewidth": 1, + "links": [], + "nullPointMode": "null as zero", + "percentage": false, + "pointradius": 5, + "points": false, + "renderer": "flot", + "seriesOverrides": [], + "spaceLength": 10, + "stack": false, + "steppedLine": false, + "targets": [ + { + "datasource": { + "uid": "$datasource" + }, + "expr": "go_gc_duration_seconds{job=~\"$job\"}", + "format": "time_series", + "intervalFactor": 2, + "legendFormat": "{{quantile}} {{instance}}", + "step": 10 + } + ], + "thresholds": [], + "title": "GC Time Quantiles", + "tooltip": { + "shared": false, + "sort": 0, + "value_type": "individual" + }, + "type": "graph", + "xaxis": { + "mode": "time", + "show": true, + "values": [] + }, + "yaxes": [ + { + "format": "short", + "logBase": 1, + "min": 0, + "show": true + }, + { + "format": "short", + "logBase": 1, + "show": false + } + ] + } + ], + "targets": [ + { + "datasource": { + "type": "prometheus", + "uid": "P5DCFC7561CCDE821" + }, + "refId": "A" + } + ], + "title": "Resources", + "type": "row" + } + ], + "refresh": "10s", + "schemaVersion": 38, + "style": "dark", + "tags": [ + "thanos-mixin" + ], + "templating": { + "list": [ + { + "current": { + "selected": false, + "text": "default", + "value": "default" + }, + "hide": 0, + "includeAll": false, + "multi": false, + "name": "datasource", + "options": [], + "query": "prometheus", + "queryValue": "", + "refresh": 1, + "regex": "", + "skipUrlSync": false, + "type": "datasource" + }, + { + "auto": true, + "auto_count": 300, + "auto_min": "10s", + "current": { + "selected": false, + "text": "5m", + "value": "5m" + }, + "hide": 0, + "label": "interval", + "name": "interval", + "options": [ + { + "selected": false, + "text": "auto", + "value": "$__auto_interval_interval" + }, + { + "selected": true, + "text": "5m", + "value": "5m" + }, + { + "selected": false, + "text": "10m", + "value": "10m" + }, + { + "selected": false, + "text": "30m", + "value": "30m" + }, + { + "selected": false, + "text": "1h", + "value": "1h" + }, + { + "selected": false, + "text": "6h", + "value": "6h" + }, + { + "selected": false, + "text": "12h", + "value": "12h" + } + ], + "query": "5m,10m,30m,1h,6h,12h", + "queryValue": "", + "refresh": 2, + "skipUrlSync": false, + "type": "interval" + }, + { + "current": { + "selected": true, + "text": "thanos-compactor", + "value": "thanos-compactor" + }, + "datasource": { + "type": "prometheus", + "uid": "$datasource" + }, + "definition": "", + "hide": 0, + "includeAll": false, + "label": "job", + "multi": false, + "name": "job", + "options": [], + "query": "label_values(up{job=~\".*thanos-compact.*\"}, job)", + "refresh": 1, + "regex": "", + "skipUrlSync": false, + "sort": 2, + "tagValuesQuery": "", + "tagsQuery": "", + "type": "query", + "useTags": false + } + ] + }, + "time": { + "from": "now-1h", + "to": "now" + }, + "timepicker": { + "refresh_intervals": [ + "5s", + "10s", + "30s", + "1m", + "5m", + "15m", + "30m", + "1h", + "2h", + "1d" + ], + "time_options": [ + "5m", + "15m", + "1h", + "6h", + "12h", + "24h", + "2d", + "7d", + "30d" + ] + }, + "timezone": "UTC", + "title": "Thanos / Compact", + "uid": "651943d05a8123e32867b4673963f42b", + "version": 1, + "weekStart": "" + } \ No newline at end of file diff --git a/locals.tf b/locals.tf index 68762134..ec628308 100644 --- a/locals.tf +++ b/locals.tf @@ -3,7 +3,12 @@ locals { # Possible values available here -> https://github.com/bitnami/charts/tree/master/bitnami/thanos/ helm_values = [{ thanos = { - + metrics = { + enabled = true + serviceMonitor = { + enabled = true + } + } storegateway = { enabled = true persistence = {