diff --git a/infrastructure/ansible/roles/docker/files/docker-daemon.json b/infrastructure/ansible/roles/docker/files/docker-daemon.json index b8ab8c9d..54d65077 100644 --- a/infrastructure/ansible/roles/docker/files/docker-daemon.json +++ b/infrastructure/ansible/roles/docker/files/docker-daemon.json @@ -2,6 +2,7 @@ "log-driver": "json-file", "log-opts": { "max-size": "10m", - "max-file": "3" + "max-file": "3", + "labels-regex": "^.+" } } diff --git a/monitoring/docker-compose.dev.yml b/monitoring/docker-compose.dev.yml index c5138a68..24e952bf 100644 --- a/monitoring/docker-compose.dev.yml +++ b/monitoring/docker-compose.dev.yml @@ -12,3 +12,27 @@ services: - target: 9090 published: 9090 mode: host + + loki: + ports: + - target: 3100 + published: 3100 + mode: host + + loki2: + ports: + - target: 3100 + published: 3101 + mode: host + + loki3: + ports: + - target: 3100 + published: 3102 + mode: host + + minio1: + ports: + - target: 9001 + published: 9001 + mode: host diff --git a/monitoring/docker-compose.yml b/monitoring/docker-compose.yml index c74cc214..dd2f3da2 100644 --- a/monitoring/docker-compose.yml +++ b/monitoring/docker-compose.yml @@ -49,6 +49,8 @@ services: source: kminion-groups_rev1.json - target: /etc/grafana/provisioning/dashboards/applications/kminion-topic_rev1.json source: kminion-topic_rev1.json + - target: /etc/grafana/provisioning/dashboards/containers/logging-universal-dashboard_rev1.json + source: logging-universal-dashboard_rev1.json prometheus: image: prom/prometheus:v2.38.0 @@ -94,6 +96,88 @@ services: - KAFKA_BROKER_LIST=kafka:9092 - KAFKA_COMPRESSION=gzip + loki: + image: grafana/loki:2.7.1 + configs: + - target: /etc/loki/loki-config.yml + source: loki-config.yml + command: -config.file=/etc/loki/loki-config.yml -config.expand-env=true -print-config-stderr + environment: + - NODE_NAME=loki + - MEMBER1=loki2 + - MEMBER2=loki3 + deploy: + labels: + - prometheus-job-service=loki + - prometheus-address=loki:3100 + + loki2: + image: 
grafana/loki:2.7.1 + configs: + - target: /etc/loki/loki-config.yml + source: loki-config.yml + command: -config.file=/etc/loki/loki-config.yml -config.expand-env=true -print-config-stderr + environment: + - NODE_NAME=loki2 + - MEMBER1=loki + - MEMBER2=loki3 + deploy: + labels: + - prometheus-job-service=loki2 + - prometheus-address=loki2:3100 + + loki3: + image: grafana/loki:2.7.1 + configs: + - target: /etc/loki/loki-config.yml + source: loki-config.yml + command: -config.file=/etc/loki/loki-config.yml -config.expand-env=true -print-config-stderr + environment: + - NODE_NAME=loki3 + - MEMBER1=loki2 + - MEMBER2=loki + deploy: + labels: + - prometheus-job-service=loki3 + - prometheus-address=loki3:3100 + + promtail: + image: grafana/promtail:2.6.1 + volumes: + - /var/lib/docker/containers:/host/containers + - /var/log:/var/log:ro + configs: + - target: /etc/promtail/promtail-config.yml + source: promtail-config.yml + command: -config.file=/etc/promtail/promtail-config.yml + deploy: + mode: global + + minio1: + image: quay.io/minio/minio:RELEASE.2022-10-24T18-35-07Z + entrypoint: sh + command: -c 'mkdir -p /data1/loki /data2/loki && minio server --console-address ":9001" http://minio{1...1}/data{1...2}' + environment: + MINIO_ROOT_USER: minioadmin + MINIO_ROOT_PASSWORD: minioadmin + healthcheck: + test: + [ + "CMD", + "curl", + "-f", + "http://localhost:9000/minio/health/live" + ] + interval: 30s + timeout: 20s + retries: 3 + hostname: minio1 + volumes: + - minio_data1:/data1 + - minio_data2:/data2 + deploy: + replicas: 1 + configs: grafana.ini: file: ./grafana/grafana.ini @@ -135,12 +219,29 @@ configs: name: kminion-topic_rev1.json-${kminion_topic_rev1_json_DIGEST:?err} labels: name: grafana + logging-universal-dashboard_rev1.json: + file: ./grafana/dashboards/containers/logging-universal-dashboard_rev1.json + name: logging-universal-dashboard_rev1.json-${logging_universal_dashboard_rev1_json_DIGEST:?err} + labels: + name: grafana prometheus.yml: file: 
./prometheus/prometheus.yml name: prometheus.yml-${prometheus_yml_DIGEST:?err} labels: name: prometheus + loki-config.yml: + file: ./loki/loki-config.yml + name: loki-config.yml-${loki_config_yml_DIGEST:?err} + labels: + name: loki + promtail-config.yml: + file: ./promtail/promtail-config.yml + name: promtail-config.yml-${promtail_config_yml_DIGEST:?err} + labels: + name: promtail volumes: prometheus_data: grafana_data: + minio_data1: + minio_data2: diff --git a/monitoring/grafana/dashboards/containers/logging-universal-dashboard_rev1.json b/monitoring/grafana/dashboards/containers/logging-universal-dashboard_rev1.json new file mode 100644 index 00000000..8d3bf322 --- /dev/null +++ b/monitoring/grafana/dashboards/containers/logging-universal-dashboard_rev1.json @@ -0,0 +1,926 @@ +{ + "annotations": { + "list": [ + { + "builtIn": 1, + "datasource": { + "type": "datasource", + "uid": "grafana" + }, + "enable": true, + "hide": true, + "iconColor": "rgba(0, 211, 255, 1)", + "name": "Annotations & Alerts", + "target": { + "limit": 100, + "matchAny": false, + "tags": [], + "type": "dashboard" + }, + "type": "dashboard" + } + ] + }, + "description": "Universal and flexible dashboard for logging", + "editable": true, + "fiscalYearStartMonth": 0, + "gnetId": 12611, + "graphTooltip": 0, + "id": 11, + "links": [], + "liveNow": false, + "panels": [ + { + "datasource": { + "type": "loki", + "uid": "P00201832B18B88C3" + }, + "description": "Total Count of log lines in the specified time range", + "fieldConfig": { + "defaults": { + "color": { + "mode": "thresholds" + }, + "mappings": [ + { + "options": { + "match": "null", + "result": { + "color": "rgb(31, 255, 7)", + "text": "0" + } + }, + "type": "special" + } + ], + "thresholds": { + "mode": "absolute", + "steps": [ + { + "color": "rgb(31, 255, 7)", + "value": null + }, + { + "color": "rgb(31, 255, 7)", + "value": 10 + }, + { + "color": "rgb(31, 255, 7)", + "value": 50 + } + ] + }, + "unit": "short" + }, + "overrides": [] + }, 
+ "gridPos": { + "h": 3, + "w": 12, + "x": 0, + "y": 0 + }, + "id": 11, + "links": [], + "maxDataPoints": 100, + "options": { + "colorMode": "value", + "graphMode": "area", + "justifyMode": "auto", + "orientation": "horizontal", + "reduceOptions": { + "calcs": [ + "sum" + ], + "fields": "", + "values": false + }, + "textMode": "auto" + }, + "pluginVersion": "9.1.0", + "targets": [ + { + "datasource": { + "type": "loki", + "uid": "P00201832B18B88C3" + }, + "editorMode": "code", + "expr": "sum(count_over_time(({swarm_service_name=\"$service_name\", stream=~\"$stream\", swarm_task_name=~\"$task_name\"})[$__interval]))", + "hide": false, + "queryType": "range", + "refId": "A" + } + ], + "title": "Total Count of logs", + "type": "stat" + }, + { + "datasource": { + "type": "loki", + "uid": "P00201832B18B88C3" + }, + "description": "Total Count: of $searchable_pattern in the specified time range", + "fieldConfig": { + "defaults": { + "color": { + "mode": "thresholds" + }, + "mappings": [ + { + "options": { + "match": "null", + "result": { + "color": "rgb(222, 15, 43)", + "text": "0" + } + }, + "type": "special" + } + ], + "thresholds": { + "mode": "absolute", + "steps": [ + { + "color": "rgb(222, 15, 43)", + "value": null + }, + { + "color": "rgb(222, 15, 43)", + "value": 10 + }, + { + "color": "rgb(222, 15, 43)", + "value": 50 + } + ] + }, + "unit": "short" + }, + "overrides": [] + }, + "gridPos": { + "h": 3, + "w": 12, + "x": 12, + "y": 0 + }, + "id": 6, + "links": [], + "maxDataPoints": 100, + "options": { + "colorMode": "value", + "graphMode": "area", + "justifyMode": "auto", + "orientation": "horizontal", + "reduceOptions": { + "calcs": [ + "sum" + ], + "fields": "", + "values": false + }, + "textMode": "auto" + }, + "pluginVersion": "9.1.0", + "targets": [ + { + "datasource": { + "type": "loki", + "uid": "P00201832B18B88C3" + }, + "editorMode": "code", + "expr": "sum(count_over_time(({swarm_service_name=\"$service_name\", stream=~\"$stream\", 
swarm_task_name=~\"$task_name\"} |~ \"(?i)$searchable_pattern\")[$__interval]))", + "hide": false, + "queryType": "range", + "refId": "A" + } + ], + "title": "Total Count: of \"$searchable_pattern\"", + "type": "stat" + }, + { + "datasource": { + "type": "loki", + "uid": "P00201832B18B88C3" + }, + "description": "Live logs is a like 'tail -f | grep' in a real time", + "gridPos": { + "h": 22, + "w": 24, + "x": 0, + "y": 3 + }, + "id": 2, + "options": { + "dedupStrategy": "none", + "enableLogDetails": true, + "prettifyLogMessage": false, + "showCommonLabels": true, + "showLabels": false, + "showTime": true, + "sortOrder": "Descending", + "wrapLogMessage": true + }, + "targets": [ + { + "datasource": { + "type": "loki", + "uid": "P00201832B18B88C3" + }, + "editorMode": "code", + "expr": "{swarm_service_name=\"$service_name\", stream=~\"$stream\", swarm_task_name=~\"$task_name\"} |~ \"(?i)$searchable_pattern\"", + "hide": false, + "queryType": "range", + "refId": "A" + } + ], + "title": "Live logs (filtered by \"$searchable_pattern\")", + "type": "logs" + }, + { + "datasource": { + "type": "loki", + "uid": "P00201832B18B88C3" + }, + "fieldConfig": { + "defaults": { + "color": { + "mode": "palette-classic" + }, + "custom": { + "hideFrom": { + "legend": false, + "tooltip": false, + "viz": false + } + }, + "mappings": [] + }, + "overrides": [] + }, + "gridPos": { + "h": 7, + "w": 7, + "x": 0, + "y": 25 + }, + "id": 19, + "links": [], + "maxDataPoints": 100, + "options": { + "legend": { + "displayMode": "list", + "placement": "bottom", + "showLegend": true + }, + "pieType": "pie", + "reduceOptions": { + "calcs": [ + "lastNotNull" + ], + "fields": "", + "values": false + }, + "tooltip": { + "mode": "single", + "sort": "none" + } + }, + "pluginVersion": "7.0.4", + "targets": [ + { + "datasource": { + "type": "loki", + "uid": "P00201832B18B88C3" + }, + "editorMode": "code", + "expr": "sum(count_over_time(({swarm_service_name=\"$service_name\", stream=~\"$stream\", 
swarm_task_name=~\"$task_name\"})[$__interval])) by (stream)", + "hide": false, + "queryType": "range", + "refId": "A" + } + ], + "title": "Total count of stderr / stdout pie", + "type": "piechart" + }, + { + "datasource": { + "type": "loki", + "uid": "P00201832B18B88C3" + }, + "fieldConfig": { + "defaults": { + "color": { + "mode": "palette-classic" + }, + "custom": { + "hideFrom": { + "legend": false, + "tooltip": false, + "viz": false + } + }, + "mappings": [] + }, + "overrides": [] + }, + "gridPos": { + "h": 7, + "w": 12, + "x": 7, + "y": 25 + }, + "id": 20, + "interval": "1m", + "links": [], + "maxDataPoints": "", + "options": { + "legend": { + "displayMode": "list", + "placement": "bottom", + "showLegend": true + }, + "pieType": "donut", + "reduceOptions": { + "calcs": [ + "lastNotNull" + ], + "fields": "", + "values": false + }, + "tooltip": { + "mode": "single", + "sort": "none" + } + }, + "pluginVersion": "6.4.3", + "targets": [ + { + "datasource": { + "type": "loki", + "uid": "P00201832B18B88C3" + }, + "editorMode": "code", + "expr": "sum(count_over_time(({swarm_service_name=\"$service_name\", stream=~\"$stream\", swarm_task_name=~\"$task_name\"} |~ \"(?i)$searchable_pattern\")[$__interval])) by (swarm_task_name)", + "queryType": "range", + "refId": "A" + } + ], + "title": "Matched word: \"$searchable_pattern\" donut", + "type": "piechart" + }, + { + "datasource": { + "type": "loki", + "uid": "P00201832B18B88C3" + }, + "fieldConfig": { + "defaults": { + "mappings": [ + { + "options": { + "match": "null", + "result": { + "color": "#299c46", + "text": "0" + } + }, + "type": "special" + } + ], + "max": 100, + "min": 0, + "noValue": "0", + "thresholds": { + "mode": "absolute", + "steps": [ + { + "color": "#299c46", + "value": null + }, + { + "color": "rgba(237, 129, 40, 0.89)", + "value": 10 + }, + { + "color": "#C4162A", + "value": 50 + } + ] + }, + "unit": "percent" + }, + "overrides": [] + }, + "gridPos": { + "h": 7, + "w": 5, + "x": 19, + "y": 25 + }, + 
"id": 9, + "links": [], + "maxDataPoints": 100, + "options": { + "orientation": "auto", + "reduceOptions": { + "calcs": [ + "mean" + ], + "fields": "", + "values": false + }, + "showThresholdLabels": false, + "showThresholdMarkers": false + }, + "pluginVersion": "9.1.0", + "targets": [ + { + "datasource": { + "type": "loki", + "uid": "P00201832B18B88C3" + }, + "editorMode": "code", + "expr": "sum(count_over_time(({swarm_service_name=\"$service_name\", stream=~\"$stream\", swarm_task_name=~\"$task_name\"} |~ \"(?i)$searchable_pattern\")[$__interval])) * 100 / sum(count_over_time(({swarm_service_name=\"$service_name\", stream=~\"$stream\", swarm_task_name=~\"$task_name\"})[$__interval]))", + "hide": false, + "queryType": "range", + "refId": "A" + } + ], + "title": "\"$searchable_pattern\" Percentage for specified time", + "type": "gauge" + }, + { + "datasource": { + "type": "loki", + "uid": "P00201832B18B88C3" + }, + "fieldConfig": { + "defaults": { + "color": { + "mode": "palette-classic" + }, + "custom": { + "axisCenteredZero": false, + "axisColorMode": "text", + "axisLabel": "Count", + "axisPlacement": "auto", + "barAlignment": 0, + "drawStyle": "bars", + "fillOpacity": 100, + "gradientMode": "opacity", + "hideFrom": { + "legend": false, + "tooltip": false, + "viz": false + }, + "lineInterpolation": "linear", + "lineWidth": 3, + "pointSize": 5, + "scaleDistribution": { + "type": "linear" + }, + "showPoints": "never", + "spanNulls": false, + "stacking": { + "group": "A", + "mode": "none" + }, + "thresholdsStyle": { + "mode": "off" + } + }, + "decimals": 0, + "links": [], + "mappings": [], + "thresholds": { + "mode": "absolute", + "steps": [ + { + "color": "green", + "value": null + }, + { + "color": "red", + "value": 80 + } + ] + }, + "unit": "short" + }, + "overrides": [] + }, + "gridPos": { + "h": 9, + "w": 12, + "x": 0, + "y": 32 + }, + "id": 18, + "interval": "1m", + "links": [], + "maxDataPoints": "", + "options": { + "legend": { + "calcs": [ + "lastNotNull" + 
], + "displayMode": "table", + "placement": "bottom", + "showLegend": true + }, + "tooltip": { + "mode": "multi", + "sort": "desc" + } + }, + "pluginVersion": "9.1.0", + "targets": [ + { + "datasource": { + "type": "loki", + "uid": "P00201832B18B88C3" + }, + "editorMode": "code", + "expr": "sum(count_over_time(({swarm_service_name=\"$service_name\", stream=~\"$stream\", swarm_task_name=~\"$task_name\"} |~ \"(?i)$searchable_pattern\")[$__interval])) by (swarm_task_name)", + "queryType": "range", + "refId": "A" + } + ], + "title": "Matched word: \"$searchable_pattern\" historical", + "type": "timeseries" + }, + { + "datasource": { + "type": "loki", + "uid": "P00201832B18B88C3" + }, + "fieldConfig": { + "defaults": { + "color": { + "mode": "palette-classic" + }, + "custom": { + "axisCenteredZero": false, + "axisColorMode": "text", + "axisLabel": "", + "axisPlacement": "auto", + "barAlignment": 0, + "drawStyle": "line", + "fillOpacity": 100, + "gradientMode": "opacity", + "hideFrom": { + "legend": false, + "tooltip": false, + "viz": false + }, + "lineInterpolation": "linear", + "lineWidth": 1, + "pointSize": 5, + "scaleDistribution": { + "log": 10, + "type": "log" + }, + "showPoints": "never", + "spanNulls": false, + "stacking": { + "group": "A", + "mode": "none" + }, + "thresholdsStyle": { + "mode": "off" + } + }, + "links": [], + "mappings": [], + "thresholds": { + "mode": "absolute", + "steps": [ + { + "color": "green", + "value": null + }, + { + "color": "red", + "value": 80 + } + ] + }, + "unit": "short" + }, + "overrides": [] + }, + "gridPos": { + "h": 9, + "w": 12, + "x": 12, + "y": 32 + }, + "id": 10, + "links": [], + "maxDataPoints": 100, + "options": { + "legend": { + "calcs": [], + "displayMode": "table", + "placement": "bottom", + "showLegend": true + }, + "tooltip": { + "mode": "multi", + "sort": "desc" + } + }, + "pluginVersion": "9.1.0", + "targets": [ + { + "datasource": { + "type": "loki", + "uid": "P00201832B18B88C3" + }, + "editorMode": "code", + 
"expr": "sum(rate(({swarm_service_name=\"$service_name\", stream=~\"$stream\", swarm_task_name=~\"$task_name\"} |~ \"(?i)$searchable_pattern\")[30s])) by (swarm_task_name)", + "hide": false, + "queryType": "range", + "refId": "A" + } + ], + "title": "\"$searchable_pattern\" Rate per Task", + "type": "timeseries" + }, + { + "datasource": { + "type": "loki", + "uid": "P00201832B18B88C3" + }, + "fieldConfig": { + "defaults": { + "color": { + "mode": "palette-classic" + }, + "custom": { + "axisCenteredZero": false, + "axisColorMode": "text", + "axisLabel": "", + "axisPlacement": "auto", + "barAlignment": 0, + "drawStyle": "line", + "fillOpacity": 40, + "gradientMode": "opacity", + "hideFrom": { + "legend": false, + "tooltip": false, + "viz": false + }, + "lineInterpolation": "linear", + "lineWidth": 1, + "pointSize": 6, + "scaleDistribution": { + "type": "linear" + }, + "showPoints": "always", + "spanNulls": false, + "stacking": { + "group": "A", + "mode": "none" + }, + "thresholdsStyle": { + "mode": "off" + } + }, + "links": [], + "mappings": [], + "thresholds": { + "mode": "absolute", + "steps": [ + { + "color": "green", + "value": null + }, + { + "color": "red", + "value": 80 + } + ] + }, + "unit": "none" + }, + "overrides": [ + { + "matcher": { + "id": "byName", + "options": "{stream=\"stderr\"} stderr" + }, + "properties": [ + { + "id": "color", + "value": { + "fixedColor": "#C4162A", + "mode": "fixed" + } + }, + { + "id": "custom.lineWidth", + "value": 2 + } + ] + }, + { + "matcher": { + "id": "byName", + "options": "{stream=\"stdout\"} stdout" + }, + "properties": [ + { + "id": "color", + "value": { + "fixedColor": "#56A64B", + "mode": "fixed" + } + }, + { + "id": "custom.lineWidth", + "value": 2 + } + ] + } + ] + }, + "gridPos": { + "h": 8, + "w": 24, + "x": 0, + "y": 41 + }, + "id": 7, + "links": [], + "maxDataPoints": 100, + "options": { + "legend": { + "calcs": [], + "displayMode": "list", + "placement": "bottom", + "showLegend": true + }, + "tooltip": { + 
"mode": "multi", + "sort": "desc" + } + }, + "pluginVersion": "9.1.0", + "targets": [ + { + "datasource": { + "type": "loki", + "uid": "P00201832B18B88C3" + }, + "editorMode": "code", + "expr": "sum(count_over_time(({swarm_service_name=\"$service_name\", stream=~\"$stream\", swarm_task_name=~\"$task_name\"})[$__interval])) by (stream)", + "hide": false, + "queryType": "range", + "refId": "A" + } + ], + "title": "Count of stderr / stdout historical", + "type": "timeseries" + } + ], + "refresh": "10s", + "schemaVersion": 37, + "style": "dark", + "tags": [ + "Loki", + "logging" + ], + "templating": { + "list": [ + { + "current": { + "selected": false, + "text": "instant_prometheus-kafka-adapter", + "value": "instant_prometheus-kafka-adapter" + }, + "datasource": { + "type": "loki", + "uid": "P00201832B18B88C3" + }, + "definition": "label_values({swarm_service_name=~\".+\"}, swarm_service_name)", + "hide": 0, + "includeAll": false, + "label": "Service", + "multi": false, + "name": "service_name", + "options": [], + "query": "label_values({swarm_service_name=~\".+\"}, swarm_service_name)", + "refresh": 2, + "regex": "", + "skipUrlSync": false, + "sort": 0, + "tagValuesQuery": "", + "tagsQuery": "", + "type": "query", + "useTags": false + }, + { + "current": { + "selected": true, + "text": [ + "All" + ], + "value": [ + "$__all" + ] + }, + "datasource": { + "type": "loki", + "uid": "P00201832B18B88C3" + }, + "definition": "label_values({swarm_service_name=\"$service_name\"}, swarm_task_name)", + "hide": 0, + "includeAll": true, + "label": "Task", + "multi": true, + "name": "task_name", + "options": [], + "query": "label_values({swarm_service_name=\"$service_name\"}, swarm_task_name)", + "refresh": 2, + "regex": "", + "skipUrlSync": false, + "sort": 0, + "tagValuesQuery": "", + "tagsQuery": "", + "type": "query", + "useTags": false + }, + { + "current": { + "selected": false, + "text": "All", + "value": "$__all" + }, + "datasource": { + "type": "loki", + "uid": 
"P00201832B18B88C3" + }, + "definition": "label_values({swarm_service_name=\"$service_name\"}, stream)", + "hide": 0, + "includeAll": true, + "label": "Stream", + "multi": false, + "name": "stream", + "options": [], + "query": "label_values({swarm_service_name=\"$service_name\"}, stream)", + "refresh": 2, + "regex": "", + "skipUrlSync": false, + "sort": 0, + "tagValuesQuery": "", + "tagsQuery": "", + "type": "query", + "useTags": false + }, + { + "current": { + "selected": false, + "text": "error", + "value": "error" + }, + "hide": 0, + "label": "Search (case insensitive)", + "name": "searchable_pattern", + "options": [ + { + "selected": true, + "text": "error", + "value": "error" + } + ], + "query": "error", + "skipUrlSync": false, + "type": "textbox" + } + ] + }, + "time": { + "from": "now-15m", + "to": "now" + }, + "timepicker": { + "refresh_intervals": [ + "10s", + "30s", + "1m", + "5m", + "15m", + "30m", + "1h", + "2h", + "1d" + ] + }, + "timezone": "", + "title": "Container logs", + "uid": "fRIvzUZMf", + "version": 4, + "weekStart": "" +} diff --git a/monitoring/loki/loki-config-minio.yml b/monitoring/loki/loki-config-minio.yml new file mode 100644 index 00000000..46020dac --- /dev/null +++ b/monitoring/loki/loki-config-minio.yml @@ -0,0 +1,20 @@ +storage_config: + aws: + # Note: use a fully qualified domain name, like localhost. 
+ # full example: http://loki:supersecret@localhost.:9000 + s3: http://<username>:<secret>@<fqdn>:<port> + s3forcepathstyle: true + boltdb_shipper: + active_index_directory: /loki/boltdb-shipper-active + cache_location: /loki/boltdb-shipper-cache + shared_store: s3 + +schema_config: + configs: + - from: 2020-07-01 + store: boltdb-shipper + object_store: aws + schema: v11 + index: + prefix: index_ + period: 24h diff --git a/monitoring/loki/loki-config.yml b/monitoring/loki/loki-config.yml new file mode 100644 index 00000000..922f8fba --- /dev/null +++ b/monitoring/loki/loki-config.yml @@ -0,0 +1,55 @@ +auth_enabled: false + +server: + http_listen_port: 3100 + +# I think the issue could be due to the network interface that is being used +# common: +# ring: +# instance_interface_names: +# - eth0 +# - eth1 +# - lo +# instance_interface_names: +# - eth0 +# - eth1 + +memberlist: + node_name: ${NODE_NAME} + join_members: + - ${MEMBER1} + - ${MEMBER2} + +ingester: + wal: + enabled: true + dir: /tmp/wal + +schema_config: + configs: + - from: 2020-07-01 + store: boltdb-shipper + object_store: aws + schema: v11 + index: + prefix: index_ + period: 24h + +storage_config: + boltdb_shipper: + active_index_directory: /loki/boltdb-shipper-active + cache_location: /loki/boltdb-shipper-cache + resync_interval: 30s + shared_store: s3 + aws: + s3: http://minioadmin:minioadmin@minio1.:9000/loki + s3forcepathstyle: true + +compactor: + working_directory: /loki/boltdb-shipper-compactor + shared_store: s3 + +limits_config: + ingestion_rate_mb: 100 + ingestion_burst_size_mb: 150 + max_concurrent_tail_requests: 200 diff --git a/monitoring/package-metadata.json b/monitoring/package-metadata.json index 13e03667..48798832 100644 --- a/monitoring/package-metadata.json +++ b/monitoring/package-metadata.json @@ -4,7 +4,7 @@ "description": "A package for monitoring the platform services", "type": "infrastructure", "version": "0.0.1", - "dependencies": ["message-bus-kafka"], + "dependencies": [], "environmentVariables": { 
"STATEFUL_NODES": "single", "GF_SECURITY_ADMIN_USER": "admin", diff --git a/monitoring/promtail/promtail-config.yml b/monitoring/promtail/promtail-config.yml new file mode 100644 index 00000000..1cce8fe2 --- /dev/null +++ b/monitoring/promtail/promtail-config.yml @@ -0,0 +1,47 @@ +server: + http_listen_address: 0.0.0.0 + http_listen_port: 9080 + +positions: + filename: /tmp/positions.yaml + +clients: +- url: http://loki:3100/loki/api/v1/push + +scrape_configs: + +- job_name: containers + static_configs: + - targets: + - localhost + labels: + job: containerlogs + __path__: /host/containers/*/*log + + pipeline_stages: + - json: + expressions: + log: log + stream: stream + time: time + tag: attrs.tag + stack_name: attrs."com.docker.stack.namespace" + swarm_service_name: attrs."com.docker.swarm.service.name" + swarm_task_name: attrs."com.docker.swarm.task.name" + swarm_node_id: attrs."com.docker.swarm.node.id" + - regex: + expression: "^/host/containers/(?P<container_id>.{12}).+/.+-json.log$" + source: filename + - timestamp: + format: RFC3339Nano + source: time + - labels: + stream: + container_id: + tag: + stack_name: + swarm_service_name: + swarm_task_name: + swarm_node_id: + - output: + source: log diff --git a/monitoring/swarm.sh b/monitoring/swarm.sh index 401c058d..0fee291e 100644 --- a/monitoring/swarm.sh +++ b/monitoring/swarm.sh @@ -15,10 +15,10 @@ ROOT_PATH="${COMPOSE_FILE_PATH}/.." . "${ROOT_PATH}/utils/log.sh" if [[ "${MODE}" == "dev" ]]; then - log info "Running Message Bus Kafka package in DEV mode" + log info "Running Monitoring package in DEV mode" monitoring_dev_compose_param="-c ${COMPOSE_FILE_PATH}/docker-compose.dev.yml" else - log info "Running Message Bus Kafka package in PROD mode" + log info "Running Monitoring package in PROD mode" monitoring_dev_compose_param="" fi