diff --git a/docker/batch-test.env b/docker/batch-test.env
index c565a1148..abe27616b 100644
--- a/docker/batch-test.env
+++ b/docker/batch-test.env
@@ -115,5 +115,6 @@ CRON_WEEKLY_POSTGRESQL_BACKUP=True
 
 # selftest runs against public domain (example.(nl|com)) which will never work in the test environment
 CRON_15MIN_RUN_TESTS=False
+CRON_15MIN_RUN_TESTS_BATCH=False
 
 INTERNETNL_BRANDING=True
diff --git a/docker/cron/periodic/15min/tests-batch.py b/docker/cron/periodic/15min/tests-batch.py
new file mode 100755
index 000000000..a08fbbea2
--- /dev/null
+++ b/docker/cron/periodic/15min/tests-batch.py
@@ -0,0 +1,258 @@
+#!/usr/bin/env python3
+
+# run batch tests on example domains and write metrics to prometheus textfile
+
+# for iterative development, main() only runs when CRON_15MIN_RUN_TESTS_BATCH=True
+# docker run -ti -e INTERNETNL_DOMAINNAME=internet.nl -e CRON_15MIN_RUN_TESTS_BATCH=True \
+# -v $PWD/docker/cron/periodic/15min/tests-batch.py:/tests-batch.py \
+# ghcr.io/internetstandards/cron:latest /tests-batch.py --debug
+
+import sys
+import os
+import time
+from prometheus_client import REGISTRY, Gauge, generate_latest
+import prometheus_client
+import logging
+import requests
+import datetime
+
+log = logging.getLogger(__name__)
+
+DEBUG = "--debug" in sys.argv
+
+# file to write metrics to https://github.com/prometheus/node_exporter?tab=readme-ov-file#textfile-collector
+OUTPUT_TEXTFILE = "/prometheus-textfile-directory/tests-batch.prom"
+
+
+# maximum time in seconds to wait for a batch request to reach its next stage
+BATCH_REQUEST_TIMEOUT = 60 * 5
+# timeout in seconds for each individual HTTP request to the batch API
+REQUEST_TIMEOUT = 30
+
+REQUEST_TYPES = ["web", "mail"]
+
+IPV4_IP_APP_INTERNAL = os.environ.get("IPV4_IP_APP_INTERNAL")
+INTERNETNL_DOMAINNAME = os.environ.get("INTERNETNL_DOMAINNAME")
+# talk directly to the internal app container as the webserver might
+# have access restrictions in place
+URL_BASE = f"http://{IPV4_IP_APP_INTERNAL}:8080"
+HEADERS = {"Host": INTERNETNL_DOMAINNAME}
+
+TEST_DOMAINS = {
+    # domains to use in website tests
+    "web": [
+        "internet.nl",
+        "example.nl",
+        "example.com",
+        "internetsociety.org",
+        "ripe.net",
+        "surf.nl",
+        "ecp.nl",
+        "forumstandaardisatie.nl",
+        "minez.nl",
+    ],
+    # domains to use in mail tests
+    "mail": [
+        "internetsociety.org",
+        "ripe.net",
+        "surf.nl",
+        "ecp.nl",
+        # these are currently really slow and will probably improve when
+        # we switch to sslyze, for now disable these in monitoring
+        # "internet.nl",
+        # "forumstandaardisatie.nl",
+        # "minez.nl",
+    ],
+}
+
+METRIC_BATCH_RUN = Gauge("tests_batch_run_total", "Batch requests that have been run.", ["request_type"])
+METRIC_BATCH_SUCCESS = Gauge("tests_batch_success_total", "Batch requests runs that succeeded.", ["request_type"])
+METRIC_BATCH_FAILURE = Gauge("tests_batch_failure_total", "Batch requests runs that failed.", ["request_type"])
+METRIC_BATCH_TIMEOUT = Gauge("tests_batch_timeout_total", "Batch requests that ran into timeout.", ["request_type"])
+METRIC_BATCH_RUNTIME = Gauge(
+    "tests_batch_runtime_seconds", "Amount of time batch request ran before done.", ["request_type"]
+)
+METRIC_BATCH_STAGE_RUNTIME = Gauge(
+    "tests_batch_stage_runtime_seconds", "Amount of time each stage in batch request took.", ["request_type", "stage"]
+)
+
+METRIC_BATCH_DOMAIN = Gauge("tests_batch_domain_total", "Amount of domains batch request.", ["request_type", "domain"])
+
+METRIC_BATCH_DOMAIN_SUCCESS = Gauge(
+    "tests_batch_domain_success",
+    "Amount of successful domain tests in batch request per domain.",
+    ["request_type", "domain"],
+)
+METRIC_BATCH_DOMAIN_SCORE = Gauge(
+    "tests_batch_domain_score", "Per domain test scores for batch request.", ["request_type", "domain"]
+)
+
+METRIC_BATCH_DOMAIN_CATEGORIES = Gauge(
+    "tests_batch_domain_categories",
+    "Domain verdict and status per category.",
+    ["request_type", "domain", "category", "verdict", "status"],
+)
+
+METRIC_BATCH_DOMAIN_TESTS = Gauge(
+    "tests_batch_domain_tests",
+    "Domain verdict and status per test.",
+    ["request_type", "domain", "test", "verdict", "status"],
+)
+
+
+def wait_for_request_status(url: str, expected_status: list[str], timeout: int = 10, interval: int = 1, auth=None):
+    """Poll url and parse JSON for request.status, return if value matches expected status or
+    fail when timeout expires.
+
+    The url is requested at most `timeout / interval` times with a sleep of `interval` seconds
+    after every response that does not have one of the expected statuses.
+
+    Raises TimeoutError when no expected status was seen within those tries. Errors raised by
+    `requests` (connection problems, request timeout, HTTP error status) are not caught here.
+    """
+
+    log.debug("waiting for status: %s", expected_status)
+
+    max_tries = int(timeout / interval)
+
+    tries = 0
+    status: str = "n/a"
+    while tries < max_tries:
+        status_response = requests.get(url, auth=auth, headers=HEADERS, timeout=REQUEST_TIMEOUT)
+        status_response.raise_for_status()
+
+        log.debug(status_response.text)
+        status_data = status_response.json()
+        status = status_data["request"]["status"]
+        if status in expected_status:
+            break
+        time.sleep(interval)
+        tries += 1
+    else:
+        raise TimeoutError(f"request status never reached '{str(expected_status)}' states, current state: '{status}'")
+
+
+def run_test_batch(request_type: str, domains: list[str]):
+    """Run one batch request for the given domains and record its results as metrics.
+
+    Registers the batch request with the batch API, waits for it to pass the registering, running
+    and generating stages (recording the time each stage took) and stores the per domain status,
+    score, category and test verdicts from the results.
+
+    Raises TimeoutError when waiting for a stage times out, other errors from `requests` or from
+    unexpected response data are passed on to the caller.
+    """
+    # use the requested type, otherwise the mail domains would be tested as websites
+    request_data = {
+        "type": request_type,
+        "domains": domains,
+        "name": f"periodic test {str(datetime.datetime.now())}",
+    }
+
+    auth = ("periodic_tests", "periodic_tests")
+    api_url: str = URL_BASE + "/api/batch/v2/"
+
+    test_start = int(time.time())
+
+    # start batch request
+    register_response = requests.post(
+        api_url + "requests", json=request_data, auth=auth, headers=HEADERS, timeout=REQUEST_TIMEOUT
+    )
+    register_response.raise_for_status()
+    log.debug(register_response.text)
+
+    # get test_id from register data
+    register_data = register_response.json()
+    test_id: str = register_data["request"]["request_id"]
+
+    # wait for batch tests to start
+    wait_for_request_status(
+        api_url + "requests/" + test_id, ["running", "generating", "done"], timeout=BATCH_REQUEST_TIMEOUT, auth=auth
+    )
+    registering_time = int(time.time()) - test_start
+    METRIC_BATCH_STAGE_RUNTIME.labels(request_type, "registering").set(registering_time)
+
+    # wait for batch tests to complete and report to be generated
+    wait_for_request_status(
+        api_url + "requests/" + test_id, ["generating", "done"], timeout=BATCH_REQUEST_TIMEOUT, auth=auth
+    )
+    running_time = int(time.time()) - test_start - registering_time
+    METRIC_BATCH_STAGE_RUNTIME.labels(request_type, "running").set(running_time)
+
+    # wait for report generation and batch to be done
+    wait_for_request_status(api_url + "requests/" + test_id, ["done"], timeout=BATCH_REQUEST_TIMEOUT, auth=auth)
+    # subtract both preceding stages, otherwise the registering time is also counted as generating time
+    generating_time = int(time.time()) - test_start - registering_time - running_time
+    METRIC_BATCH_STAGE_RUNTIME.labels(request_type, "generating").set(generating_time)
+
+    # get batch results
+    results_response = requests.get(
+        api_url + "requests/" + test_id + "/results", auth=auth, headers=HEADERS, timeout=REQUEST_TIMEOUT
+    )
+    results_response.raise_for_status()
+    log.debug(results_response.text)
+
+    results_response_data = results_response.json()
+
+    METRIC_BATCH_RUNTIME.labels(request_type).set(int(time.time() - test_start))
+    METRIC_BATCH_SUCCESS.labels(request_type).set(1 if results_response_data["request"]["status"] == "done" else 0)
+
+    for domain, results in results_response_data["domains"].items():
+        METRIC_BATCH_DOMAIN.labels(request_type, domain).set(1)
+        METRIC_BATCH_DOMAIN_SUCCESS.labels(request_type, domain).set(1 if results["status"] == "ok" else 0)
+        METRIC_BATCH_DOMAIN_SCORE.labels(request_type, domain).set(results["scoring"]["percentage"])
+
+        for category, result in results["results"]["categories"].items():
+            METRIC_BATCH_DOMAIN_CATEGORIES.labels(
+                request_type, domain, category, result["verdict"], result["status"]
+            ).inc(1)
+
+        for test, result in results["results"]["tests"].items():
+            METRIC_BATCH_DOMAIN_TESTS.labels(request_type, domain, test, result["verdict"], result["status"]).inc(1)
+
+
+def run_batch_tests():
+    """Run a batch test for every request type, errors are logged and recorded in the failure/timeout metrics."""
+    for request_type in REQUEST_TYPES:
+        domains = TEST_DOMAINS[request_type]
+        log.info(f"testing: {request_type} {domains}")
+
+        METRIC_BATCH_RUN.labels(request_type).set(1)
+        METRIC_BATCH_FAILURE.labels(request_type).set(0)
+        METRIC_BATCH_TIMEOUT.labels(request_type).set(0)
+        METRIC_BATCH_SUCCESS.labels(request_type).set(0)
+        try:
+            run_test_batch(request_type, domains)
+
+        except (TimeoutError, requests.Timeout):
+            # a timeout is still a failed run, but is also reported in its own metric
+            log.exception("Timeout during test")
+            METRIC_BATCH_TIMEOUT.labels(request_type).set(1)
+            METRIC_BATCH_FAILURE.labels(request_type).set(1)
+
+        except Exception:
+            log.exception("Error during test")
+            METRIC_BATCH_FAILURE.labels(request_type).set(1)
+
+
+def main():
+    """Run all batch tests and write the collected metrics to stdout (--debug) or the textfile."""
+    logging.basicConfig(level=logging.DEBUG if DEBUG else logging.ERROR)
+
+    # disable internal metrics
+    REGISTRY.unregister(prometheus_client.GC_COLLECTOR)
+    REGISTRY.unregister(prometheus_client.PLATFORM_COLLECTOR)
+    REGISTRY.unregister(prometheus_client.PROCESS_COLLECTOR)
+
+    # run test probes against domains and collect metrics
+    run_batch_tests()
+
+    # write metrics to stdout or file in prometheus textfile format
+    if DEBUG:
+        print(generate_latest(REGISTRY).decode())
+    else:
+        with open(OUTPUT_TEXTFILE, "w") as f:
+            f.write(generate_latest(REGISTRY).decode())
+
+
+if __name__ == "__main__" and os.environ.get("CRON_15MIN_RUN_TESTS_BATCH", "False") == "True":
+    main()
diff --git a/docker/defaults.env b/docker/defaults.env
index 70194b467..9effc2316 100644
--- a/docker/defaults.env
+++ b/docker/defaults.env
@@ -240,6 +240,9 @@ CRON_WEEKLY_POSTGRESQL_BACKUP=False
 # enable running tests every 15 minutes for metrics collection
 CRON_15MIN_RUN_TESTS=True
 
+# enable running batch tests every 15 minutes for metrics collection, enable in local.env for batch deployments
+CRON_15MIN_RUN_TESTS_BATCH=False
+
 # enables internet.nl specific content (eg: contact information, faq, security.txt), only enable for internet.nl
 # instances. 
For customization see: documentation/Customize.md INTERNETNL_BRANDING=False diff --git a/docker/develop.env b/docker/develop.env index 6d46109da..feb7f01d8 100644 --- a/docker/develop.env +++ b/docker/develop.env @@ -64,6 +64,7 @@ LOGGING_DRIVER=json-file CRON_DAILY_POSTGRESQL_BACKUP=False CRON_WEEKLY_POSTGRESQL_BACKUP=False CRON_15MIN_RUN_TESTS=False +CRON_15MIN_RUN_TESTS_BATCH=False INTERNETNL_BRANDING=False diff --git a/docker/docker-compose.yml b/docker/docker-compose.yml index 5d8114e32..8ca3c4ab1 100644 --- a/docker/docker-compose.yml +++ b/docker/docker-compose.yml @@ -681,6 +681,7 @@ services: - DB_PASSWORD=password - CRON_DAILY_POSTGRESQL_BACKUP - CRON_WEEKLY_POSTGRESQL_BACKUP + - CRON_15MIN_RUN_TESTS_BATCH - IPV4_IP_APP_INTERNAL - INTERNETNL_DOMAINNAME - INTERNETNL_CACHE_TTL @@ -708,6 +709,7 @@ services: - postgres-backups:/var/lib/postgresql/backups - nginx-logs-exporter:/var/log/nginx/prometheus-nginxlog-exporter/ - prometheus-textfile-directory:/prometheus-textfile-directory + # - ./cron/periodic:/etc/periodic healthcheck: test: ["CMD", "pgrep", "crond"] diff --git a/docker/grafana/dashboards/periodic-tests-batch.json b/docker/grafana/dashboards/periodic-tests-batch.json new file mode 100644 index 000000000..8f432ee09 --- /dev/null +++ b/docker/grafana/dashboards/periodic-tests-batch.json @@ -0,0 +1,1103 @@ +{ + "annotations": { + "list": [ + { + "builtIn": 1, + "datasource": { + "type": "grafana", + "uid": "-- Grafana --" + }, + "enable": true, + "hide": true, + "iconColor": "rgba(0, 211, 255, 1)", + "name": "Annotations & Alerts", + "type": "dashboard" + } + ] + }, + "description": "Test runs are triggered every 15 minutes, runtime, probe success/failure and score are measured", + "editable": true, + "fiscalYearStartMonth": 0, + "graphTooltip": 1, + "id": 13, + "links": [], + "liveNow": false, + "panels": [ + { + "gridPos": { + "h": 1, + "w": 24, + "x": 0, + "y": 0 + }, + "id": 8, + "title": "Statistics", + "type": "row" + }, + { + "datasource": { + 
"type": "prometheus", + "uid": "PBFA97CFB590B2093" + }, + "description": "", + "fieldConfig": { + "defaults": { + "color": { + "mode": "thresholds" + }, + "mappings": [], + "min": 0, + "thresholds": { + "mode": "absolute", + "steps": [ + { + "color": "green", + "value": null + }, + { + "color": "#EAB839", + "value": 40 + }, + { + "color": "red", + "value": 60 + } + ] + }, + "unit": "s" + }, + "overrides": [] + }, + "gridPos": { + "h": 4, + "w": 4, + "x": 0, + "y": 1 + }, + "id": 9, + "options": { + "colorMode": "value", + "graphMode": "area", + "justifyMode": "auto", + "orientation": "auto", + "reduceOptions": { + "calcs": [ + "lastNotNull" + ], + "fields": "", + "values": false + }, + "textMode": "auto" + }, + "pluginVersion": "9.5.2", + "targets": [ + { + "datasource": { + "type": "prometheus", + "uid": "PBFA97CFB590B2093" + }, + "editorMode": "code", + "expr": "quantile( 0.50, tests_batch_runtime_seconds{request_type=~\"$request_type\"})", + "legendFormat": "__auto", + "range": true, + "refId": "A" + } + ], + "title": "Batch request median runtime", + "type": "stat" + }, + { + "datasource": { + "type": "prometheus", + "uid": "PBFA97CFB590B2093" + }, + "description": "", + "fieldConfig": { + "defaults": { + "color": { + "mode": "thresholds" + }, + "mappings": [], + "min": 0, + "thresholds": { + "mode": "absolute", + "steps": [ + { + "color": "red", + "value": null + }, + { + "color": "green", + "value": 1 + } + ] + }, + "unit": "short" + }, + "overrides": [] + }, + "gridPos": { + "h": 4, + "w": 4, + "x": 4, + "y": 1 + }, + "id": 11, + "options": { + "colorMode": "value", + "graphMode": "area", + "justifyMode": "auto", + "orientation": "auto", + "reduceOptions": { + "calcs": [ + "lastNotNull" + ], + "fields": "", + "values": false + }, + "textMode": "auto" + }, + "pluginVersion": "9.5.2", + "targets": [ + { + "datasource": { + "type": "prometheus", + "uid": "PBFA97CFB590B2093" + }, + "editorMode": "code", + "expr": 
"sum(tests_batch_success_total{request_type=~\"$request_type\"})", + "legendFormat": "__auto", + "range": true, + "refId": "A" + } + ], + "title": "Batch requests success", + "type": "stat" + }, + { + "datasource": { + "type": "prometheus", + "uid": "PBFA97CFB590B2093" + }, + "description": "", + "fieldConfig": { + "defaults": { + "color": { + "mode": "thresholds" + }, + "mappings": [], + "min": 0, + "thresholds": { + "mode": "absolute", + "steps": [ + { + "color": "green", + "value": null + }, + { + "color": "red", + "value": 1 + } + ] + }, + "unit": "short" + }, + "overrides": [] + }, + "gridPos": { + "h": 4, + "w": 4, + "x": 8, + "y": 1 + }, + "id": 13, + "options": { + "colorMode": "value", + "graphMode": "area", + "justifyMode": "auto", + "orientation": "auto", + "reduceOptions": { + "calcs": [ + "last" + ], + "fields": "", + "values": false + }, + "textMode": "auto" + }, + "pluginVersion": "9.5.2", + "targets": [ + { + "datasource": { + "type": "prometheus", + "uid": "PBFA97CFB590B2093" + }, + "editorMode": "code", + "exemplar": false, + "expr": "sum(tests_batch_failure_total{request_type=~\"$request_type\"}) ", + "instant": false, + "legendFormat": "__auto", + "range": true, + "refId": "A" + } + ], + "title": "Batch request failure", + "type": "stat" + }, + { + "datasource": { + "type": "prometheus", + "uid": "PBFA97CFB590B2093" + }, + "description": "", + "fieldConfig": { + "defaults": { + "color": { + "mode": "thresholds" + }, + "mappings": [], + "min": 0, + "thresholds": { + "mode": "absolute", + "steps": [ + { + "color": "green", + "value": null + }, + { + "color": "red", + "value": 1 + } + ] + }, + "unit": "short" + }, + "overrides": [] + }, + "gridPos": { + "h": 4, + "w": 4, + "x": 12, + "y": 1 + }, + "id": 15, + "options": { + "colorMode": "value", + "graphMode": "area", + "justifyMode": "auto", + "orientation": "auto", + "reduceOptions": { + "calcs": [ + "last" + ], + "fields": "", + "values": false + }, + "textMode": "auto" + }, + "pluginVersion": 
"9.5.2", + "targets": [ + { + "datasource": { + "type": "prometheus", + "uid": "PBFA97CFB590B2093" + }, + "editorMode": "code", + "exemplar": false, + "expr": "sum(tests_batch_timeout_total{request_type=~\"$request_type\"}) ", + "instant": false, + "legendFormat": "__auto", + "range": true, + "refId": "A" + } + ], + "title": "Batch request timeout", + "type": "stat" + }, + { + "datasource": { + "type": "prometheus", + "uid": "PBFA97CFB590B2093" + }, + "description": "", + "fieldConfig": { + "defaults": { + "color": { + "mode": "thresholds" + }, + "mappings": [], + "min": 0, + "thresholds": { + "mode": "absolute", + "steps": [ + { + "color": "green", + "value": null + } + ] + }, + "unit": "short" + }, + "overrides": [] + }, + "gridPos": { + "h": 4, + "w": 4, + "x": 17, + "y": 1 + }, + "id": 17, + "options": { + "colorMode": "value", + "graphMode": "area", + "justifyMode": "auto", + "orientation": "auto", + "reduceOptions": { + "calcs": [ + "last" + ], + "fields": "", + "values": false + }, + "textMode": "auto" + }, + "pluginVersion": "9.5.2", + "targets": [ + { + "datasource": { + "type": "prometheus", + "uid": "PBFA97CFB590B2093" + }, + "editorMode": "code", + "exemplar": false, + "expr": "sum(tests_batch_domain_total{request_type=~\"$request_type\", domain=~\"$domain\"}) ", + "instant": false, + "legendFormat": "__auto", + "range": true, + "refId": "A" + } + ], + "title": "Batch request domains", + "type": "stat" + }, + { + "gridPos": { + "h": 1, + "w": 24, + "x": 0, + "y": 5 + }, + "id": 3, + "title": "Runtimes", + "type": "row" + }, + { + "datasource": { + "type": "prometheus", + "uid": "PBFA97CFB590B2093" + }, + "description": "", + "fieldConfig": { + "defaults": { + "color": { + "mode": "palette-classic" + }, + "custom": { + "axisCenteredZero": false, + "axisColorMode": "text", + "axisLabel": "", + "axisPlacement": "auto", + "barAlignment": 0, + "drawStyle": "line", + "fillOpacity": 0, + "gradientMode": "none", + "hideFrom": { + "legend": false, + "tooltip": 
false, + "viz": false + }, + "lineInterpolation": "linear", + "lineWidth": 1, + "pointSize": 5, + "scaleDistribution": { + "type": "linear" + }, + "showPoints": "never", + "spanNulls": false, + "stacking": { + "group": "A", + "mode": "none" + }, + "thresholdsStyle": { + "mode": "off" + } + }, + "mappings": [], + "min": 0, + "thresholds": { + "mode": "absolute", + "steps": [ + { + "color": "green", + "value": null + }, + { + "color": "red", + "value": 80 + } + ] + }, + "unit": "s" + }, + "overrides": [] + }, + "gridPos": { + "h": 12, + "w": 12, + "x": 0, + "y": 6 + }, + "id": 1, + "options": { + "legend": { + "calcs": [ + "min", + "max", + "mean", + "lastNotNull" + ], + "displayMode": "table", + "placement": "bottom", + "showLegend": true + }, + "tooltip": { + "mode": "multi", + "sort": "desc" + } + }, + "targets": [ + { + "datasource": { + "type": "prometheus", + "uid": "PBFA97CFB590B2093" + }, + "editorMode": "code", + "expr": "tests_batch_runtime_seconds{request_type=~\"$request_type\"}", + "legendFormat": "{{test}}", + "range": true, + "refId": "A" + } + ], + "title": "Batch request runtime", + "type": "timeseries" + }, + { + "datasource": { + "type": "prometheus", + "uid": "PBFA97CFB590B2093" + }, + "description": "", + "fieldConfig": { + "defaults": { + "color": { + "mode": "palette-classic" + }, + "custom": { + "axisCenteredZero": false, + "axisColorMode": "text", + "axisLabel": "", + "axisPlacement": "auto", + "barAlignment": 0, + "drawStyle": "line", + "fillOpacity": 0, + "gradientMode": "none", + "hideFrom": { + "legend": false, + "tooltip": false, + "viz": false + }, + "lineInterpolation": "linear", + "lineWidth": 1, + "pointSize": 5, + "scaleDistribution": { + "type": "linear" + }, + "showPoints": "never", + "spanNulls": false, + "stacking": { + "group": "A", + "mode": "none" + }, + "thresholdsStyle": { + "mode": "off" + } + }, + "mappings": [], + "min": 0, + "thresholds": { + "mode": "absolute", + "steps": [ + { + "color": "green", + "value": null + }, 
+ { + "color": "red", + "value": 80 + } + ] + }, + "unit": "s" + }, + "overrides": [] + }, + "gridPos": { + "h": 12, + "w": 12, + "x": 12, + "y": 6 + }, + "id": 2, + "options": { + "legend": { + "calcs": [ + "min", + "max", + "mean", + "lastNotNull" + ], + "displayMode": "table", + "placement": "bottom", + "showLegend": true + }, + "tooltip": { + "mode": "multi", + "sort": "desc" + } + }, + "targets": [ + { + "datasource": { + "type": "prometheus", + "uid": "PBFA97CFB590B2093" + }, + "editorMode": "code", + "expr": "tests_batch_stage_runtime_seconds{request_type=~\"$request_type\"}", + "legendFormat": "{{test}} - {{stage}}", + "range": true, + "refId": "A" + } + ], + "title": "Batch request stage runtimes", + "type": "timeseries" + }, + { + "collapsed": false, + "gridPos": { + "h": 1, + "w": 24, + "x": 0, + "y": 18 + }, + "id": 4, + "panels": [], + "title": "Scores", + "type": "row" + }, + { + "datasource": { + "type": "prometheus", + "uid": "PBFA97CFB590B2093" + }, + "description": "", + "fieldConfig": { + "defaults": { + "color": { + "mode": "palette-classic" + }, + "custom": { + "axisCenteredZero": false, + "axisColorMode": "text", + "axisLabel": "", + "axisPlacement": "auto", + "axisSoftMax": 100, + "axisSoftMin": 0, + "barAlignment": 0, + "drawStyle": "line", + "fillOpacity": 0, + "gradientMode": "none", + "hideFrom": { + "legend": false, + "tooltip": false, + "viz": false + }, + "lineInterpolation": "linear", + "lineWidth": 1, + "pointSize": 5, + "scaleDistribution": { + "type": "linear" + }, + "showPoints": "never", + "spanNulls": false, + "stacking": { + "group": "A", + "mode": "none" + }, + "thresholdsStyle": { + "mode": "off" + } + }, + "mappings": [], + "min": 0, + "thresholds": { + "mode": "absolute", + "steps": [ + { + "color": "green", + "value": null + }, + { + "color": "red", + "value": 80 + } + ] + }, + "unit": "percent" + }, + "overrides": [] + }, + "gridPos": { + "h": 12, + "w": 12, + "x": 0, + "y": 19 + }, + "id": 14, + "options": { + "legend": 
{ + "calcs": [ + "min", + "max", + "mean", + "lastNotNull" + ], + "displayMode": "table", + "placement": "bottom", + "showLegend": true + }, + "tooltip": { + "mode": "multi", + "sort": "asc" + } + }, + "targets": [ + { + "datasource": { + "type": "prometheus", + "uid": "PBFA97CFB590B2093" + }, + "editorMode": "code", + "expr": "tests_batch_domain_score{request_type=~\"$request_type\", domain=~\"$domain\"}", + "legendFormat": "{{test}} - {{domain}}", + "range": true, + "refId": "A" + } + ], + "title": "Test scores per domain", + "type": "timeseries" + }, + { + "datasource": { + "type": "prometheus", + "uid": "PBFA97CFB590B2093" + }, + "description": "", + "fieldConfig": { + "defaults": { + "color": { + "mode": "palette-classic" + }, + "custom": { + "axisCenteredZero": false, + "axisColorMode": "text", + "axisLabel": "", + "axisPlacement": "auto", + "barAlignment": 0, + "drawStyle": "line", + "fillOpacity": 0, + "gradientMode": "none", + "hideFrom": { + "legend": false, + "tooltip": false, + "viz": false + }, + "lineInterpolation": "linear", + "lineWidth": 1, + "pointSize": 5, + "scaleDistribution": { + "type": "linear" + }, + "showPoints": "never", + "spanNulls": false, + "stacking": { + "group": "A", + "mode": "none" + }, + "thresholdsStyle": { + "mode": "off" + } + }, + "mappings": [], + "min": 0, + "thresholds": { + "mode": "absolute", + "steps": [ + { + "color": "green", + "value": null + }, + { + "color": "red", + "value": 80 + } + ] + }, + "unit": "short" + }, + "overrides": [] + }, + "gridPos": { + "h": 12, + "w": 12, + "x": 12, + "y": 19 + }, + "id": 5, + "options": { + "legend": { + "calcs": [ + "min", + "max", + "mean", + "lastNotNull" + ], + "displayMode": "table", + "placement": "bottom", + "showLegend": true, + "sortBy": "Last *", + "sortDesc": true + }, + "tooltip": { + "mode": "multi", + "sort": "asc" + } + }, + "targets": [ + { + "datasource": { + "type": "prometheus", + "uid": "PBFA97CFB590B2093" + }, + "editorMode": "code", + "expr": 
"sum(tests_batch_domain_total{request_type=~\"$request_type\", domain=~\"$domain\"}) by (test, domain) - sum(tests_batch_domain_success{request_type=~\"$request_type\", domain=~\"$domain\"}) by (test, domain)", + "legendFormat": "{{test}} - {{domain}}", + "range": true, + "refId": "A" + } + ], + "title": "Failed tests per domain", + "type": "timeseries" + }, + { + "collapsed": false, + "gridPos": { + "h": 1, + "w": 24, + "x": 0, + "y": 31 + }, + "id": 6, + "panels": [], + "title": "Failures", + "type": "row" + }, + { + "datasource": { + "type": "prometheus", + "uid": "PBFA97CFB590B2093" + }, + "description": "", + "fieldConfig": { + "defaults": { + "color": { + "mode": "palette-classic" + }, + "custom": { + "axisCenteredZero": false, + "axisColorMode": "text", + "axisLabel": "", + "axisPlacement": "auto", + "barAlignment": 0, + "drawStyle": "line", + "fillOpacity": 0, + "gradientMode": "none", + "hideFrom": { + "legend": false, + "tooltip": false, + "viz": false + }, + "lineInterpolation": "linear", + "lineWidth": 1, + "pointSize": 5, + "scaleDistribution": { + "type": "linear" + }, + "showPoints": "never", + "spanNulls": false, + "stacking": { + "group": "A", + "mode": "none" + }, + "thresholdsStyle": { + "mode": "off" + } + }, + "mappings": [], + "min": 0, + "thresholds": { + "mode": "absolute", + "steps": [ + { + "color": "green", + "value": null + }, + { + "color": "red", + "value": 80 + } + ] + }, + "unit": "short" + }, + "overrides": [] + }, + "gridPos": { + "h": 12, + "w": 12, + "x": 0, + "y": 32 + }, + "id": 7, + "options": { + "legend": { + "calcs": [ + "min", + "max", + "mean", + "lastNotNull" + ], + "displayMode": "table", + "placement": "bottom", + "showLegend": true + }, + "tooltip": { + "mode": "multi", + "sort": "desc" + } + }, + "targets": [ + { + "datasource": { + "type": "prometheus", + "uid": "PBFA97CFB590B2093" + }, + "editorMode": "code", + "expr": "tests_batch_failure_total{request_type=~\"$request_type\"}", + "legendFormat": "{{test}}", + 
"range": true, + "refId": "A" + } + ], + "title": "Batch request failures", + "type": "timeseries" + }, + { + "datasource": { + "type": "prometheus", + "uid": "PBFA97CFB590B2093" + }, + "description": "", + "fieldConfig": { + "defaults": { + "color": { + "mode": "palette-classic" + }, + "custom": { + "axisCenteredZero": false, + "axisColorMode": "text", + "axisLabel": "", + "axisPlacement": "auto", + "barAlignment": 0, + "drawStyle": "line", + "fillOpacity": 0, + "gradientMode": "none", + "hideFrom": { + "legend": false, + "tooltip": false, + "viz": false + }, + "lineInterpolation": "linear", + "lineWidth": 1, + "pointSize": 5, + "scaleDistribution": { + "type": "linear" + }, + "showPoints": "never", + "spanNulls": false, + "stacking": { + "group": "A", + "mode": "none" + }, + "thresholdsStyle": { + "mode": "off" + } + }, + "mappings": [], + "min": 0, + "thresholds": { + "mode": "absolute", + "steps": [ + { + "color": "green", + "value": null + }, + { + "color": "red", + "value": 80 + } + ] + }, + "unit": "short" + }, + "overrides": [] + }, + "gridPos": { + "h": 12, + "w": 12, + "x": 12, + "y": 32 + }, + "id": 16, + "options": { + "legend": { + "calcs": [ + "min", + "max", + "mean", + "lastNotNull" + ], + "displayMode": "table", + "placement": "bottom", + "showLegend": true + }, + "tooltip": { + "mode": "multi", + "sort": "desc" + } + }, + "targets": [ + { + "datasource": { + "type": "prometheus", + "uid": "PBFA97CFB590B2093" + }, + "editorMode": "code", + "expr": "tests_batch_timeout_total{request_type=~\"$request_type\"}", + "legendFormat": "{{test}}", + "range": true, + "refId": "A" + } + ], + "title": "Batch request timeouts", + "type": "timeseries" + } + ], + "refresh": "1m", + "schemaVersion": 38, + "style": "dark", + "tags": [], + "templating": { + "list": [ + { + "current": { + "selected": true, + "text": [ + "All" + ], + "value": [ + "$__all" + ] + }, + "datasource": { + "type": "prometheus", + "uid": "PBFA97CFB590B2093" + }, + "definition": 
"label_values(tests_batch_run_total,request_type)", + "hide": 0, + "includeAll": true, + "multi": true, + "name": "request_type", + "options": [], + "query": { + "query": "label_values(tests_batch_run_total,request_type)", + "refId": "PrometheusVariableQueryEditor-VariableQuery" + }, + "refresh": 2, + "regex": "", + "skipUrlSync": false, + "sort": 1, + "type": "query" + }, + { + "current": { + "selected": true, + "text": [ + "All" + ], + "value": [ + "$__all" + ] + }, + "datasource": { + "type": "prometheus", + "uid": "PBFA97CFB590B2093" + }, + "definition": "label_values(tests_batch_domain_ok{request_type=~\"$request_type\"},domain)", + "hide": 0, + "includeAll": true, + "multi": true, + "name": "domain", + "options": [], + "query": { + "query": "label_values(tests_batch_domain_ok{request_type=~\"$request_type\"},domain)", + "refId": "PrometheusVariableQueryEditor-VariableQuery" + }, + "refresh": 2, + "regex": "", + "skipUrlSync": false, + "sort": 1, + "type": "query" + } + ] + }, + "time": { + "from": "now-24h", + "to": "now" + }, + "timepicker": {}, + "timezone": "", + "title": "Periodic tests batch", + "uid": "b38d84a3-02c1-4066-b27b-cfc11dd3ce1e", + "version": 11, + "weekStart": "" +} diff --git a/docker/grafana/dashboards/periodic-tests.json b/docker/grafana/dashboards/periodic-tests.json index d9c4cacee..d867c0942 100644 --- a/docker/grafana/dashboards/periodic-tests.json +++ b/docker/grafana/dashboards/periodic-tests.json @@ -598,7 +598,6 @@ "y": 6 }, "id": 1, - "interval": "15m", "options": { "legend": { "calcs": [ @@ -699,7 +698,6 @@ "y": 6 }, "id": 2, - "interval": "15m", "options": { "legend": { "calcs": [ @@ -739,7 +737,7 @@ "h": 1, "w": 24, "x": 0, - "y": 17 + "y": 18 }, "id": 4, "panels": [], @@ -812,10 +810,9 @@ "h": 12, "w": 12, "x": 0, - "y": 18 + "y": 19 }, "id": 14, - "interval": "15m", "options": { "legend": { "calcs": [ @@ -913,10 +910,9 @@ "h": 12, "w": 12, "x": 12, - "y": 18 + "y": 19 }, "id": 5, - "interval": "15m", "options": { "legend": { 
"calcs": [ @@ -958,7 +954,7 @@ "h": 1, "w": 24, "x": 0, - "y": 28 + "y": 31 }, "id": 6, "panels": [], @@ -1029,10 +1025,9 @@ "h": 12, "w": 12, "x": 0, - "y": 29 + "y": 32 }, "id": 7, - "interval": "15m", "options": { "legend": { "calcs": [ @@ -1130,10 +1125,9 @@ "h": 12, "w": 12, "x": 12, - "y": 29 + "y": 32 }, "id": 16, - "interval": "15m", "options": { "legend": { "calcs": [ @@ -1274,6 +1268,6 @@ "timezone": "", "title": "Periodic tests", "uid": "af7d1d82-c0f9-4d8d-bc03-542c4c4c75c0", - "version": 10, + "version": 11, "weekStart": "" } diff --git a/docker/test.env b/docker/test.env index 65e421622..ff0b0d17b 100644 --- a/docker/test.env +++ b/docker/test.env @@ -112,5 +112,6 @@ CRON_WEEKLY_POSTGRESQL_BACKUP=True # selftest runs against public domain (example.(nl|com)) which will never work in the test environment CRON_15MIN_RUN_TESTS=False +CRON_15MIN_RUN_TESTS_BATCH=False INTERNETNL_BRANDING=True diff --git a/documentation/Docker-deployment-batch.md b/documentation/Docker-deployment-batch.md index 2e702f66f..c6570431e 100644 --- a/documentation/Docker-deployment-batch.md +++ b/documentation/Docker-deployment-batch.md @@ -111,6 +111,7 @@ For example: cat >> docker/local.env < # allowed IP's to visit web interface without password