diff --git a/connaisseur/flask_application.py b/connaisseur/flask_application.py index 19d4edeba..77ea6aa37 100644 --- a/connaisseur/flask_application.py +++ b/connaisseur/flask_application.py @@ -4,6 +4,7 @@ import traceback from flask import Flask, jsonify, request +from prometheus_flask_exporter import PrometheusMetrics, NO_PREFIX from connaisseur.admission_request import AdmissionRequest from connaisseur.alert import send_alerts @@ -24,6 +25,15 @@ CONFIG = Config() DETECTION_MODE = os.environ.get("DETECTION_MODE", "0") == "1" +metrics = PrometheusMetrics( + APP, + defaults_prefix=NO_PREFIX, + buckets=(0.1, 0.25, 0.5, 0.75, 1.0, 2.5, float("inf")), +) +""" +Provides metrics for the Flask application +""" + @APP.errorhandler(AlertSendingError) def handle_alert_sending_failure(err): @@ -76,6 +86,7 @@ def mutate(): # health probe @APP.route("/health", methods=["GET", "POST"]) +@metrics.do_not_track() def healthz(): """ Handle the '/health' endpoint and check the health status of the web server. @@ -87,6 +98,7 @@ def healthz(): # readiness probe @APP.route("/ready", methods=["GET", "POST"]) +@metrics.do_not_track() def readyz(): return "", 200 diff --git a/docs/README.md b/docs/README.md index fdf0909e4..9dc2d6436 100644 --- a/docs/README.md +++ b/docs/README.md @@ -139,6 +139,75 @@ Otherwise, Connaisseur translates all image references in the original request t ![](./assets/connaisseur-overview.png) +## Metrics + +Connaisseur exposes metrics about usage of the `/mutate` endpoint and general information about the Python process +using [Prometheus Flask Exporter](https://pypi.org/project/prometheus-flask-exporter/) through the `/metrics` endpoint.
+ +``` +# HELP python_gc_objects_collected_total Objects collected during gc +# TYPE python_gc_objects_collected_total counter +python_gc_objects_collected_total{generation="0"} 4422.0 +python_gc_objects_collected_total{generation="1"} 1866.0 +python_gc_objects_collected_total{generation="2"} 0.0 +# HELP python_gc_objects_uncollectable_total Uncollectable object found during GC +# TYPE python_gc_objects_uncollectable_total counter +python_gc_objects_uncollectable_total{generation="0"} 0.0 +python_gc_objects_uncollectable_total{generation="1"} 0.0 +python_gc_objects_uncollectable_total{generation="2"} 0.0 +# HELP python_gc_collections_total Number of times this generation was collected +# TYPE python_gc_collections_total counter +python_gc_collections_total{generation="0"} 163.0 +python_gc_collections_total{generation="1"} 14.0 +python_gc_collections_total{generation="2"} 1.0 +# HELP python_info Python platform information +# TYPE python_info gauge +python_info{implementation="CPython",major="3",minor="10",patchlevel="2",version="3.10.2"} 1.0 +# HELP process_virtual_memory_bytes Virtual memory size in bytes. +# TYPE process_virtual_memory_bytes gauge +process_virtual_memory_bytes 6.1161472e+07 +# HELP process_resident_memory_bytes Resident memory size in bytes. +# TYPE process_resident_memory_bytes gauge +process_resident_memory_bytes 4.595712e+07 +# HELP process_start_time_seconds Start time of the process since unix epoch in seconds. +# TYPE process_start_time_seconds gauge +process_start_time_seconds 1.6436681112e+09 +# HELP process_cpu_seconds_total Total user and system CPU time spent in seconds. +# TYPE process_cpu_seconds_total counter +process_cpu_seconds_total 3.3 +# HELP process_open_fds Number of open file descriptors. +# TYPE process_open_fds gauge +process_open_fds 12.0 +# HELP process_max_fds Maximum number of open file descriptors. 
+# TYPE process_max_fds gauge +process_max_fds 1.048576e+06 +# HELP exporter_info Information about the Prometheus Flask exporter +# TYPE exporter_info gauge +exporter_info{version="0.18.7"} 1.0 +# HELP http_request_duration_seconds Flask HTTP request duration in seconds +# TYPE http_request_duration_seconds histogram +http_request_duration_seconds_bucket{le="0.1",method="POST",path="/mutate",status="200"} 5.0 +http_request_duration_seconds_bucket{le="0.25",method="POST",path="/mutate",status="200"} 5.0 +http_request_duration_seconds_bucket{le="0.5",method="POST",path="/mutate",status="200"} 5.0 +http_request_duration_seconds_bucket{le="0.75",method="POST",path="/mutate",status="200"} 8.0 +http_request_duration_seconds_bucket{le="1.0",method="POST",path="/mutate",status="200"} 8.0 +http_request_duration_seconds_bucket{le="2.5",method="POST",path="/mutate",status="200"} 9.0 +http_request_duration_seconds_bucket{le="+Inf",method="POST",path="/mutate",status="200"} 9.0 +http_request_duration_seconds_count{method="POST",path="/mutate",status="200"} 9.0 +http_request_duration_seconds_sum{method="POST",path="/mutate",status="200"} 3.6445974350208417 +# HELP http_request_duration_seconds_created Flask HTTP request duration in seconds +# TYPE http_request_duration_seconds_created gauge +http_request_duration_seconds_created{method="POST",path="/mutate",status="200"} 1.643668194758098e+09 +# HELP http_request_total Total number of HTTP requests +# TYPE http_request_total counter +http_request_total{method="POST",status="200"} 9.0 +# HELP http_request_created Total number of HTTP requests +# TYPE http_request_created gauge +http_request_created{method="POST",status="200"} 1.6436681947581613e+09 +# HELP http_request_exceptions_total Total number of HTTP requests which resulted in an exception +# TYPE http_request_exceptions_total counter +``` + ## Compatibility Supported signature solutions and configuration options are documented under [validators](./validators/README.md). 
diff --git a/helm/templates/certificate_webhook-conf.yaml b/helm/templates/certificate_webhook-conf.yaml index 8f5635f8d..db7ef01e7 100644 --- a/helm/templates/certificate_webhook-conf.yaml +++ b/helm/templates/certificate_webhook-conf.yaml @@ -71,6 +71,7 @@ webhooks: name: {{ .Chart.Name }}-svc namespace: {{ .Release.Namespace }} path: /mutate + port: {{ .Values.service.port }} caBundle: {{ default ($certs.Cert | b64enc) (include "getInstalledTLSCert" .) }} rules: - operations: ["CREATE", "UPDATE"] diff --git a/helm/values.yaml b/helm/values.yaml index 71a8d917e..e6f5f6334 100644 --- a/helm/values.yaml +++ b/helm/values.yaml @@ -147,9 +147,9 @@ namespacedValidation: mode: ignore # 'ignore' or 'validate' # automatic child approval determines how admission of Kubernetes child resources is handled by Connaisseur. -# per default, Connaisseur validates and mutates all resources, e.g. deployments, replicaSets, pods, and +# per default, Connaisseur validates and mutates all resources, e.g. deployments, replicaSets, pods, and # automatically approves child resources of those to avoid duplicate validation and inconsistencies with the -# image policy. when disabled Connaisseur will only validate and mutate pods. check the docs for more +# image policy. when disabled Connaisseur will only validate and mutate pods. check the docs for more # information. # NOTE: configuration of automatic child approval is in EXPERIMENTAL state. automaticChildApproval: diff --git a/requirements.txt b/requirements.txt index eb75ac706..db4325e8b 100644 --- a/requirements.txt +++ b/requirements.txt @@ -5,6 +5,7 @@ Flask~=2.0.2 Jinja2~=3.0.3 jsonschema~=4.4.0 parsedatetime~=2.6 +prometheus-flask-exporter~=0.18.7 python-dateutil~=2.8.2 pytz~=2021.3 PyYAML~=6.0