# Upper bounds, in seconds, of the buckets used for the request duration
# histogram exported by the Flask application.
_DURATION_BUCKETS = (
    0.1,
    0.25,
    0.5,
    0.75,
    1.0,
    2.5,
    5.0,
    10.0,
    15.0,
    20.0,
    30.0,
    float("inf"),
)

# Provides metrics for the Flask application
metrics = PrometheusMetrics(
    APP,
    buckets=_DURATION_BUCKETS,
    defaults_prefix=NO_PREFIX,
)
def metrics_label(response, label):
    """
    Extract the value of a metrics label from the response of the `/mutate`
    endpoint.

    `response` is the response object handed to the label callbacks of
    `mutate_requests_total`; its body is parsed with `get_json(silent=True)`.
    `label` is the name of the label to resolve:

    - "allowed": value of `response.allowed` in the JSON body
    - "status_code": value of `response.status.code` in the JSON body

    Return `None` if the body is not JSON (e.g. the plain-text error
    responses), if the expected fields are missing, or if `label` is unknown.
    The parsed body itself is never returned, as that would turn the whole
    payload into a label value and create one time series per response.
    """
    json_response = response.get_json(silent=True)
    if not isinstance(json_response, dict):
        return None

    review_response = json_response.get("response")
    if not isinstance(review_response, dict):
        return None

    if label == "allowed":
        return review_response.get("allowed")

    if label == "status_code":
        status = review_response.get("status")
        # `status` may be absent or malformed; never raise from a metrics hook
        return status.get("code") if isinstance(status, dict) else None

    return None
diff --git a/docs/features/metrics.md b/docs/features/metrics.md new file mode 100644 index 000000000..5d8b3cf29 --- /dev/null +++ b/docs/features/metrics.md @@ -0,0 +1,80 @@ +# Metrics + +Connaisseur exposes metrics about usage of the `/mutate` endpoint and general information about the python process using [Prometheus Flask Exporter](https://pypi.org/project/prometheus-flask-exporter/) through the `/metrics` endpoint. + +This for example allows visualizing the number of allowed or denied resource requests. + +## Example + +``` +# HELP python_gc_objects_collected_total Objects collected during gc +# TYPE python_gc_objects_collected_total counter +python_gc_objects_collected_total{generation="0"} 4422.0 +python_gc_objects_collected_total{generation="1"} 1866.0 +python_gc_objects_collected_total{generation="2"} 0.0 +# HELP python_gc_objects_uncollectable_total Uncollectable object found during GC +# TYPE python_gc_objects_uncollectable_total counter +python_gc_objects_uncollectable_total{generation="0"} 0.0 +python_gc_objects_uncollectable_total{generation="1"} 0.0 +python_gc_objects_uncollectable_total{generation="2"} 0.0 +# HELP python_gc_collections_total Number of times this generation was collected +# TYPE python_gc_collections_total counter +python_gc_collections_total{generation="0"} 163.0 +python_gc_collections_total{generation="1"} 14.0 +python_gc_collections_total{generation="2"} 1.0 +# HELP python_info Python platform information +# TYPE python_info gauge +python_info{implementation="CPython",major="3",minor="10",patchlevel="2",version="3.10.2"} 1.0 +# HELP process_virtual_memory_bytes Virtual memory size in bytes. +# TYPE process_virtual_memory_bytes gauge +process_virtual_memory_bytes 6.1161472e+07 +# HELP process_resident_memory_bytes Resident memory size in bytes. +# TYPE process_resident_memory_bytes gauge +process_resident_memory_bytes 4.595712e+07 +# HELP process_start_time_seconds Start time of the process since unix epoch in seconds. 
+# TYPE process_start_time_seconds gauge +process_start_time_seconds 1.6436681112e+09 +# HELP process_cpu_seconds_total Total user and system CPU time spent in seconds. +# TYPE process_cpu_seconds_total counter +process_cpu_seconds_total 3.3 +# HELP process_open_fds Number of open file descriptors. +# TYPE process_open_fds gauge +process_open_fds 12.0 +# HELP process_max_fds Maximum number of open file descriptors. +# TYPE process_max_fds gauge +process_max_fds 1.048576e+06 +# HELP exporter_info Information about the Prometheus Flask exporter +# TYPE exporter_info gauge +exporter_info{version="0.18.7"} 1.0 +# HELP http_request_duration_seconds Flask HTTP request duration in seconds +# TYPE http_request_duration_seconds histogram +http_request_duration_seconds_bucket{le="0.1",method="POST",path="/mutate",status="200"} 5.0 +http_request_duration_seconds_bucket{le="0.25",method="POST",path="/mutate",status="200"} 5.0 +http_request_duration_seconds_bucket{le="0.5",method="POST",path="/mutate",status="200"} 5.0 +http_request_duration_seconds_bucket{le="0.75",method="POST",path="/mutate",status="200"} 8.0 +http_request_duration_seconds_bucket{le="1.0",method="POST",path="/mutate",status="200"} 8.0 +http_request_duration_seconds_bucket{le="2.5",method="POST",path="/mutate",status="200"} 9.0 +http_request_duration_seconds_bucket{le="+Inf",method="POST",path="/mutate",status="200"} 9.0 +http_request_duration_seconds_count{method="POST",path="/mutate",status="200"} 9.0 +http_request_duration_seconds_sum{method="POST",path="/mutate",status="200"} 3.6445974350208417 +# HELP http_request_duration_seconds_created Flask HTTP request duration in seconds +# TYPE http_request_duration_seconds_created gauge +http_request_duration_seconds_created{method="POST",path="/mutate",status="200"} 1.643668194758098e+09 +# HELP http_request_total Total number of HTTP requests +# TYPE http_request_total counter +http_request_total{method="POST",status="200"} 9.0 +# HELP http_request_created 
Total number of HTTP requests +# TYPE http_request_created gauge +http_request_created{method="POST",status="200"} 1.6436681947581613e+09 +# HELP http_request_exceptions_total Total number of HTTP requests which resulted in an exception +# TYPE http_request_exceptions_total counter +# HELP mutate_requests_total Total number of mutate requests +# TYPE mutate_requests_total counter +mutate_requests_total{allowed="False",status_code="403"} 4.0 +mutate_requests_total{allowed="True",status_code="202"} 5.0 +# HELP mutate_requests_created Total number of mutate requests +# TYPE mutate_requests_created gauge +mutate_requests_created{allowed="False",status_code="403"} 1.643760946491879e+09 +mutate_requests_created{allowed="True",status_code="202"} 1.6437609592007663e+09 +``` + diff --git a/helm/values.yaml b/helm/values.yaml index dd2b9dfdb..a14e1968a 100644 --- a/helm/values.yaml +++ b/helm/values.yaml @@ -147,9 +147,9 @@ namespacedValidation: mode: ignore # 'ignore' or 'validate' # automatic child approval determines how admission of Kubernetes child resources is handled by Connaisseur. -# per default, Connaisseur validates and mutates all resources, e.g. deployments, replicaSets, pods, and +# per default, Connaisseur validates and mutates all resources, e.g. deployments, replicaSets, pods, and # automatically approves child resources of those to avoid duplicate validation and inconsistencies with the -# image policy. when disabled Connaisseur will only validate and mutate pods. check the docs for more +# image policy. when disabled Connaisseur will only validate and mutate pods. check the docs for more # information. # NOTE: configuration of automatic child approval is in EXPERIMENTAL state. 
def test_healthz():
    """
    The health probe handler answers with an empty body and status 200.
    """
    # NOTE(review): the request context is presumably needed because the
    # handler is wrapped by `metrics.do_not_track()` - confirm.
    app = pytest.fa.APP
    with app.test_request_context():
        result = pytest.fa.healthz()
    assert result == ("", 200)


def test_readyz():
    """
    The readiness probe handler answers with an empty body and status 200.
    """
    # NOTE(review): the request context is presumably needed because the
    # handler is wrapped by `metrics.do_not_track()` - confirm.
    app = pytest.fa.APP
    with app.test_request_context():
        result = pytest.fa.readyz()
    assert result == ("", 200)