Skip to content

Commit

Permalink
Adding self-monitoring, alert rules and grafana dashboard (#70)
Browse files Browse the repository at this point in the history
* Add self-metrics-endpoint relation and alert rules

* grafana-dashboard relation added

* Linting...

* Fixing static analysis checks

* Fix broken integration tests

* Rename itest and test new relation

* Timeouts in test_upgrade_charm.py increased

* More timeouts increased

* More timeouts...

* unpin pytest-operator

* Deploy prom with --trust

* Simplifying num_peers

Co-authored-by: Leon <[email protected]>

* Missing trust=True in rerelate_app itest

* Specify relation name in the other side of the relation

Co-authored-by: Leon <[email protected]>
  • Loading branch information
Abuelodelanada and sed-i authored Jun 28, 2022
1 parent c6dc4b1 commit d1bcefc
Show file tree
Hide file tree
Showing 14 changed files with 15,107 additions and 21 deletions.
1,544 changes: 1,544 additions & 0 deletions lib/charms/grafana_k8s/v0/grafana_dashboard.py

Large diffs are not rendered by default.

2,304 changes: 2,304 additions & 0 deletions lib/charms/prometheus_k8s/v0/prometheus_scrape.py

Large diffs are not rendered by default.

4 changes: 4 additions & 0 deletions metadata.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -39,6 +39,10 @@ provides:
# assumed network type: private
karma-dashboard:
interface: karma_dashboard
self-metrics-endpoint:
interface: prometheus_scrape
grafana-dashboard:
interface: grafana_dashboard

peers:
replicas:
Expand Down
4 changes: 3 additions & 1 deletion pyproject.toml
Original file line number Diff line number Diff line change
Expand Up @@ -56,9 +56,11 @@ module = ["ops.*", "lightkube.*", "git.*", "pytest_operator.*", "validators.*"]
ignore_missing_imports = true

[[tool.mypy.overrides]]
module = ["charms.observability_libs.*"]
module = ["charms.grafana_k8s.*", "charms.observability_libs.*"]
follow_imports = "silent"
warn_unused_ignores = false

[tool.pytest.ini_options]
minversion = "6.0"
log_cli_level = "INFO"
asyncio_mode = "auto"
16 changes: 13 additions & 3 deletions src/charm.py
Original file line number Diff line number Diff line change
Expand Up @@ -7,12 +7,14 @@
import hashlib
import logging
import socket
from typing import List, cast
from typing import List, Optional, cast

import yaml
from charms.alertmanager_k8s.v0.alertmanager_dispatch import AlertmanagerProvider
from charms.grafana_k8s.v0.grafana_dashboard import GrafanaDashboardProvider
from charms.karma_k8s.v0.karma_dashboard import KarmaProvider
from charms.observability_libs.v0.kubernetes_service_patch import KubernetesServicePatch
from charms.prometheus_k8s.v0.prometheus_scrape import MetricsEndpointProvider
from ops.charm import ActionEvent, CharmBase
from ops.framework import StoredState
from ops.main import main
Expand Down Expand Up @@ -81,6 +83,14 @@ def __init__(self, *args):
],
)

# Self-monitoring
self._scraping = MetricsEndpointProvider(
self,
relation_name="self-metrics-endpoint",
jobs=[{"static_configs": [{"targets": [f"*:{self._api_port}"]}]}],
)
self.grafana_dashboard_provider = GrafanaDashboardProvider(charm=self)

self.container = self.unit.get_container(self._container_name)

# Core lifecycle events
Expand Down Expand Up @@ -120,7 +130,7 @@ def api_port(self) -> int:
return self._api_port

@property
def peer_relation(self) -> Relation:
def peer_relation(self) -> Optional["Relation"]:
"""Helper function for obtaining the peer relation object.
Returns: peer relation object
Expand Down Expand Up @@ -366,7 +376,7 @@ def _common_exit_hook(self) -> None:
service := self.container.get_service(self._service_name)
) and service.is_running()

num_peers = len(self.peer_relation.units)
num_peers = len(rel.units) if (rel := self.peer_relation) else 0

if layer_changed and (
not service_running or (num_peers > 0 and not self._stored.launched_with_peers)
Expand Down
Loading

0 comments on commit d1bcefc

Please sign in to comment.