Added nopo11y-client chart
Kaushal Vora committed May 13, 2024
1 parent 4331027 commit 7f9b90b
Showing 5 changed files with 1,922 additions and 0 deletions.
24 changes: 24 additions & 0 deletions charts/nopo11y-client/Chart.yaml
@@ -0,0 +1,24 @@
apiVersion: v2
name: nopo11y
description: A Helm chart to set up generic observability for any microservice

# A chart can be either an 'application' or a 'library' chart.
#
# Application charts are a collection of templates that can be packaged into versioned archives
# to be deployed.
#
# Library charts provide useful utilities or functions for the chart developer. They're included as
# a dependency of application charts to inject those utilities and functions into the rendering
# pipeline. Library charts do not define any templates and therefore cannot be deployed.
type: application

# This is the chart version. This version number should be incremented each time you make changes
# to the chart and its templates, including the app version.
# Versions are expected to follow Semantic Versioning (https://semver.org/)
version: 1.0.0

# This is the version number of the application being deployed. This version number should be
# incremented each time you make changes to the application. Versions are not expected to
# follow Semantic Versioning. They should reflect the version the application is using.
# It is recommended to use it with quotes.
appVersion: "1.0.0"
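
If a microservice wants to consume this chart alongside its own, one way to wire it in is as a dependency of the consuming chart. The snippet below is a minimal sketch of such a consumer Chart.yaml entry; the repository URL and alias are illustrative placeholders, not part of this commit.

# Illustrative consumer Chart.yaml snippet; repository URL and alias are placeholders.
dependencies:
  - name: nopo11y
    version: 1.0.0
    repository: https://example.github.io/nopo11y-charts
    alias: nopo11y-client
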
127 changes: 127 additions & 0 deletions charts/nopo11y-client/templates/defaultAlerts.yaml
@@ -0,0 +1,127 @@
{{- if .Values.enabled }}
apiVersion: monitoring.coreos.com/v1
kind: PrometheusRule
metadata:
namespace: {{ .Values.namespace }}
name: {{ .Values.appLabel }}-default-alert-rules
labels:
release: {{ .Values.prometheusReleaseLabel }}
spec:
groups:
- name: {{ .Values.appLabel }}-default-alert-rules
rules:
- alert: {{ .Values.appLabel }}HighCpuUtilization
{{- if .Values.includeReleaseNameInMetricsLabels }}
expr: 100 * max(rate(container_cpu_usage_seconds_total{pod=~"{{ .Release.Name }}-{{ .Values.deploymentName }}.*"}[5m])/ on (container, pod) kube_pod_container_resource_requests{resource="cpu"}) by (pod) > 80
{{- else }}
expr: 100 * max(rate(container_cpu_usage_seconds_total{pod=~"{{ .Values.deploymentName }}.*"}[5m])/ on (container, pod) kube_pod_container_resource_requests{resource="cpu"}) by (pod) > 80
{{- end }}
for: 10m
annotations:
description: CPU utilization of pod {{ "{{" }} $labels.pod {{ "}}" }} has been above 80% for the last 5 minutes.
summary: CPU utilization went over 80% for pod {{ "{{" }} $labels.pod {{ "}}" }}.
{{- if .Values.grafanaURL }}
dashboard: {{ .Values.grafanaURL }}/d/{{- if .Values.includeReleaseNameInMetricsLabels }}{{ .Release.Name }}-{{ .Values.appLabel }}{{- else }}{{ .Values.appLabel }}{{- end }}-overview
{{- end }}
labels:
severity: warning
- alert: {{ .Values.appLabel }}HighCpuUtilization
{{- if .Values.includeReleaseNameInMetricsLabels }}
expr: 100 * max(rate(container_cpu_usage_seconds_total{pod=~"{{ .Release.Name }}-{{ .Values.deploymentName }}.*"}[5m])/ on (container, pod) kube_pod_container_resource_requests{resource="cpu"}) by (pod) > 90
{{- else }}
expr: 100 * max(rate(container_cpu_usage_seconds_total{pod=~"{{ .Values.deploymentName }}.*"}[5m])/ on (container, pod) kube_pod_container_resource_requests{resource="cpu"}) by (pod) > 90
{{- end }}
for: 10m
annotations:
description: CPU utilization of pod {{ "{{" }} $labels.pod {{ "}}" }} has been above 90% for the last 5 minutes.
summary: CPU utilization went over 90% for pod {{ "{{" }} $labels.pod {{ "}}" }}.
{{- if .Values.grafanaURL }}
dashboard: {{ .Values.grafanaURL }}/d/{{- if .Values.includeReleaseNameInMetricsLabels }}{{ .Release.Name }}-{{ .Values.appLabel }}{{- else }}{{ .Values.appLabel }}{{- end }}-overview
{{- end }}
labels:
severity: critical
- alert: {{ .Values.appLabel }}HighMemoryUtilization
{{- if .Values.includeReleaseNameInMetricsLabels }}
expr: 100 * max( container_memory_working_set_bytes{pod=~"{{ .Release.Name }}-{{ .Values.deploymentName }}.*"} / on (container, pod) kube_pod_container_resource_limits{resource="memory"}) by (pod) > 80
{{- else }}
expr: 100 * max( container_memory_working_set_bytes{pod=~"{{ .Values.deploymentName }}.*"} / on (container, pod) kube_pod_container_resource_limits{resource="memory"}) by (pod) > 80
{{- end }}
for: 10m
annotations:
description: Memory utilization of pod {{ "{{" }} $labels.pod {{ "}}" }} has been above 80% for the last 5 minutes.
summary: Memory utilization went over 80% for pod {{ "{{" }} $labels.pod {{ "}}" }}.
{{- if .Values.grafanaURL }}
dashboard: {{ .Values.grafanaURL }}/d/{{- if .Values.includeReleaseNameInMetricsLabels }}{{ .Release.Name }}-{{ .Values.appLabel }}{{- else }}{{ .Values.appLabel }}{{- end }}-overview
{{- end }}
labels:
severity: warning
- alert: {{ .Values.appLabel }}HighMemoryUtilization
{{- if .Values.includeReleaseNameInMetricsLabels }}
expr: 100 * max( container_memory_working_set_bytes{pod=~"{{ .Release.Name }}-{{ .Values.deploymentName }}.*"} / on (container, pod) kube_pod_container_resource_limits{resource="memory"}) by (pod) > 90
{{- else }}
expr: 100 * max( container_memory_working_set_bytes{pod=~"{{ .Values.deploymentName }}.*"} / on (container, pod) kube_pod_container_resource_limits{resource="memory"}) by (pod) > 90
{{- end }}
for: 10m
annotations:
description: Memory utilization of pod {{ "{{" }} $labels.pod {{ "}}" }} has been above 90% for the last 5 minutes.
summary: Memory utilization went over 90% for pod {{ "{{" }} $labels.pod {{ "}}" }}.
{{- if .Values.grafanaURL }}
dashboard: {{ .Values.grafanaURL }}/d/{{- if .Values.includeReleaseNameInMetricsLabels }}{{ .Release.Name }}-{{ .Values.appLabel }}{{- else }}{{ .Values.appLabel }}{{- end }}-overview
{{- end }}
labels:
severity: critical
{{- if .Values.istioMetrics.enabled }}
- alert: {{ .Values.appLabel }}High5xxErrorRate
{{- if .Values.includeReleaseNameInMetricsLabels }}
expr: sum(rate(istio_requests_total{app="{{ .Release.Name }}-{{ .Values.appLabel }}", destination_app=~"{{ .Release.Name }}-{{ .Values.appLabel }}", response_code=~"5.."}[5m])) by (instance) / sum(rate(istio_requests_total{app="{{ .Release.Name }}-{{ .Values.appLabel }}", destination_app=~"{{ .Release.Name }}-{{ .Values.appLabel }}"}[5m])) by (instance) * 100 > {{ .Values.errorRate5xx }}
{{- else }}
expr: sum(rate(istio_requests_total{app="{{ .Values.appLabel }}", destination_app=~"{{ .Values.appLabel }}", response_code=~"5.."}[5m])) by (instance) / sum(rate(istio_requests_total{app="{{ .Values.appLabel }}", destination_app=~"{{ .Values.appLabel }}"}[5m])) by (instance) * 100 > {{ .Values.errorRate5xx }}
{{- end }}
annotations:
description: {{- if .Values.includeReleaseNameInMetricsLabels }} {{ .Release.Name }}-{{ .Values.appLabel }}{{- else }} {{ .Values.appLabel }}{{- end }} service has been experiencing a high 5xx error rate for the last 5 minutes.
summary: {{- if .Values.includeReleaseNameInMetricsLabels }} {{ .Release.Name }}-{{ .Values.appLabel }}{{- else }} {{ .Values.appLabel }}{{- end }} service is experiencing a high 5xx error rate.
{{- if .Values.grafanaURL }}
dashboard: {{ .Values.grafanaURL }}/d/{{- if .Values.includeReleaseNameInMetricsLabels }}{{ .Release.Name }}-{{ .Values.appLabel }}{{- else }}{{ .Values.appLabel }}{{- end }}-overview
{{- end }}
labels:
severity: critical
- alert: {{ .Values.appLabel }}High4xxErrorRate
{{- if .Values.includeReleaseNameInMetricsLabels }}
expr: sum(rate(istio_requests_total{app="{{ .Release.Name }}-{{ .Values.appLabel }}", destination_app=~"{{ .Release.Name }}-{{ .Values.appLabel }}", response_code=~"4.."}[5m])) by (instance) / sum(rate(istio_requests_total{app="{{ .Release.Name }}-{{ .Values.appLabel }}", destination_app=~"{{ .Release.Name }}-{{ .Values.appLabel }}"}[5m])) by (instance) * 100 > {{ .Values.errorRate4xx }}
{{- else }}
expr: sum(rate(istio_requests_total{app="{{ .Values.appLabel }}", destination_app=~"{{ .Values.appLabel }}", response_code=~"4.."}[5m])) by (instance) / sum(rate(istio_requests_total{app="{{ .Values.appLabel }}", destination_app=~"{{ .Values.appLabel }}"}[5m])) by (instance) * 100 > {{ .Values.errorRate4xx }}
{{- end }}
for: 5m
annotations:
{{- if .Values.grafanaURL }}
dashboard: {{ .Values.grafanaURL }}/d/{{- if .Values.includeReleaseNameInMetricsLabels }}{{ .Release.Name }}-{{ .Values.appLabel }}{{- else }}{{ .Values.appLabel }}{{- end }}-overview
{{- end }}
description: {{- if .Values.includeReleaseNameInMetricsLabels }} {{ .Release.Name }}-{{ .Values.appLabel }}{{- else }} {{ .Values.appLabel }}{{- end }} service has been experiencing a high 4xx error rate for the last 5 minutes.
summary: {{- if .Values.includeReleaseNameInMetricsLabels }} {{ .Release.Name }}-{{ .Values.appLabel }}{{- else }} {{ .Values.appLabel }}{{- end }} service is experiencing a high 4xx error rate.
labels:
severity: warning
{{- end }}
{{- if .Values.nginxIngressMetrics.enabled }}
- alert: {{ .Values.appLabel }}IngressHigh5xxErrorRate
expr: sum(rate(nginx_ingress_controller_requests{ingress=~"{{ .Values.nginxIngressMetrics.ingressName }}",status=~"5.."}[5m])) / sum(rate(nginx_ingress_controller_requests{ingress=~"{{ .Values.nginxIngressMetrics.ingressName }}"}[5m])) * 100 > {{ .Values.errorRate5xx }}
annotations:
description: {{ .Values.appLabel }} service has been experiencing a high 5xx error rate for the last 5 minutes.
summary: {{ .Values.appLabel }} service is experiencing a high 5xx error rate.
{{- if .Values.grafanaURL }}
dashboard: {{ .Values.grafanaURL }}/d/{{ .Values.appLabel }}-overview
{{- end }}
labels:
severity: critical
- alert: {{ .Values.appLabel }}IngressHigh4xxErrorRate
expr: sum(rate(nginx_ingress_controller_requests{ingress=~"{{ .Values.nginxIngressMetrics.ingressName }}",status=~"4.."}[5m])) / sum(rate(nginx_ingress_controller_requests{ingress=~"{{ .Values.nginxIngressMetrics.ingressName }}"}[5m])) * 100 > {{ .Values.errorRate4xx }}
for: 10m
annotations:
description: {{ .Values.appLabel }} service has been experiencing a high 4xx error rate for the last 5 minutes.
summary: {{ .Values.appLabel }} service is experiencing a high 4xx error rate.
{{- if .Values.grafanaURL }}
dashboard: {{ .Values.grafanaURL }}/d/{{ .Values.appLabel }}-overview
{{- end }}
labels:
severity: warning
{{- end }}
{{- end }}
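
For reference, the values consumed by this template can be sketched as a small values snippet. The keys below are taken from the references in the template above; the values themselves are placeholders, since the chart's actual values.yaml is not shown in this excerpt.

# Illustrative values sketch; keys mirror the template references, values are placeholders.
enabled: true
namespace: default
appLabel: my-service
deploymentName: my-service
prometheusReleaseLabel: kube-prometheus-stack
includeReleaseNameInMetricsLabels: false
grafanaURL: ""                     # optional; when set, adds dashboard links to alerts
errorRate5xx: 5                    # percentage threshold for the 5xx alerts
errorRate4xx: 10                   # percentage threshold for the 4xx alerts
istioMetrics:
  enabled: false
nginxIngressMetrics:
  enabled: false
  ingressName: my-service-ingress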