Skip to content

Commit

Permalink
Merge pull request #556 from grdryn/INTLY-6496
Browse files: browse the repository at this point in the history
🚨 Fix alerts that only group by container
  • Loading branch information
openshift-merge-robot authored Mar 26, 2020
2 parents ee67540 + 52e3e33 commit 4afda0e
Show file tree
Hide file tree
Showing 3 changed files with 17 additions and 17 deletions.
20 changes: 10 additions & 10 deletions templates/monitoring/kube_state_metrics_3scale_alerts.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -3,10 +3,10 @@ kind: PrometheusRule
metadata:
name: ksm-3scale-alerts
namespace: {{ index .Params "Namespace" }}
- spec:
- groups:
+ spec:
+ groups:
- name: 3scale.rules
- rules:
+ rules:
- alert: ThreeScaleApicastStagingPod
annotations:
sop_url: https://github.com/RHCloudServices/integreatly-help/blob/master/sops/alerts_and_troubleshooting.md
Expand Down Expand Up @@ -112,23 +112,23 @@ spec:
for: 5m
labels:
severity: critical
- - alert: ThreeScalePodHighMemory
+ - alert: ThreeScaleContainerHighMemory
annotations:
sop_url: https://github.com/RHCloudServices/integreatly-help/blob/master/sops/alerts_and_troubleshooting.md
- message: The {{ "{{" }} $labels.container {{ "}}" }} pod has been using {{ "{{" }} printf "%.0f" $value {{ "}}" }}% of available memory for longer than 15 minutes.
+ message: The {{ "{{" }} $labels.container {{ "}}" }} Container in the {{ "{{" }} $labels.pod {{ "}}" }} Pod has been using {{ "{{" }} printf "%.0f" $value {{ "}}" }}% of available memory for longer than 15 minutes.
scaling_plan: https://github.com/integr8ly/middleware-load-testing/blob/master/sops/3scale-scaling.md
expr: |
- sum by(container) (container_memory_usage_bytes{container!="",namespace="{{ index .Params "Namespace" }}"}) / sum by(container) (kube_pod_container_resource_limits_memory_bytes{namespace="{{ index .Params "Namespace" }}"}) * 100 > 90
+ sum by(container, pod) (container_memory_usage_bytes{container!="",namespace="{{ index .Params "Namespace" }}"}) / sum by(container, pod) (kube_pod_container_resource_limits_memory_bytes{namespace="{{ index .Params "Namespace" }}"}) * 100 > 90
for: 15m
labels:
severity: warning
- - alert: ThreeScalePodHighCPU
+ - alert: ThreeScaleContainerHighCPU
annotations:
sop_url: https://github.com/RHCloudServices/integreatly-help/blob/master/sops/alerts_and_troubleshooting.md
- message: The {{ "{{" }} $labels.container {{ "}}" }} pod has been using {{ "{{" }} printf "%.0f" $value {{ "}}" }}% of available CPU for longer than 15 minutes.
+ message: The {{ "{{" }} $labels.container {{ "}}" }} Container in the {{ "{{" }} $labels.pod {{ "}}" }} Pod has been using {{ "{{" }} printf "%.0f" $value {{ "}}" }}% of available CPU for longer than 15 minutes.
scaling_plan: https://github.com/integr8ly/middleware-load-testing/blob/master/sops/3scale-scaling.md
expr: |
- sum(node_namespace_pod_container:container_cpu_usage_seconds_total:sum_rate{namespace="{{ index .Params "Namespace" }}"}) by (container) / sum(kube_pod_container_resource_limits_cpu_cores{namespace="{{ index .Params "Namespace" }}"}) by (container) * 100 > 90
+ sum(node_namespace_pod_container:container_cpu_usage_seconds_total:sum_rate{namespace="{{ index .Params "Namespace" }}"}) by (container, pod) / sum(kube_pod_container_resource_limits_cpu_cores{namespace="{{ index .Params "Namespace" }}"}) by (container, pod) * 100 > 90
for: 15m
labels:
- severity: warning
+ severity: warning
8 changes: 4 additions & 4 deletions templates/monitoring/kube_state_metrics_amqonline_alerts.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -16,13 +16,13 @@ spec:
for: 5m
labels:
severity: critical
- - alert: AMQOnlinePodHighMemory
+ - alert: AMQOnlineContainerHighMemory
annotations:
sop_url: https://github.com/RHCloudServices/integreatly-help/blob/master/sops/alerts_and_troubleshooting.md
- message: The {{ "{{" }} $labels.container {{ "}}" }} pod has been using {{ "{{" }} printf "%.0f" $value {{ "}}" }}% of available memory for longer than 15 minutes.
+ message: The {{ "{{" }} $labels.container {{ "}}" }} Container in the {{ "{{" }} $labels.pod {{ "}}" }} Pod has been using {{ "{{" }} printf "%.0f" $value {{ "}}" }}% of available memory for longer than 15 minutes.
scaling_plan: https://github.com/integr8ly/middleware-load-testing/blob/master/sops/amq-scaling.md
expr: |
- sum by(container) (container_memory_usage_bytes{container!="",namespace="{{ index .Params "Namespace" }}"}) / sum by(container) (kube_pod_container_resource_limits_memory_bytes{namespace="{{ index .Params "Namespace" }}"}) * 100 > 90
+ sum by(container, pod) (container_memory_usage_bytes{container!="",namespace="{{ index .Params "Namespace" }}"}) / sum by(container, pod) (kube_pod_container_resource_limits_memory_bytes{namespace="{{ index .Params "Namespace" }}"}) * 100 > 90
for: 15m
labels:
- severity: warning
+ severity: warning
6 changes: 3 additions & 3 deletions test/common/alerts_exist.go
Original file line number Diff line number Diff line change
Expand Up @@ -110,7 +110,7 @@ var expectedRules = []alertsTestRule{
File: "redhat-rhmi-amq-online-ksm-amqonline-alerts.yaml",
Rules: []string{
"AMQOnlinePodCount",
- "AMQOnlinePodHighMemory",
+ "AMQOnlineContainerHighMemory",
},
},
{
Expand Down Expand Up @@ -150,8 +150,8 @@ var expectedRules = []alertsTestRule{
"ThreeScaleAdminUIBBT",
"ThreeScaleDeveloperUIBBT",
"ThreeScaleSystemAdminUIBBT",
- "ThreeScalePodHighMemory",
- "ThreeScalePodHighCPU",
+ "ThreeScaleContainerHighMemory",
+ "ThreeScaleContainerHighCPU",
"ThreeScaleZyncPodAvailability",
"ThreeScaleZyncDatabasePodAvailability",
},
Expand Down

0 comments on commit 4afda0e

Please sign in to comment.