diff --git a/clusters/app.ci/openshift-user-workload-monitoring/mixins/_prometheus/dptp_alerts.libsonnet b/clusters/app.ci/openshift-user-workload-monitoring/mixins/_prometheus/dptp_alerts.libsonnet index d2c29d4b9b56..c2a3e902f160 100644 --- a/clusters/app.ci/openshift-user-workload-monitoring/mixins/_prometheus/dptp_alerts.libsonnet +++ b/clusters/app.ci/openshift-user-workload-monitoring/mixins/_prometheus/dptp_alerts.libsonnet @@ -241,9 +241,15 @@ { alert: 'openshift-mirroring-failures', expr: ||| - increase(prowjob_state_transitions{job_name="periodic-image-mirroring-openshift",state="failure"}[5m]) > 0 + sum by (job_name) ( + rate( + prowjob_state_transitions{job="prow-controller-manager",job_name!~"rehearse.*",state="success"}[12h] + ) + ) + * on (job_name) group_left max by (job_name) (prow_job_labels{job_agent="kubernetes",label_ci_openshift_io_role="image-mirroring",label_ci_openshift_io_area="openshift"}) == 0 |||, 'for': '1m', + 'keep_firing_for': '2h', labels: { severity: 'critical', }, diff --git a/clusters/app.ci/openshift-user-workload-monitoring/mixins/prometheus_out/ci-alerts_prometheusrule.yaml b/clusters/app.ci/openshift-user-workload-monitoring/mixins/prometheus_out/ci-alerts_prometheusrule.yaml index 1b4123205f63..b476a6769433 100644 --- a/clusters/app.ci/openshift-user-workload-monitoring/mixins/prometheus_out/ci-alerts_prometheusrule.yaml +++ b/clusters/app.ci/openshift-user-workload-monitoring/mixins/prometheus_out/ci-alerts_prometheusrule.yaml @@ -292,8 +292,14 @@ spec: annotations: message: OpenShift image mirroring jobs have failed. View failed jobs at the . expr: | - increase(prowjob_state_transitions{job_name="periodic-image-mirroring-openshift",state="failure"}[5m]) > 0 + sum by (job_name) ( + rate( + prowjob_state_transitions{job="prow-controller-manager",job_name!~"rehearse.*",state="success"}[12h] + ) + ) + * on (job_name) group_left max by (job_name) (prow_job_labels{job_agent="kubernetes",label_ci_openshift_io_role="image-mirroring",label_ci_openshift_io_area="openshift"}) == 0 for: 1m + keep_firing_for: 2h labels: severity: critical - name: ghproxy