# Diff summary: 2 files changed (+54 -0), under:
#   rhobs/alerting/data_plane
#   test/promql/tests/data_plane
---
# PrometheusRule that alerts on an unusually large number of pipelineruns in a
# single tenant namespace.
# NOTE(review): presumably stored as
# rhobs/alerting/data_plane/prometheus.pipelineruns_increase.yaml - confirm path.
apiVersion: monitoring.coreos.com/v1
kind: PrometheusRule
metadata:
  name: rhtap-pipelineruns-increase-alerting
  labels:
    tenant: rhtap
spec:
  groups:
    - name: pipelineruns_increase_alerts
      # The group is evaluated every 30 minutes.
      interval: 30m
      rules:

        # The expression is true when, per (source_cluster, namespace), the 3h
        # increase of pipelinerun_duration_scheduled_seconds_count exceeds 2000.
        # Only namespaces whose name ends in "-tenant" are selected.
        # NOTE(review): the metric looks like a histogram "_count" series, so
        # its increase would approximate the number of pipelineruns - confirm.
        - alert: PipelineRunsIncrease
          expr: |
            sum by (source_cluster,namespace) (
              increase(pipelinerun_duration_scheduled_seconds_count{namespace=~".*-tenant"}[3h])
            ) > 2000
          # The condition must hold for 25m before the alert fires. With a 30m
          # group interval that is two consecutive true evaluations.
          for: 25m
          labels:
            severity: high
          annotations:
            summary: >-
              Increase number of Tekton pipelineruns in one namespace.
            description: >-
              Increase number of Tekton pipelineruns in one namespace in {{ $labels.source_cluster }}.
            alert_team_handle: <!subteam^S04PYECHCCU>
            team: pipelines
---
# promtool unit test for the PipelineRunsIncrease alert.
# NOTE(review): presumably stored under test/promql/tests/data_plane - confirm.
evaluation_interval: 1m

rule_files:
  - prometheus.pipelineruns_increase.yaml

tests:

  - interval: 1m
    input_series:

      # Starts at 1000 and grows by 12 per minute for 210 steps, so a full 3h
      # window sees an increase of about 12 * 180 = 2160, above the 2000
      # threshold.
      - series: 'pipelinerun_duration_scheduled_seconds_count{source_cluster="cluster01",namespace="webapp-tenant"}'
        values: '1000+12x210'

      # Grows by 10 per minute: about 10 * 180 = 1800 over 3h, below the
      # threshold, so this tenant namespace must not alert.
      - series: 'pipelinerun_duration_scheduled_seconds_count{source_cluster="cluster01",namespace="quiet-tenant"}'
        values: '1000+10x210'

      # Same growth as the firing series, but the namespace does not end in
      # "-tenant", so the rule's namespace matcher must exclude it.
      - series: 'pipelinerun_duration_scheduled_seconds_count{source_cluster="cluster01",namespace="webapp"}'
        values: '1000+12x210'

    alert_rule_test:
      # Evaluate at 205m (180m for the 3h window + 25m for the "for" clause).
      # Only the webapp-tenant series is expected to be firing.
      - eval_time: 205m
        alertname: PipelineRunsIncrease
        exp_alerts:
          - exp_labels:
              severity: high
              source_cluster: cluster01
              namespace: webapp-tenant
            exp_annotations:
              summary: >-
                Increase number of Tekton pipelineruns in one namespace.
              description: >-
                Increase number of Tekton pipelineruns in one namespace in cluster01.
              alert_team_handle: <!subteam^S04PYECHCCU>
              team: pipelines
You can’t perform that action at this time.
0 commit comments