scale-catalog/clustertool/cluster/main/add-ons/monitoring/prometheus-rules.yaml
2024-06-12 16:49:02 +02:00

35 lines
1.2 KiB
YAML

---
# yaml-language-server: $schema=https://kubernetes-schemas.zinn.ca/monitoring.coreos.com/prometheusrule_v1.json
# PrometheusRule "flux" in the flux-system namespace: one rule group with two
# critical alerts, one for missing scrape targets and one for failing
# reconciliations.
apiVersion: monitoring.coreos.com/v1
kind: PrometheusRule
metadata:
  name: flux
  namespace: flux-system
spec:
  groups:
    - name: flux
      rules:
        # Fires when, for 5 minutes, there is no `up` series with value 1
        # whose `job` label matches the regex `.*flux-system.*`.
        # absent() returns a result only when its argument selects nothing,
        # so this also fires when matching targets exist but all report 0.
        - alert: FluxComponentAbsent
          annotations:
            description: Flux component has disappeared from Prometheus target discovery.
            summary: Flux component is down.
          expr: |
            absent(up{job=~".*flux-system.*"} == 1)
          for: 5m
          labels:
            severity: critical
        # Fires per (namespace, name, kind) when the expression below has
        # held for 10 minutes.
        # PromQL precedence is `*` > `+` > `==`, so this evaluates as
        #   (max(Ready=False) + max(status=Deleted) * 2) == 1
        # A sum of exactly 1 therefore requires the Ready=False series to be
        # 1 while the Deleted series is 0.
        # NOTE(review): presumably gotk_reconcile_condition is a 0/1 gauge
        # exported by the Flux controllers - confirm that the installed Flux
        # version still exposes this metric.
        - alert: FluxReconciliationFailure
          annotations:
            # Folded scalar: the two lines below are joined with a single
            # space and the trailing newline is stripped.
            description: >-
              {{ $labels.kind }} {{ $labels.namespace }}/{{ $labels.name }} reconciliation has been failing
              for more than ten minutes.
            summary: Flux reconciliation failure.
          expr: |
            max(gotk_reconcile_condition{status="False",type="Ready"}) by (namespace, name, kind)
            +
            on(namespace, name, kind) (max(gotk_reconcile_condition{status="Deleted"})
            by (namespace, name, kind)) * 2 == 1
          for: 10m
          labels:
            severity: critical