Files
infrastructure/clusters/cl01tl/manifests/matrix-synapse/PrometheusRule-matrix-hookshot-backup-source-local.yaml

34 lines
1.2 KiB
YAML

# PrometheusRule that raises a critical alert when a Job-owned pod's container
# last terminated with a non-zero exit code.
# NOTE(review): the labels mark this object as managed by Helm
# (chart label volsync-target-hookshot-1.1.0) -- presumably this file is rendered
# output, so the same fix should be applied in the chart template; confirm.
apiVersion: monitoring.coreos.com/v1
kind: PrometheusRule
metadata:
  name: matrix-hookshot-backup-source-local
  namespace: matrix-synapse
  labels:
    helm.sh/chart: volsync-target-hookshot-1.1.0
    app.kubernetes.io/instance: matrix-synapse
    app.kubernetes.io/part-of: matrix-synapse
    # Quoted so the version stays a string rather than being parsed as a number.
    app.kubernetes.io/version: "1.1.0"
    app.kubernetes.io/managed-by: Helm
    app.kubernetes.io/name: matrix-hookshot-backup-source-local
spec:
  groups:
    - name: volsync.alerts
      rules:
        - alert: VolSyncBackupPodFailed
          # The comparison is parenthesised on purpose: in PromQL `*` binds
          # tighter than `>`, so without the parentheses the expression parses
          # as `exit_code > (0 * on(...) group_left(...) <vector>)`, which
          # applies vector matching to a scalar operand and is rejected.
          # With the parentheses, series with a non-zero exit code are joined
          # (on pod + namespace) to kube_pod_owner{owner_kind="Job"}, copying
          # the owner_name label onto the result; the resulting value is the
          # left-hand exit code multiplied by the kube_pod_owner sample value.
          # The label_replace call writes owner_name back onto itself using
          # the catch-all pattern "(.*)".
          # NOTE(review): the selector only restricts owner_kind="Job"; it does
          # not restrict namespace or job name, while the description below
          # names PVC 'matrix-hookshot' -- confirm whether the rule should be
          # scoped to this backup's jobs.
          expr: |
            (kube_pod_container_status_last_terminated_exit_code > 0)
            * on(pod, namespace) group_left(owner_name)
            label_replace(
              kube_pod_owner{owner_kind="Job"},
              "owner_name", "$1", "owner_name", "(.*)"
            )
          # The condition must hold for one minute before the alert fires.
          for: 1m
          labels:
            severity: critical
          annotations:
            summary: "VolSync Backup Pod failed in {{ $labels.namespace }}"
            description: |
              A pod for the VolSync backup of PVC 'matrix-hookshot' failed with exit code {{ $value }}.
              Job: {{ $labels.owner_name }}
              Namespace: {{ $labels.namespace }}