Files
infrastructure/clusters/cl01tl/manifests/rclone/PrometheusRule-web-assets-remote-rclone.yaml

31 lines
1.1 KiB
YAML

# PrometheusRule (Prometheus Operator CRD) defining the alert for failed
# rclone Job pods of the web-assets remote sync in the `rclone` namespace.
apiVersion: monitoring.coreos.com/v1
kind: PrometheusRule
metadata:
  name: web-assets-remote-rclone
  namespace: rclone
  labels:
    helm.sh/chart: rclone-web-assets-remote-0.11.3
    app.kubernetes.io/instance: rclone
    app.kubernetes.io/part-of: rclone
    # Quoted so the version is always treated as a string label value.
    app.kubernetes.io/version: "0.11.3"
    app.kubernetes.io/managed-by: Helm
    app.kubernetes.io/name: web-assets-remote-rclone
spec:
  groups:
    - name: rclone.alerts
      rules:
        - alert: RclonePodFailed
          # Selects containers whose last termination had a non-zero exit
          # code, then joins on (pod, namespace) against kube_pod_owner
          # restricted to Jobs named `rclone-*`. group_left(owner_name)
          # copies the Job name onto the result so the annotations below can
          # reference {{ $labels.owner_name }}.
          # NOTE(review): the description reports {{ $value }} as the exit
          # code; this relies on kube_pod_owner having the value 1 so the
          # product equals the exit code - confirm against kube-state-metrics.
          expr: |
            (kube_pod_container_status_last_terminated_exitcode > 0)
            * on(pod, namespace) group_left(owner_name)
            kube_pod_owner{owner_kind="Job", owner_name=~"rclone-.*"}
          # The expression must keep matching for 1 minute before firing.
          for: 1m
          labels:
            severity: critical
          annotations:
            summary: "Rclone Pod failed in {{ $labels.namespace }}"
            description: |
              A pod for the Rclone sync of s3 bucket 'web-assets-770aef58c931fcf4' failed with exit code {{ $value }}.
              Job: {{ $labels.owner_name }}
              Namespace: {{ $labels.namespace }}