feat: add rules
This commit is contained in:
@@ -0,0 +1,44 @@
|
||||
apiVersion: monitoring.coreos.com/v1
|
||||
kind: PrometheusRule
|
||||
metadata:
|
||||
name: cert-manager
|
||||
namespace: {{ .Release.Namespace }}
|
||||
labels:
|
||||
app.kubernetes.io/name: cert-manager
|
||||
{{- include "custom.labels" . | nindent 4 }}
|
||||
spec:
|
||||
groups:
|
||||
- name: EmbeddedExporter
|
||||
rules:
|
||||
- alert: Cert-ManagerAbsent
|
||||
expr: absent(up{job="cert-manager"})
|
||||
for: 10m
|
||||
labels:
|
||||
severity: critical
|
||||
annotations:
|
||||
summary: Cert-Manager absent (instance {{ `{{ $labels.instance }}` }})
|
||||
description: "Cert-Manager has disappeared from Prometheus service discovery. New certificates will not be able to be minted, and existing ones can't be renewed until cert-manager is back.\n VALUE = {{ `{{ $value }}` }}\n LABELS = {{ `{{ $labels }}` }}"
|
||||
- alert: Cert-ManagerCertificateExpiringSoon
|
||||
expr: avg by (exported_namespace, namespace, name) (certmanager_certificate_expiration_timestamp_seconds - time()) < (21 * 24 * 3600)
|
||||
for: 1h
|
||||
labels:
|
||||
severity: warning
|
||||
annotations:
|
||||
summary: Cert-Manager certificate expiring soon (instance {{ `{{ $labels.instance }}` }})
|
||||
description: "The certificate {{ `{{ $labels.name }}` }} is expiring in less than 21 days.\n VALUE = {{ `{{ $value }}` }}\n LABELS = {{ `{{ $labels }}` }}"
|
||||
- alert: Cert-ManagerCertificateNotReady
|
||||
expr: max by (name, exported_namespace, namespace, condition) (certmanager_certificate_ready_status{condition!="True"} == 1)
|
||||
for: 10m
|
||||
labels:
|
||||
severity: critical
|
||||
annotations:
|
||||
summary: Cert-Manager certificate not ready (instance {{ `{{ $labels.instance }}` }})
|
||||
description: "The certificate {{ `{{ $labels.name }}` }} in namespace {{ `{{ $labels.exported_namespace }}` }} is not ready to serve traffic.\n VALUE = {{ `{{ $value }}` }}\n LABELS = {{ `{{ $labels }}` }}"
|
||||
- alert: Cert-ManagerHittingACMERateLimits
|
||||
expr: sum by (host) (rate(certmanager_acme_client_request_count{status="429"}[5m])) > 0
|
||||
for: 5m
|
||||
labels:
|
||||
severity: critical
|
||||
annotations:
|
||||
summary: Cert-Manager hitting ACME rate limits (instance {{ `{{ $labels.instance }}` }})
|
||||
description: "Cert-Manager is being rate-limited by the ACME provider. Certificate issuance and renewal may be blocked for up to a week.\n VALUE = {{ `{{ $value }}` }}\n LABELS = {{ `{{ $labels }}` }}"
|
||||
Reference in New Issue
Block a user