Files
infrastructure/clusters/cl01tl/manifests/openbao/PrometheusRule-openbao.yaml

74 lines
2.9 KiB
YAML

---
# PrometheusRule defining the alerting rules for the OpenBao deployment in the
# `openbao` namespace.
# NOTE(review): the labels mark this manifest as managed by Helm; if it is
# rendered from chart values, the durable place for these fixes is the values
# file that generates it — confirm.
apiVersion: monitoring.coreos.com/v1
kind: PrometheusRule
metadata:
  name: openbao
  namespace: openbao
  labels:
    helm.sh/chart: openbao-0.27.2
    app.kubernetes.io/name: openbao
    app.kubernetes.io/instance: openbao
    app.kubernetes.io/managed-by: Helm
    # Presumably the label the Prometheus operator's rule selector matches on
    # — verify against the Prometheus resource.
    release: prometheus
spec:
  groups:
    - name: openbao
      rules:
        # Request latency, warning tier. quantile="0.5" is the median, so the
        # message says "median" rather than "average".
        # The namespace matcher was "mynamespace", which looks like an
        # unreplaced example placeholder; it now matches metadata.namespace.
        # Assumes the scrape config attaches a `namespace` label equal to the
        # pod namespace — TODO confirm.
        - alert: openBao-HighResponseTime
          annotations:
            message: >-
              The median response time of OpenBao has been over 500ms for the
              last 5 minutes.
          expr: 'vault_core_handle_request{quantile="0.5", namespace="openbao"} > 500'
          for: 5m
          labels:
            severity: warning

        # Request latency, critical tier: same alert name and selector as the
        # rule above, with a higher threshold and severity.
        - alert: openBao-HighResponseTime
          annotations:
            message: >-
              The median response time of OpenBao has been over 1s for the
              last 5 minutes.
          expr: 'vault_core_handle_request{quantile="0.5", namespace="openbao"} > 1000'
          for: 5m
          labels:
            severity: critical

        # Fires for every series reporting vault_core_unsealed == 0 for 1m.
        - alert: openBao-Sealed
          annotations:
            description: |-
              OpenBao instance is sealed on {{ $labels.instance }}
              VALUE = {{ $value }}
              LABELS = {{ $labels }}
            summary: 'OpenBao sealed (instance {{ $labels.instance }})'
          expr: vault_core_unsealed == 0
          for: 1m
          labels:
            severity: critical

        # Tokens created but not yet stored. Aggregated `by (instance)` so the
        # `instance` label survives the avg() and the annotations below render
        # it; a bare avg() drops every label and leaves the placeholder empty.
        - alert: OpenBao-TooManyPendingTokens
          annotations:
            description: |-
              Too many pending tokens on {{ $labels.instance }}: {{ $value }} tokens created but not yet stored.
              VALUE = {{ $value }}
              LABELS = {{ $labels }}
            summary: 'OpenBao too many pending tokens (instance {{ $labels.instance }})'
          expr: avg by (instance) (vault_token_create_count - vault_token_store_count) > 0
          for: 5m
          labels:
            severity: warning

        # More than 3 tokens in the creation_ttl="+Inf" bucket for 5m.
        - alert: OpenBao-TooManyInfinityTokens
          annotations:
            description: |-
              Too many non-expiring tokens on {{ $labels.instance }}: {{ $value }} tokens with infinite TTL.
              VALUE = {{ $value }}
              LABELS = {{ $labels }}
            summary: 'OpenBao too many infinity tokens (instance {{ $labels.instance }})'
          expr: 'vault_token_count_by_ttl{creation_ttl="+Inf"} > 3'
          for: 5m
          labels:
            severity: warning

        # Share of nodes reporting vault_core_active, alerting at <= 50%.
        # sum()/count() remove all labels, so the summary no longer
        # interpolates {{ $labels.instance }} (it always rendered empty).
        # NOTE(review): upstream telemetry docs describe vault_core_active as
        # 1 on the active node and 0 on standbys. If this deployment runs
        # several HA nodes, a healthy cluster would sit at 1/N <= 0.5 and this
        # rule would fire permanently — confirm the topology and consider
        # `sum(vault_core_active) < 1` instead.
        - alert: OpenBao-ClusterHealth
          annotations:
            description: |-
              OpenBao cluster is not healthy: only {{ $value | humanizePercentage }} of nodes are active.
              VALUE = {{ $value }}
              LABELS = {{ $labels }}
            summary: OpenBao cluster health
          expr: sum(vault_core_active) / count(vault_core_active) <= 0.5 and count(vault_core_active) > 0
          for: 0m
          labels:
            severity: critical