39 lines
1.1 KiB
YAML
39 lines
1.1 KiB
YAML
# Prometheus alerting rule groups.
groups:
# Availability alerts.
- name: service
  rules:
  # Fires when a target's `up` series has been 0 for 10 consecutive minutes.
  - alert: service_down
    expr: up == 0
    for: 10m
    labels:
      severity: critical
    annotations:
      # `up` is evaluated per scrape target, so name the instance as well as
      # its job; the job label alone may not identify which target is down.
      summary: "Instance {{ $labels.instance }} (job {{ $labels.job }}) down"
      description: "{{ $labels.instance }} (job {{ $labels.job }}) has been down for more than 10 minutes."
|
|
|
|
# Host load alerts.
- name: infrastructure
  rules:
  # Fires when node_load1 stays above 8 for 10 consecutive minutes.
  # NOTE(review): the threshold is an absolute load value, not scaled by the
  # number of CPUs on the host - confirm 8 is appropriate for every target.
  - alert: high_load
    expr: node_load1 > 8
    for: 10m
    labels:
      severity: warning
    annotations:
      # Identify the instance (not only the job) and include the measured
      # value so the notification is actionable on its own.
      summary: "Instance {{ $labels.instance }} (job {{ $labels.job }}) under high load"
      description: "{{ $labels.instance }} (job {{ $labels.job }}) is under high load (load1 = {{ $value }})."
|
|
|
|
# Disk capacity alerts.
- name: disk_space
  rules:
  # Extrapolates the last 4h of node_filesystem_free_bytes 4 * 3600 seconds
  # ahead and fires when the predicted free space stays below 0 for 5 minutes.
  # NOTE(review): the selector is pinned to job="host-ps03fd", so filesystems
  # reported by any other job are not covered - confirm this is intended.
  - alert: disk_will_fill
    expr: predict_linear(node_filesystem_free_bytes{job="host-ps03fd"}[4h], 4 * 3600) < 0
    for: 5m
    labels:
      severity: critical
    annotations:
      # Added for consistency with the other rules, which all carry a
      # summary and a description.
      summary: "Disk on instance {{ $labels.instance }} is predicted to fill within 4 hours"
      description: "Filesystem {{ $labels.mountpoint }} on {{ $labels.instance }} is predicted to run out of free space within 4 hours."

  # Fires as soon as the used-space percentage reaches 90 (no `for` delay).
  # NOTE(review): the metric name looks like a recording rule defined outside
  # this file; presumably it yields the used percentage per filesystem - verify.
  - alert: disk_10_percent_free
    expr: node_exporter:node_filesystem_free_bytes:fs_used_percents >= 90
    labels:
      severity: critical
    annotations:
      summary: "Instance {{ $labels.instance }} is low on disk space"
      # $value is the expression's result, i.e. the percentage USED (>= 90),
      # so it must not be reported as the percentage free.
      description: "{{ $labels.instance }} has {{ $value }}% of its disk space used (10% or less free)."
|