# Source listing header (as captured from the file viewer): 126 lines, 7.5 KiB, YAML
---
# PrometheusRule named "haproxy" in the "argocd" namespace.
# Defines one rule group (EmbeddedExporter) of alerts evaluated over
# haproxy_* metrics: HTTP error ratios, connection errors, session/queue
# saturation, latency, retries, backend availability and health checks.
#
# Every description annotation ends with the raw alert value and label set
# ("VALUE = ..." / "LABELS = ...") rendered through alert templating.
apiVersion: monitoring.coreos.com/v1
kind: PrometheusRule
metadata:
  name: haproxy
  namespace: argocd
  labels:
    app.kubernetes.io/name: haproxy
    app.kubernetes.io/instance: argocd
    app.kubernetes.io/part-of: argocd
spec:
  groups:
    - name: EmbeddedExporter
      rules:
        # --- HTTP error ratios -------------------------------------------
        # Each ratio rule is additionally guarded by "total rate > 0".
        # The aggregations below keep only the label named in "by (...)",
        # so the summaries reference that label rather than "instance".

        # More than 5% of responses are 4xx, aggregated per proxy.
        - alert: HAProxyHighHTTP4xxErrorRateBackend
          expr: >-
            ((sum by (proxy) (rate(haproxy_server_http_responses_total{code="4xx"}[1m]))
            / sum by (proxy) (rate(haproxy_server_http_responses_total[1m]))) * 100) > 5
            and sum by (proxy) (rate(haproxy_server_http_responses_total[1m])) > 0
          for: 1m
          labels:
            severity: critical
          annotations:
            summary: 'HAProxy high HTTP 4xx error rate backend (proxy {{ $labels.proxy }})'
            description: "Too many HTTP requests with status 4xx (> 5%) on backend {{ $labels.proxy }}\n VALUE = {{ $value }}\n LABELS = {{ $labels }}"

        # More than 5% of responses are 5xx, aggregated per proxy.
        - alert: HAProxyHighHTTP5xxErrorRateBackend
          expr: >-
            ((sum by (proxy) (rate(haproxy_server_http_responses_total{code="5xx"}[1m]))
            / sum by (proxy) (rate(haproxy_server_http_responses_total[1m]))) * 100) > 5
            and sum by (proxy) (rate(haproxy_server_http_responses_total[1m])) > 0
          for: 1m
          labels:
            severity: critical
          annotations:
            summary: 'HAProxy high HTTP 5xx error rate backend (proxy {{ $labels.proxy }})'
            description: "Too many HTTP requests with status 5xx (> 5%) on backend {{ $labels.proxy }}\n VALUE = {{ $value }}\n LABELS = {{ $labels }}"

        # More than 5% of responses are 4xx, aggregated per server.
        - alert: HAProxyHighHTTP4xxErrorRateServer
          expr: >-
            ((sum by (server) (rate(haproxy_server_http_responses_total{code="4xx"}[1m]))
            / sum by (server) (rate(haproxy_server_http_responses_total[1m]))) * 100) > 5
            and sum by (server) (rate(haproxy_server_http_responses_total[1m])) > 0
          for: 1m
          labels:
            severity: critical
          annotations:
            summary: 'HAProxy high HTTP 4xx error rate server (server {{ $labels.server }})'
            description: "Too many HTTP requests with status 4xx (> 5%) on server {{ $labels.server }}\n VALUE = {{ $value }}\n LABELS = {{ $labels }}"

        # More than 5% of responses are 5xx, aggregated per server.
        - alert: HAProxyHighHTTP5xxErrorRateServer
          expr: >-
            ((sum by (server) (rate(haproxy_server_http_responses_total{code="5xx"}[1m]))
            / sum by (server) (rate(haproxy_server_http_responses_total[1m]))) * 100) > 5
            and sum by (server) (rate(haproxy_server_http_responses_total[1m])) > 0
          for: 1m
          labels:
            severity: critical
          annotations:
            summary: 'HAProxy high HTTP 5xx error rate server (server {{ $labels.server }})'
            description: "Too many HTTP requests with status 5xx (> 5%) on server {{ $labels.server }}\n VALUE = {{ $value }}\n LABELS = {{ $labels }}"

        # Response errors exceed 5% of HTTP responses, aggregated per server.
        - alert: HAProxyServerResponseErrors
          expr: >-
            (sum by (server) (rate(haproxy_server_response_errors_total[1m]))
            / sum by (server) (rate(haproxy_server_http_responses_total[1m]))) * 100 > 5
            and sum by (server) (rate(haproxy_server_http_responses_total[1m])) > 0
          for: 1m
          labels:
            severity: critical
          annotations:
            summary: 'HAProxy server response errors (server {{ $labels.server }})'
            description: "Too many response errors to {{ $labels.server }} server (> 5%).\n VALUE = {{ $value }}\n LABELS = {{ $labels }}"

        # --- Connection errors -------------------------------------------

        # Backend connection error rate above 100/s, aggregated per proxy.
        - alert: HAProxyBackendConnectionErrors
          expr: '(sum by (proxy) (rate(haproxy_backend_connection_errors_total[1m]))) > 100'
          for: 1m
          labels:
            severity: critical
          annotations:
            summary: 'HAProxy backend connection errors (proxy {{ $labels.proxy }})'
            description: "Too many connection errors to {{ $labels.proxy }} backend (> 100 req/s). Request throughput may be too high.\n VALUE = {{ $value }}\n LABELS = {{ $labels }}"

        # Server connection error rate above 100/s, aggregated per proxy.
        # "for: 0m" means the alert fires on the first matching evaluation.
        - alert: HAProxyServerConnectionErrors
          expr: '(sum by (proxy) (rate(haproxy_server_connection_errors_total[1m]))) > 100'
          for: 0m
          labels:
            severity: critical
          annotations:
            summary: 'HAProxy server connection errors (proxy {{ $labels.proxy }})'
            description: "Too many connection errors to {{ $labels.proxy }} (> 100 req/s). Request throughput may be too high.\n VALUE = {{ $value }}\n LABELS = {{ $labels }}"

        # --- Saturation and latency --------------------------------------

        # Current sessions above 80% of the session limit (limit must be > 0).
        # The name is quoted because it contains ">" and "%"; it is left
        # unrenamed in case Alertmanager routes or silences match on it.
        - alert: 'HAProxyBackendMaxActiveSession>80%'
          expr: >-
            (haproxy_backend_current_sessions / haproxy_backend_limit_sessions * 100) > 80
            and haproxy_backend_limit_sessions > 0
          for: 2m
          labels:
            severity: warning
          annotations:
            summary: 'HAProxy backend max active session > 80% (instance {{ $labels.instance }})'
            description: "Session limit from backend {{ $labels.proxy }} reached 80% of limit - {{ $value | printf \"%.2f\"}}%\n VALUE = {{ $value }}\n LABELS = {{ $labels }}"

        # Requests are queued on a backend, aggregated per proxy.
        - alert: HAProxyPendingRequests
          expr: 'sum by (proxy) (haproxy_backend_current_queue) > 0'
          for: 2m
          labels:
            severity: warning
          annotations:
            summary: 'HAProxy pending requests (proxy {{ $labels.proxy }})'
            description: "Some HAProxy requests are pending on {{ $labels.proxy }} - {{ $value | printf \"%.2f\"}}\n VALUE = {{ $value }}\n LABELS = {{ $labels }}"

        # Average of the backend max total time exceeds 1 (seconds, per the
        # metric name), grouped per instance and proxy.
        - alert: HAProxyHTTPSlowingDown
          expr: 'avg by (instance, proxy) (haproxy_backend_max_total_time_seconds) > 1'
          for: 1m
          labels:
            severity: warning
          annotations:
            summary: 'HAProxy HTTP slowing down (instance {{ $labels.instance }})'
            description: "HAProxy backend max total time is above 1s on {{ $labels.proxy }} - {{ $value | printf \"%.2f\"}}s\n VALUE = {{ $value }}\n LABELS = {{ $labels }}"

        # Retry warning rate above 10/s, aggregated per proxy.
        - alert: HAProxyRetryHigh
          expr: 'sum by (proxy) (rate(haproxy_backend_retry_warnings_total[1m])) > 10'
          for: 2m
          labels:
            severity: warning
          annotations:
            summary: 'HAProxy retry high (proxy {{ $labels.proxy }})'
            description: "High rate of retry on {{ $labels.proxy }} - {{ $value | printf \"%.2f\"}}\n VALUE = {{ $value }}\n LABELS = {{ $labels }}"

        # --- Availability ------------------------------------------------

        # Active plus backup server count is zero; fires immediately (for: 0m).
        - alert: HAproxyHasNoAliveBackends
          expr: 'haproxy_backend_active_servers + haproxy_backend_backup_servers == 0'
          for: 0m
          labels:
            severity: critical
          annotations:
            summary: 'HAproxy has no alive backends (instance {{ $labels.instance }})'
            description: "HAProxy has no alive active or backup backends for {{ $labels.proxy }}\n VALUE = {{ $value }}\n LABELS = {{ $labels }}"

        # Denied frontend connection rate above 10/s, aggregated per proxy.
        - alert: HAProxyFrontendSecurityBlockedRequests
          expr: 'sum by (proxy) (rate(haproxy_frontend_denied_connections_total[2m])) > 10'
          for: 2m
          labels:
            severity: warning
          annotations:
            summary: 'HAProxy frontend security blocked requests (proxy {{ $labels.proxy }})'
            description: "HAProxy is blocking requests for security reason\n VALUE = {{ $value }}\n LABELS = {{ $labels }}"

        # More than 2 health-check failures within 1m; fires immediately.
        - alert: HAProxyServerHealthcheckFailure
          expr: 'increase(haproxy_server_check_failures_total[1m]) > 2'
          for: 0m
          labels:
            severity: warning
          annotations:
            summary: 'HAProxy server healthcheck failure (instance {{ $labels.instance }})'
            description: "Some server healthcheck are failing on {{ $labels.server }} ({{ $value }} in the last 1m)\n VALUE = {{ $value }}\n LABELS = {{ $labels }}"