--- # Source: kube-prometheus-stack/charts/kube-prometheus-stack/templates/prometheus/rules-1.14/kubernetes-system-apiserver.yaml apiVersion: monitoring.coreos.com/v1 kind: PrometheusRule metadata: name: kube-prometheus-stack-kubernetes-system-apiserver namespace: kube-prometheus-stack labels: app: kube-prometheus-stack app.kubernetes.io/managed-by: Helm app.kubernetes.io/instance: kube-prometheus-stack app.kubernetes.io/version: "79.11.0" app.kubernetes.io/part-of: kube-prometheus-stack chart: kube-prometheus-stack-79.11.0 release: "kube-prometheus-stack" heritage: "Helm" spec: groups: - name: kubernetes-system-apiserver rules: - alert: KubeClientCertificateExpiration annotations: description: A client certificate used to authenticate to kubernetes apiserver is expiring in less than 7.0 days on cluster {{ $labels.cluster }}. runbook_url: https://runbooks.prometheus-operator.dev/runbooks/kubernetes/kubeclientcertificateexpiration summary: Client certificate is about to expire. expr: |- histogram_quantile(0.01, sum without (namespace, service, endpoint) (rate(apiserver_client_certificate_expiration_seconds_bucket{job="apiserver"}[5m]))) < 604800 and on (job, cluster, instance) apiserver_client_certificate_expiration_seconds_count{job="apiserver"} > 0 for: 5m labels: severity: warning - alert: KubeClientCertificateExpiration annotations: description: A client certificate used to authenticate to kubernetes apiserver is expiring in less than 24.0 hours on cluster {{ $labels.cluster }}. runbook_url: https://runbooks.prometheus-operator.dev/runbooks/kubernetes/kubeclientcertificateexpiration summary: Client certificate is about to expire. expr: |- histogram_quantile(0.01, sum without (namespace, service, endpoint) (rate(apiserver_client_certificate_expiration_seconds_bucket{job="apiserver"}[5m]))) < 86400 and on (job, cluster, instance) apiserver_client_certificate_expiration_seconds_count{job="apiserver"} > 0 for: 5m labels: severity: critical - alert: KubeAggregatedAPIErrors annotations: description: Kubernetes aggregated API {{ $labels.instance }}/{{ $labels.name }} has reported {{ $labels.reason }} errors on cluster {{ $labels.cluster }}. runbook_url: https://runbooks.prometheus-operator.dev/runbooks/kubernetes/kubeaggregatedapierrors summary: Kubernetes aggregated API has reported errors. expr: sum by (cluster, instance, name, reason)(increase(aggregator_unavailable_apiservice_total{job="apiserver"}[1m])) > 0 for: 10m labels: severity: warning - alert: KubeAggregatedAPIDown annotations: description: Kubernetes aggregated API {{ $labels.name }}/{{ $labels.namespace }} has been only {{ $value | humanize }}% available over the last 10m on cluster {{ $labels.cluster }}. runbook_url: https://runbooks.prometheus-operator.dev/runbooks/kubernetes/kubeaggregatedapidown summary: Kubernetes aggregated API is down. expr: (1 - max by (name, namespace, cluster)(avg_over_time(aggregator_unavailable_apiservice{job="apiserver"}[10m]))) * 100 < 85 for: 5m labels: severity: warning - alert: KubeAPIDown annotations: description: KubeAPI has disappeared from Prometheus target discovery. runbook_url: https://runbooks.prometheus-operator.dev/runbooks/kubernetes/kubeapidown summary: Target disappeared from Prometheus target discovery. expr: absent(up{job="apiserver"}) for: 15m labels: severity: critical - alert: KubeAPITerminatedRequests annotations: description: The kubernetes apiserver has terminated {{ $value | humanizePercentage }} of its incoming requests on cluster {{ $labels.cluster }}. runbook_url: https://runbooks.prometheus-operator.dev/runbooks/kubernetes/kubeapiterminatedrequests summary: The kubernetes apiserver has terminated {{ $value | humanizePercentage }} of its incoming requests. expr: sum by (cluster) (rate(apiserver_request_terminations_total{job="apiserver"}[10m])) / ( sum by (cluster) (rate(apiserver_request_total{job="apiserver"}[10m])) + sum by (cluster) (rate(apiserver_request_terminations_total{job="apiserver"}[10m])) ) > 0.20 for: 5m labels: severity: warning