Files
infrastructure/clusters/cl01tl/manifests/gatus/gatus.yaml

1528 lines
47 KiB
YAML

---
# Source: gatus/charts/gatus/templates/configmap.yaml
# ConfigMap "gatus" (namespace "gatus"). Its single data key, config.yaml,
# holds the Gatus configuration as a literal block scalar: ntfy alerting,
# a connectivity checker, the monitored endpoints (groups "core", "external"
# and "public"), OIDC security settings and PostgreSQL storage.
# The embedded config uses ${NTFY_TOKEN}, ${OIDC_CLIENT_ID},
# ${OIDC_CLIENT_SECRET} and ${POSTGRES_*} placeholders rather than literal
# values; the gatus Deployment defines environment variables with these names.
# NOTE(review): presumably Gatus expands the placeholders from its environment
# at load time - confirm against the Gatus documentation.
# NOTE(review): the indentation of this document is not visible in this view;
# nesting has to be inferred from key order - confirm against the chart output.
apiVersion: v1
kind: ConfigMap
metadata:
name: gatus
namespace: gatus
labels:
helm.sh/chart: gatus-1.4.4
app.kubernetes.io/name: gatus
app.kubernetes.io/instance: gatus
app.kubernetes.io/version: "v5.33.0"
app.kubernetes.io/managed-by: Helm
data:
config.yaml: |
alerting:
ntfy:
click: https://gatus.alexlebens.net
default-alert:
failure-threshold: 5
send-on-resolved: true
priority: 3
token: ${NTFY_TOKEN}
topic: gatus-alerts
url: http://ntfy.ntfy
connectivity:
checker:
interval: 60s
target: 1.1.1.1:53
default-endpoint:
alerts:
- type: ntfy
conditions:
- '[STATUS] == 200'
- '[CERTIFICATE_EXPIRATION] > 240h'
group: core
interval: 30s
endpoints:
- alerts:
- type: ntfy
conditions:
- '[STATUS] == 401'
group: core
interval: 30s
name: plex
url: http://plex.alexlebens.net
- alerts:
- type: ntfy
conditions:
- '[STATUS] == 200'
- '[CERTIFICATE_EXPIRATION] > 240h'
group: core
interval: 30s
name: jellyfin
url: https://jellyfin.alexlebens.net
- alerts:
- type: ntfy
conditions:
- '[STATUS] == 200'
- '[CERTIFICATE_EXPIRATION] > 240h'
group: core
interval: 30s
name: overseerr
url: https://overseerr.alexlebens.net
- alerts:
- type: ntfy
conditions:
- '[STATUS] == 200'
- '[CERTIFICATE_EXPIRATION] > 240h'
group: core
interval: 30s
name: yamtrack
url: https://yamtrack.alexlebens.net
- alerts:
- type: ntfy
conditions:
- '[STATUS] == 200'
- '[CERTIFICATE_EXPIRATION] > 240h'
group: core
interval: 30s
name: tubearchivist
url: https://tubearchivist.alexlebens.net
- alerts:
- type: ntfy
conditions:
- '[STATUS] == 200'
- '[CERTIFICATE_EXPIRATION] > 240h'
group: core
interval: 30s
name: immich
url: https://immich.alexlebens.net
- alerts:
- type: ntfy
conditions:
- '[STATUS] == 200'
- '[CERTIFICATE_EXPIRATION] > 240h'
group: core
interval: 30s
name: photoview
url: https://photoview.alexlebens.net
- alerts:
- type: ntfy
conditions:
- '[STATUS] == 200'
- '[CERTIFICATE_EXPIRATION] > 240h'
group: core
interval: 30s
name: audiobookshelf
url: https://audiobookshelf.alexlebens.net
- alerts:
- type: ntfy
conditions:
- '[STATUS] == 200'
- '[CERTIFICATE_EXPIRATION] > 240h'
group: core
interval: 30s
name: home-assistant
url: https://home-assistant.alexlebens.net
- alerts:
- type: ntfy
conditions:
- '[STATUS] == 200'
- '[CERTIFICATE_EXPIRATION] > 240h'
group: core
interval: 30s
name: actual
url: https://actual.alexlebens.net
- alerts:
- type: ntfy
conditions:
- '[STATUS] == 200'
- '[CERTIFICATE_EXPIRATION] > 240h'
group: core
interval: 30s
name: ollama
url: https://ollama.alexlebens.net
- alerts:
- type: ntfy
conditions:
- '[STATUS] == 200'
- '[CERTIFICATE_EXPIRATION] > 240h'
group: core
interval: 30s
name: searxng
url: https://searxng.alexlebens.net
- alerts:
- type: ntfy
conditions:
- '[STATUS] == 200'
- '[CERTIFICATE_EXPIRATION] > 240h'
group: core
interval: 30s
name: roundcube
url: https://mail.alexlebens.net
- alerts:
- type: ntfy
conditions:
- '[STATUS] == 200'
- '[CERTIFICATE_EXPIRATION] > 240h'
group: core
interval: 30s
name: kiwix
url: https://kiwix.alexlebens.net
- alerts:
- type: ntfy
conditions:
- '[STATUS] == 200'
- '[CERTIFICATE_EXPIRATION] > 240h'
group: core
interval: 30s
name: gitea
url: https://gitea.alexlebens.net
- alerts:
- type: ntfy
conditions:
- '[STATUS] == 200'
- '[CERTIFICATE_EXPIRATION] > 240h'
group: core
interval: 30s
name: home-assistant-code-server
url: https://home-assistant-code-server.alexlebens.net
- alerts:
- type: ntfy
conditions:
- '[STATUS] == 200'
- '[CERTIFICATE_EXPIRATION] > 240h'
group: core
interval: 30s
name: argocd
url: https://argocd.alexlebens.net
- alerts:
- type: ntfy
conditions:
- '[STATUS] == 200'
- '[CERTIFICATE_EXPIRATION] > 240h'
group: core
interval: 30s
name: komodo
url: https://komodo.alexlebens.net
- alerts:
- type: ntfy
conditions:
- '[STATUS] == 200'
- '[CERTIFICATE_EXPIRATION] > 240h'
group: core
interval: 30s
name: argo-workflows
url: https://argo-workflows.alexlebens.net
- alerts:
- type: ntfy
conditions:
- '[STATUS] == 200'
- '[CERTIFICATE_EXPIRATION] > 240h'
group: core
interval: 30s
name: n8n
url: https://n8n.alexlebens.net
- alerts:
- type: ntfy
conditions:
- '[STATUS] == 200'
- '[CERTIFICATE_EXPIRATION] > 240h'
group: core
interval: 30s
name: omni-tools
url: https://omni-tools.alexlebens.net
- alerts:
- type: ntfy
conditions:
- '[STATUS] == 200'
- '[CERTIFICATE_EXPIRATION] > 240h'
group: core
interval: 30s
name: headlamp
url: https://headlamp.alexlebens.net
- alerts:
- type: ntfy
conditions:
- '[STATUS] == 200'
- '[CERTIFICATE_EXPIRATION] > 240h'
group: core
interval: 30s
name: hubble
url: https://hubble.alexlebens.net
- alerts:
- type: ntfy
conditions:
- '[STATUS] == 200'
- '[CERTIFICATE_EXPIRATION] > 240h'
group: core
interval: 30s
name: grafana
url: https://grafana.alexlebens.net
- alerts:
- type: ntfy
conditions:
- '[STATUS] == 200'
- '[CERTIFICATE_EXPIRATION] > 240h'
group: core
interval: 30s
name: prometheus
url: https://prometheus.alexlebens.net
- alerts:
- type: ntfy
conditions:
- '[STATUS] == 200'
- '[CERTIFICATE_EXPIRATION] > 240h'
group: core
interval: 30s
name: alertmanager
url: https://alertmanager.alexlebens.net
- alerts:
- type: ntfy
conditions:
- '[STATUS] == 200'
- '[CERTIFICATE_EXPIRATION] > 240h'
group: core
interval: 30s
name: tautulli
url: https://tautulli.alexlebens.net
- alerts:
- type: ntfy
conditions:
- '[STATUS] == 200'
- '[CERTIFICATE_EXPIRATION] > 240h'
group: core
interval: 30s
name: jellystat
url: https://jellystat.alexlebens.net
- alerts:
- type: ntfy
conditions:
- '[STATUS] == 200'
- '[CERTIFICATE_EXPIRATION] > 240h'
group: core
interval: 30s
name: authentik
url: https://authentik.alexlebens.net
- alerts:
- type: ntfy
conditions:
- '[STATUS] == 200'
- '[CERTIFICATE_EXPIRATION] > 240h'
group: core
interval: 30s
name: stalwart
url: https://stalwart.alexlebens.net
- alerts:
- type: ntfy
conditions:
- '[STATUS] == 200'
- '[CERTIFICATE_EXPIRATION] > 240h'
group: core
interval: 30s
name: ntfy
url: https://ntfy.alexlebens.net
- alerts:
- type: ntfy
conditions:
- '[STATUS] == 200'
- '[CERTIFICATE_EXPIRATION] > 240h'
group: core
interval: 30s
name: traefik-cl01tl
url: https://traefik-cl01tl.alexlebens.net/dashboard/#/
- alerts:
- type: ntfy
conditions:
- '[STATUS] == 200'
- '[CERTIFICATE_EXPIRATION] > 240h'
group: core
interval: 30s
name: harbor
url: https://harbor.alexlebens.net
- alerts:
- type: ntfy
conditions:
- '[STATUS] == 200'
- '[CERTIFICATE_EXPIRATION] > 240h'
group: core
interval: 30s
name: unifi
url: https://unifi.alexlebens.net
- alerts:
- type: ntfy
client:
insecure: true
conditions:
- '[CONNECTED] == true'
group: core
interval: 30s
name: synology
url: https://synology.alexlebens.net
- alerts:
- type: ntfy
conditions:
- '[STATUS] == 200'
group: core
interval: 30s
name: hdhr
url: http://hdhr.alexlebens.net
- alerts:
- type: ntfy
client:
insecure: true
conditions:
- '[CONNECTED] == true'
group: core
interval: 30s
name: pikvm
url: https://pikvm.alexlebens.net/login/
- alerts:
- type: ntfy
conditions:
- '[STATUS] == 200'
group: core
interval: 30s
name: shelly
url: http://it05sp.alexlebens.net
- alerts:
- type: ntfy
conditions:
- '[STATUS] == 200'
- '[CERTIFICATE_EXPIRATION] > 240h'
group: core
interval: 30s
name: ceph
url: https://ceph.alexlebens.net
- alerts:
- type: ntfy
conditions:
- '[STATUS] == 200'
- '[CERTIFICATE_EXPIRATION] > 240h'
group: core
interval: 30s
name: pgadmin
url: https://pgadmin.alexlebens.net
- alerts:
- type: ntfy
conditions:
- '[STATUS] == 200'
- '[CERTIFICATE_EXPIRATION] > 240h'
group: core
interval: 30s
name: whodb
url: https://whodb.alexlebens.net
- alerts:
- type: ntfy
conditions:
- '[STATUS] == 200'
- '[CERTIFICATE_EXPIRATION] > 240h'
group: core
interval: 30s
name: vault
url: https://vault.alexlebens.net
- alerts:
- type: ntfy
conditions:
- '[STATUS] == 200'
- '[CERTIFICATE_EXPIRATION] > 240h'
group: core
interval: 30s
name: sonarr
url: https://sonarr.alexlebens.net
- alerts:
- type: ntfy
conditions:
- '[STATUS] == 200'
- '[CERTIFICATE_EXPIRATION] > 240h'
group: core
interval: 30s
name: sonarr-4k
url: https://sonarr-4k.alexlebens.net
- alerts:
- type: ntfy
conditions:
- '[STATUS] == 200'
- '[CERTIFICATE_EXPIRATION] > 240h'
group: core
interval: 30s
name: sonarr-anime
url: https://sonarr-anime.alexlebens.net
- alerts:
- type: ntfy
conditions:
- '[STATUS] == 200'
- '[CERTIFICATE_EXPIRATION] > 240h'
group: core
interval: 30s
name: radarr
url: https://radarr.alexlebens.net
- alerts:
- type: ntfy
conditions:
- '[STATUS] == 200'
- '[CERTIFICATE_EXPIRATION] > 240h'
group: core
interval: 30s
name: radarr-4k
url: https://radarr-4k.alexlebens.net
- alerts:
- type: ntfy
conditions:
- '[STATUS] == 200'
- '[CERTIFICATE_EXPIRATION] > 240h'
group: core
interval: 30s
name: radarr-anime
url: https://radarr-anime.alexlebens.net
- alerts:
- type: ntfy
conditions:
- '[STATUS] == 200'
- '[CERTIFICATE_EXPIRATION] > 240h'
group: core
interval: 30s
name: radarr-standup
url: https://radarr-standup.alexlebens.net
- alerts:
- type: ntfy
conditions:
- '[STATUS] == 200'
- '[CERTIFICATE_EXPIRATION] > 240h'
group: core
interval: 30s
name: lidarr
url: https://lidarr.alexlebens.net
- alerts:
- type: ntfy
conditions:
- '[STATUS] == 200'
- '[CERTIFICATE_EXPIRATION] > 240h'
group: core
interval: 30s
name: lidatube
url: https://lidatube.alexlebens.net
- alerts:
- type: ntfy
conditions:
- '[STATUS] == 200'
- '[CERTIFICATE_EXPIRATION] > 240h'
group: core
interval: 30s
name: slskd
url: https://slskd.alexlebens.net
- alerts:
- type: ntfy
conditions:
- '[STATUS] == 200'
- '[CERTIFICATE_EXPIRATION] > 240h'
group: core
interval: 30s
name: qui
url: https://qui.alexlebens.net
- alerts:
- type: ntfy
conditions:
- '[STATUS] == 200'
- '[CERTIFICATE_EXPIRATION] > 240h'
group: core
interval: 30s
name: qbittorrent
url: https://qbittorrent.alexlebens.net
- alerts:
- type: ntfy
conditions:
- '[STATUS] == 200'
- '[CERTIFICATE_EXPIRATION] > 240h'
group: core
interval: 30s
name: prowlarr
url: https://prowlarr.alexlebens.net
- alerts:
- type: ntfy
conditions:
- '[STATUS] == 401'
group: core
interval: 30s
name: bazarr
url: https://bazarr.alexlebens.net
- alerts:
- type: ntfy
conditions:
- '[STATUS] == 200'
- '[CERTIFICATE_EXPIRATION] > 240h'
group: core
interval: 30s
name: huntarr
url: https://huntarr.alexlebens.net
- alerts:
- type: ntfy
conditions:
- '[STATUS] == 200'
- '[CERTIFICATE_EXPIRATION] > 240h'
group: core
interval: 30s
name: tdarr
url: https://tdarr.alexlebens.net
- alerts:
- type: ntfy
conditions:
- '[STATUS] == 200'
- '[CERTIFICATE_EXPIRATION] > 240h'
group: external
interval: 30s
name: www
url: https://www.alexlebens.dev
- alerts:
- type: ntfy
conditions:
- '[STATUS] == 200'
- '[CERTIFICATE_EXPIRATION] > 240h'
group: external
interval: 30s
name: directus
url: https://directus.alexlebens.dev
- alerts:
- type: ntfy
conditions:
- '[STATUS] == 200'
- '[CERTIFICATE_EXPIRATION] > 240h'
group: external
interval: 120s
name: postiz
url: https://postiz.alexlebens.dev
- alerts:
- type: ntfy
conditions:
- '[STATUS] == 200'
- '[CERTIFICATE_EXPIRATION] > 240h'
group: external
interval: 30s
name: matrix
url: https://chat.alexlebens.dev
- alerts:
- type: ntfy
conditions:
- '[STATUS] == 200'
- '[CERTIFICATE_EXPIRATION] > 240h'
group: external
interval: 30s
name: outline
url: https://wiki.alexlebens.dev
- alerts:
- type: ntfy
conditions:
- '[STATUS] == 200'
- '[CERTIFICATE_EXPIRATION] > 240h'
group: external
interval: 30s
name: vaultwarden
url: https://passwords.alexlebens.dev
- alerts:
- type: ntfy
conditions:
- '[STATUS] == 200'
- '[CERTIFICATE_EXPIRATION] > 240h'
group: external
interval: 30s
name: karakeep
url: https://karakeep.alexlebens.dev
- alerts:
- type: ntfy
conditions:
- '[STATUS] == 401'
group: external
interval: 30s
name: freshrss
url: https://rss.alexlebens.dev/i/
- alerts:
- type: ntfy
conditions:
- '[STATUS] == 200'
- '[CERTIFICATE_EXPIRATION] > 240h'
group: external
interval: 30s
name: gitea-external
url: https://gitea.alexlebens.dev
- alerts:
- type: ntfy
conditions:
- '[STATUS] == 200'
- '[CERTIFICATE_EXPIRATION] > 240h'
group: external
interval: 30s
name: codeserver
url: https://codeserver.alexlebens.dev
- alerts:
- type: ntfy
conditions:
- '[STATUS] == 200'
- '[CERTIFICATE_EXPIRATION] > 240h'
group: external
interval: 30s
name: public homepage
url: https://home.alexlebens.dev
- conditions:
- '[STATUS] == 200'
- '[RESPONSE_TIME] < 400'
group: public
interval: 10s
name: discord
url: https://discord.com/app
- conditions:
- '[STATUS] == 200'
- '[RESPONSE_TIME] < 400'
group: public
interval: 10s
name: reddit
url: https://reddit.com
metrics: true
security:
oidc:
client-id: ${OIDC_CLIENT_ID}
client-secret: ${OIDC_CLIENT_SECRET}
issuer-url: https://authentik.alexlebens.net/application/o/gatus/
redirect-url: https://gatus.alexlebens.net/authorization-code/callback
scopes:
- openid
storage:
path: postgres://${POSTGRES_USER}:${POSTGRES_PASSWORD}@${POSTGRES_HOST}:${POSTGRES_PORT}/${POSTGRES_DB}?sslmode=disable
type: postgres
---
# Source: gatus/charts/gatus/templates/pvc.yaml
# 1Gi ReadWriteOnce claim on the ceph-block storage class. The gatus
# Deployment references it by name (claimName: gatus) for its data volume.
apiVersion: v1
kind: PersistentVolumeClaim
metadata:
  name: gatus
  namespace: gatus
  # Keeps the claim from being removed while it is still in use.
  finalizers:
    - kubernetes.io/pvc-protection
  labels:
    app.kubernetes.io/instance: gatus
    app.kubernetes.io/managed-by: Helm
    app.kubernetes.io/name: gatus
    app.kubernetes.io/version: 'v5.33.0'
    helm.sh/chart: gatus-1.4.4
spec:
  storageClassName: ceph-block
  accessModes:
    - ReadWriteOnce
  resources:
    requests:
      storage: 1Gi
---
# Source: gatus/charts/gatus/templates/service.yaml
# Cluster-internal Service exposing the gatus pods on port 80. Traffic is
# forwarded to the container port named "http".
apiVersion: v1
kind: Service
metadata:
  name: gatus
  namespace: gatus
  labels:
    app.kubernetes.io/instance: gatus
    app.kubernetes.io/managed-by: Helm
    app.kubernetes.io/name: gatus
    app.kubernetes.io/version: 'v5.33.0'
    helm.sh/chart: gatus-1.4.4
spec:
  type: ClusterIP
  # Must match the pod template labels of the gatus Deployment.
  selector:
    app.kubernetes.io/instance: gatus
    app.kubernetes.io/name: gatus
  ports:
    - name: http
      protocol: TCP
      port: 80
      targetPort: http
---
# Source: gatus/charts/gatus/templates/deployment.yaml
# Single-replica Gatus workload. Configuration comes from the "gatus"
# ConfigMap mounted read-only at /config; secrets are injected as
# environment variables whose names match the ${...} placeholders used in
# that configuration.
apiVersion: apps/v1
kind: Deployment
metadata:
  name: gatus
  namespace: gatus
  annotations:
    # Stakater Reloader annotation; the value must stay a string.
    reloader.stakater.com/auto: 'true'
  labels:
    app.kubernetes.io/instance: gatus
    app.kubernetes.io/managed-by: Helm
    app.kubernetes.io/name: gatus
    app.kubernetes.io/version: 'v5.33.0'
    helm.sh/chart: gatus-1.4.4
spec:
  replicas: 1
  revisionHistoryLimit: 10
  # NOTE(review): Recreate is presumably required because the data volume is
  # a ReadWriteOnce claim - confirm.
  strategy:
    type: Recreate
  selector:
    matchLabels:
      app.kubernetes.io/instance: gatus
      app.kubernetes.io/name: gatus
  template:
    metadata:
      annotations:
        # Hash emitted by the chart; a new value rolls the pod.
        checksum/config: '31c72cc890f0e181e18d8c8f38bec005f1ea27d18aeaebc241351d1e3d5dd21d'
      labels:
        app.kubernetes.io/instance: gatus
        app.kubernetes.io/name: gatus
    spec:
      serviceAccountName: default
      automountServiceAccountToken: false
      securityContext:
        fsGroup: 65534
      volumes:
        - name: gatus-config
          configMap:
            name: gatus
        - name: gatus-data
          persistentVolumeClaim:
            claimName: gatus
      containers:
        - name: gatus
          image: ghcr.io/twin/gatus:v5.33.0
          imagePullPolicy: IfNotPresent
          # Runs as uid/gid 65534 with an immutable root filesystem.
          securityContext:
            runAsNonRoot: true
            runAsUser: 65534
            runAsGroup: 65534
            readOnlyRootFilesystem: true
          ports:
            - name: http
              protocol: TCP
              containerPort: 8080
          # NOTE(review): the "gatus" ConfigMap only defines the key
          # "config.yaml" - confirm this envFrom is actually needed.
          envFrom:
            - configMapRef:
                name: gatus
          env:
            # ntfy alerting token.
            - name: NTFY_TOKEN
              valueFrom:
                secretKeyRef:
                  name: gatus-config-secret
                  key: NTFY_TOKEN
            # OIDC client credentials.
            - name: OIDC_CLIENT_ID
              valueFrom:
                secretKeyRef:
                  name: gatus-oidc-secret
                  key: OIDC_CLIENT_ID
            - name: OIDC_CLIENT_SECRET
              valueFrom:
                secretKeyRef:
                  name: gatus-oidc-secret
                  key: OIDC_CLIENT_SECRET
            # PostgreSQL connection parts, all read from the
            # gatus-postgresql-17-cluster-app secret.
            - name: POSTGRES_DB
              valueFrom:
                secretKeyRef:
                  name: gatus-postgresql-17-cluster-app
                  key: dbname
            - name: POSTGRES_HOST
              valueFrom:
                secretKeyRef:
                  name: gatus-postgresql-17-cluster-app
                  key: host
            - name: POSTGRES_PASSWORD
              valueFrom:
                secretKeyRef:
                  name: gatus-postgresql-17-cluster-app
                  key: password
            - name: POSTGRES_PORT
              valueFrom:
                secretKeyRef:
                  name: gatus-postgresql-17-cluster-app
                  key: port
            - name: POSTGRES_USER
              valueFrom:
                secretKeyRef:
                  name: gatus-postgresql-17-cluster-app
                  key: username
          volumeMounts:
            - name: gatus-config
              mountPath: /config
              readOnly: true
            - name: gatus-data
              mountPath: /data
          # Both probes hit the same /health endpoint on the http port.
          readinessProbe:
            httpGet:
              path: /health
              port: http
          livenessProbe:
            httpGet:
              path: /health
              port: http
          resources:
            requests:
              cpu: 10m
              memory: 128Mi
---
# Source: gatus/charts/postgres-17-cluster/templates/cluster.yaml
# Three-instance CloudNativePG cluster for Gatus, bootstrapped by recovery
# from the external cluster "gatus-postgresql-17-backup-1".
apiVersion: postgresql.cnpg.io/v1
kind: Cluster
metadata:
  name: gatus-postgresql-17-cluster
  namespace: gatus
  labels:
    helm.sh/chart: postgres-17-cluster-6.16.0
    app.kubernetes.io/name: gatus-postgresql-17
    app.kubernetes.io/instance: gatus
    app.kubernetes.io/part-of: gatus
    app.kubernetes.io/version: "6.16.0"
    app.kubernetes.io/managed-by: Helm
spec:
  instances: 3
  imageName: "ghcr.io/cloudnative-pg/postgresql:17.7-standard-trixie"
  imagePullPolicy: IfNotPresent
  postgresUID: 26
  postgresGID: 26
  # Two Barman Cloud plugin entries: the external object store is used for
  # backups only, the Garage object store is the WAL archiver.
  plugins:
    - name: barman-cloud.cloudnative-pg.io
      enabled: true
      isWALArchiver: false
      parameters:
        barmanObjectName: "gatus-postgresql-17-external-backup"
        serverName: "gatus-postgresql-17-backup-2"
    - name: barman-cloud.cloudnative-pg.io
      enabled: true
      isWALArchiver: true
      parameters:
        barmanObjectName: "gatus-postgresql-17-garage-local-backup"
        serverName: "gatus-postgresql-17-backup-1"
  storage:
    size: 10Gi
    storageClass: local-path
  walStorage:
    size: 2Gi
    storageClass: local-path
  resources:
    limits:
      hugepages-2Mi: 256Mi
    requests:
      cpu: 100m
      memory: 256Mi
  # Spread instances across nodes.
  affinity:
    enablePodAntiAffinity: true
    topologyKey: kubernetes.io/hostname
  primaryUpdateMethod: switchover
  primaryUpdateStrategy: unsupervised
  logLevel: info
  enableSuperuserAccess: false
  enablePDB: true
  postgresql:
    parameters:
      # Quoted so the value stays the string "on" rather than a boolean.
      hot_standby_feedback: "on"
      max_slot_wal_keep_size: 2000MB
      shared_buffers: 128MB
  monitoring:
    enablePodMonitor: true
    disableDefaultQueries: false
  bootstrap:
    recovery:
      database: app
      # Must name an entry of externalClusters below.
      source: gatus-postgresql-17-backup-1
  # The original document declared `externalClusters` twice under `spec`.
  # Duplicate mapping keys are invalid YAML; most parsers silently keep only
  # the last one and strict validators reject the document. Both entries are
  # kept here in a single list.
  externalClusters:
    - name: gatus-postgresql-17-backup-1
      plugin:
        name: barman-cloud.cloudnative-pg.io
        enabled: true
        isWALArchiver: false
        parameters:
          barmanObjectName: "gatus-postgresql-17-recovery"
          serverName: gatus-postgresql-17-backup-1
    # NOTE(review): nothing in this document references "recovery"; it was
    # the entry shadowed by the duplicate key - confirm whether it is needed.
    - name: recovery
      plugin:
        name: barman-cloud.cloudnative-pg.io
        parameters:
          barmanObjectName: "gatus-postgresql-17-recovery"
          serverName: gatus-postgresql-17-backup-1
---
# Source: gatus/templates/external-secret.yaml
# Materialises the secret "gatus-config-secret" from the "vault"
# ClusterSecretStore. It carries the NTFY_TOKEN consumed by the gatus
# Deployment.
apiVersion: external-secrets.io/v1
kind: ExternalSecret
metadata:
  name: gatus-config-secret
  namespace: gatus
  labels:
    app.kubernetes.io/instance: gatus
    app.kubernetes.io/name: gatus-config-secret
    app.kubernetes.io/part-of: gatus
spec:
  secretStoreRef:
    name: vault
    kind: ClusterSecretStore
  data:
    - secretKey: NTFY_TOKEN
      remoteRef:
        key: /ntfy/user/cl01tl
        property: token
        conversionStrategy: Default
        # 'None' is the literal string, not YAML null.
        decodingStrategy: 'None'
        metadataPolicy: 'None'
---
# Source: gatus/templates/external-secret.yaml
# Materialises the secret "gatus-oidc-secret" from the "vault"
# ClusterSecretStore. Both keys are read from the same remote entry,
# /authentik/oidc/gatus.
apiVersion: external-secrets.io/v1
kind: ExternalSecret
metadata:
  name: gatus-oidc-secret
  namespace: gatus
  labels:
    app.kubernetes.io/instance: gatus
    app.kubernetes.io/name: gatus-oidc-secret
    app.kubernetes.io/part-of: gatus
spec:
  secretStoreRef:
    name: vault
    kind: ClusterSecretStore
  data:
    - secretKey: OIDC_CLIENT_ID
      remoteRef:
        key: /authentik/oidc/gatus
        property: client
        conversionStrategy: Default
        # 'None' is the literal string, not YAML null.
        decodingStrategy: 'None'
        metadataPolicy: 'None'
    - secretKey: OIDC_CLIENT_SECRET
      remoteRef:
        key: /authentik/oidc/gatus
        property: secret
        conversionStrategy: Default
        decodingStrategy: 'None'
        metadataPolicy: 'None'
---
# Source: gatus/templates/external-secret.yaml
# S3 credentials referenced by the "gatus-postgresql-17-external-backup"
# ObjectStore, read from /digital-ocean/home-infra/postgres-backups in the
# "vault" ClusterSecretStore.
apiVersion: external-secrets.io/v1
kind: ExternalSecret
metadata:
  name: gatus-postgresql-17-cluster-backup-secret
  namespace: gatus
  labels:
    app.kubernetes.io/instance: gatus
    app.kubernetes.io/name: gatus-postgresql-17-cluster-backup-secret
    app.kubernetes.io/part-of: gatus
spec:
  secretStoreRef:
    name: vault
    kind: ClusterSecretStore
  data:
    - secretKey: ACCESS_KEY_ID
      remoteRef:
        key: /digital-ocean/home-infra/postgres-backups
        property: access
        conversionStrategy: Default
        # 'None' is the literal string, not YAML null.
        decodingStrategy: 'None'
        metadataPolicy: 'None'
    - secretKey: ACCESS_SECRET_KEY
      remoteRef:
        key: /digital-ocean/home-infra/postgres-backups
        property: secret
        conversionStrategy: Default
        decodingStrategy: 'None'
        metadataPolicy: 'None'
---
# Source: gatus/templates/external-secret.yaml
# Garage S3 credentials (key id, secret key and region) referenced by the
# Garage-backed ObjectStores, read from /garage/home-infra/postgres-backups
# in the "vault" ClusterSecretStore.
apiVersion: external-secrets.io/v1
kind: ExternalSecret
metadata:
  name: gatus-postgresql-17-cluster-backup-secret-garage
  namespace: gatus
  labels:
    app.kubernetes.io/instance: gatus
    app.kubernetes.io/name: gatus-postgresql-17-cluster-backup-secret-garage
    app.kubernetes.io/part-of: gatus
spec:
  secretStoreRef:
    name: vault
    kind: ClusterSecretStore
  data:
    - secretKey: ACCESS_KEY_ID
      remoteRef:
        key: /garage/home-infra/postgres-backups
        property: ACCESS_KEY_ID
        conversionStrategy: Default
        # 'None' is the literal string, not YAML null.
        decodingStrategy: 'None'
        metadataPolicy: 'None'
    - secretKey: ACCESS_SECRET_KEY
      remoteRef:
        key: /garage/home-infra/postgres-backups
        property: ACCESS_SECRET_KEY
        conversionStrategy: Default
        decodingStrategy: 'None'
        metadataPolicy: 'None'
    - secretKey: ACCESS_REGION
      remoteRef:
        key: /garage/home-infra/postgres-backups
        property: ACCESS_REGION
        conversionStrategy: Default
        decodingStrategy: 'None'
        metadataPolicy: 'None'
---
# Source: gatus/templates/http-route.yaml
# Routes every path of gatus.alexlebens.net from the traefik-gateway Gateway
# (namespace "traefik") to the gatus Service on port 80.
apiVersion: gateway.networking.k8s.io/v1
kind: HTTPRoute
metadata:
  name: http-route-gatus
  namespace: gatus
  labels:
    app.kubernetes.io/instance: gatus
    app.kubernetes.io/name: http-route-gatus
    app.kubernetes.io/part-of: gatus
spec:
  hostnames:
    - gatus.alexlebens.net
  parentRefs:
    - group: gateway.networking.k8s.io
      kind: Gateway
      name: traefik-gateway
      namespace: traefik
  rules:
    - matches:
        - path:
            type: PathPrefix
            value: /
      backendRefs:
        # The empty group selects the core API group (a plain Service).
        - group: ""
          kind: Service
          name: gatus
          port: 80
          weight: 100
---
# Source: gatus/charts/postgres-17-cluster/templates/object-store.yaml
# Off-site backup target on DigitalOcean Spaces (nyc3) with 30-day
# retention. Referenced by the Cluster's non-WAL-archiving plugin entry.
apiVersion: barmancloud.cnpg.io/v1
kind: ObjectStore
metadata:
  name: gatus-postgresql-17-external-backup
  namespace: gatus
  labels:
    app.kubernetes.io/instance: gatus
    app.kubernetes.io/managed-by: Helm
    app.kubernetes.io/name: gatus-postgresql-17
    app.kubernetes.io/part-of: gatus
    app.kubernetes.io/version: '6.16.0'
    helm.sh/chart: postgres-17-cluster-6.16.0
spec:
  retentionPolicy: 30d
  configuration:
    endpointURL: https://nyc3.digitaloceanspaces.com
    destinationPath: s3://postgres-backups-ce540ddf106d186bbddca68a/cl01tl/gatus/gatus-postgresql-17-cluster
    # Both credentials come from the gatus-postgresql-17-cluster-backup-secret
    # secret.
    s3Credentials:
      accessKeyId:
        name: gatus-postgresql-17-cluster-backup-secret
        key: ACCESS_KEY_ID
      secretAccessKey:
        name: gatus-postgresql-17-cluster-backup-secret
        key: ACCESS_SECRET_KEY
---
# Source: gatus/charts/postgres-17-cluster/templates/object-store.yaml
# In-cluster backup target on Garage (garage-main.garage:3900) with 3-day
# retention. Referenced by the Cluster's WAL-archiving plugin entry.
apiVersion: barmancloud.cnpg.io/v1
kind: ObjectStore
metadata:
  name: gatus-postgresql-17-garage-local-backup
  namespace: gatus
  labels:
    app.kubernetes.io/instance: gatus
    app.kubernetes.io/managed-by: Helm
    app.kubernetes.io/name: gatus-postgresql-17
    app.kubernetes.io/part-of: gatus
    app.kubernetes.io/version: '6.16.0'
    helm.sh/chart: postgres-17-cluster-6.16.0
spec:
  retentionPolicy: 3d
  configuration:
    endpointURL: http://garage-main.garage:3900
    destinationPath: s3://postgres-backups/cl01tl/gatus/gatus-postgresql-17-cluster
    # Key id, secret key and region all come from the
    # gatus-postgresql-17-cluster-backup-secret-garage secret.
    s3Credentials:
      accessKeyId:
        name: gatus-postgresql-17-cluster-backup-secret-garage
        key: ACCESS_KEY_ID
      secretAccessKey:
        name: gatus-postgresql-17-cluster-backup-secret-garage
        key: ACCESS_SECRET_KEY
      region:
        name: gatus-postgresql-17-cluster-backup-secret-garage
        key: ACCESS_REGION
---
# Source: gatus/charts/postgres-17-cluster/templates/object-store.yaml
# Object store the Cluster bootstraps from (referenced by its
# externalClusters entries). Same Garage endpoint, bucket path and secret as
# the "gatus-postgresql-17-garage-local-backup" store.
apiVersion: barmancloud.cnpg.io/v1
kind: ObjectStore
metadata:
  name: "gatus-postgresql-17-recovery"
  namespace: gatus
  labels:
    helm.sh/chart: postgres-17-cluster-6.16.0
    app.kubernetes.io/name: gatus-postgresql-17
    app.kubernetes.io/instance: gatus
    app.kubernetes.io/part-of: gatus
    app.kubernetes.io/version: "6.16.0"
    app.kubernetes.io/managed-by: Helm
spec:
  configuration:
    destinationPath: s3://postgres-backups/cl01tl/gatus/gatus-postgresql-17-cluster
    endpointURL: http://garage-main.garage:3900
    wal:
      compression: snappy
      maxParallel: 1
    data:
      compression: snappy
      jobs: 1
    s3Credentials:
      accessKeyId:
        name: gatus-postgresql-17-cluster-backup-secret-garage
        key: ACCESS_KEY_ID
      secretAccessKey:
        name: gatus-postgresql-17-cluster-backup-secret-garage
        key: ACCESS_SECRET_KEY
      # Added for consistency with the garage-local-backup store, which
      # targets the same endpoint and passes the region from the same secret.
      # NOTE(review): presumably Garage rejects requests signed for a
      # different region - confirm before relying on this.
      region:
        name: gatus-postgresql-17-cluster-backup-secret-garage
        key: ACCESS_REGION
---
# Source: gatus/charts/postgres-17-cluster/templates/prometheus-rule.yaml
apiVersion: monitoring.coreos.com/v1
kind: PrometheusRule
metadata:
name: gatus-postgresql-17-alert-rules
namespace: gatus
labels:
helm.sh/chart: postgres-17-cluster-6.16.0
app.kubernetes.io/name: gatus-postgresql-17
app.kubernetes.io/instance: gatus
app.kubernetes.io/part-of: gatus
app.kubernetes.io/version: "6.16.0"
app.kubernetes.io/managed-by: Helm
spec:
groups:
- name: cloudnative-pg/gatus-postgresql-17
rules:
# Warns when the waiting-backends metric of an instance of this cluster
# stays above 300 for one minute.
- alert: CNPGClusterBackendsWaitingWarning
  annotations:
    summary: CNPG Cluster a backend is waiting for longer than 5 minutes.
    description: |-
      Pod {{ $labels.pod }}
      has been waiting for longer than 5 minutes
  # Scoped to this cluster's pods, like the sibling rules. The unscoped
  # expression matched every CNPG pod and stamped the alert with the fixed
  # namespace/cnpg_cluster labels below.
  expr: |
    cnpg_backends_waiting_total{namespace="gatus", pod=~"gatus-postgresql-17-cluster-([1-9][0-9]*)$"} > 300
  for: 1m
  labels:
    severity: warning
    namespace: gatus
    cnpg_cluster: gatus-postgresql-17-cluster
# Warns when the deadlock counter of an instance of this cluster stays above
# 10 for one minute.
- alert: CNPGClusterDatabaseDeadlockConflictsWarning
  annotations:
    summary: CNPG Cluster has over 10 deadlock conflicts.
    description: |-
      There are over 10 deadlock conflicts in
      {{ $labels.pod }}
  # Scoped to this cluster's pods, like the sibling rules. The unscoped
  # expression matched every CNPG pod and stamped the alert with the fixed
  # namespace/cnpg_cluster labels below.
  expr: |
    cnpg_pg_stat_database_deadlocks{namespace="gatus", pod=~"gatus-postgresql-17-cluster-([1-9][0-9]*)$"} > 10
  for: 1m
  labels:
    severity: warning
    namespace: gatus
    cnpg_cluster: gatus-postgresql-17-cluster
# Critical when, for five minutes, the cluster reports fewer than one
# streaming standby replica.
- alert: CNPGClusterHACritical
  annotations:
    summary: CNPG Cluster has no standby replicas!
    # Uses plain Prometheus template actions. The Helm-escaped form that was
    # left in the rendered manifest would print the braces literally instead
    # of the label value. Three typos in the text are also corrected.
    description: |-
      CloudNativePG Cluster "{{ $labels.job }}" has no ready standby replicas. Your cluster is at a severe
      risk of data loss and downtime if the primary instance fails.
      The primary instance is still online and able to serve queries, although connections to the `-ro` endpoint
      will fail. The `-r` endpoint is operating at reduced capacity and all traffic is being served by the main.
      This can happen during a normal fail-over or automated minor version upgrades in a cluster with 2 or less
      instances. The replaced instance may need some time to catch-up with the cluster primary instance.
      This alarm will always trigger if your cluster is configured to run with only 1 instance. In this
      case you may want to silence it.
    runbook_url: https://github.com/cloudnative-pg/charts/blob/main/charts/cluster/docs/runbooks/CNPGClusterHACritical.md
  expr: |
    max by (job) (cnpg_pg_replication_streaming_replicas{namespace="gatus"} - cnpg_pg_replication_is_wal_receiver_up{namespace="gatus"}) < 1
  for: 5m
  labels:
    severity: critical
    namespace: gatus
    cnpg_cluster: gatus-postgresql-17-cluster
# Warns when, for five minutes, the cluster reports fewer than two streaming
# standby replicas.
- alert: CNPGClusterHAWarning
  annotations:
    summary: CNPG Cluster less than 2 standby replicas.
    # Uses plain Prometheus template actions. The Helm-escaped form that was
    # left in the rendered manifest would print the braces literally instead
    # of the label and value.
    description: |-
      CloudNativePG Cluster "{{ $labels.job }}" has only {{ $value }} standby replicas, putting
      your cluster at risk if another instance fails. The cluster is still able to operate normally, although
      the `-ro` and `-r` endpoints operate at reduced capacity.
      This can happen during a normal fail-over or automated minor version upgrades. The replaced instance may
      need some time to catch-up with the cluster primary instance.
      This alarm will be constantly triggered if your cluster is configured to run with less than 3 instances.
      In this case you may want to silence it.
    runbook_url: https://github.com/cloudnative-pg/charts/blob/main/charts/cluster/docs/runbooks/CNPGClusterHAWarning.md
  expr: |
    max by (job) (cnpg_pg_replication_streaming_replicas{namespace="gatus"} - cnpg_pg_replication_is_wal_receiver_up{namespace="gatus"}) < 2
  for: 5m
  labels:
    severity: warning
    namespace: gatus
    cnpg_cluster: gatus-postgresql-17-cluster
# Critical when an instance uses more than 95% of max_connections for five
# minutes.
- alert: CNPGClusterHighConnectionsCritical
  annotations:
    summary: CNPG Instance maximum number of connections critical!
    # Uses plain Prometheus template actions. The Helm-escaped form that was
    # left in the rendered manifest would print the braces literally instead
    # of the pod name and value.
    description: |-
      CloudNativePG Cluster "gatus/gatus-postgresql-17-cluster" instance {{ $labels.pod }} is using {{ $value }}% of
      the maximum number of connections.
    runbook_url: https://github.com/cloudnative-pg/charts/blob/main/charts/cluster/docs/runbooks/CNPGClusterHighConnectionsCritical.md
  expr: |
    sum by (pod) (cnpg_backends_total{namespace="gatus", pod=~"gatus-postgresql-17-cluster-([1-9][0-9]*)$"}) / max by (pod) (cnpg_pg_settings_setting{name="max_connections", namespace="gatus", pod=~"gatus-postgresql-17-cluster-([1-9][0-9]*)$"}) * 100 > 95
  for: 5m
  labels:
    severity: critical
    namespace: gatus
    cnpg_cluster: gatus-postgresql-17-cluster
# Warns when an instance uses more than 80% of max_connections for five
# minutes.
- alert: CNPGClusterHighConnectionsWarning
  annotations:
    summary: CNPG Instance is approaching the maximum number of connections.
    # Uses plain Prometheus template actions. The Helm-escaped form that was
    # left in the rendered manifest would print the braces literally instead
    # of the pod name and value.
    description: |-
      CloudNativePG Cluster "gatus/gatus-postgresql-17-cluster" instance {{ $labels.pod }} is using {{ $value }}% of
      the maximum number of connections.
    runbook_url: https://github.com/cloudnative-pg/charts/blob/main/charts/cluster/docs/runbooks/CNPGClusterHighConnectionsWarning.md
  expr: |
    sum by (pod) (cnpg_backends_total{namespace="gatus", pod=~"gatus-postgresql-17-cluster-([1-9][0-9]*)$"}) / max by (pod) (cnpg_pg_settings_setting{name="max_connections", namespace="gatus", pod=~"gatus-postgresql-17-cluster-([1-9][0-9]*)$"}) * 100 > 80
  for: 5m
  labels:
    severity: warning
    namespace: gatus
    cnpg_cluster: gatus-postgresql-17-cluster
# Warns when the highest replication lag among the cluster's instances,
# scaled by 1000, stays above 1000 for five minutes.
- alert: CNPGClusterHighReplicationLag
  annotations:
    summary: CNPG Cluster high replication lag
    # Uses a plain Prometheus template action. The Helm-escaped form that was
    # left in the rendered manifest would print the braces literally instead
    # of the measured value.
    description: |-
      CloudNativePG Cluster "gatus/gatus-postgresql-17-cluster" is experiencing a high replication lag of
      {{ $value }}ms.
      High replication lag indicates network issues, busy instances, slow queries or suboptimal configuration.
    runbook_url: https://github.com/cloudnative-pg/charts/blob/main/charts/cluster/docs/runbooks/CNPGClusterHighReplicationLag.md
  expr: |
    max(cnpg_pg_replication_lag{namespace="gatus",pod=~"gatus-postgresql-17-cluster-([1-9][0-9]*)$"}) * 1000 > 1000
  for: 5m
  labels:
    severity: warning
    namespace: gatus
    cnpg_cluster: gatus-postgresql-17-cluster
# Warns when more than one instance pod of the cluster is reported on the
# same node for five minutes.
- alert: CNPGClusterInstancesOnSameNode
  annotations:
    summary: CNPG Cluster instances are located on the same node.
    # Uses plain Prometheus template actions. The Helm-escaped form that was
    # left in the rendered manifest would print the braces literally instead
    # of the count and node name.
    description: |-
      CloudNativePG Cluster "gatus/gatus-postgresql-17-cluster" has {{ $value }}
      instances on the same node {{ $labels.node }}.
      A failure or scheduled downtime of a single node will lead to a potential service disruption and/or data loss.
    runbook_url: https://github.com/cloudnative-pg/charts/blob/main/charts/cluster/docs/runbooks/CNPGClusterInstancesOnSameNode.md
  expr: |
    count by (node) (kube_pod_info{namespace="gatus", pod=~"gatus-postgresql-17-cluster-([1-9][0-9]*)$"}) > 1
  for: 5m
  labels:
    severity: warning
    namespace: gatus
    cnpg_cluster: gatus-postgresql-17-cluster
# Warns when the longest transaction on an instance of this cluster has run
# for more than 300 seconds, sustained for one minute.
- alert: CNPGClusterLongRunningTransactionWarning
  annotations:
    summary: CNPG Cluster query is taking longer than 5 minutes.
    description: |-
      CloudNativePG Cluster Pod {{ $labels.pod }}
      is taking more than 5 minutes (300 seconds) for a query.
  # Scoped to this cluster's pods, like the sibling rules. The unscoped
  # expression matched every CNPG pod and stamped the alert with the fixed
  # namespace/cnpg_cluster labels below.
  expr: |-
    cnpg_backends_max_tx_duration_seconds{namespace="gatus", pod=~"gatus-postgresql-17-cluster-([1-9][0-9]*)$"} > 300
  for: 1m
  labels:
    severity: warning
    namespace: gatus
    cnpg_cluster: gatus-postgresql-17-cluster
# Critical when any data, WAL or tablespace volume of the cluster is more
# than 90% full for five minutes.
- alert: CNPGClusterLowDiskSpaceCritical
  annotations:
    summary: CNPG Instance is running out of disk space!
    description: |-
      CloudNativePG Cluster "gatus/gatus-postgresql-17-cluster" is running extremely low on disk space. Check attached PVCs!
    runbook_url: https://github.com/cloudnative-pg/charts/blob/main/charts/cluster/docs/runbooks/CNPGClusterLowDiskSpaceCritical.md
  # The WAL and tablespace selectors previously had the end anchor in the
  # middle of the pattern ("...)$-wal", "...)$-tbs.*"), which can never match,
  # so those two checks never fired. The anchor is removed from the middle;
  # the WAL pattern is anchored at its end instead.
  expr: |
    max(max by(persistentvolumeclaim) (1 - kubelet_volume_stats_available_bytes{namespace="gatus", persistentvolumeclaim=~"gatus-postgresql-17-cluster-([1-9][0-9]*)$"} / kubelet_volume_stats_capacity_bytes{namespace="gatus", persistentvolumeclaim=~"gatus-postgresql-17-cluster-([1-9][0-9]*)$"})) > 0.9 OR
    max(max by(persistentvolumeclaim) (1 - kubelet_volume_stats_available_bytes{namespace="gatus", persistentvolumeclaim=~"gatus-postgresql-17-cluster-([1-9][0-9]*)-wal$"} / kubelet_volume_stats_capacity_bytes{namespace="gatus", persistentvolumeclaim=~"gatus-postgresql-17-cluster-([1-9][0-9]*)-wal$"})) > 0.9 OR
    max(sum by (namespace,persistentvolumeclaim) (kubelet_volume_stats_used_bytes{namespace="gatus", persistentvolumeclaim=~"gatus-postgresql-17-cluster-([1-9][0-9]*)-tbs.*"})
        /
        sum by (namespace,persistentvolumeclaim) (kubelet_volume_stats_capacity_bytes{namespace="gatus", persistentvolumeclaim=~"gatus-postgresql-17-cluster-([1-9][0-9]*)-tbs.*"})
        *
        on(namespace, persistentvolumeclaim) group_left(volume)
        kube_pod_spec_volumes_persistentvolumeclaims_info{pod=~"gatus-postgresql-17-cluster-([1-9][0-9]*)$"}
    ) > 0.9
  for: 5m
  labels:
    severity: critical
    namespace: gatus
    cnpg_cluster: gatus-postgresql-17-cluster
# Warning raised after 5 minutes when any volume of this cluster is more than
# 70% full. Three PVC families are checked and OR-ed together:
#   1. data PVCs        gatus-postgresql-17-cluster-<n>
#   2. WAL PVCs         gatus-postgresql-17-cluster-<n>-wal
#   3. tablespace PVCs  gatus-postgresql-17-cluster-<n>-tbs...
# Prometheus label regexes are fully anchored, so a `$` placed before the
# `-wal` / `-tbs.*` suffix can never match. The suffix matchers are therefore
# written without it so clauses 2 and 3 can select series.
- alert: CNPGClusterLowDiskSpaceWarning
  annotations:
    summary: CNPG Instance is running out of disk space.
    description: |-
      CloudNativePG Cluster "gatus/gatus-postgresql-17-cluster" is running low on disk space. Check attached PVCs.
    runbook_url: https://github.com/cloudnative-pg/charts/blob/main/charts/cluster/docs/runbooks/CNPGClusterLowDiskSpaceWarning.md
  expr: |
    max(max by(persistentvolumeclaim) (1 - kubelet_volume_stats_available_bytes{namespace="gatus", persistentvolumeclaim=~"gatus-postgresql-17-cluster-([1-9][0-9]*)$"} / kubelet_volume_stats_capacity_bytes{namespace="gatus", persistentvolumeclaim=~"gatus-postgresql-17-cluster-([1-9][0-9]*)$"})) > 0.7 OR
    max(max by(persistentvolumeclaim) (1 - kubelet_volume_stats_available_bytes{namespace="gatus", persistentvolumeclaim=~"gatus-postgresql-17-cluster-([1-9][0-9]*)-wal"} / kubelet_volume_stats_capacity_bytes{namespace="gatus", persistentvolumeclaim=~"gatus-postgresql-17-cluster-([1-9][0-9]*)-wal"})) > 0.7 OR
    max(sum by (namespace,persistentvolumeclaim) (kubelet_volume_stats_used_bytes{namespace="gatus", persistentvolumeclaim=~"gatus-postgresql-17-cluster-([1-9][0-9]*)-tbs.*"})
        /
        sum by (namespace,persistentvolumeclaim) (kubelet_volume_stats_capacity_bytes{namespace="gatus", persistentvolumeclaim=~"gatus-postgresql-17-cluster-([1-9][0-9]*)-tbs.*"})
        *
        on(namespace, persistentvolumeclaim) group_left(volume)
        kube_pod_spec_volumes_persistentvolumeclaims_info{pod=~"gatus-postgresql-17-cluster-([1-9][0-9]*)$"}
    ) > 0.7
  for: 5m
  labels:
    severity: warning
    namespace: gatus
    cnpg_cluster: gatus-postgresql-17-cluster
# Critical alert raised when, for 5 minutes, no cnpg_collector_up series exists
# for this cluster's pods. `OR on() vector(0)` substitutes 0 when the count is
# empty so the `== 0` comparison still produces a sample.
- alert: CNPGClusterOffline
  expr: |
    (count(cnpg_collector_up{namespace="gatus",pod=~"gatus-postgresql-17-cluster-([1-9][0-9]*)$"}) OR on() vector(0)) == 0
  for: 5m
  labels:
    cnpg_cluster: gatus-postgresql-17-cluster
    namespace: gatus
    severity: critical
  annotations:
    runbook_url: https://github.com/cloudnative-pg/charts/blob/main/charts/cluster/docs/runbooks/CNPGClusterOffline.md
    summary: CNPG Cluster has no running instances!
    description: |-
      CloudNativePG Cluster "gatus/gatus-postgresql-17-cluster" has no ready instances.
      Having an offline cluster means your applications will not be able to access the database, leading to
      potential service disruption and/or data loss.
# Warns when cnpg_pg_database_xid_age on a pod of this cluster exceeds
# 300,000,000, sustained for 1 minute.
# The selector limits the metric to this cluster's pods, using the same
# namespace/pod matcher as CNPGClusterOffline. Without it the rule matches
# series from every CNPG cluster scraped by the same Prometheus while still
# attaching this cluster's namespace/cnpg_cluster labels.
- alert: CNPGClusterPGDatabaseXidAgeWarning
  annotations:
    summary: CNPG Cluster has a number of transactions from the frozen XID to the current one.
    description: |-
      Over 300,000,000 transactions from frozen xid
      on pod {{ $labels.pod }}
  expr: |
    cnpg_pg_database_xid_age{namespace="gatus", pod=~"gatus-postgresql-17-cluster-([1-9][0-9]*)$"} > 300000000
  for: 1m
  labels:
    severity: warning
    namespace: gatus
    cnpg_cluster: gatus-postgresql-17-cluster
# Warns when cnpg_pg_replication_lag on a pod of this cluster exceeds 300
# (seconds, per the description), sustained for 1 minute.
# The selector limits the metric to this cluster's pods, using the same
# namespace/pod matcher as CNPGClusterOffline. Without it the rule matches
# series from every CNPG cluster scraped by the same Prometheus while still
# attaching this cluster's namespace/cnpg_cluster labels.
- alert: CNPGClusterPGReplicationWarning
  annotations:
    summary: CNPG Cluster standby is lagging behind the primary.
    description: |-
      Standby is lagging behind by over 300 seconds (5 minutes)
  expr: |
    cnpg_pg_replication_lag{namespace="gatus", pod=~"gatus-postgresql-17-cluster-([1-9][0-9]*)$"} > 300
  for: 1m
  labels:
    severity: warning
    namespace: gatus
    cnpg_cluster: gatus-postgresql-17-cluster
# Warns when, for 1 minute, a pod of this cluster reports
# cnpg_pg_replication_in_recovery greater than
# cnpg_pg_replication_is_wal_receiver_up.
# Both operands are limited to this cluster's pods, using the same
# namespace/pod matcher as CNPGClusterOffline. Without it the rule matches
# series from every CNPG cluster scraped by the same Prometheus while still
# attaching this cluster's namespace/cnpg_cluster labels.
- alert: CNPGClusterReplicaFailingReplicationWarning
  annotations:
    # Summary wording corrected (was "has a replica is failing to replicate").
    summary: CNPG Cluster has a replica that is failing to replicate.
    description: |-
      Replica {{ $labels.pod }}
      is failing to replicate
  expr: |
    cnpg_pg_replication_in_recovery{namespace="gatus", pod=~"gatus-postgresql-17-cluster-([1-9][0-9]*)$"} > cnpg_pg_replication_is_wal_receiver_up{namespace="gatus", pod=~"gatus-postgresql-17-cluster-([1-9][0-9]*)$"}
  for: 1m
  labels:
    severity: warning
    namespace: gatus
    cnpg_cluster: gatus-postgresql-17-cluster
# Warning raised after 5 minutes when this cluster's pods are spread over
# fewer than 3 distinct label_topology_kubernetes_io_zone values. The zone
# label is joined onto kube_pod_info from kube_node_labels. As written, the
# count is compared against 3 on both sides of the expression.
- alert: CNPGClusterZoneSpreadWarning
  expr: |
    3 > count(count by (label_topology_kubernetes_io_zone) (kube_pod_info{namespace="gatus", pod=~"gatus-postgresql-17-cluster-([1-9][0-9]*)$"} * on(node,instance) group_left(label_topology_kubernetes_io_zone) kube_node_labels)) < 3
  for: 5m
  labels:
    cnpg_cluster: gatus-postgresql-17-cluster
    namespace: gatus
    severity: warning
  annotations:
    runbook_url: https://github.com/cloudnative-pg/charts/blob/main/charts/cluster/docs/runbooks/CNPGClusterZoneSpreadWarning.md
    summary: CNPG Cluster instances in the same zone.
    description: |-
      CloudNativePG Cluster "gatus/gatus-postgresql-17-cluster" has instances in the same availability zone.
      A disaster in one availability zone will lead to a potential service disruption and/or data loss.
---
# Source: gatus/charts/postgres-17-cluster/templates/scheduled-backup.yaml
# ScheduledBackup for cluster gatus-postgresql-17-cluster, taken through the
# barman-cloud plugin against the "gatus-postgresql-17-external-backup"
# object store.
apiVersion: postgresql.cnpg.io/v1
kind: ScheduledBackup
metadata:
  name: gatus-postgresql-17-daily-backup-scheduled-backup
  namespace: gatus
  labels:
    app.kubernetes.io/instance: gatus
    app.kubernetes.io/managed-by: Helm
    app.kubernetes.io/name: gatus-postgresql-17
    app.kubernetes.io/part-of: gatus
    app.kubernetes.io/version: '6.16.0'
    helm.sh/chart: postgres-17-cluster-6.16.0
spec:
  cluster:
    name: gatus-postgresql-17-cluster
  # Six-field cron expression; presumably seconds-first, i.e. daily at
  # 00:00:00 — verify against the CNPG ScheduledBackup documentation.
  schedule: '0 0 0 * * *'
  immediate: false
  suspend: false
  backupOwnerReference: self
  method: plugin
  pluginConfiguration:
    name: barman-cloud.cloudnative-pg.io
    parameters:
      barmanObjectName: gatus-postgresql-17-external-backup
---
# Source: gatus/charts/postgres-17-cluster/templates/scheduled-backup.yaml
# ScheduledBackup for cluster gatus-postgresql-17-cluster, taken through the
# barman-cloud plugin against the "gatus-postgresql-17-garage-local-backup"
# object store. Unlike the daily external backup, this one sets
# `immediate: true`.
apiVersion: postgresql.cnpg.io/v1
kind: ScheduledBackup
metadata:
  name: gatus-postgresql-17-live-backup-scheduled-backup
  namespace: gatus
  labels:
    app.kubernetes.io/instance: gatus
    app.kubernetes.io/managed-by: Helm
    app.kubernetes.io/name: gatus-postgresql-17
    app.kubernetes.io/part-of: gatus
    app.kubernetes.io/version: '6.16.0'
    helm.sh/chart: postgres-17-cluster-6.16.0
spec:
  cluster:
    name: gatus-postgresql-17-cluster
  # Six-field cron expression; presumably seconds-first, i.e. daily at
  # 00:00:00 — verify against the CNPG ScheduledBackup documentation.
  schedule: '0 0 0 * * *'
  immediate: true
  suspend: false
  backupOwnerReference: self
  method: plugin
  pluginConfiguration:
    name: barman-cloud.cloudnative-pg.io
    parameters:
      barmanObjectName: gatus-postgresql-17-garage-local-backup
---
# Source: gatus/charts/gatus/templates/servicemonitor.yaml
# ServiceMonitor that scrapes /metrics over plain HTTP from the `http` port of
# Services labelled as the gatus app/instance in the gatus namespace.
apiVersion: monitoring.coreos.com/v1
kind: ServiceMonitor
metadata:
  name: gatus
  namespace: gatus
  labels:
    app.kubernetes.io/instance: gatus
    app.kubernetes.io/managed-by: Helm
    app.kubernetes.io/name: gatus
    app.kubernetes.io/version: 'v5.33.0'
    helm.sh/chart: gatus-1.4.4
spec:
  jobLabel: gatus
  namespaceSelector:
    matchNames:
      - gatus
  selector:
    matchLabels:
      app.kubernetes.io/instance: gatus
      app.kubernetes.io/name: gatus
  endpoints:
    # Scrape once per minute; each scrape may take at most 30s.
    - port: http
      scheme: http
      path: /metrics
      interval: 1m
      scrapeTimeout: 30s
      honorLabels: true