---
# Source: gatus/charts/gatus/templates/configmap.yaml
apiVersion: v1
kind: ConfigMap
metadata:
  name: gatus
  namespace: gatus
  labels:
    helm.sh/chart: gatus-1.4.4
    app.kubernetes.io/name: gatus
    app.kubernetes.io/instance: gatus
    app.kubernetes.io/version: "v5.33.0"
    app.kubernetes.io/managed-by: Helm
data:
  # Full Gatus configuration, mounted at /config by the Deployment below.
  # ${VAR} placeholders are expanded by Gatus from container env vars.
  config.yaml: |
    alerting:
      ntfy:
        click: https://gatus.alexlebens.net
        default-alert:
          failure-threshold: 5
          send-on-resolved: true
        priority: 3
        token: ${NTFY_TOKEN}
        topic: gatus-alerts
        url: http://ntfy.ntfy
    connectivity:
      checker:
        interval: 60s
        target: 1.1.1.1:53
    default-endpoint:
      alerts:
        - type: ntfy
      conditions:
        - '[STATUS] == 200'
        - '[CERTIFICATE_EXPIRATION] > 240h'
      group: core
      interval: 30s
    endpoints:
      - alerts:
          - type: ntfy
        conditions:
          - '[STATUS] == 401'
        group: core
        interval: 30s
        name: plex
        url: http://plex.alexlebens.net
      - alerts:
          - type: ntfy
        conditions:
          - '[STATUS] == 200'
          - '[CERTIFICATE_EXPIRATION] > 240h'
        group: core
        interval: 30s
        name: jellyfin
        url: https://jellyfin.alexlebens.net
      - alerts:
          - type: ntfy
        conditions:
          - '[STATUS] == 200'
          - '[CERTIFICATE_EXPIRATION] > 240h'
        group: core
        interval: 30s
        name: overseerr
        url: https://overseerr.alexlebens.net
      - alerts:
          - type: ntfy
        conditions:
          - '[STATUS] == 200'
          - '[CERTIFICATE_EXPIRATION] > 240h'
        group: core
        interval: 30s
        name: yamtrack
        url: https://yamtrack.alexlebens.net
      - alerts:
          - type: ntfy
        conditions:
          - '[STATUS] == 200'
          - '[CERTIFICATE_EXPIRATION] > 240h'
        group: core
        interval: 30s
        name: tubearchivist
        url: https://tubearchivist.alexlebens.net
      - alerts:
          - type: ntfy
        conditions:
          - '[STATUS] == 200'
          - '[CERTIFICATE_EXPIRATION] > 240h'
        group: core
        interval: 30s
        name: immich
        url: https://immich.alexlebens.net
      - alerts:
          - type: ntfy
        conditions:
          - '[STATUS] == 200'
          - '[CERTIFICATE_EXPIRATION] > 240h'
        group: core
        interval: 30s
        name: photoview
        url: https://photoview.alexlebens.net
      - alerts:
          - type: ntfy
        conditions:
          - '[STATUS] == 200'
          - '[CERTIFICATE_EXPIRATION] > 240h'
        group: core
        interval: 30s
        name: audiobookshelf
        url: https://audiobookshelf.alexlebens.net
      - alerts:
          - type: ntfy
        conditions:
          - '[STATUS] == 200'
          - '[CERTIFICATE_EXPIRATION] > 240h'
        group: core
        interval: 30s
        name: home-assistant
        url: https://home-assistant.alexlebens.net
      - alerts:
          - type: ntfy
        conditions:
          - '[STATUS] == 200'
          - '[CERTIFICATE_EXPIRATION] > 240h'
        group: core
        interval: 30s
        name: actual
        url: https://actual.alexlebens.net
      - alerts:
          - type: ntfy
        conditions:
          - '[STATUS] == 200'
          - '[CERTIFICATE_EXPIRATION] > 240h'
        group: core
        interval: 30s
        name: ollama
        url: https://ollama.alexlebens.net
      - alerts:
          - type: ntfy
        conditions:
          - '[STATUS] == 200'
          - '[CERTIFICATE_EXPIRATION] > 240h'
        group: core
        interval: 30s
        name: searxng
        url: https://searxng.alexlebens.net
      - alerts:
          - type: ntfy
        conditions:
          - '[STATUS] == 200'
          - '[CERTIFICATE_EXPIRATION] > 240h'
        group: core
        interval: 30s
        name: roundcube
        url: https://mail.alexlebens.net
      - alerts:
          - type: ntfy
        conditions:
          - '[STATUS] == 200'
          - '[CERTIFICATE_EXPIRATION] > 240h'
        group: core
        interval: 30s
        name: kiwix
        url: https://kiwix.alexlebens.net
      - alerts:
          - type: ntfy
        conditions:
          - '[STATUS] == 200'
          - '[CERTIFICATE_EXPIRATION] > 240h'
        group: core
        interval: 30s
        name: gitea
        url: https://gitea.alexlebens.net
      - alerts:
          - type: ntfy
        conditions:
          - '[STATUS] == 200'
          - '[CERTIFICATE_EXPIRATION] > 240h'
        group: core
        interval: 30s
        name: home-assistant-code-server
        url: https://home-assistant-code-server.alexlebens.net
      - alerts:
          - type: ntfy
        conditions:
          - '[STATUS] == 200'
          - '[CERTIFICATE_EXPIRATION] > 240h'
        group: core
        interval: 30s
        name: argocd
        url: https://argocd.alexlebens.net
      - alerts:
          - type: ntfy
        conditions:
          - '[STATUS] == 200'
          - '[CERTIFICATE_EXPIRATION] > 240h'
        group: core
        interval: 30s
        name: komodo
        url: https://komodo.alexlebens.net
      - alerts:
          - type: ntfy
        conditions:
          - '[STATUS] == 200'
          - '[CERTIFICATE_EXPIRATION] > 240h'
        group: core
        interval: 30s
        name: argo-workflows
        url: https://argo-workflows.alexlebens.net
      - alerts:
          - type: ntfy
        conditions:
          - '[STATUS] == 200'
          - '[CERTIFICATE_EXPIRATION] > 240h'
        group: core
        interval: 30s
        name: n8n
        url: https://n8n.alexlebens.net
      - alerts:
          - type: ntfy
        conditions:
          - '[STATUS] == 200'
          - '[CERTIFICATE_EXPIRATION] > 240h'
        group: core
        interval: 30s
        name: omni-tools
        url: https://omni-tools.alexlebens.net
      - alerts:
          - type: ntfy
        conditions:
          - '[STATUS] == 200'
          - '[CERTIFICATE_EXPIRATION] > 240h'
        group: core
        interval: 30s
        name: headlamp
        url: https://headlamp.alexlebens.net
      - alerts:
          - type: ntfy
        conditions:
          - '[STATUS] == 200'
          - '[CERTIFICATE_EXPIRATION] > 240h'
        group: core
        interval: 30s
        name: hubble
        url: https://hubble.alexlebens.net
      - alerts:
          - type: ntfy
        conditions:
          - '[STATUS] == 200'
          - '[CERTIFICATE_EXPIRATION] > 240h'
        group: core
        interval: 30s
        name: grafana
        url: https://grafana.alexlebens.net
      - alerts:
          - type: ntfy
        conditions:
          - '[STATUS] == 200'
          - '[CERTIFICATE_EXPIRATION] > 240h'
        group: core
        interval: 30s
        name: prometheus
        url: https://prometheus.alexlebens.net
      - alerts:
          - type: ntfy
        conditions:
          - '[STATUS] == 200'
          - '[CERTIFICATE_EXPIRATION] > 240h'
        group: core
        interval: 30s
        name: alertmanager
        url: https://alertmanager.alexlebens.net
      - alerts:
          - type: ntfy
        conditions:
          - '[STATUS] == 200'
          - '[CERTIFICATE_EXPIRATION] > 240h'
        group: core
        interval: 30s
        name: tautulli
        url: https://tautulli.alexlebens.net
      - alerts:
          - type: ntfy
        conditions:
          - '[STATUS] == 200'
          - '[CERTIFICATE_EXPIRATION] > 240h'
        group: core
        interval: 30s
        name: jellystat
        url: https://jellystat.alexlebens.net
      - alerts:
          - type: ntfy
        conditions:
          - '[STATUS] == 200'
          - '[CERTIFICATE_EXPIRATION] > 240h'
        group: core
        interval: 30s
        name: authentik
        url: https://authentik.alexlebens.net
      - alerts:
          - type: ntfy
        conditions:
          - '[STATUS] == 200'
          - '[CERTIFICATE_EXPIRATION] > 240h'
        group: core
        interval: 30s
        name: stalwart
        url: https://stalwart.alexlebens.net
      - alerts:
          - type: ntfy
        conditions:
          - '[STATUS] == 200'
          - '[CERTIFICATE_EXPIRATION] > 240h'
        group: core
        interval: 30s
        name: ntfy
        url: https://ntfy.alexlebens.net
      - alerts:
          - type: ntfy
        conditions:
          - '[STATUS] == 200'
          - '[CERTIFICATE_EXPIRATION] > 240h'
        group: core
        interval: 30s
        name: traefik-cl01tl
        url: https://traefik-cl01tl.alexlebens.net/dashboard/#/
      - alerts:
          - type: ntfy
        conditions:
          - '[STATUS] == 200'
          - '[CERTIFICATE_EXPIRATION] > 240h'
        group: core
        interval: 30s
        name: harbor
        url: https://harbor.alexlebens.net
      - alerts:
          - type: ntfy
        conditions:
          - '[STATUS] == 200'
          - '[CERTIFICATE_EXPIRATION] > 240h'
        group: core
        interval: 30s
        name: unifi
        url: https://unifi.alexlebens.net
      - alerts:
          - type: ntfy
        client:
          insecure: true
        conditions:
          - '[CONNECTED] == true'
        group: core
        interval: 30s
        name: synology
        url: https://synology.alexlebens.net
      - alerts:
          - type: ntfy
        conditions:
          - '[STATUS] == 200'
        group: core
        interval: 30s
        name: hdhr
        url: http://hdhr.alexlebens.net
      - alerts:
          - type: ntfy
        client:
          insecure: true
        conditions:
          - '[CONNECTED] == true'
        group: core
        interval: 30s
        name: pikvm
        url: https://pikvm.alexlebens.net/login/
      - alerts:
          - type: ntfy
        conditions:
          - '[STATUS] == 200'
        group: core
        interval: 30s
        name: shelly
        url: http://it05sp.alexlebens.net
      - alerts:
          - type: ntfy
        conditions:
          - '[STATUS] == 200'
          - '[CERTIFICATE_EXPIRATION] > 240h'
        group: core
        interval: 30s
        name: ceph
        url: https://ceph.alexlebens.net
      - alerts:
          - type: ntfy
        conditions:
          - '[STATUS] == 200'
          - '[CERTIFICATE_EXPIRATION] > 240h'
        group: core
        interval: 30s
        name: pgadmin
        url: https://pgadmin.alexlebens.net
      - alerts:
          - type: ntfy
        conditions:
          - '[STATUS] == 200'
          - '[CERTIFICATE_EXPIRATION] > 240h'
        group: core
        interval: 30s
        name: whodb
        url: https://whodb.alexlebens.net
      - alerts:
          - type: ntfy
        conditions:
          - '[STATUS] == 200'
          - '[CERTIFICATE_EXPIRATION] > 240h'
        group: core
        interval: 30s
        name: vault
        url: https://vault.alexlebens.net
      - alerts:
          - type: ntfy
        conditions:
          - '[STATUS] == 200'
          - '[CERTIFICATE_EXPIRATION] > 240h'
        group: core
        interval: 30s
        name: sonarr
        url: https://sonarr.alexlebens.net
      - alerts:
          - type: ntfy
        conditions:
          - '[STATUS] == 200'
          - '[CERTIFICATE_EXPIRATION] > 240h'
        group: core
        interval: 30s
        name: sonarr-4k
        url: https://sonarr-4k.alexlebens.net
      - alerts:
          - type: ntfy
        conditions:
          - '[STATUS] == 200'
          - '[CERTIFICATE_EXPIRATION] > 240h'
        group: core
        interval: 30s
        name: sonarr-anime
        url: https://sonarr-anime.alexlebens.net
      - alerts:
          - type: ntfy
        conditions:
          - '[STATUS] == 200'
          - '[CERTIFICATE_EXPIRATION] > 240h'
        group: core
        interval: 30s
        name: radarr
        url: https://radarr.alexlebens.net
      - alerts:
          - type: ntfy
        conditions:
          - '[STATUS] == 200'
          - '[CERTIFICATE_EXPIRATION] > 240h'
        group: core
        interval: 30s
        name: radarr-4k
        url: https://radarr-4k.alexlebens.net
      - alerts:
          - type: ntfy
        conditions:
          - '[STATUS] == 200'
          - '[CERTIFICATE_EXPIRATION] > 240h'
        group: core
        interval: 30s
        name: radarr-anime
        url: https://radarr-anime.alexlebens.net
      - alerts:
          - type: ntfy
        conditions:
          - '[STATUS] == 200'
          - '[CERTIFICATE_EXPIRATION] > 240h'
        group: core
        interval: 30s
        name: radarr-standup
        url: https://radarr-standup.alexlebens.net
      - alerts:
          - type: ntfy
        conditions:
          - '[STATUS] == 200'
          - '[CERTIFICATE_EXPIRATION] > 240h'
        group: core
        interval: 30s
        name: lidarr
        url: https://lidarr.alexlebens.net
      - alerts:
          - type: ntfy
        conditions:
          - '[STATUS] == 200'
          - '[CERTIFICATE_EXPIRATION] > 240h'
        group: core
        interval: 30s
        name: lidatube
        url: https://lidatube.alexlebens.net
      - alerts:
          - type: ntfy
        conditions:
          - '[STATUS] == 200'
          - '[CERTIFICATE_EXPIRATION] > 240h'
        group: core
        interval: 30s
        name: slskd
        url: https://slskd.alexlebens.net
      - alerts:
          - type: ntfy
        conditions:
          - '[STATUS] == 200'
          - '[CERTIFICATE_EXPIRATION] > 240h'
        group: core
        interval: 30s
        name: qui
        url: https://qui.alexlebens.net
      - alerts:
          - type: ntfy
        conditions:
          - '[STATUS] == 200'
          - '[CERTIFICATE_EXPIRATION] > 240h'
        group: core
        interval: 30s
        name: qbittorrent
        url: https://qbittorrent.alexlebens.net
      - alerts:
          - type: ntfy
        conditions:
          - '[STATUS] == 200'
          - '[CERTIFICATE_EXPIRATION] > 240h'
        group: core
        interval: 30s
        name: prowlarr
        url: https://prowlarr.alexlebens.net
      - alerts:
          - type: ntfy
        conditions:
          - '[STATUS] == 401'
        group: core
        interval: 30s
        name: bazarr
        url: https://bazarr.alexlebens.net
      - alerts:
          - type: ntfy
        conditions:
          - '[STATUS] == 200'
          - '[CERTIFICATE_EXPIRATION] > 240h'
        group: core
        interval: 30s
        name: huntarr
        url: https://huntarr.alexlebens.net
      - alerts:
          - type: ntfy
        conditions:
          - '[STATUS] == 200'
          - '[CERTIFICATE_EXPIRATION] > 240h'
        group: core
        interval: 30s
        name: tdarr
        url: https://tdarr.alexlebens.net
      - alerts:
          - type: ntfy
        conditions:
          - '[STATUS] == 200'
          - '[CERTIFICATE_EXPIRATION] > 240h'
        group: external
        interval: 30s
        name: www
        url: https://www.alexlebens.dev
      - alerts:
          - type: ntfy
        conditions:
          - '[STATUS] == 200'
          - '[CERTIFICATE_EXPIRATION] > 240h'
        group: external
        interval: 30s
        name: directus
        url: https://directus.alexlebens.dev
      - alerts:
          - type: ntfy
        conditions:
          - '[STATUS] == 200'
          - '[CERTIFICATE_EXPIRATION] > 240h'
        group: external
        interval: 120s
        name: postiz
        url: https://postiz.alexlebens.dev
      - alerts:
          - type: ntfy
        conditions:
          - '[STATUS] == 200'
          - '[CERTIFICATE_EXPIRATION] > 240h'
        group: external
        interval: 30s
        name: matrix
        url: https://chat.alexlebens.dev
      - alerts:
          - type: ntfy
        conditions:
          - '[STATUS] == 200'
          - '[CERTIFICATE_EXPIRATION] > 240h'
        group: external
        interval: 30s
        name: outline
        url: https://wiki.alexlebens.dev
      - alerts:
          - type: ntfy
        conditions:
          - '[STATUS] == 200'
          - '[CERTIFICATE_EXPIRATION] > 240h'
        group: external
        interval: 30s
        name: vaultwarden
        url: https://passwords.alexlebens.dev
      - alerts:
          - type: ntfy
        conditions:
          - '[STATUS] == 200'
          - '[CERTIFICATE_EXPIRATION] > 240h'
        group: external
        interval: 30s
        name: karakeep
        url: https://karakeep.alexlebens.dev
      - alerts:
          - type: ntfy
        conditions:
          - '[STATUS] == 401'
        group: external
        interval: 30s
        name: freshrss
        url: https://rss.alexlebens.dev/i/
      - alerts:
          - type: ntfy
        conditions:
          - '[STATUS] == 200'
          - '[CERTIFICATE_EXPIRATION] > 240h'
        group: external
        interval: 30s
        name: gitea-external
        url: https://gitea.alexlebens.dev
      - alerts:
          - type: ntfy
        conditions:
          - '[STATUS] == 200'
          - '[CERTIFICATE_EXPIRATION] > 240h'
        group: external
        interval: 30s
        name: codeserver
        url: https://codeserver.alexlebens.dev
      - alerts:
          - type: ntfy
        conditions:
          - '[STATUS] == 200'
          - '[CERTIFICATE_EXPIRATION] > 240h'
        group: external
        interval: 30s
        name: public homepage
        url: https://home.alexlebens.dev
      - conditions:
          - '[STATUS] == 200'
          - '[RESPONSE_TIME] < 400'
        group: public
        interval: 10s
        name: discord
        url: https://discord.com/app
      - conditions:
          - '[STATUS] == 200'
          - '[RESPONSE_TIME] < 400'
        group: public
        interval: 10s
        name: reddit
        url: https://reddit.com
    metrics: true
    security:
      oidc:
        client-id: ${OIDC_CLIENT_ID}
        client-secret: ${OIDC_CLIENT_SECRET}
        issuer-url: https://authentik.alexlebens.net/application/o/gatus/
        redirect-url: https://gatus.alexlebens.net/authorization-code/callback
        scopes:
          - openid
    storage:
      path: postgres://${POSTGRES_USER}:${POSTGRES_PASSWORD}@${POSTGRES_HOST}:${POSTGRES_PORT}/${POSTGRES_DB}?sslmode=disable
      type: postgres
---
# Source: gatus/charts/gatus/templates/pvc.yaml
apiVersion: v1
kind: PersistentVolumeClaim
metadata:
  name: gatus
  namespace: gatus
  labels:
    helm.sh/chart: gatus-1.4.4
    app.kubernetes.io/name: gatus
    app.kubernetes.io/instance: gatus
    app.kubernetes.io/version: "v5.33.0"
    app.kubernetes.io/managed-by: Helm
  finalizers:
    - kubernetes.io/pvc-protection
spec:
  accessModes:
    - "ReadWriteOnce"
  resources:
    requests:
      storage: "1Gi"
  storageClassName: ceph-block
---
# Source: gatus/charts/gatus/templates/service.yaml
apiVersion: v1
kind: Service
metadata:
  name: gatus
  namespace: gatus
  labels:
    helm.sh/chart: gatus-1.4.4
    app.kubernetes.io/name: gatus
    app.kubernetes.io/instance: gatus
    app.kubernetes.io/version: "v5.33.0"
    app.kubernetes.io/managed-by: Helm
spec:
  type: ClusterIP
  ports:
    - name: http
      port: 80
      targetPort: http
      protocol: TCP
  selector:
    app.kubernetes.io/name: gatus
    app.kubernetes.io/instance: gatus
---
# Source: gatus/charts/gatus/templates/deployment.yaml
apiVersion: apps/v1
kind: Deployment
metadata:
  name: gatus
  namespace: gatus
  labels:
    helm.sh/chart: gatus-1.4.4
    app.kubernetes.io/name: gatus
    app.kubernetes.io/instance: gatus
    app.kubernetes.io/version: "v5.33.0"
    app.kubernetes.io/managed-by: Helm
  annotations:
    reloader.stakater.com/auto: "true"
spec:
  replicas: 1
  revisionHistoryLimit: 10
  selector:
    matchLabels:
      app.kubernetes.io/name: gatus
      app.kubernetes.io/instance: gatus
  strategy:
    type: Recreate
  template:
    metadata:
      labels:
        app.kubernetes.io/name: gatus
        app.kubernetes.io/instance: gatus
      annotations:
        checksum/config: 31c72cc890f0e181e18d8c8f38bec005f1ea27d18aeaebc241351d1e3d5dd21d
    spec:
      serviceAccountName: default
      automountServiceAccountToken: false
      securityContext:
        fsGroup: 65534
      containers:
        - name: gatus
          securityContext:
            readOnlyRootFilesystem: true
            runAsGroup: 65534
            runAsNonRoot: true
            runAsUser: 65534
          image: "ghcr.io/twin/gatus:v5.33.0"
          imagePullPolicy: IfNotPresent
          ports:
            - name: http
              containerPort: 8080
              protocol: TCP
          env:
            - name: "NTFY_TOKEN"
              valueFrom:
                secretKeyRef:
                  key: NTFY_TOKEN
                  name: gatus-config-secret
            - name: "OIDC_CLIENT_ID"
              valueFrom:
                secretKeyRef:
                  key: OIDC_CLIENT_ID
                  name: gatus-oidc-secret
            - name: "OIDC_CLIENT_SECRET"
              valueFrom:
                secretKeyRef:
                  key: OIDC_CLIENT_SECRET
                  name: gatus-oidc-secret
            - name: "POSTGRES_DB"
              valueFrom:
                secretKeyRef:
                  key: dbname
                  name: gatus-postgresql-17-cluster-app
            - name: "POSTGRES_HOST"
              valueFrom:
                secretKeyRef:
                  key: host
                  name: gatus-postgresql-17-cluster-app
            - name: "POSTGRES_PASSWORD"
              valueFrom:
                secretKeyRef:
                  key: password
                  name: gatus-postgresql-17-cluster-app
            - name: "POSTGRES_PORT"
              valueFrom:
                secretKeyRef:
                  key: port
                  name: gatus-postgresql-17-cluster-app
            - name: "POSTGRES_USER"
              valueFrom:
                secretKeyRef:
                  key: username
                  name: gatus-postgresql-17-cluster-app
          # NOTE(review): this injects ConfigMap keys as env vars; the only key
          # is "config.yaml", which is not a valid env var name and will be
          # skipped by the kubelet — confirm whether this envFrom is intended.
          envFrom:
            - configMapRef:
                name: gatus
          readinessProbe:
            httpGet:
              path: /health
              port: http
          livenessProbe:
            httpGet:
              path: /health
              port: http
          resources:
            requests:
              cpu: 10m
              memory: 128Mi
          volumeMounts:
            - name: gatus-config
              mountPath: /config
              readOnly: true
            - name: gatus-data
              mountPath: /data
      volumes:
        - name: gatus-config
          configMap:
            name: gatus
        - name: gatus-data
          persistentVolumeClaim:
            claimName: gatus
---
# Source: gatus/charts/postgres-17-cluster/templates/cluster.yaml
apiVersion: postgresql.cnpg.io/v1
kind: Cluster
metadata:
  name: gatus-postgresql-17-cluster
  namespace: gatus
  labels:
    helm.sh/chart: postgres-17-cluster-6.16.0
    app.kubernetes.io/name: gatus-postgresql-17
    app.kubernetes.io/instance: gatus
    app.kubernetes.io/part-of: gatus
    app.kubernetes.io/version: "6.16.0"
    app.kubernetes.io/managed-by: Helm
spec:
  instances: 3
  imageName: "ghcr.io/cloudnative-pg/postgresql:17.7-standard-trixie"
  imagePullPolicy: IfNotPresent
  postgresUID: 26
  postgresGID: 26
  plugins:
    - name: barman-cloud.cloudnative-pg.io
      enabled: true
      isWALArchiver: false
      parameters:
        barmanObjectName: "gatus-postgresql-17-external-backup"
        serverName: "gatus-postgresql-17-backup-2"
    - name: barman-cloud.cloudnative-pg.io
      enabled: true
      isWALArchiver: true
      parameters:
        barmanObjectName: "gatus-postgresql-17-garage-local-backup"
        serverName: "gatus-postgresql-17-backup-1"
  externalClusters:
    - name: recovery
      plugin:
        name: barman-cloud.cloudnative-pg.io
        parameters:
          barmanObjectName: "gatus-postgresql-17-recovery"
          serverName: gatus-postgresql-17-backup-1
  storage:
    size: 10Gi
    storageClass: local-path
  walStorage:
    size: 2Gi
    storageClass: local-path
  resources:
    limits:
      hugepages-2Mi: 256Mi
    requests:
      cpu: 100m
      memory: 256Mi
  affinity:
    enablePodAntiAffinity: true
    topologyKey: kubernetes.io/hostname
  primaryUpdateMethod: switchover
  primaryUpdateStrategy: unsupervised
  logLevel: info
  enableSuperuserAccess: false
  enablePDB: true
  postgresql:
    parameters:
      hot_standby_feedback: "on"
      max_slot_wal_keep_size: 2000MB
      shared_buffers: 128MB
  monitoring:
    enablePodMonitor: true
    disableDefaultQueries: false
  bootstrap:
    recovery:
      database: app
      source: gatus-postgresql-17-backup-1
  # NOTE(review): `externalClusters` appears twice in this document (see the
  # "recovery" entry above). Duplicate mapping keys are invalid YAML and most
  # parsers keep only this last occurrence — the chart/values likely need a
  # fix so a single merged list is rendered; verify against the chart source.
  externalClusters:
    - name: gatus-postgresql-17-backup-1
      plugin:
        name: barman-cloud.cloudnative-pg.io
        enabled: true
        isWALArchiver: false
        parameters:
          barmanObjectName: "gatus-postgresql-17-recovery"
          serverName: gatus-postgresql-17-backup-1
---
# Source: gatus/templates/external-secret.yaml
apiVersion: external-secrets.io/v1
kind: ExternalSecret
metadata:
  name: gatus-config-secret
  namespace: gatus
  labels:
    app.kubernetes.io/name: gatus-config-secret
    app.kubernetes.io/instance: gatus
    app.kubernetes.io/part-of: gatus
spec:
  secretStoreRef:
    kind: ClusterSecretStore
    name: vault
  data:
    - secretKey: NTFY_TOKEN
      remoteRef:
        conversionStrategy: Default
        decodingStrategy: None
        key: /ntfy/user/cl01tl
        metadataPolicy: None
        property: token
---
# Source: gatus/templates/external-secret.yaml
apiVersion: external-secrets.io/v1
kind: ExternalSecret
metadata:
  name: gatus-oidc-secret
  namespace: gatus
  labels:
    app.kubernetes.io/name: gatus-oidc-secret
    app.kubernetes.io/instance: gatus
    app.kubernetes.io/part-of: gatus
spec:
  secretStoreRef:
    kind: ClusterSecretStore
    name: vault
  data:
    - secretKey: OIDC_CLIENT_ID
      remoteRef:
        conversionStrategy: Default
        decodingStrategy: None
        key: /authentik/oidc/gatus
        metadataPolicy: None
        property: client
    - secretKey: OIDC_CLIENT_SECRET
      remoteRef:
        conversionStrategy: Default
        decodingStrategy: None
        key: /authentik/oidc/gatus
        metadataPolicy: None
        property: secret
---
# Source: gatus/templates/external-secret.yaml
apiVersion: external-secrets.io/v1
kind: ExternalSecret
metadata:
  name: gatus-postgresql-17-cluster-backup-secret
  namespace: gatus
  labels:
    app.kubernetes.io/name: gatus-postgresql-17-cluster-backup-secret
    app.kubernetes.io/instance: gatus
    app.kubernetes.io/part-of: gatus
spec:
  secretStoreRef:
    kind: ClusterSecretStore
    name: vault
  data:
    - secretKey: ACCESS_KEY_ID
      remoteRef:
        conversionStrategy: Default
        decodingStrategy: None
        key: /digital-ocean/home-infra/postgres-backups
        metadataPolicy: None
        property: access
    - secretKey: ACCESS_SECRET_KEY
      remoteRef:
        conversionStrategy: Default
        decodingStrategy: None
        key: /digital-ocean/home-infra/postgres-backups
        metadataPolicy: None
        property: secret
---
# Source: gatus/templates/external-secret.yaml
apiVersion: external-secrets.io/v1
kind: ExternalSecret
metadata:
  name: gatus-postgresql-17-cluster-backup-secret-garage
  namespace: gatus
  labels:
    app.kubernetes.io/name: gatus-postgresql-17-cluster-backup-secret-garage
    app.kubernetes.io/instance: gatus
    app.kubernetes.io/part-of: gatus
spec:
  secretStoreRef:
    kind: ClusterSecretStore
    name: vault
  data:
    - secretKey: ACCESS_KEY_ID
      remoteRef:
        conversionStrategy: Default
        decodingStrategy: None
        key: /garage/home-infra/postgres-backups
        metadataPolicy: None
        property: ACCESS_KEY_ID
    - secretKey: ACCESS_SECRET_KEY
      remoteRef:
        conversionStrategy: Default
        decodingStrategy: None
        key: /garage/home-infra/postgres-backups
        metadataPolicy: None
        property: ACCESS_SECRET_KEY
    - secretKey: ACCESS_REGION
      remoteRef:
        conversionStrategy: Default
        decodingStrategy: None
        key: /garage/home-infra/postgres-backups
        metadataPolicy: None
        property: ACCESS_REGION
---
# Source: gatus/templates/http-route.yaml
apiVersion: gateway.networking.k8s.io/v1
kind: HTTPRoute
metadata:
  name: http-route-gatus
  namespace: gatus
  labels:
    app.kubernetes.io/name: http-route-gatus
    app.kubernetes.io/instance: gatus
    app.kubernetes.io/part-of: gatus
spec:
  parentRefs:
    - group: gateway.networking.k8s.io
      kind: Gateway
      name: traefik-gateway
      namespace: traefik
  hostnames:
    - gatus.alexlebens.net
  rules:
    - matches:
        - path:
            type: PathPrefix
            value: /
      backendRefs:
        - group: ''
          kind: Service
          name: gatus
          port: 80
          weight: 100
---
# Source: gatus/charts/postgres-17-cluster/templates/object-store.yaml
apiVersion: barmancloud.cnpg.io/v1
kind: ObjectStore
metadata:
  name: "gatus-postgresql-17-external-backup"
  namespace: gatus
  labels:
    helm.sh/chart: postgres-17-cluster-6.16.0
    app.kubernetes.io/name: gatus-postgresql-17
    app.kubernetes.io/instance: gatus
    app.kubernetes.io/part-of: gatus
    app.kubernetes.io/version: "6.16.0"
    app.kubernetes.io/managed-by: Helm
spec:
  retentionPolicy: 30d
  configuration:
    destinationPath: s3://postgres-backups-ce540ddf106d186bbddca68a/cl01tl/gatus/gatus-postgresql-17-cluster
    endpointURL: https://nyc3.digitaloceanspaces.com
    s3Credentials:
      accessKeyId:
        name: gatus-postgresql-17-cluster-backup-secret
        key: ACCESS_KEY_ID
      secretAccessKey:
        name: gatus-postgresql-17-cluster-backup-secret
        key: ACCESS_SECRET_KEY
---
# Source: gatus/charts/postgres-17-cluster/templates/object-store.yaml
apiVersion: barmancloud.cnpg.io/v1
kind: ObjectStore
metadata:
  name: "gatus-postgresql-17-garage-local-backup"
  namespace: gatus
  labels:
    helm.sh/chart: postgres-17-cluster-6.16.0
    app.kubernetes.io/name: gatus-postgresql-17
    app.kubernetes.io/instance: gatus
    app.kubernetes.io/part-of: gatus
    app.kubernetes.io/version: "6.16.0"
    app.kubernetes.io/managed-by: Helm
spec:
  retentionPolicy: 3d
  configuration:
    destinationPath: s3://postgres-backups/cl01tl/gatus/gatus-postgresql-17-cluster
    endpointURL: http://garage-main.garage:3900
    s3Credentials:
      accessKeyId:
        name: gatus-postgresql-17-cluster-backup-secret-garage
        key: ACCESS_KEY_ID
      secretAccessKey:
        name: gatus-postgresql-17-cluster-backup-secret-garage
        key: ACCESS_SECRET_KEY
      region:
        name: gatus-postgresql-17-cluster-backup-secret-garage
        key: ACCESS_REGION
---
# Source: gatus/charts/postgres-17-cluster/templates/object-store.yaml
apiVersion: barmancloud.cnpg.io/v1
kind: ObjectStore
metadata:
  name: "gatus-postgresql-17-recovery"
  namespace: gatus
  labels:
    helm.sh/chart: postgres-17-cluster-6.16.0
    app.kubernetes.io/name: gatus-postgresql-17
    app.kubernetes.io/instance: gatus
    app.kubernetes.io/part-of: gatus
    app.kubernetes.io/version: "6.16.0"
    app.kubernetes.io/managed-by: Helm
spec:
  configuration:
    destinationPath: s3://postgres-backups/cl01tl/gatus/gatus-postgresql-17-cluster
    endpointURL: http://garage-main.garage:3900
    wal:
      compression: snappy
      maxParallel: 1
    data:
      compression: snappy
      jobs: 1
    s3Credentials:
      accessKeyId:
        name: gatus-postgresql-17-cluster-backup-secret-garage
        key: ACCESS_KEY_ID
      secretAccessKey:
        name: gatus-postgresql-17-cluster-backup-secret-garage
        key: ACCESS_SECRET_KEY
---
# Source: gatus/charts/postgres-17-cluster/templates/prometheus-rule.yaml
apiVersion: monitoring.coreos.com/v1
kind: PrometheusRule
metadata:
  name: gatus-postgresql-17-alert-rules
  namespace: gatus
  labels:
    helm.sh/chart: postgres-17-cluster-6.16.0
    app.kubernetes.io/name: gatus-postgresql-17
    app.kubernetes.io/instance: gatus
    app.kubernetes.io/part-of: gatus
    app.kubernetes.io/version: "6.16.0"
    app.kubernetes.io/managed-by: Helm
spec:
  groups:
    - name: cloudnative-pg/gatus-postgresql-17
      rules:
        - alert: CNPGClusterBackendsWaitingWarning
          annotations:
            summary: CNPG Cluster a backend is waiting for longer than 5 minutes.
            description: |-
              Pod {{ $labels.pod }} has been waiting for longer than 5 minutes
          expr: |
            cnpg_backends_waiting_total > 300
          for: 1m
          labels:
            severity: warning
            namespace: gatus
            cnpg_cluster: gatus-postgresql-17-cluster
        - alert: CNPGClusterDatabaseDeadlockConflictsWarning
          annotations:
            summary: CNPG Cluster has over 10 deadlock conflicts.
            description: |-
              There are over 10 deadlock conflicts in {{ $labels.pod }}
          expr: |
            cnpg_pg_stat_database_deadlocks > 10
          for: 1m
          labels:
            severity: warning
            namespace: gatus
            cnpg_cluster: gatus-postgresql-17-cluster
        - alert: CNPGClusterHACritical
          annotations:
            summary: CNPG Cluster has no standby replicas!
            description: |-
              CloudNativePG Cluster "{{`{{`}} $labels.job {{`}}`}}" has no ready standby replicas.
              Your cluster at a severe risk of data loss and downtime if the primary instance fails.
              The primary instance is still online and able to serve queries, although connections
              to the `-ro` endpoint will fail. The `-r` endpoint os operating at reduced capacity
              and all traffic is being served by the main. This can happen during a normal fail-over
              or automated minor version upgrades in a cluster with 2 or less instances. The replaced
              instance may need some time to catch-up with the cluster primary instance. This alarm
              will be always trigger if your cluster is configured to run with only 1 instance. In
              this case you may want to silence it.
            runbook_url: https://github.com/cloudnative-pg/charts/blob/main/charts/cluster/docs/runbooks/CNPGClusterHACritical.md
          expr: |
            max by (job) (cnpg_pg_replication_streaming_replicas{namespace="gatus"} - cnpg_pg_replication_is_wal_receiver_up{namespace="gatus"}) < 1
          for: 5m
          labels:
            severity: critical
            namespace: gatus
            cnpg_cluster: gatus-postgresql-17-cluster
        - alert: CNPGClusterHAWarning
          annotations:
            summary: CNPG Cluster less than 2 standby replicas.
            description: |-
              CloudNativePG Cluster "{{`{{`}} $labels.job {{`}}`}}" has only {{`{{`}} $value {{`}}`}}
              standby replicas, putting your cluster at risk if another instance fails. The cluster
              is still able to operate normally, although the `-ro` and `-r` endpoints operate at
              reduced capacity. This can happen during a normal fail-over or automated minor version
              upgrades. The replaced instance may need some time to catch-up with the cluster primary
              instance. This alarm will be constantly triggered if your cluster is configured to run
              with less than 3 instances. In this case you may want to silence it.
            runbook_url: https://github.com/cloudnative-pg/charts/blob/main/charts/cluster/docs/runbooks/CNPGClusterHAWarning.md
          expr: |
            max by (job) (cnpg_pg_replication_streaming_replicas{namespace="gatus"} - cnpg_pg_replication_is_wal_receiver_up{namespace="gatus"}) < 2
          for: 5m
          labels:
            severity: warning
            namespace: gatus
            cnpg_cluster: gatus-postgresql-17-cluster
        - alert: CNPGClusterHighConnectionsCritical
          annotations:
            summary: CNPG Instance maximum number of connections critical!
            description: |-
              CloudNativePG Cluster "gatus/gatus-postgresql-17-cluster" instance
              {{`{{`}} $labels.pod {{`}}`}} is using {{`{{`}} $value {{`}}`}}% of the maximum
              number of connections.
            runbook_url: https://github.com/cloudnative-pg/charts/blob/main/charts/cluster/docs/runbooks/CNPGClusterHighConnectionsCritical.md
          expr: |
            sum by (pod) (cnpg_backends_total{namespace="gatus", pod=~"gatus-postgresql-17-cluster-([1-9][0-9]*)$"}) / max by (pod) (cnpg_pg_settings_setting{name="max_connections", namespace="gatus", pod=~"gatus-postgresql-17-cluster-([1-9][0-9]*)$"}) * 100 > 95
          for: 5m
          labels:
            severity: critical
            namespace: gatus
            cnpg_cluster: gatus-postgresql-17-cluster
        - alert: CNPGClusterHighConnectionsWarning
          annotations:
            summary: CNPG Instance is approaching the maximum number of connections.
            description: |-
              CloudNativePG Cluster "gatus/gatus-postgresql-17-cluster" instance
              {{`{{`}} $labels.pod {{`}}`}} is using {{`{{`}} $value {{`}}`}}% of the maximum
              number of connections.
            runbook_url: https://github.com/cloudnative-pg/charts/blob/main/charts/cluster/docs/runbooks/CNPGClusterHighConnectionsWarning.md
          expr: |
            sum by (pod) (cnpg_backends_total{namespace="gatus", pod=~"gatus-postgresql-17-cluster-([1-9][0-9]*)$"}) / max by (pod) (cnpg_pg_settings_setting{name="max_connections", namespace="gatus", pod=~"gatus-postgresql-17-cluster-([1-9][0-9]*)$"}) * 100 > 80
          for: 5m
          labels:
            severity: warning
            namespace: gatus
            cnpg_cluster: gatus-postgresql-17-cluster
        - alert: CNPGClusterHighReplicationLag
          annotations:
            summary: CNPG Cluster high replication lag
            description: |-
              CloudNativePG Cluster "gatus/gatus-postgresql-17-cluster" is experiencing a high
              replication lag of {{`{{`}} $value {{`}}`}}ms.

              High replication lag indicates network issues, busy instances, slow queries or
              suboptimal configuration.
            runbook_url: https://github.com/cloudnative-pg/charts/blob/main/charts/cluster/docs/runbooks/CNPGClusterHighReplicationLag.md
          expr: |
            max(cnpg_pg_replication_lag{namespace="gatus",pod=~"gatus-postgresql-17-cluster-([1-9][0-9]*)$"}) * 1000 > 1000
          for: 5m
          labels:
            severity: warning
            namespace: gatus
            cnpg_cluster: gatus-postgresql-17-cluster
        - alert: CNPGClusterInstancesOnSameNode
          annotations:
            summary: CNPG Cluster instances are located on the same node.
            description: |-
              CloudNativePG Cluster "gatus/gatus-postgresql-17-cluster" has {{`{{`}} $value {{`}}`}}
              instances on the same node {{`{{`}} $labels.node {{`}}`}}.

              A failure or scheduled downtime of a single node will lead to a potential service
              disruption and/or data loss.
            runbook_url: https://github.com/cloudnative-pg/charts/blob/main/charts/cluster/docs/runbooks/CNPGClusterInstancesOnSameNode.md
          expr: |
            count by (node) (kube_pod_info{namespace="gatus", pod=~"gatus-postgresql-17-cluster-([1-9][0-9]*)$"}) > 1
          for: 5m
          labels:
            severity: warning
            namespace: gatus
            cnpg_cluster: gatus-postgresql-17-cluster
        - alert: CNPGClusterLongRunningTransactionWarning
          annotations:
            summary: CNPG Cluster query is taking longer than 5 minutes.
            description: |-
              CloudNativePG Cluster Pod {{ $labels.pod }} is taking more than 5 minutes (300 seconds) for a query.
          expr: |-
            cnpg_backends_max_tx_duration_seconds > 300
          for: 1m
          labels:
            severity: warning
            namespace: gatus
            cnpg_cluster: gatus-postgresql-17-cluster
        - alert: CNPGClusterLowDiskSpaceCritical
          annotations:
            summary: CNPG Instance is running out of disk space!
            description: |-
              CloudNativePG Cluster "gatus/gatus-postgresql-17-cluster" is running extremely
              low on disk space. Check attached PVCs!
            runbook_url: https://github.com/cloudnative-pg/charts/blob/main/charts/cluster/docs/runbooks/CNPGClusterLowDiskSpaceCritical.md
          expr: |
            max(max by(persistentvolumeclaim) (1 - kubelet_volume_stats_available_bytes{namespace="gatus", persistentvolumeclaim=~"gatus-postgresql-17-cluster-([1-9][0-9]*)$"} / kubelet_volume_stats_capacity_bytes{namespace="gatus", persistentvolumeclaim=~"gatus-postgresql-17-cluster-([1-9][0-9]*)$"})) > 0.9 OR
            max(max by(persistentvolumeclaim) (1 - kubelet_volume_stats_available_bytes{namespace="gatus", persistentvolumeclaim=~"gatus-postgresql-17-cluster-([1-9][0-9]*)$-wal"} / kubelet_volume_stats_capacity_bytes{namespace="gatus", persistentvolumeclaim=~"gatus-postgresql-17-cluster-([1-9][0-9]*)$-wal"})) > 0.9 OR
            max(sum by (namespace,persistentvolumeclaim) (kubelet_volume_stats_used_bytes{namespace="gatus", persistentvolumeclaim=~"gatus-postgresql-17-cluster-([1-9][0-9]*)$-tbs.*"})
              /
              sum by (namespace,persistentvolumeclaim) (kubelet_volume_stats_capacity_bytes{namespace="gatus", persistentvolumeclaim=~"gatus-postgresql-17-cluster-([1-9][0-9]*)$-tbs.*"})
              *
              on(namespace, persistentvolumeclaim) group_left(volume) kube_pod_spec_volumes_persistentvolumeclaims_info{pod=~"gatus-postgresql-17-cluster-([1-9][0-9]*)$"}
            ) > 0.9
          for: 5m
          labels:
            severity: critical
            namespace: gatus
            cnpg_cluster: gatus-postgresql-17-cluster
        - alert: CNPGClusterLowDiskSpaceWarning
          annotations:
            summary: CNPG Instance is running out of disk space.
            description: |-
              CloudNativePG Cluster "gatus/gatus-postgresql-17-cluster" is running low on disk
              space. Check attached PVCs.
            runbook_url: https://github.com/cloudnative-pg/charts/blob/main/charts/cluster/docs/runbooks/CNPGClusterLowDiskSpaceWarning.md
          expr: |
            max(max by(persistentvolumeclaim) (1 - kubelet_volume_stats_available_bytes{namespace="gatus", persistentvolumeclaim=~"gatus-postgresql-17-cluster-([1-9][0-9]*)$"} / kubelet_volume_stats_capacity_bytes{namespace="gatus", persistentvolumeclaim=~"gatus-postgresql-17-cluster-([1-9][0-9]*)$"})) > 0.7 OR
            max(max by(persistentvolumeclaim) (1 - kubelet_volume_stats_available_bytes{namespace="gatus", persistentvolumeclaim=~"gatus-postgresql-17-cluster-([1-9][0-9]*)$-wal"} / kubelet_volume_stats_capacity_bytes{namespace="gatus", persistentvolumeclaim=~"gatus-postgresql-17-cluster-([1-9][0-9]*)$-wal"})) > 0.7 OR
            max(sum by (namespace,persistentvolumeclaim) (kubelet_volume_stats_used_bytes{namespace="gatus", persistentvolumeclaim=~"gatus-postgresql-17-cluster-([1-9][0-9]*)$-tbs.*"})
              /
              sum by (namespace,persistentvolumeclaim) (kubelet_volume_stats_capacity_bytes{namespace="gatus", persistentvolumeclaim=~"gatus-postgresql-17-cluster-([1-9][0-9]*)$-tbs.*"})
              *
              on(namespace, persistentvolumeclaim) group_left(volume) kube_pod_spec_volumes_persistentvolumeclaims_info{pod=~"gatus-postgresql-17-cluster-([1-9][0-9]*)$"}
            ) > 0.7
          for: 5m
          labels:
            severity: warning
            namespace: gatus
            cnpg_cluster: gatus-postgresql-17-cluster
        - alert: CNPGClusterOffline
          annotations:
            summary: CNPG Cluster has no running instances!
            description: |-
              CloudNativePG Cluster "gatus/gatus-postgresql-17-cluster" has no ready instances.

              Having an offline cluster means your applications will not be able to access the
              database, leading to potential service disruption and/or data loss.
            runbook_url: https://github.com/cloudnative-pg/charts/blob/main/charts/cluster/docs/runbooks/CNPGClusterOffline.md
          expr: |
            (count(cnpg_collector_up{namespace="gatus",pod=~"gatus-postgresql-17-cluster-([1-9][0-9]*)$"}) OR on() vector(0)) == 0
          for: 5m
          labels:
            severity: critical
            namespace: gatus
            cnpg_cluster: gatus-postgresql-17-cluster
        - alert: CNPGClusterPGDatabaseXidAgeWarning
          annotations:
            summary: CNPG Cluster has a number of transactions from the frozen XID to the current one.
            description: |-
              Over 300,000,000 transactions from frozen xid on pod {{ $labels.pod }}
          expr: |
            cnpg_pg_database_xid_age > 300000000
          for: 1m
          labels:
            severity: warning
            namespace: gatus
            cnpg_cluster: gatus-postgresql-17-cluster
        - alert: CNPGClusterPGReplicationWarning
          annotations:
            summary: CNPG Cluster standby is lagging behind the primary.
            description: |-
              Standby is lagging behind by over 300 seconds (5 minutes)
          expr: |
            cnpg_pg_replication_lag > 300
          for: 1m
          labels:
            severity: warning
            namespace: gatus
            cnpg_cluster: gatus-postgresql-17-cluster
        - alert: CNPGClusterReplicaFailingReplicationWarning
          annotations:
            summary: CNPG Cluster has a replica is failing to replicate.
            description: |-
              Replica {{ $labels.pod }} is failing to replicate
          expr: |
            cnpg_pg_replication_in_recovery > cnpg_pg_replication_is_wal_receiver_up
          for: 1m
          labels:
            severity: warning
            namespace: gatus
            cnpg_cluster: gatus-postgresql-17-cluster
        # NOTE(review): the source chunk is truncated at this point — the
        # expr/for/labels (and runbook_url) for this final alert were not
        # visible and are intentionally not guessed; restore them from the
        # chart source.
        - alert: CNPGClusterZoneSpreadWarning
          annotations:
            summary: CNPG Cluster instances in the same zone.
            description: |-
              CloudNativePG Cluster "gatus/gatus-postgresql-17-cluster" has instances in the
              same availability zone.

              A disaster in one availability zone will lead to a potential service disruption
              and/or data loss.
runbook_url: https://github.com/cloudnative-pg/charts/blob/main/charts/cluster/docs/runbooks/CNPGClusterZoneSpreadWarning.md expr: | 3 > count(count by (label_topology_kubernetes_io_zone) (kube_pod_info{namespace="gatus", pod=~"gatus-postgresql-17-cluster-([1-9][0-9]*)$"} * on(node,instance) group_left(label_topology_kubernetes_io_zone) kube_node_labels)) < 3 for: 5m labels: severity: warning namespace: gatus cnpg_cluster: gatus-postgresql-17-cluster --- # Source: gatus/charts/postgres-17-cluster/templates/scheduled-backup.yaml apiVersion: postgresql.cnpg.io/v1 kind: ScheduledBackup metadata: name: "gatus-postgresql-17-daily-backup-scheduled-backup" namespace: gatus labels: helm.sh/chart: postgres-17-cluster-6.16.0 app.kubernetes.io/name: gatus-postgresql-17 app.kubernetes.io/instance: gatus app.kubernetes.io/part-of: gatus app.kubernetes.io/version: "6.16.0" app.kubernetes.io/managed-by: Helm spec: immediate: false suspend: false schedule: "0 0 0 * * *" backupOwnerReference: self cluster: name: gatus-postgresql-17-cluster method: plugin pluginConfiguration: name: barman-cloud.cloudnative-pg.io parameters: barmanObjectName: "gatus-postgresql-17-external-backup" --- # Source: gatus/charts/postgres-17-cluster/templates/scheduled-backup.yaml apiVersion: postgresql.cnpg.io/v1 kind: ScheduledBackup metadata: name: "gatus-postgresql-17-live-backup-scheduled-backup" namespace: gatus labels: helm.sh/chart: postgres-17-cluster-6.16.0 app.kubernetes.io/name: gatus-postgresql-17 app.kubernetes.io/instance: gatus app.kubernetes.io/part-of: gatus app.kubernetes.io/version: "6.16.0" app.kubernetes.io/managed-by: Helm spec: immediate: true suspend: false schedule: "0 0 0 * * *" backupOwnerReference: self cluster: name: gatus-postgresql-17-cluster method: plugin pluginConfiguration: name: barman-cloud.cloudnative-pg.io parameters: barmanObjectName: "gatus-postgresql-17-garage-local-backup" --- # Source: gatus/charts/gatus/templates/servicemonitor.yaml apiVersion: 
monitoring.coreos.com/v1 kind: ServiceMonitor metadata: name: gatus namespace: gatus labels: helm.sh/chart: gatus-1.4.4 app.kubernetes.io/name: gatus app.kubernetes.io/instance: gatus app.kubernetes.io/version: "v5.33.0" app.kubernetes.io/managed-by: Helm spec: endpoints: - port: http interval: 1m scrapeTimeout: 30s honorLabels: true path: /metrics scheme: http jobLabel: "gatus" selector: matchLabels: app.kubernetes.io/name: gatus app.kubernetes.io/instance: gatus namespaceSelector: matchNames: - gatus