diff --git a/clusters/cl01tl/applications/homepage/values.yaml b/clusters/cl01tl/applications/homepage/values.yaml
index f2c3d95df..278393022 100644
--- a/clusters/cl01tl/applications/homepage/values.yaml
+++ b/clusters/cl01tl/applications/homepage/values.yaml
@@ -379,6 +379,12 @@ homepage:
                         query: prometheus_tsdb_storage_blocks_bytes
                         format:
                           type: bytes
+              - Gatus:
+                  icon: sh-gatus.webp
+                  description: Uptime Monitoring
+                  href: https://gatus.alexlebens.net
+                  siteMonitor: http://gatus.gatus:80
+                  statusStyle: dot
               - Tautulli:
                   icon: sh-tautulli.webp
                   description: Plex Monitoring
diff --git a/clusters/cl01tl/monitoring/gatus/Chart.yaml b/clusters/cl01tl/monitoring/gatus/Chart.yaml
new file mode 100644
index 000000000..d3188419f
--- /dev/null
+++ b/clusters/cl01tl/monitoring/gatus/Chart.yaml
@@ -0,0 +1,32 @@
+# Umbrella chart: upstream Gatus chart plus a PostgreSQL cluster chart.
+apiVersion: v2
+name: gatus
+version: 1.0.0
+description: Gatus
+keywords:
+  - gatus
+  - healthcheck
+  - uptime
+  - metrics
+home: https://wiki.alexlebens.dev/s/2a2b0c83-81c7-49e3-aafc-daff4ff23ce2
+sources:
+  - https://github.com/TwiN/gatus
+  - https://github.com/cloudnative-pg/cloudnative-pg
+  - https://github.com/TwiN/gatus/pkgs/container/gatus
+  - https://github.com/TwiN/helm-charts/tree/master/charts/gatus
+  - https://gitea.alexlebens.dev/alexlebens/helm-charts/src/branch/main/charts/postgres-cluster
+maintainers:
+  - name: alexlebens
+dependencies:
+  # The chart in the TwiN helm-charts repo is named "gatus" (see sources);
+  # its values live under the top-level `gatus:` key in values.yaml.
+  - name: gatus
+    repository: https://twin.github.io/helm-charts
+    version: 1.2.0
+  - name: postgres-cluster
+    alias: postgres-17-cluster
+    version: 6.4.2
+    repository: oci://harbor.alexlebens.net/helm-charts
+icon: https://cdn.jsdelivr.net/gh/selfhst/icons/png/gatus.png
+# Keep in sync with gatus.image.tag in values.yaml.
+appVersion: v5.17.0
diff --git a/clusters/cl01tl/monitoring/gatus/templates/external-secret.yaml b/clusters/cl01tl/monitoring/gatus/templates/external-secret.yaml
new file mode 100644
index 000000000..df3089ee7
--- /dev/null
+++ b/clusters/cl01tl/monitoring/gatus/templates/external-secret.yaml
@@ -0,0 +1,84 @@
+# ntfy token from the vault store; read by the NTFY_TOKEN env var in values.yaml.
+apiVersion: external-secrets.io/v1
+kind: ExternalSecret
+metadata:
+  name: gatus-config-secret
+  namespace: {{ .Release.Namespace }}
+  labels:
+    app.kubernetes.io/name: gatus-config-secret
+    app.kubernetes.io/instance: {{ .Release.Name }}
+    app.kubernetes.io/part-of: {{ .Release.Name }}
+spec:
+  secretStoreRef:
+    kind: ClusterSecretStore
+    name: vault
+  data:
+    - secretKey: NTFY_TOKEN
+      remoteRef:
+        conversionStrategy: Default
+        decodingStrategy: None
+        key: /ntfy/user/cl01tl
+        metadataPolicy: None
+        property: token
+
+---
+# OIDC client credentials; read by OIDC_CLIENT_ID / OIDC_CLIENT_SECRET in values.yaml.
+apiVersion: external-secrets.io/v1
+kind: ExternalSecret
+metadata:
+  name: gatus-oidc-secret
+  namespace: {{ .Release.Namespace }}
+  labels:
+    app.kubernetes.io/name: gatus-oidc-secret
+    app.kubernetes.io/instance: {{ .Release.Name }}
+    app.kubernetes.io/part-of: {{ .Release.Name }}
+spec:
+  secretStoreRef:
+    kind: ClusterSecretStore
+    name: vault
+  data:
+    - secretKey: OIDC_CLIENT_ID
+      remoteRef:
+        conversionStrategy: Default
+        decodingStrategy: None
+        key: /authentik/oidc/gatus
+        metadataPolicy: None
+        property: client
+    - secretKey: OIDC_CLIENT_SECRET
+      remoteRef:
+        conversionStrategy: Default
+        decodingStrategy: None
+        key: /authentik/oidc/gatus
+        metadataPolicy: None
+        property: secret
+
+---
+# Object-store credentials; presumably consumed by the postgres-17-cluster chart - confirm.
+apiVersion: external-secrets.io/v1
+kind: ExternalSecret
+metadata:
+  name: gatus-postgresql-17-cluster-backup-secret
+  namespace: {{ .Release.Namespace }}
+  labels:
+    app.kubernetes.io/name: gatus-postgresql-17-cluster-backup-secret
+    app.kubernetes.io/instance: {{ .Release.Name }}
+    app.kubernetes.io/part-of: {{ .Release.Name }}
+spec:
+  secretStoreRef:
+    kind: ClusterSecretStore
+    name: vault
+  data:
+    - secretKey: ACCESS_KEY_ID
+      remoteRef:
+        conversionStrategy: Default
+        decodingStrategy: None
+        key: /digital-ocean/home-infra/postgres-backups
+        metadataPolicy: None
+        property: access
+    - secretKey: ACCESS_SECRET_KEY
+      remoteRef:
+        conversionStrategy: Default
+        decodingStrategy: None
+        key: /digital-ocean/home-infra/postgres-backups
+        metadataPolicy: None
+        property: secret
diff --git a/clusters/cl01tl/monitoring/gatus/templates/http-route.yaml b/clusters/cl01tl/monitoring/gatus/templates/http-route.yaml
new file mode 100644
index 000000000..6bb269ba0
--- /dev/null
+++ b/clusters/cl01tl/monitoring/gatus/templates/http-route.yaml
@@ -0,0 +1,30 @@
+# Attaches to the traefik-gateway Gateway and forwards all paths to the gatus Service.
+apiVersion: gateway.networking.k8s.io/v1
+kind: HTTPRoute
+metadata:
+  name: http-route-gatus
+  namespace: {{ .Release.Namespace }}
+  labels:
+    app.kubernetes.io/name: http-route-gatus
+    app.kubernetes.io/instance: {{ .Release.Name }}
+    app.kubernetes.io/part-of: {{ .Release.Name }}
+spec:
+  parentRefs:
+    - group: gateway.networking.k8s.io
+      kind: Gateway
+      name: traefik-gateway
+      namespace: traefik
+  hostnames:
+    # Must match the public URL used by the homepage link and the OIDC redirect-url.
+    - gatus.alexlebens.net
+  rules:
+    - matches:
+        - path:
+            type: PathPrefix
+            value: /
+      backendRefs:
+        - group: ''
+          kind: Service
+          name: gatus
+          port: 80
+          weight: 100
diff --git a/clusters/cl01tl/monitoring/gatus/templates/service-monitor.yaml b/clusters/cl01tl/monitoring/gatus/templates/service-monitor.yaml
new file mode 100644
index 000000000..1e5b0c5d4
--- /dev/null
+++ b/clusters/cl01tl/monitoring/gatus/templates/service-monitor.yaml
@@ -0,0 +1,22 @@
+# Scrapes /metrics every 30s from Services carrying the gatus name/instance labels.
+# NOTE(review): values.yaml also sets gatus.serviceMonitor.enabled: true; confirm
+# the subchart does not render a second ServiceMonitor with the same name.
+apiVersion: monitoring.coreos.com/v1
+kind: ServiceMonitor
+metadata:
+  name: gatus
+  namespace: {{ .Release.Namespace }}
+  labels:
+    app.kubernetes.io/name: gatus
+    app.kubernetes.io/instance: {{ .Release.Name }}
+    app.kubernetes.io/part-of: {{ .Release.Name }}
+spec:
+  selector:
+    matchLabels:
+      app.kubernetes.io/name: gatus
+      app.kubernetes.io/instance: {{ .Release.Name }}
+  endpoints:
+    - port: http  # assumes the Service port is named "http" - TODO confirm
+      interval: 30s
+      scrapeTimeout: 10s
+      path: /metrics
diff --git a/clusters/cl01tl/monitoring/gatus/values.yaml b/clusters/cl01tl/monitoring/gatus/values.yaml
new file mode 100644
index 000000000..26f245b5d
--- /dev/null
+++ b/clusters/cl01tl/monitoring/gatus/values.yaml
@@ -0,0 +1,150 @@
+# Values for the upstream Gatus chart (dependency "gatus" in Chart.yaml).
+gatus:
+  readinessProbe:
+    enabled: true
+  livenessProbe:
+    enabled: true
+  image:
+    repository: ghcr.io/twin/gatus
+    tag: v5.17.0
+  annotations:
+    reloader.stakater.com/auto: "true"
+  service:
+    type: ClusterIP
+    port: 80
+    targetPort: 8080
+  ingress:
+    enabled: false
+  # Secret-backed env vars; the config section below references them as ${VAR}.
+  env:
+    NTFY_TOKEN:
+      valueFrom:
+        secretKeyRef:
+          name: gatus-config-secret
+          key: NTFY_TOKEN
+    OIDC_CLIENT_ID:
+      valueFrom:
+        secretKeyRef:
+          name: gatus-oidc-secret
+          key: OIDC_CLIENT_ID
+    OIDC_CLIENT_SECRET:
+      valueFrom:
+        secretKeyRef:
+          name: gatus-oidc-secret
+          key: OIDC_CLIENT_SECRET
+    POSTGRES_USER:
+      valueFrom:
+        secretKeyRef:
+          name: gatus-postgresql-17-cluster-app
+          key: username
+    POSTGRES_PASSWORD:
+      valueFrom:
+        secretKeyRef:
+          name: gatus-postgresql-17-cluster-app
+          key: password
+    POSTGRES_HOST:
+      valueFrom:
+        secretKeyRef:
+          name: gatus-postgresql-17-cluster-app
+          key: host
+    POSTGRES_PORT:
+      valueFrom:
+        secretKeyRef:
+          name: gatus-postgresql-17-cluster-app
+          key: port
+    POSTGRES_DB:
+      valueFrom:
+        secretKeyRef:
+          name: gatus-postgresql-17-cluster-app
+          key: dbname
+  resources:
+    requests:
+      cpu: 10m
+      memory: 128Mi
+  # NOTE(review): config.storage uses postgres; confirm this PVC is still needed.
+  persistence:
+    enabled: true
+    size: 1Gi
+    mountPath: /data
+    accessModes:
+      - ReadWriteOnce
+    finalizers:
+      - kubernetes.io/pvc-protection
+    storageClassName: ceph-block
+  # NOTE(review): templates/service-monitor.yaml also defines a ServiceMonitor;
+  # confirm only one of the two is intended.
+  serviceMonitor:
+    enabled: true
+    interval: 1m
+    path: /metrics
+    scheme: http
+    scrapeTimeout: 30s
+  networkPolicy:
+    enabled: false
+  config:
+    metrics: true
+    connectivity:
+      checker:
+        target: 1.1.1.1:53
+        interval: 60s
+    alerting:
+      ntfy:
+        topic: "gatus-alerts"
+        priority: 3
+        url: http://ntfy.ntfy
+        token: ${NTFY_TOKEN}
+        default-alert:
+          failure-threshold: 3
+          send-on-resolved: true
+        # click is an ntfy provider option, so it sits beside default-alert.
+        click: "https://gatus.alexlebens.net"
+    security:
+      oidc:
+        issuer-url: https://authentik.alexlebens.net/application/o/gatus/
+        client-id: ${OIDC_CLIENT_ID}
+        client-secret: ${OIDC_CLIENT_SECRET}
+        redirect-url: https://gatus.alexlebens.net/authorization-code/callback
+        scopes: [openid]
+    storage:
+      type: postgres
+      path: "postgres://${POSTGRES_USER}:${POSTGRES_PASSWORD}@${POSTGRES_HOST}:${POSTGRES_PORT}/${POSTGRES_DB}?sslmode=disable"
+    # NOTE(review): no endpoint lists an `alerts:` entry, so the ntfy provider
+    # above may never fire - confirm.
+    endpoints:
+      - name: homepage
+        url: https://homepage.alexlebens.net
+        interval: 60s
+        conditions:
+          - "[STATUS] == 200"
+          - "[DOMAIN_EXPIRATION] > 720h"
+          - "[CERTIFICATE_EXPIRATION] > 240h"
+
+# Values for the postgres-cluster chart (alias postgres-17-cluster in Chart.yaml).
+postgres-17-cluster:
+  mode: standalone
+  cluster:
+    storage:
+      storageClass: local-path
+    walStorage:
+      storageClass: local-path
+    monitoring:
+      enabled: true
+      prometheusRule:
+        enabled: true
+  recovery:
+    method: objectStore
+    objectStore:
+      destinationPath: s3://postgres-backups-ce540ddf106d186bbddca68a/cl01tl/gatus/gatus-postgresql-17-cluster
+      index: 1
+  backup:
+    enabled: false
+    objectStore:
+      - name: external
+        destinationPath: s3://postgres-backups-ce540ddf106d186bbddca68a/cl01tl/gatus/gatus-postgresql-17-cluster
+        index: 1
+    scheduledBackups:
+      # NOTE(review): named "daily" but the day-of-month step is */3; also confirm
+      # the cron field count the chart expects.
+      - name: daily-backup
+        schedule: "0 0 */3 * *"
+        backupName: external
diff --git a/clusters/cl01tl/monitoring/grafana-operator/templates/grafana-dashboard.yaml b/clusters/cl01tl/monitoring/grafana-operator/templates/grafana-dashboard.yaml
index 3f029d484..7b1f3883a 100644
--- a/clusters/cl01tl/monitoring/grafana-operator/templates/grafana-dashboard.yaml
+++ b/clusters/cl01tl/monitoring/grafana-operator/templates/grafana-dashboard.yaml
@@ -206,6 +206,25 @@ spec:
   resyncPeriod: 1h
   url: http://gitea-http.gitea:3000/alexlebens/grafana-dashboards/raw/branch/main/dashboards/service/descheduler.json
 
+---
+apiVersion: grafana.integreatly.org/v1beta1
+kind: GrafanaDashboard
+metadata:
+  name: grafana-dashboard-gatus
+  namespace: {{ .Release.Namespace }}
+  labels:
+    app.kubernetes.io/name: grafana-dashboard-gatus
+    app.kubernetes.io/instance: {{ .Release.Name }}
+    app.kubernetes.io/part-of: {{ .Release.Name }}
+spec:
+  instanceSelector:
+    matchLabels:
+      app: grafana-main
+  contentCacheDuration: 1h
+  folderUID: grafana-folder-service
+  resyncPeriod: 1h
+  url: http://gitea-http.gitea:3000/alexlebens/grafana-dashboards/raw/branch/main/dashboards/service/gatus.json
+
 ---
 apiVersion: grafana.integreatly.org/v1beta1
 kind: GrafanaDashboard
diff --git a/clusters/cl01tl/monitoring/kube-prometheus-stack/values.yaml b/clusters/cl01tl/monitoring/kube-prometheus-stack/values.yaml
index dc5ea001d..c55be5c51 100644
--- a/clusters/cl01tl/monitoring/kube-prometheus-stack/values.yaml
+++ b/clusters/cl01tl/monitoring/kube-prometheus-stack/values.yaml
@@ -106,6 +106,7 @@ kube-prometheus-stack:
       - directus
       - external-dns
       - freshrss
+      - gatus
       - generic-device-plugin
       - gitea
       - grafana-operator
diff --git a/clusters/cl01tl/services/blocky/values.yaml b/clusters/cl01tl/services/blocky/values.yaml
index 65ba1f4f9..2cddb1b96 100644
--- a/clusters/cl01tl/services/blocky/values.yaml
+++ b/clusters/cl01tl/services/blocky/values.yaml
@@ -113,6 +113,7 @@ blocky:
               ceph IN CNAME traefik-cl01tl
               code-server IN CNAME traefik-cl01tl
               eigenfocus IN CNAME traefik-cl01tl
+              gatus IN CNAME traefik-cl01tl
               gitea IN CNAME traefik-cl01tl
               grafana IN CNAME traefik-cl01tl
               harbor IN CNAME traefik-cl01tl
diff --git a/hosts/ps08rp/blocky/config.yml b/hosts/ps08rp/blocky/config.yml
index 142d53f97..606b204ac 100644
--- a/hosts/ps08rp/blocky/config.yml
+++ b/hosts/ps08rp/blocky/config.yml
@@ -88,6 +88,7 @@ customDNS:
     ceph IN CNAME traefik-cl01tl
     code-server IN CNAME traefik-cl01tl
     eigenfocus IN CNAME traefik-cl01tl
+    gatus IN CNAME traefik-cl01tl
     gitea IN CNAME traefik-cl01tl
     grafana IN CNAME traefik-cl01tl
     harbor IN CNAME traefik-cl01tl
diff --git a/hosts/ps09rp/blocky/config.yml b/hosts/ps09rp/blocky/config.yml
index 142d53f97..606b204ac 100644
--- a/hosts/ps09rp/blocky/config.yml
+++ b/hosts/ps09rp/blocky/config.yml
@@ -88,6 +88,7 @@ customDNS:
     ceph IN CNAME traefik-cl01tl
     code-server IN CNAME traefik-cl01tl
     eigenfocus IN CNAME traefik-cl01tl
+    gatus IN CNAME traefik-cl01tl
     gitea IN CNAME traefik-cl01tl
     grafana IN CNAME traefik-cl01tl
     harbor IN CNAME traefik-cl01tl