diff --git a/clusters/cl01tl/manifests/gatus/ConfigMap-gatus.yaml b/clusters/cl01tl/manifests/gatus/ConfigMap-gatus.yaml index d7b96d7b7..19f3ac5ca 100644 --- a/clusters/cl01tl/manifests/gatus/ConfigMap-gatus.yaml +++ b/clusters/cl01tl/manifests/gatus/ConfigMap-gatus.yaml @@ -141,6 +141,15 @@ data: interval: 30s name: booklore url: https://booklore.alexlebens.net + - alerts: + - type: ntfy + conditions: + - '[STATUS] == 200' + - '[CERTIFICATE_EXPIRATION] > 240h' + group: core + interval: 30s + name: directus + url: https://directus.alexlebens.net - alerts: - type: ntfy conditions: @@ -624,8 +633,8 @@ data: - '[CERTIFICATE_EXPIRATION] > 240h' group: external interval: 30s - name: directus - url: https://directus.alexlebens.net + name: rybbit + url: https://rybbit.alexlebens.dev - alerts: - type: ntfy conditions: diff --git a/clusters/cl01tl/manifests/gatus/Deployment-gatus.yaml b/clusters/cl01tl/manifests/gatus/Deployment-gatus.yaml index a69b6c967..a9e14771a 100644 --- a/clusters/cl01tl/manifests/gatus/Deployment-gatus.yaml +++ b/clusters/cl01tl/manifests/gatus/Deployment-gatus.yaml @@ -26,7 +26,7 @@ spec: app.kubernetes.io/name: gatus app.kubernetes.io/instance: gatus annotations: - checksum/config: 43223b6ed26ee9d293f2b58589f73185e3c07145c23dcff2e741d325d650802f + checksum/config: dd594843ecccec917fff6e237e97e37e85fb8e06bd20e20e688ad4949679d8c9 spec: serviceAccountName: default automountServiceAccountToken: false diff --git a/clusters/cl01tl/manifests/homepage/ConfigMap-homepage.yaml b/clusters/cl01tl/manifests/homepage/ConfigMap-homepage.yaml index 04ef6e2b4..f09b5dded 100644 --- a/clusters/cl01tl/manifests/homepage/ConfigMap-homepage.yaml +++ b/clusters/cl01tl/manifests/homepage/ConfigMap-homepage.yaml @@ -141,6 +141,12 @@ data: href: https://directus.alexlebens.net siteMonitor: http://directus.directus:80 statusStyle: dot + - Analytics: + icon: sh-rybbit-light.webp + description: Rybbit + href: https://rybbit.alexlebens.dev + siteMonitor: 
http://rybbit-client.rybbit:80 + statusStyle: dot - Social Media Management: icon: sh-postiz.webp description: Postiz diff --git a/clusters/cl01tl/manifests/homepage/Deployment-homepage.yaml b/clusters/cl01tl/manifests/homepage/Deployment-homepage.yaml index 77f237eaa..797e67af1 100644 --- a/clusters/cl01tl/manifests/homepage/Deployment-homepage.yaml +++ b/clusters/cl01tl/manifests/homepage/Deployment-homepage.yaml @@ -24,7 +24,7 @@ spec: template: metadata: annotations: - checksum/configMaps: 97bf266fe7c0d76724e55614e5462a752e08472247f938336ac1880a3016275e + checksum/configMaps: 85bc0f6e9791b8fe8582bbf0bdcf1ec88404fea772e85b5c314d3c2f607bd9b0 checksum/secrets: d3ba83f111cd32f92c909268c55ad8bbd4f9e299b74b35b33c1a011180d8b378 labels: app.kubernetes.io/controller: main diff --git a/clusters/cl01tl/manifests/rybbit/Cluster-rybbit-postgresql-18-cluster.yaml b/clusters/cl01tl/manifests/rybbit/Cluster-rybbit-postgresql-18-cluster.yaml new file mode 100644 index 000000000..6c0faa361 --- /dev/null +++ b/clusters/cl01tl/manifests/rybbit/Cluster-rybbit-postgresql-18-cluster.yaml @@ -0,0 +1,58 @@ +apiVersion: postgresql.cnpg.io/v1 +kind: Cluster +metadata: + name: rybbit-postgresql-18-cluster + namespace: rybbit + labels: + helm.sh/chart: postgres-18-cluster-7.5.1 + app.kubernetes.io/name: rybbit-postgresql-18 + app.kubernetes.io/instance: rybbit + app.kubernetes.io/part-of: rybbit + app.kubernetes.io/version: "7.5.1" + app.kubernetes.io/managed-by: Helm +spec: + instances: 3 + imageName: "ghcr.io/cloudnative-pg/postgresql:18.1-standard-trixie" + imagePullPolicy: IfNotPresent + postgresUID: 26 + postgresGID: 26 + storage: + size: 10Gi + storageClass: local-path + walStorage: + size: 2Gi + storageClass: local-path + resources: + limits: + hugepages-2Mi: 256Mi + requests: + cpu: 100m + memory: 256Mi + affinity: + enablePodAntiAffinity: true + topologyKey: kubernetes.io/hostname + primaryUpdateMethod: switchover + primaryUpdateStrategy: unsupervised + logLevel: info + 
enableSuperuserAccess: false + enablePDB: true + postgresql: + parameters: + hot_standby_feedback: "on" + max_slot_wal_keep_size: 2000MB + shared_buffers: 128MB + monitoring: + enablePodMonitor: true + disableDefaultQueries: false + plugins: + - name: barman-cloud.cloudnative-pg.io + enabled: true + isWALArchiver: true + parameters: + barmanObjectName: "rybbit-postgresql-18-backup-garage-local" + serverName: "rybbit-postgresql-18-backup-1" + bootstrap: + initdb: + database: app + owner: app + externalClusters: diff --git a/clusters/cl01tl/manifests/rybbit/Deployment-rybbit-backend.yaml b/clusters/cl01tl/manifests/rybbit/Deployment-rybbit-backend.yaml new file mode 100644 index 000000000..50b02148c --- /dev/null +++ b/clusters/cl01tl/manifests/rybbit/Deployment-rybbit-backend.yaml @@ -0,0 +1,114 @@ +apiVersion: apps/v1 +kind: Deployment +metadata: + name: rybbit-backend + labels: + app.kubernetes.io/controller: backend + app.kubernetes.io/instance: rybbit + app.kubernetes.io/managed-by: Helm + app.kubernetes.io/name: rybbit + helm.sh/chart: rybbit-4.6.2 + namespace: rybbit +spec: + revisionHistoryLimit: 3 + replicas: 1 + strategy: + type: Recreate + selector: + matchLabels: + app.kubernetes.io/controller: backend + app.kubernetes.io/name: rybbit + app.kubernetes.io/instance: rybbit + template: + metadata: + labels: + app.kubernetes.io/controller: backend + app.kubernetes.io/instance: rybbit + app.kubernetes.io/name: rybbit + spec: + enableServiceLinks: false + serviceAccountName: default + automountServiceAccountToken: true + hostIPC: false + hostNetwork: false + hostPID: false + dnsPolicy: ClusterFirst + containers: + - env: + - name: NODE_ENV + value: production + - name: CLICKHOUSE_HOST + value: http://rybbit-clickhouse.rybbit:8123 + - name: CLICKHOUSE_DB + value: analytics + - name: CLICKHOUSE_USER + valueFrom: + secretKeyRef: + key: clickhouse-user + name: rybbit-config-secret + - name: CLICKHOUSE_PASSWORD + valueFrom: + secretKeyRef: + key: clickhouse-password 
+ name: rybbit-config-secret + - name: POSTGRES_HOST + valueFrom: + secretKeyRef: + key: host + name: rybbit-postgresql-18-cluster-app + - name: POSTGRES_PORT + valueFrom: + secretKeyRef: + key: port + name: rybbit-postgresql-18-cluster-app + - name: POSTGRES_DB + valueFrom: + secretKeyRef: + key: database + name: rybbit-postgresql-18-cluster-app + - name: POSTGRES_USER + valueFrom: + secretKeyRef: + key: user + name: rybbit-postgresql-18-cluster-app + - name: POSTGRES_PASSWORD + valueFrom: + secretKeyRef: + key: password + name: rybbit-postgresql-18-cluster-app + - name: BETTER_AUTH_SECRET + valueFrom: + secretKeyRef: + key: better-auth-secret + name: rybbit-config-secret + - name: BASE_URL + value: https://rybbit.alexlebens.dev + - name: DISABLE_SIGNUP + value: "false" + - name: DISABLE_TELEMETRY + value: "true" + - name: MAPBOX_TOKEN + valueFrom: + secretKeyRef: + key: mapbox-token + name: rybbit-config-secret + image: ghcr.io/rybbit-io/rybbit-backend:v2.4.0 + imagePullPolicy: IfNotPresent + livenessProbe: + exec: + command: + - wget + - --no-verbose + - --tries=1 + - --spider + - http://127.0.0.1:3001/api/health + failureThreshold: 5 + initialDelaySeconds: 10 + periodSeconds: 30 + successThreshold: 1 + timeoutSeconds: 5 + name: main + resources: + requests: + cpu: 10m + memory: 256Mi diff --git a/clusters/cl01tl/manifests/rybbit/Deployment-rybbit-clickhouse.yaml b/clusters/cl01tl/manifests/rybbit/Deployment-rybbit-clickhouse.yaml new file mode 100644 index 000000000..41a9e9e8d --- /dev/null +++ b/clusters/cl01tl/manifests/rybbit/Deployment-rybbit-clickhouse.yaml @@ -0,0 +1,76 @@ +apiVersion: apps/v1 +kind: Deployment +metadata: + name: rybbit-clickhouse + labels: + app.kubernetes.io/controller: clickhouse + app.kubernetes.io/instance: rybbit + app.kubernetes.io/managed-by: Helm + app.kubernetes.io/name: rybbit + helm.sh/chart: rybbit-4.6.2 + namespace: rybbit +spec: + revisionHistoryLimit: 3 + replicas: 1 + strategy: + type: Recreate + selector: + 
matchLabels: + app.kubernetes.io/controller: clickhouse + app.kubernetes.io/name: rybbit + app.kubernetes.io/instance: rybbit + template: + metadata: + labels: + app.kubernetes.io/controller: clickhouse + app.kubernetes.io/instance: rybbit + app.kubernetes.io/name: rybbit + spec: + enableServiceLinks: false + serviceAccountName: default + automountServiceAccountToken: true + hostIPC: false + hostNetwork: false + hostPID: false + dnsPolicy: ClusterFirst + containers: + - env: + - name: CLICKHOUSE_DB + value: analytics + - name: CLICKHOUSE_USER + valueFrom: + secretKeyRef: + key: clickhouse-user + name: rybbit-config-secret + - name: CLICKHOUSE_PASSWORD + valueFrom: + secretKeyRef: + key: clickhouse-password + name: rybbit-config-secret + image: clickhouse/clickhouse-server:25.4.2 + imagePullPolicy: IfNotPresent + livenessProbe: + exec: + command: + - wget + - --no-verbose + - --tries=1 + - --spider + - http://localhost:8123/ping + failureThreshold: 5 + initialDelaySeconds: 10 + periodSeconds: 30 + successThreshold: 1 + timeoutSeconds: 5 + name: main + resources: + requests: + cpu: 10m + memory: 256Mi + volumeMounts: + - mountPath: /var/lib/clickhouse + name: clickhouse + volumes: + - name: clickhouse + persistentVolumeClaim: + claimName: clickhouse-data diff --git a/clusters/cl01tl/manifests/rybbit/Deployment-rybbit-client.yaml b/clusters/cl01tl/manifests/rybbit/Deployment-rybbit-client.yaml new file mode 100644 index 000000000..a8486a858 --- /dev/null +++ b/clusters/cl01tl/manifests/rybbit/Deployment-rybbit-client.yaml @@ -0,0 +1,50 @@ +apiVersion: apps/v1 +kind: Deployment +metadata: + name: rybbit-client + labels: + app.kubernetes.io/controller: client + app.kubernetes.io/instance: rybbit + app.kubernetes.io/managed-by: Helm + app.kubernetes.io/name: rybbit + helm.sh/chart: rybbit-4.6.2 + namespace: rybbit +spec: + revisionHistoryLimit: 3 + replicas: 1 + strategy: + type: Recreate + selector: + matchLabels: + app.kubernetes.io/controller: client + 
app.kubernetes.io/name: rybbit + app.kubernetes.io/instance: rybbit + template: + metadata: + labels: + app.kubernetes.io/controller: client + app.kubernetes.io/instance: rybbit + app.kubernetes.io/name: rybbit + spec: + enableServiceLinks: false + serviceAccountName: default + automountServiceAccountToken: true + hostIPC: false + hostNetwork: false + hostPID: false + dnsPolicy: ClusterFirst + containers: + - env: + - name: NODE_ENV + value: production + - name: NEXT_PUBLIC_BACKEND_URL + value: https://rybbit.alexlebens.dev + - name: NEXT_PUBLIC_DISABLE_SIGNUP + value: "false" + image: ghcr.io/rybbit-io/rybbit-client:v2.4.0 + imagePullPolicy: IfNotPresent + name: main + resources: + requests: + cpu: 10m + memory: 256Mi diff --git a/clusters/cl01tl/manifests/rybbit/Deployment-rybbit-cloudflared.yaml b/clusters/cl01tl/manifests/rybbit/Deployment-rybbit-cloudflared.yaml new file mode 100644 index 000000000..7bf5348dc --- /dev/null +++ b/clusters/cl01tl/manifests/rybbit/Deployment-rybbit-cloudflared.yaml @@ -0,0 +1,58 @@ +apiVersion: apps/v1 +kind: Deployment +metadata: + name: rybbit-cloudflared + labels: + app.kubernetes.io/controller: main + app.kubernetes.io/instance: rybbit + app.kubernetes.io/managed-by: Helm + app.kubernetes.io/name: cloudflared + app.kubernetes.io/version: 2026.2.0 + helm.sh/chart: cloudflared-2.3.0 + namespace: rybbit +spec: + revisionHistoryLimit: 3 + replicas: 1 + strategy: + type: Recreate + selector: + matchLabels: + app.kubernetes.io/controller: main + app.kubernetes.io/name: cloudflared + app.kubernetes.io/instance: rybbit + template: + metadata: + labels: + app.kubernetes.io/controller: main + app.kubernetes.io/instance: rybbit + app.kubernetes.io/name: cloudflared + spec: + enableServiceLinks: false + serviceAccountName: default + automountServiceAccountToken: true + hostIPC: false + hostNetwork: false + hostPID: false + dnsPolicy: ClusterFirst + containers: + - args: + - tunnel + - --protocol + - http2 + - --no-autoupdate + - run + - 
--token + - $(CF_MANAGED_TUNNEL_TOKEN) + env: + - name: CF_MANAGED_TUNNEL_TOKEN + valueFrom: + secretKeyRef: + key: cf-tunnel-token + name: rybbit-cloudflared-secret + image: cloudflare/cloudflared:2026.2.0 + imagePullPolicy: IfNotPresent + name: main + resources: + requests: + cpu: 10m + memory: 128Mi diff --git a/clusters/cl01tl/manifests/rybbit/ExternalSecret-data-backup-secret-external.yaml b/clusters/cl01tl/manifests/rybbit/ExternalSecret-data-backup-secret-external.yaml new file mode 100644 index 000000000..2dc04b51f --- /dev/null +++ b/clusters/cl01tl/manifests/rybbit/ExternalSecret-data-backup-secret-external.yaml @@ -0,0 +1,58 @@ +apiVersion: external-secrets.io/v1 +kind: ExternalSecret +metadata: + name: data-backup-secret-external + namespace: rybbit + labels: + helm.sh/chart: volsync-target-config-0.7.0 + app.kubernetes.io/instance: rybbit + app.kubernetes.io/part-of: rybbit + app.kubernetes.io/version: "0.7.0" + app.kubernetes.io/managed-by: Helm + app.kubernetes.io/name: data-backup-secret-external +spec: + secretStoreRef: + kind: ClusterSecretStore + name: vault + target: + template: + mergePolicy: Merge + engineVersion: v2 + data: + RESTIC_REPOSITORY: "{{ .BUCKET_ENDPOINT }}/rybbit/data" + data: + - secretKey: BUCKET_ENDPOINT + remoteRef: + conversionStrategy: Default + decodingStrategy: None + key: /volsync/restic/digital-ocean + metadataPolicy: None + property: BUCKET_ENDPOINT + - secretKey: RESTIC_PASSWORD + remoteRef: + conversionStrategy: Default + decodingStrategy: None + key: /volsync/restic/digital-ocean + metadataPolicy: None + property: RESTIC_PASSWORD + - secretKey: AWS_DEFAULT_REGION + remoteRef: + conversionStrategy: Default + decodingStrategy: None + key: /digital-ocean/home-infra/volsync-backups + metadataPolicy: None + property: AWS_DEFAULT_REGION + - secretKey: AWS_ACCESS_KEY_ID + remoteRef: + conversionStrategy: Default + decodingStrategy: None + key: /digital-ocean/home-infra/volsync-backups + metadataPolicy: None + property: 
AWS_ACCESS_KEY_ID + - secretKey: AWS_SECRET_ACCESS_KEY + remoteRef: + conversionStrategy: Default + decodingStrategy: None + key: /digital-ocean/home-infra/volsync-backups + metadataPolicy: None + property: AWS_SECRET_ACCESS_KEY diff --git a/clusters/cl01tl/manifests/rybbit/ExternalSecret-rybbit-cloudflared-secret.yaml b/clusters/cl01tl/manifests/rybbit/ExternalSecret-rybbit-cloudflared-secret.yaml new file mode 100644 index 000000000..c8f11eaf6 --- /dev/null +++ b/clusters/cl01tl/manifests/rybbit/ExternalSecret-rybbit-cloudflared-secret.yaml @@ -0,0 +1,24 @@ +apiVersion: external-secrets.io/v1 +kind: ExternalSecret +metadata: + name: rybbit-cloudflared-secret + namespace: rybbit + labels: + helm.sh/chart: cloudflared-2.3.0 + app.kubernetes.io/instance: rybbit + app.kubernetes.io/part-of: rybbit + app.kubernetes.io/version: "2.3.0" + app.kubernetes.io/managed-by: Helm + app.kubernetes.io/name: rybbit-cloudflared-secret +spec: + secretStoreRef: + kind: ClusterSecretStore + name: vault + data: + - secretKey: cf-tunnel-token + remoteRef: + conversionStrategy: Default + decodingStrategy: None + key: /cloudflare/tunnels/rybbit + metadataPolicy: None + property: token diff --git a/clusters/cl01tl/manifests/rybbit/ExternalSecret-rybbit-config-secret.yaml b/clusters/cl01tl/manifests/rybbit/ExternalSecret-rybbit-config-secret.yaml new file mode 100644 index 000000000..68934df54 --- /dev/null +++ b/clusters/cl01tl/manifests/rybbit/ExternalSecret-rybbit-config-secret.yaml @@ -0,0 +1,42 @@ +apiVersion: external-secrets.io/v1 +kind: ExternalSecret +metadata: + name: rybbit-config-secret + namespace: rybbit + labels: + app.kubernetes.io/name: rybbit-config-secret + app.kubernetes.io/instance: rybbit + app.kubernetes.io/part-of: rybbit +spec: + secretStoreRef: + kind: ClusterSecretStore + name: vault + data: + - secretKey: clickhouse-user + remoteRef: + conversionStrategy: Default + decodingStrategy: None + key: /cl01tl/rybbit/clickhouse + metadataPolicy: None + property: user + 
- secretKey: clickhouse-password + remoteRef: + conversionStrategy: Default + decodingStrategy: None + key: /cl01tl/rybbit/clickhouse + metadataPolicy: None + property: password + - secretKey: better-auth-secret + remoteRef: + conversionStrategy: Default + decodingStrategy: None + key: /cl01tl/rybbit/auth + metadataPolicy: None + property: better-auth-secret + - secretKey: mapbox-token + remoteRef: + conversionStrategy: Default + decodingStrategy: None + key: /cl01tl/rybbit/auth + metadataPolicy: None + property: mapbox-token diff --git a/clusters/cl01tl/manifests/rybbit/ExternalSecret-rybbit-postgresql-18-backup-garage-local-secret.yaml b/clusters/cl01tl/manifests/rybbit/ExternalSecret-rybbit-postgresql-18-backup-garage-local-secret.yaml new file mode 100644 index 000000000..354b86777 --- /dev/null +++ b/clusters/cl01tl/manifests/rybbit/ExternalSecret-rybbit-postgresql-18-backup-garage-local-secret.yaml @@ -0,0 +1,38 @@ +apiVersion: external-secrets.io/v1 +kind: ExternalSecret +metadata: + name: rybbit-postgresql-18-backup-garage-local-secret + namespace: rybbit + labels: + helm.sh/chart: postgres-18-cluster-7.5.1 + app.kubernetes.io/instance: rybbit + app.kubernetes.io/part-of: rybbit + app.kubernetes.io/version: "7.5.1" + app.kubernetes.io/managed-by: Helm + app.kubernetes.io/name: rybbit-postgresql-18-backup-garage-local-secret +spec: + secretStoreRef: + kind: ClusterSecretStore + name: vault + data: + - secretKey: ACCESS_REGION + remoteRef: + conversionStrategy: Default + decodingStrategy: None + key: /garage/home-infra/postgres-backups + metadataPolicy: None + property: ACCESS_REGION + - secretKey: ACCESS_KEY_ID + remoteRef: + conversionStrategy: Default + decodingStrategy: None + key: /garage/home-infra/postgres-backups + metadataPolicy: None + property: ACCESS_KEY_ID + - secretKey: ACCESS_SECRET_KEY + remoteRef: + conversionStrategy: Default + decodingStrategy: None + key: /garage/home-infra/postgres-backups + 
metadataPolicy: None + property: ACCESS_SECRET_KEY diff --git a/clusters/cl01tl/manifests/rybbit/ExternalSecret-rybbit-postgresql-18-recovery-secret.yaml b/clusters/cl01tl/manifests/rybbit/ExternalSecret-rybbit-postgresql-18-recovery-secret.yaml new file mode 100644 index 000000000..7bb0dfe29 --- /dev/null +++ b/clusters/cl01tl/manifests/rybbit/ExternalSecret-rybbit-postgresql-18-recovery-secret.yaml @@ -0,0 +1,38 @@ +apiVersion: external-secrets.io/v1 +kind: ExternalSecret +metadata: + name: rybbit-postgresql-18-recovery-secret + namespace: rybbit + labels: + helm.sh/chart: postgres-18-cluster-7.5.1 + app.kubernetes.io/instance: rybbit + app.kubernetes.io/part-of: rybbit + app.kubernetes.io/version: "7.5.1" + app.kubernetes.io/managed-by: Helm + app.kubernetes.io/name: rybbit-postgresql-18-recovery-secret +spec: + secretStoreRef: + kind: ClusterSecretStore + name: vault + data: + - secretKey: ACCESS_REGION + remoteRef: + conversionStrategy: Default + decodingStrategy: None + key: /garage/home-infra/postgres-backups + metadataPolicy: None + property: ACCESS_REGION + - secretKey: ACCESS_KEY_ID + remoteRef: + conversionStrategy: Default + decodingStrategy: None + key: /garage/home-infra/postgres-backups + metadataPolicy: None + property: ACCESS_KEY_ID + - secretKey: ACCESS_SECRET_KEY + remoteRef: + conversionStrategy: Default + decodingStrategy: None + key: /garage/home-infra/postgres-backups + metadataPolicy: None + property: ACCESS_SECRET_KEY diff --git a/clusters/cl01tl/manifests/rybbit/ObjectStore-rybbit-postgresql-18-backup-garage-local.yaml b/clusters/cl01tl/manifests/rybbit/ObjectStore-rybbit-postgresql-18-backup-garage-local.yaml new file mode 100644 index 000000000..af42f5653 --- /dev/null +++ b/clusters/cl01tl/manifests/rybbit/ObjectStore-rybbit-postgresql-18-backup-garage-local.yaml @@ -0,0 +1,33 @@ +apiVersion: barmancloud.cnpg.io/v1 +kind: ObjectStore +metadata: + name: 
rybbit-postgresql-18-backup-garage-local + namespace: rybbit + labels: + helm.sh/chart: postgres-18-cluster-7.5.1 + app.kubernetes.io/instance: rybbit + app.kubernetes.io/part-of: rybbit + app.kubernetes.io/version: "7.5.1" + app.kubernetes.io/managed-by: Helm + app.kubernetes.io/name: rybbit-postgresql-18-backup-garage-local +spec: + retentionPolicy: 7d + instanceSidecarConfiguration: + env: + - name: AWS_REQUEST_CHECKSUM_CALCULATION + value: when_required + - name: AWS_RESPONSE_CHECKSUM_VALIDATION + value: when_required + configuration: + destinationPath: s3://postgres-backups/cl01tl/rybbit/rybbit-postgresql-18-cluster + endpointURL: http://garage-main.garage:3900 + s3Credentials: + accessKeyId: + name: rybbit-postgresql-18-backup-garage-local-secret + key: ACCESS_KEY_ID + secretAccessKey: + name: rybbit-postgresql-18-backup-garage-local-secret + key: ACCESS_SECRET_KEY + region: + name: rybbit-postgresql-18-backup-garage-local-secret + key: ACCESS_REGION diff --git a/clusters/cl01tl/manifests/rybbit/ObjectStore-rybbit-postgresql-18-recovery.yaml b/clusters/cl01tl/manifests/rybbit/ObjectStore-rybbit-postgresql-18-recovery.yaml new file mode 100644 index 000000000..c511ec417 --- /dev/null +++ b/clusters/cl01tl/manifests/rybbit/ObjectStore-rybbit-postgresql-18-recovery.yaml @@ -0,0 +1,32 @@ +apiVersion: barmancloud.cnpg.io/v1 +kind: ObjectStore +metadata: + name: "rybbit-postgresql-18-recovery" + namespace: rybbit + labels: + helm.sh/chart: postgres-18-cluster-7.5.1 + app.kubernetes.io/instance: rybbit + app.kubernetes.io/part-of: rybbit + app.kubernetes.io/version: "7.5.1" + app.kubernetes.io/managed-by: Helm + app.kubernetes.io/name: "rybbit-postgresql-18-recovery" +spec: + configuration: + destinationPath: s3://postgres-backups/cl01tl/rybbit/rybbit-postgresql-18-cluster + endpointURL: http://garage-main.garage:3900 + wal: + compression: snappy + maxParallel: 1 + data: + 
compression: snappy + jobs: 1 + s3Credentials: + accessKeyId: + name: rybbit-postgresql-18-recovery-secret + key: ACCESS_KEY_ID + secretAccessKey: + name: rybbit-postgresql-18-recovery-secret + key: ACCESS_SECRET_KEY + region: + name: rybbit-postgresql-18-recovery-secret + key: ACCESS_REGION diff --git a/clusters/cl01tl/manifests/rybbit/PersistentVolumeClaim-clickhouse-data.yaml b/clusters/cl01tl/manifests/rybbit/PersistentVolumeClaim-clickhouse-data.yaml new file mode 100644 index 000000000..a65944027 --- /dev/null +++ b/clusters/cl01tl/manifests/rybbit/PersistentVolumeClaim-clickhouse-data.yaml @@ -0,0 +1,19 @@ +kind: PersistentVolumeClaim +apiVersion: v1 +metadata: + name: clickhouse-data + labels: + app.kubernetes.io/instance: rybbit + app.kubernetes.io/managed-by: Helm + app.kubernetes.io/name: rybbit + helm.sh/chart: rybbit-4.6.2 + annotations: + helm.sh/resource-policy: keep + namespace: rybbit +spec: + accessModes: + - "ReadWriteOnce" + resources: + requests: + storage: "10Gi" + storageClassName: "ceph-block" diff --git a/clusters/cl01tl/manifests/rybbit/PrometheusRule-rybbit-postgresql-18-alert-rules.yaml b/clusters/cl01tl/manifests/rybbit/PrometheusRule-rybbit-postgresql-18-alert-rules.yaml new file mode 100644 index 000000000..c039e6ba2 --- /dev/null +++ b/clusters/cl01tl/manifests/rybbit/PrometheusRule-rybbit-postgresql-18-alert-rules.yaml @@ -0,0 +1,270 @@ +apiVersion: monitoring.coreos.com/v1 +kind: PrometheusRule +metadata: + name: rybbit-postgresql-18-alert-rules + namespace: rybbit + labels: + helm.sh/chart: postgres-18-cluster-7.5.1 + app.kubernetes.io/name: rybbit-postgresql-18 + app.kubernetes.io/instance: rybbit + app.kubernetes.io/part-of: rybbit + app.kubernetes.io/version: "7.5.1" + app.kubernetes.io/managed-by: Helm +spec: + groups: + - name: cloudnative-pg/rybbit-postgresql-18 + rules: + - alert: CNPGClusterBackendsWaitingWarning + annotations: + summary: CNPG Cluster a backend is waiting for longer than 5 minutes. 
+ description: |- + Pod {{ $labels.pod }} + has been waiting for longer than 5 minutes + expr: | + cnpg_backends_waiting_total{namespace="rybbit"} > 300 + for: 1m + labels: + severity: warning + namespace: rybbit + cnpg_cluster: rybbit-postgresql-18-cluster + - alert: CNPGClusterDatabaseDeadlockConflictsWarning + annotations: + summary: CNPG Cluster has over 10 deadlock conflicts. + description: |- + There are over 10 deadlock conflicts in + {{ $labels.pod }} + expr: | + cnpg_pg_stat_database_deadlocks{namespace="rybbit"} > 10 + for: 1m + labels: + severity: warning + namespace: rybbit + cnpg_cluster: rybbit-postgresql-18-cluster + - alert: CNPGClusterHACritical + annotations: + summary: CNPG Cluster has no standby replicas! + description: |- + CloudNativePG Cluster "{{ $labels.job }}" has no ready standby replicas. Your cluster at a severe + risk of data loss and downtime if the primary instance fails. + + The primary instance is still online and able to serve queries, although connections to the `-ro` endpoint + will fail. The `-r` endpoint os operating at reduced capacity and all traffic is being served by the main. + + This can happen during a normal fail-over or automated minor version upgrades in a cluster with 2 or less + instances. The replaced instance may need some time to catch-up with the cluster primary instance. + + This alarm will be always trigger if your cluster is configured to run with only 1 instance. In this + case you may want to silence it. + runbook_url: https://github.com/cloudnative-pg/charts/blob/main/charts/cluster/docs/runbooks/CNPGClusterHACritical.md + expr: | + max by (job) (cnpg_pg_replication_streaming_replicas{namespace="rybbit"} - cnpg_pg_replication_is_wal_receiver_up{namespace="rybbit"}) < 1 + for: 5m + labels: + severity: critical + namespace: rybbit + cnpg_cluster: rybbit-postgresql-18-cluster + - alert: CNPGClusterHAWarning + annotations: + summary: CNPG Cluster less than 2 standby replicas. 
+ description: |- + CloudNativePG Cluster "{{ $labels.job }}" has only {{ $value }} standby replicas, putting + your cluster at risk if another instance fails. The cluster is still able to operate normally, although + the `-ro` and `-r` endpoints operate at reduced capacity. + + This can happen during a normal fail-over or automated minor version upgrades. The replaced instance may + need some time to catch-up with the cluster primary instance. + + This alarm will be constantly triggered if your cluster is configured to run with less than 3 instances. + In this case you may want to silence it. + runbook_url: https://github.com/cloudnative-pg/charts/blob/main/charts/cluster/docs/runbooks/CNPGClusterHAWarning.md + expr: | + max by (job) (cnpg_pg_replication_streaming_replicas{namespace="rybbit"} - cnpg_pg_replication_is_wal_receiver_up{namespace="rybbit"}) < 2 + for: 5m + labels: + severity: warning + namespace: rybbit + cnpg_cluster: rybbit-postgresql-18-cluster + - alert: CNPGClusterHighConnectionsCritical + annotations: + summary: CNPG Instance maximum number of connections critical! + description: |- + CloudNativePG Cluster "rybbit/rybbit-postgresql-18-cluster" instance {{ $labels.pod }} is using {{ $value }}% of + the maximum number of connections. + runbook_url: https://github.com/cloudnative-pg/charts/blob/main/charts/cluster/docs/runbooks/CNPGClusterHighConnectionsCritical.md + expr: | + sum by (pod) (cnpg_backends_total{namespace="rybbit", pod=~"rybbit-postgresql-18-cluster-([1-9][0-9]*)$"}) / max by (pod) (cnpg_pg_settings_setting{name="max_connections", namespace="rybbit", pod=~"rybbit-postgresql-18-cluster-([1-9][0-9]*)$"}) * 100 > 95 + for: 5m + labels: + severity: critical + namespace: rybbit + cnpg_cluster: rybbit-postgresql-18-cluster + - alert: CNPGClusterHighConnectionsWarning + annotations: + summary: CNPG Instance is approaching the maximum number of connections. 
+ description: |- + CloudNativePG Cluster "rybbit/rybbit-postgresql-18-cluster" instance {{ $labels.pod }} is using {{ $value }}% of + the maximum number of connections. + runbook_url: https://github.com/cloudnative-pg/charts/blob/main/charts/cluster/docs/runbooks/CNPGClusterHighConnectionsWarning.md + expr: | + sum by (pod) (cnpg_backends_total{namespace="rybbit", pod=~"rybbit-postgresql-18-cluster-([1-9][0-9]*)$"}) / max by (pod) (cnpg_pg_settings_setting{name="max_connections", namespace="rybbit", pod=~"rybbit-postgresql-18-cluster-([1-9][0-9]*)$"}) * 100 > 80 + for: 5m + labels: + severity: warning + namespace: rybbit + cnpg_cluster: rybbit-postgresql-18-cluster + - alert: CNPGClusterHighReplicationLag + annotations: + summary: CNPG Cluster high replication lag + description: |- + CloudNativePG Cluster "rybbit/rybbit-postgresql-18-cluster" is experiencing a high replication lag of + {{ $value }}ms. + + High replication lag indicates network issues, busy instances, slow queries or suboptimal configuration. + runbook_url: https://github.com/cloudnative-pg/charts/blob/main/charts/cluster/docs/runbooks/CNPGClusterHighReplicationLag.md + expr: | + max(cnpg_pg_replication_lag{namespace="rybbit",pod=~"rybbit-postgresql-18-cluster-([1-9][0-9]*)$"}) * 1000 > 1000 + for: 5m + labels: + severity: warning + namespace: rybbit + cnpg_cluster: rybbit-postgresql-18-cluster + - alert: CNPGClusterInstancesOnSameNode + annotations: + summary: CNPG Cluster instances are located on the same node. + description: |- + CloudNativePG Cluster "rybbit/rybbit-postgresql-18-cluster" has {{ $value }} + instances on the same node {{ $labels.node }}. + + A failure or scheduled downtime of a single node will lead to a potential service disruption and/or data loss. 
+ runbook_url: https://github.com/cloudnative-pg/charts/blob/main/charts/cluster/docs/runbooks/CNPGClusterInstancesOnSameNode.md + expr: | + count by (node) (kube_pod_info{namespace="rybbit", pod=~"rybbit-postgresql-18-cluster-([1-9][0-9]*)$"}) > 1 + for: 5m + labels: + severity: warning + namespace: rybbit + cnpg_cluster: rybbit-postgresql-18-cluster + - alert: CNPGClusterLongRunningTransactionWarning + annotations: + summary: CNPG Cluster query is taking longer than 5 minutes. + description: |- + CloudNativePG Cluster Pod {{ $labels.pod }} + is taking more than 5 minutes (300 seconds) for a query. + expr: |- + cnpg_backends_max_tx_duration_seconds{namespace="rybbit"} > 300 + for: 1m + labels: + severity: warning + namespace: rybbit + cnpg_cluster: rybbit-postgresql-18-cluster + - alert: CNPGClusterLowDiskSpaceCritical + annotations: + summary: CNPG Instance is running out of disk space! + description: |- + CloudNativePG Cluster "rybbit/rybbit-postgresql-18-cluster" is running extremely low on disk space. Check attached PVCs! 
+ runbook_url: https://github.com/cloudnative-pg/charts/blob/main/charts/cluster/docs/runbooks/CNPGClusterLowDiskSpaceCritical.md + expr: | + max(max by(persistentvolumeclaim) (1 - kubelet_volume_stats_available_bytes{namespace="rybbit", persistentvolumeclaim=~"rybbit-postgresql-18-cluster-([1-9][0-9]*)$"} / kubelet_volume_stats_capacity_bytes{namespace="rybbit", persistentvolumeclaim=~"rybbit-postgresql-18-cluster-([1-9][0-9]*)$"})) > 0.9 OR + max(max by(persistentvolumeclaim) (1 - kubelet_volume_stats_available_bytes{namespace="rybbit", persistentvolumeclaim=~"rybbit-postgresql-18-cluster-([1-9][0-9]*)$-wal"} / kubelet_volume_stats_capacity_bytes{namespace="rybbit", persistentvolumeclaim=~"rybbit-postgresql-18-cluster-([1-9][0-9]*)$-wal"})) > 0.9 OR + max(sum by (namespace,persistentvolumeclaim) (kubelet_volume_stats_used_bytes{namespace="rybbit", persistentvolumeclaim=~"rybbit-postgresql-18-cluster-([1-9][0-9]*)$-tbs.*"}) + / + sum by (namespace,persistentvolumeclaim) (kubelet_volume_stats_capacity_bytes{namespace="rybbit", persistentvolumeclaim=~"rybbit-postgresql-18-cluster-([1-9][0-9]*)$-tbs.*"}) + * + on(namespace, persistentvolumeclaim) group_left(volume) + kube_pod_spec_volumes_persistentvolumeclaims_info{pod=~"rybbit-postgresql-18-cluster-([1-9][0-9]*)$"} + ) > 0.9 + for: 5m + labels: + severity: critical + namespace: rybbit + cnpg_cluster: rybbit-postgresql-18-cluster + - alert: CNPGClusterLowDiskSpaceWarning + annotations: + summary: CNPG Instance is running out of disk space. + description: |- + CloudNativePG Cluster "rybbit/rybbit-postgresql-18-cluster" is running low on disk space. Check attached PVCs. 
+            runbook_url: https://github.com/cloudnative-pg/charts/blob/main/charts/cluster/docs/runbooks/CNPGClusterLowDiskSpaceWarning.md
+          expr: |  # three OR-ed checks (> 0.7 used): data PVCs, "-wal" PVCs, "-tbs*" PVCs; no "$" before a suffix, since matchers are fully anchored and "$-wal" / "$-tbs.*" can never match
+            max(max by(persistentvolumeclaim) (1 - kubelet_volume_stats_available_bytes{namespace="rybbit", persistentvolumeclaim=~"rybbit-postgresql-18-cluster-([1-9][0-9]*)$"} / kubelet_volume_stats_capacity_bytes{namespace="rybbit", persistentvolumeclaim=~"rybbit-postgresql-18-cluster-([1-9][0-9]*)$"})) > 0.7 OR
+            max(max by(persistentvolumeclaim) (1 - kubelet_volume_stats_available_bytes{namespace="rybbit", persistentvolumeclaim=~"rybbit-postgresql-18-cluster-([1-9][0-9]*)-wal"} / kubelet_volume_stats_capacity_bytes{namespace="rybbit", persistentvolumeclaim=~"rybbit-postgresql-18-cluster-([1-9][0-9]*)-wal"})) > 0.7 OR
+            max(sum by (namespace,persistentvolumeclaim) (kubelet_volume_stats_used_bytes{namespace="rybbit", persistentvolumeclaim=~"rybbit-postgresql-18-cluster-([1-9][0-9]*)-tbs.*"})
+                /
+                sum by (namespace,persistentvolumeclaim) (kubelet_volume_stats_capacity_bytes{namespace="rybbit", persistentvolumeclaim=~"rybbit-postgresql-18-cluster-([1-9][0-9]*)-tbs.*"})
+                *
+                on(namespace, persistentvolumeclaim) group_left(volume)
+                kube_pod_spec_volumes_persistentvolumeclaims_info{pod=~"rybbit-postgresql-18-cluster-([1-9][0-9]*)$"}
+            ) > 0.7
+          for: 5m  # the expression must stay true for 5m before the alert fires
+          labels:
+            severity: warning
+            namespace: rybbit
+            cnpg_cluster: rybbit-postgresql-18-cluster
+        - alert: CNPGClusterOffline
+          annotations:
+            summary: CNPG Cluster has no running instances!
+            description: |-
+              CloudNativePG Cluster "rybbit/rybbit-postgresql-18-cluster" has no ready instances.
+
+              Having an offline cluster means your applications will not be able to access the database, leading to
+              potential service disruption and/or data loss.
+            runbook_url: https://github.com/cloudnative-pg/charts/blob/main/charts/cluster/docs/runbooks/CNPGClusterOffline.md
+          expr: |  # "OR on() vector(0)" supplies a 0 when no cnpg_collector_up series match, so "== 0" still yields a result
+            (count(cnpg_collector_up{namespace="rybbit",pod=~"rybbit-postgresql-18-cluster-([1-9][0-9]*)$"}) OR on() vector(0)) == 0
+          for: 5m
+          labels:
+            severity: critical
+            namespace: rybbit
+            cnpg_cluster: rybbit-postgresql-18-cluster
+        - alert: CNPGClusterPGDatabaseXidAgeWarning
+          annotations:
+            summary: CNPG Cluster has a number of transactions from the frozen XID to the current one.
+            description: |-
+              Over 300,000,000 transactions from frozen xid
+              on pod {{ $labels.pod }}
+          expr: |  # threshold 300000000 matches the figure quoted in the description
+            cnpg_pg_database_xid_age{namespace="rybbit"} > 300000000
+          for: 1m
+          labels:
+            severity: warning
+            namespace: rybbit
+            cnpg_cluster: rybbit-postgresql-18-cluster
+        - alert: CNPGClusterPGReplicationWarning
+          annotations:
+            summary: CNPG Cluster standby is lagging behind the primary.
+            description: |-
+              Standby is lagging behind by over 300 seconds (5 minutes)
+          expr: |  # threshold 300 matches the "300 seconds" quoted in the description
+            cnpg_pg_replication_lag{namespace="rybbit"} > 300
+          for: 1m
+          labels:
+            severity: warning
+            namespace: rybbit
+            cnpg_cluster: rybbit-postgresql-18-cluster
+        - alert: CNPGClusterReplicaFailingReplicationWarning
+          annotations:
+            summary: CNPG Cluster has a replica is failing to replicate.
+            description: |-
+              Replica {{ $labels.pod }}
+              is failing to replicate
+          expr: |  # compares the two metrics series-by-series; a result exists where in_recovery is greater than is_wal_receiver_up
+            cnpg_pg_replication_in_recovery{namespace="rybbit"} > cnpg_pg_replication_is_wal_receiver_up{namespace="rybbit"}
+          for: 1m
+          labels:
+            severity: warning
+            namespace: rybbit
+            cnpg_cluster: rybbit-postgresql-18-cluster
+        - alert: CNPGClusterZoneSpreadWarning
+          annotations:
+            summary: CNPG Cluster instances in the same zone.
+            description: |-
+              CloudNativePG Cluster "rybbit/rybbit-postgresql-18-cluster" has instances in the same availability zone.
+
+              A disaster in one availability zone will lead to a potential service disruption and/or data loss.
+            runbook_url: https://github.com/cloudnative-pg/charts/blob/main/charts/cluster/docs/runbooks/CNPGClusterZoneSpreadWarning.md
+          expr: |  # counts distinct label_topology_kubernetes_io_zone values across the cluster's pods; a result exists when that count is below 3
+            3 > count(count by (label_topology_kubernetes_io_zone) (kube_pod_info{namespace="rybbit", pod=~"rybbit-postgresql-18-cluster-([1-9][0-9]*)$"} * on(node,instance) group_left(label_topology_kubernetes_io_zone) kube_node_labels)) < 3
+          for: 5m
+          labels:
+            severity: warning
+            namespace: rybbit
+            cnpg_cluster: rybbit-postgresql-18-cluster
diff --git a/clusters/cl01tl/manifests/rybbit/ReplicationSource-data-backup-source-external.yaml b/clusters/cl01tl/manifests/rybbit/ReplicationSource-data-backup-source-external.yaml
new file mode 100644
index 000000000..12e29fe57
--- /dev/null
+++ b/clusters/cl01tl/manifests/rybbit/ReplicationSource-data-backup-source-external.yaml
@@ -0,0 +1,29 @@
+apiVersion: volsync.backube/v1alpha1
+kind: ReplicationSource
+metadata:
+  name: data-backup-source-external
+  namespace: rybbit
+  labels:
+    helm.sh/chart: volsync-target-config-0.7.0
+    app.kubernetes.io/instance: rybbit
+    app.kubernetes.io/part-of: rybbit
+    app.kubernetes.io/version: "0.7.0"
+    app.kubernetes.io/managed-by: Helm
+    app.kubernetes.io/name: data-backup
+spec:
+  sourcePVC: data  # the PVC named "data" in namespace rybbit is the backup source
+  trigger:
+    schedule: '0 9 * * *'  # five-field cron: minute 0, hour 9, every day
+  restic:
+    pruneIntervalDays: 7
+    repository: data-backup-secret-external
+    retain:
+      daily: 7
+      hourly: 0
+      monthly: 3
+      weekly: 4
+      yearly: 1
+    copyMethod: Snapshot
+    storageClassName: ceph-block
+    volumeSnapshotClassName: ceph-blockpool-snapshot
+    cacheCapacity: 1Gi
diff --git a/clusters/cl01tl/manifests/rybbit/ScheduledBackup-rybbit-postgresql-18-scheduled-backup-live-backup.yaml b/clusters/cl01tl/manifests/rybbit/ScheduledBackup-rybbit-postgresql-18-scheduled-backup-live-backup.yaml
new file mode 100644
index 000000000..0212061d0
--- /dev/null
+++ b/clusters/cl01tl/manifests/rybbit/ScheduledBackup-rybbit-postgresql-18-scheduled-backup-live-backup.yaml
@@ -0,0 +1,25 @@
+apiVersion: postgresql.cnpg.io/v1
+kind: ScheduledBackup
+metadata:
+  name: "rybbit-postgresql-18-scheduled-backup-live-backup"
+  namespace: rybbit
+  labels:
+    helm.sh/chart: postgres-18-cluster-7.5.1
+    # app.kubernetes.io/name was declared twice in this map; the shadowed first entry is dropped and the effective (last) value is kept below.
+    app.kubernetes.io/instance: rybbit
+    app.kubernetes.io/part-of: rybbit
+    app.kubernetes.io/version: "7.5.1"
+    app.kubernetes.io/managed-by: Helm
+    app.kubernetes.io/name: "rybbit-postgresql-18-scheduled-backup-live-backup"
+spec:
+  immediate: true
+  suspend: true  # NOTE(review): the schedule is created suspended; confirm this is intended for the live backup
+  schedule: "0 0 0 * * *"  # six-field cron expression (leading seconds field)
+  backupOwnerReference: self
+  cluster:
+    name: rybbit-postgresql-18-cluster
+  method: plugin
+  pluginConfiguration:
+    name: barman-cloud.cloudnative-pg.io
+    parameters:
+      barmanObjectName: "rybbit-postgresql-18-backup-garage-local"
diff --git a/clusters/cl01tl/manifests/rybbit/Service-rybbit-backend.yaml b/clusters/cl01tl/manifests/rybbit/Service-rybbit-backend.yaml
new file mode 100644
index 000000000..5458d73d6
--- /dev/null
+++ b/clusters/cl01tl/manifests/rybbit/Service-rybbit-backend.yaml
@@ -0,0 +1,22 @@
+apiVersion: v1
+kind: Service
+metadata:
+  name: rybbit-backend
+  labels:
+    app.kubernetes.io/instance: rybbit
+    app.kubernetes.io/managed-by: Helm
+    app.kubernetes.io/name: rybbit
+    app.kubernetes.io/service: rybbit-backend
+    helm.sh/chart: rybbit-4.6.2
+  namespace: rybbit
+spec:
+  type: ClusterIP
+  ports:
+    - port: 3001
+      targetPort: 3001
+      protocol: TCP
+      name: http
+  selector:  # routes to pods labelled controller=backend of the rybbit instance
+    app.kubernetes.io/controller: backend
+    app.kubernetes.io/instance: rybbit
+    app.kubernetes.io/name: rybbit
diff --git a/clusters/cl01tl/manifests/rybbit/Service-rybbit-clickhouse.yaml b/clusters/cl01tl/manifests/rybbit/Service-rybbit-clickhouse.yaml
new file mode 100644
index 000000000..7e895404c
--- /dev/null
+++ b/clusters/cl01tl/manifests/rybbit/Service-rybbit-clickhouse.yaml
@@ -0,0 +1,22 @@
+apiVersion: v1
+kind: Service
+metadata:
+  name: rybbit-clickhouse
+  labels:
+    app.kubernetes.io/instance: rybbit
+    app.kubernetes.io/managed-by: Helm
+    app.kubernetes.io/name: rybbit
+    app.kubernetes.io/service: rybbit-clickhouse
+    helm.sh/chart: rybbit-4.6.2
+  namespace: rybbit
+spec:
+  type: ClusterIP
+  ports:
+    - port: 8123
+      targetPort: 8123
+      protocol: TCP
+      name: http
+  selector:  # routes to pods labelled controller=clickhouse of the rybbit instance
+    app.kubernetes.io/controller: clickhouse
+    app.kubernetes.io/instance: rybbit
+    app.kubernetes.io/name: rybbit
diff --git a/clusters/cl01tl/manifests/rybbit/Service-rybbit-client.yaml b/clusters/cl01tl/manifests/rybbit/Service-rybbit-client.yaml
new file mode 100644
index 000000000..5bb968d00
--- /dev/null
+++ b/clusters/cl01tl/manifests/rybbit/Service-rybbit-client.yaml
@@ -0,0 +1,22 @@
+apiVersion: v1
+kind: Service
+metadata:
+  name: rybbit-client
+  labels:
+    app.kubernetes.io/instance: rybbit
+    app.kubernetes.io/managed-by: Helm
+    app.kubernetes.io/name: rybbit
+    app.kubernetes.io/service: rybbit-client
+    helm.sh/chart: rybbit-4.6.2
+  namespace: rybbit
+spec:
+  type: ClusterIP
+  ports:
+    - port: 80  # service port 80 forwards to container port 3002
+      targetPort: 3002
+      protocol: TCP
+      name: http
+  selector:  # routes to pods labelled controller=client of the rybbit instance
+    app.kubernetes.io/controller: client
+    app.kubernetes.io/instance: rybbit
+    app.kubernetes.io/name: rybbit