diff --git a/clusters/cl01tl/manifests/jellystat/-.yaml b/clusters/cl01tl/manifests/jellystat/-.yaml
deleted file mode 100644
index 8b1378917..000000000
--- a/clusters/cl01tl/manifests/jellystat/-.yaml
+++ /dev/null
@@ -1 +0,0 @@
-
diff --git a/clusters/cl01tl/manifests/jellystat/Cluster-jellystat-postgresql-18-cluster.yaml b/clusters/cl01tl/manifests/jellystat/Cluster-jellystat-postgresql-18-cluster.yaml
new file mode 100644
index 000000000..12db48253
--- /dev/null
+++ b/clusters/cl01tl/manifests/jellystat/Cluster-jellystat-postgresql-18-cluster.yaml
@@ -0,0 +1,68 @@
+apiVersion: postgresql.cnpg.io/v1
+kind: Cluster
+metadata:
+  name: jellystat-postgresql-18-cluster
+  namespace: jellystat
+  labels:
+    app.kubernetes.io/name: jellystat-postgresql-18-cluster
+    helm.sh/chart: postgres-18-cluster-7.11.1
+    app.kubernetes.io/instance: jellystat
+    app.kubernetes.io/part-of: jellystat
+    app.kubernetes.io/version: "7.11.1"
+    app.kubernetes.io/managed-by: Helm
+spec:
+  instances: 3
+  imageName: "ghcr.io/cloudnative-pg/postgresql:18.3-standard-trixie"
+  imagePullPolicy: IfNotPresent
+  postgresUID: 26
+  postgresGID: 26
+  storage:
+    size: 10Gi
+    storageClass: local-path
+  walStorage:
+    size: 2Gi
+    storageClass: local-path
+  resources:
+    limits:
+      hugepages-2Mi: 256Mi
+    requests:
+      cpu: 20m
+      memory: 80Mi
+  affinity:
+    enablePodAntiAffinity: true
+    topologyKey: kubernetes.io/hostname
+  primaryUpdateMethod: switchover
+  primaryUpdateStrategy: unsupervised
+  logLevel: info
+  enableSuperuserAccess: false
+  enablePDB: true
+  postgresql:
+    parameters:
+      hot_standby_feedback: "on"
+      max_slot_wal_keep_size: 2000MB
+      shared_buffers: 128MB
+  monitoring:
+    enablePodMonitor: true
+    disableDefaultQueries: false
+  plugins:
+    - name: barman-cloud.cloudnative-pg.io
+      enabled: true
+      isWALArchiver: true
+      parameters:
+        barmanObjectName: "jellystat-postgresql-18-backup-garage-local"
+        serverName: "jellystat-postgresql-18-backup-1"
+  # NOTE(review): the recovery source below and the WAL archiver plugin above use the same
+  # serverName, and both ObjectStores share one destinationPath - confirm this is intended.
+  bootstrap:
+    recovery:
+      database: app
+      source: jellystat-postgresql-18-backup-1
+  externalClusters:
+    - name: jellystat-postgresql-18-backup-1
+      plugin:
+        name: barman-cloud.cloudnative-pg.io
+        enabled: true
+        isWALArchiver: false
+        parameters:
+          barmanObjectName: "jellystat-postgresql-18-recovery"
+          serverName: jellystat-postgresql-18-backup-1
diff --git a/clusters/cl01tl/manifests/jellystat/Deployment-jellystat.yaml b/clusters/cl01tl/manifests/jellystat/Deployment-jellystat.yaml
new file mode 100644
index 000000000..5eb7ae4bb
--- /dev/null
+++ b/clusters/cl01tl/manifests/jellystat/Deployment-jellystat.yaml
@@ -0,0 +1,92 @@
+apiVersion: apps/v1
+kind: Deployment
+metadata:
+  name: jellystat
+  labels:
+    app.kubernetes.io/controller: main
+    app.kubernetes.io/instance: jellystat
+    app.kubernetes.io/managed-by: Helm
+    app.kubernetes.io/name: jellystat
+    helm.sh/chart: jellystat-4.6.2
+  namespace: jellystat
+spec:
+  revisionHistoryLimit: 3
+  replicas: 1
+  strategy:
+    type: Recreate
+  selector:
+    matchLabels:
+      app.kubernetes.io/controller: main
+      app.kubernetes.io/name: jellystat
+      app.kubernetes.io/instance: jellystat
+  template:
+    metadata:
+      labels:
+        app.kubernetes.io/controller: main
+        app.kubernetes.io/instance: jellystat
+        app.kubernetes.io/name: jellystat
+    spec:
+      enableServiceLinks: false
+      serviceAccountName: default
+      automountServiceAccountToken: true
+      hostIPC: false
+      hostNetwork: false
+      hostPID: false
+      dnsPolicy: ClusterFirst
+      containers:
+        - env:
+            - name: TZ
+              value: America/Chicago
+            - name: JWT_SECRET
+              valueFrom:
+                secretKeyRef:
+                  key: secret-key
+                  name: jellystat-secret
+            - name: JS_USER
+              valueFrom:
+                secretKeyRef:
+                  key: user
+                  name: jellystat-secret
+            - name: JS_PASSWORD
+              valueFrom:
+                secretKeyRef:
+                  key: password
+                  name: jellystat-secret
+            - name: POSTGRES_USER
+              valueFrom:
+                secretKeyRef:
+                  key: username
+                  name: jellystat-postgresql-18-cluster-app
+            - name: POSTGRES_PASSWORD
+              valueFrom:
+                secretKeyRef:
+                  key: password
+                  name: jellystat-postgresql-18-cluster-app
+            - name: POSTGRES_DB
+              valueFrom:
+                secretKeyRef:
+                  key: dbname
+                  name: jellystat-postgresql-18-cluster-app
+            - name: POSTGRES_IP
+              valueFrom:
+                secretKeyRef:
+                  key: host
+                  name: jellystat-postgresql-18-cluster-app
+            - name: POSTGRES_PORT
+              valueFrom:
+                secretKeyRef:
+                  key: port
+                  name: jellystat-postgresql-18-cluster-app
+          image: ghcr.io/cyfershepard/jellystat:1.1.9@sha256:f7f56aabad139faa996b8bb21a36dd3e65f7c87e10408921815b95a28a4efbaf
+          name: main
+          resources:
+            requests:
+              cpu: 10m
+              memory: 400Mi
+          volumeMounts:
+            - mountPath: /app/backend/backup-data
+              name: data
+      volumes:
+        - name: data
+          persistentVolumeClaim:
+            claimName: jellystat-data
diff --git a/clusters/cl01tl/manifests/jellystat/ExternalSecret-jellystat-data-backup-secret-external.yaml b/clusters/cl01tl/manifests/jellystat/ExternalSecret-jellystat-data-backup-secret-external.yaml
new file mode 100644
index 000000000..7a9d50432
--- /dev/null
+++ b/clusters/cl01tl/manifests/jellystat/ExternalSecret-jellystat-data-backup-secret-external.yaml
@@ -0,0 +1,58 @@
+apiVersion: external-secrets.io/v1
+kind: ExternalSecret
+metadata:
+  name: jellystat-data-backup-secret-external
+  namespace: jellystat
+  labels:
+    helm.sh/chart: volsync-target-data-0.8.0
+    app.kubernetes.io/instance: jellystat
+    app.kubernetes.io/part-of: jellystat
+    app.kubernetes.io/version: "0.8.0"
+    app.kubernetes.io/managed-by: Helm
+    app.kubernetes.io/name: jellystat-data-backup-secret-external
+spec:
+  secretStoreRef:
+    kind: ClusterSecretStore
+    name: vault
+  target:
+    template:
+      mergePolicy: Merge
+      engineVersion: v2
+      data:
+        RESTIC_REPOSITORY: "{{ .BUCKET_ENDPOINT }}/jellystat/jellystat-data"
+  data:
+    - secretKey: BUCKET_ENDPOINT
+      remoteRef:
+        conversionStrategy: Default
+        decodingStrategy: None
+        key: /volsync/restic/digital-ocean
+        metadataPolicy: None
+        property: BUCKET_ENDPOINT
+    - secretKey: RESTIC_PASSWORD
+      remoteRef:
+        conversionStrategy: Default
+        decodingStrategy: None
+        key: /volsync/restic/digital-ocean
+        metadataPolicy: None
+        property: RESTIC_PASSWORD
+    - secretKey: AWS_DEFAULT_REGION
+      remoteRef:
+        conversionStrategy: Default
+        decodingStrategy: None
+        key: /digital-ocean/home-infra/volsync-backups
+        metadataPolicy: None
+        property: AWS_DEFAULT_REGION
+    - secretKey: AWS_ACCESS_KEY_ID
+      remoteRef:
+        conversionStrategy: Default
+        decodingStrategy: None
+        key: /digital-ocean/home-infra/volsync-backups
+        metadataPolicy: None
+        property: AWS_ACCESS_KEY_ID
+    - secretKey: AWS_SECRET_ACCESS_KEY
+      remoteRef:
+        conversionStrategy: Default
+        decodingStrategy: None
+        key: /digital-ocean/home-infra/volsync-backups
+        metadataPolicy: None
+        property: AWS_SECRET_ACCESS_KEY
diff --git a/clusters/cl01tl/manifests/jellystat/ExternalSecret-jellystat-data-backup-secret-local.yaml b/clusters/cl01tl/manifests/jellystat/ExternalSecret-jellystat-data-backup-secret-local.yaml
new file mode 100644
index 000000000..81697c742
--- /dev/null
+++ b/clusters/cl01tl/manifests/jellystat/ExternalSecret-jellystat-data-backup-secret-local.yaml
@@ -0,0 +1,58 @@
+apiVersion: external-secrets.io/v1
+kind: ExternalSecret
+metadata:
+  name: jellystat-data-backup-secret-local
+  namespace: jellystat
+  labels:
+    helm.sh/chart: volsync-target-data-0.8.0
+    app.kubernetes.io/instance: jellystat
+    app.kubernetes.io/part-of: jellystat
+    app.kubernetes.io/version: "0.8.0"
+    app.kubernetes.io/managed-by: Helm
+    app.kubernetes.io/name: jellystat-data-backup-secret-local
+spec:
+  secretStoreRef:
+    kind: ClusterSecretStore
+    name: vault
+  target:
+    template:
+      mergePolicy: Merge
+      engineVersion: v2
+      data:
+        RESTIC_REPOSITORY: "{{ .BUCKET_ENDPOINT }}/jellystat/jellystat-data"
+  data:
+    - secretKey: BUCKET_ENDPOINT
+      remoteRef:
+        conversionStrategy: Default
+        decodingStrategy: None
+        key: /volsync/restic/garage-local
+        metadataPolicy: None
+        property: BUCKET_ENDPOINT
+    - secretKey: RESTIC_PASSWORD
+      remoteRef:
+        conversionStrategy: Default
+        decodingStrategy: None
+        key: /volsync/restic/garage-local
+        metadataPolicy: None
+        property: RESTIC_PASSWORD
+    - secretKey: AWS_DEFAULT_REGION
+      remoteRef:
+        conversionStrategy: Default
+        decodingStrategy: None
+        key: /garage/home-infra/volsync-backups
+        metadataPolicy: None
+        property: ACCESS_REGION
+    - secretKey: AWS_ACCESS_KEY_ID
+      remoteRef:
+        conversionStrategy: Default
+        decodingStrategy: None
+        key: /garage/home-infra/volsync-backups
+        metadataPolicy: None
+        property: ACCESS_KEY_ID
+    - secretKey: AWS_SECRET_ACCESS_KEY
+      remoteRef:
+        conversionStrategy: Default
+        decodingStrategy: None
+        key: /garage/home-infra/volsync-backups
+        metadataPolicy: None
+        property: ACCESS_SECRET_KEY
diff --git a/clusters/cl01tl/manifests/jellystat/ExternalSecret-jellystat-data-backup-secret-remote.yaml b/clusters/cl01tl/manifests/jellystat/ExternalSecret-jellystat-data-backup-secret-remote.yaml
new file mode 100644
index 000000000..118d0de1f
--- /dev/null
+++ b/clusters/cl01tl/manifests/jellystat/ExternalSecret-jellystat-data-backup-secret-remote.yaml
@@ -0,0 +1,58 @@
+apiVersion: external-secrets.io/v1
+kind: ExternalSecret
+metadata:
+  name: jellystat-data-backup-secret-remote
+  namespace: jellystat
+  labels:
+    helm.sh/chart: volsync-target-data-0.8.0
+    app.kubernetes.io/instance: jellystat
+    app.kubernetes.io/part-of: jellystat
+    app.kubernetes.io/version: "0.8.0"
+    app.kubernetes.io/managed-by: Helm
+    app.kubernetes.io/name: jellystat-data-backup-secret-remote
+spec:
+  secretStoreRef:
+    kind: ClusterSecretStore
+    name: vault
+  target:
+    template:
+      mergePolicy: Merge
+      engineVersion: v2
+      data:
+        RESTIC_REPOSITORY: "{{ .BUCKET_ENDPOINT }}/jellystat/jellystat-data"
+  data:
+    - secretKey: BUCKET_ENDPOINT
+      remoteRef:
+        conversionStrategy: Default
+        decodingStrategy: None
+        key: /volsync/restic/garage-remote
+        metadataPolicy: None
+        property: BUCKET_ENDPOINT
+    - secretKey: RESTIC_PASSWORD
+      remoteRef:
+        conversionStrategy: Default
+        decodingStrategy: None
+        key: /volsync/restic/garage-remote
+        metadataPolicy: None
+        property: RESTIC_PASSWORD
+    - secretKey: AWS_DEFAULT_REGION
+      remoteRef:
+        conversionStrategy: Default
+        decodingStrategy: None
+        key: /garage/home-infra/volsync-backups
+        metadataPolicy: None
+        property: ACCESS_REGION
+    - secretKey: AWS_ACCESS_KEY_ID
+      remoteRef:
+        conversionStrategy: Default
+        decodingStrategy: None
+        key: /garage/home-infra/volsync-backups
+        metadataPolicy: None
+        property: ACCESS_KEY_ID
+    - secretKey: AWS_SECRET_ACCESS_KEY
+      remoteRef:
+        conversionStrategy: Default
+        decodingStrategy: None
+        key: /garage/home-infra/volsync-backups
+        metadataPolicy: None
+        property: ACCESS_SECRET_KEY
diff --git a/clusters/cl01tl/manifests/jellystat/ExternalSecret-jellystat-postgresql-18-backup-garage-local-secret.yaml b/clusters/cl01tl/manifests/jellystat/ExternalSecret-jellystat-postgresql-18-backup-garage-local-secret.yaml
new file mode 100644
index 000000000..b0891b1b7
--- /dev/null
+++ b/clusters/cl01tl/manifests/jellystat/ExternalSecret-jellystat-postgresql-18-backup-garage-local-secret.yaml
@@ -0,0 +1,38 @@
+apiVersion: external-secrets.io/v1
+kind: ExternalSecret
+metadata:
+  name: jellystat-postgresql-18-backup-garage-local-secret
+  namespace: jellystat
+  labels:
+    app.kubernetes.io/name: jellystat-postgresql-18-backup-garage-local-secret
+    helm.sh/chart: postgres-18-cluster-7.11.1
+    app.kubernetes.io/instance: jellystat
+    app.kubernetes.io/part-of: jellystat
+    app.kubernetes.io/version: "7.11.1"
+    app.kubernetes.io/managed-by: Helm
+spec:
+  secretStoreRef:
+    kind: ClusterSecretStore
+    name: vault
+  data:
+    - secretKey: ACCESS_REGION
+      remoteRef:
+        conversionStrategy: Default
+        decodingStrategy: None
+        key: /garage/home-infra/postgres-backups
+        metadataPolicy: None
+        property: ACCESS_REGION
+    - secretKey: ACCESS_KEY_ID
+      remoteRef:
+        conversionStrategy: Default
+        decodingStrategy: None
+        key: /garage/home-infra/postgres-backups
+        metadataPolicy: None
+        property: ACCESS_KEY_ID
+    - secretKey: ACCESS_SECRET_KEY
+      remoteRef:
+        conversionStrategy: Default
+        decodingStrategy: None
+        key: /garage/home-infra/postgres-backups
+        metadataPolicy: None
+        property: ACCESS_SECRET_KEY
diff --git a/clusters/cl01tl/manifests/jellystat/ExternalSecret-jellystat-postgresql-18-recovery-secret.yaml b/clusters/cl01tl/manifests/jellystat/ExternalSecret-jellystat-postgresql-18-recovery-secret.yaml
new file mode 100644
index 000000000..f35a6abd8
--- /dev/null
+++ b/clusters/cl01tl/manifests/jellystat/ExternalSecret-jellystat-postgresql-18-recovery-secret.yaml
@@ -0,0 +1,38 @@
+apiVersion: external-secrets.io/v1
+kind: ExternalSecret
+metadata:
+  name: jellystat-postgresql-18-recovery-secret
+  namespace: jellystat
+  labels:
+    helm.sh/chart: postgres-18-cluster-7.11.1
+    app.kubernetes.io/instance: jellystat
+    app.kubernetes.io/part-of: jellystat
+    app.kubernetes.io/version: "7.11.1"
+    app.kubernetes.io/managed-by: Helm
+    app.kubernetes.io/name: jellystat-postgresql-18-recovery-secret
+spec:
+  secretStoreRef:
+    kind: ClusterSecretStore
+    name: vault
+  data:
+    - secretKey: ACCESS_REGION
+      remoteRef:
+        conversionStrategy: Default
+        decodingStrategy: None
+        key: /garage/home-infra/postgres-backups
+        metadataPolicy: None
+        property: ACCESS_REGION
+    - secretKey: ACCESS_KEY_ID
+      remoteRef:
+        conversionStrategy: Default
+        decodingStrategy: None
+        key: /garage/home-infra/postgres-backups
+        metadataPolicy: None
+        property: ACCESS_KEY_ID
+    - secretKey: ACCESS_SECRET_KEY
+      remoteRef:
+        conversionStrategy: Default
+        decodingStrategy: None
+        key: /garage/home-infra/postgres-backups
+        metadataPolicy: None
+        property: ACCESS_SECRET_KEY
diff --git a/clusters/cl01tl/manifests/jellystat/ExternalSecret-jellystat-secret.yaml b/clusters/cl01tl/manifests/jellystat/ExternalSecret-jellystat-secret.yaml
new file mode 100644
index 000000000..572a849d9
--- /dev/null
+++ b/clusters/cl01tl/manifests/jellystat/ExternalSecret-jellystat-secret.yaml
@@ -0,0 +1,26 @@
+apiVersion: external-secrets.io/v1
+kind: ExternalSecret
+metadata:
+  name: jellystat-secret
+  namespace: jellystat
+  labels:
+    app.kubernetes.io/name: jellystat-secret
+    app.kubernetes.io/instance: jellystat
+    app.kubernetes.io/part-of: jellystat
+spec:
+  secretStoreRef:
+    kind: ClusterSecretStore
+    name: vault
+  data:
+    - secretKey: secret-key
+      remoteRef:
+        key: /cl01tl/jellystat/auth
+        property: secret-key
+    - secretKey: user
+      remoteRef:
+        key: /cl01tl/jellystat/auth
+        property: user
+    - secretKey: password
+      remoteRef:
+        key: /cl01tl/jellystat/auth
+        property: password
diff --git a/clusters/cl01tl/manifests/jellystat/HTTPRoute-jellystat.yaml b/clusters/cl01tl/manifests/jellystat/HTTPRoute-jellystat.yaml
new file mode 100644
index 000000000..c8c409790
--- /dev/null
+++ b/clusters/cl01tl/manifests/jellystat/HTTPRoute-jellystat.yaml
@@ -0,0 +1,30 @@
+apiVersion: gateway.networking.k8s.io/v1
+kind: HTTPRoute
+metadata:
+  name: jellystat
+  labels:
+    app.kubernetes.io/instance: jellystat
+    app.kubernetes.io/managed-by: Helm
+    app.kubernetes.io/name: jellystat
+    helm.sh/chart: jellystat-4.6.2
+  namespace: jellystat
+spec:
+  parentRefs:
+    - group: gateway.networking.k8s.io
+      kind: Gateway
+      name: traefik-gateway
+      namespace: traefik
+  hostnames:
+    - "jellystat.alexlebens.net"
+  rules:
+    - backendRefs:
+        - group: ""
+          kind: Service
+          name: jellystat
+          namespace: jellystat
+          port: 80
+          weight: 1
+      matches:
+        - path:
+            type: PathPrefix
+            value: /
diff --git a/clusters/cl01tl/manifests/jellystat/ObjectStore-jellystat-postgresql-18-backup-garage-local.yaml b/clusters/cl01tl/manifests/jellystat/ObjectStore-jellystat-postgresql-18-backup-garage-local.yaml
new file mode 100644
index 000000000..af7dc454d
--- /dev/null
+++ b/clusters/cl01tl/manifests/jellystat/ObjectStore-jellystat-postgresql-18-backup-garage-local.yaml
@@ -0,0 +1,33 @@
+apiVersion: barmancloud.cnpg.io/v1
+kind: ObjectStore
+metadata:
+  name: jellystat-postgresql-18-backup-garage-local
+  namespace: jellystat
+  labels:
+    app.kubernetes.io/name: jellystat-postgresql-18-backup-garage-local
+    helm.sh/chart: postgres-18-cluster-7.11.1
+    app.kubernetes.io/instance: jellystat
+    app.kubernetes.io/part-of: jellystat
+    app.kubernetes.io/version: "7.11.1"
+    app.kubernetes.io/managed-by: Helm
+spec:
+  retentionPolicy: 7d
+  instanceSidecarConfiguration:
+    env:
+      - name: AWS_REQUEST_CHECKSUM_CALCULATION
+        value: when_required
+      - name: AWS_RESPONSE_CHECKSUM_VALIDATION
+        value: when_required
+  configuration:
+    destinationPath: s3://postgres-backups/cl01tl/jellystat/jellystat-postgresql-18-cluster
+    endpointURL: http://garage-main.garage:3900
+    s3Credentials:
+      accessKeyId:
+        name: jellystat-postgresql-18-backup-garage-local-secret
+        key: ACCESS_KEY_ID
+      secretAccessKey:
+        name: jellystat-postgresql-18-backup-garage-local-secret
+        key: ACCESS_SECRET_KEY
+      region:
+        name: jellystat-postgresql-18-backup-garage-local-secret
+        key: ACCESS_REGION
diff --git a/clusters/cl01tl/manifests/jellystat/ObjectStore-jellystat-postgresql-18-recovery.yaml b/clusters/cl01tl/manifests/jellystat/ObjectStore-jellystat-postgresql-18-recovery.yaml
new file mode 100644
index 000000000..8ede8f578
--- /dev/null
+++ b/clusters/cl01tl/manifests/jellystat/ObjectStore-jellystat-postgresql-18-recovery.yaml
@@ -0,0 +1,32 @@
+apiVersion: barmancloud.cnpg.io/v1
+kind: ObjectStore
+metadata:
+  name: "jellystat-postgresql-18-recovery"
+  namespace: jellystat
+  labels:
+    helm.sh/chart: postgres-18-cluster-7.11.1
+    app.kubernetes.io/instance: jellystat
+    app.kubernetes.io/part-of: jellystat
+    app.kubernetes.io/version: "7.11.1"
+    app.kubernetes.io/managed-by: Helm
+    app.kubernetes.io/name: "jellystat-postgresql-18-recovery"
+spec:
+  configuration:
+    destinationPath: s3://postgres-backups/cl01tl/jellystat/jellystat-postgresql-18-cluster
+    endpointURL: http://garage-main.garage:3900
+    wal:
+      compression: snappy
+      maxParallel: 1
+    data:
+      compression: snappy
+      jobs: 1
+    s3Credentials:
+      accessKeyId:
+        name: jellystat-postgresql-18-recovery-secret
+        key: ACCESS_KEY_ID
+      secretAccessKey:
+        name: jellystat-postgresql-18-recovery-secret
+        key: ACCESS_SECRET_KEY
+      region:
+        name: jellystat-postgresql-18-recovery-secret
+        key: ACCESS_REGION
diff --git a/clusters/cl01tl/manifests/jellystat/PersistentVolumeClaim-jellystat-data.yaml b/clusters/cl01tl/manifests/jellystat/PersistentVolumeClaim-jellystat-data.yaml
new file mode 100644
index 000000000..b386d6ce0
--- /dev/null
+++ b/clusters/cl01tl/manifests/jellystat/PersistentVolumeClaim-jellystat-data.yaml
@@ -0,0 +1,17 @@
+kind: PersistentVolumeClaim
+apiVersion: v1
+metadata:
+  name: jellystat-data
+  labels:
+    app.kubernetes.io/instance: jellystat
+    app.kubernetes.io/managed-by: Helm
+    app.kubernetes.io/name: jellystat
+    helm.sh/chart: jellystat-4.6.2
+  namespace: jellystat
+spec:
+  accessModes:
+    - "ReadWriteOnce"
+  resources:
+    requests:
+      storage: "5Gi"
+  storageClassName: "ceph-block"
diff --git a/clusters/cl01tl/manifests/jellystat/PrometheusRule-jellystat-postgresql-18-alert-rules.yaml b/clusters/cl01tl/manifests/jellystat/PrometheusRule-jellystat-postgresql-18-alert-rules.yaml
new file mode 100644
index 000000000..f7fb67fcd
--- /dev/null
+++ b/clusters/cl01tl/manifests/jellystat/PrometheusRule-jellystat-postgresql-18-alert-rules.yaml
@@ -0,0 +1,274 @@
+apiVersion: monitoring.coreos.com/v1
+kind: PrometheusRule
+metadata:
+  name: jellystat-postgresql-18-alert-rules
+  namespace: jellystat
+  labels:
+    app.kubernetes.io/name: jellystat-postgresql-18-alert-rules
+    helm.sh/chart: postgres-18-cluster-7.11.1
+    app.kubernetes.io/instance: jellystat
+    app.kubernetes.io/part-of: jellystat
+    app.kubernetes.io/version: "7.11.1"
+    app.kubernetes.io/managed-by: Helm
+spec:
+  groups:
+    - name: cloudnative-pg/jellystat-postgresql-18
+      rules:
+        - alert: CNPGClusterBackendsWaitingWarning
+          annotations:
+            summary: CNPG Cluster a backend is waiting for longer than 5 minutes.
+            description: |-
+              Pod {{ $labels.pod }}
+              has been waiting for longer than 5 minutes
+          expr: |
+            cnpg_backends_waiting_total{namespace="jellystat"} > 300
+          for: 1m
+          labels:
+            severity: warning
+            namespace: jellystat
+            cnpg_cluster: jellystat-postgresql-18-cluster
+        - alert: CNPGClusterDatabaseDeadlockConflictsWarning
+          annotations:
+            summary: CNPG Cluster has over 10 deadlock conflicts.
+            description: |-
+              There are over 10 deadlock conflicts in
+              {{ $labels.pod }}
+          expr: |
+            cnpg_pg_stat_database_deadlocks{namespace="jellystat"} > 10
+          for: 1m
+          labels:
+            severity: warning
+            namespace: jellystat
+            cnpg_cluster: jellystat-postgresql-18-cluster
+        # Annotations are expanded by Prometheus itself, so label/value placeholders are
+        # written as plain `{{ $labels.x }}` / `{{ $value }}` with no Helm-style escaping.
+        - alert: CNPGClusterHACritical
+          annotations:
+            summary: CNPG Cluster has no standby replicas!
+            description: |-
+              CloudNativePG Cluster "{{ $labels.job }}" has no ready standby replicas. Your cluster at a severe
+              risk of data loss and downtime if the primary instance fails.
+
+              The primary instance is still online and able to serve queries, although connections to the `-ro` endpoint
+              will fail. The `-r` endpoint os operating at reduced capacity and all traffic is being served by the main.
+
+              This can happen during a normal fail-over or automated minor version upgrades in a cluster with 2 or less
+              instances. The replaced instance may need some time to catch-up with the cluster primary instance.
+
+              This alarm will be always trigger if your cluster is configured to run with only 1 instance. In this
+              case you may want to silence it.
+            runbook_url: https://github.com/cloudnative-pg/charts/blob/main/charts/cluster/docs/runbooks/CNPGClusterHACritical.md
+          expr: |
+            max by (job) (cnpg_pg_replication_streaming_replicas{namespace="jellystat"} - cnpg_pg_replication_is_wal_receiver_up{namespace="jellystat"}) < 1
+          for: 5m
+          labels:
+            severity: critical
+            namespace: jellystat
+            cnpg_cluster: jellystat-postgresql-18-cluster
+        - alert: CNPGClusterHAWarning
+          annotations:
+            summary: CNPG Cluster less than 2 standby replicas.
+            description: |-
+              CloudNativePG Cluster "{{ $labels.job }}" has only {{ $value }} standby replicas, putting
+              your cluster at risk if another instance fails. The cluster is still able to operate normally, although
+              the `-ro` and `-r` endpoints operate at reduced capacity.
+
+              This can happen during a normal fail-over or automated minor version upgrades. The replaced instance may
+              need some time to catch-up with the cluster primary instance.
+
+              This alarm will be constantly triggered if your cluster is configured to run with less than 3 instances.
+              In this case you may want to silence it.
+            runbook_url: https://github.com/cloudnative-pg/charts/blob/main/charts/cluster/docs/runbooks/CNPGClusterHAWarning.md
+          expr: |
+            max by (job) (cnpg_pg_replication_streaming_replicas{namespace="jellystat"} - cnpg_pg_replication_is_wal_receiver_up{namespace="jellystat"}) < 2
+          for: 5m
+          labels:
+            severity: warning
+            namespace: jellystat
+            cnpg_cluster: jellystat-postgresql-18-cluster
+        - alert: CNPGClusterHighConnectionsCritical
+          annotations:
+            summary: CNPG Instance maximum number of connections critical!
+            description: |-
+              CloudNativePG Cluster "jellystat/jellystat-postgresql-18-cluster" instance {{ $labels.pod }} is using {{ $value }}% of
+              the maximum number of connections.
+            runbook_url: https://github.com/cloudnative-pg/charts/blob/main/charts/cluster/docs/runbooks/CNPGClusterHighConnectionsCritical.md
+          expr: |
+            sum by (pod) (cnpg_backends_total{namespace="jellystat", pod=~"jellystat-postgresql-18-cluster-([1-9][0-9]*)$"}) / max by (pod) (cnpg_pg_settings_setting{name="max_connections", namespace="jellystat", pod=~"jellystat-postgresql-18-cluster-([1-9][0-9]*)$"}) * 100 > 95
+          for: 5m
+          labels:
+            severity: critical
+            namespace: jellystat
+            cnpg_cluster: jellystat-postgresql-18-cluster
+        - alert: CNPGClusterHighConnectionsWarning
+          annotations:
+            summary: CNPG Instance is approaching the maximum number of connections.
+            description: |-
+              CloudNativePG Cluster "jellystat/jellystat-postgresql-18-cluster" instance {{ $labels.pod }} is using {{ $value }}% of
+              the maximum number of connections.
+            runbook_url: https://github.com/cloudnative-pg/charts/blob/main/charts/cluster/docs/runbooks/CNPGClusterHighConnectionsWarning.md
+          expr: |
+            sum by (pod) (cnpg_backends_total{namespace="jellystat", pod=~"jellystat-postgresql-18-cluster-([1-9][0-9]*)$"}) / max by (pod) (cnpg_pg_settings_setting{name="max_connections", namespace="jellystat", pod=~"jellystat-postgresql-18-cluster-([1-9][0-9]*)$"}) * 100 > 80
+          for: 5m
+          labels:
+            severity: warning
+            namespace: jellystat
+            cnpg_cluster: jellystat-postgresql-18-cluster
+        - alert: CNPGClusterHighReplicationLag
+          annotations:
+            summary: CNPG Cluster high replication lag
+            description: |-
+              CloudNativePG Cluster "jellystat/jellystat-postgresql-18-cluster" is experiencing a high replication lag of
+              {{ $value }}ms.
+
+              High replication lag indicates network issues, busy instances, slow queries or suboptimal configuration.
+            runbook_url: https://github.com/cloudnative-pg/charts/blob/main/charts/cluster/docs/runbooks/CNPGClusterHighReplicationLag.md
+          expr: |
+            max(cnpg_pg_replication_lag{namespace="jellystat",pod=~"jellystat-postgresql-18-cluster-([1-9][0-9]*)$"}) * 1000 > 1000
+          for: 5m
+          labels:
+            severity: warning
+            namespace: jellystat
+            cnpg_cluster: jellystat-postgresql-18-cluster
+        - alert: CNPGClusterInstancesOnSameNode
+          annotations:
+            summary: CNPG Cluster instances are located on the same node.
+            description: |-
+              CloudNativePG Cluster "jellystat/jellystat-postgresql-18-cluster" has {{ $value }}
+              instances on the same node {{ $labels.node }}.
+
+              A failure or scheduled downtime of a single node will lead to a potential service disruption and/or data loss.
+            runbook_url: https://github.com/cloudnative-pg/charts/blob/main/charts/cluster/docs/runbooks/CNPGClusterInstancesOnSameNode.md
+          expr: |
+            count by (node) (kube_pod_info{namespace="jellystat", pod=~"jellystat-postgresql-18-cluster-([1-9][0-9]*)$"}) > 1
+          for: 5m
+          labels:
+            severity: warning
+            namespace: jellystat
+            cnpg_cluster: jellystat-postgresql-18-cluster
+        - alert: CNPGClusterLongRunningTransactionWarning
+          annotations:
+            summary: CNPG Cluster query is taking longer than 5 minutes.
+            description: |-
+              CloudNativePG Cluster Pod {{ $labels.pod }}
+              is taking more than 5 minutes (300 seconds) for a query.
+          expr: |-
+            cnpg_backends_max_tx_duration_seconds{namespace="jellystat"} > 300
+          for: 1m
+          labels:
+            severity: warning
+            namespace: jellystat
+            cnpg_cluster: jellystat-postgresql-18-cluster
+        # PVC matchers in both LowDiskSpace alerts: Prometheus regexes are fully anchored, so the
+        # `-wal` / `-tbs.*` suffix must come before the `$`; a `$` mid-pattern can never match.
+        - alert: CNPGClusterLowDiskSpaceCritical
+          annotations:
+            summary: CNPG Instance is running out of disk space!
+            description: |-
+              CloudNativePG Cluster "jellystat/jellystat-postgresql-18-cluster" is running extremely low on disk space. Check attached PVCs!
+            runbook_url: https://github.com/cloudnative-pg/charts/blob/main/charts/cluster/docs/runbooks/CNPGClusterLowDiskSpaceCritical.md
+          expr: |
+            max(max by(persistentvolumeclaim) (1 - kubelet_volume_stats_available_bytes{namespace="jellystat", persistentvolumeclaim=~"jellystat-postgresql-18-cluster-([1-9][0-9]*)$"} / kubelet_volume_stats_capacity_bytes{namespace="jellystat", persistentvolumeclaim=~"jellystat-postgresql-18-cluster-([1-9][0-9]*)$"})) > 0.9 OR
+            max(max by(persistentvolumeclaim) (1 - kubelet_volume_stats_available_bytes{namespace="jellystat", persistentvolumeclaim=~"jellystat-postgresql-18-cluster-([1-9][0-9]*)-wal$"} / kubelet_volume_stats_capacity_bytes{namespace="jellystat", persistentvolumeclaim=~"jellystat-postgresql-18-cluster-([1-9][0-9]*)-wal$"})) > 0.9 OR
+            max(sum by (namespace,persistentvolumeclaim) (kubelet_volume_stats_used_bytes{namespace="jellystat", persistentvolumeclaim=~"jellystat-postgresql-18-cluster-([1-9][0-9]*)-tbs.*$"})
+                /
+                sum by (namespace,persistentvolumeclaim) (kubelet_volume_stats_capacity_bytes{namespace="jellystat", persistentvolumeclaim=~"jellystat-postgresql-18-cluster-([1-9][0-9]*)-tbs.*$"})
+                *
+                on(namespace, persistentvolumeclaim) group_left(volume)
+                kube_pod_spec_volumes_persistentvolumeclaims_info{pod=~"jellystat-postgresql-18-cluster-([1-9][0-9]*)$"}
+            ) > 0.9
+          for: 5m
+          labels:
+            severity: critical
+            namespace: jellystat
+            cnpg_cluster: jellystat-postgresql-18-cluster
+        - alert: CNPGClusterLowDiskSpaceWarning
+          annotations:
+            summary: CNPG Instance is running out of disk space.
+            description: |-
+              CloudNativePG Cluster "jellystat/jellystat-postgresql-18-cluster" is running low on disk space. Check attached PVCs.
+            runbook_url: https://github.com/cloudnative-pg/charts/blob/main/charts/cluster/docs/runbooks/CNPGClusterLowDiskSpaceWarning.md
+          expr: |
+            max(max by(persistentvolumeclaim) (1 - kubelet_volume_stats_available_bytes{namespace="jellystat", persistentvolumeclaim=~"jellystat-postgresql-18-cluster-([1-9][0-9]*)$"} / kubelet_volume_stats_capacity_bytes{namespace="jellystat", persistentvolumeclaim=~"jellystat-postgresql-18-cluster-([1-9][0-9]*)$"})) > 0.7 OR
+            max(max by(persistentvolumeclaim) (1 - kubelet_volume_stats_available_bytes{namespace="jellystat", persistentvolumeclaim=~"jellystat-postgresql-18-cluster-([1-9][0-9]*)-wal$"} / kubelet_volume_stats_capacity_bytes{namespace="jellystat", persistentvolumeclaim=~"jellystat-postgresql-18-cluster-([1-9][0-9]*)-wal$"})) > 0.7 OR
+            max(sum by (namespace,persistentvolumeclaim) (kubelet_volume_stats_used_bytes{namespace="jellystat", persistentvolumeclaim=~"jellystat-postgresql-18-cluster-([1-9][0-9]*)-tbs.*$"})
+                /
+                sum by (namespace,persistentvolumeclaim) (kubelet_volume_stats_capacity_bytes{namespace="jellystat", persistentvolumeclaim=~"jellystat-postgresql-18-cluster-([1-9][0-9]*)-tbs.*$"})
+                *
+                on(namespace, persistentvolumeclaim) group_left(volume)
+                kube_pod_spec_volumes_persistentvolumeclaims_info{pod=~"jellystat-postgresql-18-cluster-([1-9][0-9]*)$"}
+            ) > 0.7
+          for: 5m
+          labels:
+            severity: warning
+            namespace: jellystat
+            cnpg_cluster: jellystat-postgresql-18-cluster
+        - alert: CNPGClusterOffline
+          annotations:
+            summary: CNPG Cluster has no running instances!
+            description: |-
+              CloudNativePG Cluster "jellystat/jellystat-postgresql-18-cluster" has no ready instances.
+
+              Having an offline cluster means your applications will not be able to access the database, leading to
+              potential service disruption and/or data loss.
+            runbook_url: https://github.com/cloudnative-pg/charts/blob/main/charts/cluster/docs/runbooks/CNPGClusterOffline.md
+          expr: |
+            (count(cnpg_collector_up{namespace="jellystat",pod=~"jellystat-postgresql-18-cluster-([1-9][0-9]*)$"}) OR on() vector(0)) == 0
+          for: 5m
+          labels:
+            severity: critical
+            namespace: jellystat
+            cnpg_cluster: jellystat-postgresql-18-cluster
+        - alert: CNPGClusterPGDatabaseXidAgeWarning
+          annotations:
+            summary: CNPG Cluster has a number of transactions from the frozen XID to the current one.
+            description: |-
+              Over 300,000,000 transactions from frozen xid
+              on pod {{ $labels.pod }}
+          expr: |
+            cnpg_pg_database_xid_age{namespace="jellystat"} > 300000000
+          for: 1m
+          labels:
+            severity: warning
+            namespace: jellystat
+            cnpg_cluster: jellystat-postgresql-18-cluster
+        - alert: CNPGClusterPGReplicationWarning
+          annotations:
+            summary: CNPG Cluster standby is lagging behind the primary.
+            description: |-
+              Standby is lagging behind by over 300 seconds (5 minutes)
+          expr: |
+            cnpg_pg_replication_lag{namespace="jellystat"} > 300
+          for: 1m
+          labels:
+            severity: warning
+            namespace: jellystat
+            cnpg_cluster: jellystat-postgresql-18-cluster
+        - alert: CNPGClusterReplicaFailingReplicationWarning
+          annotations:
+            summary: CNPG Cluster has a replica is failing to replicate.
+            description: |-
+              Replica {{ $labels.pod }}
+              is failing to replicate
+          expr: |
+            cnpg_pg_replication_in_recovery{namespace="jellystat"} > cnpg_pg_replication_is_wal_receiver_up{namespace="jellystat"}
+          for: 1m
+          labels:
+            severity: warning
+            namespace: jellystat
+            cnpg_cluster: jellystat-postgresql-18-cluster
+        - alert: CNPGClusterZoneSpreadWarning
+          annotations:
+            summary: CNPG Cluster instances in the same zone.
+            description: |-
+              CloudNativePG Cluster "jellystat/jellystat-postgresql-18-cluster" has instances in the same availability zone.
+
+              A disaster in one availability zone will lead to a potential service disruption and/or data loss.
+            runbook_url: https://github.com/cloudnative-pg/charts/blob/main/charts/cluster/docs/runbooks/CNPGClusterZoneSpreadWarning.md
+          expr: |
+            3 > count(count by (label_topology_kubernetes_io_zone) (kube_pod_info{namespace="jellystat", pod=~"jellystat-postgresql-18-cluster-([1-9][0-9]*)$"} * on(node,instance) group_left(label_topology_kubernetes_io_zone) kube_node_labels)) < 3
+          for: 5m
+          labels:
+            severity: warning
+            namespace: jellystat
+            cnpg_cluster: jellystat-postgresql-18-cluster
diff --git a/clusters/cl01tl/manifests/jellystat/ReplicationSource-jellystat-data-backup-source-external.yaml b/clusters/cl01tl/manifests/jellystat/ReplicationSource-jellystat-data-backup-source-external.yaml
new file mode 100644
index 000000000..f454ce3b8
--- /dev/null
+++ b/clusters/cl01tl/manifests/jellystat/ReplicationSource-jellystat-data-backup-source-external.yaml
@@ -0,0 +1,29 @@
+apiVersion: volsync.backube/v1alpha1
+kind: ReplicationSource
+metadata:
+  name: jellystat-data-backup-source-external
+  namespace: jellystat
+  labels:
+    helm.sh/chart: volsync-target-data-0.8.0
+    app.kubernetes.io/instance: jellystat
+    app.kubernetes.io/part-of: jellystat
+    app.kubernetes.io/version: "0.8.0"
+    app.kubernetes.io/managed-by: Helm
+    app.kubernetes.io/name: jellystat-data-backup
+spec:
+  sourcePVC: jellystat-data
+  trigger:
+    schedule: 28 10 * * *
+  restic:
+    pruneIntervalDays: 7
+    repository: jellystat-data-backup-secret-external
+    retain:
+      daily: 7
+      hourly: 0
+      monthly: 3
+      weekly: 4
+      yearly: 1
+    copyMethod: Snapshot
+    storageClassName: ceph-block
+    volumeSnapshotClassName: ceph-blockpool-snapshot
+    cacheCapacity: 1Gi
diff --git a/clusters/cl01tl/manifests/jellystat/ReplicationSource-jellystat-data-backup-source-local.yaml b/clusters/cl01tl/manifests/jellystat/ReplicationSource-jellystat-data-backup-source-local.yaml
new file mode 100644
index 000000000..66a18105a
--- /dev/null
+++ b/clusters/cl01tl/manifests/jellystat/ReplicationSource-jellystat-data-backup-source-local.yaml
@@ -0,0 +1,
+1,29 @@
+apiVersion: volsync.backube/v1alpha1
+kind: ReplicationSource  # restic backup of the jellystat-data PVC to the "local" repository
+metadata:
+  name: jellystat-data-backup-source-local
+  namespace: jellystat
+  labels:
+    helm.sh/chart: volsync-target-data-0.8.0
+    app.kubernetes.io/instance: jellystat
+    app.kubernetes.io/part-of: jellystat
+    app.kubernetes.io/version: "0.8.0"
+    app.kubernetes.io/managed-by: Helm
+    app.kubernetes.io/name: jellystat-data-backup
+spec:
+  sourcePVC: jellystat-data
+  trigger:
+    schedule: '28 8 * * *'  # five-field cron: minute 28 of hour 8, every day
+  restic:
+    pruneIntervalDays: 7
+    repository: jellystat-data-backup-secret-local
+    retain:
+      daily: 7
+      hourly: 0
+      monthly: 3
+      weekly: 4
+      yearly: 1
+    copyMethod: Snapshot
+    storageClassName: ceph-block
+    volumeSnapshotClassName: ceph-blockpool-snapshot
+    cacheCapacity: 1Gi
diff --git a/clusters/cl01tl/manifests/jellystat/ReplicationSource-jellystat-data-backup-source-remote.yaml b/clusters/cl01tl/manifests/jellystat/ReplicationSource-jellystat-data-backup-source-remote.yaml
new file mode 100644
index 000000000..c821fb142
--- /dev/null
+++ b/clusters/cl01tl/manifests/jellystat/ReplicationSource-jellystat-data-backup-source-remote.yaml
@@ -0,0 +1,29 @@
+apiVersion: volsync.backube/v1alpha1
+kind: ReplicationSource  # restic backup of the jellystat-data PVC to the "remote" repository
+metadata:
+  name: jellystat-data-backup-source-remote
+  namespace: jellystat
+  labels:
+    helm.sh/chart: volsync-target-data-0.8.0
+    app.kubernetes.io/instance: jellystat
+    app.kubernetes.io/part-of: jellystat
+    app.kubernetes.io/version: "0.8.0"
+    app.kubernetes.io/managed-by: Helm
+    app.kubernetes.io/name: jellystat-data-backup
+spec:
+  sourcePVC: jellystat-data
+  trigger:
+    schedule: '28 9 * * *'  # five-field cron: minute 28 of hour 9, every day
+  restic:
+    pruneIntervalDays: 7
+    repository: jellystat-data-backup-secret-remote
+    retain:
+      daily: 7
+      hourly: 0
+      monthly: 3
+      weekly: 4
+      yearly: 1
+    copyMethod: Snapshot
+    storageClassName: ceph-block
+    volumeSnapshotClassName: ceph-blockpool-snapshot
+    cacheCapacity: 1Gi
diff --git
a/clusters/cl01tl/manifests/jellystat/ScheduledBackup-jellystat-postgresql-18-scheduled-backup-live-backup.yaml b/clusters/cl01tl/manifests/jellystat/ScheduledBackup-jellystat-postgresql-18-scheduled-backup-live-backup.yaml
new file mode 100644
index 000000000..015fd95c8
--- /dev/null
+++ b/clusters/cl01tl/manifests/jellystat/ScheduledBackup-jellystat-postgresql-18-scheduled-backup-live-backup.yaml
@@ -0,0 +1,24 @@
+apiVersion: postgresql.cnpg.io/v1
+kind: ScheduledBackup  # recurring plugin-based backup of jellystat-postgresql-18-cluster
+metadata:
+  name: jellystat-postgresql-18-scheduled-backup-live-backup
+  namespace: jellystat
+  labels:
+    app.kubernetes.io/name: jellystat-postgresql-18-scheduled-backup-live-backup
+    helm.sh/chart: postgres-18-cluster-7.11.1
+    app.kubernetes.io/instance: jellystat
+    app.kubernetes.io/part-of: jellystat
+    app.kubernetes.io/version: "7.11.1"
+    app.kubernetes.io/managed-by: Helm
+spec:
+  immediate: true
+  suspend: false
+  schedule: '0 45 14 * * *'  # six fields; NOTE(review): presumably a leading seconds field, i.e. 14:45:00 daily - confirm against CNPG docs
+  backupOwnerReference: self
+  cluster:
+    name: jellystat-postgresql-18-cluster
+  method: plugin
+  pluginConfiguration:
+    name: barman-cloud.cloudnative-pg.io
+    parameters:
+      barmanObjectName: jellystat-postgresql-18-backup-garage-local
diff --git a/clusters/cl01tl/manifests/jellystat/Service-jellystat.yaml b/clusters/cl01tl/manifests/jellystat/Service-jellystat.yaml
new file mode 100644
index 000000000..1f1af641f
--- /dev/null
+++ b/clusters/cl01tl/manifests/jellystat/Service-jellystat.yaml
@@ -0,0 +1,22 @@
+apiVersion: v1
+kind: Service  # ClusterIP front for the pods labelled controller=main / name=jellystat
+metadata:
+  name: jellystat
+  labels:
+    app.kubernetes.io/instance: jellystat
+    app.kubernetes.io/managed-by: Helm
+    app.kubernetes.io/name: jellystat
+    app.kubernetes.io/service: jellystat
+    helm.sh/chart: jellystat-4.6.2
+  namespace: jellystat
+spec:
+  type: ClusterIP
+  ports:
+    - port: 80  # port exposed by the Service
+      targetPort: 3000  # pod port that receives the traffic
+      protocol: TCP
+      name: http
+  selector:
+    app.kubernetes.io/controller: main
+    app.kubernetes.io/instance: jellystat
+    app.kubernetes.io/name: jellystat