diff --git a/clusters/cl01tl/manifests/radarr-4k/Cluster-radarr-4k-postgresql-18-cluster.yaml b/clusters/cl01tl/manifests/radarr-4k/Cluster-radarr-4k-postgresql-18-cluster.yaml new file mode 100644 index 000000000..00e04bd4b --- /dev/null +++ b/clusters/cl01tl/manifests/radarr-4k/Cluster-radarr-4k-postgresql-18-cluster.yaml @@ -0,0 +1,84 @@ +apiVersion: postgresql.cnpg.io/v1 +kind: Cluster +metadata: + name: radarr-4k-postgresql-18-cluster + namespace: radarr-4k + labels: + helm.sh/chart: postgres-18-cluster-6.17.1 + app.kubernetes.io/name: radarr-4k-postgresql-18 + app.kubernetes.io/instance: radarr-4k + app.kubernetes.io/part-of: radarr-4k + app.kubernetes.io/version: "6.17.1" + app.kubernetes.io/managed-by: Helm +spec: + instances: 3 + imageName: "ghcr.io/cloudnative-pg/postgresql:18.1-standard-trixie" + imagePullPolicy: IfNotPresent + postgresUID: 26 + postgresGID: 26 + plugins: + - name: barman-cloud.cloudnative-pg.io + enabled: true + isWALArchiver: false + parameters: + barmanObjectName: "radarr-4k-postgresql-18-external-backup" + serverName: "radarr-4k-postgresql-18-backup-1" + - name: barman-cloud.cloudnative-pg.io + enabled: true + isWALArchiver: true + parameters: + barmanObjectName: "radarr-4k-postgresql-18-garage-local-backup" + serverName: "radarr-4k-postgresql-18-backup-1" + storage: + size: 10Gi + storageClass: local-path + walStorage: + size: 2Gi + storageClass: local-path + resources: + limits: + hugepages-2Mi: 256Mi + requests: + cpu: 100m + memory: 256Mi + affinity: + enablePodAntiAffinity: true + topologyKey: kubernetes.io/hostname + primaryUpdateMethod: switchover + primaryUpdateStrategy: unsupervised + logLevel: info + enableSuperuserAccess: false + enablePDB: true + postgresql: + parameters: + hot_standby_feedback: "on" + max_slot_wal_keep_size: 2000MB + shared_buffers: 128MB + monitoring: + enablePodMonitor: true + disableDefaultQueries: false + bootstrap: + initdb: + database: app + owner: app + import: + source: + externalCluster: 
importSource + type: monolith + databases: + - radarr-main + - radarr-log + roles: + - app + schemaOnly: false + externalClusters: + - name: importSource + connectionParameters: + host: "radarr5-4k-postgresql-17-cluster-rw" + port: "5432" + user: "postgres" + dbname: "*" + sslmode: "disable" + password: + name: radarr5-4k-postgresql-17-cluster-superuser + key: password diff --git a/clusters/cl01tl/manifests/radarr-4k/Cluster-radarr5-4k-postgresql-17-cluster.yaml b/clusters/cl01tl/manifests/radarr-4k/Cluster-radarr5-4k-postgresql-17-cluster.yaml index 32c1e7366..802323934 100644 --- a/clusters/cl01tl/manifests/radarr-4k/Cluster-radarr5-4k-postgresql-17-cluster.yaml +++ b/clusters/cl01tl/manifests/radarr-4k/Cluster-radarr5-4k-postgresql-17-cluster.yaml @@ -4,11 +4,11 @@ metadata: name: radarr5-4k-postgresql-17-cluster namespace: radarr-4k labels: - helm.sh/chart: postgres-17-cluster-6.16.1 + helm.sh/chart: postgres-17-cluster-6.17.1 app.kubernetes.io/name: radarr5-4k-postgresql-17 app.kubernetes.io/instance: radarr-4k app.kubernetes.io/part-of: radarr-4k - app.kubernetes.io/version: "6.16.1" + app.kubernetes.io/version: "6.17.1" app.kubernetes.io/managed-by: Helm spec: instances: 3 @@ -54,7 +54,7 @@ spec: primaryUpdateMethod: switchover primaryUpdateStrategy: unsupervised logLevel: info - enableSuperuserAccess: false + enableSuperuserAccess: true enablePDB: true postgresql: parameters: diff --git a/clusters/cl01tl/manifests/radarr-4k/ExternalSecret-radarr-4k-postgresql-18-cluster-backup-secret-garage.yaml b/clusters/cl01tl/manifests/radarr-4k/ExternalSecret-radarr-4k-postgresql-18-cluster-backup-secret-garage.yaml new file mode 100644 index 000000000..959f1d17d --- /dev/null +++ b/clusters/cl01tl/manifests/radarr-4k/ExternalSecret-radarr-4k-postgresql-18-cluster-backup-secret-garage.yaml @@ -0,0 +1,35 @@ +apiVersion: external-secrets.io/v1 +kind: ExternalSecret +metadata: + name: radarr-4k-postgresql-18-cluster-backup-secret-garage + namespace: radarr-4k + labels: + 
app.kubernetes.io/name: radarr-4k-postgresql-18-cluster-backup-secret-garage + app.kubernetes.io/instance: radarr-4k + app.kubernetes.io/part-of: radarr-4k +spec: + secretStoreRef: + kind: ClusterSecretStore + name: vault + data: + - secretKey: ACCESS_KEY_ID + remoteRef: + conversionStrategy: Default + decodingStrategy: None + key: /garage/home-infra/postgres-backups + metadataPolicy: None + property: ACCESS_KEY_ID + - secretKey: ACCESS_SECRET_KEY + remoteRef: + conversionStrategy: Default + decodingStrategy: None + key: /garage/home-infra/postgres-backups + metadataPolicy: None + property: ACCESS_SECRET_KEY + - secretKey: ACCESS_REGION + remoteRef: + conversionStrategy: Default + decodingStrategy: None + key: /garage/home-infra/postgres-backups + metadataPolicy: None + property: ACCESS_REGION diff --git a/clusters/cl01tl/manifests/radarr-4k/ExternalSecret-radarr-4k-postgresql-18-cluster-backup-secret.yaml b/clusters/cl01tl/manifests/radarr-4k/ExternalSecret-radarr-4k-postgresql-18-cluster-backup-secret.yaml new file mode 100644 index 000000000..44c1a88b6 --- /dev/null +++ b/clusters/cl01tl/manifests/radarr-4k/ExternalSecret-radarr-4k-postgresql-18-cluster-backup-secret.yaml @@ -0,0 +1,28 @@ +apiVersion: external-secrets.io/v1 +kind: ExternalSecret +metadata: + name: radarr-4k-postgresql-18-cluster-backup-secret + namespace: radarr-4k + labels: + app.kubernetes.io/name: radarr-4k-postgresql-18-cluster-backup-secret + app.kubernetes.io/instance: radarr-4k + app.kubernetes.io/part-of: radarr-4k +spec: + secretStoreRef: + kind: ClusterSecretStore + name: vault + data: + - secretKey: ACCESS_KEY_ID + remoteRef: + conversionStrategy: Default + decodingStrategy: None + key: /digital-ocean/home-infra/postgres-backups + metadataPolicy: None + property: access + - secretKey: ACCESS_SECRET_KEY + remoteRef: + conversionStrategy: Default + decodingStrategy: None + key: /digital-ocean/home-infra/postgres-backups + metadataPolicy: None + property: secret diff --git 
a/clusters/cl01tl/manifests/radarr-4k/ObjectStore-radarr-4k-postgresql-18-external-backup.yaml b/clusters/cl01tl/manifests/radarr-4k/ObjectStore-radarr-4k-postgresql-18-external-backup.yaml new file mode 100644 index 000000000..645f541e8 --- /dev/null +++ b/clusters/cl01tl/manifests/radarr-4k/ObjectStore-radarr-4k-postgresql-18-external-backup.yaml @@ -0,0 +1,24 @@ +apiVersion: barmancloud.cnpg.io/v1 +kind: ObjectStore +metadata: + name: "radarr-4k-postgresql-18-external-backup" + namespace: radarr-4k + labels: + helm.sh/chart: postgres-18-cluster-6.17.1 + app.kubernetes.io/name: radarr-4k-postgresql-18 + app.kubernetes.io/instance: radarr-4k + app.kubernetes.io/part-of: radarr-4k + app.kubernetes.io/version: "6.17.1" + app.kubernetes.io/managed-by: Helm +spec: + retentionPolicy: 30d + configuration: + destinationPath: s3://postgres-backups-ce540ddf106d186bbddca68a/cl01tl/radarr-4k/radarr-4k-postgresql-18-cluster + endpointURL: https://nyc3.digitaloceanspaces.com + s3Credentials: + accessKeyId: + name: radarr-4k-postgresql-18-cluster-backup-secret + key: ACCESS_KEY_ID + secretAccessKey: + name: radarr-4k-postgresql-18-cluster-backup-secret + key: ACCESS_SECRET_KEY diff --git a/clusters/cl01tl/manifests/radarr-4k/ObjectStore-radarr-4k-postgresql-18-garage-local-backup.yaml b/clusters/cl01tl/manifests/radarr-4k/ObjectStore-radarr-4k-postgresql-18-garage-local-backup.yaml new file mode 100644 index 000000000..186360abd --- /dev/null +++ b/clusters/cl01tl/manifests/radarr-4k/ObjectStore-radarr-4k-postgresql-18-garage-local-backup.yaml @@ -0,0 +1,27 @@ +apiVersion: barmancloud.cnpg.io/v1 +kind: ObjectStore +metadata: + name: "radarr-4k-postgresql-18-garage-local-backup" + namespace: radarr-4k + labels: + helm.sh/chart: postgres-18-cluster-6.17.1 + app.kubernetes.io/name: radarr-4k-postgresql-18 + app.kubernetes.io/instance: radarr-4k + app.kubernetes.io/part-of: radarr-4k + app.kubernetes.io/version: "6.17.1" + app.kubernetes.io/managed-by: Helm +spec: + 
retentionPolicy: 3d + configuration: + destinationPath: s3://postgres-backups/cl01tl/radarr-4k/radarr-4k-postgresql-18-cluster + endpointURL: http://garage-main.garage:3900 + s3Credentials: + accessKeyId: + name: radarr-4k-postgresql-18-cluster-backup-secret-garage + key: ACCESS_KEY_ID + secretAccessKey: + name: radarr-4k-postgresql-18-cluster-backup-secret-garage + key: ACCESS_SECRET_KEY + region: + name: radarr-4k-postgresql-18-cluster-backup-secret-garage + key: ACCESS_REGION diff --git a/clusters/cl01tl/manifests/radarr-4k/ObjectStore-radarr5-4k-postgresql-17-external-backup.yaml b/clusters/cl01tl/manifests/radarr-4k/ObjectStore-radarr5-4k-postgresql-17-external-backup.yaml index 98e01f6c7..0c0271a19 100644 --- a/clusters/cl01tl/manifests/radarr-4k/ObjectStore-radarr5-4k-postgresql-17-external-backup.yaml +++ b/clusters/cl01tl/manifests/radarr-4k/ObjectStore-radarr5-4k-postgresql-17-external-backup.yaml @@ -4,11 +4,11 @@ metadata: name: "radarr5-4k-postgresql-17-external-backup" namespace: radarr-4k labels: - helm.sh/chart: postgres-17-cluster-6.16.1 + helm.sh/chart: postgres-17-cluster-6.17.1 app.kubernetes.io/name: radarr5-4k-postgresql-17 app.kubernetes.io/instance: radarr-4k app.kubernetes.io/part-of: radarr-4k - app.kubernetes.io/version: "6.16.1" + app.kubernetes.io/version: "6.17.1" app.kubernetes.io/managed-by: Helm spec: retentionPolicy: 30d diff --git a/clusters/cl01tl/manifests/radarr-4k/ObjectStore-radarr5-4k-postgresql-17-garage-local-backup.yaml b/clusters/cl01tl/manifests/radarr-4k/ObjectStore-radarr5-4k-postgresql-17-garage-local-backup.yaml index 3d963b3fb..b3211ead6 100644 --- a/clusters/cl01tl/manifests/radarr-4k/ObjectStore-radarr5-4k-postgresql-17-garage-local-backup.yaml +++ b/clusters/cl01tl/manifests/radarr-4k/ObjectStore-radarr5-4k-postgresql-17-garage-local-backup.yaml @@ -4,11 +4,11 @@ metadata: name: "radarr5-4k-postgresql-17-garage-local-backup" namespace: radarr-4k labels: - helm.sh/chart: postgres-17-cluster-6.16.1 + 
helm.sh/chart: postgres-17-cluster-6.17.1 app.kubernetes.io/name: radarr5-4k-postgresql-17 app.kubernetes.io/instance: radarr-4k app.kubernetes.io/part-of: radarr-4k - app.kubernetes.io/version: "6.16.1" + app.kubernetes.io/version: "6.17.1" app.kubernetes.io/managed-by: Helm spec: retentionPolicy: 3d diff --git a/clusters/cl01tl/manifests/radarr-4k/ObjectStore-radarr5-4k-postgresql-17-recovery.yaml b/clusters/cl01tl/manifests/radarr-4k/ObjectStore-radarr5-4k-postgresql-17-recovery.yaml index 78f501be4..21edda6d8 100644 --- a/clusters/cl01tl/manifests/radarr-4k/ObjectStore-radarr5-4k-postgresql-17-recovery.yaml +++ b/clusters/cl01tl/manifests/radarr-4k/ObjectStore-radarr5-4k-postgresql-17-recovery.yaml @@ -4,11 +4,11 @@ metadata: name: "radarr5-4k-postgresql-17-recovery" namespace: radarr-4k labels: - helm.sh/chart: postgres-17-cluster-6.16.1 + helm.sh/chart: postgres-17-cluster-6.17.1 app.kubernetes.io/name: radarr5-4k-postgresql-17 app.kubernetes.io/instance: radarr-4k app.kubernetes.io/part-of: radarr-4k - app.kubernetes.io/version: "6.16.1" + app.kubernetes.io/version: "6.17.1" app.kubernetes.io/managed-by: Helm spec: configuration: diff --git a/clusters/cl01tl/manifests/radarr-4k/PrometheusRule-radarr-4k-postgresql-18-alert-rules.yaml b/clusters/cl01tl/manifests/radarr-4k/PrometheusRule-radarr-4k-postgresql-18-alert-rules.yaml new file mode 100644 index 000000000..65fa4d660 --- /dev/null +++ b/clusters/cl01tl/manifests/radarr-4k/PrometheusRule-radarr-4k-postgresql-18-alert-rules.yaml @@ -0,0 +1,270 @@ +apiVersion: monitoring.coreos.com/v1 +kind: PrometheusRule +metadata: + name: radarr-4k-postgresql-18-alert-rules + namespace: radarr-4k + labels: + helm.sh/chart: postgres-18-cluster-6.17.1 + app.kubernetes.io/name: radarr-4k-postgresql-18 + app.kubernetes.io/instance: radarr-4k + app.kubernetes.io/part-of: radarr-4k + app.kubernetes.io/version: "6.17.1" + app.kubernetes.io/managed-by: Helm +spec: + groups: + - name: cloudnative-pg/radarr-4k-postgresql-18 + 
rules: + - alert: CNPGClusterBackendsWaitingWarning + annotations: + summary: CNPG Cluster has a backend waiting for longer than 5 minutes. + description: |- + Pod {{ $labels.pod }} + has been waiting for longer than 5 minutes + expr: | + cnpg_backends_waiting_total > 300 + for: 1m + labels: + severity: warning + namespace: radarr-4k + cnpg_cluster: radarr-4k-postgresql-18-cluster + - alert: CNPGClusterDatabaseDeadlockConflictsWarning + annotations: + summary: CNPG Cluster has over 10 deadlock conflicts. + description: |- + There are over 10 deadlock conflicts in + {{ $labels.pod }} + expr: | + cnpg_pg_stat_database_deadlocks > 10 + for: 1m + labels: + severity: warning + namespace: radarr-4k + cnpg_cluster: radarr-4k-postgresql-18-cluster + - alert: CNPGClusterHACritical + annotations: + summary: CNPG Cluster has no standby replicas! + description: |- + CloudNativePG Cluster "{{ $labels.job }}" has no ready standby replicas. Your cluster is at a severe + risk of data loss and downtime if the primary instance fails. + + The primary instance is still online and able to serve queries, although connections to the `-ro` endpoint + will fail. The `-r` endpoint is operating at reduced capacity and all traffic is being served by the main. + + This can happen during a normal fail-over or automated minor version upgrades in a cluster with 2 or less + instances. The replaced instance may need some time to catch-up with the cluster primary instance. + + This alarm will always trigger if your cluster is configured to run with only 1 instance. In this + case you may want to silence it.
+ runbook_url: https://github.com/cloudnative-pg/charts/blob/main/charts/cluster/docs/runbooks/CNPGClusterHACritical.md + expr: | + max by (job) (cnpg_pg_replication_streaming_replicas{namespace="radarr-4k"} - cnpg_pg_replication_is_wal_receiver_up{namespace="radarr-4k"}) < 1 + for: 5m + labels: + severity: critical + namespace: radarr-4k + cnpg_cluster: radarr-4k-postgresql-18-cluster + - alert: CNPGClusterHAWarning + annotations: + summary: CNPG Cluster has less than 2 standby replicas. + description: |- + CloudNativePG Cluster "{{ $labels.job }}" has only {{ $value }} standby replicas, putting + your cluster at risk if another instance fails. The cluster is still able to operate normally, although + the `-ro` and `-r` endpoints operate at reduced capacity. + + This can happen during a normal fail-over or automated minor version upgrades. The replaced instance may + need some time to catch-up with the cluster primary instance. + + This alarm will be constantly triggered if your cluster is configured to run with less than 3 instances. + In this case you may want to silence it. + runbook_url: https://github.com/cloudnative-pg/charts/blob/main/charts/cluster/docs/runbooks/CNPGClusterHAWarning.md + expr: | + max by (job) (cnpg_pg_replication_streaming_replicas{namespace="radarr-4k"} - cnpg_pg_replication_is_wal_receiver_up{namespace="radarr-4k"}) < 2 + for: 5m + labels: + severity: warning + namespace: radarr-4k + cnpg_cluster: radarr-4k-postgresql-18-cluster + - alert: CNPGClusterHighConnectionsCritical + annotations: + summary: CNPG Instance maximum number of connections critical! + description: |- + CloudNativePG Cluster "radarr-4k/radarr-4k-postgresql-18-cluster" instance {{ $labels.pod }} is using {{ $value }}% of + the maximum number of connections.
+ runbook_url: https://github.com/cloudnative-pg/charts/blob/main/charts/cluster/docs/runbooks/CNPGClusterHighConnectionsCritical.md + expr: | + sum by (pod) (cnpg_backends_total{namespace="radarr-4k", pod=~"radarr-4k-postgresql-18-cluster-([1-9][0-9]*)$"}) / max by (pod) (cnpg_pg_settings_setting{name="max_connections", namespace="radarr-4k", pod=~"radarr-4k-postgresql-18-cluster-([1-9][0-9]*)$"}) * 100 > 95 + for: 5m + labels: + severity: critical + namespace: radarr-4k + cnpg_cluster: radarr-4k-postgresql-18-cluster + - alert: CNPGClusterHighConnectionsWarning + annotations: + summary: CNPG Instance is approaching the maximum number of connections. + description: |- + CloudNativePG Cluster "radarr-4k/radarr-4k-postgresql-18-cluster" instance {{ $labels.pod }} is using {{ $value }}% of + the maximum number of connections. + runbook_url: https://github.com/cloudnative-pg/charts/blob/main/charts/cluster/docs/runbooks/CNPGClusterHighConnectionsWarning.md + expr: | + sum by (pod) (cnpg_backends_total{namespace="radarr-4k", pod=~"radarr-4k-postgresql-18-cluster-([1-9][0-9]*)$"}) / max by (pod) (cnpg_pg_settings_setting{name="max_connections", namespace="radarr-4k", pod=~"radarr-4k-postgresql-18-cluster-([1-9][0-9]*)$"}) * 100 > 80 + for: 5m + labels: + severity: warning + namespace: radarr-4k + cnpg_cluster: radarr-4k-postgresql-18-cluster + - alert: CNPGClusterHighReplicationLag + annotations: + summary: CNPG Cluster high replication lag + description: |- + CloudNativePG Cluster "radarr-4k/radarr-4k-postgresql-18-cluster" is experiencing a high replication lag of + {{ $value }}ms. + + High replication lag indicates network issues, busy instances, slow queries or suboptimal configuration.
+ runbook_url: https://github.com/cloudnative-pg/charts/blob/main/charts/cluster/docs/runbooks/CNPGClusterHighReplicationLag.md + expr: | + max(cnpg_pg_replication_lag{namespace="radarr-4k",pod=~"radarr-4k-postgresql-18-cluster-([1-9][0-9]*)$"}) * 1000 > 1000 + for: 5m + labels: + severity: warning + namespace: radarr-4k + cnpg_cluster: radarr-4k-postgresql-18-cluster + - alert: CNPGClusterInstancesOnSameNode + annotations: + summary: CNPG Cluster instances are located on the same node. + description: |- + CloudNativePG Cluster "radarr-4k/radarr-4k-postgresql-18-cluster" has {{ $value }} + instances on the same node {{ $labels.node }}. + + A failure or scheduled downtime of a single node will lead to a potential service disruption and/or data loss. + runbook_url: https://github.com/cloudnative-pg/charts/blob/main/charts/cluster/docs/runbooks/CNPGClusterInstancesOnSameNode.md + expr: | + count by (node) (kube_pod_info{namespace="radarr-4k", pod=~"radarr-4k-postgresql-18-cluster-([1-9][0-9]*)$"}) > 1 + for: 5m + labels: + severity: warning + namespace: radarr-4k + cnpg_cluster: radarr-4k-postgresql-18-cluster + - alert: CNPGClusterLongRunningTransactionWarning + annotations: + summary: CNPG Cluster query is taking longer than 5 minutes. + description: |- + CloudNativePG Cluster Pod {{ $labels.pod }} + is taking more than 5 minutes (300 seconds) for a query. + expr: |- + cnpg_backends_max_tx_duration_seconds > 300 + for: 1m + labels: + severity: warning + namespace: radarr-4k + cnpg_cluster: radarr-4k-postgresql-18-cluster + - alert: CNPGClusterLowDiskSpaceCritical + annotations: + summary: CNPG Instance is running out of disk space! + description: |- + CloudNativePG Cluster "radarr-4k/radarr-4k-postgresql-18-cluster" is running extremely low on disk space. Check attached PVCs!
+ runbook_url: https://github.com/cloudnative-pg/charts/blob/main/charts/cluster/docs/runbooks/CNPGClusterLowDiskSpaceCritical.md + expr: | + max(max by(persistentvolumeclaim) (1 - kubelet_volume_stats_available_bytes{namespace="radarr-4k", persistentvolumeclaim=~"radarr-4k-postgresql-18-cluster-([1-9][0-9]*)$"} / kubelet_volume_stats_capacity_bytes{namespace="radarr-4k", persistentvolumeclaim=~"radarr-4k-postgresql-18-cluster-([1-9][0-9]*)$"})) > 0.9 OR + max(max by(persistentvolumeclaim) (1 - kubelet_volume_stats_available_bytes{namespace="radarr-4k", persistentvolumeclaim=~"radarr-4k-postgresql-18-cluster-([1-9][0-9]*)-wal"} / kubelet_volume_stats_capacity_bytes{namespace="radarr-4k", persistentvolumeclaim=~"radarr-4k-postgresql-18-cluster-([1-9][0-9]*)-wal"})) > 0.9 OR + max(sum by (namespace,persistentvolumeclaim) (kubelet_volume_stats_used_bytes{namespace="radarr-4k", persistentvolumeclaim=~"radarr-4k-postgresql-18-cluster-([1-9][0-9]*)-tbs.*"}) + / + sum by (namespace,persistentvolumeclaim) (kubelet_volume_stats_capacity_bytes{namespace="radarr-4k", persistentvolumeclaim=~"radarr-4k-postgresql-18-cluster-([1-9][0-9]*)-tbs.*"}) + * + on(namespace, persistentvolumeclaim) group_left(volume) + kube_pod_spec_volumes_persistentvolumeclaims_info{pod=~"radarr-4k-postgresql-18-cluster-([1-9][0-9]*)$"} + ) > 0.9 + for: 5m + labels: + severity: critical + namespace: radarr-4k + cnpg_cluster: radarr-4k-postgresql-18-cluster + - alert: CNPGClusterLowDiskSpaceWarning + annotations: + summary: CNPG Instance is running out of disk space. + description: |- + CloudNativePG Cluster "radarr-4k/radarr-4k-postgresql-18-cluster" is running low on disk space. Check attached PVCs.
+ runbook_url: https://github.com/cloudnative-pg/charts/blob/main/charts/cluster/docs/runbooks/CNPGClusterLowDiskSpaceWarning.md + expr: | + max(max by(persistentvolumeclaim) (1 - kubelet_volume_stats_available_bytes{namespace="radarr-4k", persistentvolumeclaim=~"radarr-4k-postgresql-18-cluster-([1-9][0-9]*)$"} / kubelet_volume_stats_capacity_bytes{namespace="radarr-4k", persistentvolumeclaim=~"radarr-4k-postgresql-18-cluster-([1-9][0-9]*)$"})) > 0.7 OR + max(max by(persistentvolumeclaim) (1 - kubelet_volume_stats_available_bytes{namespace="radarr-4k", persistentvolumeclaim=~"radarr-4k-postgresql-18-cluster-([1-9][0-9]*)-wal"} / kubelet_volume_stats_capacity_bytes{namespace="radarr-4k", persistentvolumeclaim=~"radarr-4k-postgresql-18-cluster-([1-9][0-9]*)-wal"})) > 0.7 OR + max(sum by (namespace,persistentvolumeclaim) (kubelet_volume_stats_used_bytes{namespace="radarr-4k", persistentvolumeclaim=~"radarr-4k-postgresql-18-cluster-([1-9][0-9]*)-tbs.*"}) + / + sum by (namespace,persistentvolumeclaim) (kubelet_volume_stats_capacity_bytes{namespace="radarr-4k", persistentvolumeclaim=~"radarr-4k-postgresql-18-cluster-([1-9][0-9]*)-tbs.*"}) + * + on(namespace, persistentvolumeclaim) group_left(volume) + kube_pod_spec_volumes_persistentvolumeclaims_info{pod=~"radarr-4k-postgresql-18-cluster-([1-9][0-9]*)$"} + ) > 0.7 + for: 5m + labels: + severity: warning + namespace: radarr-4k + cnpg_cluster: radarr-4k-postgresql-18-cluster + - alert: CNPGClusterOffline + annotations: + summary: CNPG Cluster has no running instances! + description: |- + CloudNativePG Cluster "radarr-4k/radarr-4k-postgresql-18-cluster" has no ready instances. + + Having an offline cluster means your applications will not be able to access the database, leading to + potential service disruption and/or data loss.
+ runbook_url: https://github.com/cloudnative-pg/charts/blob/main/charts/cluster/docs/runbooks/CNPGClusterOffline.md + expr: | + (count(cnpg_collector_up{namespace="radarr-4k",pod=~"radarr-4k-postgresql-18-cluster-([1-9][0-9]*)$"}) OR on() vector(0)) == 0 + for: 5m + labels: + severity: critical + namespace: radarr-4k + cnpg_cluster: radarr-4k-postgresql-18-cluster + - alert: CNPGClusterPGDatabaseXidAgeWarning + annotations: + summary: CNPG Cluster has a number of transactions from the frozen XID to the current one. + description: |- + Over 300,000,000 transactions from frozen xid + on pod {{ $labels.pod }} + expr: | + cnpg_pg_database_xid_age > 300000000 + for: 1m + labels: + severity: warning + namespace: radarr-4k + cnpg_cluster: radarr-4k-postgresql-18-cluster + - alert: CNPGClusterPGReplicationWarning + annotations: + summary: CNPG Cluster standby is lagging behind the primary. + description: |- + Standby is lagging behind by over 300 seconds (5 minutes) + expr: | + cnpg_pg_replication_lag > 300 + for: 1m + labels: + severity: warning + namespace: radarr-4k + cnpg_cluster: radarr-4k-postgresql-18-cluster + - alert: CNPGClusterReplicaFailingReplicationWarning + annotations: + summary: CNPG Cluster has a replica that is failing to replicate. + description: |- + Replica {{ $labels.pod }} + is failing to replicate + expr: | + cnpg_pg_replication_in_recovery > cnpg_pg_replication_is_wal_receiver_up + for: 1m + labels: + severity: warning + namespace: radarr-4k + cnpg_cluster: radarr-4k-postgresql-18-cluster + - alert: CNPGClusterZoneSpreadWarning + annotations: + summary: CNPG Cluster has instances in the same zone. + description: |- + CloudNativePG Cluster "radarr-4k/radarr-4k-postgresql-18-cluster" has instances in the same availability zone. + + A disaster in one availability zone will lead to a potential service disruption and/or data loss.
+ runbook_url: https://github.com/cloudnative-pg/charts/blob/main/charts/cluster/docs/runbooks/CNPGClusterZoneSpreadWarning.md + expr: | + 3 > count(count by (label_topology_kubernetes_io_zone) (kube_pod_info{namespace="radarr-4k", pod=~"radarr-4k-postgresql-18-cluster-([1-9][0-9]*)$"} * on(node,instance) group_left(label_topology_kubernetes_io_zone) kube_node_labels)) < 3 + for: 5m + labels: + severity: warning + namespace: radarr-4k + cnpg_cluster: radarr-4k-postgresql-18-cluster diff --git a/clusters/cl01tl/manifests/radarr-4k/PrometheusRule-radarr5-4k-postgresql-17-alert-rules.yaml b/clusters/cl01tl/manifests/radarr-4k/PrometheusRule-radarr5-4k-postgresql-17-alert-rules.yaml index 27dbb1f00..0fc0e7564 100644 --- a/clusters/cl01tl/manifests/radarr-4k/PrometheusRule-radarr5-4k-postgresql-17-alert-rules.yaml +++ b/clusters/cl01tl/manifests/radarr-4k/PrometheusRule-radarr5-4k-postgresql-17-alert-rules.yaml @@ -4,11 +4,11 @@ metadata: name: radarr5-4k-postgresql-17-alert-rules namespace: radarr-4k labels: - helm.sh/chart: postgres-17-cluster-6.16.1 + helm.sh/chart: postgres-17-cluster-6.17.1 app.kubernetes.io/name: radarr5-4k-postgresql-17 app.kubernetes.io/instance: radarr-4k app.kubernetes.io/part-of: radarr-4k - app.kubernetes.io/version: "6.16.1" + app.kubernetes.io/version: "6.17.1" app.kubernetes.io/managed-by: Helm spec: groups: diff --git a/clusters/cl01tl/manifests/radarr-4k/ScheduledBackup-radarr-4k-postgresql-18-daily-backup-scheduled-backup.yaml b/clusters/cl01tl/manifests/radarr-4k/ScheduledBackup-radarr-4k-postgresql-18-daily-backup-scheduled-backup.yaml new file mode 100644 index 000000000..db06b6645 --- /dev/null +++ b/clusters/cl01tl/manifests/radarr-4k/ScheduledBackup-radarr-4k-postgresql-18-daily-backup-scheduled-backup.yaml @@ -0,0 +1,24 @@ +apiVersion: postgresql.cnpg.io/v1 +kind: ScheduledBackup +metadata: + name: "radarr-4k-postgresql-18-daily-backup-scheduled-backup" + namespace: radarr-4k + labels: + helm.sh/chart: postgres-18-cluster-6.17.1 
+ app.kubernetes.io/name: radarr-4k-postgresql-18 + app.kubernetes.io/instance: radarr-4k + app.kubernetes.io/part-of: radarr-4k + app.kubernetes.io/version: "6.17.1" + app.kubernetes.io/managed-by: Helm +spec: + immediate: true + suspend: false + schedule: "0 0 0 * * *" + backupOwnerReference: self + cluster: + name: radarr-4k-postgresql-18-cluster + method: plugin + pluginConfiguration: + name: barman-cloud.cloudnative-pg.io + parameters: + barmanObjectName: "radarr-4k-postgresql-18-external-backup" diff --git a/clusters/cl01tl/manifests/radarr-4k/ScheduledBackup-radarr-4k-postgresql-18-live-backup-scheduled-backup.yaml b/clusters/cl01tl/manifests/radarr-4k/ScheduledBackup-radarr-4k-postgresql-18-live-backup-scheduled-backup.yaml new file mode 100644 index 000000000..9f61eaf99 --- /dev/null +++ b/clusters/cl01tl/manifests/radarr-4k/ScheduledBackup-radarr-4k-postgresql-18-live-backup-scheduled-backup.yaml @@ -0,0 +1,24 @@ +apiVersion: postgresql.cnpg.io/v1 +kind: ScheduledBackup +metadata: + name: "radarr-4k-postgresql-18-live-backup-scheduled-backup" + namespace: radarr-4k + labels: + helm.sh/chart: postgres-18-cluster-6.17.1 + app.kubernetes.io/name: radarr-4k-postgresql-18 + app.kubernetes.io/instance: radarr-4k + app.kubernetes.io/part-of: radarr-4k + app.kubernetes.io/version: "6.17.1" + app.kubernetes.io/managed-by: Helm +spec: + immediate: true + suspend: false + schedule: "0 0 0 * * *" + backupOwnerReference: self + cluster: + name: radarr-4k-postgresql-18-cluster + method: plugin + pluginConfiguration: + name: barman-cloud.cloudnative-pg.io + parameters: + barmanObjectName: "radarr-4k-postgresql-18-garage-local-backup" diff --git a/clusters/cl01tl/manifests/radarr-4k/ScheduledBackup-radarr5-4k-postgresql-17-daily-backup-scheduled-backup.yaml b/clusters/cl01tl/manifests/radarr-4k/ScheduledBackup-radarr5-4k-postgresql-17-daily-backup-scheduled-backup.yaml index 21bb3cff5..5eca4762f 100644 --- 
a/clusters/cl01tl/manifests/radarr-4k/ScheduledBackup-radarr5-4k-postgresql-17-daily-backup-scheduled-backup.yaml +++ b/clusters/cl01tl/manifests/radarr-4k/ScheduledBackup-radarr5-4k-postgresql-17-daily-backup-scheduled-backup.yaml @@ -4,11 +4,11 @@ metadata: name: "radarr5-4k-postgresql-17-daily-backup-scheduled-backup" namespace: radarr-4k labels: - helm.sh/chart: postgres-17-cluster-6.16.1 + helm.sh/chart: postgres-17-cluster-6.17.1 app.kubernetes.io/name: radarr5-4k-postgresql-17 app.kubernetes.io/instance: radarr-4k app.kubernetes.io/part-of: radarr-4k - app.kubernetes.io/version: "6.16.1" + app.kubernetes.io/version: "6.17.1" app.kubernetes.io/managed-by: Helm spec: immediate: false diff --git a/clusters/cl01tl/manifests/radarr-4k/ScheduledBackup-radarr5-4k-postgresql-17-live-backup-scheduled-backup.yaml b/clusters/cl01tl/manifests/radarr-4k/ScheduledBackup-radarr5-4k-postgresql-17-live-backup-scheduled-backup.yaml index 8261583d7..90acff8e2 100644 --- a/clusters/cl01tl/manifests/radarr-4k/ScheduledBackup-radarr5-4k-postgresql-17-live-backup-scheduled-backup.yaml +++ b/clusters/cl01tl/manifests/radarr-4k/ScheduledBackup-radarr5-4k-postgresql-17-live-backup-scheduled-backup.yaml @@ -4,11 +4,11 @@ metadata: name: "radarr5-4k-postgresql-17-live-backup-scheduled-backup" namespace: radarr-4k labels: - helm.sh/chart: postgres-17-cluster-6.16.1 + helm.sh/chart: postgres-17-cluster-6.17.1 app.kubernetes.io/name: radarr5-4k-postgresql-17 app.kubernetes.io/instance: radarr-4k app.kubernetes.io/part-of: radarr-4k - app.kubernetes.io/version: "6.16.1" + app.kubernetes.io/version: "6.17.1" app.kubernetes.io/managed-by: Helm spec: immediate: true diff --git a/clusters/cl01tl/manifests/radarr-anime/Cluster-radarr-anime-postgresql-18-cluster.yaml b/clusters/cl01tl/manifests/radarr-anime/Cluster-radarr-anime-postgresql-18-cluster.yaml new file mode 100644 index 000000000..9a46aec60 --- /dev/null +++ 
b/clusters/cl01tl/manifests/radarr-anime/Cluster-radarr-anime-postgresql-18-cluster.yaml @@ -0,0 +1,84 @@ +apiVersion: postgresql.cnpg.io/v1 +kind: Cluster +metadata: + name: radarr-anime-postgresql-18-cluster + namespace: radarr-anime + labels: + helm.sh/chart: postgres-18-cluster-6.17.1 + app.kubernetes.io/name: radarr-anime-postgresql-18 + app.kubernetes.io/instance: radarr-anime + app.kubernetes.io/part-of: radarr-anime + app.kubernetes.io/version: "6.17.1" + app.kubernetes.io/managed-by: Helm +spec: + instances: 3 + imageName: "ghcr.io/cloudnative-pg/postgresql:18.1-standard-trixie" + imagePullPolicy: IfNotPresent + postgresUID: 26 + postgresGID: 26 + plugins: + - name: barman-cloud.cloudnative-pg.io + enabled: true + isWALArchiver: false + parameters: + barmanObjectName: "radarr-anime-postgresql-18-external-backup" + serverName: "radarr-anime-postgresql-18-backup-1" + - name: barman-cloud.cloudnative-pg.io + enabled: true + isWALArchiver: true + parameters: + barmanObjectName: "radarr-anime-postgresql-18-garage-local-backup" + serverName: "radarr-anime-postgresql-18-backup-1" + storage: + size: 10Gi + storageClass: local-path + walStorage: + size: 2Gi + storageClass: local-path + resources: + limits: + hugepages-2Mi: 256Mi + requests: + cpu: 100m + memory: 256Mi + affinity: + enablePodAntiAffinity: true + topologyKey: kubernetes.io/hostname + primaryUpdateMethod: switchover + primaryUpdateStrategy: unsupervised + logLevel: info + enableSuperuserAccess: false + enablePDB: true + postgresql: + parameters: + hot_standby_feedback: "on" + max_slot_wal_keep_size: 2000MB + shared_buffers: 128MB + monitoring: + enablePodMonitor: true + disableDefaultQueries: false + bootstrap: + initdb: + database: app + owner: app + import: + source: + externalCluster: importSource + type: monolith + databases: + - radarr-main + - radarr-log + roles: + - app + schemaOnly: false + externalClusters: + - name: importSource + connectionParameters: + host: 
"radarr5-anime-postgresql-17-cluster-rw" + port: "5432" + user: "postgres" + dbname: "*" + sslmode: "disable" + password: + name: radarr5-anime-postgresql-17-cluster-superuser + key: password diff --git a/clusters/cl01tl/manifests/radarr-anime/Cluster-radarr5-anime-postgresql-17-cluster.yaml b/clusters/cl01tl/manifests/radarr-anime/Cluster-radarr5-anime-postgresql-17-cluster.yaml index bb1a0741f..8678a7733 100644 --- a/clusters/cl01tl/manifests/radarr-anime/Cluster-radarr5-anime-postgresql-17-cluster.yaml +++ b/clusters/cl01tl/manifests/radarr-anime/Cluster-radarr5-anime-postgresql-17-cluster.yaml @@ -4,11 +4,11 @@ metadata: name: radarr5-anime-postgresql-17-cluster namespace: radarr-anime labels: - helm.sh/chart: postgres-17-cluster-6.16.1 + helm.sh/chart: postgres-17-cluster-6.17.1 app.kubernetes.io/name: radarr5-anime-postgresql-17 app.kubernetes.io/instance: radarr-anime app.kubernetes.io/part-of: radarr-anime - app.kubernetes.io/version: "6.16.1" + app.kubernetes.io/version: "6.17.1" app.kubernetes.io/managed-by: Helm spec: instances: 3 @@ -54,7 +54,7 @@ spec: primaryUpdateMethod: switchover primaryUpdateStrategy: unsupervised logLevel: info - enableSuperuserAccess: false + enableSuperuserAccess: true enablePDB: true postgresql: parameters: diff --git a/clusters/cl01tl/manifests/radarr-anime/ExternalSecret-radarr-anime-postgresql-18-cluster-backup-secret-garage.yaml b/clusters/cl01tl/manifests/radarr-anime/ExternalSecret-radarr-anime-postgresql-18-cluster-backup-secret-garage.yaml new file mode 100644 index 000000000..19774abc4 --- /dev/null +++ b/clusters/cl01tl/manifests/radarr-anime/ExternalSecret-radarr-anime-postgresql-18-cluster-backup-secret-garage.yaml @@ -0,0 +1,35 @@ +apiVersion: external-secrets.io/v1 +kind: ExternalSecret +metadata: + name: radarr-anime-postgresql-18-cluster-backup-secret-garage + namespace: radarr-anime + labels: + app.kubernetes.io/name: radarr-anime-postgresql-18-cluster-backup-secret-garage + app.kubernetes.io/instance: 
radarr-anime + app.kubernetes.io/part-of: radarr-anime +spec: + secretStoreRef: + kind: ClusterSecretStore + name: vault + data: + - secretKey: ACCESS_KEY_ID + remoteRef: + conversionStrategy: Default + decodingStrategy: None + key: /garage/home-infra/postgres-backups + metadataPolicy: None + property: ACCESS_KEY_ID + - secretKey: ACCESS_SECRET_KEY + remoteRef: + conversionStrategy: Default + decodingStrategy: None + key: /garage/home-infra/postgres-backups + metadataPolicy: None + property: ACCESS_SECRET_KEY + - secretKey: ACCESS_REGION + remoteRef: + conversionStrategy: Default + decodingStrategy: None + key: /garage/home-infra/postgres-backups + metadataPolicy: None + property: ACCESS_REGION diff --git a/clusters/cl01tl/manifests/radarr-anime/ExternalSecret-radarr-anime-postgresql-18-cluster-backup-secret.yaml b/clusters/cl01tl/manifests/radarr-anime/ExternalSecret-radarr-anime-postgresql-18-cluster-backup-secret.yaml new file mode 100644 index 000000000..b6b30da6a --- /dev/null +++ b/clusters/cl01tl/manifests/radarr-anime/ExternalSecret-radarr-anime-postgresql-18-cluster-backup-secret.yaml @@ -0,0 +1,28 @@ +apiVersion: external-secrets.io/v1 +kind: ExternalSecret +metadata: + name: radarr-anime-postgresql-18-cluster-backup-secret + namespace: radarr-anime + labels: + app.kubernetes.io/name: radarr-anime-postgresql-18-cluster-backup-secret + app.kubernetes.io/instance: radarr-anime + app.kubernetes.io/part-of: radarr-anime +spec: + secretStoreRef: + kind: ClusterSecretStore + name: vault + data: + - secretKey: ACCESS_KEY_ID + remoteRef: + conversionStrategy: Default + decodingStrategy: None + key: /digital-ocean/home-infra/postgres-backups + metadataPolicy: None + property: access + - secretKey: ACCESS_SECRET_KEY + remoteRef: + conversionStrategy: Default + decodingStrategy: None + key: /digital-ocean/home-infra/postgres-backups + metadataPolicy: None + property: secret diff --git 
a/clusters/cl01tl/manifests/radarr-anime/ObjectStore-radarr-anime-postgresql-18-external-backup.yaml b/clusters/cl01tl/manifests/radarr-anime/ObjectStore-radarr-anime-postgresql-18-external-backup.yaml new file mode 100644 index 000000000..fc8aa04ee --- /dev/null +++ b/clusters/cl01tl/manifests/radarr-anime/ObjectStore-radarr-anime-postgresql-18-external-backup.yaml @@ -0,0 +1,24 @@ +apiVersion: barmancloud.cnpg.io/v1 +kind: ObjectStore +metadata: + name: "radarr-anime-postgresql-18-external-backup" + namespace: radarr-anime + labels: + helm.sh/chart: postgres-18-cluster-6.17.1 + app.kubernetes.io/name: radarr-anime-postgresql-18 + app.kubernetes.io/instance: radarr-anime + app.kubernetes.io/part-of: radarr-anime + app.kubernetes.io/version: "6.17.1" + app.kubernetes.io/managed-by: Helm +spec: + retentionPolicy: 30d + configuration: + destinationPath: s3://postgres-backups-ce540ddf106d186bbddca68a/cl01tl/radarr-anime/radarr-anime-postgresql-18-cluster + endpointURL: https://nyc3.digitaloceanspaces.com + s3Credentials: + accessKeyId: + name: radarr-anime-postgresql-18-cluster-backup-secret + key: ACCESS_KEY_ID + secretAccessKey: + name: radarr-anime-postgresql-18-cluster-backup-secret + key: ACCESS_SECRET_KEY diff --git a/clusters/cl01tl/manifests/radarr-anime/ObjectStore-radarr-anime-postgresql-18-garage-local-backup.yaml b/clusters/cl01tl/manifests/radarr-anime/ObjectStore-radarr-anime-postgresql-18-garage-local-backup.yaml new file mode 100644 index 000000000..708228a47 --- /dev/null +++ b/clusters/cl01tl/manifests/radarr-anime/ObjectStore-radarr-anime-postgresql-18-garage-local-backup.yaml @@ -0,0 +1,27 @@ +apiVersion: barmancloud.cnpg.io/v1 +kind: ObjectStore +metadata: + name: "radarr-anime-postgresql-18-garage-local-backup" + namespace: radarr-anime + labels: + helm.sh/chart: postgres-18-cluster-6.17.1 + app.kubernetes.io/name: radarr-anime-postgresql-18 + app.kubernetes.io/instance: radarr-anime + app.kubernetes.io/part-of: radarr-anime + 
app.kubernetes.io/version: "6.17.1" + app.kubernetes.io/managed-by: Helm +spec: + retentionPolicy: 3d + configuration: + destinationPath: s3://postgres-backups/cl01tl/radarr-anime/radarr-anime-postgresql-18-cluster + endpointURL: http://garage-main.garage:3900 + s3Credentials: + accessKeyId: + name: radarr-anime-postgresql-18-cluster-backup-secret-garage + key: ACCESS_KEY_ID + secretAccessKey: + name: radarr-anime-postgresql-18-cluster-backup-secret-garage + key: ACCESS_SECRET_KEY + region: + name: radarr-anime-postgresql-18-cluster-backup-secret-garage + key: ACCESS_REGION diff --git a/clusters/cl01tl/manifests/radarr-anime/ObjectStore-radarr5-anime-postgresql-17-external-backup.yaml b/clusters/cl01tl/manifests/radarr-anime/ObjectStore-radarr5-anime-postgresql-17-external-backup.yaml index c515f2b6f..4c811c66e 100644 --- a/clusters/cl01tl/manifests/radarr-anime/ObjectStore-radarr5-anime-postgresql-17-external-backup.yaml +++ b/clusters/cl01tl/manifests/radarr-anime/ObjectStore-radarr5-anime-postgresql-17-external-backup.yaml @@ -4,11 +4,11 @@ metadata: name: "radarr5-anime-postgresql-17-external-backup" namespace: radarr-anime labels: - helm.sh/chart: postgres-17-cluster-6.16.1 + helm.sh/chart: postgres-17-cluster-6.17.1 app.kubernetes.io/name: radarr5-anime-postgresql-17 app.kubernetes.io/instance: radarr-anime app.kubernetes.io/part-of: radarr-anime - app.kubernetes.io/version: "6.16.1" + app.kubernetes.io/version: "6.17.1" app.kubernetes.io/managed-by: Helm spec: retentionPolicy: 30d diff --git a/clusters/cl01tl/manifests/radarr-anime/ObjectStore-radarr5-anime-postgresql-17-garage-local-backup.yaml b/clusters/cl01tl/manifests/radarr-anime/ObjectStore-radarr5-anime-postgresql-17-garage-local-backup.yaml index c5ab00ccd..39762ff9e 100644 --- a/clusters/cl01tl/manifests/radarr-anime/ObjectStore-radarr5-anime-postgresql-17-garage-local-backup.yaml +++ b/clusters/cl01tl/manifests/radarr-anime/ObjectStore-radarr5-anime-postgresql-17-garage-local-backup.yaml @@ -4,11 
+4,11 @@ metadata: name: "radarr5-anime-postgresql-17-garage-local-backup" namespace: radarr-anime labels: - helm.sh/chart: postgres-17-cluster-6.16.1 + helm.sh/chart: postgres-17-cluster-6.17.1 app.kubernetes.io/name: radarr5-anime-postgresql-17 app.kubernetes.io/instance: radarr-anime app.kubernetes.io/part-of: radarr-anime - app.kubernetes.io/version: "6.16.1" + app.kubernetes.io/version: "6.17.1" app.kubernetes.io/managed-by: Helm spec: retentionPolicy: 3d diff --git a/clusters/cl01tl/manifests/radarr-anime/ObjectStore-radarr5-anime-postgresql-17-recovery.yaml b/clusters/cl01tl/manifests/radarr-anime/ObjectStore-radarr5-anime-postgresql-17-recovery.yaml index 2b62fcd57..d2a4917ce 100644 --- a/clusters/cl01tl/manifests/radarr-anime/ObjectStore-radarr5-anime-postgresql-17-recovery.yaml +++ b/clusters/cl01tl/manifests/radarr-anime/ObjectStore-radarr5-anime-postgresql-17-recovery.yaml @@ -4,11 +4,11 @@ metadata: name: "radarr5-anime-postgresql-17-recovery" namespace: radarr-anime labels: - helm.sh/chart: postgres-17-cluster-6.16.1 + helm.sh/chart: postgres-17-cluster-6.17.1 app.kubernetes.io/name: radarr5-anime-postgresql-17 app.kubernetes.io/instance: radarr-anime app.kubernetes.io/part-of: radarr-anime - app.kubernetes.io/version: "6.16.1" + app.kubernetes.io/version: "6.17.1" app.kubernetes.io/managed-by: Helm spec: configuration: diff --git a/clusters/cl01tl/manifests/radarr-anime/PrometheusRule-radarr-anime-postgresql-18-alert-rules.yaml b/clusters/cl01tl/manifests/radarr-anime/PrometheusRule-radarr-anime-postgresql-18-alert-rules.yaml new file mode 100644 index 000000000..daf31ff00 --- /dev/null +++ b/clusters/cl01tl/manifests/radarr-anime/PrometheusRule-radarr-anime-postgresql-18-alert-rules.yaml @@ -0,0 +1,270 @@ +apiVersion: monitoring.coreos.com/v1 +kind: PrometheusRule +metadata: + name: radarr-anime-postgresql-18-alert-rules + namespace: radarr-anime + labels: + helm.sh/chart: postgres-18-cluster-6.17.1 + app.kubernetes.io/name: 
radarr-anime-postgresql-18 + app.kubernetes.io/instance: radarr-anime + app.kubernetes.io/part-of: radarr-anime + app.kubernetes.io/version: "6.17.1" + app.kubernetes.io/managed-by: Helm +spec: + groups: + - name: cloudnative-pg/radarr-anime-postgresql-18 + rules: + - alert: CNPGClusterBackendsWaitingWarning + annotations: + summary: CNPG Cluster a backend is waiting for longer than 5 minutes. + description: |- + Pod {{ $labels.pod }} + has been waiting for longer than 5 minutes + expr: | + cnpg_backends_waiting_total > 300 + for: 1m + labels: + severity: warning + namespace: radarr-anime + cnpg_cluster: radarr-anime-postgresql-18-cluster + - alert: CNPGClusterDatabaseDeadlockConflictsWarning + annotations: + summary: CNPG Cluster has over 10 deadlock conflicts. + description: |- + There are over 10 deadlock conflicts in + {{ $labels.pod }} + expr: | + cnpg_pg_stat_database_deadlocks > 10 + for: 1m + labels: + severity: warning + namespace: radarr-anime + cnpg_cluster: radarr-anime-postgresql-18-cluster + - alert: CNPGClusterHACritical + annotations: + summary: CNPG Cluster has no standby replicas! + description: |- + CloudNativePG Cluster "{{`{{`}} $labels.job {{`}}`}}" has no ready standby replicas. Your cluster at a severe + risk of data loss and downtime if the primary instance fails. + + The primary instance is still online and able to serve queries, although connections to the `-ro` endpoint + will fail. The `-r` endpoint os operating at reduced capacity and all traffic is being served by the main. + + This can happen during a normal fail-over or automated minor version upgrades in a cluster with 2 or less + instances. The replaced instance may need some time to catch-up with the cluster primary instance. + + This alarm will be always trigger if your cluster is configured to run with only 1 instance. In this + case you may want to silence it. 
+ runbook_url: https://github.com/cloudnative-pg/charts/blob/main/charts/cluster/docs/runbooks/CNPGClusterHACritical.md + expr: | + max by (job) (cnpg_pg_replication_streaming_replicas{namespace="radarr-anime"} - cnpg_pg_replication_is_wal_receiver_up{namespace="radarr-anime"}) < 1 + for: 5m + labels: + severity: critical + namespace: radarr-anime + cnpg_cluster: radarr-anime-postgresql-18-cluster + - alert: CNPGClusterHAWarning + annotations: + summary: CNPG Cluster less than 2 standby replicas. + description: |- + CloudNativePG Cluster "{{`{{`}} $labels.job {{`}}`}}" has only {{`{{`}} $value {{`}}`}} standby replicas, putting + your cluster at risk if another instance fails. The cluster is still able to operate normally, although + the `-ro` and `-r` endpoints operate at reduced capacity. + + This can happen during a normal fail-over or automated minor version upgrades. The replaced instance may + need some time to catch-up with the cluster primary instance. + + This alarm will be constantly triggered if your cluster is configured to run with less than 3 instances. + In this case you may want to silence it. + runbook_url: https://github.com/cloudnative-pg/charts/blob/main/charts/cluster/docs/runbooks/CNPGClusterHAWarning.md + expr: | + max by (job) (cnpg_pg_replication_streaming_replicas{namespace="radarr-anime"} - cnpg_pg_replication_is_wal_receiver_up{namespace="radarr-anime"}) < 2 + for: 5m + labels: + severity: warning + namespace: radarr-anime + cnpg_cluster: radarr-anime-postgresql-18-cluster + - alert: CNPGClusterHighConnectionsCritical + annotations: + summary: CNPG Instance maximum number of connections critical! + description: |- + CloudNativePG Cluster "radarr-anime/radarr-anime-postgresql-18-cluster" instance {{`{{`}} $labels.pod {{`}}`}} is using {{`{{`}} $value {{`}}`}}% of + the maximum number of connections. 
+ runbook_url: https://github.com/cloudnative-pg/charts/blob/main/charts/cluster/docs/runbooks/CNPGClusterHighConnectionsCritical.md + expr: | + sum by (pod) (cnpg_backends_total{namespace="radarr-anime", pod=~"radarr-anime-postgresql-18-cluster-([1-9][0-9]*)$"}) / max by (pod) (cnpg_pg_settings_setting{name="max_connections", namespace="radarr-anime", pod=~"radarr-anime-postgresql-18-cluster-([1-9][0-9]*)$"}) * 100 > 95 + for: 5m + labels: + severity: critical + namespace: radarr-anime + cnpg_cluster: radarr-anime-postgresql-18-cluster + - alert: CNPGClusterHighConnectionsWarning + annotations: + summary: CNPG Instance is approaching the maximum number of connections. + description: |- + CloudNativePG Cluster "radarr-anime/radarr-anime-postgresql-18-cluster" instance {{`{{`}} $labels.pod {{`}}`}} is using {{`{{`}} $value {{`}}`}}% of + the maximum number of connections. + runbook_url: https://github.com/cloudnative-pg/charts/blob/main/charts/cluster/docs/runbooks/CNPGClusterHighConnectionsWarning.md + expr: | + sum by (pod) (cnpg_backends_total{namespace="radarr-anime", pod=~"radarr-anime-postgresql-18-cluster-([1-9][0-9]*)$"}) / max by (pod) (cnpg_pg_settings_setting{name="max_connections", namespace="radarr-anime", pod=~"radarr-anime-postgresql-18-cluster-([1-9][0-9]*)$"}) * 100 > 80 + for: 5m + labels: + severity: warning + namespace: radarr-anime + cnpg_cluster: radarr-anime-postgresql-18-cluster + - alert: CNPGClusterHighReplicationLag + annotations: + summary: CNPG Cluster high replication lag + description: |- + CloudNativePG Cluster "radarr-anime/radarr-anime-postgresql-18-cluster" is experiencing a high replication lag of + {{`{{`}} $value {{`}}`}}ms. + + High replication lag indicates network issues, busy instances, slow queries or suboptimal configuration. 
+ runbook_url: https://github.com/cloudnative-pg/charts/blob/main/charts/cluster/docs/runbooks/CNPGClusterHighReplicationLag.md + expr: | + max(cnpg_pg_replication_lag{namespace="radarr-anime",pod=~"radarr-anime-postgresql-18-cluster-([1-9][0-9]*)$"}) * 1000 > 1000 + for: 5m + labels: + severity: warning + namespace: radarr-anime + cnpg_cluster: radarr-anime-postgresql-18-cluster + - alert: CNPGClusterInstancesOnSameNode + annotations: + summary: CNPG Cluster instances are located on the same node. + description: |- + CloudNativePG Cluster "radarr-anime/radarr-anime-postgresql-18-cluster" has {{`{{`}} $value {{`}}`}} + instances on the same node {{`{{`}} $labels.node {{`}}`}}. + + A failure or scheduled downtime of a single node will lead to a potential service disruption and/or data loss. + runbook_url: https://github.com/cloudnative-pg/charts/blob/main/charts/cluster/docs/runbooks/CNPGClusterInstancesOnSameNode.md + expr: | + count by (node) (kube_pod_info{namespace="radarr-anime", pod=~"radarr-anime-postgresql-18-cluster-([1-9][0-9]*)$"}) > 1 + for: 5m + labels: + severity: warning + namespace: radarr-anime + cnpg_cluster: radarr-anime-postgresql-18-cluster + - alert: CNPGClusterLongRunningTransactionWarning + annotations: + summary: CNPG Cluster query is taking longer than 5 minutes. + description: |- + CloudNativePG Cluster Pod {{ $labels.pod }} + is taking more than 5 minutes (300 seconds) for a query. + expr: |- + cnpg_backends_max_tx_duration_seconds > 300 + for: 1m + labels: + severity: warning + namespace: radarr-anime + cnpg_cluster: radarr-anime-postgresql-18-cluster + - alert: CNPGClusterLowDiskSpaceCritical + annotations: + summary: CNPG Instance is running out of disk space! + description: |- + CloudNativePG Cluster "radarr-anime/radarr-anime-postgresql-18-cluster" is running extremely low on disk space. Check attached PVCs! 
+ runbook_url: https://github.com/cloudnative-pg/charts/blob/main/charts/cluster/docs/runbooks/CNPGClusterLowDiskSpaceCritical.md + expr: | + max(max by(persistentvolumeclaim) (1 - kubelet_volume_stats_available_bytes{namespace="radarr-anime", persistentvolumeclaim=~"radarr-anime-postgresql-18-cluster-([1-9][0-9]*)$"} / kubelet_volume_stats_capacity_bytes{namespace="radarr-anime", persistentvolumeclaim=~"radarr-anime-postgresql-18-cluster-([1-9][0-9]*)$"})) > 0.9 OR + max(max by(persistentvolumeclaim) (1 - kubelet_volume_stats_available_bytes{namespace="radarr-anime", persistentvolumeclaim=~"radarr-anime-postgresql-18-cluster-([1-9][0-9]*)$-wal"} / kubelet_volume_stats_capacity_bytes{namespace="radarr-anime", persistentvolumeclaim=~"radarr-anime-postgresql-18-cluster-([1-9][0-9]*)$-wal"})) > 0.9 OR + max(sum by (namespace,persistentvolumeclaim) (kubelet_volume_stats_used_bytes{namespace="radarr-anime", persistentvolumeclaim=~"radarr-anime-postgresql-18-cluster-([1-9][0-9]*)$-tbs.*"}) + / + sum by (namespace,persistentvolumeclaim) (kubelet_volume_stats_capacity_bytes{namespace="radarr-anime", persistentvolumeclaim=~"radarr-anime-postgresql-18-cluster-([1-9][0-9]*)$-tbs.*"}) + * + on(namespace, persistentvolumeclaim) group_left(volume) + kube_pod_spec_volumes_persistentvolumeclaims_info{pod=~"radarr-anime-postgresql-18-cluster-([1-9][0-9]*)$"} + ) > 0.9 + for: 5m + labels: + severity: critical + namespace: radarr-anime + cnpg_cluster: radarr-anime-postgresql-18-cluster + - alert: CNPGClusterLowDiskSpaceWarning + annotations: + summary: CNPG Instance is running out of disk space. + description: |- + CloudNativePG Cluster "radarr-anime/radarr-anime-postgresql-18-cluster" is running low on disk space. Check attached PVCs. 
+ runbook_url: https://github.com/cloudnative-pg/charts/blob/main/charts/cluster/docs/runbooks/CNPGClusterLowDiskSpaceWarning.md + expr: | + max(max by(persistentvolumeclaim) (1 - kubelet_volume_stats_available_bytes{namespace="radarr-anime", persistentvolumeclaim=~"radarr-anime-postgresql-18-cluster-([1-9][0-9]*)$"} / kubelet_volume_stats_capacity_bytes{namespace="radarr-anime", persistentvolumeclaim=~"radarr-anime-postgresql-18-cluster-([1-9][0-9]*)$"})) > 0.7 OR + max(max by(persistentvolumeclaim) (1 - kubelet_volume_stats_available_bytes{namespace="radarr-anime", persistentvolumeclaim=~"radarr-anime-postgresql-18-cluster-([1-9][0-9]*)$-wal"} / kubelet_volume_stats_capacity_bytes{namespace="radarr-anime", persistentvolumeclaim=~"radarr-anime-postgresql-18-cluster-([1-9][0-9]*)$-wal"})) > 0.7 OR + max(sum by (namespace,persistentvolumeclaim) (kubelet_volume_stats_used_bytes{namespace="radarr-anime", persistentvolumeclaim=~"radarr-anime-postgresql-18-cluster-([1-9][0-9]*)$-tbs.*"}) + / + sum by (namespace,persistentvolumeclaim) (kubelet_volume_stats_capacity_bytes{namespace="radarr-anime", persistentvolumeclaim=~"radarr-anime-postgresql-18-cluster-([1-9][0-9]*)$-tbs.*"}) + * + on(namespace, persistentvolumeclaim) group_left(volume) + kube_pod_spec_volumes_persistentvolumeclaims_info{pod=~"radarr-anime-postgresql-18-cluster-([1-9][0-9]*)$"} + ) > 0.7 + for: 5m + labels: + severity: warning + namespace: radarr-anime + cnpg_cluster: radarr-anime-postgresql-18-cluster + - alert: CNPGClusterOffline + annotations: + summary: CNPG Cluster has no running instances! + description: |- + CloudNativePG Cluster "radarr-anime/radarr-anime-postgresql-18-cluster" has no ready instances. + + Having an offline cluster means your applications will not be able to access the database, leading to + potential service disruption and/or data loss. 
+ runbook_url: https://github.com/cloudnative-pg/charts/blob/main/charts/cluster/docs/runbooks/CNPGClusterOffline.md + expr: | + (count(cnpg_collector_up{namespace="radarr-anime",pod=~"radarr-anime-postgresql-18-cluster-([1-9][0-9]*)$"}) OR on() vector(0)) == 0 + for: 5m + labels: + severity: critical + namespace: radarr-anime + cnpg_cluster: radarr-anime-postgresql-18-cluster + - alert: CNPGClusterPGDatabaseXidAgeWarning + annotations: + summary: CNPG Cluster has a number of transactions from the frozen XID to the current one. + description: |- + Over 300,000,000 transactions from frozen xid + on pod {{ $labels.pod }} + expr: | + cnpg_pg_database_xid_age > 300000000 + for: 1m + labels: + severity: warning + namespace: radarr-anime + cnpg_cluster: radarr-anime-postgresql-18-cluster + - alert: CNPGClusterPGReplicationWarning + annotations: + summary: CNPG Cluster standby is lagging behind the primary. + description: |- + Standby is lagging behind by over 300 seconds (5 minutes) + expr: | + cnpg_pg_replication_lag > 300 + for: 1m + labels: + severity: warning + namespace: radarr-anime + cnpg_cluster: radarr-anime-postgresql-18-cluster + - alert: CNPGClusterReplicaFailingReplicationWarning + annotations: + summary: CNPG Cluster has a replica is failing to replicate. + description: |- + Replica {{ $labels.pod }} + is failing to replicate + expr: | + cnpg_pg_replication_in_recovery > cnpg_pg_replication_is_wal_receiver_up + for: 1m + labels: + severity: warning + namespace: radarr-anime + cnpg_cluster: radarr-anime-postgresql-18-cluster + - alert: CNPGClusterZoneSpreadWarning + annotations: + summary: CNPG Cluster instances in the same zone. + description: |- + CloudNativePG Cluster "radarr-anime/radarr-anime-postgresql-18-cluster" has instances in the same availability zone. + + A disaster in one availability zone will lead to a potential service disruption and/or data loss. 
+ runbook_url: https://github.com/cloudnative-pg/charts/blob/main/charts/cluster/docs/runbooks/CNPGClusterZoneSpreadWarning.md + expr: | + 3 > count(count by (label_topology_kubernetes_io_zone) (kube_pod_info{namespace="radarr-anime", pod=~"radarr-anime-postgresql-18-cluster-([1-9][0-9]*)$"} * on(node,instance) group_left(label_topology_kubernetes_io_zone) kube_node_labels)) < 3 + for: 5m + labels: + severity: warning + namespace: radarr-anime + cnpg_cluster: radarr-anime-postgresql-18-cluster diff --git a/clusters/cl01tl/manifests/radarr-anime/PrometheusRule-radarr5-anime-postgresql-17-alert-rules.yaml b/clusters/cl01tl/manifests/radarr-anime/PrometheusRule-radarr5-anime-postgresql-17-alert-rules.yaml index cce443334..f26d7a01d 100644 --- a/clusters/cl01tl/manifests/radarr-anime/PrometheusRule-radarr5-anime-postgresql-17-alert-rules.yaml +++ b/clusters/cl01tl/manifests/radarr-anime/PrometheusRule-radarr5-anime-postgresql-17-alert-rules.yaml @@ -4,11 +4,11 @@ metadata: name: radarr5-anime-postgresql-17-alert-rules namespace: radarr-anime labels: - helm.sh/chart: postgres-17-cluster-6.16.1 + helm.sh/chart: postgres-17-cluster-6.17.1 app.kubernetes.io/name: radarr5-anime-postgresql-17 app.kubernetes.io/instance: radarr-anime app.kubernetes.io/part-of: radarr-anime - app.kubernetes.io/version: "6.16.1" + app.kubernetes.io/version: "6.17.1" app.kubernetes.io/managed-by: Helm spec: groups: diff --git a/clusters/cl01tl/manifests/radarr-anime/ScheduledBackup-radarr-anime-postgresql-18-daily-backup-scheduled-backup.yaml b/clusters/cl01tl/manifests/radarr-anime/ScheduledBackup-radarr-anime-postgresql-18-daily-backup-scheduled-backup.yaml new file mode 100644 index 000000000..80e830fb5 --- /dev/null +++ b/clusters/cl01tl/manifests/radarr-anime/ScheduledBackup-radarr-anime-postgresql-18-daily-backup-scheduled-backup.yaml @@ -0,0 +1,24 @@ +apiVersion: postgresql.cnpg.io/v1 +kind: ScheduledBackup +metadata: + name: "radarr-anime-postgresql-18-daily-backup-scheduled-backup" + 
namespace: radarr-anime + labels: + helm.sh/chart: postgres-18-cluster-6.17.1 + app.kubernetes.io/name: radarr-anime-postgresql-18 + app.kubernetes.io/instance: radarr-anime + app.kubernetes.io/part-of: radarr-anime + app.kubernetes.io/version: "6.17.1" + app.kubernetes.io/managed-by: Helm +spec: + immediate: true + suspend: false + schedule: "0 0 0 * * *" + backupOwnerReference: self + cluster: + name: radarr-anime-postgresql-18-cluster + method: plugin + pluginConfiguration: + name: barman-cloud.cloudnative-pg.io + parameters: + barmanObjectName: "radarr-anime-postgresql-18-external-backup" diff --git a/clusters/cl01tl/manifests/radarr-anime/ScheduledBackup-radarr-anime-postgresql-18-live-backup-scheduled-backup.yaml b/clusters/cl01tl/manifests/radarr-anime/ScheduledBackup-radarr-anime-postgresql-18-live-backup-scheduled-backup.yaml new file mode 100644 index 000000000..907e9bb00 --- /dev/null +++ b/clusters/cl01tl/manifests/radarr-anime/ScheduledBackup-radarr-anime-postgresql-18-live-backup-scheduled-backup.yaml @@ -0,0 +1,24 @@ +apiVersion: postgresql.cnpg.io/v1 +kind: ScheduledBackup +metadata: + name: "radarr-anime-postgresql-18-live-backup-scheduled-backup" + namespace: radarr-anime + labels: + helm.sh/chart: postgres-18-cluster-6.17.1 + app.kubernetes.io/name: radarr-anime-postgresql-18 + app.kubernetes.io/instance: radarr-anime + app.kubernetes.io/part-of: radarr-anime + app.kubernetes.io/version: "6.17.1" + app.kubernetes.io/managed-by: Helm +spec: + immediate: true + suspend: false + schedule: "0 0 0 * * *" + backupOwnerReference: self + cluster: + name: radarr-anime-postgresql-18-cluster + method: plugin + pluginConfiguration: + name: barman-cloud.cloudnative-pg.io + parameters: + barmanObjectName: "radarr-anime-postgresql-18-garage-local-backup" diff --git a/clusters/cl01tl/manifests/radarr-anime/ScheduledBackup-radarr5-anime-postgresql-17-daily-backup-scheduled-backup.yaml 
b/clusters/cl01tl/manifests/radarr-anime/ScheduledBackup-radarr5-anime-postgresql-17-daily-backup-scheduled-backup.yaml index 2899acc8a..67b37aeee 100644 --- a/clusters/cl01tl/manifests/radarr-anime/ScheduledBackup-radarr5-anime-postgresql-17-daily-backup-scheduled-backup.yaml +++ b/clusters/cl01tl/manifests/radarr-anime/ScheduledBackup-radarr5-anime-postgresql-17-daily-backup-scheduled-backup.yaml @@ -4,11 +4,11 @@ metadata: name: "radarr5-anime-postgresql-17-daily-backup-scheduled-backup" namespace: radarr-anime labels: - helm.sh/chart: postgres-17-cluster-6.16.1 + helm.sh/chart: postgres-17-cluster-6.17.1 app.kubernetes.io/name: radarr5-anime-postgresql-17 app.kubernetes.io/instance: radarr-anime app.kubernetes.io/part-of: radarr-anime - app.kubernetes.io/version: "6.16.1" + app.kubernetes.io/version: "6.17.1" app.kubernetes.io/managed-by: Helm spec: immediate: false diff --git a/clusters/cl01tl/manifests/radarr-anime/ScheduledBackup-radarr5-anime-postgresql-17-live-backup-scheduled-backup.yaml b/clusters/cl01tl/manifests/radarr-anime/ScheduledBackup-radarr5-anime-postgresql-17-live-backup-scheduled-backup.yaml index 0eff2c6fb..c77d4beba 100644 --- a/clusters/cl01tl/manifests/radarr-anime/ScheduledBackup-radarr5-anime-postgresql-17-live-backup-scheduled-backup.yaml +++ b/clusters/cl01tl/manifests/radarr-anime/ScheduledBackup-radarr5-anime-postgresql-17-live-backup-scheduled-backup.yaml @@ -4,11 +4,11 @@ metadata: name: "radarr5-anime-postgresql-17-live-backup-scheduled-backup" namespace: radarr-anime labels: - helm.sh/chart: postgres-17-cluster-6.16.1 + helm.sh/chart: postgres-17-cluster-6.17.1 app.kubernetes.io/name: radarr5-anime-postgresql-17 app.kubernetes.io/instance: radarr-anime app.kubernetes.io/part-of: radarr-anime - app.kubernetes.io/version: "6.16.1" + app.kubernetes.io/version: "6.17.1" app.kubernetes.io/managed-by: Helm spec: immediate: true diff --git a/clusters/cl01tl/manifests/radarr-standup/Cluster-radarr-standup-postgresql-18-cluster.yaml 
b/clusters/cl01tl/manifests/radarr-standup/Cluster-radarr-standup-postgresql-18-cluster.yaml new file mode 100644 index 000000000..b188487a6 --- /dev/null +++ b/clusters/cl01tl/manifests/radarr-standup/Cluster-radarr-standup-postgresql-18-cluster.yaml @@ -0,0 +1,84 @@ +apiVersion: postgresql.cnpg.io/v1 +kind: Cluster +metadata: + name: radarr-standup-postgresql-18-cluster + namespace: radarr-standup + labels: + helm.sh/chart: postgres-18-cluster-6.17.1 + app.kubernetes.io/name: radarr-standup-postgresql-18 + app.kubernetes.io/instance: radarr-standup + app.kubernetes.io/part-of: radarr-standup + app.kubernetes.io/version: "6.17.1" + app.kubernetes.io/managed-by: Helm +spec: + instances: 3 + imageName: "ghcr.io/cloudnative-pg/postgresql:18.1-standard-trixie" + imagePullPolicy: IfNotPresent + postgresUID: 26 + postgresGID: 26 + plugins: + - name: barman-cloud.cloudnative-pg.io + enabled: true + isWALArchiver: false + parameters: + barmanObjectName: "radarr-standup-postgresql-18-external-backup" + serverName: "radarr-standup-postgresql-18-backup-1" + - name: barman-cloud.cloudnative-pg.io + enabled: true + isWALArchiver: true + parameters: + barmanObjectName: "radarr-standup-postgresql-18-garage-local-backup" + serverName: "radarr-standup-postgresql-18-backup-1" + storage: + size: 10Gi + storageClass: local-path + walStorage: + size: 2Gi + storageClass: local-path + resources: + limits: + hugepages-2Mi: 256Mi + requests: + cpu: 100m + memory: 256Mi + affinity: + enablePodAntiAffinity: true + topologyKey: kubernetes.io/hostname + primaryUpdateMethod: switchover + primaryUpdateStrategy: unsupervised + logLevel: info + enableSuperuserAccess: false + enablePDB: true + postgresql: + parameters: + hot_standby_feedback: "on" + max_slot_wal_keep_size: 2000MB + shared_buffers: 128MB + monitoring: + enablePodMonitor: true + disableDefaultQueries: false + bootstrap: + initdb: + database: app + owner: app + import: + source: + externalCluster: importSource + type: monolith + 
databases: + - radarr-main + - radarr-log + roles: + - app + schemaOnly: false + externalClusters: + - name: importSource + connectionParameters: + host: "radarr5-standup-postgresql-17-cluster-rw" + port: "5432" + user: "postgres" + dbname: "*" + sslmode: "disable" + password: + name: radarr5-standup-postgresql-17-cluster-superuser + key: password diff --git a/clusters/cl01tl/manifests/radarr-standup/Cluster-radarr5-standup-postgresql-17-cluster.yaml b/clusters/cl01tl/manifests/radarr-standup/Cluster-radarr5-standup-postgresql-17-cluster.yaml index f8d9edfde..4b2dc1f4b 100644 --- a/clusters/cl01tl/manifests/radarr-standup/Cluster-radarr5-standup-postgresql-17-cluster.yaml +++ b/clusters/cl01tl/manifests/radarr-standup/Cluster-radarr5-standup-postgresql-17-cluster.yaml @@ -4,11 +4,11 @@ metadata: name: radarr5-standup-postgresql-17-cluster namespace: radarr-standup labels: - helm.sh/chart: postgres-17-cluster-6.16.1 + helm.sh/chart: postgres-17-cluster-6.17.1 app.kubernetes.io/name: radarr5-standup-postgresql-17 app.kubernetes.io/instance: radarr-standup app.kubernetes.io/part-of: radarr-standup - app.kubernetes.io/version: "6.16.1" + app.kubernetes.io/version: "6.17.1" app.kubernetes.io/managed-by: Helm spec: instances: 3 @@ -54,7 +54,7 @@ spec: primaryUpdateMethod: switchover primaryUpdateStrategy: unsupervised logLevel: info - enableSuperuserAccess: false + enableSuperuserAccess: true enablePDB: true postgresql: parameters: diff --git a/clusters/cl01tl/manifests/radarr-standup/ExternalSecret-radarr-standup-postgresql-18-cluster-backup-secret-garage.yaml b/clusters/cl01tl/manifests/radarr-standup/ExternalSecret-radarr-standup-postgresql-18-cluster-backup-secret-garage.yaml new file mode 100644 index 000000000..99e948f3e --- /dev/null +++ b/clusters/cl01tl/manifests/radarr-standup/ExternalSecret-radarr-standup-postgresql-18-cluster-backup-secret-garage.yaml @@ -0,0 +1,35 @@ +apiVersion: external-secrets.io/v1 +kind: ExternalSecret +metadata: + name: 
radarr-standup-postgresql-18-cluster-backup-secret-garage + namespace: radarr-standup + labels: + app.kubernetes.io/name: radarr-standup-postgresql-18-cluster-backup-secret-garage + app.kubernetes.io/instance: radarr-standup + app.kubernetes.io/part-of: radarr-standup +spec: + secretStoreRef: + kind: ClusterSecretStore + name: vault + data: + - secretKey: ACCESS_KEY_ID + remoteRef: + conversionStrategy: Default + decodingStrategy: None + key: /garage/home-infra/postgres-backups + metadataPolicy: None + property: ACCESS_KEY_ID + - secretKey: ACCESS_SECRET_KEY + remoteRef: + conversionStrategy: Default + decodingStrategy: None + key: /garage/home-infra/postgres-backups + metadataPolicy: None + property: ACCESS_SECRET_KEY + - secretKey: ACCESS_REGION + remoteRef: + conversionStrategy: Default + decodingStrategy: None + key: /garage/home-infra/postgres-backups + metadataPolicy: None + property: ACCESS_REGION diff --git a/clusters/cl01tl/manifests/radarr-standup/ExternalSecret-radarr-standup-postgresql-18-cluster-backup-secret.yaml b/clusters/cl01tl/manifests/radarr-standup/ExternalSecret-radarr-standup-postgresql-18-cluster-backup-secret.yaml new file mode 100644 index 000000000..4a355829d --- /dev/null +++ b/clusters/cl01tl/manifests/radarr-standup/ExternalSecret-radarr-standup-postgresql-18-cluster-backup-secret.yaml @@ -0,0 +1,28 @@ +apiVersion: external-secrets.io/v1 +kind: ExternalSecret +metadata: + name: radarr-standup-postgresql-18-cluster-backup-secret + namespace: radarr-standup + labels: + app.kubernetes.io/name: radarr-standup-postgresql-18-cluster-backup-secret + app.kubernetes.io/instance: radarr-standup + app.kubernetes.io/part-of: radarr-standup +spec: + secretStoreRef: + kind: ClusterSecretStore + name: vault + data: + - secretKey: ACCESS_KEY_ID + remoteRef: + conversionStrategy: Default + decodingStrategy: None + key: /digital-ocean/home-infra/postgres-backups + metadataPolicy: None + property: access + - secretKey: ACCESS_SECRET_KEY + remoteRef: + 
conversionStrategy: Default + decodingStrategy: None + key: /digital-ocean/home-infra/postgres-backups + metadataPolicy: None + property: secret diff --git a/clusters/cl01tl/manifests/radarr-standup/ObjectStore-radarr-standup-postgresql-18-external-backup.yaml b/clusters/cl01tl/manifests/radarr-standup/ObjectStore-radarr-standup-postgresql-18-external-backup.yaml new file mode 100644 index 000000000..17eb2c587 --- /dev/null +++ b/clusters/cl01tl/manifests/radarr-standup/ObjectStore-radarr-standup-postgresql-18-external-backup.yaml @@ -0,0 +1,24 @@ +apiVersion: barmancloud.cnpg.io/v1 +kind: ObjectStore +metadata: + name: "radarr-standup-postgresql-18-external-backup" + namespace: radarr-standup + labels: + helm.sh/chart: postgres-18-cluster-6.17.1 + app.kubernetes.io/name: radarr-standup-postgresql-18 + app.kubernetes.io/instance: radarr-standup + app.kubernetes.io/part-of: radarr-standup + app.kubernetes.io/version: "6.17.1" + app.kubernetes.io/managed-by: Helm +spec: + retentionPolicy: 30d + configuration: + destinationPath: s3://postgres-backups-ce540ddf106d186bbddca68a/cl01tl/radarr-standup/radarr-standup-postgresql-18-cluster + endpointURL: https://nyc3.digitaloceanspaces.com + s3Credentials: + accessKeyId: + name: radarr-standup-postgresql-18-cluster-backup-secret + key: ACCESS_KEY_ID + secretAccessKey: + name: radarr-standup-postgresql-18-cluster-backup-secret + key: ACCESS_SECRET_KEY diff --git a/clusters/cl01tl/manifests/radarr-standup/ObjectStore-radarr-standup-postgresql-18-garage-local-backup.yaml b/clusters/cl01tl/manifests/radarr-standup/ObjectStore-radarr-standup-postgresql-18-garage-local-backup.yaml new file mode 100644 index 000000000..781c6cc4c --- /dev/null +++ b/clusters/cl01tl/manifests/radarr-standup/ObjectStore-radarr-standup-postgresql-18-garage-local-backup.yaml @@ -0,0 +1,27 @@ +apiVersion: barmancloud.cnpg.io/v1 +kind: ObjectStore +metadata: + name: "radarr-standup-postgresql-18-garage-local-backup" + namespace: radarr-standup + labels: + 
helm.sh/chart: postgres-18-cluster-6.17.1 + app.kubernetes.io/name: radarr-standup-postgresql-18 + app.kubernetes.io/instance: radarr-standup + app.kubernetes.io/part-of: radarr-standup + app.kubernetes.io/version: "6.17.1" + app.kubernetes.io/managed-by: Helm +spec: + retentionPolicy: 3d + configuration: + destinationPath: s3://postgres-backups/cl01tl/radarr-standup/radarr-standup-postgresql-18-cluster + endpointURL: http://garage-main.garage:3900 + s3Credentials: + accessKeyId: + name: radarr-standup-postgresql-18-cluster-backup-secret-garage + key: ACCESS_KEY_ID + secretAccessKey: + name: radarr-standup-postgresql-18-cluster-backup-secret-garage + key: ACCESS_SECRET_KEY + region: + name: radarr-standup-postgresql-18-cluster-backup-secret-garage + key: ACCESS_REGION diff --git a/clusters/cl01tl/manifests/radarr-standup/ObjectStore-radarr5-standup-postgresql-17-external-backup.yaml b/clusters/cl01tl/manifests/radarr-standup/ObjectStore-radarr5-standup-postgresql-17-external-backup.yaml index 812c43105..0649bd3d3 100644 --- a/clusters/cl01tl/manifests/radarr-standup/ObjectStore-radarr5-standup-postgresql-17-external-backup.yaml +++ b/clusters/cl01tl/manifests/radarr-standup/ObjectStore-radarr5-standup-postgresql-17-external-backup.yaml @@ -4,11 +4,11 @@ metadata: name: "radarr5-standup-postgresql-17-external-backup" namespace: radarr-standup labels: - helm.sh/chart: postgres-17-cluster-6.16.1 + helm.sh/chart: postgres-17-cluster-6.17.1 app.kubernetes.io/name: radarr5-standup-postgresql-17 app.kubernetes.io/instance: radarr-standup app.kubernetes.io/part-of: radarr-standup - app.kubernetes.io/version: "6.16.1" + app.kubernetes.io/version: "6.17.1" app.kubernetes.io/managed-by: Helm spec: retentionPolicy: 30d diff --git a/clusters/cl01tl/manifests/radarr-standup/ObjectStore-radarr5-standup-postgresql-17-garage-local-backup.yaml b/clusters/cl01tl/manifests/radarr-standup/ObjectStore-radarr5-standup-postgresql-17-garage-local-backup.yaml index 8a4c7f55a..4acbeccd6 
100644 --- a/clusters/cl01tl/manifests/radarr-standup/ObjectStore-radarr5-standup-postgresql-17-garage-local-backup.yaml +++ b/clusters/cl01tl/manifests/radarr-standup/ObjectStore-radarr5-standup-postgresql-17-garage-local-backup.yaml @@ -4,11 +4,11 @@ metadata: name: "radarr5-standup-postgresql-17-garage-local-backup" namespace: radarr-standup labels: - helm.sh/chart: postgres-17-cluster-6.16.1 + helm.sh/chart: postgres-17-cluster-6.17.1 app.kubernetes.io/name: radarr5-standup-postgresql-17 app.kubernetes.io/instance: radarr-standup app.kubernetes.io/part-of: radarr-standup - app.kubernetes.io/version: "6.16.1" + app.kubernetes.io/version: "6.17.1" app.kubernetes.io/managed-by: Helm spec: retentionPolicy: 3d diff --git a/clusters/cl01tl/manifests/radarr-standup/ObjectStore-radarr5-standup-postgresql-17-recovery.yaml b/clusters/cl01tl/manifests/radarr-standup/ObjectStore-radarr5-standup-postgresql-17-recovery.yaml index 7632a9b68..4735d5b7a 100644 --- a/clusters/cl01tl/manifests/radarr-standup/ObjectStore-radarr5-standup-postgresql-17-recovery.yaml +++ b/clusters/cl01tl/manifests/radarr-standup/ObjectStore-radarr5-standup-postgresql-17-recovery.yaml @@ -4,11 +4,11 @@ metadata: name: "radarr5-standup-postgresql-17-recovery" namespace: radarr-standup labels: - helm.sh/chart: postgres-17-cluster-6.16.1 + helm.sh/chart: postgres-17-cluster-6.17.1 app.kubernetes.io/name: radarr5-standup-postgresql-17 app.kubernetes.io/instance: radarr-standup app.kubernetes.io/part-of: radarr-standup - app.kubernetes.io/version: "6.16.1" + app.kubernetes.io/version: "6.17.1" app.kubernetes.io/managed-by: Helm spec: configuration: diff --git a/clusters/cl01tl/manifests/radarr-standup/PrometheusRule-radarr-standup-postgresql-18-alert-rules.yaml b/clusters/cl01tl/manifests/radarr-standup/PrometheusRule-radarr-standup-postgresql-18-alert-rules.yaml new file mode 100644 index 000000000..10578b5e8 --- /dev/null +++ 
b/clusters/cl01tl/manifests/radarr-standup/PrometheusRule-radarr-standup-postgresql-18-alert-rules.yaml @@ -0,0 +1,270 @@ +apiVersion: monitoring.coreos.com/v1 +kind: PrometheusRule +metadata: + name: radarr-standup-postgresql-18-alert-rules + namespace: radarr-standup + labels: + helm.sh/chart: postgres-18-cluster-6.17.1 + app.kubernetes.io/name: radarr-standup-postgresql-18 + app.kubernetes.io/instance: radarr-standup + app.kubernetes.io/part-of: radarr-standup + app.kubernetes.io/version: "6.17.1" + app.kubernetes.io/managed-by: Helm +spec: + groups: + - name: cloudnative-pg/radarr-standup-postgresql-18 + rules: + - alert: CNPGClusterBackendsWaitingWarning + annotations: + summary: CNPG Cluster a backend is waiting for longer than 5 minutes. + description: |- + Pod {{ $labels.pod }} + has been waiting for longer than 5 minutes + expr: | + cnpg_backends_waiting_total > 300 + for: 1m + labels: + severity: warning + namespace: radarr-standup + cnpg_cluster: radarr-standup-postgresql-18-cluster + - alert: CNPGClusterDatabaseDeadlockConflictsWarning + annotations: + summary: CNPG Cluster has over 10 deadlock conflicts. + description: |- + There are over 10 deadlock conflicts in + {{ $labels.pod }} + expr: | + cnpg_pg_stat_database_deadlocks > 10 + for: 1m + labels: + severity: warning + namespace: radarr-standup + cnpg_cluster: radarr-standup-postgresql-18-cluster + - alert: CNPGClusterHACritical + annotations: + summary: CNPG Cluster has no standby replicas! + description: |- + CloudNativePG Cluster "{{`{{`}} $labels.job {{`}}`}}" has no ready standby replicas. Your cluster at a severe + risk of data loss and downtime if the primary instance fails. + + The primary instance is still online and able to serve queries, although connections to the `-ro` endpoint + will fail. The `-r` endpoint os operating at reduced capacity and all traffic is being served by the main. 
+ + This can happen during a normal fail-over or automated minor version upgrades in a cluster with 2 or less + instances. The replaced instance may need some time to catch-up with the cluster primary instance. + + This alarm will be always trigger if your cluster is configured to run with only 1 instance. In this + case you may want to silence it. + runbook_url: https://github.com/cloudnative-pg/charts/blob/main/charts/cluster/docs/runbooks/CNPGClusterHACritical.md + expr: | + max by (job) (cnpg_pg_replication_streaming_replicas{namespace="radarr-standup"} - cnpg_pg_replication_is_wal_receiver_up{namespace="radarr-standup"}) < 1 + for: 5m + labels: + severity: critical + namespace: radarr-standup + cnpg_cluster: radarr-standup-postgresql-18-cluster + - alert: CNPGClusterHAWarning + annotations: + summary: CNPG Cluster less than 2 standby replicas. + description: |- + CloudNativePG Cluster "{{`{{`}} $labels.job {{`}}`}}" has only {{`{{`}} $value {{`}}`}} standby replicas, putting + your cluster at risk if another instance fails. The cluster is still able to operate normally, although + the `-ro` and `-r` endpoints operate at reduced capacity. + + This can happen during a normal fail-over or automated minor version upgrades. The replaced instance may + need some time to catch-up with the cluster primary instance. + + This alarm will be constantly triggered if your cluster is configured to run with less than 3 instances. + In this case you may want to silence it. 
+ runbook_url: https://github.com/cloudnative-pg/charts/blob/main/charts/cluster/docs/runbooks/CNPGClusterHAWarning.md + expr: | + max by (job) (cnpg_pg_replication_streaming_replicas{namespace="radarr-standup"} - cnpg_pg_replication_is_wal_receiver_up{namespace="radarr-standup"}) < 2 + for: 5m + labels: + severity: warning + namespace: radarr-standup + cnpg_cluster: radarr-standup-postgresql-18-cluster + - alert: CNPGClusterHighConnectionsCritical + annotations: + summary: CNPG Instance maximum number of connections critical! + description: |- + CloudNativePG Cluster "radarr-standup/radarr-standup-postgresql-18-cluster" instance {{`{{`}} $labels.pod {{`}}`}} is using {{`{{`}} $value {{`}}`}}% of + the maximum number of connections. + runbook_url: https://github.com/cloudnative-pg/charts/blob/main/charts/cluster/docs/runbooks/CNPGClusterHighConnectionsCritical.md + expr: | + sum by (pod) (cnpg_backends_total{namespace="radarr-standup", pod=~"radarr-standup-postgresql-18-cluster-([1-9][0-9]*)$"}) / max by (pod) (cnpg_pg_settings_setting{name="max_connections", namespace="radarr-standup", pod=~"radarr-standup-postgresql-18-cluster-([1-9][0-9]*)$"}) * 100 > 95 + for: 5m + labels: + severity: critical + namespace: radarr-standup + cnpg_cluster: radarr-standup-postgresql-18-cluster + - alert: CNPGClusterHighConnectionsWarning + annotations: + summary: CNPG Instance is approaching the maximum number of connections. + description: |- + CloudNativePG Cluster "radarr-standup/radarr-standup-postgresql-18-cluster" instance {{`{{`}} $labels.pod {{`}}`}} is using {{`{{`}} $value {{`}}`}}% of + the maximum number of connections. 
+ runbook_url: https://github.com/cloudnative-pg/charts/blob/main/charts/cluster/docs/runbooks/CNPGClusterHighConnectionsWarning.md + expr: | + sum by (pod) (cnpg_backends_total{namespace="radarr-standup", pod=~"radarr-standup-postgresql-18-cluster-([1-9][0-9]*)$"}) / max by (pod) (cnpg_pg_settings_setting{name="max_connections", namespace="radarr-standup", pod=~"radarr-standup-postgresql-18-cluster-([1-9][0-9]*)$"}) * 100 > 80 + for: 5m + labels: + severity: warning + namespace: radarr-standup + cnpg_cluster: radarr-standup-postgresql-18-cluster + - alert: CNPGClusterHighReplicationLag + annotations: + summary: CNPG Cluster high replication lag + description: |- + CloudNativePG Cluster "radarr-standup/radarr-standup-postgresql-18-cluster" is experiencing a high replication lag of + {{`{{`}} $value {{`}}`}}ms. + + High replication lag indicates network issues, busy instances, slow queries or suboptimal configuration. + runbook_url: https://github.com/cloudnative-pg/charts/blob/main/charts/cluster/docs/runbooks/CNPGClusterHighReplicationLag.md + expr: | + max(cnpg_pg_replication_lag{namespace="radarr-standup",pod=~"radarr-standup-postgresql-18-cluster-([1-9][0-9]*)$"}) * 1000 > 1000 + for: 5m + labels: + severity: warning + namespace: radarr-standup + cnpg_cluster: radarr-standup-postgresql-18-cluster + - alert: CNPGClusterInstancesOnSameNode + annotations: + summary: CNPG Cluster instances are located on the same node. + description: |- + CloudNativePG Cluster "radarr-standup/radarr-standup-postgresql-18-cluster" has {{`{{`}} $value {{`}}`}} + instances on the same node {{`{{`}} $labels.node {{`}}`}}. + + A failure or scheduled downtime of a single node will lead to a potential service disruption and/or data loss. 
+ runbook_url: https://github.com/cloudnative-pg/charts/blob/main/charts/cluster/docs/runbooks/CNPGClusterInstancesOnSameNode.md + expr: | + count by (node) (kube_pod_info{namespace="radarr-standup", pod=~"radarr-standup-postgresql-18-cluster-([1-9][0-9]*)$"}) > 1 + for: 5m + labels: + severity: warning + namespace: radarr-standup + cnpg_cluster: radarr-standup-postgresql-18-cluster + - alert: CNPGClusterLongRunningTransactionWarning + annotations: + summary: CNPG Cluster query is taking longer than 5 minutes. + description: |- + CloudNativePG Cluster Pod {{ $labels.pod }} + is taking more than 5 minutes (300 seconds) for a query. + expr: |- + cnpg_backends_max_tx_duration_seconds > 300 + for: 1m + labels: + severity: warning + namespace: radarr-standup + cnpg_cluster: radarr-standup-postgresql-18-cluster + - alert: CNPGClusterLowDiskSpaceCritical + annotations: + summary: CNPG Instance is running out of disk space! + description: |- + CloudNativePG Cluster "radarr-standup/radarr-standup-postgresql-18-cluster" is running extremely low on disk space. Check attached PVCs! 
+ runbook_url: https://github.com/cloudnative-pg/charts/blob/main/charts/cluster/docs/runbooks/CNPGClusterLowDiskSpaceCritical.md + expr: | + max(max by(persistentvolumeclaim) (1 - kubelet_volume_stats_available_bytes{namespace="radarr-standup", persistentvolumeclaim=~"radarr-standup-postgresql-18-cluster-([1-9][0-9]*)$"} / kubelet_volume_stats_capacity_bytes{namespace="radarr-standup", persistentvolumeclaim=~"radarr-standup-postgresql-18-cluster-([1-9][0-9]*)$"})) > 0.9 OR + max(max by(persistentvolumeclaim) (1 - kubelet_volume_stats_available_bytes{namespace="radarr-standup", persistentvolumeclaim=~"radarr-standup-postgresql-18-cluster-([1-9][0-9]*)$-wal"} / kubelet_volume_stats_capacity_bytes{namespace="radarr-standup", persistentvolumeclaim=~"radarr-standup-postgresql-18-cluster-([1-9][0-9]*)$-wal"})) > 0.9 OR + max(sum by (namespace,persistentvolumeclaim) (kubelet_volume_stats_used_bytes{namespace="radarr-standup", persistentvolumeclaim=~"radarr-standup-postgresql-18-cluster-([1-9][0-9]*)$-tbs.*"}) + / + sum by (namespace,persistentvolumeclaim) (kubelet_volume_stats_capacity_bytes{namespace="radarr-standup", persistentvolumeclaim=~"radarr-standup-postgresql-18-cluster-([1-9][0-9]*)$-tbs.*"}) + * + on(namespace, persistentvolumeclaim) group_left(volume) + kube_pod_spec_volumes_persistentvolumeclaims_info{pod=~"radarr-standup-postgresql-18-cluster-([1-9][0-9]*)$"} + ) > 0.9 + for: 5m + labels: + severity: critical + namespace: radarr-standup + cnpg_cluster: radarr-standup-postgresql-18-cluster + - alert: CNPGClusterLowDiskSpaceWarning + annotations: + summary: CNPG Instance is running out of disk space. + description: |- + CloudNativePG Cluster "radarr-standup/radarr-standup-postgresql-18-cluster" is running low on disk space. Check attached PVCs. 
+ runbook_url: https://github.com/cloudnative-pg/charts/blob/main/charts/cluster/docs/runbooks/CNPGClusterLowDiskSpaceWarning.md + expr: | + max(max by(persistentvolumeclaim) (1 - kubelet_volume_stats_available_bytes{namespace="radarr-standup", persistentvolumeclaim=~"radarr-standup-postgresql-18-cluster-([1-9][0-9]*)$"} / kubelet_volume_stats_capacity_bytes{namespace="radarr-standup", persistentvolumeclaim=~"radarr-standup-postgresql-18-cluster-([1-9][0-9]*)$"})) > 0.7 OR + max(max by(persistentvolumeclaim) (1 - kubelet_volume_stats_available_bytes{namespace="radarr-standup", persistentvolumeclaim=~"radarr-standup-postgresql-18-cluster-([1-9][0-9]*)$-wal"} / kubelet_volume_stats_capacity_bytes{namespace="radarr-standup", persistentvolumeclaim=~"radarr-standup-postgresql-18-cluster-([1-9][0-9]*)$-wal"})) > 0.7 OR + max(sum by (namespace,persistentvolumeclaim) (kubelet_volume_stats_used_bytes{namespace="radarr-standup", persistentvolumeclaim=~"radarr-standup-postgresql-18-cluster-([1-9][0-9]*)$-tbs.*"}) + / + sum by (namespace,persistentvolumeclaim) (kubelet_volume_stats_capacity_bytes{namespace="radarr-standup", persistentvolumeclaim=~"radarr-standup-postgresql-18-cluster-([1-9][0-9]*)$-tbs.*"}) + * + on(namespace, persistentvolumeclaim) group_left(volume) + kube_pod_spec_volumes_persistentvolumeclaims_info{pod=~"radarr-standup-postgresql-18-cluster-([1-9][0-9]*)$"} + ) > 0.7 + for: 5m + labels: + severity: warning + namespace: radarr-standup + cnpg_cluster: radarr-standup-postgresql-18-cluster + - alert: CNPGClusterOffline + annotations: + summary: CNPG Cluster has no running instances! + description: |- + CloudNativePG Cluster "radarr-standup/radarr-standup-postgresql-18-cluster" has no ready instances. + + Having an offline cluster means your applications will not be able to access the database, leading to + potential service disruption and/or data loss. 
+ runbook_url: https://github.com/cloudnative-pg/charts/blob/main/charts/cluster/docs/runbooks/CNPGClusterOffline.md + expr: | + (count(cnpg_collector_up{namespace="radarr-standup",pod=~"radarr-standup-postgresql-18-cluster-([1-9][0-9]*)$"}) OR on() vector(0)) == 0 + for: 5m + labels: + severity: critical + namespace: radarr-standup + cnpg_cluster: radarr-standup-postgresql-18-cluster + - alert: CNPGClusterPGDatabaseXidAgeWarning + annotations: + summary: CNPG Cluster has a number of transactions from the frozen XID to the current one. + description: |- + Over 300,000,000 transactions from frozen xid + on pod {{ $labels.pod }} + expr: | + cnpg_pg_database_xid_age > 300000000 + for: 1m + labels: + severity: warning + namespace: radarr-standup + cnpg_cluster: radarr-standup-postgresql-18-cluster + - alert: CNPGClusterPGReplicationWarning + annotations: + summary: CNPG Cluster standby is lagging behind the primary. + description: |- + Standby is lagging behind by over 300 seconds (5 minutes) + expr: | + cnpg_pg_replication_lag > 300 + for: 1m + labels: + severity: warning + namespace: radarr-standup + cnpg_cluster: radarr-standup-postgresql-18-cluster + - alert: CNPGClusterReplicaFailingReplicationWarning + annotations: + summary: CNPG Cluster has a replica is failing to replicate. + description: |- + Replica {{ $labels.pod }} + is failing to replicate + expr: | + cnpg_pg_replication_in_recovery > cnpg_pg_replication_is_wal_receiver_up + for: 1m + labels: + severity: warning + namespace: radarr-standup + cnpg_cluster: radarr-standup-postgresql-18-cluster + - alert: CNPGClusterZoneSpreadWarning + annotations: + summary: CNPG Cluster instances in the same zone. + description: |- + CloudNativePG Cluster "radarr-standup/radarr-standup-postgresql-18-cluster" has instances in the same availability zone. + + A disaster in one availability zone will lead to a potential service disruption and/or data loss. 
+ runbook_url: https://github.com/cloudnative-pg/charts/blob/main/charts/cluster/docs/runbooks/CNPGClusterZoneSpreadWarning.md + expr: | + 3 > count(count by (label_topology_kubernetes_io_zone) (kube_pod_info{namespace="radarr-standup", pod=~"radarr-standup-postgresql-18-cluster-([1-9][0-9]*)$"} * on(node,instance) group_left(label_topology_kubernetes_io_zone) kube_node_labels)) < 3 + for: 5m + labels: + severity: warning + namespace: radarr-standup + cnpg_cluster: radarr-standup-postgresql-18-cluster diff --git a/clusters/cl01tl/manifests/radarr-standup/PrometheusRule-radarr5-standup-postgresql-17-alert-rules.yaml b/clusters/cl01tl/manifests/radarr-standup/PrometheusRule-radarr5-standup-postgresql-17-alert-rules.yaml index 49de49299..a622e1392 100644 --- a/clusters/cl01tl/manifests/radarr-standup/PrometheusRule-radarr5-standup-postgresql-17-alert-rules.yaml +++ b/clusters/cl01tl/manifests/radarr-standup/PrometheusRule-radarr5-standup-postgresql-17-alert-rules.yaml @@ -4,11 +4,11 @@ metadata: name: radarr5-standup-postgresql-17-alert-rules namespace: radarr-standup labels: - helm.sh/chart: postgres-17-cluster-6.16.1 + helm.sh/chart: postgres-17-cluster-6.17.1 app.kubernetes.io/name: radarr5-standup-postgresql-17 app.kubernetes.io/instance: radarr-standup app.kubernetes.io/part-of: radarr-standup - app.kubernetes.io/version: "6.16.1" + app.kubernetes.io/version: "6.17.1" app.kubernetes.io/managed-by: Helm spec: groups: diff --git a/clusters/cl01tl/manifests/radarr-standup/ScheduledBackup-radarr-standup-postgresql-18-daily-backup-scheduled-backup.yaml b/clusters/cl01tl/manifests/radarr-standup/ScheduledBackup-radarr-standup-postgresql-18-daily-backup-scheduled-backup.yaml new file mode 100644 index 000000000..c6412eb29 --- /dev/null +++ b/clusters/cl01tl/manifests/radarr-standup/ScheduledBackup-radarr-standup-postgresql-18-daily-backup-scheduled-backup.yaml @@ -0,0 +1,24 @@ +apiVersion: postgresql.cnpg.io/v1 +kind: ScheduledBackup +metadata: + name: 
"radarr-standup-postgresql-18-daily-backup-scheduled-backup" + namespace: radarr-standup + labels: + helm.sh/chart: postgres-18-cluster-6.17.1 + app.kubernetes.io/name: radarr-standup-postgresql-18 + app.kubernetes.io/instance: radarr-standup + app.kubernetes.io/part-of: radarr-standup + app.kubernetes.io/version: "6.17.1" + app.kubernetes.io/managed-by: Helm +spec: + immediate: true + suspend: false + schedule: "0 0 0 * * *" + backupOwnerReference: self + cluster: + name: radarr-standup-postgresql-18-cluster + method: plugin + pluginConfiguration: + name: barman-cloud.cloudnative-pg.io + parameters: + barmanObjectName: "radarr-standup-postgresql-18-external-backup" diff --git a/clusters/cl01tl/manifests/radarr-standup/ScheduledBackup-radarr-standup-postgresql-18-live-backup-scheduled-backup.yaml b/clusters/cl01tl/manifests/radarr-standup/ScheduledBackup-radarr-standup-postgresql-18-live-backup-scheduled-backup.yaml new file mode 100644 index 000000000..06c9c2651 --- /dev/null +++ b/clusters/cl01tl/manifests/radarr-standup/ScheduledBackup-radarr-standup-postgresql-18-live-backup-scheduled-backup.yaml @@ -0,0 +1,24 @@ +apiVersion: postgresql.cnpg.io/v1 +kind: ScheduledBackup +metadata: + name: "radarr-standup-postgresql-18-live-backup-scheduled-backup" + namespace: radarr-standup + labels: + helm.sh/chart: postgres-18-cluster-6.17.1 + app.kubernetes.io/name: radarr-standup-postgresql-18 + app.kubernetes.io/instance: radarr-standup + app.kubernetes.io/part-of: radarr-standup + app.kubernetes.io/version: "6.17.1" + app.kubernetes.io/managed-by: Helm +spec: + immediate: true + suspend: false + schedule: "0 0 0 * * *" + backupOwnerReference: self + cluster: + name: radarr-standup-postgresql-18-cluster + method: plugin + pluginConfiguration: + name: barman-cloud.cloudnative-pg.io + parameters: + barmanObjectName: "radarr-standup-postgresql-18-garage-local-backup" diff --git 
a/clusters/cl01tl/manifests/radarr-standup/ScheduledBackup-radarr5-standup-postgresql-17-daily-backup-scheduled-backup.yaml b/clusters/cl01tl/manifests/radarr-standup/ScheduledBackup-radarr5-standup-postgresql-17-daily-backup-scheduled-backup.yaml index 7abd3c632..2d93eca3f 100644 --- a/clusters/cl01tl/manifests/radarr-standup/ScheduledBackup-radarr5-standup-postgresql-17-daily-backup-scheduled-backup.yaml +++ b/clusters/cl01tl/manifests/radarr-standup/ScheduledBackup-radarr5-standup-postgresql-17-daily-backup-scheduled-backup.yaml @@ -4,11 +4,11 @@ metadata: name: "radarr5-standup-postgresql-17-daily-backup-scheduled-backup" namespace: radarr-standup labels: - helm.sh/chart: postgres-17-cluster-6.16.1 + helm.sh/chart: postgres-17-cluster-6.17.1 app.kubernetes.io/name: radarr5-standup-postgresql-17 app.kubernetes.io/instance: radarr-standup app.kubernetes.io/part-of: radarr-standup - app.kubernetes.io/version: "6.16.1" + app.kubernetes.io/version: "6.17.1" app.kubernetes.io/managed-by: Helm spec: immediate: false diff --git a/clusters/cl01tl/manifests/radarr-standup/ScheduledBackup-radarr5-standup-postgresql-17-live-backup-scheduled-backup.yaml b/clusters/cl01tl/manifests/radarr-standup/ScheduledBackup-radarr5-standup-postgresql-17-live-backup-scheduled-backup.yaml index 4fe11cfa4..cf071b604 100644 --- a/clusters/cl01tl/manifests/radarr-standup/ScheduledBackup-radarr5-standup-postgresql-17-live-backup-scheduled-backup.yaml +++ b/clusters/cl01tl/manifests/radarr-standup/ScheduledBackup-radarr5-standup-postgresql-17-live-backup-scheduled-backup.yaml @@ -4,11 +4,11 @@ metadata: name: "radarr5-standup-postgresql-17-live-backup-scheduled-backup" namespace: radarr-standup labels: - helm.sh/chart: postgres-17-cluster-6.16.1 + helm.sh/chart: postgres-17-cluster-6.17.1 app.kubernetes.io/name: radarr5-standup-postgresql-17 app.kubernetes.io/instance: radarr-standup app.kubernetes.io/part-of: radarr-standup - app.kubernetes.io/version: "6.16.1" + app.kubernetes.io/version: 
"6.17.1" app.kubernetes.io/managed-by: Helm spec: immediate: true diff --git a/clusters/cl01tl/manifests/radarr/Cluster-radarr-postgresql-18-cluster.yaml b/clusters/cl01tl/manifests/radarr/Cluster-radarr-postgresql-18-cluster.yaml new file mode 100644 index 000000000..c99f076ee --- /dev/null +++ b/clusters/cl01tl/manifests/radarr/Cluster-radarr-postgresql-18-cluster.yaml @@ -0,0 +1,84 @@ +apiVersion: postgresql.cnpg.io/v1 +kind: Cluster +metadata: + name: radarr-postgresql-18-cluster + namespace: radarr + labels: + helm.sh/chart: postgres-18-cluster-6.17.1 + app.kubernetes.io/name: radarr-postgresql-18 + app.kubernetes.io/instance: radarr + app.kubernetes.io/part-of: radarr + app.kubernetes.io/version: "6.17.1" + app.kubernetes.io/managed-by: Helm +spec: + instances: 3 + imageName: "ghcr.io/cloudnative-pg/postgresql:18.1-standard-trixie" + imagePullPolicy: IfNotPresent + postgresUID: 26 + postgresGID: 26 + plugins: + - name: barman-cloud.cloudnative-pg.io + enabled: true + isWALArchiver: false + parameters: + barmanObjectName: "radarr-postgresql-18-external-backup" + serverName: "radarr-postgresql-18-backup-1" + - name: barman-cloud.cloudnative-pg.io + enabled: true + isWALArchiver: true + parameters: + barmanObjectName: "radarr-postgresql-18-garage-local-backup" + serverName: "radarr-postgresql-18-backup-1" + storage: + size: 10Gi + storageClass: local-path + walStorage: + size: 2Gi + storageClass: local-path + resources: + limits: + hugepages-2Mi: 256Mi + requests: + cpu: 200m + memory: 1Gi + affinity: + enablePodAntiAffinity: true + topologyKey: kubernetes.io/hostname + primaryUpdateMethod: switchover + primaryUpdateStrategy: unsupervised + logLevel: info + enableSuperuserAccess: false + enablePDB: true + postgresql: + parameters: + hot_standby_feedback: "on" + max_slot_wal_keep_size: 2000MB + shared_buffers: 128MB + monitoring: + enablePodMonitor: true + disableDefaultQueries: false + bootstrap: + initdb: + database: app + owner: app + import: + source: + 
externalCluster: importSource + type: monolith + databases: + - radarr-main + - radarr-log + roles: + - app + schemaOnly: false + externalClusters: + - name: importSource + connectionParameters: + host: "radarr5-postgresql-17-cluster-rw" + port: "5432" + user: "postgres" + dbname: "*" + sslmode: "disable" + password: + name: radarr5-postgresql-17-cluster-superuser + key: password diff --git a/clusters/cl01tl/manifests/radarr/Cluster-radarr5-postgresql-17-cluster.yaml b/clusters/cl01tl/manifests/radarr/Cluster-radarr5-postgresql-17-cluster.yaml index 8d979cc26..9a13c078a 100644 --- a/clusters/cl01tl/manifests/radarr/Cluster-radarr5-postgresql-17-cluster.yaml +++ b/clusters/cl01tl/manifests/radarr/Cluster-radarr5-postgresql-17-cluster.yaml @@ -4,11 +4,11 @@ metadata: name: radarr5-postgresql-17-cluster namespace: radarr labels: - helm.sh/chart: postgres-17-cluster-6.16.1 + helm.sh/chart: postgres-17-cluster-6.17.1 app.kubernetes.io/name: radarr5-postgresql-17 app.kubernetes.io/instance: radarr app.kubernetes.io/part-of: radarr - app.kubernetes.io/version: "6.16.1" + app.kubernetes.io/version: "6.17.1" app.kubernetes.io/managed-by: Helm spec: instances: 3 @@ -54,7 +54,7 @@ spec: primaryUpdateMethod: switchover primaryUpdateStrategy: unsupervised logLevel: info - enableSuperuserAccess: false + enableSuperuserAccess: true enablePDB: true postgresql: parameters: diff --git a/clusters/cl01tl/manifests/radarr/ExternalSecret-radarr-postgresql-18-cluster-backup-secret-garage.yaml b/clusters/cl01tl/manifests/radarr/ExternalSecret-radarr-postgresql-18-cluster-backup-secret-garage.yaml new file mode 100644 index 000000000..026492502 --- /dev/null +++ b/clusters/cl01tl/manifests/radarr/ExternalSecret-radarr-postgresql-18-cluster-backup-secret-garage.yaml @@ -0,0 +1,35 @@ +apiVersion: external-secrets.io/v1 +kind: ExternalSecret +metadata: + name: radarr-postgresql-18-cluster-backup-secret-garage + namespace: radarr + labels: + app.kubernetes.io/name: 
radarr-postgresql-18-cluster-backup-secret-garage + app.kubernetes.io/instance: radarr + app.kubernetes.io/part-of: radarr +spec: + secretStoreRef: + kind: ClusterSecretStore + name: vault + data: + - secretKey: ACCESS_KEY_ID + remoteRef: + conversionStrategy: Default + decodingStrategy: None + key: /garage/home-infra/postgres-backups + metadataPolicy: None + property: ACCESS_KEY_ID + - secretKey: ACCESS_SECRET_KEY + remoteRef: + conversionStrategy: Default + decodingStrategy: None + key: /garage/home-infra/postgres-backups + metadataPolicy: None + property: ACCESS_SECRET_KEY + - secretKey: ACCESS_REGION + remoteRef: + conversionStrategy: Default + decodingStrategy: None + key: /garage/home-infra/postgres-backups + metadataPolicy: None + property: ACCESS_REGION diff --git a/clusters/cl01tl/manifests/radarr/ExternalSecret-radarr-postgresql-18-cluster-backup-secret.yaml b/clusters/cl01tl/manifests/radarr/ExternalSecret-radarr-postgresql-18-cluster-backup-secret.yaml new file mode 100644 index 000000000..8896145bf --- /dev/null +++ b/clusters/cl01tl/manifests/radarr/ExternalSecret-radarr-postgresql-18-cluster-backup-secret.yaml @@ -0,0 +1,28 @@ +apiVersion: external-secrets.io/v1 +kind: ExternalSecret +metadata: + name: radarr-postgresql-18-cluster-backup-secret + namespace: radarr + labels: + app.kubernetes.io/name: radarr-postgresql-18-cluster-backup-secret + app.kubernetes.io/instance: radarr + app.kubernetes.io/part-of: radarr +spec: + secretStoreRef: + kind: ClusterSecretStore + name: vault + data: + - secretKey: ACCESS_KEY_ID + remoteRef: + conversionStrategy: Default + decodingStrategy: None + key: /digital-ocean/home-infra/postgres-backups + metadataPolicy: None + property: access + - secretKey: ACCESS_SECRET_KEY + remoteRef: + conversionStrategy: Default + decodingStrategy: None + key: /digital-ocean/home-infra/postgres-backups + metadataPolicy: None + property: secret diff --git 
a/clusters/cl01tl/manifests/radarr/ObjectStore-radarr-postgresql-18-external-backup.yaml b/clusters/cl01tl/manifests/radarr/ObjectStore-radarr-postgresql-18-external-backup.yaml new file mode 100644 index 000000000..38dbb0b26 --- /dev/null +++ b/clusters/cl01tl/manifests/radarr/ObjectStore-radarr-postgresql-18-external-backup.yaml @@ -0,0 +1,24 @@ +apiVersion: barmancloud.cnpg.io/v1 +kind: ObjectStore +metadata: + name: "radarr-postgresql-18-external-backup" + namespace: radarr + labels: + helm.sh/chart: postgres-18-cluster-6.17.1 + app.kubernetes.io/name: radarr-postgresql-18 + app.kubernetes.io/instance: radarr + app.kubernetes.io/part-of: radarr + app.kubernetes.io/version: "6.17.1" + app.kubernetes.io/managed-by: Helm +spec: + retentionPolicy: 30d + configuration: + destinationPath: s3://postgres-backups-ce540ddf106d186bbddca68a/cl01tl/radarr/radarr-postgresql-18-cluster + endpointURL: https://nyc3.digitaloceanspaces.com + s3Credentials: + accessKeyId: + name: radarr-postgresql-18-cluster-backup-secret + key: ACCESS_KEY_ID + secretAccessKey: + name: radarr-postgresql-18-cluster-backup-secret + key: ACCESS_SECRET_KEY diff --git a/clusters/cl01tl/manifests/radarr/ObjectStore-radarr-postgresql-18-garage-local-backup.yaml b/clusters/cl01tl/manifests/radarr/ObjectStore-radarr-postgresql-18-garage-local-backup.yaml new file mode 100644 index 000000000..b608ae424 --- /dev/null +++ b/clusters/cl01tl/manifests/radarr/ObjectStore-radarr-postgresql-18-garage-local-backup.yaml @@ -0,0 +1,27 @@ +apiVersion: barmancloud.cnpg.io/v1 +kind: ObjectStore +metadata: + name: "radarr-postgresql-18-garage-local-backup" + namespace: radarr + labels: + helm.sh/chart: postgres-18-cluster-6.17.1 + app.kubernetes.io/name: radarr-postgresql-18 + app.kubernetes.io/instance: radarr + app.kubernetes.io/part-of: radarr + app.kubernetes.io/version: "6.17.1" + app.kubernetes.io/managed-by: Helm +spec: + retentionPolicy: 3d + configuration: + destinationPath: 
s3://postgres-backups/cl01tl/radarr/radarr-postgresql-18-cluster + endpointURL: http://garage-main.garage:3900 + s3Credentials: + accessKeyId: + name: radarr-postgresql-18-cluster-backup-secret-garage + key: ACCESS_KEY_ID + secretAccessKey: + name: radarr-postgresql-18-cluster-backup-secret-garage + key: ACCESS_SECRET_KEY + region: + name: radarr-postgresql-18-cluster-backup-secret-garage + key: ACCESS_REGION diff --git a/clusters/cl01tl/manifests/radarr/ObjectStore-radarr5-postgresql-17-external-backup.yaml b/clusters/cl01tl/manifests/radarr/ObjectStore-radarr5-postgresql-17-external-backup.yaml index 4c6fce585..58bca807c 100644 --- a/clusters/cl01tl/manifests/radarr/ObjectStore-radarr5-postgresql-17-external-backup.yaml +++ b/clusters/cl01tl/manifests/radarr/ObjectStore-radarr5-postgresql-17-external-backup.yaml @@ -4,11 +4,11 @@ metadata: name: "radarr5-postgresql-17-external-backup" namespace: radarr labels: - helm.sh/chart: postgres-17-cluster-6.16.1 + helm.sh/chart: postgres-17-cluster-6.17.1 app.kubernetes.io/name: radarr5-postgresql-17 app.kubernetes.io/instance: radarr app.kubernetes.io/part-of: radarr - app.kubernetes.io/version: "6.16.1" + app.kubernetes.io/version: "6.17.1" app.kubernetes.io/managed-by: Helm spec: retentionPolicy: 30d diff --git a/clusters/cl01tl/manifests/radarr/ObjectStore-radarr5-postgresql-17-garage-local-backup.yaml b/clusters/cl01tl/manifests/radarr/ObjectStore-radarr5-postgresql-17-garage-local-backup.yaml index a3a09e08c..a5371ca38 100644 --- a/clusters/cl01tl/manifests/radarr/ObjectStore-radarr5-postgresql-17-garage-local-backup.yaml +++ b/clusters/cl01tl/manifests/radarr/ObjectStore-radarr5-postgresql-17-garage-local-backup.yaml @@ -4,11 +4,11 @@ metadata: name: "radarr5-postgresql-17-garage-local-backup" namespace: radarr labels: - helm.sh/chart: postgres-17-cluster-6.16.1 + helm.sh/chart: postgres-17-cluster-6.17.1 app.kubernetes.io/name: radarr5-postgresql-17 app.kubernetes.io/instance: radarr app.kubernetes.io/part-of: 
radarr - app.kubernetes.io/version: "6.16.1" + app.kubernetes.io/version: "6.17.1" app.kubernetes.io/managed-by: Helm spec: retentionPolicy: 3d diff --git a/clusters/cl01tl/manifests/radarr/ObjectStore-radarr5-postgresql-17-recovery.yaml b/clusters/cl01tl/manifests/radarr/ObjectStore-radarr5-postgresql-17-recovery.yaml index eda9e6f17..5e11970a2 100644 --- a/clusters/cl01tl/manifests/radarr/ObjectStore-radarr5-postgresql-17-recovery.yaml +++ b/clusters/cl01tl/manifests/radarr/ObjectStore-radarr5-postgresql-17-recovery.yaml @@ -4,11 +4,11 @@ metadata: name: "radarr5-postgresql-17-recovery" namespace: radarr labels: - helm.sh/chart: postgres-17-cluster-6.16.1 + helm.sh/chart: postgres-17-cluster-6.17.1 app.kubernetes.io/name: radarr5-postgresql-17 app.kubernetes.io/instance: radarr app.kubernetes.io/part-of: radarr - app.kubernetes.io/version: "6.16.1" + app.kubernetes.io/version: "6.17.1" app.kubernetes.io/managed-by: Helm spec: configuration: diff --git a/clusters/cl01tl/manifests/radarr/PrometheusRule-radarr-postgresql-18-alert-rules.yaml b/clusters/cl01tl/manifests/radarr/PrometheusRule-radarr-postgresql-18-alert-rules.yaml new file mode 100644 index 000000000..de26ad2fa --- /dev/null +++ b/clusters/cl01tl/manifests/radarr/PrometheusRule-radarr-postgresql-18-alert-rules.yaml @@ -0,0 +1,270 @@ +apiVersion: monitoring.coreos.com/v1 +kind: PrometheusRule +metadata: + name: radarr-postgresql-18-alert-rules + namespace: radarr + labels: + helm.sh/chart: postgres-18-cluster-6.17.1 + app.kubernetes.io/name: radarr-postgresql-18 + app.kubernetes.io/instance: radarr + app.kubernetes.io/part-of: radarr + app.kubernetes.io/version: "6.17.1" + app.kubernetes.io/managed-by: Helm +spec: + groups: + - name: cloudnative-pg/radarr-postgresql-18 + rules: + - alert: CNPGClusterBackendsWaitingWarning + annotations: + summary: CNPG Cluster a backend is waiting for longer than 5 minutes. 
+ description: |- + Pod {{ $labels.pod }} + has been waiting for longer than 5 minutes + expr: | + cnpg_backends_waiting_total > 300 + for: 1m + labels: + severity: warning + namespace: radarr + cnpg_cluster: radarr-postgresql-18-cluster + - alert: CNPGClusterDatabaseDeadlockConflictsWarning + annotations: + summary: CNPG Cluster has over 10 deadlock conflicts. + description: |- + There are over 10 deadlock conflicts in + {{ $labels.pod }} + expr: | + cnpg_pg_stat_database_deadlocks > 10 + for: 1m + labels: + severity: warning + namespace: radarr + cnpg_cluster: radarr-postgresql-18-cluster + - alert: CNPGClusterHACritical + annotations: + summary: CNPG Cluster has no standby replicas! + description: |- + CloudNativePG Cluster "{{ $labels.job }}" has no ready standby replicas. Your cluster is at a severe + risk of data loss and downtime if the primary instance fails. + + The primary instance is still online and able to serve queries, although connections to the `-ro` endpoint + will fail. The `-r` endpoint is operating at reduced capacity and all traffic is being served by the main. + + This can happen during a normal fail-over or automated minor version upgrades in a cluster with 2 or less + instances. The replaced instance may need some time to catch-up with the cluster primary instance. + + This alarm will always trigger if your cluster is configured to run with only 1 instance. In this + case you may want to silence it. + runbook_url: https://github.com/cloudnative-pg/charts/blob/main/charts/cluster/docs/runbooks/CNPGClusterHACritical.md + expr: | + max by (job) (cnpg_pg_replication_streaming_replicas{namespace="radarr"} - cnpg_pg_replication_is_wal_receiver_up{namespace="radarr"}) < 1 + for: 5m + labels: + severity: critical + namespace: radarr + cnpg_cluster: radarr-postgresql-18-cluster + - alert: CNPGClusterHAWarning + annotations: + summary: CNPG Cluster has less than 2 standby replicas. 
+ description: |- + CloudNativePG Cluster "{{ $labels.job }}" has only {{ $value }} standby replicas, putting + your cluster at risk if another instance fails. The cluster is still able to operate normally, although + the `-ro` and `-r` endpoints operate at reduced capacity. + + This can happen during a normal fail-over or automated minor version upgrades. The replaced instance may + need some time to catch-up with the cluster primary instance. + + This alarm will be constantly triggered if your cluster is configured to run with less than 3 instances. + In this case you may want to silence it. + runbook_url: https://github.com/cloudnative-pg/charts/blob/main/charts/cluster/docs/runbooks/CNPGClusterHAWarning.md + expr: | + max by (job) (cnpg_pg_replication_streaming_replicas{namespace="radarr"} - cnpg_pg_replication_is_wal_receiver_up{namespace="radarr"}) < 2 + for: 5m + labels: + severity: warning + namespace: radarr + cnpg_cluster: radarr-postgresql-18-cluster + - alert: CNPGClusterHighConnectionsCritical + annotations: + summary: CNPG Instance maximum number of connections critical! + description: |- + CloudNativePG Cluster "radarr/radarr-postgresql-18-cluster" instance {{ $labels.pod }} is using {{ $value }}% of + the maximum number of connections. + runbook_url: https://github.com/cloudnative-pg/charts/blob/main/charts/cluster/docs/runbooks/CNPGClusterHighConnectionsCritical.md + expr: | + sum by (pod) (cnpg_backends_total{namespace="radarr", pod=~"radarr-postgresql-18-cluster-([1-9][0-9]*)$"}) / max by (pod) (cnpg_pg_settings_setting{name="max_connections", namespace="radarr", pod=~"radarr-postgresql-18-cluster-([1-9][0-9]*)$"}) * 100 > 95 + for: 5m + labels: + severity: critical + namespace: radarr + cnpg_cluster: radarr-postgresql-18-cluster + - alert: CNPGClusterHighConnectionsWarning + annotations: + summary: CNPG Instance is approaching the maximum number of connections. 
+ description: |- + CloudNativePG Cluster "radarr/radarr-postgresql-18-cluster" instance {{ $labels.pod }} is using {{ $value }}% of + the maximum number of connections. + runbook_url: https://github.com/cloudnative-pg/charts/blob/main/charts/cluster/docs/runbooks/CNPGClusterHighConnectionsWarning.md + expr: | + sum by (pod) (cnpg_backends_total{namespace="radarr", pod=~"radarr-postgresql-18-cluster-([1-9][0-9]*)$"}) / max by (pod) (cnpg_pg_settings_setting{name="max_connections", namespace="radarr", pod=~"radarr-postgresql-18-cluster-([1-9][0-9]*)$"}) * 100 > 80 + for: 5m + labels: + severity: warning + namespace: radarr + cnpg_cluster: radarr-postgresql-18-cluster + - alert: CNPGClusterHighReplicationLag + annotations: + summary: CNPG Cluster high replication lag + description: |- + CloudNativePG Cluster "radarr/radarr-postgresql-18-cluster" is experiencing a high replication lag of + {{ $value }}ms. + + High replication lag indicates network issues, busy instances, slow queries or suboptimal configuration. + runbook_url: https://github.com/cloudnative-pg/charts/blob/main/charts/cluster/docs/runbooks/CNPGClusterHighReplicationLag.md + expr: | + max(cnpg_pg_replication_lag{namespace="radarr",pod=~"radarr-postgresql-18-cluster-([1-9][0-9]*)$"}) * 1000 > 1000 + for: 5m + labels: + severity: warning + namespace: radarr + cnpg_cluster: radarr-postgresql-18-cluster + - alert: CNPGClusterInstancesOnSameNode + annotations: + summary: CNPG Cluster instances are located on the same node. + description: |- + CloudNativePG Cluster "radarr/radarr-postgresql-18-cluster" has {{ $value }} + instances on the same node {{ $labels.node }}. + + A failure or scheduled downtime of a single node will lead to a potential service disruption and/or data loss. 
+ runbook_url: https://github.com/cloudnative-pg/charts/blob/main/charts/cluster/docs/runbooks/CNPGClusterInstancesOnSameNode.md + expr: | + count by (node) (kube_pod_info{namespace="radarr", pod=~"radarr-postgresql-18-cluster-([1-9][0-9]*)$"}) > 1 + for: 5m + labels: + severity: warning + namespace: radarr + cnpg_cluster: radarr-postgresql-18-cluster + - alert: CNPGClusterLongRunningTransactionWarning + annotations: + summary: CNPG Cluster query is taking longer than 5 minutes. + description: |- + CloudNativePG Cluster Pod {{ $labels.pod }} + is taking more than 5 minutes (300 seconds) for a query. + expr: |- + cnpg_backends_max_tx_duration_seconds > 300 + for: 1m + labels: + severity: warning + namespace: radarr + cnpg_cluster: radarr-postgresql-18-cluster + - alert: CNPGClusterLowDiskSpaceCritical + annotations: + summary: CNPG Instance is running out of disk space! + description: |- + CloudNativePG Cluster "radarr/radarr-postgresql-18-cluster" is running extremely low on disk space. Check attached PVCs! 
+ runbook_url: https://github.com/cloudnative-pg/charts/blob/main/charts/cluster/docs/runbooks/CNPGClusterLowDiskSpaceCritical.md + expr: | + max(max by(persistentvolumeclaim) (1 - kubelet_volume_stats_available_bytes{namespace="radarr", persistentvolumeclaim=~"radarr-postgresql-18-cluster-([1-9][0-9]*)$"} / kubelet_volume_stats_capacity_bytes{namespace="radarr", persistentvolumeclaim=~"radarr-postgresql-18-cluster-([1-9][0-9]*)$"})) > 0.9 OR + max(max by(persistentvolumeclaim) (1 - kubelet_volume_stats_available_bytes{namespace="radarr", persistentvolumeclaim=~"radarr-postgresql-18-cluster-([1-9][0-9]*)-wal"} / kubelet_volume_stats_capacity_bytes{namespace="radarr", persistentvolumeclaim=~"radarr-postgresql-18-cluster-([1-9][0-9]*)-wal"})) > 0.9 OR + max(sum by (namespace,persistentvolumeclaim) (kubelet_volume_stats_used_bytes{namespace="radarr", persistentvolumeclaim=~"radarr-postgresql-18-cluster-([1-9][0-9]*)-tbs.*"}) + / + sum by (namespace,persistentvolumeclaim) (kubelet_volume_stats_capacity_bytes{namespace="radarr", persistentvolumeclaim=~"radarr-postgresql-18-cluster-([1-9][0-9]*)-tbs.*"}) + * + on(namespace, persistentvolumeclaim) group_left(volume) + kube_pod_spec_volumes_persistentvolumeclaims_info{pod=~"radarr-postgresql-18-cluster-([1-9][0-9]*)$"} + ) > 0.9 + for: 5m + labels: + severity: critical + namespace: radarr + cnpg_cluster: radarr-postgresql-18-cluster + - alert: CNPGClusterLowDiskSpaceWarning + annotations: + summary: CNPG Instance is running out of disk space. + description: |- + CloudNativePG Cluster "radarr/radarr-postgresql-18-cluster" is running low on disk space. Check attached PVCs. 
+ runbook_url: https://github.com/cloudnative-pg/charts/blob/main/charts/cluster/docs/runbooks/CNPGClusterLowDiskSpaceWarning.md + expr: | + max(max by(persistentvolumeclaim) (1 - kubelet_volume_stats_available_bytes{namespace="radarr", persistentvolumeclaim=~"radarr-postgresql-18-cluster-([1-9][0-9]*)$"} / kubelet_volume_stats_capacity_bytes{namespace="radarr", persistentvolumeclaim=~"radarr-postgresql-18-cluster-([1-9][0-9]*)$"})) > 0.7 OR + max(max by(persistentvolumeclaim) (1 - kubelet_volume_stats_available_bytes{namespace="radarr", persistentvolumeclaim=~"radarr-postgresql-18-cluster-([1-9][0-9]*)-wal"} / kubelet_volume_stats_capacity_bytes{namespace="radarr", persistentvolumeclaim=~"radarr-postgresql-18-cluster-([1-9][0-9]*)-wal"})) > 0.7 OR + max(sum by (namespace,persistentvolumeclaim) (kubelet_volume_stats_used_bytes{namespace="radarr", persistentvolumeclaim=~"radarr-postgresql-18-cluster-([1-9][0-9]*)-tbs.*"}) + / + sum by (namespace,persistentvolumeclaim) (kubelet_volume_stats_capacity_bytes{namespace="radarr", persistentvolumeclaim=~"radarr-postgresql-18-cluster-([1-9][0-9]*)-tbs.*"}) + * + on(namespace, persistentvolumeclaim) group_left(volume) + kube_pod_spec_volumes_persistentvolumeclaims_info{pod=~"radarr-postgresql-18-cluster-([1-9][0-9]*)$"} + ) > 0.7 + for: 5m + labels: + severity: warning + namespace: radarr + cnpg_cluster: radarr-postgresql-18-cluster + - alert: CNPGClusterOffline + annotations: + summary: CNPG Cluster has no running instances! + description: |- + CloudNativePG Cluster "radarr/radarr-postgresql-18-cluster" has no ready instances. + + Having an offline cluster means your applications will not be able to access the database, leading to + potential service disruption and/or data loss. 
+ runbook_url: https://github.com/cloudnative-pg/charts/blob/main/charts/cluster/docs/runbooks/CNPGClusterOffline.md + expr: | + (count(cnpg_collector_up{namespace="radarr",pod=~"radarr-postgresql-18-cluster-([1-9][0-9]*)$"}) OR on() vector(0)) == 0 + for: 5m + labels: + severity: critical + namespace: radarr + cnpg_cluster: radarr-postgresql-18-cluster + - alert: CNPGClusterPGDatabaseXidAgeWarning + annotations: + summary: CNPG Cluster has a number of transactions from the frozen XID to the current one. + description: |- + Over 300,000,000 transactions from frozen xid + on pod {{ $labels.pod }} + expr: | + cnpg_pg_database_xid_age > 300000000 + for: 1m + labels: + severity: warning + namespace: radarr + cnpg_cluster: radarr-postgresql-18-cluster + - alert: CNPGClusterPGReplicationWarning + annotations: + summary: CNPG Cluster standby is lagging behind the primary. + description: |- + Standby is lagging behind by over 300 seconds (5 minutes) + expr: | + cnpg_pg_replication_lag > 300 + for: 1m + labels: + severity: warning + namespace: radarr + cnpg_cluster: radarr-postgresql-18-cluster + - alert: CNPGClusterReplicaFailingReplicationWarning + annotations: + summary: CNPG Cluster has a replica that is failing to replicate. + description: |- + Replica {{ $labels.pod }} + is failing to replicate + expr: | + cnpg_pg_replication_in_recovery > cnpg_pg_replication_is_wal_receiver_up + for: 1m + labels: + severity: warning + namespace: radarr + cnpg_cluster: radarr-postgresql-18-cluster + - alert: CNPGClusterZoneSpreadWarning + annotations: + summary: CNPG Cluster instances in the same zone. + description: |- + CloudNativePG Cluster "radarr/radarr-postgresql-18-cluster" has instances in the same availability zone. + + A disaster in one availability zone will lead to a potential service disruption and/or data loss. 
+ runbook_url: https://github.com/cloudnative-pg/charts/blob/main/charts/cluster/docs/runbooks/CNPGClusterZoneSpreadWarning.md + expr: | + 3 > count(count by (label_topology_kubernetes_io_zone) (kube_pod_info{namespace="radarr", pod=~"radarr-postgresql-18-cluster-([1-9][0-9]*)$"} * on(node,instance) group_left(label_topology_kubernetes_io_zone) kube_node_labels)) < 3 + for: 5m + labels: + severity: warning + namespace: radarr + cnpg_cluster: radarr-postgresql-18-cluster diff --git a/clusters/cl01tl/manifests/radarr/PrometheusRule-radarr5-postgresql-17-alert-rules.yaml b/clusters/cl01tl/manifests/radarr/PrometheusRule-radarr5-postgresql-17-alert-rules.yaml index 280581d34..40ec1b921 100644 --- a/clusters/cl01tl/manifests/radarr/PrometheusRule-radarr5-postgresql-17-alert-rules.yaml +++ b/clusters/cl01tl/manifests/radarr/PrometheusRule-radarr5-postgresql-17-alert-rules.yaml @@ -4,11 +4,11 @@ metadata: name: radarr5-postgresql-17-alert-rules namespace: radarr labels: - helm.sh/chart: postgres-17-cluster-6.16.1 + helm.sh/chart: postgres-17-cluster-6.17.1 app.kubernetes.io/name: radarr5-postgresql-17 app.kubernetes.io/instance: radarr app.kubernetes.io/part-of: radarr - app.kubernetes.io/version: "6.16.1" + app.kubernetes.io/version: "6.17.1" app.kubernetes.io/managed-by: Helm spec: groups: diff --git a/clusters/cl01tl/manifests/radarr/ScheduledBackup-radarr-postgresql-18-daily-backup-scheduled-backup.yaml b/clusters/cl01tl/manifests/radarr/ScheduledBackup-radarr-postgresql-18-daily-backup-scheduled-backup.yaml new file mode 100644 index 000000000..1cc15d6e9 --- /dev/null +++ b/clusters/cl01tl/manifests/radarr/ScheduledBackup-radarr-postgresql-18-daily-backup-scheduled-backup.yaml @@ -0,0 +1,24 @@ +apiVersion: postgresql.cnpg.io/v1 +kind: ScheduledBackup +metadata: + name: "radarr-postgresql-18-daily-backup-scheduled-backup" + namespace: radarr + labels: + helm.sh/chart: postgres-18-cluster-6.17.1 + app.kubernetes.io/name: radarr-postgresql-18 + 
app.kubernetes.io/instance: radarr + app.kubernetes.io/part-of: radarr + app.kubernetes.io/version: "6.17.1" + app.kubernetes.io/managed-by: Helm +spec: + immediate: true + suspend: false + schedule: "0 0 0 * * *" + backupOwnerReference: self + cluster: + name: radarr-postgresql-18-cluster + method: plugin + pluginConfiguration: + name: barman-cloud.cloudnative-pg.io + parameters: + barmanObjectName: "radarr-postgresql-18-external-backup" diff --git a/clusters/cl01tl/manifests/radarr/ScheduledBackup-radarr-postgresql-18-live-backup-scheduled-backup.yaml b/clusters/cl01tl/manifests/radarr/ScheduledBackup-radarr-postgresql-18-live-backup-scheduled-backup.yaml new file mode 100644 index 000000000..0ccd6a53e --- /dev/null +++ b/clusters/cl01tl/manifests/radarr/ScheduledBackup-radarr-postgresql-18-live-backup-scheduled-backup.yaml @@ -0,0 +1,24 @@ +apiVersion: postgresql.cnpg.io/v1 +kind: ScheduledBackup +metadata: + name: "radarr-postgresql-18-live-backup-scheduled-backup" + namespace: radarr + labels: + helm.sh/chart: postgres-18-cluster-6.17.1 + app.kubernetes.io/name: radarr-postgresql-18 + app.kubernetes.io/instance: radarr + app.kubernetes.io/part-of: radarr + app.kubernetes.io/version: "6.17.1" + app.kubernetes.io/managed-by: Helm +spec: + immediate: true + suspend: false + schedule: "0 0 0 * * *" + backupOwnerReference: self + cluster: + name: radarr-postgresql-18-cluster + method: plugin + pluginConfiguration: + name: barman-cloud.cloudnative-pg.io + parameters: + barmanObjectName: "radarr-postgresql-18-garage-local-backup" diff --git a/clusters/cl01tl/manifests/radarr/ScheduledBackup-radarr5-postgresql-17-daily-backup-scheduled-backup.yaml b/clusters/cl01tl/manifests/radarr/ScheduledBackup-radarr5-postgresql-17-daily-backup-scheduled-backup.yaml index 7e5ce6662..20a610675 100644 --- a/clusters/cl01tl/manifests/radarr/ScheduledBackup-radarr5-postgresql-17-daily-backup-scheduled-backup.yaml +++ 
b/clusters/cl01tl/manifests/radarr/ScheduledBackup-radarr5-postgresql-17-daily-backup-scheduled-backup.yaml @@ -4,11 +4,11 @@ metadata: name: "radarr5-postgresql-17-daily-backup-scheduled-backup" namespace: radarr labels: - helm.sh/chart: postgres-17-cluster-6.16.1 + helm.sh/chart: postgres-17-cluster-6.17.1 app.kubernetes.io/name: radarr5-postgresql-17 app.kubernetes.io/instance: radarr app.kubernetes.io/part-of: radarr - app.kubernetes.io/version: "6.16.1" + app.kubernetes.io/version: "6.17.1" app.kubernetes.io/managed-by: Helm spec: immediate: false diff --git a/clusters/cl01tl/manifests/radarr/ScheduledBackup-radarr5-postgresql-17-live-backup-scheduled-backup.yaml b/clusters/cl01tl/manifests/radarr/ScheduledBackup-radarr5-postgresql-17-live-backup-scheduled-backup.yaml index 647488411..eac293d0c 100644 --- a/clusters/cl01tl/manifests/radarr/ScheduledBackup-radarr5-postgresql-17-live-backup-scheduled-backup.yaml +++ b/clusters/cl01tl/manifests/radarr/ScheduledBackup-radarr5-postgresql-17-live-backup-scheduled-backup.yaml @@ -4,11 +4,11 @@ metadata: name: "radarr5-postgresql-17-live-backup-scheduled-backup" namespace: radarr labels: - helm.sh/chart: postgres-17-cluster-6.16.1 + helm.sh/chart: postgres-17-cluster-6.17.1 app.kubernetes.io/name: radarr5-postgresql-17 app.kubernetes.io/instance: radarr app.kubernetes.io/part-of: radarr - app.kubernetes.io/version: "6.16.1" + app.kubernetes.io/version: "6.17.1" app.kubernetes.io/managed-by: Helm spec: immediate: true