# Kubernetes manifests for the radarr-standup namespace: a CloudNativePG
# cluster with barman-cloud backups, the Radarr Deployment with an exportarr
# metrics sidecar, VolSync/restic backups of the config PVC, routing through
# the Traefik gateway with Authentik forward-auth, and Prometheus alert rules.
#
# Review changes relative to the previous revision:
#   * Restored block-style YAML (the file had been collapsed onto a few very
#     long lines). Line breaks inside `|` / `|-` block scalars were
#     reconstructed; the words are unchanged apart from the fixes below.
#   * Removed duplicate `app.kubernetes.io/name` label keys. The last
#     occurrence (the one last-wins parsers already used) is kept.
#   * Dropped `metadata.namespace` from the PersistentVolume.
#   * Unescaped the double-escaped Go templates in the CNPG alert annotations.
#   * Removed the mid-pattern `$` from the WAL / tablespace PVC regexes.
#   * RadarrStandUpDown now queries `radarr_system_status`.
---
apiVersion: postgresql.cnpg.io/v1
kind: Cluster
metadata:
  name: radarr-standup-postgresql-18-cluster
  namespace: radarr-standup
  labels:
    helm.sh/chart: postgres-18-cluster-7.9.1
    app.kubernetes.io/name: radarr-standup-postgresql-18
    app.kubernetes.io/instance: radarr-standup
    app.kubernetes.io/part-of: radarr-standup
    app.kubernetes.io/version: "7.9.1"
    app.kubernetes.io/managed-by: Helm
spec:
  instances: 3
  imageName: "ghcr.io/cloudnative-pg/postgresql:18.3-standard-trixie"
  imagePullPolicy: IfNotPresent
  postgresUID: 26
  postgresGID: 26
  storage:
    size: 10Gi
    storageClass: local-path
  walStorage:
    size: 2Gi
    storageClass: local-path
  resources:
    limits:
      hugepages-2Mi: 256Mi
    requests:
      cpu: 100m
      memory: 256Mi
  affinity:
    enablePodAntiAffinity: true
    topologyKey: kubernetes.io/hostname
  primaryUpdateMethod: switchover
  primaryUpdateStrategy: unsupervised
  logLevel: info
  enableSuperuserAccess: false
  enablePDB: true
  postgresql:
    parameters:
      # Quoted so the value stays the string "on" instead of a YAML 1.1 boolean.
      hot_standby_feedback: "on"
      max_slot_wal_keep_size: 2000MB
      shared_buffers: 128MB
  monitoring:
    enablePodMonitor: true
    disableDefaultQueries: false
  # NOTE(review): the archiver below writes under serverName "...-backup-1",
  # and bootstrap.recovery restores from an external cluster with the same
  # serverName. Both ObjectStores in this file use the same destinationPath.
  # Confirm that archiving into the location being restored from is intended.
  plugins:
    - name: barman-cloud.cloudnative-pg.io
      enabled: true
      isWALArchiver: true
      parameters:
        barmanObjectName: "radarr-standup-postgresql-18-backup-garage-local"
        serverName: "radarr-standup-postgresql-18-backup-1"
  bootstrap:
    recovery:
      database: app
      source: radarr-standup-postgresql-18-backup-1
  externalClusters:
    - name: radarr-standup-postgresql-18-backup-1
      plugin:
        name: barman-cloud.cloudnative-pg.io
        enabled: true
        isWALArchiver: false
        parameters:
          barmanObjectName: "radarr-standup-postgresql-18-recovery"
          serverName: radarr-standup-postgresql-18-backup-1
---
apiVersion: apps/v1
kind: Deployment
metadata:
  name: radarr-standup
  labels:
    app.kubernetes.io/controller: main
    app.kubernetes.io/instance: radarr-standup
    app.kubernetes.io/managed-by: Helm
    app.kubernetes.io/name: radarr-standup
    helm.sh/chart: radarr-standup-4.6.2
  namespace: radarr-standup
spec:
  revisionHistoryLimit: 3
  replicas: 1
  strategy:
    type: Recreate
  selector:
    matchLabels:
      app.kubernetes.io/controller: main
      app.kubernetes.io/name: radarr-standup
      app.kubernetes.io/instance: radarr-standup
  template:
    metadata:
      labels:
        app.kubernetes.io/controller: main
        app.kubernetes.io/instance: radarr-standup
        app.kubernetes.io/name: radarr-standup
    spec:
      enableServiceLinks: false
      serviceAccountName: default
      automountServiceAccountToken: true
      securityContext:
        fsGroup: 1000
        fsGroupChangePolicy: OnRootMismatch
      hostIPC: false
      hostNetwork: false
      hostPID: false
      dnsPolicy: ClusterFirst
      containers:
        # Radarr application container.
        - env:
            - name: TZ
              value: US/Central
            - name: PUID
              value: "1000"
            - name: PGID
              value: "1000"
          image: ghcr.io/linuxserver/radarr:6.0.4@sha256:ca43905eaf2dd11425efdcfe184892e43806b1ae0a830440c825cecbc2629cfb
          imagePullPolicy: IfNotPresent
          name: main
          resources:
            requests:
              cpu: 10m
              memory: 256Mi
          volumeMounts:
            - mountPath: /config
              name: config
            - mountPath: /mnt/store
              name: media
        # exportarr sidecar; mounts the same config volume read-only and
        # listens on the port exposed as "metrics" by the Service.
        - args:
            - radarr
          env:
            - name: URL
              value: http://localhost
            - name: CONFIG
              value: /config/config.xml
            - name: PORT
              value: "9793"
            - name: ENABLE_ADDITIONAL_METRICS
              value: "false"
            - name: ENABLE_UNKNOWN_QUEUE_ITEMS
              value: "false"
          image: ghcr.io/onedr0p/exportarr:v2.3.0
          imagePullPolicy: IfNotPresent
          name: metrics
          resources:
            requests:
              cpu: 10m
              memory: 128Mi
          volumeMounts:
            - mountPath: /config
              name: config
              readOnly: true
      volumes:
        - name: config
          persistentVolumeClaim:
            claimName: radarr-standup-config
        - name: media
          persistentVolumeClaim:
            claimName: radarr-standup-nfs-storage
---
# Restic repository credentials for the "external" VolSync ReplicationSource.
apiVersion: external-secrets.io/v1
kind: ExternalSecret
metadata:
  name: radarr-standup-config-backup-secret-external
  namespace: radarr-standup
  labels:
    helm.sh/chart: volsync-target-config-0.8.0
    app.kubernetes.io/instance: radarr-standup
    app.kubernetes.io/part-of: radarr-standup
    app.kubernetes.io/version: "0.8.0"
    app.kubernetes.io/managed-by: Helm
    app.kubernetes.io/name: radarr-standup-config-backup-secret-external
spec:
  secretStoreRef:
    kind: ClusterSecretStore
    name: vault
  target:
    template:
      mergePolicy: Merge
      engineVersion: v2
      data:
        RESTIC_REPOSITORY: "{{ .BUCKET_ENDPOINT }}/radarr-standup/radarr-standup-config"
  data:
    - secretKey: BUCKET_ENDPOINT
      remoteRef:
        conversionStrategy: Default
        decodingStrategy: None
        key: /volsync/restic/digital-ocean
        metadataPolicy: None
        property: BUCKET_ENDPOINT
    - secretKey: RESTIC_PASSWORD
      remoteRef:
        conversionStrategy: Default
        decodingStrategy: None
        key: /volsync/restic/digital-ocean
        metadataPolicy: None
        property: RESTIC_PASSWORD
    - secretKey: AWS_DEFAULT_REGION
      remoteRef:
        conversionStrategy: Default
        decodingStrategy: None
        key: /digital-ocean/home-infra/volsync-backups
        metadataPolicy: None
        property: AWS_DEFAULT_REGION
    - secretKey: AWS_ACCESS_KEY_ID
      remoteRef:
        conversionStrategy: Default
        decodingStrategy: None
        key: /digital-ocean/home-infra/volsync-backups
        metadataPolicy: None
        property: AWS_ACCESS_KEY_ID
    - secretKey: AWS_SECRET_ACCESS_KEY
      remoteRef:
        conversionStrategy: Default
        decodingStrategy: None
        key: /digital-ocean/home-infra/volsync-backups
        metadataPolicy: None
        property: AWS_SECRET_ACCESS_KEY
---
# Restic repository credentials for the "local" VolSync ReplicationSource.
apiVersion: external-secrets.io/v1
kind: ExternalSecret
metadata:
  name: radarr-standup-config-backup-secret-local
  namespace: radarr-standup
  labels:
    helm.sh/chart: volsync-target-config-0.8.0
    app.kubernetes.io/instance: radarr-standup
    app.kubernetes.io/part-of: radarr-standup
    app.kubernetes.io/version: "0.8.0"
    app.kubernetes.io/managed-by: Helm
    app.kubernetes.io/name: radarr-standup-config-backup-secret-local
spec:
  secretStoreRef:
    kind: ClusterSecretStore
    name: vault
  target:
    template:
      mergePolicy: Merge
      engineVersion: v2
      data:
        RESTIC_REPOSITORY: "{{ .BUCKET_ENDPOINT }}/radarr-standup/radarr-standup-config"
  data:
    - secretKey: BUCKET_ENDPOINT
      remoteRef:
        conversionStrategy: Default
        decodingStrategy: None
        key: /volsync/restic/garage-local
        metadataPolicy: None
        property: BUCKET_ENDPOINT
    - secretKey: RESTIC_PASSWORD
      remoteRef:
        conversionStrategy: Default
        decodingStrategy: None
        key: /volsync/restic/garage-local
        metadataPolicy: None
        property: RESTIC_PASSWORD
    - secretKey: AWS_DEFAULT_REGION
      remoteRef:
        conversionStrategy: Default
        decodingStrategy: None
        key: /garage/home-infra/volsync-backups
        metadataPolicy: None
        property: ACCESS_REGION
    - secretKey: AWS_ACCESS_KEY_ID
      remoteRef:
        conversionStrategy: Default
        decodingStrategy: None
        key: /garage/home-infra/volsync-backups
        metadataPolicy: None
        property: ACCESS_KEY_ID
    - secretKey: AWS_SECRET_ACCESS_KEY
      remoteRef:
        conversionStrategy: Default
        decodingStrategy: None
        key: /garage/home-infra/volsync-backups
        metadataPolicy: None
        property: ACCESS_SECRET_KEY
---
# Restic repository credentials for the "remote" VolSync ReplicationSource.
apiVersion: external-secrets.io/v1
kind: ExternalSecret
metadata:
  name: radarr-standup-config-backup-secret-remote
  namespace: radarr-standup
  labels:
    helm.sh/chart: volsync-target-config-0.8.0
    app.kubernetes.io/instance: radarr-standup
    app.kubernetes.io/part-of: radarr-standup
    app.kubernetes.io/version: "0.8.0"
    app.kubernetes.io/managed-by: Helm
    app.kubernetes.io/name: radarr-standup-config-backup-secret-remote
spec:
  secretStoreRef:
    kind: ClusterSecretStore
    name: vault
  target:
    template:
      mergePolicy: Merge
      engineVersion: v2
      data:
        RESTIC_REPOSITORY: "{{ .BUCKET_ENDPOINT }}/radarr-standup/radarr-standup-config"
  data:
    - secretKey: BUCKET_ENDPOINT
      remoteRef:
        conversionStrategy: Default
        decodingStrategy: None
        key: /volsync/restic/garage-remote
        metadataPolicy: None
        property: BUCKET_ENDPOINT
    - secretKey: RESTIC_PASSWORD
      remoteRef:
        conversionStrategy: Default
        decodingStrategy: None
        key: /volsync/restic/garage-remote
        metadataPolicy: None
        property: RESTIC_PASSWORD
    - secretKey: AWS_DEFAULT_REGION
      remoteRef:
        conversionStrategy: Default
        decodingStrategy: None
        key: /garage/home-infra/volsync-backups
        metadataPolicy: None
        property: ACCESS_REGION
    - secretKey: AWS_ACCESS_KEY_ID
      remoteRef:
        conversionStrategy: Default
        decodingStrategy: None
        key: /garage/home-infra/volsync-backups
        metadataPolicy: None
        property: ACCESS_KEY_ID
    - secretKey: AWS_SECRET_ACCESS_KEY
      remoteRef:
        conversionStrategy: Default
        decodingStrategy: None
        key: /garage/home-infra/volsync-backups
        metadataPolicy: None
        property: ACCESS_SECRET_KEY
---
# S3 credentials referenced by the backup ObjectStore.
apiVersion: external-secrets.io/v1
kind: ExternalSecret
metadata:
  name: radarr-standup-postgresql-18-backup-garage-local-secret
  namespace: radarr-standup
  labels:
    helm.sh/chart: postgres-18-cluster-7.9.1
    app.kubernetes.io/instance: radarr-standup
    app.kubernetes.io/part-of: radarr-standup
    app.kubernetes.io/version: "7.9.1"
    app.kubernetes.io/managed-by: Helm
    # This key was previously declared twice in this mapping; only the last
    # (resource-specific) value is kept.
    app.kubernetes.io/name: radarr-standup-postgresql-18-backup-garage-local-secret
spec:
  secretStoreRef:
    kind: ClusterSecretStore
    name: vault
  data:
    - secretKey: ACCESS_REGION
      remoteRef:
        conversionStrategy: Default
        decodingStrategy: None
        key: /garage/home-infra/postgres-backups
        metadataPolicy: None
        property: ACCESS_REGION
    - secretKey: ACCESS_KEY_ID
      remoteRef:
        conversionStrategy: Default
        decodingStrategy: None
        key: /garage/home-infra/postgres-backups
        metadataPolicy: None
        property: ACCESS_KEY_ID
    - secretKey: ACCESS_SECRET_KEY
      remoteRef:
        conversionStrategy: Default
        decodingStrategy: None
        key: /garage/home-infra/postgres-backups
        metadataPolicy: None
        property: ACCESS_SECRET_KEY
---
# S3 credentials referenced by the recovery ObjectStore.
apiVersion: external-secrets.io/v1
kind: ExternalSecret
metadata:
  name: radarr-standup-postgresql-18-recovery-secret
  namespace: radarr-standup
  labels:
    helm.sh/chart: postgres-18-cluster-7.9.1
    app.kubernetes.io/instance: radarr-standup
    app.kubernetes.io/part-of: radarr-standup
    app.kubernetes.io/version: "7.9.1"
    app.kubernetes.io/managed-by: Helm
    # This key was previously declared twice in this mapping; only the last
    # (resource-specific) value is kept.
    app.kubernetes.io/name: radarr-standup-postgresql-18-recovery-secret
spec:
  secretStoreRef:
    kind: ClusterSecretStore
    name: vault
  data:
    - secretKey: ACCESS_REGION
      remoteRef:
        conversionStrategy: Default
        decodingStrategy: None
        key: /garage/home-infra/postgres-backups
        metadataPolicy: None
        property: ACCESS_REGION
    - secretKey: ACCESS_KEY_ID
      remoteRef:
        conversionStrategy: Default
        decodingStrategy: None
        key: /garage/home-infra/postgres-backups
        metadataPolicy: None
        property: ACCESS_KEY_ID
    - secretKey: ACCESS_SECRET_KEY
      remoteRef:
        conversionStrategy: Default
        decodingStrategy: None
        key: /garage/home-infra/postgres-backups
        metadataPolicy: None
        property: ACCESS_SECRET_KEY
---
# NOTE(review): v1alpha2 is an old HTTPRoute API version; confirm the Gateway
# API CRDs installed in the cluster still serve it, or move to v1.
apiVersion: gateway.networking.k8s.io/v1alpha2
kind: HTTPRoute
metadata:
  name: radarr-standup
  labels:
    app.kubernetes.io/instance: radarr-standup
    app.kubernetes.io/managed-by: Helm
    app.kubernetes.io/name: radarr-standup
    helm.sh/chart: radarr-standup-4.6.2
  namespace: radarr-standup
spec:
  parentRefs:
    - group: gateway.networking.k8s.io
      kind: Gateway
      name: traefik-gateway
      namespace: traefik
  hostnames:
    - "radarr-standup.alexlebens.net"
  rules:
    # Outpost paths go to the Authentik outpost Service, without the
    # forward-auth filter.
    - backendRefs:
        - group: ""
          kind: Service
          name: ak-outpost-traefik-proxy-auth
          namespace: authentik
          port: 9000
          weight: 100
      matches:
        - path:
            type: PathPrefix
            value: /outpost.goauthentik.io
    # Everything else goes to Radarr through the forward-auth Middleware.
    - backendRefs:
        - group: ""
          kind: Service
          name: radarr-standup
          namespace: radarr-standup
          port: 80
          weight: 100
      matches:
        - path:
            type: PathPrefix
            value: /
      filters:
        - extensionRef:
            group: traefik.io
            kind: Middleware
            name: oidc-forward-auth
          type: ExtensionRef
---
apiVersion: traefik.io/v1alpha1
kind: Middleware
metadata:
  name: oidc-forward-auth
  namespace: radarr-standup
  labels:
    app.kubernetes.io/name: oidc-forward-auth
    app.kubernetes.io/instance: radarr-standup
    app.kubernetes.io/part-of: radarr-standup
spec:
  forwardAuth:
    address: http://ak-outpost-traefik-proxy-auth.authentik.svc.cluster.local:9000/outpost.goauthentik.io/auth/traefik
    trustForwardHeader: true
    authResponseHeaders:
      - X-authentik-username
      - X-authentik-groups
      - X-authentik-entitlements
      - X-authentik-email
      - X-authentik-name
      - X-authentik-uid
      - X-authentik-jwt
      - X-authentik-meta-jwks
      - X-authentik-meta-outpost
      - X-authentik-meta-provider
      - X-authentik-meta-app
      - X-authentik-meta-version
---
# Object store used by the Cluster's WAL archiver plugin and the
# ScheduledBackup.
apiVersion: barmancloud.cnpg.io/v1
kind: ObjectStore
metadata:
  name: radarr-standup-postgresql-18-backup-garage-local
  namespace: radarr-standup
  labels:
    helm.sh/chart: postgres-18-cluster-7.9.1
    app.kubernetes.io/instance: radarr-standup
    app.kubernetes.io/part-of: radarr-standup
    app.kubernetes.io/version: "7.9.1"
    app.kubernetes.io/managed-by: Helm
    # This key was previously declared twice in this mapping; only the last
    # (resource-specific) value is kept.
    app.kubernetes.io/name: radarr-standup-postgresql-18-backup-garage-local
spec:
  retentionPolicy: 7d
  instanceSidecarConfiguration:
    env:
      - name: AWS_REQUEST_CHECKSUM_CALCULATION
        value: when_required
      - name: AWS_RESPONSE_CHECKSUM_VALIDATION
        value: when_required
  configuration:
    destinationPath: s3://postgres-backups/cl01tl/radarr-standup/radarr-standup-postgresql-18-cluster
    endpointURL: http://garage-main.garage:3900
    s3Credentials:
      accessKeyId:
        name: radarr-standup-postgresql-18-backup-garage-local-secret
        key: ACCESS_KEY_ID
      secretAccessKey:
        name: radarr-standup-postgresql-18-backup-garage-local-secret
        key: ACCESS_SECRET_KEY
      region:
        name: radarr-standup-postgresql-18-backup-garage-local-secret
        key: ACCESS_REGION
---
# Object store referenced by the Cluster's externalClusters entry (recovery).
apiVersion: barmancloud.cnpg.io/v1
kind: ObjectStore
metadata:
  name: "radarr-standup-postgresql-18-recovery"
  namespace: radarr-standup
  labels:
    helm.sh/chart: postgres-18-cluster-7.9.1
    app.kubernetes.io/instance: radarr-standup
    app.kubernetes.io/part-of: radarr-standup
    app.kubernetes.io/version: "7.9.1"
    app.kubernetes.io/managed-by: Helm
    # This key was previously declared twice in this mapping; only the last
    # (resource-specific) value is kept.
    app.kubernetes.io/name: "radarr-standup-postgresql-18-recovery"
spec:
  configuration:
    destinationPath: s3://postgres-backups/cl01tl/radarr-standup/radarr-standup-postgresql-18-cluster
    endpointURL: http://garage-main.garage:3900
    wal:
      compression: snappy
      maxParallel: 1
    data:
      compression: snappy
      jobs: 1
    s3Credentials:
      accessKeyId:
        name: radarr-standup-postgresql-18-recovery-secret
        key: ACCESS_KEY_ID
      secretAccessKey:
        name: radarr-standup-postgresql-18-recovery-secret
        key: ACCESS_SECRET_KEY
      region:
        name: radarr-standup-postgresql-18-recovery-secret
        key: ACCESS_REGION
---
apiVersion: v1
kind: PersistentVolume
metadata:
  # PersistentVolumes are cluster-scoped, so no namespace is set here. The
  # namespaced PVC of the same name binds to this volume via spec.volumeName.
  name: radarr-standup-nfs-storage
  labels:
    app.kubernetes.io/name: radarr-standup-nfs-storage
    app.kubernetes.io/instance: radarr-standup
    app.kubernetes.io/part-of: radarr-standup
spec:
  persistentVolumeReclaimPolicy: Retain
  storageClassName: nfs-client
  capacity:
    storage: 1Gi
  accessModes:
    - ReadWriteMany
  nfs:
    path: /volume2/Storage
    server: synologybond.alexlebens.net
  mountOptions:
    - vers=4
    - minorversion=1
    - noac
---
kind: PersistentVolumeClaim
apiVersion: v1
metadata:
  name: radarr-standup-config
  labels:
    app.kubernetes.io/instance: radarr-standup
    app.kubernetes.io/managed-by: Helm
    app.kubernetes.io/name: radarr-standup
    helm.sh/chart: radarr-standup-4.6.2
  annotations:
    helm.sh/resource-policy: keep
  namespace: radarr-standup
spec:
  accessModes:
    - "ReadWriteOnce"
  resources:
    requests:
      storage: "20Gi"
  storageClassName: "ceph-block"
---
apiVersion: v1
kind: PersistentVolumeClaim
metadata:
  name: radarr-standup-nfs-storage
  namespace: radarr-standup
  labels:
    app.kubernetes.io/name: radarr-standup-nfs-storage
    app.kubernetes.io/instance: radarr-standup
    app.kubernetes.io/part-of: radarr-standup
spec:
  volumeName: radarr-standup-nfs-storage
  storageClassName: nfs-client
  accessModes:
    - ReadWriteMany
  resources:
    requests:
      storage: 1Gi
---
# Alert rules for the CloudNativePG cluster.
#
# Annotation templates are written as plain `{{ ... }}` so Prometheus expands
# them. The previous revision left Helm-style escapes in some annotations,
# which would have produced the literal text "{{ $labels.job }}" in alerts.
apiVersion: monitoring.coreos.com/v1
kind: PrometheusRule
metadata:
  name: radarr-standup-postgresql-18-alert-rules
  namespace: radarr-standup
  labels:
    helm.sh/chart: postgres-18-cluster-7.9.1
    app.kubernetes.io/name: radarr-standup-postgresql-18
    app.kubernetes.io/instance: radarr-standup
    app.kubernetes.io/part-of: radarr-standup
    app.kubernetes.io/version: "7.9.1"
    app.kubernetes.io/managed-by: Helm
spec:
  groups:
    - name: cloudnative-pg/radarr-standup-postgresql-18
      rules:
        - alert: CNPGClusterBackendsWaitingWarning
          annotations:
            summary: CNPG Cluster a backend is waiting for longer than 5 minutes.
            description: |-
              Pod {{ $labels.pod }} has been waiting for longer than 5 minutes
          expr: |
            cnpg_backends_waiting_total{namespace="radarr-standup"} > 300
          for: 1m
          labels:
            severity: warning
            namespace: radarr-standup
            cnpg_cluster: radarr-standup-postgresql-18-cluster
        - alert: CNPGClusterDatabaseDeadlockConflictsWarning
          annotations:
            summary: CNPG Cluster has over 10 deadlock conflicts.
            description: |-
              There are over 10 deadlock conflicts in {{ $labels.pod }}
          expr: |
            cnpg_pg_stat_database_deadlocks{namespace="radarr-standup"} > 10
          for: 1m
          labels:
            severity: warning
            namespace: radarr-standup
            cnpg_cluster: radarr-standup-postgresql-18-cluster
        - alert: CNPGClusterHACritical
          annotations:
            summary: CNPG Cluster has no standby replicas!
            description: |-
              CloudNativePG Cluster "{{ $labels.job }}" has no ready standby replicas. Your cluster at a severe risk of data loss and downtime if the primary instance fails.

              The primary instance is still online and able to serve queries, although connections to the `-ro` endpoint will fail. The `-r` endpoint os operating at reduced capacity and all traffic is being served by the main.

              This can happen during a normal fail-over or automated minor version upgrades in a cluster with 2 or less instances. The replaced instance may need some time to catch-up with the cluster primary instance.

              This alarm will be always trigger if your cluster is configured to run with only 1 instance. In this case you may want to silence it.
            runbook_url: https://github.com/cloudnative-pg/charts/blob/main/charts/cluster/docs/runbooks/CNPGClusterHACritical.md
          expr: |
            max by (job) (cnpg_pg_replication_streaming_replicas{namespace="radarr-standup"} - cnpg_pg_replication_is_wal_receiver_up{namespace="radarr-standup"}) < 1
          for: 5m
          labels:
            severity: critical
            namespace: radarr-standup
            cnpg_cluster: radarr-standup-postgresql-18-cluster
        - alert: CNPGClusterHAWarning
          annotations:
            summary: CNPG Cluster less than 2 standby replicas.
            description: |-
              CloudNativePG Cluster "{{ $labels.job }}" has only {{ $value }} standby replicas, putting your cluster at risk if another instance fails. The cluster is still able to operate normally, although the `-ro` and `-r` endpoints operate at reduced capacity.

              This can happen during a normal fail-over or automated minor version upgrades. The replaced instance may need some time to catch-up with the cluster primary instance.

              This alarm will be constantly triggered if your cluster is configured to run with less than 3 instances. In this case you may want to silence it.
            runbook_url: https://github.com/cloudnative-pg/charts/blob/main/charts/cluster/docs/runbooks/CNPGClusterHAWarning.md
          expr: |
            max by (job) (cnpg_pg_replication_streaming_replicas{namespace="radarr-standup"} - cnpg_pg_replication_is_wal_receiver_up{namespace="radarr-standup"}) < 2
          for: 5m
          labels:
            severity: warning
            namespace: radarr-standup
            cnpg_cluster: radarr-standup-postgresql-18-cluster
        - alert: CNPGClusterHighConnectionsCritical
          annotations:
            summary: CNPG Instance maximum number of connections critical!
            description: |-
              CloudNativePG Cluster "radarr-standup/radarr-standup-postgresql-18-cluster" instance {{ $labels.pod }} is using {{ $value }}% of the maximum number of connections.
            runbook_url: https://github.com/cloudnative-pg/charts/blob/main/charts/cluster/docs/runbooks/CNPGClusterHighConnectionsCritical.md
          expr: |
            sum by (pod) (cnpg_backends_total{namespace="radarr-standup", pod=~"radarr-standup-postgresql-18-cluster-([1-9][0-9]*)$"}) / max by (pod) (cnpg_pg_settings_setting{name="max_connections", namespace="radarr-standup", pod=~"radarr-standup-postgresql-18-cluster-([1-9][0-9]*)$"}) * 100 > 95
          for: 5m
          labels:
            severity: critical
            namespace: radarr-standup
            cnpg_cluster: radarr-standup-postgresql-18-cluster
        - alert: CNPGClusterHighConnectionsWarning
          annotations:
            summary: CNPG Instance is approaching the maximum number of connections.
            description: |-
              CloudNativePG Cluster "radarr-standup/radarr-standup-postgresql-18-cluster" instance {{ $labels.pod }} is using {{ $value }}% of the maximum number of connections.
            runbook_url: https://github.com/cloudnative-pg/charts/blob/main/charts/cluster/docs/runbooks/CNPGClusterHighConnectionsWarning.md
          expr: |
            sum by (pod) (cnpg_backends_total{namespace="radarr-standup", pod=~"radarr-standup-postgresql-18-cluster-([1-9][0-9]*)$"}) / max by (pod) (cnpg_pg_settings_setting{name="max_connections", namespace="radarr-standup", pod=~"radarr-standup-postgresql-18-cluster-([1-9][0-9]*)$"}) * 100 > 80
          for: 5m
          labels:
            severity: warning
            namespace: radarr-standup
            cnpg_cluster: radarr-standup-postgresql-18-cluster
        - alert: CNPGClusterHighReplicationLag
          annotations:
            summary: CNPG Cluster high replication lag
            description: |-
              CloudNativePG Cluster "radarr-standup/radarr-standup-postgresql-18-cluster" is experiencing a high replication lag of {{ $value }}ms.

              High replication lag indicates network issues, busy instances, slow queries or suboptimal configuration.
            runbook_url: https://github.com/cloudnative-pg/charts/blob/main/charts/cluster/docs/runbooks/CNPGClusterHighReplicationLag.md
          expr: |
            max(cnpg_pg_replication_lag{namespace="radarr-standup",pod=~"radarr-standup-postgresql-18-cluster-([1-9][0-9]*)$"}) * 1000 > 1000
          for: 5m
          labels:
            severity: warning
            namespace: radarr-standup
            cnpg_cluster: radarr-standup-postgresql-18-cluster
        - alert: CNPGClusterInstancesOnSameNode
          annotations:
            summary: CNPG Cluster instances are located on the same node.
            description: |-
              CloudNativePG Cluster "radarr-standup/radarr-standup-postgresql-18-cluster" has {{ $value }} instances on the same node {{ $labels.node }}.

              A failure or scheduled downtime of a single node will lead to a potential service disruption and/or data loss.
            runbook_url: https://github.com/cloudnative-pg/charts/blob/main/charts/cluster/docs/runbooks/CNPGClusterInstancesOnSameNode.md
          expr: |
            count by (node) (kube_pod_info{namespace="radarr-standup", pod=~"radarr-standup-postgresql-18-cluster-([1-9][0-9]*)$"}) > 1
          for: 5m
          labels:
            severity: warning
            namespace: radarr-standup
            cnpg_cluster: radarr-standup-postgresql-18-cluster
        - alert: CNPGClusterLongRunningTransactionWarning
          annotations:
            summary: CNPG Cluster query is taking longer than 5 minutes.
            description: |-
              CloudNativePG Cluster Pod {{ $labels.pod }} is taking more than 5 minutes (300 seconds) for a query.
          expr: |-
            cnpg_backends_max_tx_duration_seconds{namespace="radarr-standup"} > 300
          for: 1m
          labels:
            severity: warning
            namespace: radarr-standup
            cnpg_cluster: radarr-standup-postgresql-18-cluster
        - alert: CNPGClusterLowDiskSpaceCritical
          annotations:
            summary: CNPG Instance is running out of disk space!
            description: |-
              CloudNativePG Cluster "radarr-standup/radarr-standup-postgresql-18-cluster" is running extremely low on disk space. Check attached PVCs!
            runbook_url: https://github.com/cloudnative-pg/charts/blob/main/charts/cluster/docs/runbooks/CNPGClusterLowDiskSpaceCritical.md
          # Three clauses: data PVCs, "-wal" PVCs, and "-tbs*" tablespace PVCs.
          # The WAL and tablespace patterns previously had a `$` before the
          # suffix ("...)$-wal"), which can never match, so those clauses were
          # dead. Prometheus regex matchers are fully anchored, so no explicit
          # anchor is needed there.
          expr: |
            max(max by(persistentvolumeclaim) (1 - kubelet_volume_stats_available_bytes{namespace="radarr-standup", persistentvolumeclaim=~"radarr-standup-postgresql-18-cluster-([1-9][0-9]*)$"} / kubelet_volume_stats_capacity_bytes{namespace="radarr-standup", persistentvolumeclaim=~"radarr-standup-postgresql-18-cluster-([1-9][0-9]*)$"})) > 0.9 OR
            max(max by(persistentvolumeclaim) (1 - kubelet_volume_stats_available_bytes{namespace="radarr-standup", persistentvolumeclaim=~"radarr-standup-postgresql-18-cluster-([1-9][0-9]*)-wal"} / kubelet_volume_stats_capacity_bytes{namespace="radarr-standup", persistentvolumeclaim=~"radarr-standup-postgresql-18-cluster-([1-9][0-9]*)-wal"})) > 0.9 OR
            max(sum by (namespace,persistentvolumeclaim) (kubelet_volume_stats_used_bytes{namespace="radarr-standup", persistentvolumeclaim=~"radarr-standup-postgresql-18-cluster-([1-9][0-9]*)-tbs.*"})
                /
                sum by (namespace,persistentvolumeclaim) (kubelet_volume_stats_capacity_bytes{namespace="radarr-standup", persistentvolumeclaim=~"radarr-standup-postgresql-18-cluster-([1-9][0-9]*)-tbs.*"})
                *
                on(namespace, persistentvolumeclaim) group_left(volume)
                kube_pod_spec_volumes_persistentvolumeclaims_info{pod=~"radarr-standup-postgresql-18-cluster-([1-9][0-9]*)$"}
            ) > 0.9
          for: 5m
          labels:
            severity: critical
            namespace: radarr-standup
            cnpg_cluster: radarr-standup-postgresql-18-cluster
        - alert: CNPGClusterLowDiskSpaceWarning
          annotations:
            summary: CNPG Instance is running out of disk space.
            description: |-
              CloudNativePG Cluster "radarr-standup/radarr-standup-postgresql-18-cluster" is running low on disk space. Check attached PVCs.
            runbook_url: https://github.com/cloudnative-pg/charts/blob/main/charts/cluster/docs/runbooks/CNPGClusterLowDiskSpaceWarning.md
          # Same three clauses as the critical rule, at a 0.7 threshold. The
          # never-matching `$` before "-wal" / "-tbs.*" was removed here too.
          expr: |
            max(max by(persistentvolumeclaim) (1 - kubelet_volume_stats_available_bytes{namespace="radarr-standup", persistentvolumeclaim=~"radarr-standup-postgresql-18-cluster-([1-9][0-9]*)$"} / kubelet_volume_stats_capacity_bytes{namespace="radarr-standup", persistentvolumeclaim=~"radarr-standup-postgresql-18-cluster-([1-9][0-9]*)$"})) > 0.7 OR
            max(max by(persistentvolumeclaim) (1 - kubelet_volume_stats_available_bytes{namespace="radarr-standup", persistentvolumeclaim=~"radarr-standup-postgresql-18-cluster-([1-9][0-9]*)-wal"} / kubelet_volume_stats_capacity_bytes{namespace="radarr-standup", persistentvolumeclaim=~"radarr-standup-postgresql-18-cluster-([1-9][0-9]*)-wal"})) > 0.7 OR
            max(sum by (namespace,persistentvolumeclaim) (kubelet_volume_stats_used_bytes{namespace="radarr-standup", persistentvolumeclaim=~"radarr-standup-postgresql-18-cluster-([1-9][0-9]*)-tbs.*"})
                /
                sum by (namespace,persistentvolumeclaim) (kubelet_volume_stats_capacity_bytes{namespace="radarr-standup", persistentvolumeclaim=~"radarr-standup-postgresql-18-cluster-([1-9][0-9]*)-tbs.*"})
                *
                on(namespace, persistentvolumeclaim) group_left(volume)
                kube_pod_spec_volumes_persistentvolumeclaims_info{pod=~"radarr-standup-postgresql-18-cluster-([1-9][0-9]*)$"}
            ) > 0.7
          for: 5m
          labels:
            severity: warning
            namespace: radarr-standup
            cnpg_cluster: radarr-standup-postgresql-18-cluster
        - alert: CNPGClusterOffline
          annotations:
            summary: CNPG Cluster has no running instances!
            description: |-
              CloudNativePG Cluster "radarr-standup/radarr-standup-postgresql-18-cluster" has no ready instances.

              Having an offline cluster means your applications will not be able to access the database, leading to potential service disruption and/or data loss.
            runbook_url: https://github.com/cloudnative-pg/charts/blob/main/charts/cluster/docs/runbooks/CNPGClusterOffline.md
          expr: |
            (count(cnpg_collector_up{namespace="radarr-standup",pod=~"radarr-standup-postgresql-18-cluster-([1-9][0-9]*)$"}) OR on() vector(0)) == 0
          for: 5m
          labels:
            severity: critical
            namespace: radarr-standup
            cnpg_cluster: radarr-standup-postgresql-18-cluster
        - alert: CNPGClusterPGDatabaseXidAgeWarning
          annotations:
            summary: CNPG Cluster has a number of transactions from the frozen XID to the current one.
            description: |-
              Over 300,000,000 transactions from frozen xid on pod {{ $labels.pod }}
          expr: |
            cnpg_pg_database_xid_age{namespace="radarr-standup"} > 300000000
          for: 1m
          labels:
            severity: warning
            namespace: radarr-standup
            cnpg_cluster: radarr-standup-postgresql-18-cluster
        - alert: CNPGClusterPGReplicationWarning
          annotations:
            summary: CNPG Cluster standby is lagging behind the primary.
            description: |-
              Standby is lagging behind by over 300 seconds (5 minutes)
          expr: |
            cnpg_pg_replication_lag{namespace="radarr-standup"} > 300
          for: 1m
          labels:
            severity: warning
            namespace: radarr-standup
            cnpg_cluster: radarr-standup-postgresql-18-cluster
        - alert: CNPGClusterReplicaFailingReplicationWarning
          annotations:
            summary: CNPG Cluster has a replica is failing to replicate.
            description: |-
              Replica {{ $labels.pod }} is failing to replicate
          expr: |
            cnpg_pg_replication_in_recovery{namespace="radarr-standup"} > cnpg_pg_replication_is_wal_receiver_up{namespace="radarr-standup"}
          for: 1m
          labels:
            severity: warning
            namespace: radarr-standup
            cnpg_cluster: radarr-standup-postgresql-18-cluster
        - alert: CNPGClusterZoneSpreadWarning
          annotations:
            summary: CNPG Cluster instances in the same zone.
            description: |-
              CloudNativePG Cluster "radarr-standup/radarr-standup-postgresql-18-cluster" has instances in the same availability zone.

              A disaster in one availability zone will lead to a potential service disruption and/or data loss.
            runbook_url: https://github.com/cloudnative-pg/charts/blob/main/charts/cluster/docs/runbooks/CNPGClusterZoneSpreadWarning.md
          expr: |
            3 > count(count by (label_topology_kubernetes_io_zone) (kube_pod_info{namespace="radarr-standup", pod=~"radarr-standup-postgresql-18-cluster-([1-9][0-9]*)$"} * on(node,instance) group_left(label_topology_kubernetes_io_zone) kube_node_labels)) < 3
          for: 5m
          labels:
            severity: warning
            namespace: radarr-standup
            cnpg_cluster: radarr-standup-postgresql-18-cluster
---
# Alert rules for the Radarr application and its exportarr sidecar.
apiVersion: monitoring.coreos.com/v1
kind: PrometheusRule
metadata:
  name: radarr-standup
  namespace: radarr-standup
  labels:
    app.kubernetes.io/name: radarr-standup
    app.kubernetes.io/instance: radarr-standup
    app.kubernetes.io/part-of: radarr-standup
spec:
  groups:
    - name: radarr-standup
      rules:
        - alert: ExportarrAbsent
          annotations:
            description: Radarr Stand Up Exportarr has disappeared from Prometheus service discovery.
            summary: Exportarr is down.
          expr: |
            absent(up{job=~".*radarr-standup.*"} == 1)
          for: 5m
          labels:
            severity: critical
        - alert: RadarrStandUpDown
          annotations:
            description: Radarr Stand Up service is down.
            summary: Radarr Stand Up is down.
          # The sidecar runs exportarr with the "radarr" arg, so the metric is
          # prefixed "radarr_", not "radarr_standup_"; the previous name never
          # matched a series. The job matcher still scopes it to this release.
          expr: |
            radarr_system_status{job=~".*radarr-standup.*"} == 0
          for: 5m
          labels:
            severity: critical
---
# Daily restic backup of the config PVC to the "external" repository.
apiVersion: volsync.backube/v1alpha1
kind: ReplicationSource
metadata:
  name: radarr-standup-config-backup-source-external
  namespace: radarr-standup
  labels:
    helm.sh/chart: volsync-target-config-0.8.0
    app.kubernetes.io/instance: radarr-standup
    app.kubernetes.io/part-of: radarr-standup
    app.kubernetes.io/version: "0.8.0"
    app.kubernetes.io/managed-by: Helm
    app.kubernetes.io/name: radarr-standup-config-backup
spec:
  sourcePVC: radarr-standup-config
  trigger:
    schedule: "10 13 * * *"
  restic:
    pruneIntervalDays: 7
    repository: radarr-standup-config-backup-secret-external
    retain:
      daily: 7
      hourly: 0
      monthly: 3
      weekly: 4
      yearly: 1
    moverSecurityContext:
      fsGroup: 1000
      fsGroupChangePolicy: OnRootMismatch
      runAsGroup: 1000
      runAsUser: 1000
    copyMethod: Snapshot
    storageClassName: ceph-block
    volumeSnapshotClassName: ceph-blockpool-snapshot
    cacheCapacity: 1Gi
---
# Daily restic backup of the config PVC to the "local" repository.
apiVersion: volsync.backube/v1alpha1
kind: ReplicationSource
metadata:
  name: radarr-standup-config-backup-source-local
  namespace: radarr-standup
  labels:
    helm.sh/chart: volsync-target-config-0.8.0
    app.kubernetes.io/instance: radarr-standup
    app.kubernetes.io/part-of: radarr-standup
    app.kubernetes.io/version: "0.8.0"
    app.kubernetes.io/managed-by: Helm
    app.kubernetes.io/name: radarr-standup-config-backup
spec:
  sourcePVC: radarr-standup-config
  trigger:
    schedule: "10 11 * * *"
  restic:
    pruneIntervalDays: 7
    repository: radarr-standup-config-backup-secret-local
    retain:
      daily: 7
      hourly: 0
      monthly: 3
      weekly: 4
      yearly: 1
    moverSecurityContext:
      fsGroup: 1000
      fsGroupChangePolicy: OnRootMismatch
      runAsGroup: 1000
      runAsUser: 1000
    copyMethod: Snapshot
    storageClassName: ceph-block
    volumeSnapshotClassName: ceph-blockpool-snapshot
    cacheCapacity: 1Gi
---
# Daily restic backup of the config PVC to the "remote" repository.
apiVersion: volsync.backube/v1alpha1
kind: ReplicationSource
metadata:
  name: radarr-standup-config-backup-source-remote
  namespace: radarr-standup
  labels:
    helm.sh/chart: volsync-target-config-0.8.0
    app.kubernetes.io/instance: radarr-standup
    app.kubernetes.io/part-of: radarr-standup
    app.kubernetes.io/version: "0.8.0"
    app.kubernetes.io/managed-by: Helm
    app.kubernetes.io/name: radarr-standup-config-backup
spec:
  sourcePVC: radarr-standup-config
  trigger:
    schedule: "10 12 * * *"
  restic:
    pruneIntervalDays: 7
    repository: radarr-standup-config-backup-secret-remote
    retain:
      daily: 7
      hourly: 0
      monthly: 3
      weekly: 4
      yearly: 1
    moverSecurityContext:
      fsGroup: 1000
      fsGroupChangePolicy: OnRootMismatch
      runAsGroup: 1000
      runAsUser: 1000
    copyMethod: Snapshot
    storageClassName: ceph-block
    volumeSnapshotClassName: ceph-blockpool-snapshot
    cacheCapacity: 1Gi
---
apiVersion: postgresql.cnpg.io/v1
kind: ScheduledBackup
metadata:
  name: "radarr-standup-postgresql-18-scheduled-backup-live-backup"
  namespace: radarr-standup
  labels:
    helm.sh/chart: postgres-18-cluster-7.9.1
    app.kubernetes.io/instance: radarr-standup
    app.kubernetes.io/part-of: radarr-standup
    app.kubernetes.io/version: "7.9.1"
    app.kubernetes.io/managed-by: Helm
    # This key was previously declared twice in this mapping; only the last
    # (resource-specific) value is kept.
    app.kubernetes.io/name: "radarr-standup-postgresql-18-scheduled-backup-live-backup"
spec:
  immediate: true
  suspend: false
  # Six-field cron expression (note the leading "0").
  schedule: "0 35 15 * * *"
  backupOwnerReference: self
  cluster:
    name: radarr-standup-postgresql-18-cluster
  method: plugin
  pluginConfiguration:
    name: barman-cloud.cloudnative-pg.io
    parameters:
      barmanObjectName: "radarr-standup-postgresql-18-backup-garage-local"
---
apiVersion: v1
kind: Service
metadata:
  name: radarr-standup
  labels:
    app.kubernetes.io/instance: radarr-standup
    app.kubernetes.io/managed-by: Helm
    app.kubernetes.io/name: radarr-standup
    app.kubernetes.io/service: radarr-standup
    helm.sh/chart: radarr-standup-4.6.2
  namespace: radarr-standup
spec:
  type: ClusterIP
  ports:
    - port: 80
      targetPort: 7878
      protocol: TCP
      name: http
    - port: 9793
      targetPort: 9793
      protocol: TCP
      name: metrics
  selector:
    app.kubernetes.io/controller: main
    app.kubernetes.io/instance: radarr-standup
    app.kubernetes.io/name: radarr-standup
---
apiVersion: monitoring.coreos.com/v1
kind: ServiceMonitor
metadata:
  name: radarr-standup
  labels:
    app.kubernetes.io/instance: radarr-standup
    app.kubernetes.io/managed-by: Helm
    app.kubernetes.io/name: radarr-standup
    helm.sh/chart: radarr-standup-4.6.2
  namespace: radarr-standup
spec:
  jobLabel: radarr-standup
  namespaceSelector:
    matchNames:
      - radarr-standup
  selector:
    matchLabels:
      app.kubernetes.io/instance: radarr-standup
      app.kubernetes.io/name: radarr-standup
  endpoints:
    - interval: 3m
      path: /metrics
      port: metrics
      scrapeTimeout: 1m