apiVersion: postgresql.cnpg.io/v1
kind: Cluster
metadata:
  name: directus-postgresql-18-cluster
  namespace: directus
  labels:
    helm.sh/chart: postgres-18-cluster-7.9.1
    app.kubernetes.io/name: directus-postgresql-18
    app.kubernetes.io/instance: directus
    app.kubernetes.io/part-of: directus
    app.kubernetes.io/version: "7.9.1"
    app.kubernetes.io/managed-by: Helm
spec:
  instances: 3
  imageName: "ghcr.io/cloudnative-pg/postgresql:18.3-standard-trixie"
  imagePullPolicy: IfNotPresent
  postgresUID: 26
  postgresGID: 26
  storage:
    size: 10Gi
    storageClass: local-path
  walStorage:
    size: 2Gi
    storageClass: local-path
  resources:
    limits:
      hugepages-2Mi: 256Mi
    requests:
      cpu: 100m
      memory: 256Mi
  affinity:
    enablePodAntiAffinity: true
    topologyKey: kubernetes.io/hostname
  primaryUpdateMethod: switchover
  primaryUpdateStrategy: unsupervised
  logLevel: info
  enableSuperuserAccess: false
  enablePDB: true
  postgresql:
    parameters:
      hot_standby_feedback: "on"
      max_slot_wal_keep_size: 2000MB
      shared_buffers: 128MB
  monitoring:
    enablePodMonitor: true
    disableDefaultQueries: false
  plugins:
    - name: barman-cloud.cloudnative-pg.io
      enabled: true
      isWALArchiver: true
      parameters:
        barmanObjectName: "directus-postgresql-18-backup-garage-local"
        serverName: "directus-postgresql-18-backup-1"
  bootstrap:
    recovery:
      database: app
      source: directus-postgresql-18-backup-1
  externalClusters:
    - name: directus-postgresql-18-backup-1
      plugin:
        name: barman-cloud.cloudnative-pg.io
        enabled: true
        isWALArchiver: false
        parameters:
          barmanObjectName: "directus-postgresql-18-recovery"
          serverName: directus-postgresql-18-backup-1
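# Backup and recovery wiring for the cluster above (as declared in this manifest):
#   - WAL archiving and scheduled backups go through the barman-cloud plugin to the
#     "directus-postgresql-18-backup-garage-local" ObjectStore, under serverName
#     "directus-postgresql-18-backup-1".
#   - bootstrap.recovery restores the "app" database from the external cluster
#     "directus-postgresql-18-backup-1", which reads the "directus-postgresql-18-recovery"
#     ObjectStore. Both ObjectStores are defined later in this file.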
---
apiVersion: v1
kind: ConfigMap
metadata:
  name: directus-valkey-init-scripts
  labels:
    helm.sh/chart: valkey-0.9.3
    app.kubernetes.io/name: valkey
    app.kubernetes.io/instance: directus
    app.kubernetes.io/version: "9.0.3"
    app.kubernetes.io/managed-by: Helm
data:
  init.sh: |-
    #!/bin/sh
    set -eu

    # Default config paths
    VALKEY_CONFIG=${VALKEY_CONFIG_PATH:-/data/conf/valkey.conf}
    LOGFILE="/data/init.log"
    DATA_DIR="/data/conf"

    # Logging function (outputs to stderr and file)
    log() {
      echo "$(date) $1" | tee -a "$LOGFILE" >&2
    }

    # Function to get password for a user
    # Usage: get_user_password username [password_key]
    # Returns: password via stdout, exits with error if not found
    get_user_password() {
      username="$1"
      password_key="${2:-$username}"
      password=""

      # Try to get password from existing secret first (priority)
      if [ -f "/valkey-users-secret/$password_key" ]; then
        password=$(cat "/valkey-users-secret/$password_key")
        log "Using password from existing secret for user $username"
      elif [ -f "/valkey-auth-secret/${username}-password" ]; then
        # Fallback to inline password
        password=$(cat "/valkey-auth-secret/${username}-password")
        log "Using inline password for user $username"
      else
        log "ERROR: No password found for user $username"
        return 1
      fi

      echo "$password"
    }

    # Clean old log if requested
    if [ "${KEEP_OLD_LOGS:-false}" != "true" ]; then
      rm -f "$LOGFILE"
    fi

    if [ -f "$LOGFILE" ]; then
      log "Detected restart of this instance ($HOSTNAME)"
    fi

    log "Creating configuration in $DATA_DIR..."
    mkdir -p "$DATA_DIR"
    rm -f "$VALKEY_CONFIG"

    # Base valkey.conf
    log "Generating base valkey.conf"
    {
      echo "port 6379"
      echo "protected-mode no"
      echo "bind * -::*"
      echo "dir /data"
    } >>"$VALKEY_CONFIG"

    # Create secure directory for ACL file
    log "Creating /etc/valkey directory for ACL file"
    mkdir -p /etc/valkey

    # Set aclfile path in valkey.conf
    echo "aclfile /etc/valkey/users.acl" >>"$VALKEY_CONFIG"

    # Remove or reset existing ACL file if present (it may be read-only from previous run)
    log "Preparing ACL file at /etc/valkey/users.acl"
    if [ -f /etc/valkey/users.acl ]; then
      log "Removing existing read-only users.acl file"
      chmod 0600 /etc/valkey/users.acl
      rm -f /etc/valkey/users.acl
    fi

    # Create ACL file with secure permissions
    touch /etc/valkey/users.acl
    chmod 0600 /etc/valkey/users.acl

    # Generate ACL entries for each user
    log "Generating ACL entries for users"

    # User: default
    PASSWORD=$(get_user_password "default" "default") || exit 1

    # Hash the password and write ACL entry
    PASSHASH=$(echo -n "$PASSWORD" | sha256sum | cut -f 1 -d " ")
    echo "user default on #$PASSHASH ~* &* +@all" >> /etc/valkey/users.acl

    # Set final permissions
    chmod 0400 /etc/valkey/users.acl
    log "ACL file created with 0400 permissions"

    # Replica mode configuration
    log "Configuring replication mode"

    # Use POD_INDEX from Kubernetes metadata
    POD_INDEX=${POD_INDEX:-0}
    IS_MASTER=false

    # Check if this is pod-0 (master)
    if [ "$POD_INDEX" = "0" ]; then
      IS_MASTER=true
      log "This pod (index $POD_INDEX) is configured as MASTER"
    else
      log "This pod (index $POD_INDEX) is configured as REPLICA"
    fi

    # Configure replica settings
    if [ "$IS_MASTER" = "false" ]; then
      MASTER_HOST="directus-valkey-0.directus-valkey-headless.directus.svc.cluster.local"
      MASTER_PORT="6379"

      log "Configuring replica to follow master at $MASTER_HOST:$MASTER_PORT"
      {
        echo ""
        echo "# Replica Configuration"
        echo "replicaof $MASTER_HOST $MASTER_PORT"
        echo "replica-announce-ip directus-valkey-$POD_INDEX.directus-valkey-headless.directus.svc.cluster.local"
        echo ""
        echo "# Master authentication"
      } >>"$VALKEY_CONFIG"

      # Get the password for the replication user
      REPL_PASSWORD=$(get_user_password "default" "default") || exit 1

      # Write masterauth configuration
      echo "masterauth $REPL_PASSWORD" >>"$VALKEY_CONFIG"
      echo "masteruser default" >>"$VALKEY_CONFIG"
      log "Configured masterauth with user default"
    fi

    # Append extra configs if present
    if [ -f /usr/local/etc/valkey/valkey.conf ]; then
      log "Appending /usr/local/etc/valkey/valkey.conf"
      cat /usr/local/etc/valkey/valkey.conf >>"$VALKEY_CONFIG"
    fi

    if [ -d /extravalkeyconfigs ]; then
      log "Appending files in /extravalkeyconfigs/"
      cat /extravalkeyconfigs/* >>"$VALKEY_CONFIG"
    fi
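# The init script above is mounted at /scripts/init.sh by the directus-valkey StatefulSet
# (defined at the end of this file) and runs in an init container. It renders
# /data/conf/valkey.conf and /etc/valkey/users.acl, which the main valkey-server container
# then consumes.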
---
apiVersion: apps/v1
kind: Deployment
metadata:
  name: directus
  labels:
    app.kubernetes.io/controller: main
    app.kubernetes.io/instance: directus
    app.kubernetes.io/managed-by: Helm
    app.kubernetes.io/name: directus
    helm.sh/chart: directus-4.6.2
  namespace: directus
spec:
  revisionHistoryLimit: 3
  replicas: 1
  strategy:
    type: Recreate
  selector:
    matchLabels:
      app.kubernetes.io/controller: main
      app.kubernetes.io/name: directus
      app.kubernetes.io/instance: directus
  template:
    metadata:
      labels:
        app.kubernetes.io/controller: main
        app.kubernetes.io/instance: directus
        app.kubernetes.io/name: directus
    spec:
      enableServiceLinks: false
      serviceAccountName: default
      automountServiceAccountToken: true
      hostIPC: false
      hostNetwork: false
      hostPID: false
      dnsPolicy: ClusterFirst
      containers:
        - env:
            - name: PUBLIC_URL
              value: https://directus.alexlebens.net
            - name: WEBSOCKETS_ENABLED
              value: "true"
            - name: ADMIN_EMAIL
              valueFrom:
                secretKeyRef:
                  key: admin-email
                  name: directus-config
            - name: ADMIN_PASSWORD
              valueFrom:
                secretKeyRef:
                  key: admin-password
                  name: directus-config
            - name: SECRET
              valueFrom:
                secretKeyRef:
                  key: secret
                  name: directus-config
            - name: KEY
              valueFrom:
                secretKeyRef:
                  key: key
                  name: directus-config
            - name: DB_CLIENT
              value: postgres
            - name: DB_HOST
              valueFrom:
                secretKeyRef:
                  key: host
                  name: directus-postgresql-18-cluster-app
            - name: DB_DATABASE
              valueFrom:
                secretKeyRef:
                  key: dbname
                  name: directus-postgresql-18-cluster-app
            - name: DB_PORT
              valueFrom:
                secretKeyRef:
                  key: port
                  name: directus-postgresql-18-cluster-app
            - name: DB_USER
              valueFrom:
                secretKeyRef:
                  key: user
                  name: directus-postgresql-18-cluster-app
            - name: DB_PASSWORD
              valueFrom:
                secretKeyRef:
                  key: password
                  name: directus-postgresql-18-cluster-app
            - name: SYNCHRONIZATION_STORE
              value: redis
            - name: CACHE_ENABLED
              value: "true"
            - name: CACHE_STORE
              value: redis
            - name: REDIS_ENABLED
              value: "true"
            - name: REDIS_HOST
              value: directus-valkey
            - name: REDIS_USERNAME
              valueFrom:
                secretKeyRef:
                  key: user
                  name: directus-valkey-config
            - name: REDIS_PASSWORD
              valueFrom:
                secretKeyRef:
                  key: password
                  name: directus-valkey-config
            - name: STORAGE_LOCATIONS
              value: s3
            - name: STORAGE_S3_DRIVER
              value: s3
            - name: STORAGE_S3_KEY
              valueFrom:
                secretKeyRef:
                  key: ACCESS_KEY_ID
                  name: directus-bucket-garage
            - name: STORAGE_S3_SECRET
              valueFrom:
                secretKeyRef:
                  key: ACCESS_SECRET_KEY
                  name: directus-bucket-garage
            - name: STORAGE_S3_REGION
              valueFrom:
                secretKeyRef:
                  key: ACCESS_REGION
                  name: directus-bucket-garage
            - name: STORAGE_S3_BUCKET
              value: directus-assets
            - name: STORAGE_S3_ENDPOINT
              value: http://garage-main.garage:3900
            - name: STORAGE_S3_FORCE_PATH_STYLE
              value: "true"
            - name: AUTH_PROVIDERS
              value: AUTHENTIK
            - name: AUTH_AUTHENTIK_DRIVER
              value: openid
            - name: AUTH_AUTHENTIK_CLIENT_ID
              valueFrom:
                secretKeyRef:
                  key: OIDC_CLIENT_ID
                  name: directus-oidc-secret
            - name: AUTH_AUTHENTIK_CLIENT_SECRET
              valueFrom:
                secretKeyRef:
                  key: OIDC_CLIENT_SECRET
                  name: directus-oidc-secret
            - name: AUTH_AUTHENTIK_SCOPE
              value: openid profile email
            - name: AUTH_AUTHENTIK_ISSUER_URL
              value: https://authentik.alexlebens.net/application/o/directus/.well-known/openid-configuration
            - name: AUTH_AUTHENTIK_IDENTIFIER_KEY
              value: email
            - name: AUTH_AUTHENTIK_ALLOW_PUBLIC_REGISTRATION
              value: "true"
            - name: AUTH_AUTHENTIK_LABEL
              value: Authentik
            - name: TELEMETRY
              value: "false"
            - name: METRICS_ENABLED
              value: "true"
            - name: METRICS_TOKENS
              valueFrom:
                secretKeyRef:
                  key: metric-token
                  name: directus-metric-token
          image: directus/directus:11.16.1
          imagePullPolicy: IfNotPresent
          name: main
          resources:
            requests:
              cpu: 10m
              memory: 256Mi
---
apiVersion: external-secrets.io/v1
kind: ExternalSecret
metadata:
  name: directus-bucket-garage
  namespace: directus
  labels:
    app.kubernetes.io/name: directus-bucket-garage
    app.kubernetes.io/instance: directus
    app.kubernetes.io/part-of: directus
spec:
  secretStoreRef:
    kind: ClusterSecretStore
    name: vault
  data:
    - secretKey: ACCESS_KEY_ID
      remoteRef:
        conversionStrategy: Default
        decodingStrategy: None
        key: /garage/home-infra/directus-assets
        metadataPolicy: None
        property: ACCESS_KEY_ID
    - secretKey: ACCESS_SECRET_KEY
      remoteRef:
        conversionStrategy: Default
        decodingStrategy: None
        key: /garage/home-infra/directus-assets
        metadataPolicy: None
        property: ACCESS_SECRET_KEY
    - secretKey: ACCESS_REGION
      remoteRef:
        conversionStrategy: Default
        decodingStrategy: None
        key: /garage/home-infra/directus-assets
        metadataPolicy: None
        property: ACCESS_REGION
---
apiVersion: external-secrets.io/v1
kind: ExternalSecret
metadata:
  name: directus-config
  namespace: directus
  labels:
    app.kubernetes.io/name: directus-config
    app.kubernetes.io/instance: directus
    app.kubernetes.io/part-of: directus
spec:
  secretStoreRef:
    kind: ClusterSecretStore
    name: vault
  data:
    - secretKey: admin-email
      remoteRef:
        conversionStrategy: Default
        decodingStrategy: None
        key: /cl01tl/directus/config
        metadataPolicy: None
        property: admin-email
    - secretKey: admin-password
      remoteRef:
        conversionStrategy: Default
        decodingStrategy: None
        key: /cl01tl/directus/config
        metadataPolicy: None
        property: admin-password
    - secretKey: secret
      remoteRef:
        conversionStrategy: Default
        decodingStrategy: None
        key: /cl01tl/directus/config
        metadataPolicy: None
        property: secret
    - secretKey: key
      remoteRef:
        conversionStrategy: Default
        decodingStrategy: None
        key: /cl01tl/directus/config
        metadataPolicy: None
        property: key
---
apiVersion: external-secrets.io/v1
kind: ExternalSecret
metadata:
  name: directus-metric-token
  namespace: directus
  labels:
    app.kubernetes.io/name: directus-metric-token
    app.kubernetes.io/instance: directus
    app.kubernetes.io/part-of: directus
spec:
  secretStoreRef:
    kind: ClusterSecretStore
    name: vault
  data:
    - secretKey: metric-token
      remoteRef:
        conversionStrategy: Default
        decodingStrategy: None
        key: /cl01tl/directus/metrics
        metadataPolicy: None
        property: metric-token
---
apiVersion: external-secrets.io/v1
kind: ExternalSecret
metadata:
  name: directus-oidc-secret
  namespace: directus
  labels:
    app.kubernetes.io/name: directus-oidc-secret
    app.kubernetes.io/instance: directus
    app.kubernetes.io/part-of: directus
spec:
  secretStoreRef:
    kind: ClusterSecretStore
    name: vault
  data:
    - secretKey: OIDC_CLIENT_ID
      remoteRef:
        conversionStrategy: Default
        decodingStrategy: None
        key: /authentik/oidc/directus
        metadataPolicy: None
        property: client
    - secretKey: OIDC_CLIENT_SECRET
      remoteRef:
        conversionStrategy: Default
        decodingStrategy: None
        key: /authentik/oidc/directus
        metadataPolicy: None
        property: secret
---
apiVersion: external-secrets.io/v1
kind: ExternalSecret
metadata:
  name: directus-postgresql-18-backup-garage-local-secret
  namespace: directus
  labels:
    helm.sh/chart: postgres-18-cluster-7.9.1
    app.kubernetes.io/instance: directus
    app.kubernetes.io/part-of: directus
    app.kubernetes.io/version: "7.9.1"
    app.kubernetes.io/managed-by: Helm
    app.kubernetes.io/name: directus-postgresql-18-backup-garage-local-secret
spec:
  secretStoreRef:
    kind: ClusterSecretStore
    name: vault
  data:
    - secretKey: ACCESS_REGION
      remoteRef:
        conversionStrategy: Default
        decodingStrategy: None
        key: /garage/home-infra/postgres-backups
        metadataPolicy: None
        property: ACCESS_REGION
    - secretKey: ACCESS_KEY_ID
      remoteRef:
        conversionStrategy: Default
        decodingStrategy: None
        key: /garage/home-infra/postgres-backups
        metadataPolicy: None
        property: ACCESS_KEY_ID
    - secretKey: ACCESS_SECRET_KEY
      remoteRef:
        conversionStrategy: Default
        decodingStrategy: None
        key: /garage/home-infra/postgres-backups
        metadataPolicy: None
        property: ACCESS_SECRET_KEY
---
apiVersion: external-secrets.io/v1
kind: ExternalSecret
metadata:
  name: directus-postgresql-18-recovery-secret
  namespace: directus
  labels:
    helm.sh/chart: postgres-18-cluster-7.9.1
    app.kubernetes.io/instance: directus
    app.kubernetes.io/part-of: directus
    app.kubernetes.io/version: "7.9.1"
    app.kubernetes.io/managed-by: Helm
    app.kubernetes.io/name: directus-postgresql-18-recovery-secret
spec:
  secretStoreRef:
    kind: ClusterSecretStore
    name: vault
  data:
    - secretKey: ACCESS_REGION
      remoteRef:
        conversionStrategy: Default
        decodingStrategy: None
        key: /garage/home-infra/postgres-backups
        metadataPolicy: None
        property: ACCESS_REGION
    - secretKey: ACCESS_KEY_ID
      remoteRef:
        conversionStrategy: Default
        decodingStrategy: None
        key: /garage/home-infra/postgres-backups
        metadataPolicy: None
        property: ACCESS_KEY_ID
    - secretKey: ACCESS_SECRET_KEY
      remoteRef:
        conversionStrategy: Default
        decodingStrategy: None
        key: /garage/home-infra/postgres-backups
        metadataPolicy: None
        property: ACCESS_SECRET_KEY
---
apiVersion: external-secrets.io/v1
kind: ExternalSecret
metadata:
  name: directus-valkey-config
  namespace: directus
  labels:
    app.kubernetes.io/name: directus-valkey-config
    app.kubernetes.io/instance: directus
    app.kubernetes.io/part-of: directus
spec:
  secretStoreRef:
    kind: ClusterSecretStore
    name: vault
  data:
    - secretKey: default
      remoteRef:
        conversionStrategy: Default
        decodingStrategy: None
        key: /cl01tl/directus/valkey
        metadataPolicy: None
        property: password
    - secretKey: user
      remoteRef:
        conversionStrategy: Default
        decodingStrategy: None
        key: /cl01tl/directus/valkey
        metadataPolicy: None
        property: user
    - secretKey: password
      remoteRef:
        conversionStrategy: Default
        decodingStrategy: None
        key: /cl01tl/directus/valkey
        metadataPolicy: None
        property: password
---
apiVersion: gateway.networking.k8s.io/v1alpha2
kind: HTTPRoute
metadata:
  name: directus
  labels:
    app.kubernetes.io/instance: directus
    app.kubernetes.io/managed-by: Helm
    app.kubernetes.io/name: directus
    helm.sh/chart: directus-4.6.2
  namespace: directus
spec:
  parentRefs:
    - group: gateway.networking.k8s.io
      kind: Gateway
      name: traefik-gateway
      namespace: traefik
  hostnames:
    - "directus.alexlebens.net"
  rules:
    - backendRefs:
        - group: ""
          kind: Service
          name: directus
          namespace: directus
          port: 80
          weight: 100
      matches:
        - path:
            type: PathPrefix
            value: /
---
apiVersion: barmancloud.cnpg.io/v1
kind: ObjectStore
metadata:
  name: directus-postgresql-18-backup-garage-local
  namespace: directus
  labels:
    helm.sh/chart: postgres-18-cluster-7.9.1
    app.kubernetes.io/instance: directus
    app.kubernetes.io/part-of: directus
    app.kubernetes.io/version: "7.9.1"
    app.kubernetes.io/managed-by: Helm
    app.kubernetes.io/name: directus-postgresql-18-backup-garage-local
spec:
  retentionPolicy: 7d
  instanceSidecarConfiguration:
    env:
      - name: AWS_REQUEST_CHECKSUM_CALCULATION
        value: when_required
      - name: AWS_RESPONSE_CHECKSUM_VALIDATION
        value: when_required
  configuration:
    destinationPath: s3://postgres-backups/cl01tl/directus/directus-postgresql-18-cluster
    endpointURL: http://garage-main.garage:3900
    s3Credentials:
      accessKeyId:
        name: directus-postgresql-18-backup-garage-local-secret
        key: ACCESS_KEY_ID
      secretAccessKey:
        name: directus-postgresql-18-backup-garage-local-secret
        key: ACCESS_SECRET_KEY
      region:
        name: directus-postgresql-18-backup-garage-local-secret
        key: ACCESS_REGION
---
apiVersion: barmancloud.cnpg.io/v1
kind: ObjectStore
metadata:
  name: "directus-postgresql-18-recovery"
  namespace: directus
  labels:
    helm.sh/chart: postgres-18-cluster-7.9.1
    app.kubernetes.io/instance: directus
    app.kubernetes.io/part-of: directus
    app.kubernetes.io/version: "7.9.1"
    app.kubernetes.io/managed-by: Helm
    app.kubernetes.io/name: "directus-postgresql-18-recovery"
spec:
  configuration:
    destinationPath: s3://postgres-backups/cl01tl/directus/directus-postgresql-18-cluster
    endpointURL: http://garage-main.garage:3900
    wal:
      compression: snappy
      maxParallel: 1
    data:
      compression: snappy
      jobs: 1
    s3Credentials:
      accessKeyId:
        name: directus-postgresql-18-recovery-secret
        key: ACCESS_KEY_ID
      secretAccessKey:
        name: directus-postgresql-18-recovery-secret
        key: ACCESS_SECRET_KEY
      region:
        name: directus-postgresql-18-recovery-secret
        key: ACCESS_REGION
---
apiVersion: v1
kind: Pod
metadata:
  name: directus-valkey-test-auth-existing
  labels:
    helm.sh/chart: valkey-0.9.3
    app.kubernetes.io/name: valkey
    app.kubernetes.io/instance: directus
    app.kubernetes.io/version: "9.0.3"
    app.kubernetes.io/managed-by: Helm
  annotations:
    "helm.sh/hook": test
    "helm.sh/hook-delete-policy": before-hook-creation,hook-succeeded
spec:
  restartPolicy: Never
  containers:
    - name: test-auth
      image: "valkey/valkey:9.0.3"
      command:
        - sh
        - -c
        - |
          set -e
          echo "Testing authentication with usersExistingSecret..."
          TLS_FLAGS=""

          # Test basic connection (no auth - will fail if auth is properly configured)
          PING_RESULT=$(valkey-cli -h directus-valkey -p 6379 $TLS_FLAGS PING 2>&1 || true)
          if [ "$PING_RESULT" = "PONG" ]; then
            echo "✗ Authentication test failed: server allows unauthenticated access"
            exit 1
          fi
          echo "✓ Authentication is enforced (unauthenticated access denied)"
          echo "✓ Received expected error: $PING_RESULT"
          echo "⚠ Manual verification recommended for usersExistingSecret configuration"
          exit 0
      volumeMounts:
        - name: valkey-users-secret
          mountPath: /valkey-users-secret
          readOnly: true
  volumes:
    - name: valkey-users-secret
      secret:
        secretName: directus-valkey-config
---
apiVersion: monitoring.coreos.com/v1
kind: PrometheusRule
metadata:
  name: directus-postgresql-18-alert-rules
  namespace: directus
  labels:
    helm.sh/chart: postgres-18-cluster-7.9.1
    app.kubernetes.io/name: directus-postgresql-18
    app.kubernetes.io/instance: directus
    app.kubernetes.io/part-of: directus
    app.kubernetes.io/version: "7.9.1"
    app.kubernetes.io/managed-by: Helm
spec:
  groups:
    - name: cloudnative-pg/directus-postgresql-18
      rules:
        - alert: CNPGClusterBackendsWaitingWarning
          annotations:
            summary: CNPG Cluster has a backend waiting for longer than 5 minutes.
            description: |-
              Pod {{ $labels.pod }} has been waiting for longer than 5 minutes
          expr: |
            cnpg_backends_waiting_total{namespace="directus"} > 300
          for: 1m
          labels:
            severity: warning
            namespace: directus
            cnpg_cluster: directus-postgresql-18-cluster
        - alert: CNPGClusterDatabaseDeadlockConflictsWarning
          annotations:
            summary: CNPG Cluster has over 10 deadlock conflicts.
            description: |-
              There are over 10 deadlock conflicts in {{ $labels.pod }}
          expr: |
            cnpg_pg_stat_database_deadlocks{namespace="directus"} > 10
          for: 1m
          labels:
            severity: warning
            namespace: directus
            cnpg_cluster: directus-postgresql-18-cluster
        - alert: CNPGClusterHACritical
          annotations:
            summary: CNPG Cluster has no standby replicas!
            description: |-
              CloudNativePG Cluster "{{ $labels.job }}" has no ready standby replicas. Your cluster
              is at severe risk of data loss and downtime if the primary instance fails.

              The primary instance is still online and able to serve queries, although connections
              to the `-ro` endpoint will fail. The `-r` endpoint is operating at reduced capacity
              and all traffic is being served by the primary instance.

              This can happen during a normal fail-over or automated minor version upgrades in a
              cluster with 2 or fewer instances. The replaced instance may need some time to catch
              up with the cluster primary instance.

              This alarm will always be triggered if your cluster is configured to run with only 1
              instance. In this case you may want to silence it.
            runbook_url: https://github.com/cloudnative-pg/charts/blob/main/charts/cluster/docs/runbooks/CNPGClusterHACritical.md
          expr: |
            max by (job) (cnpg_pg_replication_streaming_replicas{namespace="directus"} - cnpg_pg_replication_is_wal_receiver_up{namespace="directus"}) < 1
          for: 5m
          labels:
            severity: critical
            namespace: directus
            cnpg_cluster: directus-postgresql-18-cluster
        - alert: CNPGClusterHAWarning
          annotations:
            summary: CNPG Cluster has fewer than 2 standby replicas.
            description: |-
              CloudNativePG Cluster "{{ $labels.job }}" has only {{ $value }} standby replicas,
              putting your cluster at risk if another instance fails. The cluster is still able to
              operate normally, although the `-ro` and `-r` endpoints operate at reduced capacity.

              This can happen during a normal fail-over or automated minor version upgrades. The
              replaced instance may need some time to catch up with the cluster primary instance.

              This alarm will be constantly triggered if your cluster is configured to run with
              fewer than 3 instances. In this case you may want to silence it.
            runbook_url: https://github.com/cloudnative-pg/charts/blob/main/charts/cluster/docs/runbooks/CNPGClusterHAWarning.md
          expr: |
            max by (job) (cnpg_pg_replication_streaming_replicas{namespace="directus"} - cnpg_pg_replication_is_wal_receiver_up{namespace="directus"}) < 2
          for: 5m
          labels:
            severity: warning
            namespace: directus
            cnpg_cluster: directus-postgresql-18-cluster
        - alert: CNPGClusterHighConnectionsCritical
          annotations:
            summary: CNPG Instance maximum number of connections critical!
            description: |-
              CloudNativePG Cluster "directus/directus-postgresql-18-cluster" instance
              {{ $labels.pod }} is using {{ $value }}% of the maximum number of connections.
            runbook_url: https://github.com/cloudnative-pg/charts/blob/main/charts/cluster/docs/runbooks/CNPGClusterHighConnectionsCritical.md
          expr: |
            sum by (pod) (cnpg_backends_total{namespace="directus", pod=~"directus-postgresql-18-cluster-([1-9][0-9]*)$"}) / max by (pod) (cnpg_pg_settings_setting{name="max_connections", namespace="directus", pod=~"directus-postgresql-18-cluster-([1-9][0-9]*)$"}) * 100 > 95
          for: 5m
          labels:
            severity: critical
            namespace: directus
            cnpg_cluster: directus-postgresql-18-cluster
        - alert: CNPGClusterHighConnectionsWarning
          annotations:
            summary: CNPG Instance is approaching the maximum number of connections.
            description: |-
              CloudNativePG Cluster "directus/directus-postgresql-18-cluster" instance
              {{ $labels.pod }} is using {{ $value }}% of the maximum number of connections.
            runbook_url: https://github.com/cloudnative-pg/charts/blob/main/charts/cluster/docs/runbooks/CNPGClusterHighConnectionsWarning.md
          expr: |
            sum by (pod) (cnpg_backends_total{namespace="directus", pod=~"directus-postgresql-18-cluster-([1-9][0-9]*)$"}) / max by (pod) (cnpg_pg_settings_setting{name="max_connections", namespace="directus", pod=~"directus-postgresql-18-cluster-([1-9][0-9]*)$"}) * 100 > 80
          for: 5m
          labels:
            severity: warning
            namespace: directus
            cnpg_cluster: directus-postgresql-18-cluster
        - alert: CNPGClusterHighReplicationLag
          annotations:
            summary: CNPG Cluster high replication lag
            description: |-
              CloudNativePG Cluster "directus/directus-postgresql-18-cluster" is experiencing a
              high replication lag of {{ $value }}ms.

              High replication lag indicates network issues, busy instances, slow queries or
              suboptimal configuration.
            runbook_url: https://github.com/cloudnative-pg/charts/blob/main/charts/cluster/docs/runbooks/CNPGClusterHighReplicationLag.md
          expr: |
            max(cnpg_pg_replication_lag{namespace="directus",pod=~"directus-postgresql-18-cluster-([1-9][0-9]*)$"}) * 1000 > 1000
          for: 5m
          labels:
            severity: warning
            namespace: directus
            cnpg_cluster: directus-postgresql-18-cluster
        - alert: CNPGClusterInstancesOnSameNode
          annotations:
            summary: CNPG Cluster instances are located on the same node.
            description: |-
              CloudNativePG Cluster "directus/directus-postgresql-18-cluster" has {{ $value }}
              instances on the same node {{ $labels.node }}.

              A failure or scheduled downtime of a single node will lead to a potential service
              disruption and/or data loss.
            runbook_url: https://github.com/cloudnative-pg/charts/blob/main/charts/cluster/docs/runbooks/CNPGClusterInstancesOnSameNode.md
          expr: |
            count by (node) (kube_pod_info{namespace="directus", pod=~"directus-postgresql-18-cluster-([1-9][0-9]*)$"}) > 1
          for: 5m
          labels:
            severity: warning
            namespace: directus
            cnpg_cluster: directus-postgresql-18-cluster
        - alert: CNPGClusterLongRunningTransactionWarning
          annotations:
            summary: CNPG Cluster query is taking longer than 5 minutes.
            description: |-
              CloudNativePG Cluster Pod {{ $labels.pod }} is taking more than 5 minutes (300 seconds) for a query.
          expr: |-
            cnpg_backends_max_tx_duration_seconds{namespace="directus"} > 300
          for: 1m
          labels:
            severity: warning
            namespace: directus
            cnpg_cluster: directus-postgresql-18-cluster
        - alert: CNPGClusterLowDiskSpaceCritical
          annotations:
            summary: CNPG Instance is running out of disk space!
            description: |-
              CloudNativePG Cluster "directus/directus-postgresql-18-cluster" is running extremely low on disk space. Check attached PVCs!
            runbook_url: https://github.com/cloudnative-pg/charts/blob/main/charts/cluster/docs/runbooks/CNPGClusterLowDiskSpaceCritical.md
          expr: |
            max(max by(persistentvolumeclaim) (1 - kubelet_volume_stats_available_bytes{namespace="directus", persistentvolumeclaim=~"directus-postgresql-18-cluster-([1-9][0-9]*)$"} / kubelet_volume_stats_capacity_bytes{namespace="directus", persistentvolumeclaim=~"directus-postgresql-18-cluster-([1-9][0-9]*)$"})) > 0.9 OR
            max(max by(persistentvolumeclaim) (1 - kubelet_volume_stats_available_bytes{namespace="directus", persistentvolumeclaim=~"directus-postgresql-18-cluster-([1-9][0-9]*)$-wal"} / kubelet_volume_stats_capacity_bytes{namespace="directus", persistentvolumeclaim=~"directus-postgresql-18-cluster-([1-9][0-9]*)$-wal"})) > 0.9 OR
            max(sum by (namespace,persistentvolumeclaim) (kubelet_volume_stats_used_bytes{namespace="directus", persistentvolumeclaim=~"directus-postgresql-18-cluster-([1-9][0-9]*)$-tbs.*"}) / sum by (namespace,persistentvolumeclaim) (kubelet_volume_stats_capacity_bytes{namespace="directus", persistentvolumeclaim=~"directus-postgresql-18-cluster-([1-9][0-9]*)$-tbs.*"}) * on(namespace, persistentvolumeclaim) group_left(volume) kube_pod_spec_volumes_persistentvolumeclaims_info{pod=~"directus-postgresql-18-cluster-([1-9][0-9]*)$"}) > 0.9
          for: 5m
          labels:
            severity: critical
            namespace: directus
            cnpg_cluster: directus-postgresql-18-cluster
        - alert: CNPGClusterLowDiskSpaceWarning
          annotations:
            summary: CNPG Instance is running out of disk space.
            description: |-
              CloudNativePG Cluster "directus/directus-postgresql-18-cluster" is running low on disk space. Check attached PVCs.
            runbook_url: https://github.com/cloudnative-pg/charts/blob/main/charts/cluster/docs/runbooks/CNPGClusterLowDiskSpaceWarning.md
          expr: |
            max(max by(persistentvolumeclaim) (1 - kubelet_volume_stats_available_bytes{namespace="directus", persistentvolumeclaim=~"directus-postgresql-18-cluster-([1-9][0-9]*)$"} / kubelet_volume_stats_capacity_bytes{namespace="directus", persistentvolumeclaim=~"directus-postgresql-18-cluster-([1-9][0-9]*)$"})) > 0.7 OR
            max(max by(persistentvolumeclaim) (1 - kubelet_volume_stats_available_bytes{namespace="directus", persistentvolumeclaim=~"directus-postgresql-18-cluster-([1-9][0-9]*)$-wal"} / kubelet_volume_stats_capacity_bytes{namespace="directus", persistentvolumeclaim=~"directus-postgresql-18-cluster-([1-9][0-9]*)$-wal"})) > 0.7 OR
            max(sum by (namespace,persistentvolumeclaim) (kubelet_volume_stats_used_bytes{namespace="directus", persistentvolumeclaim=~"directus-postgresql-18-cluster-([1-9][0-9]*)$-tbs.*"}) / sum by (namespace,persistentvolumeclaim) (kubelet_volume_stats_capacity_bytes{namespace="directus", persistentvolumeclaim=~"directus-postgresql-18-cluster-([1-9][0-9]*)$-tbs.*"}) * on(namespace, persistentvolumeclaim) group_left(volume) kube_pod_spec_volumes_persistentvolumeclaims_info{pod=~"directus-postgresql-18-cluster-([1-9][0-9]*)$"}) > 0.7
          for: 5m
          labels:
            severity: warning
            namespace: directus
            cnpg_cluster: directus-postgresql-18-cluster
        - alert: CNPGClusterOffline
          annotations:
            summary: CNPG Cluster has no running instances!
            description: |-
              CloudNativePG Cluster "directus/directus-postgresql-18-cluster" has no ready
              instances.

              Having an offline cluster means your applications will not be able to access the
              database, leading to potential service disruption and/or data loss.
            runbook_url: https://github.com/cloudnative-pg/charts/blob/main/charts/cluster/docs/runbooks/CNPGClusterOffline.md
          expr: |
            (count(cnpg_collector_up{namespace="directus",pod=~"directus-postgresql-18-cluster-([1-9][0-9]*)$"}) OR on() vector(0)) == 0
          for: 5m
          labels:
            severity: critical
            namespace: directus
            cnpg_cluster: directus-postgresql-18-cluster
        - alert: CNPGClusterPGDatabaseXidAgeWarning
          annotations:
            summary: CNPG Cluster has a high number of transactions from the frozen XID to the current one.
            description: |-
              Over 300,000,000 transactions from frozen xid on pod {{ $labels.pod }}
          expr: |
            cnpg_pg_database_xid_age{namespace="directus"} > 300000000
          for: 1m
          labels:
            severity: warning
            namespace: directus
            cnpg_cluster: directus-postgresql-18-cluster
        - alert: CNPGClusterPGReplicationWarning
          annotations:
            summary: CNPG Cluster standby is lagging behind the primary.
            description: |-
              Standby is lagging behind by over 300 seconds (5 minutes)
          expr: |
            cnpg_pg_replication_lag{namespace="directus"} > 300
          for: 1m
          labels:
            severity: warning
            namespace: directus
            cnpg_cluster: directus-postgresql-18-cluster
        - alert: CNPGClusterReplicaFailingReplicationWarning
          annotations:
            summary: CNPG Cluster has a replica that is failing to replicate.
            description: |-
              Replica {{ $labels.pod }} is failing to replicate
          expr: |
            cnpg_pg_replication_in_recovery{namespace="directus"} > cnpg_pg_replication_is_wal_receiver_up{namespace="directus"}
          for: 1m
          labels:
            severity: warning
            namespace: directus
            cnpg_cluster: directus-postgresql-18-cluster
        - alert: CNPGClusterZoneSpreadWarning
          annotations:
            summary: CNPG Cluster instances in the same zone.
            description: |-
              CloudNativePG Cluster "directus/directus-postgresql-18-cluster" has instances in the
              same availability zone.
              A disaster in one availability zone will lead to a potential service disruption
              and/or data loss.
            runbook_url: https://github.com/cloudnative-pg/charts/blob/main/charts/cluster/docs/runbooks/CNPGClusterZoneSpreadWarning.md
          expr: |
            3 > count(count by (label_topology_kubernetes_io_zone) (kube_pod_info{namespace="directus", pod=~"directus-postgresql-18-cluster-([1-9][0-9]*)$"} * on(node,instance) group_left(label_topology_kubernetes_io_zone) kube_node_labels)) < 3
          for: 5m
          labels:
            severity: warning
            namespace: directus
            cnpg_cluster: directus-postgresql-18-cluster
---
apiVersion: postgresql.cnpg.io/v1
kind: ScheduledBackup
metadata:
  name: "directus-postgresql-18-scheduled-backup-live-backup"
  namespace: directus
  labels:
    helm.sh/chart: postgres-18-cluster-7.9.1
    app.kubernetes.io/instance: directus
    app.kubernetes.io/part-of: directus
    app.kubernetes.io/version: "7.9.1"
    app.kubernetes.io/managed-by: Helm
    app.kubernetes.io/name: "directus-postgresql-18-scheduled-backup-live-backup"
spec:
  immediate: true
  suspend: false
  schedule: "0 15 14 * * *"
  backupOwnerReference: self
  cluster:
    name: directus-postgresql-18-cluster
  method: plugin
  pluginConfiguration:
    name: barman-cloud.cloudnative-pg.io
    parameters:
      barmanObjectName: "directus-postgresql-18-backup-garage-local"
---
apiVersion: v1
kind: Service
metadata:
  name: directus-valkey-headless
  labels:
    helm.sh/chart: valkey-0.9.3
    app.kubernetes.io/name: valkey
    app.kubernetes.io/instance: directus
    app.kubernetes.io/version: "9.0.3"
    app.kubernetes.io/managed-by: Helm
    app.kubernetes.io/component: headless
spec:
  type: ClusterIP
  clusterIP: None
  publishNotReadyAddresses: true
  ports:
    - name: tcp
      port: 6379
      targetPort: tcp
      protocol: TCP
  selector:
    app.kubernetes.io/name: valkey
    app.kubernetes.io/instance: directus
---
apiVersion: v1
kind: Service
metadata:
  name: directus-valkey-read
  labels:
    helm.sh/chart: valkey-0.9.3
    app.kubernetes.io/name: valkey
    app.kubernetes.io/instance: directus
    app.kubernetes.io/version: "9.0.3"
    app.kubernetes.io/managed-by: Helm
    app.kubernetes.io/component: read
spec:
  type: ClusterIP
  ports:
    - name: tcp
      port: 6379
      targetPort: tcp
      protocol: TCP
  selector:
    app.kubernetes.io/name: valkey
    app.kubernetes.io/instance: directus
---
apiVersion: v1
kind: Service
metadata:
  name: directus-valkey
  labels:
    helm.sh/chart: valkey-0.9.3
    app.kubernetes.io/name: valkey
    app.kubernetes.io/instance: directus
    app.kubernetes.io/version: "9.0.3"
    app.kubernetes.io/managed-by: Helm
    app.kubernetes.io/component: primary
spec:
  type: ClusterIP
  ports:
    - port: 6379
      targetPort: tcp
      protocol: TCP
      name: tcp
  selector:
    app.kubernetes.io/name: valkey
    app.kubernetes.io/instance: directus
    statefulset.kubernetes.io/pod-name: directus-valkey-0
---
apiVersion: v1
kind: Service
metadata:
  name: directus
  labels:
    app.kubernetes.io/instance: directus
    app.kubernetes.io/managed-by: Helm
    app.kubernetes.io/name: directus
    app.kubernetes.io/service: directus
    helm.sh/chart: directus-4.6.2
  namespace: directus
spec:
  type: ClusterIP
  ports:
    - port: 80
      targetPort: 8055
      protocol: TCP
      name: http
  selector:
    app.kubernetes.io/controller: main
    app.kubernetes.io/instance: directus
    app.kubernetes.io/name: directus
---
apiVersion: v1
kind: ServiceAccount
metadata:
  name: directus-valkey
  labels:
    helm.sh/chart: valkey-0.9.3
    app.kubernetes.io/name: valkey
    app.kubernetes.io/instance: directus
    app.kubernetes.io/version: "9.0.3"
    app.kubernetes.io/managed-by: Helm
automountServiceAccountToken: false
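# The ServiceMonitor below scrapes the Directus /metrics endpoint through the "http" Service
# port, authenticating with the same metric-token secret that the Deployment exposes to
# Directus via METRICS_TOKENS.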
---
apiVersion: monitoring.coreos.com/v1
kind: ServiceMonitor
metadata:
  name: directus
  labels:
    app.kubernetes.io/instance: directus
    app.kubernetes.io/managed-by: Helm
    app.kubernetes.io/name: directus
    helm.sh/chart: directus-4.6.2
  namespace: directus
spec:
  jobLabel: directus
  namespaceSelector:
    matchNames:
      - directus
  selector:
    matchLabels:
      app.kubernetes.io/instance: directus
      app.kubernetes.io/name: directus
  endpoints:
    - bearerTokenSecret:
        key: metric-token
        name: directus-metric-token
      interval: 30s
      path: /metrics
      port: http
      scrapeTimeout: 15s
---
apiVersion: apps/v1
kind: StatefulSet
metadata:
  name: directus-valkey
  labels:
    helm.sh/chart: valkey-0.9.3
    app.kubernetes.io/name: valkey
    app.kubernetes.io/instance: directus
    app.kubernetes.io/version: "9.0.3"
    app.kubernetes.io/managed-by: Helm
spec:
  serviceName: directus-valkey-headless
  replicas: 3
  podManagementPolicy: OrderedReady
  selector:
    matchLabels:
      app.kubernetes.io/name: valkey
      app.kubernetes.io/instance: directus
  volumeClaimTemplates:
    - metadata:
        name: valkey-data
      spec:
        accessModes:
          - ReadWriteOnce
        storageClassName: "ceph-block"
        resources:
          requests:
            storage: "1Gi"
  template:
    metadata:
      labels:
        app.kubernetes.io/name: valkey
        app.kubernetes.io/instance: directus
      annotations:
        checksum/initconfig: "6307ecb287c2f05dc09ba3cf7cdfd155"
    spec:
      automountServiceAccountToken: false
      serviceAccountName: directus-valkey
      securityContext:
        fsGroup: 1000
        runAsGroup: 1000
        runAsUser: 1000
      initContainers:
        - name: directus-valkey-init
          image: docker.io/valkey/valkey:9.0.3
          imagePullPolicy: IfNotPresent
          securityContext:
            capabilities:
              drop:
                - ALL
            readOnlyRootFilesystem: true
            runAsNonRoot: true
            runAsUser: 1000
          command: ["/scripts/init.sh"]
          env:
            - name: POD_INDEX
              valueFrom:
                fieldRef:
                  fieldPath: metadata.labels['apps.kubernetes.io/pod-index']
          volumeMounts:
            - name: valkey-data
              mountPath: /data
            - name: scripts
              mountPath: /scripts
            - name: valkey-acl
              mountPath: /etc/valkey
            - name: valkey-users-secret
              mountPath: /valkey-users-secret
              readOnly: true
      containers:
        - name: directus-valkey
          image: docker.io/valkey/valkey:9.0.3
          imagePullPolicy: IfNotPresent
          command: ["valkey-server"]
          args: ["/data/conf/valkey.conf"]
          securityContext:
            capabilities:
              drop:
                - ALL
            readOnlyRootFilesystem: true
            runAsNonRoot: true
            runAsUser: 1000
          env:
            - name: POD_INDEX
              valueFrom:
                fieldRef:
                  fieldPath: metadata.labels['apps.kubernetes.io/pod-index']
            - name: VALKEY_LOGLEVEL
              value: "notice"
          ports:
            - name: tcp
              containerPort: 6379
              protocol: TCP
          startupProbe:
            exec:
              command: ["sh", "-c", "valkey-cli ping"]
          livenessProbe:
            exec:
              command: ["sh", "-c", "valkey-cli ping"]
          resources:
            requests:
              cpu: 10m
              memory: 128Mi
          volumeMounts:
            - name: valkey-data
              mountPath: /data
            - name: valkey-acl
              mountPath: /etc/valkey
      volumes:
        - name: scripts
          configMap:
            name: directus-valkey-init-scripts
            defaultMode: 0555
        - name: valkey-acl
          emptyDir:
            medium: Memory
        - name: valkey-users-secret
          secret:
            secretName: directus-valkey-config
            defaultMode: 0400