From 4f3c2d89a07eec675a57bf661887523f97193d0a Mon Sep 17 00:00:00 2001 From: gitea-bot Date: Sun, 3 May 2026 00:23:59 +0000 Subject: [PATCH 1/5] chore: Update manifests after change --- clusters/cl01tl/manifests/immich/-.yaml | 1 + .../Cluster-immich-postgresql-18-cluster.yaml | 68 ----- .../ConfigMap-immich-valkey-init-scripts.yaml | 87 ------ .../manifests/immich/Deployment-immich.yaml | 125 -------- ...lSecret-immich-backup-secret-external.yaml | 47 --- ...rnalSecret-immich-backup-secret-local.yaml | 47 --- ...nalSecret-immich-backup-secret-remote.yaml | 47 --- ...tgresql-18-backup-garage-local-secret.yaml | 29 -- ...-immich-postgresql-18-recovery-secret.yaml | 29 -- .../manifests/immich/HTTPRoute-immich.yaml | 30 -- ...ich-postgresql-18-backup-garage-local.yaml | 33 --- ...ctStore-immich-postgresql-18-recovery.yaml | 32 --- .../immich/PersistentVolumeClaim-immich.yaml | 19 -- .../immich/PodMonitor-immich-valkey.yaml | 23 -- ...etheusRule-immich-backup-source-local.yaml | 30 -- ...Rule-immich-postgresql-18-alert-rules.yaml | 270 ------------------ .../immich/PrometheusRule-immich-valkey.yaml | 47 --- ...nSource-immich-backup-source-external.yaml | 29 -- ...tionSource-immich-backup-source-local.yaml | 29 -- ...ionSource-immich-backup-source-remote.yaml | 29 -- ...resql-18-scheduled-backup-live-backup.yaml | 24 -- .../immich/Secret-immich-immich-sa-token.yaml | 13 - .../SecretProviderClass-immich-config.yaml | 19 -- .../Service-immich-valkey-headless.yaml | 23 -- .../immich/Service-immich-valkey-metrics.yaml | 23 -- .../immich/Service-immich-valkey-read.yaml | 21 -- .../immich/Service-immich-valkey.yaml | 22 -- .../manifests/immich/Service-immich.yaml | 30 -- .../immich/ServiceAccount-immich-valkey.yaml | 11 - .../immich/ServiceAccount-immich.yaml | 12 - .../immich/ServiceMonitor-immich-valkey.yaml | 24 -- .../immich/ServiceMonitor-immich.yaml | 28 -- .../immich/StatefulSet-immich-valkey.yaml | 133 --------- 33 files changed, 1 insertion(+), 1433 deletions(-) create mode 100644 clusters/cl01tl/manifests/immich/-.yaml delete mode 100644 clusters/cl01tl/manifests/immich/Cluster-immich-postgresql-18-cluster.yaml delete mode 100644 clusters/cl01tl/manifests/immich/ConfigMap-immich-valkey-init-scripts.yaml delete mode 100644 clusters/cl01tl/manifests/immich/Deployment-immich.yaml delete mode 100644 clusters/cl01tl/manifests/immich/ExternalSecret-immich-backup-secret-external.yaml delete mode 100644 clusters/cl01tl/manifests/immich/ExternalSecret-immich-backup-secret-local.yaml delete mode 100644 clusters/cl01tl/manifests/immich/ExternalSecret-immich-backup-secret-remote.yaml delete mode 100644 clusters/cl01tl/manifests/immich/ExternalSecret-immich-postgresql-18-backup-garage-local-secret.yaml delete mode 100644 clusters/cl01tl/manifests/immich/ExternalSecret-immich-postgresql-18-recovery-secret.yaml delete mode 100644 clusters/cl01tl/manifests/immich/HTTPRoute-immich.yaml delete mode 100644 clusters/cl01tl/manifests/immich/ObjectStore-immich-postgresql-18-backup-garage-local.yaml delete mode 100644 clusters/cl01tl/manifests/immich/ObjectStore-immich-postgresql-18-recovery.yaml delete mode 100644 clusters/cl01tl/manifests/immich/PersistentVolumeClaim-immich.yaml delete mode 100644 clusters/cl01tl/manifests/immich/PodMonitor-immich-valkey.yaml delete mode 100644 clusters/cl01tl/manifests/immich/PrometheusRule-immich-backup-source-local.yaml delete mode 100644 clusters/cl01tl/manifests/immich/PrometheusRule-immich-postgresql-18-alert-rules.yaml delete mode 100644 clusters/cl01tl/manifests/immich/PrometheusRule-immich-valkey.yaml delete mode 100644 clusters/cl01tl/manifests/immich/ReplicationSource-immich-backup-source-external.yaml delete mode 100644 clusters/cl01tl/manifests/immich/ReplicationSource-immich-backup-source-local.yaml delete mode 100644 clusters/cl01tl/manifests/immich/ReplicationSource-immich-backup-source-remote.yaml delete mode 100644 clusters/cl01tl/manifests/immich/ScheduledBackup-immich-postgresql-18-scheduled-backup-live-backup.yaml delete mode 100644 clusters/cl01tl/manifests/immich/Secret-immich-immich-sa-token.yaml delete mode 100644 clusters/cl01tl/manifests/immich/SecretProviderClass-immich-config.yaml delete mode 100644 clusters/cl01tl/manifests/immich/Service-immich-valkey-headless.yaml delete mode 100644 clusters/cl01tl/manifests/immich/Service-immich-valkey-metrics.yaml delete mode 100644 clusters/cl01tl/manifests/immich/Service-immich-valkey-read.yaml delete mode 100644 clusters/cl01tl/manifests/immich/Service-immich-valkey.yaml delete mode 100644 clusters/cl01tl/manifests/immich/Service-immich.yaml delete mode 100644 clusters/cl01tl/manifests/immich/ServiceAccount-immich-valkey.yaml delete mode 100644 clusters/cl01tl/manifests/immich/ServiceAccount-immich.yaml delete mode 100644 clusters/cl01tl/manifests/immich/ServiceMonitor-immich-valkey.yaml delete mode 100644 clusters/cl01tl/manifests/immich/ServiceMonitor-immich.yaml delete mode 100644 clusters/cl01tl/manifests/immich/StatefulSet-immich-valkey.yaml diff --git a/clusters/cl01tl/manifests/immich/-.yaml b/clusters/cl01tl/manifests/immich/-.yaml new file mode 100644 index 000000000..8b1378917 --- /dev/null +++ b/clusters/cl01tl/manifests/immich/-.yaml @@ -0,0 +1 @@ + diff --git a/clusters/cl01tl/manifests/immich/Cluster-immich-postgresql-18-cluster.yaml b/clusters/cl01tl/manifests/immich/Cluster-immich-postgresql-18-cluster.yaml deleted file mode 100644 index cfeec6e35..000000000 --- a/clusters/cl01tl/manifests/immich/Cluster-immich-postgresql-18-cluster.yaml +++ /dev/null @@ -1,68 +0,0 @@ -apiVersion: postgresql.cnpg.io/v1 -kind: Cluster -metadata: - name: immich-postgresql-18-cluster - namespace: immich - labels: - app.kubernetes.io/name: immich-postgresql-18-cluster - helm.sh/chart: postgres-18-cluster-7.12.1 - app.kubernetes.io/instance: immich - app.kubernetes.io/part-of: immich - app.kubernetes.io/version: "7.12.1" - app.kubernetes.io/managed-by: Helm -spec: - instances: 3 - imageName: "ghcr.io/tensorchord/cloudnative-vectorchord:18.0-0.5.3" - imagePullPolicy: IfNotPresent - postgresUID: 26 - postgresGID: 26 - storage: - size: 10Gi - storageClass: local-path - walStorage: - size: 2Gi - storageClass: local-path - resources: - limits: - hugepages-2Mi: 256Mi - requests: - cpu: 20m - memory: 80Mi - affinity: - enablePodAntiAffinity: true - topologyKey: kubernetes.io/hostname - primaryUpdateMethod: switchover - primaryUpdateStrategy: unsupervised - logLevel: info - enableSuperuserAccess: false - enablePDB: true - postgresql: - shared_preload_libraries: - - vchord.so - parameters: - hot_standby_feedback: "on" - max_slot_wal_keep_size: 2000MB - shared_buffers: 256MB - monitoring: - enablePodMonitor: true - disableDefaultQueries: false - plugins: - - name: barman-cloud.cloudnative-pg.io - enabled: true - isWALArchiver: true - parameters: - barmanObjectName: "immich-postgresql-18-backup-garage-local" - serverName: "immich-postgresql-18-backup-1" - bootstrap: - recovery: - database: app - source: immich-postgresql-18-backup-1 - externalClusters: - - name: immich-postgresql-18-backup-1 - plugin: - name: barman-cloud.cloudnative-pg.io - enabled: true - isWALArchiver: false - parameters: - barmanObjectName: "immich-postgresql-18-recovery" - serverName: immich-postgresql-18-backup-1 diff --git a/clusters/cl01tl/manifests/immich/ConfigMap-immich-valkey-init-scripts.yaml b/clusters/cl01tl/manifests/immich/ConfigMap-immich-valkey-init-scripts.yaml deleted file mode 100644 index 6eff7c092..000000000 --- a/clusters/cl01tl/manifests/immich/ConfigMap-immich-valkey-init-scripts.yaml +++ /dev/null @@ -1,87 +0,0 @@ -apiVersion: v1 -kind: ConfigMap -metadata: - name: immich-valkey-init-scripts - labels: - helm.sh/chart: valkey-0.9.4 - app.kubernetes.io/name: valkey - app.kubernetes.io/instance: immich - app.kubernetes.io/version: "9.0.3" - app.kubernetes.io/managed-by: Helm -data: - init.sh: |- - #!/bin/sh - set -eu - - # Default config paths - VALKEY_CONFIG=${VALKEY_CONFIG_PATH:-/data/conf/valkey.conf} - - LOGFILE="/data/init.log" - DATA_DIR="/data/conf" - - # Logging function (outputs to stderr and file) - log() { - echo "$(date) $1" | tee -a "$LOGFILE" >&2 - } - - # Clean old log if requested - if [ "${KEEP_OLD_LOGS:-false}" != "true" ]; then - rm -f "$LOGFILE" - fi - - if [ -f "$LOGFILE" ]; then - log "Detected restart of this instance ($HOSTNAME)" - fi - - log "Creating configuration in $DATA_DIR..." - mkdir -p "$DATA_DIR" - rm -f "$VALKEY_CONFIG" - - - # Base valkey.conf - log "Generating base valkey.conf" - { - echo "port 6379" - echo "protected-mode no" - echo "bind * -::*" - echo "dir /data" - } >>"$VALKEY_CONFIG" - # Replica mode configuration - log "Configuring replication mode" - - # Use POD_INDEX from Kubernetes metadata - POD_INDEX=${POD_INDEX:-0} - IS_MASTER=false - - # Check if this is pod-0 (master) - if [ "$POD_INDEX" = "0" ]; then - IS_MASTER=true - log "This pod (index $POD_INDEX) is configured as MASTER" - else - log "This pod (index $POD_INDEX) is configured as REPLICA" - fi - - # Configure replica settings - if [ "$IS_MASTER" = "false" ]; then - MASTER_HOST="immich-valkey-0.immich-valkey-headless.immich.svc.cluster.local" - MASTER_PORT="6379" - - log "Configuring replica to follow master at $MASTER_HOST:$MASTER_PORT" - - { - echo "" - echo "# Replica Configuration" - echo "replicaof $MASTER_HOST $MASTER_PORT" - echo "replica-announce-ip immich-valkey-$POD_INDEX.immich-valkey-headless.immich.svc.cluster.local" - } >>"$VALKEY_CONFIG" - fi - - # Append extra configs if present - if [ -f /usr/local/etc/valkey/valkey.conf ]; then - log "Appending /usr/local/etc/valkey/valkey.conf" - cat /usr/local/etc/valkey/valkey.conf >>"$VALKEY_CONFIG" - fi - if [ -d /extravalkeyconfigs ]; then - log "Appending files in /extravalkeyconfigs/" - cat /extravalkeyconfigs/* >>"$VALKEY_CONFIG" - fi diff --git a/clusters/cl01tl/manifests/immich/Deployment-immich.yaml b/clusters/cl01tl/manifests/immich/Deployment-immich.yaml deleted file mode 100644 index 545ba0eea..000000000 --- a/clusters/cl01tl/manifests/immich/Deployment-immich.yaml +++ /dev/null @@ -1,125 +0,0 @@ -apiVersion: apps/v1 -kind: Deployment -metadata: - name: immich - labels: - app.kubernetes.io/controller: main - app.kubernetes.io/instance: immich - app.kubernetes.io/managed-by: Helm - app.kubernetes.io/name: immich - helm.sh/chart: immich-4.6.2 - namespace: immich -spec: - revisionHistoryLimit: 3 - replicas: 1 - strategy: - type: Recreate - selector: - matchLabels: - app.kubernetes.io/controller: main - app.kubernetes.io/name: immich - app.kubernetes.io/instance: immich - template: - metadata: - annotations: - checksum/secrets: 46a3f57ca394cccffc419e0c17f5d5f366374b0651c02c507636c53c0b5f33e6 - labels: - app.kubernetes.io/controller: main - app.kubernetes.io/instance: immich - app.kubernetes.io/name: immich - spec: - enableServiceLinks: false - serviceAccountName: immich - automountServiceAccountToken: true - hostIPC: false - hostNetwork: false - hostPID: false - dnsPolicy: ClusterFirst - containers: - - env: - - name: TZ - value: America/Chicago - - name: IMMICH_TELEMETRY_INCLUDE - value: all - - name: IMMICH_CONFIG_FILE - value: /config/immich.json - - name: REDIS_HOSTNAME - value: immich-valkey - - name: DB_VECTOR_EXTENSION - value: vectorchord - - name: DB_HOSTNAME - valueFrom: - secretKeyRef: - key: host - name: immich-postgresql-18-cluster-app - - name: DB_DATABASE_NAME - valueFrom: - secretKeyRef: - key: dbname - name: immich-postgresql-18-cluster-app - - name: DB_PORT - valueFrom: - secretKeyRef: - key: port - name: immich-postgresql-18-cluster-app - - name: DB_USERNAME - valueFrom: - secretKeyRef: - key: user - name: immich-postgresql-18-cluster-app - - name: DB_PASSWORD - valueFrom: - secretKeyRef: - key: password - name: immich-postgresql-18-cluster-app - image: ghcr.io/immich-app/immich-server:v2.7.5@sha256:c15bff75068effb03f4355997d03dc7e0fc58720c2b54ad6f7f10d1bc57efaa5 - livenessProbe: - failureThreshold: 3 - httpGet: - path: /api/server/ping - port: 2283 - initialDelaySeconds: 0 - periodSeconds: 10 - timeoutSeconds: 1 - name: main - readinessProbe: - failureThreshold: 3 - httpGet: - path: /api/server/ping - port: 2283 - initialDelaySeconds: 0 - periodSeconds: 10 - timeoutSeconds: 1 - resources: - limits: - gpu.intel.com/i915: 1 - requests: - cpu: 10m - gpu.intel.com/i915: 1 - memory: 500Mi - startupProbe: - failureThreshold: 30 - httpGet: - path: /api/server/ping - port: 2283 - initialDelaySeconds: 0 - periodSeconds: 10 - timeoutSeconds: 1 - volumeMounts: - - mountPath: /config/immich.json - mountPropagation: None - name: config - readOnly: true - subPath: immich.json - - mountPath: /usr/src/app/upload - name: data - volumes: - - csi: - driver: secrets-store.csi.k8s.io - readOnly: true - volumeAttributes: - secretProviderClass: immich-config - name: config - - name: data - persistentVolumeClaim: - claimName: immich diff --git a/clusters/cl01tl/manifests/immich/ExternalSecret-immich-backup-secret-external.yaml b/clusters/cl01tl/manifests/immich/ExternalSecret-immich-backup-secret-external.yaml deleted file mode 100644 index f0fd469dc..000000000 --- a/clusters/cl01tl/manifests/immich/ExternalSecret-immich-backup-secret-external.yaml +++ /dev/null @@ -1,47 +0,0 @@ -apiVersion: external-secrets.io/v1 -kind: ExternalSecret -metadata: - name: immich-backup-secret-external - namespace: immich - labels: - helm.sh/chart: volsync-target-data-1.1.1 - app.kubernetes.io/instance: immich - app.kubernetes.io/part-of: immich - app.kubernetes.io/version: "1.1.1" - app.kubernetes.io/managed-by: Helm - app.kubernetes.io/name: immich-backup-secret-external -spec: - secretStoreRef: - kind: ClusterSecretStore - name: openbao - target: - template: - mergePolicy: Merge - engineVersion: v2 - data: - RESTIC_REPOSITORY: "s3:{{ .ENDPOINT }}/{{ .BUCKET }}/cl01tl/immich/immich" - data: - - secretKey: ENDPOINT - remoteRef: - key: /digital-ocean/config - property: ENDPOINT - - secretKey: BUCKET - remoteRef: - key: /digital-ocean/home-infra/volsync-backups - property: BUCKET - - secretKey: RESTIC_PASSWORD - remoteRef: - key: /digital-ocean/home-infra/volsync-backups - property: RESTIC_PASSWORD - - secretKey: AWS_DEFAULT_REGION - remoteRef: - key: /digital-ocean/home-infra/volsync-backups - property: AWS_REGION - - secretKey: AWS_ACCESS_KEY_ID - remoteRef: - key: /digital-ocean/home-infra/volsync-backups - property: AWS_ACCESS_KEY_ID - - secretKey: AWS_SECRET_ACCESS_KEY - remoteRef: - key: /digital-ocean/home-infra/volsync-backups - property: AWS_SECRET_ACCESS_KEY diff --git a/clusters/cl01tl/manifests/immich/ExternalSecret-immich-backup-secret-local.yaml b/clusters/cl01tl/manifests/immich/ExternalSecret-immich-backup-secret-local.yaml deleted file mode 100644 index 0ee9fe200..000000000 --- a/clusters/cl01tl/manifests/immich/ExternalSecret-immich-backup-secret-local.yaml +++ /dev/null @@ -1,47 +0,0 @@ -apiVersion: external-secrets.io/v1 -kind: ExternalSecret -metadata: - name: immich-backup-secret-local - namespace: immich - labels: - helm.sh/chart: volsync-target-data-1.1.1 - app.kubernetes.io/instance: immich - app.kubernetes.io/part-of: immich - app.kubernetes.io/version: "1.1.1" - app.kubernetes.io/managed-by: Helm - app.kubernetes.io/name: immich-backup-secret-local -spec: - secretStoreRef: - kind: ClusterSecretStore - name: openbao - target: - template: - mergePolicy: Merge - engineVersion: v2 - data: - RESTIC_REPOSITORY: "s3:{{ .ENDPOINT }}/{{ .BUCKET }}/cl01tl/immich/immich" - data: - - secretKey: ENDPOINT - remoteRef: - key: /garage/config - property: ENDPOINT_LOCAL - - secretKey: BUCKET - remoteRef: - key: /garage/home-infra/volsync-backups - property: BUCKET - - secretKey: RESTIC_PASSWORD - remoteRef: - key: /garage/home-infra/volsync-backups - property: RESTIC_PASSWORD_LOCAL - - secretKey: AWS_DEFAULT_REGION - remoteRef: - key: /garage/home-infra/volsync-backups - property: ACCESS_REGION - - secretKey: AWS_ACCESS_KEY_ID - remoteRef: - key: /garage/home-infra/volsync-backups - property: ACCESS_KEY_ID - - secretKey: AWS_SECRET_ACCESS_KEY - remoteRef: - key: /garage/home-infra/volsync-backups - property: ACCESS_SECRET_KEY diff --git a/clusters/cl01tl/manifests/immich/ExternalSecret-immich-backup-secret-remote.yaml b/clusters/cl01tl/manifests/immich/ExternalSecret-immich-backup-secret-remote.yaml deleted file mode 100644 index 6c944429c..000000000 --- a/clusters/cl01tl/manifests/immich/ExternalSecret-immich-backup-secret-remote.yaml +++ /dev/null @@ -1,47 +0,0 @@ -apiVersion: external-secrets.io/v1 -kind: ExternalSecret -metadata: - name: immich-backup-secret-remote - namespace: immich - labels: - helm.sh/chart: volsync-target-data-1.1.1 - app.kubernetes.io/instance: immich - app.kubernetes.io/part-of: immich - app.kubernetes.io/version: "1.1.1" - app.kubernetes.io/managed-by: Helm - app.kubernetes.io/name: immich-backup-secret-remote -spec: - secretStoreRef: - kind: ClusterSecretStore - name: openbao - target: - template: - mergePolicy: Merge - engineVersion: v2 - data: - RESTIC_REPOSITORY: "s3:{{ .ENDPOINT }}/{{ .BUCKET }}/cl01tl/immich/immich" - data: - - secretKey: ENDPOINT - remoteRef: - key: /garage/config - property: ENDPOINT_REMOTE - - secretKey: BUCKET - remoteRef: - key: /garage/home-infra/volsync-backups - property: BUCKET - - secretKey: RESTIC_PASSWORD - remoteRef: - key: /garage/home-infra/volsync-backups - property: RESTIC_PASSWORD_REMOTE - - secretKey: AWS_DEFAULT_REGION - remoteRef: - key: /garage/home-infra/volsync-backups - property: ACCESS_REGION - - secretKey: AWS_ACCESS_KEY_ID - remoteRef: - key: /garage/home-infra/volsync-backups - property: ACCESS_KEY_ID - - secretKey: AWS_SECRET_ACCESS_KEY - remoteRef: - key: /garage/home-infra/volsync-backups - property: ACCESS_SECRET_KEY diff --git a/clusters/cl01tl/manifests/immich/ExternalSecret-immich-postgresql-18-backup-garage-local-secret.yaml b/clusters/cl01tl/manifests/immich/ExternalSecret-immich-postgresql-18-backup-garage-local-secret.yaml deleted file mode 100644 index 872987bdb..000000000 --- a/clusters/cl01tl/manifests/immich/ExternalSecret-immich-postgresql-18-backup-garage-local-secret.yaml +++ /dev/null @@ -1,29 +0,0 @@ -apiVersion: external-secrets.io/v1 -kind: ExternalSecret -metadata: - name: immich-postgresql-18-backup-garage-local-secret - namespace: immich - labels: - app.kubernetes.io/name: immich-postgresql-18-backup-garage-local-secret - helm.sh/chart: postgres-18-cluster-7.12.1 - app.kubernetes.io/instance: immich - app.kubernetes.io/part-of: immich - app.kubernetes.io/version: "7.12.1" - app.kubernetes.io/managed-by: Helm -spec: - secretStoreRef: - kind: ClusterSecretStore - name: openbao - data: - - secretKey: ACCESS_REGION - remoteRef: - key: /garage/home-infra/postgres-backups - property: ACCESS_REGION - - secretKey: ACCESS_KEY_ID - remoteRef: - key: /garage/home-infra/postgres-backups - property: ACCESS_KEY_ID - - secretKey: ACCESS_SECRET_KEY - remoteRef: - key: /garage/home-infra/postgres-backups - property: ACCESS_SECRET_KEY diff --git a/clusters/cl01tl/manifests/immich/ExternalSecret-immich-postgresql-18-recovery-secret.yaml b/clusters/cl01tl/manifests/immich/ExternalSecret-immich-postgresql-18-recovery-secret.yaml deleted file mode 100644 index 4032effcb..000000000 --- a/clusters/cl01tl/manifests/immich/ExternalSecret-immich-postgresql-18-recovery-secret.yaml +++ /dev/null @@ -1,29 +0,0 @@ -apiVersion: external-secrets.io/v1 -kind: ExternalSecret -metadata: - name: immich-postgresql-18-recovery-secret - namespace: immich - labels: - helm.sh/chart: postgres-18-cluster-7.12.1 - app.kubernetes.io/instance: immich - app.kubernetes.io/part-of: immich - app.kubernetes.io/version: "7.12.1" - app.kubernetes.io/managed-by: Helm - app.kubernetes.io/name: immich-postgresql-18-recovery-secret -spec: - secretStoreRef: - kind: ClusterSecretStore - name: openbao - data: - - secretKey: ACCESS_REGION - remoteRef: - key: /garage/home-infra/postgres-backups - property: ACCESS_REGION - - secretKey: ACCESS_KEY_ID - remoteRef: - key: /garage/home-infra/postgres-backups - property: ACCESS_KEY_ID - - secretKey: ACCESS_SECRET_KEY - remoteRef: - key: /garage/home-infra/postgres-backups - property: ACCESS_SECRET_KEY diff --git a/clusters/cl01tl/manifests/immich/HTTPRoute-immich.yaml b/clusters/cl01tl/manifests/immich/HTTPRoute-immich.yaml deleted file mode 100644 index b8475cbe9..000000000 --- a/clusters/cl01tl/manifests/immich/HTTPRoute-immich.yaml +++ /dev/null @@ -1,30 +0,0 @@ -apiVersion: gateway.networking.k8s.io/v1 -kind: HTTPRoute -metadata: - name: immich - labels: - app.kubernetes.io/instance: immich - app.kubernetes.io/managed-by: Helm - app.kubernetes.io/name: immich - helm.sh/chart: immich-4.6.2 - namespace: immich -spec: - parentRefs: - - group: gateway.networking.k8s.io - kind: Gateway - name: traefik-gateway - namespace: traefik - hostnames: - - "immich.alexlebens.net" - rules: - - backendRefs: - - group: "" - kind: Service - name: immich - namespace: immich - port: 2283 - weight: 1 - matches: - - path: - type: PathPrefix - value: / diff --git a/clusters/cl01tl/manifests/immich/ObjectStore-immich-postgresql-18-backup-garage-local.yaml b/clusters/cl01tl/manifests/immich/ObjectStore-immich-postgresql-18-backup-garage-local.yaml deleted file mode 100644 index 2a0852f0c..000000000 --- a/clusters/cl01tl/manifests/immich/ObjectStore-immich-postgresql-18-backup-garage-local.yaml +++ /dev/null @@ -1,33 +0,0 @@ -apiVersion: barmancloud.cnpg.io/v1 -kind: ObjectStore -metadata: - name: immich-postgresql-18-backup-garage-local - namespace: immich - labels: - app.kubernetes.io/name: immich-postgresql-18-backup-garage-local - helm.sh/chart: postgres-18-cluster-7.12.1 - app.kubernetes.io/instance: immich - app.kubernetes.io/part-of: immich - app.kubernetes.io/version: "7.12.1" - app.kubernetes.io/managed-by: Helm -spec: - retentionPolicy: 7d - instanceSidecarConfiguration: - env: - - name: AWS_REQUEST_CHECKSUM_CALCULATION - value: when_required - - name: AWS_RESPONSE_CHECKSUM_VALIDATION - value: when_required - configuration: - destinationPath: s3://postgres-backups/cl01tl/immich/immich-postgresql-18-cluster - endpointURL: http://garage-main.garage:3900 - s3Credentials: - accessKeyId: - name: immich-postgresql-18-backup-garage-local-secret - key: ACCESS_KEY_ID - secretAccessKey: - name: immich-postgresql-18-backup-garage-local-secret - key: ACCESS_SECRET_KEY - region: - name: immich-postgresql-18-backup-garage-local-secret - key: ACCESS_REGION diff --git a/clusters/cl01tl/manifests/immich/ObjectStore-immich-postgresql-18-recovery.yaml b/clusters/cl01tl/manifests/immich/ObjectStore-immich-postgresql-18-recovery.yaml deleted file mode 100644 index 2c0f41c0e..000000000 --- a/clusters/cl01tl/manifests/immich/ObjectStore-immich-postgresql-18-recovery.yaml +++ /dev/null @@ -1,32 +0,0 @@ -apiVersion: barmancloud.cnpg.io/v1 -kind: ObjectStore -metadata: - name: "immich-postgresql-18-recovery" - namespace: immich - labels: - helm.sh/chart: postgres-18-cluster-7.12.1 - app.kubernetes.io/instance: immich - app.kubernetes.io/part-of: immich - app.kubernetes.io/version: "7.12.1" - app.kubernetes.io/managed-by: Helm - app.kubernetes.io/name: "immich-postgresql-18-recovery" -spec: - configuration: - destinationPath: s3://postgres-backups/cl01tl/immich/immich-postgresql-18-cluster - endpointURL: http://garage-main.garage:3900 - wal: - compression: snappy - maxParallel: 1 - data: - compression: snappy - jobs: 1 - s3Credentials: - accessKeyId: - name: immich-postgresql-18-recovery-secret - key: ACCESS_KEY_ID - secretAccessKey: - name: immich-postgresql-18-recovery-secret - key: ACCESS_SECRET_KEY - region: - name: immich-postgresql-18-recovery-secret - key: ACCESS_REGION diff --git a/clusters/cl01tl/manifests/immich/PersistentVolumeClaim-immich.yaml b/clusters/cl01tl/manifests/immich/PersistentVolumeClaim-immich.yaml deleted file mode 100644 index 0ad202dd2..000000000 --- a/clusters/cl01tl/manifests/immich/PersistentVolumeClaim-immich.yaml +++ /dev/null @@ -1,19 +0,0 @@ -kind: PersistentVolumeClaim -apiVersion: v1 -metadata: - name: immich - labels: - app.kubernetes.io/instance: immich - app.kubernetes.io/managed-by: Helm - app.kubernetes.io/name: immich - helm.sh/chart: immich-4.6.2 - annotations: - helm.sh/resource-policy: keep - namespace: immich -spec: - accessModes: - - "ReadWriteOnce" - resources: - requests: - storage: "50Gi" - storageClassName: "ceph-block" diff --git a/clusters/cl01tl/manifests/immich/PodMonitor-immich-valkey.yaml b/clusters/cl01tl/manifests/immich/PodMonitor-immich-valkey.yaml deleted file mode 100644 index de18b8537..000000000 --- a/clusters/cl01tl/manifests/immich/PodMonitor-immich-valkey.yaml +++ /dev/null @@ -1,23 +0,0 @@ -apiVersion: monitoring.coreos.com/v1 -kind: PodMonitor -metadata: - name: immich-valkey - labels: - helm.sh/chart: valkey-0.9.4 - app.kubernetes.io/name: valkey - app.kubernetes.io/instance: immich - app.kubernetes.io/version: "9.0.3" - app.kubernetes.io/managed-by: Helm - app.kubernetes.io/part-of: valkey - app.kubernetes.io/component: podmonitor -spec: - podMetricsEndpoints: - - port: metrics - interval: 30s - namespaceSelector: - matchNames: - - immich - selector: - matchLabels: - app.kubernetes.io/name: valkey - app.kubernetes.io/instance: immich diff --git a/clusters/cl01tl/manifests/immich/PrometheusRule-immich-backup-source-local.yaml b/clusters/cl01tl/manifests/immich/PrometheusRule-immich-backup-source-local.yaml deleted file mode 100644 index a476eddd7..000000000 --- a/clusters/cl01tl/manifests/immich/PrometheusRule-immich-backup-source-local.yaml +++ /dev/null @@ -1,30 +0,0 @@ -apiVersion: monitoring.coreos.com/v1 -kind: PrometheusRule -metadata: - name: immich-backup-source-local - namespace: immich - labels: - helm.sh/chart: volsync-target-data-1.1.1 - app.kubernetes.io/instance: immich - app.kubernetes.io/part-of: immich - app.kubernetes.io/version: "1.1.1" - app.kubernetes.io/managed-by: Helm - app.kubernetes.io/name: immich-backup-source-local -spec: - groups: - - name: volsync.alerts - rules: - - alert: VolSyncBackupPodFailed - expr: | - (kube_pod_container_status_last_terminated_exitcode > 0) - * on(pod, namespace) group_left(owner_name) - kube_pod_owner{owner_kind="Job", owner_name=~"volsync-.*"} - for: 1m - labels: - severity: critical - annotations: - summary: "VolSync Backup Pod failed in {{ $labels.namespace }}" - description: | - A pod for the VolSync backup of PVC 'immich' failed with exit code {{ $value }}. - Job: {{ $labels.owner_name }} - Namespace: {{ $labels.namespace }} diff --git a/clusters/cl01tl/manifests/immich/PrometheusRule-immich-postgresql-18-alert-rules.yaml b/clusters/cl01tl/manifests/immich/PrometheusRule-immich-postgresql-18-alert-rules.yaml deleted file mode 100644 index 6d5b90393..000000000 --- a/clusters/cl01tl/manifests/immich/PrometheusRule-immich-postgresql-18-alert-rules.yaml +++ /dev/null @@ -1,270 +0,0 @@ -apiVersion: monitoring.coreos.com/v1 -kind: PrometheusRule -metadata: - name: immich-postgresql-18-alert-rules - namespace: immich - labels: - app.kubernetes.io/name: immich-postgresql-18-alert-rules - helm.sh/chart: postgres-18-cluster-7.12.1 - app.kubernetes.io/instance: immich - app.kubernetes.io/part-of: immich - app.kubernetes.io/version: "7.12.1" - app.kubernetes.io/managed-by: Helm -spec: - groups: - - name: cloudnative-pg/immich-postgresql-18 - rules: - - alert: CNPGClusterBackendsWaitingWarning - annotations: - summary: CNPG Cluster a backend is waiting for longer than 5 minutes. - description: |- - Pod {{ $labels.pod }} - has been waiting for longer than 5 minutes - expr: | - cnpg_backends_waiting_total{namespace="immich"} > 300 - for: 1m - labels: - severity: warning - namespace: immich - cnpg_cluster: immich-postgresql-18-cluster - - alert: CNPGClusterDatabaseDeadlockConflictsWarning - annotations: - summary: CNPG Cluster has over 10 deadlock conflicts. - description: |- - There are over 10 deadlock conflicts in - {{ $labels.pod }} - expr: | - cnpg_pg_stat_database_deadlocks{namespace="immich"} > 10 - for: 1m - labels: - severity: warning - namespace: immich - cnpg_cluster: immich-postgresql-18-cluster - - alert: CNPGClusterHACritical - annotations: - summary: CNPG Cluster has no standby replicas! - description: |- - CloudNativePG Cluster "{{`{{`}} $labels.job {{`}}`}}" has no ready standby replicas. Your cluster at a severe - risk of data loss and downtime if the primary instance fails. - - The primary instance is still online and able to serve queries, although connections to the `-ro` endpoint - will fail. The `-r` endpoint os operating at reduced capacity and all traffic is being served by the main. - - This can happen during a normal fail-over or automated minor version upgrades in a cluster with 2 or less - instances. The replaced instance may need some time to catch-up with the cluster primary instance. - - This alarm will be always trigger if your cluster is configured to run with only 1 instance. In this - case you may want to silence it. - runbook_url: https://github.com/cloudnative-pg/charts/blob/main/charts/cluster/docs/runbooks/CNPGClusterHACritical.md - expr: | - max by (job) (cnpg_pg_replication_streaming_replicas{namespace="immich"} - cnpg_pg_replication_is_wal_receiver_up{namespace="immich"}) < 1 - for: 5m - labels: - severity: critical - namespace: immich - cnpg_cluster: immich-postgresql-18-cluster - - alert: CNPGClusterHAWarning - annotations: - summary: CNPG Cluster less than 2 standby replicas. - description: |- - CloudNativePG Cluster "{{`{{`}} $labels.job {{`}}`}}" has only {{`{{`}} $value {{`}}`}} standby replicas, putting - your cluster at risk if another instance fails. The cluster is still able to operate normally, although - the `-ro` and `-r` endpoints operate at reduced capacity. - - This can happen during a normal fail-over or automated minor version upgrades. The replaced instance may - need some time to catch-up with the cluster primary instance. - - This alarm will be constantly triggered if your cluster is configured to run with less than 3 instances. - In this case you may want to silence it. - runbook_url: https://github.com/cloudnative-pg/charts/blob/main/charts/cluster/docs/runbooks/CNPGClusterHAWarning.md - expr: | - max by (job) (cnpg_pg_replication_streaming_replicas{namespace="immich"} - cnpg_pg_replication_is_wal_receiver_up{namespace="immich"}) < 2 - for: 5m - labels: - severity: warning - namespace: immich - cnpg_cluster: immich-postgresql-18-cluster - - alert: CNPGClusterHighConnectionsCritical - annotations: - summary: CNPG Instance maximum number of connections critical! - description: |- - CloudNativePG Cluster "immich/immich-postgresql-18-cluster" instance {{`{{`}} $labels.pod {{`}}`}} is using {{`{{`}} $value {{`}}`}}% of - the maximum number of connections. - runbook_url: https://github.com/cloudnative-pg/charts/blob/main/charts/cluster/docs/runbooks/CNPGClusterHighConnectionsCritical.md - expr: | - sum by (pod) (cnpg_backends_total{namespace="immich", pod=~"immich-postgresql-18-cluster-([1-9][0-9]*)$"}) / max by (pod) (cnpg_pg_settings_setting{name="max_connections", namespace="immich", pod=~"immich-postgresql-18-cluster-([1-9][0-9]*)$"}) * 100 > 95 - for: 5m - labels: - severity: critical - namespace: immich - cnpg_cluster: immich-postgresql-18-cluster - - alert: CNPGClusterHighConnectionsWarning - annotations: - summary: CNPG Instance is approaching the maximum number of connections. - description: |- - CloudNativePG Cluster "immich/immich-postgresql-18-cluster" instance {{`{{`}} $labels.pod {{`}}`}} is using {{`{{`}} $value {{`}}`}}% of - the maximum number of connections. - runbook_url: https://github.com/cloudnative-pg/charts/blob/main/charts/cluster/docs/runbooks/CNPGClusterHighConnectionsWarning.md - expr: | - sum by (pod) (cnpg_backends_total{namespace="immich", pod=~"immich-postgresql-18-cluster-([1-9][0-9]*)$"}) / max by (pod) (cnpg_pg_settings_setting{name="max_connections", namespace="immich", pod=~"immich-postgresql-18-cluster-([1-9][0-9]*)$"}) * 100 > 80 - for: 5m - labels: - severity: warning - namespace: immich - cnpg_cluster: immich-postgresql-18-cluster - - alert: CNPGClusterHighReplicationLag - annotations: - summary: CNPG Cluster high replication lag - description: |- - CloudNativePG Cluster "immich/immich-postgresql-18-cluster" is experiencing a high replication lag of - {{`{{`}} $value {{`}}`}}ms. - - High replication lag indicates network issues, busy instances, slow queries or suboptimal configuration. - runbook_url: https://github.com/cloudnative-pg/charts/blob/main/charts/cluster/docs/runbooks/CNPGClusterHighReplicationLag.md - expr: | - max(cnpg_pg_replication_lag{namespace="immich",pod=~"immich-postgresql-18-cluster-([1-9][0-9]*)$"}) * 1000 > 1000 - for: 5m - labels: - severity: warning - namespace: immich - cnpg_cluster: immich-postgresql-18-cluster - - alert: CNPGClusterInstancesOnSameNode - annotations: - summary: CNPG Cluster instances are located on the same node. - description: |- - CloudNativePG Cluster "immich/immich-postgresql-18-cluster" has {{`{{`}} $value {{`}}`}} - instances on the same node {{`{{`}} $labels.node {{`}}`}}. - - A failure or scheduled downtime of a single node will lead to a potential service disruption and/or data loss. - runbook_url: https://github.com/cloudnative-pg/charts/blob/main/charts/cluster/docs/runbooks/CNPGClusterInstancesOnSameNode.md - expr: | - count by (node) (kube_pod_info{namespace="immich", pod=~"immich-postgresql-18-cluster-([1-9][0-9]*)$"}) > 1 - for: 5m - labels: - severity: warning - namespace: immich - cnpg_cluster: immich-postgresql-18-cluster - - alert: CNPGClusterLongRunningTransactionWarning - annotations: - summary: CNPG Cluster query is taking longer than 5 minutes. - description: |- - CloudNativePG Cluster Pod {{ $labels.pod }} - is taking more than 5 minutes (300 seconds) for a query. - expr: |- - cnpg_backends_max_tx_duration_seconds{namespace="immich"} > 300 - for: 1m - labels: - severity: warning - namespace: immich - cnpg_cluster: immich-postgresql-18-cluster - - alert: CNPGClusterLowDiskSpaceCritical - annotations: - summary: CNPG Instance is running out of disk space! - description: |- - CloudNativePG Cluster "immich/immich-postgresql-18-cluster" is running extremely low on disk space. Check attached PVCs! - runbook_url: https://github.com/cloudnative-pg/charts/blob/main/charts/cluster/docs/runbooks/CNPGClusterLowDiskSpaceCritical.md - expr: | - max(max by(persistentvolumeclaim) (1 - kubelet_volume_stats_available_bytes{namespace="immich", persistentvolumeclaim=~"immich-postgresql-18-cluster-([1-9][0-9]*)$"} / kubelet_volume_stats_capacity_bytes{namespace="immich", persistentvolumeclaim=~"immich-postgresql-18-cluster-([1-9][0-9]*)$"})) > 0.9 OR - max(max by(persistentvolumeclaim) (1 - kubelet_volume_stats_available_bytes{namespace="immich", persistentvolumeclaim=~"immich-postgresql-18-cluster-([1-9][0-9]*)$-wal"} / kubelet_volume_stats_capacity_bytes{namespace="immich", persistentvolumeclaim=~"immich-postgresql-18-cluster-([1-9][0-9]*)$-wal"})) > 0.9 OR - max(sum by (namespace,persistentvolumeclaim) (kubelet_volume_stats_used_bytes{namespace="immich", persistentvolumeclaim=~"immich-postgresql-18-cluster-([1-9][0-9]*)$-tbs.*"}) - / - sum by (namespace,persistentvolumeclaim) (kubelet_volume_stats_capacity_bytes{namespace="immich", persistentvolumeclaim=~"immich-postgresql-18-cluster-([1-9][0-9]*)$-tbs.*"}) - * - on(namespace, persistentvolumeclaim) group_left(volume) - kube_pod_spec_volumes_persistentvolumeclaims_info{pod=~"immich-postgresql-18-cluster-([1-9][0-9]*)$"} - ) > 0.9 - for: 5m - labels: - severity: critical - namespace: immich - cnpg_cluster: immich-postgresql-18-cluster - - alert: CNPGClusterLowDiskSpaceWarning - annotations: - summary: CNPG Instance is running out of disk space. - description: |- - CloudNativePG Cluster "immich/immich-postgresql-18-cluster" is running low on disk space. Check attached PVCs. - runbook_url: https://github.com/cloudnative-pg/charts/blob/main/charts/cluster/docs/runbooks/CNPGClusterLowDiskSpaceWarning.md - expr: | - max(max by(persistentvolumeclaim) (1 - kubelet_volume_stats_available_bytes{namespace="immich", persistentvolumeclaim=~"immich-postgresql-18-cluster-([1-9][0-9]*)$"} / kubelet_volume_stats_capacity_bytes{namespace="immich", persistentvolumeclaim=~"immich-postgresql-18-cluster-([1-9][0-9]*)$"})) > 0.7 OR - max(max by(persistentvolumeclaim) (1 - kubelet_volume_stats_available_bytes{namespace="immich", persistentvolumeclaim=~"immich-postgresql-18-cluster-([1-9][0-9]*)$-wal"} / kubelet_volume_stats_capacity_bytes{namespace="immich", persistentvolumeclaim=~"immich-postgresql-18-cluster-([1-9][0-9]*)$-wal"})) > 0.7 OR - max(sum by (namespace,persistentvolumeclaim) (kubelet_volume_stats_used_bytes{namespace="immich", persistentvolumeclaim=~"immich-postgresql-18-cluster-([1-9][0-9]*)$-tbs.*"}) - / - sum by (namespace,persistentvolumeclaim) (kubelet_volume_stats_capacity_bytes{namespace="immich", persistentvolumeclaim=~"immich-postgresql-18-cluster-([1-9][0-9]*)$-tbs.*"}) - * - on(namespace, persistentvolumeclaim) group_left(volume) - kube_pod_spec_volumes_persistentvolumeclaims_info{pod=~"immich-postgresql-18-cluster-([1-9][0-9]*)$"} - ) > 0.7 - for: 5m - labels: - severity: warning - namespace: immich - cnpg_cluster: immich-postgresql-18-cluster - - alert: CNPGClusterOffline - annotations: - summary: CNPG Cluster has no running instances! - description: |- - CloudNativePG Cluster "immich/immich-postgresql-18-cluster" has no ready instances. - - Having an offline cluster means your applications will not be able to access the database, leading to - potential service disruption and/or data loss. - runbook_url: https://github.com/cloudnative-pg/charts/blob/main/charts/cluster/docs/runbooks/CNPGClusterOffline.md - expr: | - (count(cnpg_collector_up{namespace="immich",pod=~"immich-postgresql-18-cluster-([1-9][0-9]*)$"}) OR on() vector(0)) == 0 - for: 5m - labels: - severity: critical - namespace: immich - cnpg_cluster: immich-postgresql-18-cluster - - alert: CNPGClusterPGDatabaseXidAgeWarning - annotations: - summary: CNPG Cluster has a number of transactions from the frozen XID to the current one. - description: |- - Over 300,000,000 transactions from frozen xid - on pod {{ $labels.pod }} - expr: | - cnpg_pg_database_xid_age{namespace="immich"} > 300000000 - for: 1m - labels: - severity: warning - namespace: immich - cnpg_cluster: immich-postgresql-18-cluster - - alert: CNPGClusterPGReplicationWarning - annotations: - summary: CNPG Cluster standby is lagging behind the primary. - description: |- - Standby is lagging behind by over 300 seconds (5 minutes) - expr: | - cnpg_pg_replication_lag{namespace="immich"} > 300 - for: 1m - labels: - severity: warning - namespace: immich - cnpg_cluster: immich-postgresql-18-cluster - - alert: CNPGClusterReplicaFailingReplicationWarning - annotations: - summary: CNPG Cluster has a replica is failing to replicate. - description: |- - Replica {{ $labels.pod }} - is failing to replicate - expr: | - cnpg_pg_replication_in_recovery{namespace="immich"} > cnpg_pg_replication_is_wal_receiver_up{namespace="immich"} - for: 1m - labels: - severity: warning - namespace: immich - cnpg_cluster: immich-postgresql-18-cluster - - alert: CNPGClusterZoneSpreadWarning - annotations: - summary: CNPG Cluster instances in the same zone. - description: |- - CloudNativePG Cluster "immich/immich-postgresql-18-cluster" has instances in the same availability zone. - - A disaster in one availability zone will lead to a potential service disruption and/or data loss. - runbook_url: https://github.com/cloudnative-pg/charts/blob/main/charts/cluster/docs/runbooks/CNPGClusterZoneSpreadWarning.md - expr: | - 3 > count(count by (label_topology_kubernetes_io_zone) (kube_pod_info{namespace="immich", pod=~"immich-postgresql-18-cluster-([1-9][0-9]*)$"} * on(node,instance) group_left(label_topology_kubernetes_io_zone) kube_node_labels)) < 3 - for: 5m - labels: - severity: warning - namespace: immich - cnpg_cluster: immich-postgresql-18-cluster diff --git a/clusters/cl01tl/manifests/immich/PrometheusRule-immich-valkey.yaml b/clusters/cl01tl/manifests/immich/PrometheusRule-immich-valkey.yaml deleted file mode 100644 index bc042f70f..000000000 --- a/clusters/cl01tl/manifests/immich/PrometheusRule-immich-valkey.yaml +++ /dev/null @@ -1,47 +0,0 @@ -apiVersion: monitoring.coreos.com/v1 -kind: PrometheusRule -metadata: - name: immich-valkey - labels: - helm.sh/chart: valkey-0.9.4 - app.kubernetes.io/name: valkey - app.kubernetes.io/instance: immich - app.kubernetes.io/version: "9.0.3" - app.kubernetes.io/managed-by: Helm - app.kubernetes.io/part-of: valkey -spec: - groups: - - name: immich-valkey - rules: - - alert: ValkeyDown - annotations: - description: Valkey instance {{ $labels.instance }} is down. - summary: Valkey instance {{ $labels.instance }} down - expr: | - redis_up{service="immich-valkey-metrics"} == 0 - for: 2m - labels: - severity: error - - alert: ValkeyMemoryHigh - annotations: - description: | - Valkey instance {{ $labels.instance }} is using {{ $value }}% of its available memory. - summary: Valkey instance {{ $labels.instance }} is using too much memory - expr: | - redis_memory_used_bytes{service="immich-valkey-metrics"} * 100 - / - redis_memory_max_bytes{service="immich-valkey-metrics"} - > 90 <= 100 - for: 2m - labels: - severity: error - - alert: ValkeyKeyEviction - annotations: - description: | - Valkey instance {{ $labels.instance }} has evicted {{ $value }} keys in the last 5 minutes. - summary: Valkey instance {{ $labels.instance }} has evicted keys - expr: | - increase(redis_evicted_keys_total{service="immich-valkey-metrics"}[5m]) > 0 - for: 1s - labels: - severity: error diff --git a/clusters/cl01tl/manifests/immich/ReplicationSource-immich-backup-source-external.yaml b/clusters/cl01tl/manifests/immich/ReplicationSource-immich-backup-source-external.yaml deleted file mode 100644 index 6de3c1df5..000000000 --- a/clusters/cl01tl/manifests/immich/ReplicationSource-immich-backup-source-external.yaml +++ /dev/null @@ -1,29 +0,0 @@ -apiVersion: volsync.backube/v1alpha1 -kind: ReplicationSource -metadata: - name: immich-backup-source-external - namespace: immich - labels: - helm.sh/chart: volsync-target-data-1.1.1 - app.kubernetes.io/instance: immich - app.kubernetes.io/part-of: immich - app.kubernetes.io/version: "1.1.1" - app.kubernetes.io/managed-by: Helm - app.kubernetes.io/name: immich-backup -spec: - sourcePVC: immich - trigger: - schedule: 24 10 * * * - restic: - pruneIntervalDays: 7 - repository: immich-backup-secret-external - retain: - daily: 7 - hourly: 0 - monthly: 3 - weekly: 4 - yearly: 1 - copyMethod: Snapshot - storageClassName: ceph-block - volumeSnapshotClassName: ceph-blockpool-snapshot - cacheCapacity: 10Gi diff --git a/clusters/cl01tl/manifests/immich/ReplicationSource-immich-backup-source-local.yaml b/clusters/cl01tl/manifests/immich/ReplicationSource-immich-backup-source-local.yaml deleted file mode 100644 index 248145b53..000000000 --- a/clusters/cl01tl/manifests/immich/ReplicationSource-immich-backup-source-local.yaml +++ /dev/null @@ -1,29 +0,0 @@ -apiVersion: volsync.backube/v1alpha1 -kind: ReplicationSource -metadata: - name: immich-backup-source-local - namespace: immich - labels: - helm.sh/chart: volsync-target-data-1.1.1 - app.kubernetes.io/instance: immich - app.kubernetes.io/part-of: immich - app.kubernetes.io/version: "1.1.1" - app.kubernetes.io/managed-by: Helm - app.kubernetes.io/name: immich-backup-source-local -spec: - sourcePVC: immich - trigger: - schedule: 24 8 * * * - restic: - pruneIntervalDays: 7 - repository: immich-backup-secret-local - retain: - daily: 7 - hourly: 0 - monthly: 3 - weekly: 4 - yearly: 1 - copyMethod: Snapshot - storageClassName: ceph-block - volumeSnapshotClassName: ceph-blockpool-snapshot - cacheCapacity: 10Gi diff --git a/clusters/cl01tl/manifests/immich/ReplicationSource-immich-backup-source-remote.yaml b/clusters/cl01tl/manifests/immich/ReplicationSource-immich-backup-source-remote.yaml deleted file mode 100644 index 970c20e74..000000000 --- a/clusters/cl01tl/manifests/immich/ReplicationSource-immich-backup-source-remote.yaml +++ /dev/null @@ -1,29 +0,0 @@ -apiVersion: volsync.backube/v1alpha1 -kind: ReplicationSource -metadata: - name: immich-backup-source-remote - namespace: immich - labels: - helm.sh/chart: volsync-target-data-1.1.1 - app.kubernetes.io/instance: immich - app.kubernetes.io/part-of: immich - app.kubernetes.io/version: "1.1.1" - app.kubernetes.io/managed-by: Helm - app.kubernetes.io/name: immich-backup -spec: - sourcePVC: immich - trigger: - schedule: 24 9 * * * - restic: - pruneIntervalDays: 7 - repository: immich-backup-secret-remote - retain: - daily: 7 - hourly: 0 - monthly: 3 - weekly: 4 - yearly: 1 - copyMethod: Snapshot - storageClassName: ceph-block - volumeSnapshotClassName: ceph-blockpool-snapshot - cacheCapacity: 10Gi diff --git a/clusters/cl01tl/manifests/immich/ScheduledBackup-immich-postgresql-18-scheduled-backup-live-backup.yaml b/clusters/cl01tl/manifests/immich/ScheduledBackup-immich-postgresql-18-scheduled-backup-live-backup.yaml deleted file mode 100644 index 6f18a8a5b..000000000 --- a/clusters/cl01tl/manifests/immich/ScheduledBackup-immich-postgresql-18-scheduled-backup-live-backup.yaml +++ /dev/null @@ -1,24 +0,0 @@ -apiVersion: postgresql.cnpg.io/v1 -kind: ScheduledBackup -metadata: - name: "immich-postgresql-18-scheduled-backup-live-backup" - namespace: immich - labels: - app.kubernetes.io/name: "immich-postgresql-18-scheduled-backup-live-backup" - helm.sh/chart: postgres-18-cluster-7.12.1 - app.kubernetes.io/instance: immich - app.kubernetes.io/part-of: immich - app.kubernetes.io/version: "7.12.1" - app.kubernetes.io/managed-by: Helm -spec: - immediate: true - suspend: false - schedule: "0 40 14 * * *" - backupOwnerReference: self - cluster: - name: immich-postgresql-18-cluster - method: plugin - pluginConfiguration: - name: barman-cloud.cloudnative-pg.io - parameters: - barmanObjectName: "immich-postgresql-18-backup-garage-local" diff --git a/clusters/cl01tl/manifests/immich/Secret-immich-immich-sa-token.yaml b/clusters/cl01tl/manifests/immich/Secret-immich-immich-sa-token.yaml deleted file mode 100644 index 2628b39fd..000000000 --- a/clusters/cl01tl/manifests/immich/Secret-immich-immich-sa-token.yaml +++ /dev/null @@ -1,13 +0,0 @@ -apiVersion: v1 -kind: Secret -type: kubernetes.io/service-account-token -metadata: - name: immich-immich-sa-token - labels: - app.kubernetes.io/instance: immich - app.kubernetes.io/managed-by: Helm - app.kubernetes.io/name: immich - helm.sh/chart: immich-4.6.2 - annotations: - kubernetes.io/service-account.name: immich - namespace: immich diff --git a/clusters/cl01tl/manifests/immich/SecretProviderClass-immich-config.yaml b/clusters/cl01tl/manifests/immich/SecretProviderClass-immich-config.yaml deleted file mode 100644 index d1c78ea57..000000000 --- a/clusters/cl01tl/manifests/immich/SecretProviderClass-immich-config.yaml +++ /dev/null @@ -1,19 +0,0 @@ -apiVersion: secrets-store.csi.x-k8s.io/v1 -kind: SecretProviderClass -metadata: - name: immich-config - namespace: immich - labels: - app.kubernetes.io/name: immich-config - app.kubernetes.io/instance: immich - app.kubernetes.io/part-of: immich -spec: - provider: openbao - parameters: - baoAddress: "http://openbao-internal.openbao:8200" - roleName: immich - objects: | - - objectName: immich.json - fileName: immich.json - secretPath: secret/data/cl01tl/immich/config - secretKey: immich.json diff --git a/clusters/cl01tl/manifests/immich/Service-immich-valkey-headless.yaml b/clusters/cl01tl/manifests/immich/Service-immich-valkey-headless.yaml deleted file mode 100644 index 1b5dcb00a..000000000 --- a/clusters/cl01tl/manifests/immich/Service-immich-valkey-headless.yaml +++ /dev/null @@ -1,23 +0,0 @@ -apiVersion: v1 -kind: Service -metadata: - name: immich-valkey-headless - labels: - helm.sh/chart: valkey-0.9.4 - app.kubernetes.io/name: valkey - app.kubernetes.io/instance: immich - app.kubernetes.io/version: "9.0.3" - app.kubernetes.io/managed-by: Helm - app.kubernetes.io/component: headless -spec: - type: ClusterIP - clusterIP: None - publishNotReadyAddresses: true - ports: - - name: tcp - port: 6379 - targetPort: tcp - protocol: TCP - selector: - app.kubernetes.io/name: valkey - app.kubernetes.io/instance: immich diff --git a/clusters/cl01tl/manifests/immich/Service-immich-valkey-metrics.yaml b/clusters/cl01tl/manifests/immich/Service-immich-valkey-metrics.yaml deleted file mode 100644 index ff6de0460..000000000 --- a/clusters/cl01tl/manifests/immich/Service-immich-valkey-metrics.yaml +++ /dev/null @@ -1,23 +0,0 @@ -apiVersion: v1 -kind: Service -metadata: - name: immich-valkey-metrics - labels: - helm.sh/chart: valkey-0.9.4 - app.kubernetes.io/name: valkey - app.kubernetes.io/instance: immich - app.kubernetes.io/version: "9.0.3" - app.kubernetes.io/managed-by: Helm - app.kubernetes.io/component: metrics - app.kubernetes.io/part-of: valkey - annotations: -spec: - type: ClusterIP - ports: - - name: metrics - port: 9121 - protocol: TCP - targetPort: metrics - selector: - app.kubernetes.io/name: valkey - app.kubernetes.io/instance: immich diff --git a/clusters/cl01tl/manifests/immich/Service-immich-valkey-read.yaml b/clusters/cl01tl/manifests/immich/Service-immich-valkey-read.yaml deleted file mode 100644 index e91c2856b..000000000 --- a/clusters/cl01tl/manifests/immich/Service-immich-valkey-read.yaml +++ /dev/null @@ -1,21 +0,0 @@ -apiVersion: v1 -kind: Service -metadata: - name: immich-valkey-read - labels: - helm.sh/chart: valkey-0.9.4 - app.kubernetes.io/name: valkey - app.kubernetes.io/instance: immich - app.kubernetes.io/version: "9.0.3" - app.kubernetes.io/managed-by: Helm - app.kubernetes.io/component: read -spec: - type: ClusterIP - ports: - - name: tcp - port: 6379 - targetPort: tcp - protocol: TCP - selector: - app.kubernetes.io/name: valkey - app.kubernetes.io/instance: immich diff --git a/clusters/cl01tl/manifests/immich/Service-immich-valkey.yaml b/clusters/cl01tl/manifests/immich/Service-immich-valkey.yaml deleted file mode 100644 index 1f4852319..000000000 --- a/clusters/cl01tl/manifests/immich/Service-immich-valkey.yaml +++ /dev/null @@ -1,22 +0,0 @@ -apiVersion: v1 -kind: Service -metadata: - name: immich-valkey - labels: - helm.sh/chart: valkey-0.9.4 - app.kubernetes.io/name: valkey - app.kubernetes.io/instance: immich - app.kubernetes.io/version: "9.0.3" - app.kubernetes.io/managed-by: Helm - app.kubernetes.io/component: primary -spec: - type: ClusterIP - ports: - - port: 6379 - targetPort: tcp - protocol: TCP - name: tcp - selector: - app.kubernetes.io/name: valkey - app.kubernetes.io/instance: immich - statefulset.kubernetes.io/pod-name: immich-valkey-0 diff --git a/clusters/cl01tl/manifests/immich/Service-immich.yaml b/clusters/cl01tl/manifests/immich/Service-immich.yaml deleted file mode 100644 index fb0158a83..000000000 --- a/clusters/cl01tl/manifests/immich/Service-immich.yaml +++ /dev/null @@ -1,30 +0,0 @@ -apiVersion: v1 -kind: Service -metadata: - name: immich - labels: - app.kubernetes.io/instance: immich - app.kubernetes.io/managed-by: Helm - app.kubernetes.io/name: immich - app.kubernetes.io/service: immich - helm.sh/chart: immich-4.6.2 - namespace: immich -spec: - type: ClusterIP - ports: - - port: 2283 - targetPort: 2283 - protocol: TCP - name: http - - port: 8081 - targetPort: 8081 - protocol: TCP - name: metrics-api - - port: 8082 - targetPort: 8082 - protocol: TCP - name: metrics-ms - selector: - app.kubernetes.io/controller: main - app.kubernetes.io/instance: immich - app.kubernetes.io/name: immich diff --git a/clusters/cl01tl/manifests/immich/ServiceAccount-immich-valkey.yaml b/clusters/cl01tl/manifests/immich/ServiceAccount-immich-valkey.yaml deleted file mode 100644 index d1e7d95df..000000000 --- a/clusters/cl01tl/manifests/immich/ServiceAccount-immich-valkey.yaml +++ /dev/null @@ -1,11 +0,0 @@ -apiVersion: v1 -kind: ServiceAccount -metadata: - name: immich-valkey - labels: - helm.sh/chart: valkey-0.9.4 - app.kubernetes.io/name: valkey - app.kubernetes.io/instance: immich - app.kubernetes.io/version: "9.0.3" - app.kubernetes.io/managed-by: Helm -automountServiceAccountToken: false diff --git a/clusters/cl01tl/manifests/immich/ServiceAccount-immich.yaml b/clusters/cl01tl/manifests/immich/ServiceAccount-immich.yaml deleted file mode 100644 index 4c94fbe57..000000000 --- a/clusters/cl01tl/manifests/immich/ServiceAccount-immich.yaml +++ /dev/null @@ -1,12 +0,0 @@ -apiVersion: v1 -kind: ServiceAccount -metadata: - name: immich - labels: - app.kubernetes.io/instance: immich - app.kubernetes.io/managed-by: Helm - app.kubernetes.io/name: immich - helm.sh/chart: immich-4.6.2 - namespace: immich -secrets: - - name: immich-immich-sa-token diff --git a/clusters/cl01tl/manifests/immich/ServiceMonitor-immich-valkey.yaml b/clusters/cl01tl/manifests/immich/ServiceMonitor-immich-valkey.yaml deleted file mode 100644 index 631397d43..000000000 --- a/clusters/cl01tl/manifests/immich/ServiceMonitor-immich-valkey.yaml +++ /dev/null @@ -1,24 +0,0 @@ -apiVersion: monitoring.coreos.com/v1 -kind: ServiceMonitor -metadata: - name: immich-valkey - labels: - helm.sh/chart: valkey-0.9.4 - app.kubernetes.io/name: valkey - app.kubernetes.io/instance: immich - app.kubernetes.io/version: "9.0.3" - app.kubernetes.io/managed-by: Helm - app.kubernetes.io/part-of: valkey - app.kubernetes.io/component: service-monitor -spec: - endpoints: - - port: metrics - interval: 30s - namespaceSelector: - matchNames: - - immich - selector: - matchLabels: - app.kubernetes.io/name: valkey - app.kubernetes.io/instance: immich - app.kubernetes.io/component: metrics diff --git a/clusters/cl01tl/manifests/immich/ServiceMonitor-immich.yaml b/clusters/cl01tl/manifests/immich/ServiceMonitor-immich.yaml deleted file mode 100644 index 2e4deea17..000000000 --- a/clusters/cl01tl/manifests/immich/ServiceMonitor-immich.yaml +++ /dev/null @@ -1,28 +0,0 @@ -apiVersion: monitoring.coreos.com/v1 -kind: ServiceMonitor -metadata: - name: immich - labels: - app.kubernetes.io/instance: immich - app.kubernetes.io/managed-by: Helm - app.kubernetes.io/name: immich - helm.sh/chart: immich-4.6.2 - namespace: immich -spec: - jobLabel: immich - namespaceSelector: - matchNames: - - immich - selector: - matchLabels: - app.kubernetes.io/instance: immich - app.kubernetes.io/name: immich - endpoints: - - interval: 3m - path: /metrics - port: metrics-api - scrapeTimeout: 1m - - interval: 3m - path: /metrics - port: metrics-ms - scrapeTimeout: 1m diff --git a/clusters/cl01tl/manifests/immich/StatefulSet-immich-valkey.yaml b/clusters/cl01tl/manifests/immich/StatefulSet-immich-valkey.yaml deleted file mode 100644 index 993c7cda2..000000000 --- a/clusters/cl01tl/manifests/immich/StatefulSet-immich-valkey.yaml +++ /dev/null @@ -1,133 +0,0 @@ -apiVersion: apps/v1 -kind: StatefulSet -metadata: - name: immich-valkey - labels: - helm.sh/chart: valkey-0.9.4 - app.kubernetes.io/name: valkey - app.kubernetes.io/instance: immich - app.kubernetes.io/version: "9.0.3" - app.kubernetes.io/managed-by: Helm -spec: - serviceName: immich-valkey-headless - replicas: 3 - podManagementPolicy: OrderedReady - selector: - matchLabels: - app.kubernetes.io/name: valkey - app.kubernetes.io/instance: immich - volumeClaimTemplates: - - metadata: - name: valkey-data - spec: - accessModes: - - ReadWriteOnce - storageClassName: "ceph-block" - resources: - requests: - storage: "1Gi" - template: - metadata: - labels: - app.kubernetes.io/name: valkey - app.kubernetes.io/instance: immich - annotations: - checksum/initconfig: "2d8432be19db9efa32b993becf4e58d4" - spec: - automountServiceAccountToken: false - serviceAccountName: immich-valkey - securityContext: - fsGroup: 1000 - runAsGroup: 1000 - runAsUser: 1000 - seccompProfile: - type: RuntimeDefault - initContainers: - - name: immich-valkey-init - image: docker.io/valkey/valkey:9.0.3@sha256:3b55fbaa0cd93cf0d9d961f405e4dfcc70efe325e2d84da207a0a8e6d8fde4f9 - imagePullPolicy: IfNotPresent - securityContext: - allowPrivilegeEscalation: false - capabilities: - drop: - - ALL - readOnlyRootFilesystem: true - runAsNonRoot: true - runAsUser: 1000 - command: ["/scripts/init.sh"] - env: - - name: POD_INDEX - valueFrom: - fieldRef: - fieldPath: metadata.labels['apps.kubernetes.io/pod-index'] - volumeMounts: - - name: valkey-data - mountPath: /data - - name: scripts - mountPath: /scripts - containers: - - name: immich-valkey - image: docker.io/valkey/valkey:9.0.3@sha256:3b55fbaa0cd93cf0d9d961f405e4dfcc70efe325e2d84da207a0a8e6d8fde4f9 - imagePullPolicy: IfNotPresent - command: ["valkey-server"] - args: ["/data/conf/valkey.conf"] - securityContext: - allowPrivilegeEscalation: false - capabilities: - drop: - - ALL - readOnlyRootFilesystem: true - runAsNonRoot: true - runAsUser: 1000 - env: - - name: POD_INDEX - valueFrom: - fieldRef: - fieldPath: metadata.labels['apps.kubernetes.io/pod-index'] - - name: VALKEY_LOGLEVEL - value: "notice" - ports: - - name: tcp - containerPort: 6379 - protocol: TCP - startupProbe: - exec: - command: ["sh", "-c", "valkey-cli ping"] - livenessProbe: - exec: - command: ["sh", "-c", "valkey-cli ping"] - resources: - requests: - cpu: 10m - memory: 20Mi - volumeMounts: - - name: valkey-data - mountPath: /data - - name: metrics - image: ghcr.io/oliver006/redis_exporter:v1.83.0@sha256:e8c209894d4c0cc55b1259ddd47e0b769ad1ff864b356736ee885462a3b0e48c - imagePullPolicy: "IfNotPresent" - ports: - - name: metrics - containerPort: 9121 - startupProbe: - tcpSocket: - port: metrics - livenessProbe: - tcpSocket: - port: metrics - readinessProbe: - httpGet: - path: / - port: metrics - resources: - requests: - cpu: 1m - memory: 10M - env: - - name: REDIS_ALIAS - value: immich-valkey - volumes: - - name: scripts - configMap: - name: immich-valkey-init-scripts - defaultMode: 0555 -- 2.49.1 From 38dfd84ce4d470061f6e646fd985bef319c3127d Mon Sep 17 00:00:00 2001 From: gitea-bot Date: Sun, 3 May 2026 00:44:10 +0000 Subject: [PATCH 2/5] chore: Update manifests after change --- clusters/cl01tl/manifests/whodb/Deployment-whodb.yaml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/clusters/cl01tl/manifests/whodb/Deployment-whodb.yaml b/clusters/cl01tl/manifests/whodb/Deployment-whodb.yaml index 500bc81c9..8c7d88bb0 100644 --- a/clusters/cl01tl/manifests/whodb/Deployment-whodb.yaml +++ b/clusters/cl01tl/manifests/whodb/Deployment-whodb.yaml @@ -39,7 +39,7 @@ spec: value: ollama-server-2.ollama - name: WHODB_OLLAMA_PORT value: "11434" - image: clidey/whodb:0.106.0@sha256:f872bfcdf2f1cd6d9e97fa4c5d8dd521636bea1bfc0efe0a27ab6c9c11137010 + image: clidey/whodb:0.107.0@sha256:20fb865bd0972bf56600c9e16715a3b5f30fef77defc19d74efcf72218d5f567 name: main resources: requests: -- 2.49.1 From 96e1ea13fcaf62e91cdbcf267210c7abe6cdf1c8 Mon Sep 17 00:00:00 2001 From: gitea-bot Date: Sun, 3 May 2026 00:44:49 +0000 Subject: [PATCH 3/5] chore: Update manifests after change --- .../loki/ClusterRole-loki-clusterrole.yaml | 4 +- ...erRoleBinding-loki-clusterrolebinding.yaml | 4 +- .../loki/ConfigMap-loki-gateway.yaml | 193 +++++++++++++++++- .../loki/ConfigMap-loki-runtime.yaml | 4 +- .../cl01tl/manifests/loki/ConfigMap-loki.yaml | 27 ++- .../manifests/loki/DaemonSet-loki-canary.yaml | 51 +++-- .../loki/Deployment-loki-gateway.yaml | 67 +++++- .../loki/HTTPRoute-loki-gateway.yaml | 30 +++ .../manifests/loki/Pod-loki-helm-test.yaml | 27 --- .../manifests/loki/Service-loki-canary.yaml | 4 +- .../loki/Service-loki-chunks-cache.yaml | 4 +- .../loki/Service-loki-gateway-exporter.yaml | 23 +++ .../manifests/loki/Service-loki-gateway.yaml | 8 +- .../manifests/loki/Service-loki-headless.yaml | 20 +- .../loki/Service-loki-memberlist.yaml | 4 +- .../loki/Service-loki-results-cache.yaml | 4 +- .../cl01tl/manifests/loki/Service-loki.yaml | 16 +- .../loki/ServiceAccount-loki-canary.yaml | 6 +- .../loki/ServiceAccount-loki-gateway.yaml | 12 ++ .../loki/ServiceAccount-loki-memcached.yaml | 12 ++ .../manifests/loki/ServiceAccount-loki.yaml | 4 +- .../loki/StatefulSet-loki-chunks-cache.yaml | 18 +- .../loki/StatefulSet-loki-results-cache.yaml | 18 +- .../manifests/loki/StatefulSet-loki.yaml | 128 ++++++++---- 24 files changed, 554 insertions(+), 134 deletions(-) create mode 100644 clusters/cl01tl/manifests/loki/HTTPRoute-loki-gateway.yaml delete mode 100644 clusters/cl01tl/manifests/loki/Pod-loki-helm-test.yaml create mode 100644 clusters/cl01tl/manifests/loki/Service-loki-gateway-exporter.yaml create mode 100644 clusters/cl01tl/manifests/loki/ServiceAccount-loki-gateway.yaml create mode 100644 clusters/cl01tl/manifests/loki/ServiceAccount-loki-memcached.yaml diff --git a/clusters/cl01tl/manifests/loki/ClusterRole-loki-clusterrole.yaml b/clusters/cl01tl/manifests/loki/ClusterRole-loki-clusterrole.yaml index 3555cc833..52316c621 100644 --- a/clusters/cl01tl/manifests/loki/ClusterRole-loki-clusterrole.yaml +++ b/clusters/cl01tl/manifests/loki/ClusterRole-loki-clusterrole.yaml @@ -2,10 +2,10 @@ kind: ClusterRole apiVersion: rbac.authorization.k8s.io/v1 metadata: labels: - helm.sh/chart: loki-6.55.0 + helm.sh/chart: loki-13.5.0 app.kubernetes.io/name: loki app.kubernetes.io/instance: loki - app.kubernetes.io/version: "3.6.7" + app.kubernetes.io/version: "3.7.1" name: loki-clusterrole rules: - apiGroups: [""] diff --git a/clusters/cl01tl/manifests/loki/ClusterRoleBinding-loki-clusterrolebinding.yaml b/clusters/cl01tl/manifests/loki/ClusterRoleBinding-loki-clusterrolebinding.yaml index 915ce6ad7..27b30648e 100644 --- a/clusters/cl01tl/manifests/loki/ClusterRoleBinding-loki-clusterrolebinding.yaml +++ b/clusters/cl01tl/manifests/loki/ClusterRoleBinding-loki-clusterrolebinding.yaml @@ -3,10 +3,10 @@ apiVersion: rbac.authorization.k8s.io/v1 metadata: name: loki-clusterrolebinding labels: - helm.sh/chart: loki-6.55.0 + helm.sh/chart: loki-13.5.0 app.kubernetes.io/name: loki app.kubernetes.io/instance: loki - app.kubernetes.io/version: "3.6.7" + app.kubernetes.io/version: "3.7.1" subjects: - kind: ServiceAccount name: loki diff --git a/clusters/cl01tl/manifests/loki/ConfigMap-loki-gateway.yaml b/clusters/cl01tl/manifests/loki/ConfigMap-loki-gateway.yaml index 5c9fd27ee..34097ed3f 100644 --- a/clusters/cl01tl/manifests/loki/ConfigMap-loki-gateway.yaml +++ b/clusters/cl01tl/manifests/loki/ConfigMap-loki-gateway.yaml @@ -4,10 +4,197 @@ metadata: name: loki-gateway namespace: loki labels: - helm.sh/chart: loki-6.55.0 + helm.sh/chart: loki-13.5.0 app.kubernetes.io/name: loki app.kubernetes.io/instance: loki - app.kubernetes.io/version: "3.6.7" + app.kubernetes.io/version: "3.7.1" app.kubernetes.io/component: gateway data: - nginx.conf: "worker_processes 5; ## Default: 1\nerror_log /dev/stderr;\npid /tmp/nginx.pid;\nworker_rlimit_nofile 8192;\n\nevents {\n worker_connections 4096; ## Default: 1024\n}\n\nhttp {\n client_body_temp_path /tmp/client_temp;\n proxy_temp_path /tmp/proxy_temp_path;\n fastcgi_temp_path /tmp/fastcgi_temp;\n uwsgi_temp_path /tmp/uwsgi_temp;\n scgi_temp_path /tmp/scgi_temp;\n\n client_max_body_size 4M;\n\n proxy_read_timeout 600; ## 10 minutes\n proxy_send_timeout 600;\n proxy_connect_timeout 600;\n\n proxy_http_version 1.1;\n\n default_type application/octet-stream;\n log_format main '$remote_addr - $remote_user [$time_local] $status '\n '\"$request\" $body_bytes_sent \"$http_referer\" '\n '\"$http_user_agent\" \"$http_x_forwarded_for\"';\n access_log /dev/stderr main;\n\n sendfile on;\n tcp_nopush on;\n resolver kube-dns.kube-system.svc.cluster.local.;\n\n # if the X-Query-Tags header is empty, set a noop= without a value as empty values are not logged\n map $http_x_query_tags $query_tags {\n \"\" \"noop=\"; # When header is empty, set noop=\n default $http_x_query_tags; # Otherwise, preserve the original value\n }\n\n server {\n listen 8080;\n listen [::]:8080;\n\n location = / {\n \n return 200 'OK';\n auth_basic off;\n }\n\n ########################################################\n # Configure backend targets\n location ^~ /ui {\n \n proxy_pass http://loki.loki.svc.cluster.local:3100$request_uri;\n }\n\n # Distributor\n location = /api/prom/push {\n \n proxy_pass http://loki.loki.svc.cluster.local:3100$request_uri;\n }\n location = /loki/api/v1/push {\n \n proxy_pass http://loki.loki.svc.cluster.local:3100$request_uri;\n }\n location = /distributor/ring {\n \n proxy_pass http://loki.loki.svc.cluster.local:3100$request_uri;\n }\n location = /otlp/v1/logs {\n \n proxy_pass http://loki.loki.svc.cluster.local:3100$request_uri;\n }\n\n # Ingester\n location = /flush {\n \n proxy_pass http://loki.loki.svc.cluster.local:3100$request_uri;\n }\n location ^~ /ingester/ {\n \n proxy_pass http://loki.loki.svc.cluster.local:3100$request_uri;\n }\n location = /ingester {\n \n internal; # to suppress 301\n }\n\n # Ring\n location = /ring {\n \n proxy_pass http://loki.loki.svc.cluster.local:3100$request_uri;\n }\n\n # MemberListKV\n location = /memberlist {\n \n proxy_pass http://loki.loki.svc.cluster.local:3100$request_uri;\n }\n\n # Ruler\n location = /ruler/ring {\n \n proxy_pass http://loki.loki.svc.cluster.local:3100$request_uri;\n }\n location = /api/prom/rules {\n \n proxy_pass http://loki.loki.svc.cluster.local:3100$request_uri;\n }\n location ^~ /api/prom/rules/ {\n \n proxy_pass http://loki.loki.svc.cluster.local:3100$request_uri;\n }\n location = /loki/api/v1/rules {\n \n proxy_pass http://loki.loki.svc.cluster.local:3100$request_uri;\n }\n location ^~ /loki/api/v1/rules/ {\n \n proxy_pass http://loki.loki.svc.cluster.local:3100$request_uri;\n }\n location = /prometheus/api/v1/alerts {\n \n proxy_pass http://loki.loki.svc.cluster.local:3100$request_uri;\n }\n location = /prometheus/api/v1/rules {\n \n proxy_pass http://loki.loki.svc.cluster.local:3100$request_uri;\n }\n\n # Compactor\n location = /compactor/ring {\n \n proxy_pass http://loki.loki.svc.cluster.local:3100$request_uri;\n }\n location = /loki/api/v1/delete {\n \n proxy_pass http://loki.loki.svc.cluster.local:3100$request_uri;\n }\n location = /loki/api/v1/cache/generation_numbers {\n \n proxy_pass http://loki.loki.svc.cluster.local:3100$request_uri;\n }\n\n # IndexGateway\n location = /indexgateway/ring {\n \n proxy_pass http://loki.loki.svc.cluster.local:3100$request_uri;\n }\n\n # QueryScheduler\n location = /scheduler/ring {\n \n proxy_pass http://loki.loki.svc.cluster.local:3100$request_uri;\n }\n\n # Config\n location = /config {\n \n proxy_pass http://loki.loki.svc.cluster.local:3100$request_uri;\n }\n\n\n # QueryFrontend, Querier\n location = /api/prom/tail {\n proxy_set_header Upgrade $http_upgrade;\n proxy_set_header Connection \"upgrade\";\n \n proxy_pass http://loki.loki.svc.cluster.local:3100$request_uri;\n }\n location = /loki/api/v1/tail {\n proxy_set_header Upgrade $http_upgrade;\n proxy_set_header Connection \"upgrade\";\n \n proxy_pass http://loki.loki.svc.cluster.local:3100$request_uri;\n }\n location ^~ /api/prom/ {\n \n proxy_pass http://loki.loki.svc.cluster.local:3100$request_uri;\n }\n location = /api/prom {\n \n internal; # to suppress 301\n }\n location ^~ /loki/api/v1/ {\n # pass custom headers set by Grafana as X-Query-Tags which are logged as key/value pairs in metrics.go log messages\n proxy_set_header X-Query-Tags \"${query_tags},user=${http_x_grafana_user},dashboard_id=${http_x_dashboard_uid},dashboard_title=${http_x_dashboard_title},panel_id=${http_x_panel_id},panel_title=${http_x_panel_title},source_rule_uid=${http_x_rule_uid},rule_name=${http_x_rule_name},rule_folder=${http_x_rule_folder},rule_version=${http_x_rule_version},rule_source=${http_x_rule_source},rule_type=${http_x_rule_type}\";\n \n proxy_pass http://loki.loki.svc.cluster.local:3100$request_uri;\n }\n location = /loki/api/v1 {\n \n internal; # to suppress 301\n }\n }\n}\n" + nginx.conf: "worker_processes 5; ## Default: 1\nerror_log /dev/stderr;\npid /tmp/nginx.pid;\nworker_rlimit_nofile 8192;\n\nevents {\n worker_connections 4096; ## Default: 1024\n}\n\nhttp {\n client_body_temp_path /tmp/client_temp;\n proxy_temp_path /tmp/proxy_temp_path;\n fastcgi_temp_path /tmp/fastcgi_temp;\n uwsgi_temp_path /tmp/uwsgi_temp;\n scgi_temp_path /tmp/scgi_temp;\n\n client_max_body_size 4M;\n\n proxy_read_timeout 600; ## 10 minutes\n proxy_send_timeout 600;\n proxy_connect_timeout 600;\n\n proxy_http_version 1.1;\n\n default_type application/octet-stream;\n log_format main '$remote_addr - $remote_user [$time_local] $status '\n '\"$request\" $body_bytes_sent \"$http_referer\" '\n '\"$http_user_agent\" \"$http_x_forwarded_for\"';\n # Exclude specific requests from logging\n map $request_uri $track {\n default 1;\n ~^/$ 0;\n ~^/health 0;\n ~^/metrics 0;\n }\n\n # simple_upstream preset\n log_format access_log_exporter '$http_host\\t$request_method\\t$status\\t$request_completion\\t$request_time\\t$request_length\\t$bytes_sent\\t$upstream_addr\\t$upstream_connect_time\\t$upstream_header_time\\t$upstream_response_time\\t$request_uri';\n access_log syslog:server=127.0.0.1:8514,nohostname access_log_exporter if=$track;\n access_log /dev/stderr main;\n\n sendfile on;\n tcp_nopush on;\n resolver kube-dns.kube-system.svc.cluster.local.;\n\n # if the X-Query-Tags header is empty, set a noop= without a value as empty values are not logged\n map $http_x_query_tags $query_tags {\n \"\" \"noop=\"; # When header is empty, set noop=\n default $http_x_query_tags; # Otherwise, preserve the original value\n }\n\n server {\n listen 8080;\n listen [::]:8080;\n\n location = / {\n \n return 200 'OK';\n auth_basic off;\n }\n\n location = /stub_status {\n stub_status on;\n satisfy any;\n access_log off;\n allow 127.0.0.1;\n deny all;\n server_tokens on; # expose nginx version\n }\n\n ########################################################\n # Configure backend targets\n location ^~ /ui {\n \n set $backend \"http://loki.loki.svc.cluster.local:3100\";\n proxy_pass $backend$request_uri;\n }\n\n # Distributor\n location = /api/prom/push {\n \n set $backend \"http://loki.loki.svc.cluster.local:3100\";\n proxy_pass $backend$request_uri;\n }\n location = /loki/api/v1/push {\n \n set $backend \"http://loki.loki.svc.cluster.local:3100\";\n proxy_pass $backend$request_uri;\n }\n location = /distributor/ring {\n \n set $backend \"http://loki.loki.svc.cluster.local:3100\";\n proxy_pass $backend$request_uri;\n }\n location = /otlp/v1/logs {\n \n set $backend \"http://loki.loki.svc.cluster.local:3100\";\n proxy_pass $backend$request_uri;\n }\n\n # Ingester\n location = /flush {\n \n set $backend \"http://loki.loki.svc.cluster.local:3100\";\n proxy_pass $backend$request_uri;\n }\n location ^~ /ingester/ {\n \n set $backend \"http://loki.loki.svc.cluster.local:3100\";\n proxy_pass $backend$request_uri;\n }\n location = /ingester {\n \n internal; # to suppress 301\n }\n\n # Ring\n location = /ring {\n \n set $backend \"http://loki.loki.svc.cluster.local:3100\";\n proxy_pass $backend$request_uri;\n }\n\n # MemberListKV\n location = /memberlist {\n \n set $backend \"http://loki.loki.svc.cluster.local:3100\";\n proxy_pass $backend$request_uri;\n }\n\n # Ruler\n location = /ruler/ring {\n \n set $backend \"http://loki.loki.svc.cluster.local:3100\";\n proxy_pass $backend$request_uri;\n }\n location = /api/prom/rules {\n \n set $backend \"http://loki.loki.svc.cluster.local:3100\";\n proxy_pass $backend$request_uri;\n }\n location ^~ /api/prom/rules/ {\n \n set $backend \"http://loki.loki.svc.cluster.local:3100\";\n proxy_pass $backend$request_uri;\n }\n location = /loki/api/v1/rules {\n \n set $backend \"http://loki.loki.svc.cluster.local:3100\";\n proxy_pass $backend$request_uri;\n }\n location ^~ /loki/api/v1/rules/ {\n \n set $backend \"http://loki.loki.svc.cluster.local:3100\";\n proxy_pass $backend$request_uri;\n }\n location = /prometheus/api/v1/alerts {\n \n set $backend \"http://loki.loki.svc.cluster.local:3100\";\n proxy_pass $backend$request_uri;\n }\n location = /prometheus/api/v1/rules {\n \n set $backend \"http://loki.loki.svc.cluster.local:3100\";\n proxy_pass $backend$request_uri;\n }\n\n # Compactor\n location = /compactor/ring {\n \n set $backend \"http://loki.loki.svc.cluster.local:3100\";\n proxy_pass $backend$request_uri;\n }\n location = /loki/api/v1/delete {\n \n set $backend \"http://loki.loki.svc.cluster.local:3100\";\n proxy_pass $backend$request_uri;\n }\n location = /loki/api/v1/cache/generation_numbers {\n \n set $backend \"http://loki.loki.svc.cluster.local:3100\";\n proxy_pass $backend$request_uri;\n }\n\n # IndexGateway\n location = /indexgateway/ring {\n \n set $backend \"http://loki.loki.svc.cluster.local:3100\";\n proxy_pass $backend$request_uri;\n }\n\n # QueryScheduler\n location = /scheduler/ring {\n \n set $backend \"http://loki.loki.svc.cluster.local:3100\";\n proxy_pass $backend$request_uri;\n }\n\n # Config\n location = /config {\n \n set $backend \"http://loki.loki.svc.cluster.local:3100\";\n proxy_pass $backend$request_uri;\n }\n\n # QueryFrontend, Querier\n location = /api/prom/tail {\n proxy_set_header Upgrade $http_upgrade;\n proxy_set_header Connection \"upgrade\";\n \n set $backend \"http://loki.loki.svc.cluster.local:3100\";\n proxy_pass $backend$request_uri;\n }\n location = /loki/api/v1/tail {\n proxy_set_header Upgrade $http_upgrade;\n proxy_set_header Connection \"upgrade\";\n \n set $backend \"http://loki.loki.svc.cluster.local:3100\";\n proxy_pass $backend$request_uri;\n }\n location ^~ /api/prom/ {\n \n set $backend \"http://loki.loki.svc.cluster.local:3100\";\n proxy_pass $backend$request_uri;\n }\n location = /api/prom {\n \n internal; # to suppress 301\n }\n location ^~ /loki/api/v1/ {\n # pass custom headers set by Grafana as X-Query-Tags which are logged as key/value pairs in metrics.go log messages\n proxy_set_header X-Query-Tags \"${query_tags},user=${http_x_grafana_user},dashboard_id=${http_x_dashboard_uid},dashboard_title=${http_x_dashboard_title},panel_id=${http_x_panel_id},panel_title=${http_x_panel_title},source_rule_uid=${http_x_rule_uid},rule_name=${http_x_rule_name},rule_folder=${http_x_rule_folder},rule_version=${http_x_rule_version},rule_source=${http_x_rule_source},rule_type=${http_x_rule_type}\";\n \n set $backend \"http://loki.loki.svc.cluster.local:3100\";\n proxy_pass $backend$request_uri;\n }\n location = /loki/api/v1 {\n \n internal; # to suppress 301\n }\n }\n}\n" + access-log-exporter.yaml: | + presets: + loki: + metrics: + - name: "http_requests_total" + type: "counter" + help: "The total number of client requests." + labels: + - name: "host" + lineIndex: 0 + - name: "method" + lineIndex: 1 + - name: "status" + lineIndex: 2 + - name: "path" + lineIndex: 11 + replacements: + - regexp: "^$" + replacement: "/" + - regexp: "^(.+)\\?.+" + replacement: "$1" + + - name: "http_requests_completed_total" + type: "counter" + help: "The total number of completed requests." + valueIndex: 3 + replacements: + - string: "OK" + replacement: "1" + labels: + - name: "host" + lineIndex: 0 + - name: "method" + lineIndex: 1 + - name: "status" + lineIndex: 2 + - name: "path" + lineIndex: 11 + replacements: + - regexp: "^$" + replacement: "/" + - regexp: "^(.+)\\?.+" + replacement: "$1" + + - name: "http_request_size_bytes" + type: "histogram" + buckets: [ 10,1000,100000,1000000,5000000,50000000,200000000 ] + help: "The request length (including request line, header, and request body)" + valueIndex: 5 + labels: + - name: "host" + lineIndex: 0 + - name: "method" + lineIndex: 1 + - name: "status" + lineIndex: 2 + - name: "path" + lineIndex: 11 + replacements: + - regexp: "^$" + replacement: "/" + - regexp: "^(.+)\\?.+" + replacement: "$1" + + - name: "http_response_size_bytes" + type: "histogram" + buckets: [ 10,1000,100000,1000000,5000000,50000000,200000000 ] + help: "The response length (including request line, header, and request body)" + valueIndex: 6 + labels: + - name: "host" + lineIndex: 0 + - name: "method" + lineIndex: 1 + - name: "status" + lineIndex: 2 + - name: "path" + lineIndex: 11 + replacements: + - regexp: "^$" + replacement: "/" + - regexp: "^(.+)\\?.+" + replacement: "$1" + + - name: "http_request_duration_seconds" + type: "histogram" + buckets: [ .005, .01, .025, .05, .1, .25, .5, 1, 2.5, 5, 10 ] + help: "The time spent on receiving and response the response to the client" + valueIndex: 4 + math: + enabled: true + div: 1000 + labels: + - name: "host" + lineIndex: 0 + - name: "method" + lineIndex: 1 + - name: "status" + lineIndex: 2 + - name: "path" + lineIndex: 11 + replacements: + - regexp: "^$" + replacement: "/" + - regexp: "^(.+)\\?.+" + replacement: "$1" + + - name: "http_upstream_connect_duration_seconds" + type: "histogram" + buckets: [ .005, .01, .025, .05, .1, .25, .5, 1, 2.5, 5, 10 ] + help: "The time spent on establishing a connection with the upstream server" + valueIndex: 8 + math: + enabled: true + div: 1000 + upstream: + enabled: true + addrLineIndex: 7 + excludes: [] + labels: + - name: "host" + lineIndex: 0 + - name: "method" + lineIndex: 1 + - name: "status" + lineIndex: 2 + - name: "path" + lineIndex: 11 + replacements: + - regexp: "^$" + replacement: "/" + - regexp: "^(.+)\\?.+" + replacement: "$1" + + - name: "http_upstream_header_duration_seconds" + type: "histogram" + help: "The time spent on receiving the response header from the upstream server" + buckets: [ .005, .01, .025, .05, .1, .25, .5, 1, 2.5, 5, 10 ] + valueIndex: 9 + math: + enabled: true + div: 1000 + upstream: + enabled: true + addrLineIndex: 7 + excludes: [] + labels: + - name: "host" + lineIndex: 0 + - name: "method" + lineIndex: 1 + - name: "status" + lineIndex: 2 + - name: "path" + lineIndex: 11 + replacements: + - regexp: "^$" + replacement: "/" + - regexp: "^(.+)\\?.+" + replacement: "$1" + + - name: "http_upstream_request_duration_seconds" + type: "histogram" + help: "The time spent on receiving the response from the upstream server" + buckets: [ .005, .01, .025, .05, .1, .25, .5, 1, 2.5, 5, 10 ] + valueIndex: 10 + math: + enabled: true + div: 1000 + upstream: + enabled: true + addrLineIndex: 7 + excludes: [] + labels: + - name: "host" + lineIndex: 0 + - name: "method" + lineIndex: 1 + - name: "status" + lineIndex: 2 + - name: "path" + lineIndex: 11 + replacements: + - regexp: "^$" + replacement: "/" + - regexp: "^(.+)\\?.+" + replacement: "$1" diff --git a/clusters/cl01tl/manifests/loki/ConfigMap-loki-runtime.yaml b/clusters/cl01tl/manifests/loki/ConfigMap-loki-runtime.yaml index 01b22c4f6..1d691831e 100644 --- a/clusters/cl01tl/manifests/loki/ConfigMap-loki-runtime.yaml +++ b/clusters/cl01tl/manifests/loki/ConfigMap-loki-runtime.yaml @@ -4,10 +4,10 @@ metadata: name: loki-runtime namespace: loki labels: - helm.sh/chart: loki-6.55.0 + helm.sh/chart: loki-13.5.0 app.kubernetes.io/name: loki app.kubernetes.io/instance: loki - app.kubernetes.io/version: "3.6.7" + app.kubernetes.io/version: "3.7.1" data: runtime-config.yaml: | {} diff --git a/clusters/cl01tl/manifests/loki/ConfigMap-loki.yaml b/clusters/cl01tl/manifests/loki/ConfigMap-loki.yaml index e12f58d9a..b79bc2d8f 100644 --- a/clusters/cl01tl/manifests/loki/ConfigMap-loki.yaml +++ b/clusters/cl01tl/manifests/loki/ConfigMap-loki.yaml @@ -4,10 +4,10 @@ metadata: name: loki namespace: loki labels: - helm.sh/chart: loki-6.55.0 + helm.sh/chart: loki-13.5.0 app.kubernetes.io/name: loki app.kubernetes.io/instance: loki - app.kubernetes.io/version: "3.6.7" + app.kubernetes.io/version: "3.7.1" data: config.yaml: |2 @@ -57,6 +57,9 @@ data: scheduler_address: "" index_gateway: mode: simple + ingester: + wal: + flush_on_shutdown: true ingester_client: pool_config: remote_timeout: 10s @@ -74,8 +77,16 @@ data: split_queries_by_interval: 15m volume_enabled: true memberlist: + abort_if_cluster_join_fails: true + advertise_addr: ${HASH_RING_INSTANCE_ADDR} + advertise_port: 7946 + bind_port: 7946 join_members: - loki-memberlist.loki.svc.cluster.local + max_join_backoff: 1m + max_join_retries: 10 + min_join_backoff: 1s + rejoin_interval: 90s pattern_ingester: enabled: false query_range: @@ -109,10 +120,18 @@ data: schema: v13 store: boltdb-shipper server: + graceful_shutdown_timeout: 5s grpc_listen_port: 9095 + grpc_server_max_concurrent_streams: 1000 + grpc_server_max_recv_msg_size: 104857600 + grpc_server_max_send_msg_size: 104857600 + grpc_server_min_time_between_pings: 10s + grpc_server_ping_without_stream_allowed: true http_listen_port: 3100 - http_server_read_timeout: 600s - http_server_write_timeout: 600s + http_server_idle_timeout: 30s + http_server_read_timeout: 10m0s + http_server_write_timeout: 10m0s + log_level: info storage_config: bloom_shipper: working_directory: /var/loki/data/bloomshipper diff --git a/clusters/cl01tl/manifests/loki/DaemonSet-loki-canary.yaml b/clusters/cl01tl/manifests/loki/DaemonSet-loki-canary.yaml index 605c7aef8..4b7013ddf 100644 --- a/clusters/cl01tl/manifests/loki/DaemonSet-loki-canary.yaml +++ b/clusters/cl01tl/manifests/loki/DaemonSet-loki-canary.yaml @@ -4,10 +4,10 @@ metadata: name: loki-canary namespace: loki labels: - helm.sh/chart: loki-6.55.0 + helm.sh/chart: loki-13.5.0 app.kubernetes.io/name: loki app.kubernetes.io/instance: loki - app.kubernetes.io/version: "3.6.7" + app.kubernetes.io/version: "3.7.1" app.kubernetes.io/component: canary spec: selector: @@ -21,34 +21,39 @@ spec: type: RollingUpdate template: metadata: + annotations: + kubectl.kubernetes.io/default-container: "canary" labels: + helm.sh/chart: loki-13.5.0 app.kubernetes.io/name: loki app.kubernetes.io/instance: loki + app.kubernetes.io/version: "3.7.1" app.kubernetes.io/component: canary spec: serviceAccountName: loki-canary + enableServiceLinks: true + automountServiceAccountToken: false securityContext: fsGroup: 10001 fsGroupChangePolicy: OnRootMismatch runAsGroup: 10001 runAsNonRoot: true runAsUser: 10001 + seccompProfile: + type: RuntimeDefault + terminationGracePeriodSeconds: 30 + volumes: + - name: temp + emptyDir: {} containers: - - name: loki-canary - image: docker.io/grafana/loki-canary:3.6.7 + - name: canary + image: docker.io/grafana/loki-canary:3.7.1 imagePullPolicy: IfNotPresent args: - -addr=loki-gateway.loki.svc.cluster.local.:80 - -labelname=pod - -labelvalue=$(POD_NAME) - -push=true - securityContext: - allowPrivilegeEscalation: false - capabilities: - drop: - - ALL - readOnlyRootFilesystem: true - volumeMounts: ports: - name: http-metrics containerPort: 3500 @@ -58,10 +63,32 @@ spec: valueFrom: fieldRef: fieldPath: metadata.name + - name: GOGC + value: "80" + - name: HASH_RING_INSTANCE_ADDR + valueFrom: + fieldRef: + fieldPath: status.podIP + securityContext: + allowPrivilegeEscalation: false + capabilities: + drop: + - ALL + readOnlyRootFilesystem: true + seccompProfile: + type: RuntimeDefault + livenessProbe: + httpGet: + path: /metrics + port: http-metrics + initialDelaySeconds: 15 + timeoutSeconds: 1 readinessProbe: httpGet: path: /metrics port: http-metrics initialDelaySeconds: 15 timeoutSeconds: 1 - volumes: + volumeMounts: + - name: temp + mountPath: /tmp diff --git a/clusters/cl01tl/manifests/loki/Deployment-loki-gateway.yaml b/clusters/cl01tl/manifests/loki/Deployment-loki-gateway.yaml index 775a4d82e..071bfb1c2 100644 --- a/clusters/cl01tl/manifests/loki/Deployment-loki-gateway.yaml +++ b/clusters/cl01tl/manifests/loki/Deployment-loki-gateway.yaml @@ -4,10 +4,10 @@ metadata: name: loki-gateway namespace: loki labels: - helm.sh/chart: loki-6.55.0 + helm.sh/chart: loki-13.5.0 app.kubernetes.io/name: loki app.kubernetes.io/instance: loki - app.kubernetes.io/version: "3.6.7" + app.kubernetes.io/version: "3.7.1" app.kubernetes.io/component: gateway spec: replicas: 1 @@ -22,32 +22,35 @@ spec: template: metadata: annotations: - checksum/config: d76bd0b627b1549dddc6ce5304d9322ebdeb13e5b813234d8067357925630015 + checksum/config: 7f59b16a0121fadc14a504ba3bc07ec5d397a0093def094ae56ba0c10f467dbb labels: app.kubernetes.io/name: loki app.kubernetes.io/instance: loki app.kubernetes.io/component: gateway spec: - serviceAccountName: loki + serviceAccountName: loki-gateway + automountServiceAccountToken: false enableServiceLinks: true securityContext: fsGroup: 101 runAsGroup: 101 runAsNonRoot: true runAsUser: 101 + seccompProfile: + type: RuntimeDefault terminationGracePeriodSeconds: 30 containers: - name: nginx - image: docker.io/nginxinc/nginx-unprivileged:1.29-alpine + image: docker.io/nginxinc/nginx-unprivileged:1.30-alpine imagePullPolicy: IfNotPresent ports: - - name: http-metrics + - name: http containerPort: 8080 protocol: TCP readinessProbe: httpGet: path: / - port: http-metrics + port: http initialDelaySeconds: 15 timeoutSeconds: 1 securityContext: @@ -56,6 +59,8 @@ spec: drop: - ALL readOnlyRootFilesystem: true + seccompProfile: + type: RuntimeDefault volumeMounts: - name: config mountPath: /etc/nginx @@ -67,6 +72,54 @@ spec: requests: cpu: 10m memory: 20Mi + - name: exporter + image: ghcr.io/jkroepke/access-log-exporter:0.3.11 + imagePullPolicy: IfNotPresent + ports: + - containerPort: 4040 + name: http-metrics + - containerPort: 8514 + name: syslog + args: + - --nginx.scrape-url + - http://127.0.0.1:8080/stub_status + - --preset + - loki + resources: + limits: {} + requests: {} + securityContext: + allowPrivilegeEscalation: false + capabilities: + drop: + - ALL + privileged: false + readOnlyRootFilesystem: true + runAsGroup: 65532 + runAsNonRoot: true + runAsUser: 65532 + seccompProfile: + type: RuntimeDefault + readinessProbe: + failureThreshold: 3 + httpGet: + path: /health + port: http-metrics + initialDelaySeconds: 5 + periodSeconds: 5 + timeoutSeconds: 3 + livenessProbe: + failureThreshold: 3 + httpGet: + path: /health + port: http-metrics + initialDelaySeconds: 30 + periodSeconds: 10 + timeoutSeconds: 5 + volumeMounts: + - name: config + mountPath: /config.yaml + subPath: access-log-exporter.yaml affinity: podAntiAffinity: requiredDuringSchedulingIgnoredDuringExecution: diff --git a/clusters/cl01tl/manifests/loki/HTTPRoute-loki-gateway.yaml b/clusters/cl01tl/manifests/loki/HTTPRoute-loki-gateway.yaml new file mode 100644 index 000000000..fa125f263 --- /dev/null +++ b/clusters/cl01tl/manifests/loki/HTTPRoute-loki-gateway.yaml @@ -0,0 +1,30 @@ +apiVersion: gateway.networking.k8s.io/v1 +kind: HTTPRoute +metadata: + name: loki-gateway + namespace: loki + labels: + helm.sh/chart: loki-13.5.0 + app.kubernetes.io/name: loki + app.kubernetes.io/instance: loki + app.kubernetes.io/version: "3.7.1" + app.kubernetes.io/component: gateway +spec: + parentRefs: + - group: gateway.networking.k8s.io + kind: Gateway + name: traefik-gateway + namespace: traefik + hostnames: + - loki.alexlebens.net + rules: + - backendRefs: + - group: "" + kind: Service + name: loki-gateway + port: 80 + weight: 1 + matches: + - path: + type: PathPrefix + value: / diff --git a/clusters/cl01tl/manifests/loki/Pod-loki-helm-test.yaml b/clusters/cl01tl/manifests/loki/Pod-loki-helm-test.yaml deleted file mode 100644 index fb905727d..000000000 --- a/clusters/cl01tl/manifests/loki/Pod-loki-helm-test.yaml +++ /dev/null @@ -1,27 +0,0 @@ -apiVersion: v1 -kind: Pod -metadata: - name: "loki-helm-test" - namespace: loki - labels: - helm.sh/chart: loki-6.55.0 - app.kubernetes.io/name: loki - app.kubernetes.io/instance: loki - app.kubernetes.io/version: "3.6.7" - app.kubernetes.io/component: helm-test - annotations: - "helm.sh/hook": test -spec: - containers: - - name: loki-helm-test - image: docker.io/grafana/loki-helm-test:latest - env: - - name: CANARY_SERVICE_ADDRESS - value: "http://loki-canary.loki.svc.cluster.local:3500/metrics" - - name: CANARY_PROMETHEUS_ADDRESS - value: "" - - name: CANARY_TEST_TIMEOUT - value: "1m" - args: - - -test.v - restartPolicy: Never diff --git a/clusters/cl01tl/manifests/loki/Service-loki-canary.yaml b/clusters/cl01tl/manifests/loki/Service-loki-canary.yaml index 05c108e0e..1e08c3500 100644 --- a/clusters/cl01tl/manifests/loki/Service-loki-canary.yaml +++ b/clusters/cl01tl/manifests/loki/Service-loki-canary.yaml @@ -4,10 +4,10 @@ metadata: name: loki-canary namespace: loki labels: - helm.sh/chart: loki-6.55.0 + helm.sh/chart: loki-13.5.0 app.kubernetes.io/name: loki app.kubernetes.io/instance: loki - app.kubernetes.io/version: "3.6.7" + app.kubernetes.io/version: "3.7.1" app.kubernetes.io/component: canary annotations: spec: diff --git a/clusters/cl01tl/manifests/loki/Service-loki-chunks-cache.yaml b/clusters/cl01tl/manifests/loki/Service-loki-chunks-cache.yaml index e11e77cde..0462edd46 100644 --- a/clusters/cl01tl/manifests/loki/Service-loki-chunks-cache.yaml +++ b/clusters/cl01tl/manifests/loki/Service-loki-chunks-cache.yaml @@ -3,10 +3,10 @@ kind: Service metadata: name: loki-chunks-cache labels: - helm.sh/chart: loki-6.55.0 + helm.sh/chart: loki-13.5.0 app.kubernetes.io/name: loki app.kubernetes.io/instance: loki - app.kubernetes.io/version: "3.6.7" + app.kubernetes.io/version: "3.7.1" app.kubernetes.io/component: "memcached-chunks-cache" annotations: {} namespace: "loki" diff --git a/clusters/cl01tl/manifests/loki/Service-loki-gateway-exporter.yaml b/clusters/cl01tl/manifests/loki/Service-loki-gateway-exporter.yaml new file mode 100644 index 000000000..cd464e123 --- /dev/null +++ b/clusters/cl01tl/manifests/loki/Service-loki-gateway-exporter.yaml @@ -0,0 +1,23 @@ +apiVersion: v1 +kind: Service +metadata: + name: loki-gateway-exporter + namespace: loki + labels: + helm.sh/chart: loki-13.5.0 + app.kubernetes.io/name: loki + app.kubernetes.io/instance: loki + app.kubernetes.io/version: "3.7.1" + app.kubernetes.io/component: gateway + annotations: +spec: + type: ClusterIP + ports: + - name: http-metrics + port: 4040 + targetPort: http-metrics + protocol: TCP + selector: + app.kubernetes.io/name: loki + app.kubernetes.io/instance: loki + app.kubernetes.io/component: gateway diff --git a/clusters/cl01tl/manifests/loki/Service-loki-gateway.yaml b/clusters/cl01tl/manifests/loki/Service-loki-gateway.yaml index e76a1c56a..9f573e3d4 100644 --- a/clusters/cl01tl/manifests/loki/Service-loki-gateway.yaml +++ b/clusters/cl01tl/manifests/loki/Service-loki-gateway.yaml @@ -4,19 +4,19 @@ metadata: name: loki-gateway namespace: loki labels: - helm.sh/chart: loki-6.55.0 + helm.sh/chart: loki-13.5.0 app.kubernetes.io/name: loki app.kubernetes.io/instance: loki - app.kubernetes.io/version: "3.6.7" + app.kubernetes.io/version: "3.7.1" app.kubernetes.io/component: gateway prometheus.io/service-monitor: "false" annotations: spec: type: ClusterIP ports: - - name: http-metrics + - name: http port: 80 - targetPort: http-metrics + targetPort: http protocol: TCP selector: app.kubernetes.io/name: loki diff --git a/clusters/cl01tl/manifests/loki/Service-loki-headless.yaml b/clusters/cl01tl/manifests/loki/Service-loki-headless.yaml index ff58b51dd..77783285e 100644 --- a/clusters/cl01tl/manifests/loki/Service-loki-headless.yaml +++ b/clusters/cl01tl/manifests/loki/Service-loki-headless.yaml @@ -1,23 +1,35 @@ apiVersion: v1 kind: Service metadata: - name: loki-headless + name: "loki-headless" namespace: loki labels: - helm.sh/chart: loki-6.55.0 + helm.sh/chart: loki-13.5.0 app.kubernetes.io/name: loki app.kubernetes.io/instance: loki - app.kubernetes.io/version: "3.6.7" - variant: headless + app.kubernetes.io/version: "3.7.1" + app.kubernetes.io/component: "single-binary" prometheus.io/service-monitor: "false" + variant: headless annotations: spec: clusterIP: None + type: ClusterIP + publishNotReadyAddresses: true ports: - name: http-metrics port: 3100 targetPort: http-metrics protocol: TCP + - name: grpc + port: 9095 + targetPort: grpc + protocol: TCP + - name: grpclb + port: 9096 + targetPort: grpc + protocol: TCP selector: app.kubernetes.io/name: loki app.kubernetes.io/instance: loki + app.kubernetes.io/component: "single-binary" diff --git a/clusters/cl01tl/manifests/loki/Service-loki-memberlist.yaml b/clusters/cl01tl/manifests/loki/Service-loki-memberlist.yaml index 7dfa8e78d..8e4c6e399 100644 --- a/clusters/cl01tl/manifests/loki/Service-loki-memberlist.yaml +++ b/clusters/cl01tl/manifests/loki/Service-loki-memberlist.yaml @@ -4,10 +4,10 @@ metadata: name: loki-memberlist namespace: loki labels: - helm.sh/chart: loki-6.55.0 + helm.sh/chart: loki-13.5.0 app.kubernetes.io/name: loki app.kubernetes.io/instance: loki - app.kubernetes.io/version: "3.6.7" + app.kubernetes.io/version: "3.7.1" annotations: spec: type: ClusterIP diff --git a/clusters/cl01tl/manifests/loki/Service-loki-results-cache.yaml b/clusters/cl01tl/manifests/loki/Service-loki-results-cache.yaml index 1e09f93bd..2f82cbf4f 100644 --- a/clusters/cl01tl/manifests/loki/Service-loki-results-cache.yaml +++ b/clusters/cl01tl/manifests/loki/Service-loki-results-cache.yaml @@ -3,10 +3,10 @@ kind: Service metadata: name: loki-results-cache labels: - helm.sh/chart: loki-6.55.0 + helm.sh/chart: loki-13.5.0 app.kubernetes.io/name: loki app.kubernetes.io/instance: loki - app.kubernetes.io/version: "3.6.7" + app.kubernetes.io/version: "3.7.1" app.kubernetes.io/component: "memcached-results-cache" annotations: {} namespace: "loki" diff --git a/clusters/cl01tl/manifests/loki/Service-loki.yaml b/clusters/cl01tl/manifests/loki/Service-loki.yaml index 7932bd841..4e51725c9 100644 --- a/clusters/cl01tl/manifests/loki/Service-loki.yaml +++ b/clusters/cl01tl/manifests/loki/Service-loki.yaml @@ -1,16 +1,18 @@ apiVersion: v1 kind: Service metadata: - name: loki - namespace: loki + name: "loki" + namespace: "loki" labels: - helm.sh/chart: loki-6.55.0 + helm.sh/chart: loki-13.5.0 app.kubernetes.io/name: loki app.kubernetes.io/instance: loki - app.kubernetes.io/version: "3.6.7" + app.kubernetes.io/version: "3.7.1" + app.kubernetes.io/component: "single-binary" annotations: spec: type: ClusterIP + publishNotReadyAddresses: true ports: - name: http-metrics port: 3100 @@ -20,7 +22,11 @@ spec: port: 9095 targetPort: grpc protocol: TCP + - name: grpclb + port: 9096 + targetPort: grpc + protocol: TCP selector: app.kubernetes.io/name: loki app.kubernetes.io/instance: loki - app.kubernetes.io/component: single-binary + app.kubernetes.io/component: "single-binary" diff --git a/clusters/cl01tl/manifests/loki/ServiceAccount-loki-canary.yaml b/clusters/cl01tl/manifests/loki/ServiceAccount-loki-canary.yaml index 8ac5ed1d1..588a3e514 100644 --- a/clusters/cl01tl/manifests/loki/ServiceAccount-loki-canary.yaml +++ b/clusters/cl01tl/manifests/loki/ServiceAccount-loki-canary.yaml @@ -4,9 +4,9 @@ metadata: name: loki-canary namespace: loki labels: - helm.sh/chart: loki-6.55.0 + helm.sh/chart: loki-13.5.0 app.kubernetes.io/name: loki app.kubernetes.io/instance: loki - app.kubernetes.io/version: "3.6.7" + app.kubernetes.io/version: "3.7.1" app.kubernetes.io/component: canary -automountServiceAccountToken: true +automountServiceAccountToken: false diff --git a/clusters/cl01tl/manifests/loki/ServiceAccount-loki-gateway.yaml b/clusters/cl01tl/manifests/loki/ServiceAccount-loki-gateway.yaml new file mode 100644 index 000000000..a830fd931 --- /dev/null +++ b/clusters/cl01tl/manifests/loki/ServiceAccount-loki-gateway.yaml @@ -0,0 +1,12 @@ +apiVersion: v1 +kind: ServiceAccount +metadata: + name: loki-gateway + namespace: loki + labels: + helm.sh/chart: loki-13.5.0 + app.kubernetes.io/name: loki + app.kubernetes.io/instance: loki + app.kubernetes.io/version: "3.7.1" + app.kubernetes.io/component: gateway +automountServiceAccountToken: false diff --git a/clusters/cl01tl/manifests/loki/ServiceAccount-loki-memcached.yaml b/clusters/cl01tl/manifests/loki/ServiceAccount-loki-memcached.yaml new file mode 100644 index 000000000..b5d50e69e --- /dev/null +++ b/clusters/cl01tl/manifests/loki/ServiceAccount-loki-memcached.yaml @@ -0,0 +1,12 @@ +apiVersion: v1 +kind: ServiceAccount +metadata: + name: loki-memcached + namespace: loki + labels: + helm.sh/chart: loki-13.5.0 + app.kubernetes.io/name: loki + app.kubernetes.io/instance: loki + app.kubernetes.io/version: "3.7.1" + app.kubernetes.io/component: memcached +automountServiceAccountToken: false diff --git a/clusters/cl01tl/manifests/loki/ServiceAccount-loki.yaml b/clusters/cl01tl/manifests/loki/ServiceAccount-loki.yaml index e1a8c4fa2..56cb7176d 100644 --- a/clusters/cl01tl/manifests/loki/ServiceAccount-loki.yaml +++ b/clusters/cl01tl/manifests/loki/ServiceAccount-loki.yaml @@ -4,8 +4,8 @@ metadata: name: loki namespace: loki labels: - helm.sh/chart: loki-6.55.0 + helm.sh/chart: loki-13.5.0 app.kubernetes.io/name: loki app.kubernetes.io/instance: loki - app.kubernetes.io/version: "3.6.7" + app.kubernetes.io/version: "3.7.1" automountServiceAccountToken: true diff --git a/clusters/cl01tl/manifests/loki/StatefulSet-loki-chunks-cache.yaml b/clusters/cl01tl/manifests/loki/StatefulSet-loki-chunks-cache.yaml index a2b35d6b9..a8909cf29 100644 --- a/clusters/cl01tl/manifests/loki/StatefulSet-loki-chunks-cache.yaml +++ b/clusters/cl01tl/manifests/loki/StatefulSet-loki-chunks-cache.yaml @@ -3,10 +3,10 @@ kind: StatefulSet metadata: name: loki-chunks-cache labels: - helm.sh/chart: loki-6.55.0 + helm.sh/chart: loki-13.5.0 app.kubernetes.io/name: loki app.kubernetes.io/instance: loki - app.kubernetes.io/version: "3.6.7" + app.kubernetes.io/version: "3.7.1" app.kubernetes.io/component: "memcached-chunks-cache" name: "memcached-chunks-cache" annotations: {} @@ -32,12 +32,14 @@ spec: name: "memcached-chunks-cache" annotations: spec: - serviceAccountName: loki + serviceAccountName: loki-memcached securityContext: fsGroup: 11211 runAsGroup: 11211 runAsNonRoot: true runAsUser: 11211 + seccompProfile: + type: RuntimeDefault initContainers: [] nodeSelector: {} affinity: {} @@ -46,7 +48,7 @@ spec: terminationGracePeriodSeconds: 60 containers: - name: memcached - image: memcached:1.6.39-alpine + image: memcached:1.6.41-alpine imagePullPolicy: IfNotPresent resources: limits: @@ -72,6 +74,9 @@ spec: drop: - ALL readOnlyRootFilesystem: true + runAsNonRoot: true + seccompProfile: + type: RuntimeDefault readinessProbe: failureThreshold: 6 initialDelaySeconds: 5 @@ -87,7 +92,7 @@ spec: port: client timeoutSeconds: 5 - name: exporter - image: prom/memcached-exporter:v0.15.4 + image: prom/memcached-exporter:v0.16.0 imagePullPolicy: IfNotPresent ports: - containerPort: 9150 @@ -104,6 +109,9 @@ spec: drop: - ALL readOnlyRootFilesystem: true + runAsNonRoot: true + seccompProfile: + type: RuntimeDefault readinessProbe: failureThreshold: 3 httpGet: diff --git a/clusters/cl01tl/manifests/loki/StatefulSet-loki-results-cache.yaml b/clusters/cl01tl/manifests/loki/StatefulSet-loki-results-cache.yaml index 2d8c9a8e7..83b2484ec 100644 --- a/clusters/cl01tl/manifests/loki/StatefulSet-loki-results-cache.yaml +++ b/clusters/cl01tl/manifests/loki/StatefulSet-loki-results-cache.yaml @@ -3,10 +3,10 @@ kind: StatefulSet metadata: name: loki-results-cache labels: - helm.sh/chart: loki-6.55.0 + helm.sh/chart: loki-13.5.0 app.kubernetes.io/name: loki app.kubernetes.io/instance: loki - app.kubernetes.io/version: "3.6.7" + app.kubernetes.io/version: "3.7.1" app.kubernetes.io/component: "memcached-results-cache" name: "memcached-results-cache" annotations: {} @@ -32,12 +32,14 @@ spec: name: "memcached-results-cache" annotations: spec: - serviceAccountName: loki + serviceAccountName: loki-memcached securityContext: fsGroup: 11211 runAsGroup: 11211 runAsNonRoot: true runAsUser: 11211 + seccompProfile: + type: RuntimeDefault initContainers: [] nodeSelector: {} affinity: {} @@ -46,7 +48,7 @@ spec: terminationGracePeriodSeconds: 60 containers: - name: memcached - image: memcached:1.6.39-alpine + image: memcached:1.6.41-alpine imagePullPolicy: IfNotPresent resources: limits: @@ -72,6 +74,9 @@ spec: drop: - ALL readOnlyRootFilesystem: true + runAsNonRoot: true + seccompProfile: + type: RuntimeDefault readinessProbe: failureThreshold: 6 initialDelaySeconds: 5 @@ -87,7 +92,7 @@ spec: port: client timeoutSeconds: 5 - name: exporter - image: prom/memcached-exporter:v0.15.4 + image: prom/memcached-exporter:v0.16.0 imagePullPolicy: IfNotPresent ports: - containerPort: 9150 @@ -104,6 +109,9 @@ spec: drop: - ALL readOnlyRootFilesystem: true + runAsNonRoot: true + seccompProfile: + type: RuntimeDefault readinessProbe: failureThreshold: 3 httpGet: diff --git a/clusters/cl01tl/manifests/loki/StatefulSet-loki.yaml b/clusters/cl01tl/manifests/loki/StatefulSet-loki.yaml index 24a2878bd..e9ffbd0a4 100644 --- a/clusters/cl01tl/manifests/loki/StatefulSet-loki.yaml +++ b/clusters/cl01tl/manifests/loki/StatefulSet-loki.yaml @@ -4,10 +4,10 @@ metadata: name: loki namespace: loki labels: - helm.sh/chart: loki-6.55.0 + helm.sh/chart: loki-13.5.0 app.kubernetes.io/name: loki app.kubernetes.io/instance: loki - app.kubernetes.io/version: "3.6.7" + app.kubernetes.io/version: "3.7.1" app.kubernetes.io/component: single-binary app.kubernetes.io/part-of: memberlist spec: @@ -29,31 +29,61 @@ spec: template: metadata: annotations: - checksum/config: 9cded33d7ba292eb76711b451f5ecd9bade13c7fb5ffb5622229f5706f8f90dd - storage/size: "150Gi" + checksum/config: 19e0049d8578b5fadd19fbcef19075cf8df1c30f6a3e6fc48aeeeaae41e30e27 + storage/size: 150Gi kubectl.kubernetes.io/default-container: "loki" labels: + helm.sh/chart: loki-13.5.0 app.kubernetes.io/name: loki app.kubernetes.io/instance: loki + app.kubernetes.io/version: "3.7.1" app.kubernetes.io/component: single-binary app.kubernetes.io/part-of: memberlist spec: serviceAccountName: loki - automountServiceAccountToken: true enableServiceLinks: true + automountServiceAccountToken: true securityContext: fsGroup: 10001 fsGroupChangePolicy: OnRootMismatch runAsGroup: 10001 runAsNonRoot: true runAsUser: 10001 + seccompProfile: + type: RuntimeDefault terminationGracePeriodSeconds: 30 + affinity: + podAntiAffinity: + requiredDuringSchedulingIgnoredDuringExecution: + - labelSelector: + matchLabels: + app.kubernetes.io/component: single-binary + app.kubernetes.io/instance: 'loki' + app.kubernetes.io/name: 'loki' + topologyKey: kubernetes.io/hostname + volumes: + - name: temp + emptyDir: {} + - name: config + configMap: + name: loki + items: + - key: "config.yaml" + path: "config.yaml" + - name: runtime-config + configMap: + name: loki-runtime + - name: sc-rules-volume + emptyDir: {} + - name: sc-rules-temp + emptyDir: {} containers: - name: loki - image: docker.io/grafana/loki:3.6.7 + image: docker.io/grafana/loki:3.7.1 imagePullPolicy: IfNotPresent args: - -config.file=/etc/loki/config/config.yaml + - -config.expand-env=true - -target=all ports: - name: http-metrics @@ -65,12 +95,29 @@ spec: - name: http-memberlist containerPort: 7946 protocol: TCP + env: + - name: GOGC + value: "80" + - name: HASH_RING_INSTANCE_ADDR + valueFrom: + fieldRef: + fieldPath: status.podIP securityContext: allowPrivilegeEscalation: false capabilities: drop: - ALL readOnlyRootFilesystem: true + seccompProfile: + type: RuntimeDefault + livenessProbe: + failureThreshold: 10 + httpGet: + path: /loki/api/v1/status/buildinfo + port: http-metrics + periodSeconds: 30 + successThreshold: 1 + timeoutSeconds: 1 readinessProbe: failureThreshold: 3 httpGet: @@ -81,14 +128,14 @@ spec: successThreshold: 1 timeoutSeconds: 1 volumeMounts: - - name: tmp - mountPath: /tmp - name: config mountPath: /etc/loki/config - name: runtime-config mountPath: /etc/loki/runtime-config - name: storage mountPath: /var/loki + - name: temp + mountPath: /tmp - name: sc-rules-volume mountPath: "/rules" resources: @@ -96,8 +143,38 @@ spec: cpu: 100m memory: 800Mi - name: loki-sc-rules - image: docker.io/kiwigrid/k8s-sidecar:2.5.0 + image: docker.io/kiwigrid/k8s-sidecar:2.7.1 imagePullPolicy: IfNotPresent + ports: + - name: http-sidecar + containerPort: 8080 + protocol: TCP + securityContext: + allowPrivilegeEscalation: false + capabilities: + drop: + - ALL + readOnlyRootFilesystem: true + seccompProfile: + type: RuntimeDefault + livenessProbe: + failureThreshold: 3 + httpGet: + path: /healthz + port: http-sidecar + initialDelaySeconds: 30 + periodSeconds: 30 + successThreshold: 1 + timeoutSeconds: 1 + readinessProbe: + failureThreshold: 3 + httpGet: + path: /healthz + port: http-sidecar + initialDelaySeconds: 3 + periodSeconds: 10 + successThreshold: 1 + timeoutSeconds: 1 env: - name: METHOD value: WATCH @@ -113,40 +190,13 @@ spec: value: "60" - name: LOG_LEVEL value: "INFO" - securityContext: - allowPrivilegeEscalation: false - capabilities: - drop: - - ALL - readOnlyRootFilesystem: true + - name: HEALTH_PORT + value: "8080" volumeMounts: - - name: tmp + - name: sc-rules-temp mountPath: /tmp - name: sc-rules-volume mountPath: "/rules" - affinity: - podAntiAffinity: - requiredDuringSchedulingIgnoredDuringExecution: - - labelSelector: - matchLabels: - app.kubernetes.io/component: single-binary - app.kubernetes.io/instance: 'loki' - app.kubernetes.io/name: 'loki' - topologyKey: kubernetes.io/hostname - volumes: - - name: tmp - emptyDir: {} - - name: config - configMap: - name: loki - items: - - key: "config.yaml" - path: "config.yaml" - - name: runtime-config - configMap: - name: loki-runtime - - name: sc-rules-volume - emptyDir: {} volumeClaimTemplates: - apiVersion: v1 kind: PersistentVolumeClaim -- 2.49.1 From 99be12a823054278b44452ed264aa4d337c373ec Mon Sep 17 00:00:00 2001 From: gitea-bot Date: Sun, 3 May 2026 00:54:11 +0000 Subject: [PATCH 4/5] chore: Update manifests after change --- clusters/cl01tl/manifests/immich/-.yaml | 1 - .../Cluster-immich-postgresql-18-cluster.yaml | 68 +++++ .../ConfigMap-immich-valkey-init-scripts.yaml | 87 ++++++ .../manifests/immich/Deployment-immich.yaml | 125 ++++++++ ...lSecret-immich-backup-secret-external.yaml | 47 +++ ...rnalSecret-immich-backup-secret-local.yaml | 47 +++ ...nalSecret-immich-backup-secret-remote.yaml | 47 +++ ...tgresql-18-backup-garage-local-secret.yaml | 29 ++ ...-immich-postgresql-18-recovery-secret.yaml | 29 ++ .../manifests/immich/HTTPRoute-immich.yaml | 30 ++ ...ich-postgresql-18-backup-garage-local.yaml | 33 +++ ...ctStore-immich-postgresql-18-recovery.yaml | 32 +++ .../immich/PersistentVolumeClaim-immich.yaml | 19 ++ .../immich/PodMonitor-immich-valkey.yaml | 23 ++ ...etheusRule-immich-backup-source-local.yaml | 30 ++ ...Rule-immich-postgresql-18-alert-rules.yaml | 270 ++++++++++++++++++ .../immich/PrometheusRule-immich-valkey.yaml | 47 +++ ...nSource-immich-backup-source-external.yaml | 29 ++ ...tionSource-immich-backup-source-local.yaml | 29 ++ ...ionSource-immich-backup-source-remote.yaml | 29 ++ ...resql-18-scheduled-backup-live-backup.yaml | 24 ++ .../immich/Secret-immich-immich-sa-token.yaml | 13 + .../SecretProviderClass-immich-config.yaml | 19 ++ .../Service-immich-valkey-headless.yaml | 23 ++ .../immich/Service-immich-valkey-metrics.yaml | 23 ++ .../immich/Service-immich-valkey-read.yaml | 21 ++ .../immich/Service-immich-valkey.yaml | 22 ++ .../manifests/immich/Service-immich.yaml | 30 ++ .../immich/ServiceAccount-immich-valkey.yaml | 11 + .../immich/ServiceAccount-immich.yaml | 12 + .../immich/ServiceMonitor-immich-valkey.yaml | 24 ++ .../immich/ServiceMonitor-immich.yaml | 28 ++ .../immich/StatefulSet-immich-valkey.yaml | 133 +++++++++ 33 files changed, 1433 insertions(+), 1 deletion(-) delete mode 100644 clusters/cl01tl/manifests/immich/-.yaml create mode 100644 clusters/cl01tl/manifests/immich/Cluster-immich-postgresql-18-cluster.yaml create mode 100644 clusters/cl01tl/manifests/immich/ConfigMap-immich-valkey-init-scripts.yaml create mode 100644 clusters/cl01tl/manifests/immich/Deployment-immich.yaml create mode 100644 clusters/cl01tl/manifests/immich/ExternalSecret-immich-backup-secret-external.yaml create mode 100644 clusters/cl01tl/manifests/immich/ExternalSecret-immich-backup-secret-local.yaml create mode 100644 clusters/cl01tl/manifests/immich/ExternalSecret-immich-backup-secret-remote.yaml create mode 100644 clusters/cl01tl/manifests/immich/ExternalSecret-immich-postgresql-18-backup-garage-local-secret.yaml create mode 100644 clusters/cl01tl/manifests/immich/ExternalSecret-immich-postgresql-18-recovery-secret.yaml create mode 100644 clusters/cl01tl/manifests/immich/HTTPRoute-immich.yaml create mode 100644 clusters/cl01tl/manifests/immich/ObjectStore-immich-postgresql-18-backup-garage-local.yaml create mode 100644 clusters/cl01tl/manifests/immich/ObjectStore-immich-postgresql-18-recovery.yaml create mode 100644 clusters/cl01tl/manifests/immich/PersistentVolumeClaim-immich.yaml create mode 100644 clusters/cl01tl/manifests/immich/PodMonitor-immich-valkey.yaml create mode 100644 clusters/cl01tl/manifests/immich/PrometheusRule-immich-backup-source-local.yaml create mode 100644 clusters/cl01tl/manifests/immich/PrometheusRule-immich-postgresql-18-alert-rules.yaml create mode 100644 clusters/cl01tl/manifests/immich/PrometheusRule-immich-valkey.yaml create mode 100644 clusters/cl01tl/manifests/immich/ReplicationSource-immich-backup-source-external.yaml create mode 100644 clusters/cl01tl/manifests/immich/ReplicationSource-immich-backup-source-local.yaml create mode 100644 clusters/cl01tl/manifests/immich/ReplicationSource-immich-backup-source-remote.yaml create mode 100644 clusters/cl01tl/manifests/immich/ScheduledBackup-immich-postgresql-18-scheduled-backup-live-backup.yaml create mode 100644 clusters/cl01tl/manifests/immich/Secret-immich-immich-sa-token.yaml create mode 100644 clusters/cl01tl/manifests/immich/SecretProviderClass-immich-config.yaml create mode 100644 clusters/cl01tl/manifests/immich/Service-immich-valkey-headless.yaml create mode 100644 clusters/cl01tl/manifests/immich/Service-immich-valkey-metrics.yaml create mode 100644 clusters/cl01tl/manifests/immich/Service-immich-valkey-read.yaml create mode 100644 clusters/cl01tl/manifests/immich/Service-immich-valkey.yaml create mode 100644 clusters/cl01tl/manifests/immich/Service-immich.yaml create mode 100644 clusters/cl01tl/manifests/immich/ServiceAccount-immich-valkey.yaml create mode 100644 clusters/cl01tl/manifests/immich/ServiceAccount-immich.yaml create mode 100644 clusters/cl01tl/manifests/immich/ServiceMonitor-immich-valkey.yaml create mode 100644 clusters/cl01tl/manifests/immich/ServiceMonitor-immich.yaml create mode 100644 clusters/cl01tl/manifests/immich/StatefulSet-immich-valkey.yaml diff --git a/clusters/cl01tl/manifests/immich/-.yaml b/clusters/cl01tl/manifests/immich/-.yaml deleted file mode 100644 index 8b1378917..000000000 --- a/clusters/cl01tl/manifests/immich/-.yaml +++ /dev/null @@ -1 +0,0 @@ - diff --git a/clusters/cl01tl/manifests/immich/Cluster-immich-postgresql-18-cluster.yaml b/clusters/cl01tl/manifests/immich/Cluster-immich-postgresql-18-cluster.yaml new file mode 100644 index 000000000..cfeec6e35 --- /dev/null +++ b/clusters/cl01tl/manifests/immich/Cluster-immich-postgresql-18-cluster.yaml @@ -0,0 +1,68 @@ +apiVersion: postgresql.cnpg.io/v1 +kind: Cluster +metadata: + name: immich-postgresql-18-cluster + namespace: immich + labels: + app.kubernetes.io/name: immich-postgresql-18-cluster + helm.sh/chart: postgres-18-cluster-7.12.1 + app.kubernetes.io/instance: immich + app.kubernetes.io/part-of: immich + app.kubernetes.io/version: "7.12.1" + app.kubernetes.io/managed-by: Helm +spec: + instances: 3 + imageName: "ghcr.io/tensorchord/cloudnative-vectorchord:18.0-0.5.3" + imagePullPolicy: IfNotPresent + postgresUID: 26 + postgresGID: 26 + storage: + size: 10Gi + storageClass: local-path + walStorage: + size: 2Gi + storageClass: local-path + resources: + limits: + hugepages-2Mi: 256Mi + requests: + cpu: 20m + memory: 80Mi + affinity: + enablePodAntiAffinity: true + topologyKey: kubernetes.io/hostname + primaryUpdateMethod: switchover + primaryUpdateStrategy: unsupervised + logLevel: info + enableSuperuserAccess: false + enablePDB: true + postgresql: + shared_preload_libraries: + - vchord.so + parameters: + hot_standby_feedback: "on" + max_slot_wal_keep_size: 2000MB + shared_buffers: 256MB + monitoring: + enablePodMonitor: true + disableDefaultQueries: false + plugins: + - name: barman-cloud.cloudnative-pg.io + enabled: true + isWALArchiver: true + parameters: + barmanObjectName: "immich-postgresql-18-backup-garage-local" + serverName: "immich-postgresql-18-backup-1" + bootstrap: + recovery: + database: app + source: immich-postgresql-18-backup-1 + externalClusters: + - name: immich-postgresql-18-backup-1 + plugin: + name: barman-cloud.cloudnative-pg.io + enabled: true + isWALArchiver: false + parameters: + barmanObjectName: "immich-postgresql-18-recovery" + serverName: immich-postgresql-18-backup-1 diff --git a/clusters/cl01tl/manifests/immich/ConfigMap-immich-valkey-init-scripts.yaml b/clusters/cl01tl/manifests/immich/ConfigMap-immich-valkey-init-scripts.yaml new file mode 100644 index 000000000..6eff7c092 --- /dev/null +++ b/clusters/cl01tl/manifests/immich/ConfigMap-immich-valkey-init-scripts.yaml @@ -0,0 +1,87 @@ +apiVersion: v1 +kind: ConfigMap +metadata: + name: immich-valkey-init-scripts + labels: + helm.sh/chart: valkey-0.9.4 + app.kubernetes.io/name: valkey + app.kubernetes.io/instance: immich + app.kubernetes.io/version: "9.0.3" + app.kubernetes.io/managed-by: Helm +data: + init.sh: |- + #!/bin/sh + set -eu + + # Default config paths + VALKEY_CONFIG=${VALKEY_CONFIG_PATH:-/data/conf/valkey.conf} + + LOGFILE="/data/init.log" + DATA_DIR="/data/conf" + + # Logging function (outputs to stderr and file) + log() { + echo "$(date) $1" | tee -a "$LOGFILE" >&2 + } + + # Clean old log if requested + if [ "${KEEP_OLD_LOGS:-false}" != "true" ]; then + rm -f "$LOGFILE" + fi + + if [ -f "$LOGFILE" ]; then + log "Detected restart of this instance ($HOSTNAME)" + fi + + log "Creating configuration in $DATA_DIR..." + mkdir -p "$DATA_DIR" + rm -f "$VALKEY_CONFIG" + + + # Base valkey.conf + log "Generating base valkey.conf" + { + echo "port 6379" + echo "protected-mode no" + echo "bind * -::*" + echo "dir /data" + } >>"$VALKEY_CONFIG" + # Replica mode configuration + log "Configuring replication mode" + + # Use POD_INDEX from Kubernetes metadata + POD_INDEX=${POD_INDEX:-0} + IS_MASTER=false + + # Check if this is pod-0 (master) + if [ "$POD_INDEX" = "0" ]; then + IS_MASTER=true + log "This pod (index $POD_INDEX) is configured as MASTER" + else + log "This pod (index $POD_INDEX) is configured as REPLICA" + fi + + # Configure replica settings + if [ "$IS_MASTER" = "false" ]; then + MASTER_HOST="immich-valkey-0.immich-valkey-headless.immich.svc.cluster.local" + MASTER_PORT="6379" + + log "Configuring replica to follow master at $MASTER_HOST:$MASTER_PORT" + + { + echo "" + echo "# Replica Configuration" + echo "replicaof $MASTER_HOST $MASTER_PORT" + echo "replica-announce-ip immich-valkey-$POD_INDEX.immich-valkey-headless.immich.svc.cluster.local" + } >>"$VALKEY_CONFIG" + fi + + # Append extra configs if present + if [ -f /usr/local/etc/valkey/valkey.conf ]; then + log "Appending /usr/local/etc/valkey/valkey.conf" + cat /usr/local/etc/valkey/valkey.conf >>"$VALKEY_CONFIG" + fi + if [ -d /extravalkeyconfigs ]; then + log "Appending files in /extravalkeyconfigs/" + cat /extravalkeyconfigs/* >>"$VALKEY_CONFIG" + fi diff --git a/clusters/cl01tl/manifests/immich/Deployment-immich.yaml b/clusters/cl01tl/manifests/immich/Deployment-immich.yaml new file mode 100644 index 000000000..545ba0eea --- /dev/null +++ b/clusters/cl01tl/manifests/immich/Deployment-immich.yaml @@ -0,0 +1,125 @@ +apiVersion: apps/v1 +kind: Deployment +metadata: + name: immich + labels: + app.kubernetes.io/controller: main + app.kubernetes.io/instance: immich + app.kubernetes.io/managed-by: Helm + app.kubernetes.io/name: immich + helm.sh/chart: immich-4.6.2 + namespace: immich +spec: + revisionHistoryLimit: 3 + replicas: 1 + strategy: + type: Recreate + selector: + matchLabels: + app.kubernetes.io/controller: main + app.kubernetes.io/name: immich + app.kubernetes.io/instance: immich + template: + metadata: + annotations: + checksum/secrets: 46a3f57ca394cccffc419e0c17f5d5f366374b0651c02c507636c53c0b5f33e6 + labels: + app.kubernetes.io/controller: main + app.kubernetes.io/instance: immich + app.kubernetes.io/name: immich + spec: + enableServiceLinks: false + serviceAccountName: immich + automountServiceAccountToken: true + hostIPC: false + hostNetwork: false + hostPID: false + dnsPolicy: ClusterFirst + containers: + - env: + - name: TZ + value: America/Chicago + - name: IMMICH_TELEMETRY_INCLUDE + value: all + - name: IMMICH_CONFIG_FILE + value: /config/immich.json + - name: REDIS_HOSTNAME + value: immich-valkey + - name: DB_VECTOR_EXTENSION + value: vectorchord + - name: DB_HOSTNAME + valueFrom: + secretKeyRef: + key: host + name: immich-postgresql-18-cluster-app + - name: DB_DATABASE_NAME + valueFrom: + secretKeyRef: + key: dbname + name: immich-postgresql-18-cluster-app + - name: DB_PORT + valueFrom: + secretKeyRef: + key: port + name: immich-postgresql-18-cluster-app + - name: DB_USERNAME + valueFrom: + secretKeyRef: + key: user + name: immich-postgresql-18-cluster-app + - name: DB_PASSWORD + valueFrom: + secretKeyRef: + key: password + name: immich-postgresql-18-cluster-app + image: ghcr.io/immich-app/immich-server:v2.7.5@sha256:c15bff75068effb03f4355997d03dc7e0fc58720c2b54ad6f7f10d1bc57efaa5 + livenessProbe: + failureThreshold: 3 + httpGet: + path: /api/server/ping + port: 2283 + initialDelaySeconds: 0 + periodSeconds: 10 + timeoutSeconds: 1 + name: main + readinessProbe: + failureThreshold: 3 + httpGet: + path: /api/server/ping + port: 2283 + initialDelaySeconds: 0 + periodSeconds: 10 + timeoutSeconds: 1 + resources: + limits: + gpu.intel.com/i915: 1 + requests: + cpu: 10m + gpu.intel.com/i915: 1 + memory: 500Mi + startupProbe: + failureThreshold: 30 + httpGet: + path: /api/server/ping + port: 2283 + initialDelaySeconds: 0 + periodSeconds: 10 + timeoutSeconds: 1 + volumeMounts: + - mountPath: /config/immich.json + mountPropagation: None + name: config + readOnly: true + subPath: immich.json + - mountPath: /usr/src/app/upload + name: data + volumes: + - csi: + driver: secrets-store.csi.k8s.io + readOnly: true + volumeAttributes: + secretProviderClass: immich-config + name: config + - name: data + persistentVolumeClaim: + claimName: immich diff --git a/clusters/cl01tl/manifests/immich/ExternalSecret-immich-backup-secret-external.yaml b/clusters/cl01tl/manifests/immich/ExternalSecret-immich-backup-secret-external.yaml new file mode 100644 index 000000000..f0fd469dc --- /dev/null +++ b/clusters/cl01tl/manifests/immich/ExternalSecret-immich-backup-secret-external.yaml @@ -0,0 +1,47 @@ +apiVersion: external-secrets.io/v1 +kind: ExternalSecret +metadata: + name: immich-backup-secret-external + namespace: immich + labels: + helm.sh/chart: volsync-target-data-1.1.1 + app.kubernetes.io/instance: immich + app.kubernetes.io/part-of: immich + app.kubernetes.io/version: "1.1.1" + app.kubernetes.io/managed-by: Helm + app.kubernetes.io/name: immich-backup-secret-external +spec: + secretStoreRef: + kind: ClusterSecretStore + name: openbao + target: + template: + mergePolicy: Merge + engineVersion: v2 + data: + RESTIC_REPOSITORY: "s3:{{ .ENDPOINT }}/{{ .BUCKET }}/cl01tl/immich/immich" + data: + - secretKey: ENDPOINT + remoteRef: + key: /digital-ocean/config + property: ENDPOINT + - secretKey: BUCKET + remoteRef: + key: /digital-ocean/home-infra/volsync-backups + property: BUCKET + - secretKey: RESTIC_PASSWORD + remoteRef: + key: /digital-ocean/home-infra/volsync-backups + property: RESTIC_PASSWORD + - secretKey: AWS_DEFAULT_REGION + remoteRef: + key: /digital-ocean/home-infra/volsync-backups + property: AWS_REGION + - secretKey: AWS_ACCESS_KEY_ID + remoteRef: + key: /digital-ocean/home-infra/volsync-backups + property: AWS_ACCESS_KEY_ID + - secretKey: AWS_SECRET_ACCESS_KEY + remoteRef: + key: /digital-ocean/home-infra/volsync-backups + property: AWS_SECRET_ACCESS_KEY diff --git a/clusters/cl01tl/manifests/immich/ExternalSecret-immich-backup-secret-local.yaml b/clusters/cl01tl/manifests/immich/ExternalSecret-immich-backup-secret-local.yaml new file mode 100644 index 000000000..0ee9fe200 --- /dev/null +++ b/clusters/cl01tl/manifests/immich/ExternalSecret-immich-backup-secret-local.yaml @@ -0,0 +1,47 @@ +apiVersion: external-secrets.io/v1 +kind: ExternalSecret +metadata: + name: immich-backup-secret-local + namespace: immich + labels: + helm.sh/chart: volsync-target-data-1.1.1 + app.kubernetes.io/instance: immich + app.kubernetes.io/part-of: immich + app.kubernetes.io/version: "1.1.1" + app.kubernetes.io/managed-by: Helm + app.kubernetes.io/name: immich-backup-secret-local +spec: + secretStoreRef: + kind: ClusterSecretStore + name: openbao + target: + template: + mergePolicy: Merge + engineVersion: v2 + data: + RESTIC_REPOSITORY: "s3:{{ .ENDPOINT }}/{{ .BUCKET }}/cl01tl/immich/immich" + data: + - secretKey: ENDPOINT + remoteRef: + key: /garage/config + property: ENDPOINT_LOCAL + - secretKey: BUCKET + remoteRef: + key: /garage/home-infra/volsync-backups + property: BUCKET + - secretKey: RESTIC_PASSWORD + remoteRef: + key: /garage/home-infra/volsync-backups + property: RESTIC_PASSWORD_LOCAL + - secretKey: AWS_DEFAULT_REGION + remoteRef: + key: /garage/home-infra/volsync-backups + property: ACCESS_REGION + - secretKey: AWS_ACCESS_KEY_ID + remoteRef: + key: /garage/home-infra/volsync-backups + property: ACCESS_KEY_ID + - secretKey: AWS_SECRET_ACCESS_KEY + remoteRef: + key: /garage/home-infra/volsync-backups + property: ACCESS_SECRET_KEY diff --git a/clusters/cl01tl/manifests/immich/ExternalSecret-immich-backup-secret-remote.yaml b/clusters/cl01tl/manifests/immich/ExternalSecret-immich-backup-secret-remote.yaml new file mode 100644 index 000000000..6c944429c --- /dev/null +++ b/clusters/cl01tl/manifests/immich/ExternalSecret-immich-backup-secret-remote.yaml @@ -0,0 +1,47 @@ +apiVersion: external-secrets.io/v1 +kind: ExternalSecret +metadata: + name: immich-backup-secret-remote + namespace: immich + labels: + helm.sh/chart: volsync-target-data-1.1.1 + app.kubernetes.io/instance: immich + app.kubernetes.io/part-of: immich + app.kubernetes.io/version: "1.1.1" + app.kubernetes.io/managed-by: Helm + app.kubernetes.io/name: immich-backup-secret-remote +spec: + secretStoreRef: + kind: ClusterSecretStore + name: openbao + target: + template: + mergePolicy: Merge + engineVersion: v2 + data: + RESTIC_REPOSITORY: "s3:{{ .ENDPOINT }}/{{ .BUCKET }}/cl01tl/immich/immich" + data: + - secretKey: ENDPOINT + remoteRef: + key: /garage/config + property: ENDPOINT_REMOTE + - secretKey: BUCKET + remoteRef: + key: /garage/home-infra/volsync-backups + property: BUCKET + - secretKey: RESTIC_PASSWORD + remoteRef: + key: /garage/home-infra/volsync-backups + property: RESTIC_PASSWORD_REMOTE + - secretKey: AWS_DEFAULT_REGION + remoteRef: + key: /garage/home-infra/volsync-backups + property: ACCESS_REGION + - secretKey: AWS_ACCESS_KEY_ID + remoteRef: + key: /garage/home-infra/volsync-backups + property: ACCESS_KEY_ID + - secretKey: AWS_SECRET_ACCESS_KEY + remoteRef: + key: /garage/home-infra/volsync-backups + property: ACCESS_SECRET_KEY diff --git a/clusters/cl01tl/manifests/immich/ExternalSecret-immich-postgresql-18-backup-garage-local-secret.yaml b/clusters/cl01tl/manifests/immich/ExternalSecret-immich-postgresql-18-backup-garage-local-secret.yaml new file mode 100644 index 000000000..872987bdb --- /dev/null +++ b/clusters/cl01tl/manifests/immich/ExternalSecret-immich-postgresql-18-backup-garage-local-secret.yaml @@ -0,0 +1,29 @@ +apiVersion: external-secrets.io/v1 +kind: ExternalSecret +metadata: + name: immich-postgresql-18-backup-garage-local-secret + namespace: immich + labels: + app.kubernetes.io/name: immich-postgresql-18-backup-garage-local-secret + helm.sh/chart: postgres-18-cluster-7.12.1 + app.kubernetes.io/instance: immich + app.kubernetes.io/part-of: immich + app.kubernetes.io/version: "7.12.1" + app.kubernetes.io/managed-by: Helm +spec: + secretStoreRef: + kind: ClusterSecretStore + name: openbao + data: + - secretKey: ACCESS_REGION + remoteRef: + key: /garage/home-infra/postgres-backups + property: ACCESS_REGION + - secretKey: ACCESS_KEY_ID + remoteRef: + key: /garage/home-infra/postgres-backups + property: ACCESS_KEY_ID + - secretKey: ACCESS_SECRET_KEY + remoteRef: + key: /garage/home-infra/postgres-backups + property: ACCESS_SECRET_KEY diff --git a/clusters/cl01tl/manifests/immich/ExternalSecret-immich-postgresql-18-recovery-secret.yaml b/clusters/cl01tl/manifests/immich/ExternalSecret-immich-postgresql-18-recovery-secret.yaml new file mode 100644 index 000000000..4032effcb --- /dev/null +++ b/clusters/cl01tl/manifests/immich/ExternalSecret-immich-postgresql-18-recovery-secret.yaml @@ -0,0 +1,29 @@ +apiVersion: external-secrets.io/v1 +kind: ExternalSecret +metadata: + name: immich-postgresql-18-recovery-secret + namespace: immich + labels: + helm.sh/chart: postgres-18-cluster-7.12.1 + app.kubernetes.io/instance: immich + app.kubernetes.io/part-of: immich + app.kubernetes.io/version: "7.12.1" + app.kubernetes.io/managed-by: Helm + app.kubernetes.io/name: immich-postgresql-18-recovery-secret +spec: + secretStoreRef: + kind: ClusterSecretStore + name: openbao + data: + - secretKey: ACCESS_REGION + remoteRef: + key: /garage/home-infra/postgres-backups + property: ACCESS_REGION + - secretKey: ACCESS_KEY_ID + remoteRef: + key: /garage/home-infra/postgres-backups + property: ACCESS_KEY_ID + - secretKey: ACCESS_SECRET_KEY + remoteRef: + key: /garage/home-infra/postgres-backups + property: ACCESS_SECRET_KEY diff --git a/clusters/cl01tl/manifests/immich/HTTPRoute-immich.yaml b/clusters/cl01tl/manifests/immich/HTTPRoute-immich.yaml new file mode 100644 index 000000000..b8475cbe9 --- /dev/null +++ b/clusters/cl01tl/manifests/immich/HTTPRoute-immich.yaml @@ -0,0 +1,30 @@ +apiVersion: gateway.networking.k8s.io/v1 +kind: HTTPRoute +metadata: + name: immich + labels: + app.kubernetes.io/instance: immich + app.kubernetes.io/managed-by: Helm + app.kubernetes.io/name: immich + helm.sh/chart: immich-4.6.2 + namespace: immich +spec: + parentRefs: + - group: gateway.networking.k8s.io + kind: Gateway + name: traefik-gateway + namespace: traefik + hostnames: + - "immich.alexlebens.net" + rules: + - backendRefs: + - group: "" + kind: Service + name: immich + namespace: immich + port: 2283 + weight: 1 + matches: + - path: + type: PathPrefix + value: / diff --git a/clusters/cl01tl/manifests/immich/ObjectStore-immich-postgresql-18-backup-garage-local.yaml b/clusters/cl01tl/manifests/immich/ObjectStore-immich-postgresql-18-backup-garage-local.yaml new file mode 100644 index 000000000..2a0852f0c --- /dev/null +++ b/clusters/cl01tl/manifests/immich/ObjectStore-immich-postgresql-18-backup-garage-local.yaml @@ -0,0 +1,33 @@ +apiVersion: barmancloud.cnpg.io/v1 +kind: ObjectStore +metadata: + name: immich-postgresql-18-backup-garage-local + namespace: immich + labels: + app.kubernetes.io/name: immich-postgresql-18-backup-garage-local + helm.sh/chart: postgres-18-cluster-7.12.1 + app.kubernetes.io/instance: immich + app.kubernetes.io/part-of: immich + app.kubernetes.io/version: "7.12.1" + app.kubernetes.io/managed-by: Helm +spec: + retentionPolicy: 7d + instanceSidecarConfiguration: + env: + - name: AWS_REQUEST_CHECKSUM_CALCULATION + value: when_required + - name: AWS_RESPONSE_CHECKSUM_VALIDATION + value: when_required + configuration: + destinationPath: s3://postgres-backups/cl01tl/immich/immich-postgresql-18-cluster + endpointURL: http://garage-main.garage:3900 + s3Credentials: + accessKeyId: + name: immich-postgresql-18-backup-garage-local-secret + key: ACCESS_KEY_ID + secretAccessKey: + name: immich-postgresql-18-backup-garage-local-secret + key: ACCESS_SECRET_KEY + region: + name: immich-postgresql-18-backup-garage-local-secret + key: ACCESS_REGION diff --git a/clusters/cl01tl/manifests/immich/ObjectStore-immich-postgresql-18-recovery.yaml b/clusters/cl01tl/manifests/immich/ObjectStore-immich-postgresql-18-recovery.yaml new file mode 100644 index 000000000..2c0f41c0e --- /dev/null +++ b/clusters/cl01tl/manifests/immich/ObjectStore-immich-postgresql-18-recovery.yaml @@ -0,0 +1,32 @@ +apiVersion: barmancloud.cnpg.io/v1 +kind: ObjectStore +metadata: + name: "immich-postgresql-18-recovery" + namespace: immich + labels: + helm.sh/chart: postgres-18-cluster-7.12.1 + app.kubernetes.io/instance: immich + app.kubernetes.io/part-of: immich + app.kubernetes.io/version: "7.12.1" + app.kubernetes.io/managed-by: Helm + app.kubernetes.io/name: "immich-postgresql-18-recovery" +spec: + configuration: + destinationPath: s3://postgres-backups/cl01tl/immich/immich-postgresql-18-cluster + endpointURL: http://garage-main.garage:3900 + wal: + compression: snappy + maxParallel: 1 + data: + compression: snappy + jobs: 1 + s3Credentials: + accessKeyId: + name: immich-postgresql-18-recovery-secret + key: ACCESS_KEY_ID + secretAccessKey: + name: immich-postgresql-18-recovery-secret + key: ACCESS_SECRET_KEY + region: + name: immich-postgresql-18-recovery-secret + key: ACCESS_REGION diff --git a/clusters/cl01tl/manifests/immich/PersistentVolumeClaim-immich.yaml b/clusters/cl01tl/manifests/immich/PersistentVolumeClaim-immich.yaml new file mode 100644 index 000000000..0ad202dd2 --- /dev/null +++ b/clusters/cl01tl/manifests/immich/PersistentVolumeClaim-immich.yaml @@ -0,0 +1,19 @@ +kind: PersistentVolumeClaim +apiVersion: v1 +metadata: + name: immich + labels: + app.kubernetes.io/instance: immich + app.kubernetes.io/managed-by: Helm + app.kubernetes.io/name: immich + helm.sh/chart: immich-4.6.2 + annotations: + helm.sh/resource-policy: keep + namespace: immich +spec: + accessModes: + - "ReadWriteOnce" + resources: + requests: + storage: "50Gi" + storageClassName: "ceph-block" diff --git a/clusters/cl01tl/manifests/immich/PodMonitor-immich-valkey.yaml b/clusters/cl01tl/manifests/immich/PodMonitor-immich-valkey.yaml new file mode 100644 index 000000000..de18b8537 --- /dev/null +++ b/clusters/cl01tl/manifests/immich/PodMonitor-immich-valkey.yaml @@ -0,0 +1,23 @@ +apiVersion: monitoring.coreos.com/v1 +kind: PodMonitor +metadata: + name: immich-valkey + labels: + helm.sh/chart: valkey-0.9.4 + app.kubernetes.io/name: valkey + app.kubernetes.io/instance: immich + app.kubernetes.io/version: "9.0.3" + app.kubernetes.io/managed-by: Helm + app.kubernetes.io/part-of: valkey + app.kubernetes.io/component: podmonitor +spec: + podMetricsEndpoints: + - port: metrics + interval: 30s + namespaceSelector: + matchNames: + - immich + selector: + matchLabels: + app.kubernetes.io/name: valkey + app.kubernetes.io/instance: immich diff --git a/clusters/cl01tl/manifests/immich/PrometheusRule-immich-backup-source-local.yaml b/clusters/cl01tl/manifests/immich/PrometheusRule-immich-backup-source-local.yaml new file mode 100644 index 000000000..a476eddd7 --- /dev/null +++ b/clusters/cl01tl/manifests/immich/PrometheusRule-immich-backup-source-local.yaml @@ -0,0 +1,30 @@ +apiVersion: monitoring.coreos.com/v1 +kind: PrometheusRule +metadata: + name: immich-backup-source-local + namespace: immich + labels: + helm.sh/chart: volsync-target-data-1.1.1 + app.kubernetes.io/instance: immich + app.kubernetes.io/part-of: immich + app.kubernetes.io/version: "1.1.1" + app.kubernetes.io/managed-by: Helm + app.kubernetes.io/name: immich-backup-source-local +spec: + groups: + - name: volsync.alerts + rules: + - alert: VolSyncBackupPodFailed + expr: | + (kube_pod_container_status_last_terminated_exitcode > 0) + * on(pod, namespace) group_left(owner_name) + kube_pod_owner{owner_kind="Job", owner_name=~"volsync-.*"} + for: 1m + labels: + severity: critical + annotations: + summary: "VolSync Backup Pod failed in {{ $labels.namespace }}" + description: | + A pod for the VolSync backup of PVC 'immich' failed with exit code {{ $value }}. + Job: {{ $labels.owner_name }} + Namespace: {{ $labels.namespace }} diff --git a/clusters/cl01tl/manifests/immich/PrometheusRule-immich-postgresql-18-alert-rules.yaml b/clusters/cl01tl/manifests/immich/PrometheusRule-immich-postgresql-18-alert-rules.yaml new file mode 100644 index 000000000..6d5b90393 --- /dev/null +++ b/clusters/cl01tl/manifests/immich/PrometheusRule-immich-postgresql-18-alert-rules.yaml @@ -0,0 +1,270 @@ +apiVersion: monitoring.coreos.com/v1 +kind: PrometheusRule +metadata: + name: immich-postgresql-18-alert-rules + namespace: immich + labels: + app.kubernetes.io/name: immich-postgresql-18-alert-rules + helm.sh/chart: postgres-18-cluster-7.12.1 + app.kubernetes.io/instance: immich + app.kubernetes.io/part-of: immich + app.kubernetes.io/version: "7.12.1" + app.kubernetes.io/managed-by: Helm +spec: + groups: + - name: cloudnative-pg/immich-postgresql-18 + rules: + - alert: CNPGClusterBackendsWaitingWarning + annotations: + summary: CNPG Cluster a backend is waiting for longer than 5 minutes. + description: |- + Pod {{ $labels.pod }} + has been waiting for longer than 5 minutes + expr: | + cnpg_backends_waiting_total{namespace="immich"} > 300 + for: 1m + labels: + severity: warning + namespace: immich + cnpg_cluster: immich-postgresql-18-cluster + - alert: CNPGClusterDatabaseDeadlockConflictsWarning + annotations: + summary: CNPG Cluster has over 10 deadlock conflicts. + description: |- + There are over 10 deadlock conflicts in + {{ $labels.pod }} + expr: | + cnpg_pg_stat_database_deadlocks{namespace="immich"} > 10 + for: 1m + labels: + severity: warning + namespace: immich + cnpg_cluster: immich-postgresql-18-cluster + - alert: CNPGClusterHACritical + annotations: + summary: CNPG Cluster has no standby replicas! + description: |- + CloudNativePG Cluster "{{`{{`}} $labels.job {{`}}`}}" has no ready standby replicas. Your cluster at a severe + risk of data loss and downtime if the primary instance fails. + + The primary instance is still online and able to serve queries, although connections to the `-ro` endpoint + will fail. The `-r` endpoint os operating at reduced capacity and all traffic is being served by the main. + + This can happen during a normal fail-over or automated minor version upgrades in a cluster with 2 or less + instances. The replaced instance may need some time to catch-up with the cluster primary instance. + + This alarm will be always trigger if your cluster is configured to run with only 1 instance. In this + case you may want to silence it. + runbook_url: https://github.com/cloudnative-pg/charts/blob/main/charts/cluster/docs/runbooks/CNPGClusterHACritical.md + expr: | + max by (job) (cnpg_pg_replication_streaming_replicas{namespace="immich"} - cnpg_pg_replication_is_wal_receiver_up{namespace="immich"}) < 1 + for: 5m + labels: + severity: critical + namespace: immich + cnpg_cluster: immich-postgresql-18-cluster + - alert: CNPGClusterHAWarning + annotations: + summary: CNPG Cluster less than 2 standby replicas. + description: |- + CloudNativePG Cluster "{{`{{`}} $labels.job {{`}}`}}" has only {{`{{`}} $value {{`}}`}} standby replicas, putting + your cluster at risk if another instance fails. The cluster is still able to operate normally, although + the `-ro` and `-r` endpoints operate at reduced capacity. + + This can happen during a normal fail-over or automated minor version upgrades. The replaced instance may + need some time to catch-up with the cluster primary instance. + + This alarm will be constantly triggered if your cluster is configured to run with less than 3 instances. + In this case you may want to silence it. + runbook_url: https://github.com/cloudnative-pg/charts/blob/main/charts/cluster/docs/runbooks/CNPGClusterHAWarning.md + expr: | + max by (job) (cnpg_pg_replication_streaming_replicas{namespace="immich"} - cnpg_pg_replication_is_wal_receiver_up{namespace="immich"}) < 2 + for: 5m + labels: + severity: warning + namespace: immich + cnpg_cluster: immich-postgresql-18-cluster + - alert: CNPGClusterHighConnectionsCritical + annotations: + summary: CNPG Instance maximum number of connections critical! + description: |- + CloudNativePG Cluster "immich/immich-postgresql-18-cluster" instance {{`{{`}} $labels.pod {{`}}`}} is using {{`{{`}} $value {{`}}`}}% of + the maximum number of connections. + runbook_url: https://github.com/cloudnative-pg/charts/blob/main/charts/cluster/docs/runbooks/CNPGClusterHighConnectionsCritical.md + expr: | + sum by (pod) (cnpg_backends_total{namespace="immich", pod=~"immich-postgresql-18-cluster-([1-9][0-9]*)$"}) / max by (pod) (cnpg_pg_settings_setting{name="max_connections", namespace="immich", pod=~"immich-postgresql-18-cluster-([1-9][0-9]*)$"}) * 100 > 95 + for: 5m + labels: + severity: critical + namespace: immich + cnpg_cluster: immich-postgresql-18-cluster + - alert: CNPGClusterHighConnectionsWarning + annotations: + summary: CNPG Instance is approaching the maximum number of connections. + description: |- + CloudNativePG Cluster "immich/immich-postgresql-18-cluster" instance {{`{{`}} $labels.pod {{`}}`}} is using {{`{{`}} $value {{`}}`}}% of + the maximum number of connections. + runbook_url: https://github.com/cloudnative-pg/charts/blob/main/charts/cluster/docs/runbooks/CNPGClusterHighConnectionsWarning.md + expr: | + sum by (pod) (cnpg_backends_total{namespace="immich", pod=~"immich-postgresql-18-cluster-([1-9][0-9]*)$"}) / max by (pod) (cnpg_pg_settings_setting{name="max_connections", namespace="immich", pod=~"immich-postgresql-18-cluster-([1-9][0-9]*)$"}) * 100 > 80 + for: 5m + labels: + severity: warning + namespace: immich + cnpg_cluster: immich-postgresql-18-cluster + - alert: CNPGClusterHighReplicationLag + annotations: + summary: CNPG Cluster high replication lag + description: |- + CloudNativePG Cluster "immich/immich-postgresql-18-cluster" is experiencing a high replication lag of + {{`{{`}} $value {{`}}`}}ms. + + High replication lag indicates network issues, busy instances, slow queries or suboptimal configuration. + runbook_url: https://github.com/cloudnative-pg/charts/blob/main/charts/cluster/docs/runbooks/CNPGClusterHighReplicationLag.md + expr: | + max(cnpg_pg_replication_lag{namespace="immich",pod=~"immich-postgresql-18-cluster-([1-9][0-9]*)$"}) * 1000 > 1000 + for: 5m + labels: + severity: warning + namespace: immich + cnpg_cluster: immich-postgresql-18-cluster + - alert: CNPGClusterInstancesOnSameNode + annotations: + summary: CNPG Cluster instances are located on the same node. + description: |- + CloudNativePG Cluster "immich/immich-postgresql-18-cluster" has {{`{{`}} $value {{`}}`}} + instances on the same node {{`{{`}} $labels.node {{`}}`}}. + + A failure or scheduled downtime of a single node will lead to a potential service disruption and/or data loss. + runbook_url: https://github.com/cloudnative-pg/charts/blob/main/charts/cluster/docs/runbooks/CNPGClusterInstancesOnSameNode.md + expr: | + count by (node) (kube_pod_info{namespace="immich", pod=~"immich-postgresql-18-cluster-([1-9][0-9]*)$"}) > 1 + for: 5m + labels: + severity: warning + namespace: immich + cnpg_cluster: immich-postgresql-18-cluster + - alert: CNPGClusterLongRunningTransactionWarning + annotations: + summary: CNPG Cluster query is taking longer than 5 minutes. + description: |- + CloudNativePG Cluster Pod {{ $labels.pod }} + is taking more than 5 minutes (300 seconds) for a query. + expr: |- + cnpg_backends_max_tx_duration_seconds{namespace="immich"} > 300 + for: 1m + labels: + severity: warning + namespace: immich + cnpg_cluster: immich-postgresql-18-cluster + - alert: CNPGClusterLowDiskSpaceCritical + annotations: + summary: CNPG Instance is running out of disk space! + description: |- + CloudNativePG Cluster "immich/immich-postgresql-18-cluster" is running extremely low on disk space. Check attached PVCs! + runbook_url: https://github.com/cloudnative-pg/charts/blob/main/charts/cluster/docs/runbooks/CNPGClusterLowDiskSpaceCritical.md + expr: | + max(max by(persistentvolumeclaim) (1 - kubelet_volume_stats_available_bytes{namespace="immich", persistentvolumeclaim=~"immich-postgresql-18-cluster-([1-9][0-9]*)$"} / kubelet_volume_stats_capacity_bytes{namespace="immich", persistentvolumeclaim=~"immich-postgresql-18-cluster-([1-9][0-9]*)$"})) > 0.9 OR + max(max by(persistentvolumeclaim) (1 - kubelet_volume_stats_available_bytes{namespace="immich", persistentvolumeclaim=~"immich-postgresql-18-cluster-([1-9][0-9]*)$-wal"} / kubelet_volume_stats_capacity_bytes{namespace="immich", persistentvolumeclaim=~"immich-postgresql-18-cluster-([1-9][0-9]*)$-wal"})) > 0.9 OR + max(sum by (namespace,persistentvolumeclaim) (kubelet_volume_stats_used_bytes{namespace="immich", persistentvolumeclaim=~"immich-postgresql-18-cluster-([1-9][0-9]*)$-tbs.*"}) + / + sum by (namespace,persistentvolumeclaim) (kubelet_volume_stats_capacity_bytes{namespace="immich", persistentvolumeclaim=~"immich-postgresql-18-cluster-([1-9][0-9]*)$-tbs.*"}) + * + on(namespace, persistentvolumeclaim) group_left(volume) + kube_pod_spec_volumes_persistentvolumeclaims_info{pod=~"immich-postgresql-18-cluster-([1-9][0-9]*)$"} + ) > 0.9 + for: 5m + labels: + severity: critical + namespace: immich + cnpg_cluster: immich-postgresql-18-cluster + - alert: CNPGClusterLowDiskSpaceWarning + annotations: + summary: CNPG Instance is running out of disk space. + description: |- + CloudNativePG Cluster "immich/immich-postgresql-18-cluster" is running low on disk space. Check attached PVCs. + runbook_url: https://github.com/cloudnative-pg/charts/blob/main/charts/cluster/docs/runbooks/CNPGClusterLowDiskSpaceWarning.md + expr: | + max(max by(persistentvolumeclaim) (1 - kubelet_volume_stats_available_bytes{namespace="immich", persistentvolumeclaim=~"immich-postgresql-18-cluster-([1-9][0-9]*)$"} / kubelet_volume_stats_capacity_bytes{namespace="immich", persistentvolumeclaim=~"immich-postgresql-18-cluster-([1-9][0-9]*)$"})) > 0.7 OR + max(max by(persistentvolumeclaim) (1 - kubelet_volume_stats_available_bytes{namespace="immich", persistentvolumeclaim=~"immich-postgresql-18-cluster-([1-9][0-9]*)$-wal"} / kubelet_volume_stats_capacity_bytes{namespace="immich", persistentvolumeclaim=~"immich-postgresql-18-cluster-([1-9][0-9]*)$-wal"})) > 0.7 OR + max(sum by (namespace,persistentvolumeclaim) (kubelet_volume_stats_used_bytes{namespace="immich", persistentvolumeclaim=~"immich-postgresql-18-cluster-([1-9][0-9]*)$-tbs.*"}) + / + sum by (namespace,persistentvolumeclaim) (kubelet_volume_stats_capacity_bytes{namespace="immich", persistentvolumeclaim=~"immich-postgresql-18-cluster-([1-9][0-9]*)$-tbs.*"}) + * + on(namespace, persistentvolumeclaim) group_left(volume) + kube_pod_spec_volumes_persistentvolumeclaims_info{pod=~"immich-postgresql-18-cluster-([1-9][0-9]*)$"} + ) > 0.7 + for: 5m + labels: + severity: warning + namespace: immich + cnpg_cluster: immich-postgresql-18-cluster + - alert: CNPGClusterOffline + annotations: + summary: CNPG Cluster has no running instances! + description: |- + CloudNativePG Cluster "immich/immich-postgresql-18-cluster" has no ready instances. + + Having an offline cluster means your applications will not be able to access the database, leading to + potential service disruption and/or data loss. + runbook_url: https://github.com/cloudnative-pg/charts/blob/main/charts/cluster/docs/runbooks/CNPGClusterOffline.md + expr: | + (count(cnpg_collector_up{namespace="immich",pod=~"immich-postgresql-18-cluster-([1-9][0-9]*)$"}) OR on() vector(0)) == 0 + for: 5m + labels: + severity: critical + namespace: immich + cnpg_cluster: immich-postgresql-18-cluster + - alert: CNPGClusterPGDatabaseXidAgeWarning + annotations: + summary: CNPG Cluster has a number of transactions from the frozen XID to the current one. + description: |- + Over 300,000,000 transactions from frozen xid + on pod {{ $labels.pod }} + expr: | + cnpg_pg_database_xid_age{namespace="immich"} > 300000000 + for: 1m + labels: + severity: warning + namespace: immich + cnpg_cluster: immich-postgresql-18-cluster + - alert: CNPGClusterPGReplicationWarning + annotations: + summary: CNPG Cluster standby is lagging behind the primary. + description: |- + Standby is lagging behind by over 300 seconds (5 minutes) + expr: | + cnpg_pg_replication_lag{namespace="immich"} > 300 + for: 1m + labels: + severity: warning + namespace: immich + cnpg_cluster: immich-postgresql-18-cluster + - alert: CNPGClusterReplicaFailingReplicationWarning + annotations: + summary: CNPG Cluster has a replica is failing to replicate. + description: |- + Replica {{ $labels.pod }} + is failing to replicate + expr: | + cnpg_pg_replication_in_recovery{namespace="immich"} > cnpg_pg_replication_is_wal_receiver_up{namespace="immich"} + for: 1m + labels: + severity: warning + namespace: immich + cnpg_cluster: immich-postgresql-18-cluster + - alert: CNPGClusterZoneSpreadWarning + annotations: + summary: CNPG Cluster instances in the same zone. + description: |- + CloudNativePG Cluster "immich/immich-postgresql-18-cluster" has instances in the same availability zone. + + A disaster in one availability zone will lead to a potential service disruption and/or data loss. + runbook_url: https://github.com/cloudnative-pg/charts/blob/main/charts/cluster/docs/runbooks/CNPGClusterZoneSpreadWarning.md + expr: | + 3 > count(count by (label_topology_kubernetes_io_zone) (kube_pod_info{namespace="immich", pod=~"immich-postgresql-18-cluster-([1-9][0-9]*)$"} * on(node,instance) group_left(label_topology_kubernetes_io_zone) kube_node_labels)) < 3 + for: 5m + labels: + severity: warning + namespace: immich + cnpg_cluster: immich-postgresql-18-cluster diff --git a/clusters/cl01tl/manifests/immich/PrometheusRule-immich-valkey.yaml b/clusters/cl01tl/manifests/immich/PrometheusRule-immich-valkey.yaml new file mode 100644 index 000000000..bc042f70f --- /dev/null +++ b/clusters/cl01tl/manifests/immich/PrometheusRule-immich-valkey.yaml @@ -0,0 +1,47 @@ +apiVersion: monitoring.coreos.com/v1 +kind: PrometheusRule +metadata: + name: immich-valkey + labels: + helm.sh/chart: valkey-0.9.4 + app.kubernetes.io/name: valkey + app.kubernetes.io/instance: immich + app.kubernetes.io/version: "9.0.3" + app.kubernetes.io/managed-by: Helm + app.kubernetes.io/part-of: valkey +spec: + groups: + - name: immich-valkey + rules: + - alert: ValkeyDown + annotations: + description: Valkey instance {{ $labels.instance }} is down. + summary: Valkey instance {{ $labels.instance }} down + expr: | + redis_up{service="immich-valkey-metrics"} == 0 + for: 2m + labels: + severity: error + - alert: ValkeyMemoryHigh + annotations: + description: | + Valkey instance {{ $labels.instance }} is using {{ $value }}% of its available memory. + summary: Valkey instance {{ $labels.instance }} is using too much memory + expr: | + redis_memory_used_bytes{service="immich-valkey-metrics"} * 100 + / + redis_memory_max_bytes{service="immich-valkey-metrics"} + > 90 <= 100 + for: 2m + labels: + severity: error + - alert: ValkeyKeyEviction + annotations: + description: | + Valkey instance {{ $labels.instance }} has evicted {{ $value }} keys in the last 5 minutes. + summary: Valkey instance {{ $labels.instance }} has evicted keys + expr: | + increase(redis_evicted_keys_total{service="immich-valkey-metrics"}[5m]) > 0 + for: 1s + labels: + severity: error diff --git a/clusters/cl01tl/manifests/immich/ReplicationSource-immich-backup-source-external.yaml b/clusters/cl01tl/manifests/immich/ReplicationSource-immich-backup-source-external.yaml new file mode 100644 index 000000000..6de3c1df5 --- /dev/null +++ b/clusters/cl01tl/manifests/immich/ReplicationSource-immich-backup-source-external.yaml @@ -0,0 +1,29 @@ +apiVersion: volsync.backube/v1alpha1 +kind: ReplicationSource +metadata: + name: immich-backup-source-external + namespace: immich + labels: + helm.sh/chart: volsync-target-data-1.1.1 + app.kubernetes.io/instance: immich + app.kubernetes.io/part-of: immich + app.kubernetes.io/version: "1.1.1" + app.kubernetes.io/managed-by: Helm + app.kubernetes.io/name: immich-backup +spec: + sourcePVC: immich + trigger: + schedule: 24 10 * * * + restic: + pruneIntervalDays: 7 + repository: immich-backup-secret-external + retain: + daily: 7 + hourly: 0 + monthly: 3 + weekly: 4 + yearly: 1 + copyMethod: Snapshot + storageClassName: ceph-block + volumeSnapshotClassName: ceph-blockpool-snapshot + cacheCapacity: 10Gi diff --git a/clusters/cl01tl/manifests/immich/ReplicationSource-immich-backup-source-local.yaml b/clusters/cl01tl/manifests/immich/ReplicationSource-immich-backup-source-local.yaml new file mode 100644 index 000000000..248145b53 --- /dev/null +++ b/clusters/cl01tl/manifests/immich/ReplicationSource-immich-backup-source-local.yaml @@ -0,0 +1,29 @@ +apiVersion: volsync.backube/v1alpha1 +kind: ReplicationSource +metadata: + name: immich-backup-source-local + namespace: immich + labels: + helm.sh/chart: volsync-target-data-1.1.1 + app.kubernetes.io/instance: immich + app.kubernetes.io/part-of: immich + app.kubernetes.io/version: "1.1.1" + app.kubernetes.io/managed-by: Helm + app.kubernetes.io/name: immich-backup-source-local +spec: + sourcePVC: immich + trigger: + schedule: 24 8 * * * + restic: + pruneIntervalDays: 7 + repository: immich-backup-secret-local + retain: + daily: 7 + hourly: 0 + monthly: 3 + weekly: 4 + yearly: 1 + copyMethod: Snapshot + storageClassName: ceph-block + volumeSnapshotClassName: ceph-blockpool-snapshot + cacheCapacity: 10Gi diff --git a/clusters/cl01tl/manifests/immich/ReplicationSource-immich-backup-source-remote.yaml b/clusters/cl01tl/manifests/immich/ReplicationSource-immich-backup-source-remote.yaml new file mode 100644 index 000000000..970c20e74 --- /dev/null +++ b/clusters/cl01tl/manifests/immich/ReplicationSource-immich-backup-source-remote.yaml @@ -0,0 +1,29 @@ +apiVersion: volsync.backube/v1alpha1 +kind: ReplicationSource +metadata: + name: immich-backup-source-remote + namespace: immich + labels: + helm.sh/chart: volsync-target-data-1.1.1 + app.kubernetes.io/instance: immich + app.kubernetes.io/part-of: immich + app.kubernetes.io/version: "1.1.1" + app.kubernetes.io/managed-by: Helm + app.kubernetes.io/name: immich-backup +spec: + sourcePVC: immich + trigger: + schedule: 24 9 * * * + restic: + pruneIntervalDays: 7 + repository: immich-backup-secret-remote + retain: + daily: 7 + hourly: 0 + monthly: 3 + weekly: 4 + yearly: 1 + copyMethod: Snapshot + storageClassName: ceph-block + volumeSnapshotClassName: ceph-blockpool-snapshot + cacheCapacity: 10Gi diff --git a/clusters/cl01tl/manifests/immich/ScheduledBackup-immich-postgresql-18-scheduled-backup-live-backup.yaml b/clusters/cl01tl/manifests/immich/ScheduledBackup-immich-postgresql-18-scheduled-backup-live-backup.yaml new file mode 100644 index 000000000..6f18a8a5b --- /dev/null +++ b/clusters/cl01tl/manifests/immich/ScheduledBackup-immich-postgresql-18-scheduled-backup-live-backup.yaml @@ -0,0 +1,24 @@ +apiVersion: postgresql.cnpg.io/v1 +kind: ScheduledBackup +metadata: + name: "immich-postgresql-18-scheduled-backup-live-backup" + namespace: immich + labels: + app.kubernetes.io/name: "immich-postgresql-18-scheduled-backup-live-backup" + helm.sh/chart: postgres-18-cluster-7.12.1 + app.kubernetes.io/instance: immich + app.kubernetes.io/part-of: immich + app.kubernetes.io/version: "7.12.1" + app.kubernetes.io/managed-by: Helm +spec: + immediate: true + suspend: false + schedule: "0 40 14 * * *" + backupOwnerReference: self + cluster: + name: immich-postgresql-18-cluster + method: plugin + pluginConfiguration: + name: barman-cloud.cloudnative-pg.io + parameters: + barmanObjectName: "immich-postgresql-18-backup-garage-local" diff --git a/clusters/cl01tl/manifests/immich/Secret-immich-immich-sa-token.yaml b/clusters/cl01tl/manifests/immich/Secret-immich-immich-sa-token.yaml new file mode 100644 index 000000000..2628b39fd --- /dev/null +++ b/clusters/cl01tl/manifests/immich/Secret-immich-immich-sa-token.yaml @@ -0,0 +1,13 @@ +apiVersion: v1 +kind: Secret +type: kubernetes.io/service-account-token +metadata: + name: immich-immich-sa-token + labels: + app.kubernetes.io/instance: immich + app.kubernetes.io/managed-by: Helm + app.kubernetes.io/name: immich + helm.sh/chart: immich-4.6.2 + annotations: + kubernetes.io/service-account.name: immich + namespace: immich diff --git a/clusters/cl01tl/manifests/immich/SecretProviderClass-immich-config.yaml b/clusters/cl01tl/manifests/immich/SecretProviderClass-immich-config.yaml new file mode 100644 index 000000000..d1c78ea57 --- /dev/null +++ b/clusters/cl01tl/manifests/immich/SecretProviderClass-immich-config.yaml @@ -0,0 +1,19 @@ +apiVersion: secrets-store.csi.x-k8s.io/v1 +kind: SecretProviderClass +metadata: + name: immich-config + namespace: immich + labels: + app.kubernetes.io/name: immich-config + app.kubernetes.io/instance: immich + app.kubernetes.io/part-of: immich +spec: + provider: openbao + parameters: + baoAddress: "http://openbao-internal.openbao:8200" + roleName: immich + objects: | + - objectName: immich.json + fileName: immich.json + secretPath: secret/data/cl01tl/immich/config + secretKey: immich.json diff --git a/clusters/cl01tl/manifests/immich/Service-immich-valkey-headless.yaml b/clusters/cl01tl/manifests/immich/Service-immich-valkey-headless.yaml new file mode 100644 index 000000000..1b5dcb00a --- /dev/null +++ b/clusters/cl01tl/manifests/immich/Service-immich-valkey-headless.yaml @@ -0,0 +1,23 @@ +apiVersion: v1 +kind: Service +metadata: + name: immich-valkey-headless + labels: + helm.sh/chart: valkey-0.9.4 + app.kubernetes.io/name: valkey + app.kubernetes.io/instance: immich + app.kubernetes.io/version: "9.0.3" + app.kubernetes.io/managed-by: Helm + app.kubernetes.io/component: headless +spec: + type: ClusterIP + clusterIP: None + publishNotReadyAddresses: true + ports: + - name: tcp + port: 6379 + targetPort: tcp + protocol: TCP + selector: + app.kubernetes.io/name: valkey + app.kubernetes.io/instance: immich diff --git a/clusters/cl01tl/manifests/immich/Service-immich-valkey-metrics.yaml b/clusters/cl01tl/manifests/immich/Service-immich-valkey-metrics.yaml new file mode 100644 index 000000000..ff6de0460 --- /dev/null +++ b/clusters/cl01tl/manifests/immich/Service-immich-valkey-metrics.yaml @@ -0,0 +1,23 @@ +apiVersion: v1 +kind: Service +metadata: + name: immich-valkey-metrics + labels: + helm.sh/chart: valkey-0.9.4 + app.kubernetes.io/name: valkey + app.kubernetes.io/instance: immich + app.kubernetes.io/version: "9.0.3" + app.kubernetes.io/managed-by: Helm + app.kubernetes.io/component: metrics + app.kubernetes.io/part-of: valkey + annotations: +spec: + type: ClusterIP + ports: + - name: metrics + port: 9121 + protocol: TCP + targetPort: metrics + selector: + app.kubernetes.io/name: valkey + app.kubernetes.io/instance: immich diff --git a/clusters/cl01tl/manifests/immich/Service-immich-valkey-read.yaml b/clusters/cl01tl/manifests/immich/Service-immich-valkey-read.yaml new file mode 100644 index 000000000..e91c2856b --- /dev/null +++ b/clusters/cl01tl/manifests/immich/Service-immich-valkey-read.yaml @@ -0,0 +1,21 @@ +apiVersion: v1 +kind: Service +metadata: + name: immich-valkey-read + labels: + helm.sh/chart: valkey-0.9.4 + app.kubernetes.io/name: valkey + app.kubernetes.io/instance: immich + app.kubernetes.io/version: "9.0.3" + app.kubernetes.io/managed-by: Helm + app.kubernetes.io/component: read +spec: + type: ClusterIP + ports: + - name: tcp + port: 6379 + targetPort: tcp + protocol: TCP + selector: + app.kubernetes.io/name: valkey + app.kubernetes.io/instance: immich diff --git a/clusters/cl01tl/manifests/immich/Service-immich-valkey.yaml b/clusters/cl01tl/manifests/immich/Service-immich-valkey.yaml new file mode 100644 index 000000000..1f4852319 --- /dev/null +++ b/clusters/cl01tl/manifests/immich/Service-immich-valkey.yaml @@ -0,0 +1,22 @@ +apiVersion: v1 +kind: Service +metadata: + name: immich-valkey + labels: + helm.sh/chart: valkey-0.9.4 + app.kubernetes.io/name: valkey + app.kubernetes.io/instance: immich + app.kubernetes.io/version: "9.0.3" + app.kubernetes.io/managed-by: Helm + app.kubernetes.io/component: primary +spec: + type: ClusterIP + ports: + - port: 6379 + targetPort: tcp + protocol: TCP + name: tcp + selector: + app.kubernetes.io/name: valkey + app.kubernetes.io/instance: immich + statefulset.kubernetes.io/pod-name: immich-valkey-0 diff --git a/clusters/cl01tl/manifests/immich/Service-immich.yaml b/clusters/cl01tl/manifests/immich/Service-immich.yaml new file mode 100644 index 000000000..fb0158a83 --- /dev/null +++ b/clusters/cl01tl/manifests/immich/Service-immich.yaml @@ -0,0 +1,30 @@ +apiVersion: v1 +kind: Service +metadata: + name: immich + labels: + app.kubernetes.io/instance: immich + app.kubernetes.io/managed-by: Helm + app.kubernetes.io/name: immich + app.kubernetes.io/service: immich + helm.sh/chart: immich-4.6.2 + namespace: immich +spec: + type: ClusterIP + ports: + - port: 2283 + targetPort: 2283 + protocol: TCP + name: http + - port: 8081 + targetPort: 8081 + protocol: TCP + name: metrics-api + - port: 8082 + targetPort: 8082 + protocol: TCP + name: metrics-ms + selector: + app.kubernetes.io/controller: main + app.kubernetes.io/instance: immich + app.kubernetes.io/name: immich diff --git a/clusters/cl01tl/manifests/immich/ServiceAccount-immich-valkey.yaml b/clusters/cl01tl/manifests/immich/ServiceAccount-immich-valkey.yaml new file mode 100644 index 000000000..d1e7d95df --- /dev/null +++ b/clusters/cl01tl/manifests/immich/ServiceAccount-immich-valkey.yaml @@ -0,0 +1,11 @@ +apiVersion: v1 +kind: ServiceAccount +metadata: + name: immich-valkey + labels: + helm.sh/chart: valkey-0.9.4 + app.kubernetes.io/name: valkey + app.kubernetes.io/instance: immich + app.kubernetes.io/version: "9.0.3" + app.kubernetes.io/managed-by: Helm +automountServiceAccountToken: false diff --git a/clusters/cl01tl/manifests/immich/ServiceAccount-immich.yaml b/clusters/cl01tl/manifests/immich/ServiceAccount-immich.yaml new file mode 100644 index 000000000..4c94fbe57 --- /dev/null +++ b/clusters/cl01tl/manifests/immich/ServiceAccount-immich.yaml @@ -0,0 +1,12 @@ +apiVersion: v1 +kind: ServiceAccount +metadata: + name: immich + labels: + app.kubernetes.io/instance: immich + app.kubernetes.io/managed-by: Helm + app.kubernetes.io/name: immich + helm.sh/chart: immich-4.6.2 + namespace: immich +secrets: + - name: immich-immich-sa-token diff --git a/clusters/cl01tl/manifests/immich/ServiceMonitor-immich-valkey.yaml b/clusters/cl01tl/manifests/immich/ServiceMonitor-immich-valkey.yaml new file mode 100644 index 000000000..631397d43 --- /dev/null +++ b/clusters/cl01tl/manifests/immich/ServiceMonitor-immich-valkey.yaml @@ -0,0 +1,24 @@ +apiVersion: monitoring.coreos.com/v1 +kind: ServiceMonitor +metadata: + name: immich-valkey + labels: + helm.sh/chart: valkey-0.9.4 + app.kubernetes.io/name: valkey + app.kubernetes.io/instance: immich + app.kubernetes.io/version: "9.0.3" + app.kubernetes.io/managed-by: Helm + app.kubernetes.io/part-of: valkey + app.kubernetes.io/component: service-monitor +spec: + endpoints: + - port: metrics + interval: 30s + namespaceSelector: + matchNames: + - immich + selector: + matchLabels: + app.kubernetes.io/name: valkey + app.kubernetes.io/instance: immich + app.kubernetes.io/component: metrics diff --git a/clusters/cl01tl/manifests/immich/ServiceMonitor-immich.yaml b/clusters/cl01tl/manifests/immich/ServiceMonitor-immich.yaml new file mode 100644 index 000000000..2e4deea17 --- /dev/null +++ b/clusters/cl01tl/manifests/immich/ServiceMonitor-immich.yaml @@ -0,0 +1,28 @@ +apiVersion: monitoring.coreos.com/v1 +kind: ServiceMonitor +metadata: + name: immich + labels: + app.kubernetes.io/instance: immich + app.kubernetes.io/managed-by: Helm + app.kubernetes.io/name: immich + helm.sh/chart: immich-4.6.2 + namespace: immich +spec: + jobLabel: immich + namespaceSelector: + matchNames: + - immich + selector: + matchLabels: + app.kubernetes.io/instance: immich + app.kubernetes.io/name: immich + endpoints: + - interval: 3m + path: /metrics + port: metrics-api + scrapeTimeout: 1m + - interval: 3m + path: /metrics + port: metrics-ms + scrapeTimeout: 1m diff --git a/clusters/cl01tl/manifests/immich/StatefulSet-immich-valkey.yaml b/clusters/cl01tl/manifests/immich/StatefulSet-immich-valkey.yaml new file mode 100644 index 000000000..993c7cda2 --- /dev/null +++ b/clusters/cl01tl/manifests/immich/StatefulSet-immich-valkey.yaml @@ -0,0 +1,133 @@ +apiVersion: apps/v1 +kind: StatefulSet +metadata: + name: immich-valkey + labels: + helm.sh/chart: valkey-0.9.4 + app.kubernetes.io/name: valkey + app.kubernetes.io/instance: immich + app.kubernetes.io/version: "9.0.3" + app.kubernetes.io/managed-by: Helm +spec: + serviceName: immich-valkey-headless + replicas: 3 + podManagementPolicy: OrderedReady + selector: + matchLabels: + app.kubernetes.io/name: valkey + app.kubernetes.io/instance: immich + volumeClaimTemplates: + - metadata: + name: valkey-data + spec: + accessModes: + - ReadWriteOnce + storageClassName: "ceph-block" + resources: + requests: + storage: "1Gi" + template: + metadata: + labels: + app.kubernetes.io/name: valkey + app.kubernetes.io/instance: immich + annotations: + checksum/initconfig: "2d8432be19db9efa32b993becf4e58d4" + spec: + automountServiceAccountToken: false + serviceAccountName: immich-valkey + securityContext: + fsGroup: 1000 + runAsGroup: 1000 + runAsUser: 1000 + seccompProfile: + type: RuntimeDefault + initContainers: + - name: immich-valkey-init + image: docker.io/valkey/valkey:9.0.3@sha256:3b55fbaa0cd93cf0d9d961f405e4dfcc70efe325e2d84da207a0a8e6d8fde4f9 + imagePullPolicy: IfNotPresent + securityContext: + allowPrivilegeEscalation: false + capabilities: + drop: + - ALL + readOnlyRootFilesystem: true + runAsNonRoot: true + runAsUser: 1000 + command: ["/scripts/init.sh"] + env: + - name: POD_INDEX + valueFrom: + fieldRef: + fieldPath: metadata.labels['apps.kubernetes.io/pod-index'] + volumeMounts: + - name: valkey-data + mountPath: /data + - name: scripts + mountPath: /scripts + containers: + - name: immich-valkey + image: docker.io/valkey/valkey:9.0.3@sha256:3b55fbaa0cd93cf0d9d961f405e4dfcc70efe325e2d84da207a0a8e6d8fde4f9 + imagePullPolicy: IfNotPresent + command: ["valkey-server"] + args: ["/data/conf/valkey.conf"] + securityContext: + allowPrivilegeEscalation: false + capabilities: + drop: + - ALL + readOnlyRootFilesystem: true + runAsNonRoot: true + runAsUser: 1000 + env: + - name: POD_INDEX + valueFrom: + fieldRef: + fieldPath: metadata.labels['apps.kubernetes.io/pod-index'] + - name: VALKEY_LOGLEVEL + value: "notice" + ports: + - name: tcp + containerPort: 6379 + protocol: TCP + startupProbe: + exec: + command: ["sh", "-c", "valkey-cli ping"] + livenessProbe: + exec: + command: ["sh", "-c", "valkey-cli ping"] + resources: + requests: + cpu: 10m + memory: 20Mi + volumeMounts: + - name: valkey-data + mountPath: /data + - name: metrics + image: ghcr.io/oliver006/redis_exporter:v1.83.0@sha256:e8c209894d4c0cc55b1259ddd47e0b769ad1ff864b356736ee885462a3b0e48c + imagePullPolicy: "IfNotPresent" + ports: + - name: metrics + containerPort: 9121 + startupProbe: + tcpSocket: + port: metrics + livenessProbe: + tcpSocket: + port: metrics + readinessProbe: + httpGet: + path: / + port: metrics + resources: + requests: + cpu: 1m + memory: 10M + env: + - name: REDIS_ALIAS + value: immich-valkey + volumes: + - name: scripts + configMap: + name: immich-valkey-init-scripts + defaultMode: 0555 -- 2.49.1 From 62f774cac38a453392657593b0fc807abb69c20d Mon Sep 17 00:00:00 2001 From: gitea-bot Date: Sun, 3 May 2026 01:11:15 +0000 Subject: [PATCH 5/5] chore: Update manifests after change --- .../manifests/mariadb-operator/Deployment-mariadb-operator.yaml | 2 -- 1 file changed, 2 deletions(-) diff --git a/clusters/cl01tl/manifests/mariadb-operator/Deployment-mariadb-operator.yaml b/clusters/cl01tl/manifests/mariadb-operator/Deployment-mariadb-operator.yaml index ddb318c0f..d6d5da99b 100644 --- a/clusters/cl01tl/manifests/mariadb-operator/Deployment-mariadb-operator.yaml +++ b/clusters/cl01tl/manifests/mariadb-operator/Deployment-mariadb-operator.yaml @@ -9,7 +9,6 @@ metadata: app.kubernetes.io/version: "26.3.0" app.kubernetes.io/managed-by: Helm spec: - replicas: 3 selector: matchLabels: app.kubernetes.io/name: mariadb-operator @@ -30,7 +29,6 @@ spec: args: - --metrics-addr=:8080 - --log-level=INFO - - --leader-elect ports: - containerPort: 8080 protocol: TCP -- 2.49.1