From af800c427fbb8c32e3e9199aa6535fcf01a2ad4b Mon Sep 17 00:00:00 2001
From: Alex Lebens
Date: Sat, 20 Dec 2025 13:56:13 -0600
Subject: [PATCH] expand backups to all 3 targets

---
 .../cl01tl/helm/talos/templates/config.yaml   |  96 ++++++
 .../helm/talos/templates/external-secret.yaml | 147 ++++++++-
 clusters/cl01tl/helm/talos/values.yaml        | 282 ++++++++++++++----
 3 files changed, 460 insertions(+), 65 deletions(-)
 create mode 100644 clusters/cl01tl/helm/talos/templates/config.yaml

diff --git a/clusters/cl01tl/helm/talos/templates/config.yaml b/clusters/cl01tl/helm/talos/templates/config.yaml
new file mode 100644
index 000000000..5669d167b
--- /dev/null
+++ b/clusters/cl01tl/helm/talos/templates/config.yaml
@@ -0,0 +1,96 @@
+apiVersion: v1
+kind: ConfigMap
+metadata:
+  name: vault-backup-script
+  namespace: {{ .Release.Namespace }}
+  labels:
+    app.kubernetes.io/name: vault-backup-script
+    app.kubernetes.io/instance: {{ .Release.Name }}
+    app.kubernetes.io/part-of: {{ .Release.Name }}
+data:
+  prune.sh: |
+    # Prune Talos etcd snapshots older than DATE_RANGE_SECONDS from BUCKET and
+    # notify ntfy when a deletion fails. Reads BUCKET, DATE_RANGE_SECONDS,
+    # TARGET, NTFY_TOKEN, NTFY_ENDPOINT and NTFY_TOPIC from the environment.
+    : "${BUCKET:?BUCKET must be set}"
+    : "${DATE_RANGE_SECONDS:?DATE_RANGE_SECONDS must be set}"
+
+    export DATE_RANGE=$(date -d @$(( $(date +%s) - ${DATE_RANGE_SECONDS} )) +%Y-%m-%dT%H:%M:%SZ);
+    export FILE_MATCH="${BUCKET}/cl01tl/etcd/cl01tl-${DATE_RANGE}.snap.age"
+
+    echo ">> Running S3 prune for Talos backup repository ${TARGET} ..."
+
+    echo ">> Backups prior to '$DATE_RANGE' will be removed"
+    echo ">> Backups to be removed:"
+    # List once so the set printed is exactly the set that gets deleted.
+    PRUNE_FILES=$(s3cmd ls --no-check-certificate "${BUCKET}/cl01tl/etcd/" |
+      awk -v file_match="$FILE_MATCH" '$4 < file_match {print $4}')
+    echo "$PRUNE_FILES"
+
+    echo ">> Deleting ..."
+    # Iterate in the current shell: a `cmd | while read` loop runs in a
+    # subshell, so ERROR set inside it would be lost after the loop.
+    ERROR=false
+    for file in $PRUNE_FILES; do
+      if ! s3cmd del --no-check-certificate -v "$file"; then
+        ERROR=true
+        echo ">> Detected error, will send message to ntfy"
+      fi
+    done
+
+    if [ "$ERROR" = true ]; then
+
+      MAX_RETRIES=5
+      SUCCESS=false
+      # Body of the ntfy notification; overridable through the environment.
+      MESSAGE="${MESSAGE:-One or more Talos etcd backups could not be pruned from ${TARGET}}"
+
+      echo " "
+      echo ">> Sending message to ntfy using curl ..."
+
+      echo " "
+      echo ">> Verifying required commands ..."
+
+      for i in $(seq 1 "$MAX_RETRIES"); do
+        if apk update >/dev/null 2>&1; then
+          echo ">> Attempt $i: Repositories are reachable";
+          SUCCESS=true;
+          break;
+        else
+          echo ">> Attempt $i: Connection failed, retrying in 5 seconds ...";
+          sleep 5;
+        fi;
+      done;
+
+      if [ "$SUCCESS" = false ]; then
+        echo ">> ERROR: Could not connect to apk repositories after $MAX_RETRIES attempts, exiting ...";
+        exit 1;
+      fi
+
+      if ! command -v curl >/dev/null 2>&1; then
+        echo ">> Command curl could not be found, installing";
+        if apk add --no-cache -q curl; then
+          echo ">> Installation successful";
+        else
+          APK_STATUS=$?
+          echo ">> Installation failed with exit code $APK_STATUS";
+          exit 1;
+        fi;
+      fi;
+
+      echo " "
+      echo ">> Sending to NTFY ..."
+      HTTP_STATUS=$(curl \
+        --silent \
+        --write-out '%{http_code}' \
+        -H "Authorization: Bearer ${NTFY_TOKEN}" \
+        -H "X-Priority: 5" \
+        -H "X-Tags: warning" \
+        -H "X-Title: Talos Backup Failed for ${TARGET}" \
+        -d "$MESSAGE" \
+        "${NTFY_ENDPOINT}/${NTFY_TOPIC}"
+      )
+      echo ">> HTTP Status Code: $HTTP_STATUS"
+    fi
+
+    echo ">> Completed S3 prune for Talos backup repository ${TARGET}"
diff --git a/clusters/cl01tl/helm/talos/templates/external-secret.yaml b/clusters/cl01tl/helm/talos/templates/external-secret.yaml
index 199dc9bc7..5ee5b8de6 100644
--- a/clusters/cl01tl/helm/talos/templates/external-secret.yaml
+++ b/clusters/cl01tl/helm/talos/templates/external-secret.yaml
@@ -1,10 +1,116 @@
 apiVersion: external-secrets.io/v1
 kind: ExternalSecret
 metadata:
-  name: talos-etcd-backup-secret
+  name: talos-etcd-backup-local-secret
   namespace: {{ .Release.Namespace }}
   labels:
-    app.kubernetes.io/name: talos-etcd-backup-secret
+    app.kubernetes.io/name: talos-etcd-backup-local-secret
+    app.kubernetes.io/instance: {{ .Release.Name }}
+    app.kubernetes.io/part-of: {{ .Release.Name }}
+  annotations:
+    kubernetes.io/service-account.name: talos-backup-secrets
+spec:
+  secretStoreRef:
+    kind: ClusterSecretStore
+    name: vault
+  data:
+    - secretKey: AWS_ACCESS_KEY_ID
+      remoteRef:
+        conversionStrategy: Default
+        decodingStrategy: None
+        key: /garage/home-infra/talos-backups
+        metadataPolicy: None
+        property: AWS_ACCESS_KEY_ID
+    - secretKey: AWS_SECRET_ACCESS_KEY
+      remoteRef:
+        conversionStrategy: Default
+        decodingStrategy: None
+        key: /garage/home-infra/talos-backups
+        metadataPolicy: None
+        property: AWS_SECRET_ACCESS_KEY
+    - secretKey: .s3cfg
+      remoteRef:
+        conversionStrategy: Default
+        decodingStrategy: None
+        key: /garage/home-infra/talos-backups
+        metadataPolicy: None
+        property: s3cfg-local
+    - secretKey: BUCKET
+      remoteRef:
+        conversionStrategy: Default
+        decodingStrategy: None
+        key: /garage/home-infra/talos-backups
+        metadataPolicy: None
+        property: BUCKET
+    - secretKey: AGE_X25519_PUBLIC_KEY
+      remoteRef:
+        conversionStrategy: Default
+        decodingStrategy: None
+        key: /cl01tl/talos/etcd-backup
+        metadataPolicy: None
+        property: AGE_X25519_PUBLIC_KEY
+
+---
+apiVersion: external-secrets.io/v1
+kind: ExternalSecret
+metadata:
+  name: talos-etcd-backup-remote-secret
+  namespace: {{ .Release.Namespace }}
+  labels:
+    app.kubernetes.io/name: talos-etcd-backup-remote-secret
+    app.kubernetes.io/instance: {{ .Release.Name }}
+    app.kubernetes.io/part-of: {{ .Release.Name }}
+  annotations:
+    kubernetes.io/service-account.name: talos-backup-secrets
+spec:
+  secretStoreRef:
+    kind: ClusterSecretStore
+    name: vault
+  data:
+    - secretKey: AWS_ACCESS_KEY_ID
+      remoteRef:
+        conversionStrategy: Default
+        decodingStrategy: None
+        key: /digital-ocean/home-infra/etcd-backup
+        metadataPolicy: None
+        property: AWS_ACCESS_KEY_ID
+    - secretKey: AWS_SECRET_ACCESS_KEY
+      remoteRef:
+        conversionStrategy: Default
+        decodingStrategy: None
+        key: /digital-ocean/home-infra/etcd-backup
+        metadataPolicy: None
+        property: AWS_SECRET_ACCESS_KEY
+    - secretKey: .s3cfg
+      remoteRef:
+        conversionStrategy: Default
+        decodingStrategy: None
+        key: /digital-ocean/home-infra/etcd-backup
+        metadataPolicy: None
+        property: s3cfg-remote
+    - secretKey: BUCKET
+      remoteRef:
+        conversionStrategy: Default
+        decodingStrategy: None
+        key: /digital-ocean/home-infra/etcd-backup
+        metadataPolicy: None
+        property: BUCKET
+    - secretKey: AGE_X25519_PUBLIC_KEY
+      remoteRef:
+        conversionStrategy: Default
+        decodingStrategy: None
+        key: /cl01tl/talos/etcd-backup
+        metadataPolicy: None
+        property: AGE_X25519_PUBLIC_KEY
+
+---
+apiVersion: external-secrets.io/v1
+kind: ExternalSecret
+metadata:
+  name: talos-etcd-backup-external-secret
+  namespace: {{ .Release.Namespace }}
+  labels:
+    app.kubernetes.io/name: talos-etcd-backup-external-secret
     app.kubernetes.io/instance: {{ .Release.Name }}
     app.kubernetes.io/part-of: {{ .Release.Name }}
   annotations:
@@ -50,6 +156,43 @@ spec:
         metadataPolicy: None
         property: AGE_X25519_PUBLIC_KEY
 
+---
+apiVersion: external-secrets.io/v1
+kind: ExternalSecret
+metadata:
+  name: talos-backup-ntfy-secret
+  namespace: {{ .Release.Namespace }}
+  labels:
+    app.kubernetes.io/name: talos-backup-ntfy-secret
+    app.kubernetes.io/instance: {{ .Release.Name }}
+    app.kubernetes.io/part-of: {{ .Release.Name }}
+spec:
+  secretStoreRef:
+    kind: ClusterSecretStore
+    name: vault
+  data:
+    - secretKey: NTFY_TOKEN
+      remoteRef:
+        conversionStrategy: Default
+        decodingStrategy: None
+        key: /ntfy/user/cl01tl
+        metadataPolicy: None
+        property: token
+    - secretKey: NTFY_ENDPOINT
+      remoteRef:
+        conversionStrategy: Default
+        decodingStrategy: None
+        key: /ntfy/user/cl01tl
+        metadataPolicy: None
+        property: endpoint
+    - secretKey: NTFY_TOPIC
+      remoteRef:
+        conversionStrategy: Default
+        decodingStrategy: None
+        key: /cl01tl/talos/etcd-backup
+        metadataPolicy: None
+        property: NTFY_TOPIC
+
 ---
 apiVersion: external-secrets.io/v1
 kind: ExternalSecret
diff --git a/clusters/cl01tl/helm/talos/values.yaml b/clusters/cl01tl/helm/talos/values.yaml
index ec33d7ce1..38c617d91 100644
--- a/clusters/cl01tl/helm/talos/values.yaml
+++ b/clusters/cl01tl/helm/talos/values.yaml
@@ -20,7 +20,7 @@ etcd-backup:
         backoffLimit: 3
         parallelism: 1
       containers:
-        main:
+        local:
image: repository: ghcr.io/siderolabs/talos-backup tag: v0.1.0-beta.3@sha256:05c86663b251a407551dc948097e32e163a345818117eb52c573b0447bd0c7a7 @@ -42,12 +42,104 @@ etcd-backup: - name: AWS_ACCESS_KEY_ID valueFrom: secretKeyRef: - name: talos-etcd-backup-secret + name: talos-etcd-backup-local-secret key: AWS_ACCESS_KEY_ID - name: AWS_SECRET_ACCESS_KEY valueFrom: secretKeyRef: - name: talos-etcd-backup-secret + name: talos-etcd-backup-local-secret + key: AWS_SECRET_ACCESS_KEY + - name: AWS_REGION + value: us-east-1 + - name: CUSTOM_S3_ENDPOINT + value: http://garage-main.garage:3900 + - name: BUCKET + value: talos-backups + - name: S3_PREFIX + value: "cl01tl/etcd" + - name: CLUSTER_NAME + value: "cl01tl" + - name: AGE_X25519_PUBLIC_KEY + valueFrom: + secretKeyRef: + name: talos-etcd-backup-local-secret + key: AGE_X25519_PUBLIC_KEY + - name: USE_PATH_STYLE + value: "false" + remote: + image: + repository: ghcr.io/siderolabs/talos-backup + tag: v0.1.0-beta.3@sha256:05c86663b251a407551dc948097e32e163a345818117eb52c573b0447bd0c7a7 + pullPolicy: IfNotPresent + command: + - /talos-backup + workingDir: /tmp + securityContext: + runAsUser: 1000 + runAsGroup: 1000 + allowPrivilegeEscalation: false + runAsNonRoot: true + capabilities: + drop: + - ALL + seccompProfile: + type: RuntimeDefault + env: + - name: AWS_ACCESS_KEY_ID + valueFrom: + secretKeyRef: + name: talos-etcd-backup-remote-secret + key: AWS_ACCESS_KEY_ID + - name: AWS_SECRET_ACCESS_KEY + valueFrom: + secretKeyRef: + name: talos-etcd-backup-remote-secret + key: AWS_SECRET_ACCESS_KEY + - name: AWS_REGION + value: us-east-1 + - name: CUSTOM_S3_ENDPOINT + value: https://garage-ps10rp.boreal-beaufort.ts.net:3900 + - name: BUCKET + value: talos-backups + - name: S3_PREFIX + value: "cl01tl/etcd" + - name: CLUSTER_NAME + value: "cl01tl" + - name: AGE_X25519_PUBLIC_KEY + valueFrom: + secretKeyRef: + name: talos-etcd-backup-remote-secret + key: AGE_X25519_PUBLIC_KEY + - name: USE_PATH_STYLE + value: "false" + external: + 
image: + repository: ghcr.io/siderolabs/talos-backup + tag: v0.1.0-beta.3@sha256:05c86663b251a407551dc948097e32e163a345818117eb52c573b0447bd0c7a7 + pullPolicy: IfNotPresent + command: + - /talos-backup + workingDir: /tmp + securityContext: + runAsUser: 1000 + runAsGroup: 1000 + allowPrivilegeEscalation: false + runAsNonRoot: true + capabilities: + drop: + - ALL + seccompProfile: + type: RuntimeDefault + env: + - name: AWS_ACCESS_KEY_ID + valueFrom: + secretKeyRef: + name: talos-etcd-backup-external-secret + key: AWS_ACCESS_KEY_ID + - name: AWS_SECRET_ACCESS_KEY + valueFrom: + secretKeyRef: + name: talos-etcd-backup-external-secret key: AWS_SECRET_ACCESS_KEY - name: AWS_REGION value: nyc3 @@ -62,15 +154,11 @@ etcd-backup: - name: AGE_X25519_PUBLIC_KEY valueFrom: secretKeyRef: - name: talos-etcd-backup-secret + name: talos-etcd-backup-external-secret key: AGE_X25519_PUBLIC_KEY - name: USE_PATH_STYLE value: "false" - resources: - requests: - cpu: 100m - memory: 128Mi - s3-prune: + s3-prune-local: image: repository: d3fk/s3cmd tag: latest@sha256:ed348a0fae5723d2e62636c175baf4dfaf732a790179ca675d1f24f863d0d68f @@ -79,48 +167,58 @@ etcd-backup: - /bin/sh args: - -ec - - | - export DATE_RANGE=$(date -d @$(( $(date +%s) - 1209600 )) +%Y-%m-%dT%H:%M:%SZ); - export FILE_MATCH="$BUCKET/cl01tl/etcd/cl01tl-$DATE_RANGE.snap.age" - echo ">> Running S3 prune for Talos backup repository" - echo ">> Backups prior to '$DATE_RANGE' will be removed" - echo ">> Backups to be removed:" - s3cmd ls ${BUCKET}/cl01tl/etcd/ | - awk -v file_match="$FILE_MATCH" '$4 < file_match {print $4}' - echo ">> Deleting ..." 
- s3cmd ls ${BUCKET}/cl01tl/etcd/ | - awk -v file_match="$FILE_MATCH" '$4 < file_match {print $4}' | - while read file; do - s3cmd del "$file"; - done; - echo ">> Completed S3 prune for Talos backup repository" + - /scripts/prune.sh + envFrom: + - secretRef: + name: talos-etcd-backup-local-secret + - secretRef: + name: talos-backup-ntfy-secret env: - - name: BUCKET - valueFrom: - secretKeyRef: - name: talos-etcd-backup-secret - key: BUCKET - resources: - requests: - cpu: 100m - memory: 128Mi + - name: TARGET + value: Local + - name: DATE_RANGE_SECONDS + value: 2419200 + s3-prune-remote: + image: + repository: d3fk/s3cmd + tag: latest@sha256:ed348a0fae5723d2e62636c175baf4dfaf732a790179ca675d1f24f863d0d68f + pullPolicy: IfNotPresent + command: + - /bin/sh + args: + - -ec + - /scripts/prune.sh + envFrom: + - secretRef: + name: talos-etcd-backup-remote-secret + - secretRef: + name: talos-backup-ntfy-secret + env: + - name: TARGET + value: Remote + - name: DATE_RANGE_SECONDS + value: 2419200 + s3-prune-external: + image: + repository: d3fk/s3cmd + tag: latest@sha256:ed348a0fae5723d2e62636c175baf4dfaf732a790179ca675d1f24f863d0d68f + pullPolicy: IfNotPresent + command: + - /bin/sh + args: + - -ec + - /scripts/prune.sh + envFrom: + - secretRef: + name: talos-etcd-backup-external-secret + - secretRef: + name: talos-backup-ntfy-secret + env: + - name: TARGET + value: External + - name: DATE_RANGE_SECONDS + value: 1209600 persistence: - tmp: - type: emptyDir - medium: Memory - advancedMounts: - main: - main: - - path: /tmp - readOnly: false - talos: - type: emptyDir - medium: Memory - advancedMounts: - main: - main: - - path: /.talos - readOnly: false secret: enabled: true type: secret @@ -131,10 +229,10 @@ etcd-backup: - path: /var/run/secrets/talos.dev readOnly: true mountPropagation: None - s3cmd-config: + s3cmd-config-local: enabled: true type: secret - name: talos-etcd-backup-secret + name: talos-etcd-backup-local-secret advancedMounts: main: s3-prune: @@ -142,6 +240,76 
@@ etcd-backup: readOnly: true mountPropagation: None subPath: .s3cfg + s3cmd-config-remote: + enabled: true + type: secret + name: talos-etcd-backup-remote-secret + advancedMounts: + main: + s3-prune: + - path: /root/.s3cfg + readOnly: true + mountPropagation: None + subPath: .s3cfg + s3cmd-config-external: + enabled: true + type: secret + name: talos-etcd-backup-external-secret + advancedMounts: + main: + s3-prune: + - path: /root/.s3cfg + readOnly: true + mountPropagation: None + subPath: .s3cfg + tmp-local: + type: emptyDir + medium: Memory + advancedMounts: + main: + local: + - path: /tmp + readOnly: false + tmp-remote: + type: emptyDir + medium: Memory + advancedMounts: + main: + remote: + - path: /tmp + readOnly: false + tmp-external: + type: emptyDir + medium: Memory + advancedMounts: + main: + external: + - path: /tmp + readOnly: false + talos-local: + type: emptyDir + medium: Memory + advancedMounts: + main: + local: + - path: /.talos + readOnly: false + talos-remote: + type: emptyDir + medium: Memory + advancedMounts: + main: + remote: + - path: /.talos + readOnly: false + talos-external: + type: emptyDir + medium: Memory + advancedMounts: + main: + external: + - path: /.talos + readOnly: false etcd-defrag: global: fullnameOverride: etcd-defrag @@ -179,10 +347,6 @@ etcd-defrag: env: - name: TALOSCONFIG value: /tmp/.talos/config - resources: - requests: - cpu: 100m - memory: 128Mi defrag-2: type: cronjob pod: @@ -216,10 +380,6 @@ etcd-defrag: env: - name: TALOSCONFIG value: /tmp/.talos/config - resources: - requests: - cpu: 100m - memory: 128Mi defrag-3: type: cronjob pod: @@ -253,10 +413,6 @@ etcd-defrag: env: - name: TALOSCONFIG value: /tmp/.talos/config - resources: - requests: - cpu: 100m - memory: 128Mi persistence: talos-config-1: enabled: true