expand backups to all 3 targets
All checks were successful
lint-test-helm / lint-helm (push) Successful in 14s
render-manifests-push / render-manifests-push (push) Successful in 32s
renovate / renovate (push) Successful in 1m6s

This commit is contained in:
2025-12-20 13:56:13 -06:00
parent e212e19020
commit af800c427f
3 changed files with 449 additions and 65 deletions

View File

@@ -0,0 +1,85 @@
# ConfigMap carrying the shell script that prunes aged Talos etcd snapshots
# from an S3 bucket and, when a delete fails, raises an ntfy alert.
#
# Environment read by the script:
#   BUCKET              leading part of the s3cmd path that is listed and pruned
#   DATE_RANGE_SECONDS  retention window; snapshots whose name sorts before
#                       "now minus this many seconds" are deleted
#   TARGET              label used in log lines and in the alert title
#   NTFY_TOKEN, NTFY_ENDPOINT, NTFY_TOPIC
#                       bearer token and destination of the ntfy alert
#   MESSAGE             optional alert body; a default is used when unset
#
# NOTE(review): the prune containers invoke /scripts/prune.sh while this key is
# named backup.sh - confirm the volume mount maps one onto the other.
apiVersion: v1
kind: ConfigMap
metadata:
  name: vault-backup-script
  namespace: {{ .Release.Namespace }}
  labels:
    app.kubernetes.io/name: vault-backup-script
    app.kubernetes.io/instance: {{ .Release.Name }}
    app.kubernetes.io/part-of: {{ .Release.Name }}
data:
  backup.sh: |
    #!/bin/sh
    # Abort early instead of computing a bogus cutoff or listing the wrong path.
    : "${BUCKET:?BUCKET must be set}"
    : "${DATE_RANGE_SECONDS:?DATE_RANGE_SECONDS must be set}"

    # Cutoff timestamp = now - DATE_RANGE_SECONDS, formatted like the snapshot names.
    export DATE_RANGE="$(date -d @$(( $(date +%s) - ${DATE_RANGE_SECONDS} )) +%Y-%m-%dT%H:%M:%SZ)"
    # Object names are compared as strings against this synthetic name.
    export FILE_MATCH="${BUCKET}/cl01tl/etcd/cl01tl-${DATE_RANGE}.snap.age"
    ERROR=false

    echo ">> Running S3 prune for Talos backup repository ${TARGET} ..."
    echo ">> Backups prior to '$DATE_RANGE' will be removed"
    echo ">> Backups to be removed:"
    s3cmd ls --no-check-certificate "${BUCKET}/cl01tl/etcd/" |
      awk -v file_match="$FILE_MATCH" '$4 < file_match {print $4}'

    echo ">> Deleting ..."
    # The delete loop is the last stage of a pipeline and therefore runs in a
    # subshell: variables assigned there are lost. Report failure through the
    # subshell's exit status instead, and record it in the parent shell.
    s3cmd ls --no-check-certificate "${BUCKET}/cl01tl/etcd/" |
      awk -v file_match="$FILE_MATCH" '$4 < file_match {print $4}' |
      (
        FAILED=0
        while read -r file; do
          if ! s3cmd del --no-check-certificate -v "$file"; then
            FAILED=1
            echo ">> Detected error, will send message to ntfy"
          fi
        done
        exit "$FAILED"
      ) || ERROR=true

    if [ "$ERROR" = true ]; then
      MAX_RETRIES=5
      SUCCESS=false
      # Fall back to a generic body when the caller did not provide one.
      MESSAGE="${MESSAGE:-S3 prune failed: one or more old Talos backups could not be deleted}"
      echo " "
      echo ">> Sending message to ntfy using curl ..."
      echo " "
      echo ">> Verifying required commands ..."
      # The package repositories are only needed when curl has to be installed.
      if ! command -v curl >/dev/null 2>&1; then
        for i in $(seq 1 "$MAX_RETRIES"); do
          if apk update >/dev/null 2>&1; then
            echo ">> Attempt $i: Repositories are reachable";
            SUCCESS=true;
            break;
          else
            echo ">> Attempt $i: Connection failed, retrying in 5 seconds ...";
            sleep 5;
          fi;
        done;
        if [ "$SUCCESS" = false ]; then
          echo ">> ERROR: Could not connect to apk repositories after $MAX_RETRIES attempts, exiting ...";
          exit 1;
        fi
        echo ">> Command curl could not be found, installing";
        if apk add --no-cache -q curl; then
          echo ">> Installation successful";
        else
          # Capture the status immediately; any later command would overwrite it.
          APK_STATUS=$?
          echo ">> Installation failed with exit code $APK_STATUS";
          exit 1;
        fi;
      fi;
      echo " "
      echo ">> Sending to NTFY ..."
      # --output /dev/null keeps the response body out of HTTP_STATUS so that
      # only the status code written by --write-out is captured.
      HTTP_STATUS=$(curl \
        --silent \
        --output /dev/null \
        --write-out '%{http_code}' \
        -H "Authorization: Bearer ${NTFY_TOKEN}" \
        -H "X-Priority: 5" \
        -H "X-Tags: warning" \
        -H "X-Title: Talos Backup Failed for ${TARGET}" \
        -d "$MESSAGE" \
        "${NTFY_ENDPOINT}/${NTFY_TOPIC}"
      )
      echo ">> HTTP Status Code: $HTTP_STATUS"
    fi
    echo ">> Completed S3 prune for Talos backup repository ${TARGET}"

View File

@@ -1,10 +1,116 @@
# ExternalSecret that materialises the credentials and s3cmd configuration for
# the "local" backup target from the `vault` ClusterSecretStore.
# Each metadata key appears exactly once: the fragment previously carried both
# the old (`talos-etcd-backup-secret`) and new name, and duplicate mapping keys
# are invalid YAML that most parsers silently resolve to the last value.
apiVersion: external-secrets.io/v1
kind: ExternalSecret
metadata:
  name: talos-etcd-backup-local-secret
  namespace: {{ .Release.Namespace }}
  labels:
    app.kubernetes.io/name: talos-etcd-backup-local-secret
    app.kubernetes.io/instance: {{ .Release.Name }}
    app.kubernetes.io/part-of: {{ .Release.Name }}
  annotations:
    kubernetes.io/service-account.name: talos-backup-secrets
spec:
  secretStoreRef:
    kind: ClusterSecretStore
    name: vault
  data:
    # S3 credentials for the local target.
    - secretKey: AWS_ACCESS_KEY_ID
      remoteRef:
        conversionStrategy: Default
        decodingStrategy: None
        key: /garage/home-infra/talos-backups
        metadataPolicy: None
        property: AWS_ACCESS_KEY_ID
    - secretKey: AWS_SECRET_ACCESS_KEY
      remoteRef:
        conversionStrategy: Default
        decodingStrategy: None
        key: /garage/home-infra/talos-backups
        metadataPolicy: None
        property: AWS_SECRET_ACCESS_KEY
    # s3cmd configuration file; stored under the key `.s3cfg`.
    - secretKey: .s3cfg
      remoteRef:
        conversionStrategy: Default
        decodingStrategy: None
        key: /garage/home-infra/talos-backups
        metadataPolicy: None
        property: s3cfg-local
    - secretKey: BUCKET
      remoteRef:
        conversionStrategy: Default
        decodingStrategy: None
        key: /garage/home-infra/talos-backups
        metadataPolicy: None
        property: BUCKET
    # age public key, read from the cluster-wide etcd-backup path.
    - secretKey: AGE_X25519_PUBLIC_KEY
      remoteRef:
        conversionStrategy: Default
        decodingStrategy: None
        key: /cl01tl/talos/etcd-backup
        metadataPolicy: None
        property: AGE_X25519_PUBLIC_KEY
---
# ExternalSecret for the "remote" backup target. Pulls five values from the
# `vault` ClusterSecretStore into the secret `talos-etcd-backup-remote-secret`.
# NOTE(review): the S3 entries are read from /digital-ocean/home-infra/etcd-backup
# even though this is the "remote" target - confirm that path is intended.
apiVersion: external-secrets.io/v1
kind: ExternalSecret
metadata:
  name: talos-etcd-backup-remote-secret
  namespace: {{ .Release.Namespace }}
  labels:
    app.kubernetes.io/name: talos-etcd-backup-remote-secret
    app.kubernetes.io/instance: {{ .Release.Name }}
    app.kubernetes.io/part-of: {{ .Release.Name }}
  annotations:
    kubernetes.io/service-account.name: talos-backup-secrets
spec:
  secretStoreRef:
    kind: ClusterSecretStore
    name: vault
  data:
    # --- S3 access key pair ---
    - secretKey: AWS_ACCESS_KEY_ID
      remoteRef:
        conversionStrategy: Default
        decodingStrategy: None
        key: /digital-ocean/home-infra/etcd-backup
        metadataPolicy: None
        property: AWS_ACCESS_KEY_ID
    - secretKey: AWS_SECRET_ACCESS_KEY
      remoteRef:
        conversionStrategy: Default
        decodingStrategy: None
        key: /digital-ocean/home-infra/etcd-backup
        metadataPolicy: None
        property: AWS_SECRET_ACCESS_KEY
    # --- s3cmd configuration file, exposed under the key `.s3cfg` ---
    - secretKey: .s3cfg
      remoteRef:
        conversionStrategy: Default
        decodingStrategy: None
        key: /digital-ocean/home-infra/etcd-backup
        metadataPolicy: None
        property: s3cfg-remote
    # --- bucket value ---
    - secretKey: BUCKET
      remoteRef:
        conversionStrategy: Default
        decodingStrategy: None
        key: /digital-ocean/home-infra/etcd-backup
        metadataPolicy: None
        property: BUCKET
    # --- age public key, from the cluster-wide etcd-backup path ---
    - secretKey: AGE_X25519_PUBLIC_KEY
      remoteRef:
        conversionStrategy: Default
        decodingStrategy: None
        key: /cl01tl/talos/etcd-backup
        metadataPolicy: None
        property: AGE_X25519_PUBLIC_KEY
---
apiVersion: external-secrets.io/v1
kind: ExternalSecret
metadata:
name: talos-etcd-backup-external-secret
namespace: {{ .Release.Namespace }}
labels:
app.kubernetes.io/name: talos-etcd-backup-external-secret
app.kubernetes.io/instance: {{ .Release.Name }}
app.kubernetes.io/part-of: {{ .Release.Name }}
annotations:
@@ -50,6 +156,43 @@ spec:
metadataPolicy: None
property: AGE_X25519_PUBLIC_KEY
---
# ExternalSecret providing the ntfy settings (token, endpoint, topic) from the
# `vault` ClusterSecretStore as the secret `talos-backup-ntfy-secret`.
apiVersion: external-secrets.io/v1
kind: ExternalSecret
metadata:
  name: talos-backup-ntfy-secret
  namespace: {{ .Release.Namespace }}
  labels:
    app.kubernetes.io/name: talos-backup-ntfy-secret
    app.kubernetes.io/instance: {{ .Release.Name }}
    app.kubernetes.io/part-of: {{ .Release.Name }}
spec:
  secretStoreRef:
    kind: ClusterSecretStore
    name: vault
  data:
    # Token and endpoint come from the ntfy user entry.
    - secretKey: NTFY_TOKEN
      remoteRef:
        conversionStrategy: Default
        decodingStrategy: None
        key: /ntfy/user/cl01tl
        metadataPolicy: None
        property: token
    - secretKey: NTFY_ENDPOINT
      remoteRef:
        conversionStrategy: Default
        decodingStrategy: None
        key: /ntfy/user/cl01tl
        metadataPolicy: None
        property: endpoint
    # The topic is stored with the etcd-backup settings rather than the ntfy user.
    - secretKey: NTFY_TOPIC
      remoteRef:
        conversionStrategy: Default
        decodingStrategy: None
        key: /cl01tl/talos/etcd-backup
        metadataPolicy: None
        property: NTFY_TOPIC
---
apiVersion: external-secrets.io/v1
kind: ExternalSecret

View File

@@ -20,7 +20,7 @@ etcd-backup:
backoffLimit: 3
parallelism: 1
containers:
main:
local:
image:
repository: ghcr.io/siderolabs/talos-backup
tag: v0.1.0-beta.3@sha256:05c86663b251a407551dc948097e32e163a345818117eb52c573b0447bd0c7a7
@@ -42,12 +42,104 @@ etcd-backup:
- name: AWS_ACCESS_KEY_ID
valueFrom:
secretKeyRef:
name: talos-etcd-backup-secret
name: talos-etcd-backup-local-secret
key: AWS_ACCESS_KEY_ID
- name: AWS_SECRET_ACCESS_KEY
valueFrom:
secretKeyRef:
name: talos-etcd-backup-secret
name: talos-etcd-backup-local-secret
key: AWS_SECRET_ACCESS_KEY
- name: AWS_REGION
value: us-east-1
- name: CUSTOM_S3_ENDPOINT
value: http://garage-main.garage:3900
- name: BUCKET
value: talos-backups
- name: S3_PREFIX
value: "cl01tl/etcd"
- name: CLUSTER_NAME
value: "cl01tl"
- name: AGE_X25519_PUBLIC_KEY
valueFrom:
secretKeyRef:
name: talos-etcd-backup-local-secret
key: AGE_X25519_PUBLIC_KEY
- name: USE_PATH_STYLE
value: "false"
# Backup container for the "remote" target: runs /talos-backup from /tmp as
# UID/GID 1000 with all capabilities dropped, with S3 credentials and the age
# key taken from talos-etcd-backup-remote-secret.
remote:
  image:
    repository: ghcr.io/siderolabs/talos-backup
    tag: v0.1.0-beta.3@sha256:05c86663b251a407551dc948097e32e163a345818117eb52c573b0447bd0c7a7
    pullPolicy: IfNotPresent
  command:
    - /talos-backup
  workingDir: /tmp
  securityContext:
    runAsUser: 1000
    runAsGroup: 1000
    allowPrivilegeEscalation: false
    runAsNonRoot: true
    capabilities:
      drop: [ALL]
    seccompProfile:
      type: RuntimeDefault
  env:
    # Credentials
    - name: AWS_ACCESS_KEY_ID
      valueFrom:
        secretKeyRef:
          name: talos-etcd-backup-remote-secret
          key: AWS_ACCESS_KEY_ID
    - name: AWS_SECRET_ACCESS_KEY
      valueFrom:
        secretKeyRef:
          name: talos-etcd-backup-remote-secret
          key: AWS_SECRET_ACCESS_KEY
    # Destination
    - name: AWS_REGION
      value: us-east-1
    - name: CUSTOM_S3_ENDPOINT
      value: https://garage-ps10rp.boreal-beaufort.ts.net:3900
    - name: BUCKET
      value: talos-backups
    - name: S3_PREFIX
      value: "cl01tl/etcd"
    - name: CLUSTER_NAME
      value: "cl01tl"
    # Encryption key
    - name: AGE_X25519_PUBLIC_KEY
      valueFrom:
        secretKeyRef:
          name: talos-etcd-backup-remote-secret
          key: AGE_X25519_PUBLIC_KEY
    # Kept as a quoted string so it is not parsed as a YAML boolean.
    - name: USE_PATH_STYLE
      value: "false"
external:
image:
repository: ghcr.io/siderolabs/talos-backup
tag: v0.1.0-beta.3@sha256:05c86663b251a407551dc948097e32e163a345818117eb52c573b0447bd0c7a7
pullPolicy: IfNotPresent
command:
- /talos-backup
workingDir: /tmp
securityContext:
runAsUser: 1000
runAsGroup: 1000
allowPrivilegeEscalation: false
runAsNonRoot: true
capabilities:
drop:
- ALL
seccompProfile:
type: RuntimeDefault
env:
- name: AWS_ACCESS_KEY_ID
valueFrom:
secretKeyRef:
name: talos-etcd-backup-external-secret
key: AWS_ACCESS_KEY_ID
- name: AWS_SECRET_ACCESS_KEY
valueFrom:
secretKeyRef:
name: talos-etcd-backup-external-secret
key: AWS_SECRET_ACCESS_KEY
- name: AWS_REGION
value: nyc3
@@ -62,15 +154,11 @@ etcd-backup:
- name: AGE_X25519_PUBLIC_KEY
valueFrom:
secretKeyRef:
name: talos-etcd-backup-secret
name: talos-etcd-backup-external-secret
key: AGE_X25519_PUBLIC_KEY
- name: USE_PATH_STYLE
value: "false"
resources:
requests:
cpu: 100m
memory: 128Mi
s3-prune:
s3-prune-local:
image:
repository: d3fk/s3cmd
tag: latest@sha256:ed348a0fae5723d2e62636c175baf4dfaf732a790179ca675d1f24f863d0d68f
@@ -79,48 +167,58 @@ etcd-backup:
- /bin/sh
args:
- -ec
- |
export DATE_RANGE=$(date -d @$(( $(date +%s) - 1209600 )) +%Y-%m-%dT%H:%M:%SZ);
export FILE_MATCH="$BUCKET/cl01tl/etcd/cl01tl-$DATE_RANGE.snap.age"
echo ">> Running S3 prune for Talos backup repository"
echo ">> Backups prior to '$DATE_RANGE' will be removed"
echo ">> Backups to be removed:"
s3cmd ls ${BUCKET}/cl01tl/etcd/ |
awk -v file_match="$FILE_MATCH" '$4 < file_match {print $4}'
echo ">> Deleting ..."
s3cmd ls ${BUCKET}/cl01tl/etcd/ |
awk -v file_match="$FILE_MATCH" '$4 < file_match {print $4}' |
while read file; do
s3cmd del "$file";
done;
echo ">> Completed S3 prune for Talos backup repository"
- /scripts/prune.sh
envFrom:
- secretRef:
name: talos-etcd-backup-local-secret
- secretRef:
name: talos-backup-ntfy-secret
env:
- name: BUCKET
valueFrom:
secretKeyRef:
name: talos-etcd-backup-secret
key: BUCKET
resources:
requests:
cpu: 100m
memory: 128Mi
- name: TARGET
value: Local
- name: DATE_RANGE_SECONDS
value: 2419200
# Prune container for the "Remote" target: runs /scripts/prune.sh through
# `/bin/sh -ec`, with environment from the remote S3 secret and the ntfy secret.
# NOTE(review): confirm a volume actually provides /scripts/prune.sh in this
# container; no such mount is visible in this part of the file.
s3-prune-remote:
  image:
    repository: d3fk/s3cmd
    tag: latest@sha256:ed348a0fae5723d2e62636c175baf4dfaf732a790179ca675d1f24f863d0d68f
    pullPolicy: IfNotPresent
  command:
    - /bin/sh
  args:
    - -ec
    - /scripts/prune.sh
  envFrom:
    - secretRef:
        name: talos-etcd-backup-remote-secret
    - secretRef:
        name: talos-backup-ntfy-secret
  env:
    - name: TARGET
      value: Remote
    # 2419200 s = 28 days. Quoted because container env values are strings; an
    # unquoted large integer may also be re-rendered in scientific notation
    # when it passes through Helm templating.
    - name: DATE_RANGE_SECONDS
      value: "2419200"
# Prune container for the "External" target: runs /scripts/prune.sh through
# `/bin/sh -ec`, with environment from the external S3 secret and the ntfy secret.
# NOTE(review): confirm a volume actually provides /scripts/prune.sh in this
# container; no such mount is visible in this part of the file.
s3-prune-external:
  image:
    repository: d3fk/s3cmd
    tag: latest@sha256:ed348a0fae5723d2e62636c175baf4dfaf732a790179ca675d1f24f863d0d68f
    pullPolicy: IfNotPresent
  command:
    - /bin/sh
  args:
    - -ec
    - /scripts/prune.sh
  envFrom:
    - secretRef:
        name: talos-etcd-backup-external-secret
    - secretRef:
        name: talos-backup-ntfy-secret
  env:
    - name: TARGET
      value: External
    # 1209600 s = 14 days. Quoted because container env values are strings; an
    # unquoted large integer may also be re-rendered in scientific notation
    # when it passes through Helm templating.
    - name: DATE_RANGE_SECONDS
      value: "1209600"
persistence:
tmp:
type: emptyDir
medium: Memory
advancedMounts:
main:
main:
- path: /tmp
readOnly: false
talos:
type: emptyDir
medium: Memory
advancedMounts:
main:
main:
- path: /.talos
readOnly: false
secret:
enabled: true
type: secret
@@ -131,10 +229,10 @@ etcd-backup:
- path: /var/run/secrets/talos.dev
readOnly: true
mountPropagation: None
s3cmd-config:
s3cmd-config-local:
enabled: true
type: secret
name: talos-etcd-backup-secret
name: talos-etcd-backup-local-secret
advancedMounts:
main:
s3-prune:
@@ -142,6 +240,76 @@ etcd-backup:
readOnly: true
mountPropagation: None
subPath: .s3cfg
# Mounts the `.s3cfg` key of talos-etcd-backup-remote-secret read-only at
# /root/.s3cfg. The mount targets the `s3-prune-remote` container: the former
# `s3-prune` container name is no longer defined, so a mount keyed on it would
# not reach the container that prunes the remote target.
s3cmd-config-remote:
  enabled: true
  type: secret
  name: talos-etcd-backup-remote-secret
  advancedMounts:
    main:
      s3-prune-remote:
        - path: /root/.s3cfg
          readOnly: true
          mountPropagation: None
          subPath: .s3cfg
# Mounts the `.s3cfg` key of talos-etcd-backup-external-secret read-only at
# /root/.s3cfg. The mount targets the `s3-prune-external` container: the former
# `s3-prune` container name is no longer defined, so a mount keyed on it would
# not reach the container that prunes the external target.
s3cmd-config-external:
  enabled: true
  type: secret
  name: talos-etcd-backup-external-secret
  advancedMounts:
    main:
      s3-prune-external:
        - path: /root/.s3cfg
          readOnly: true
          mountPropagation: None
          subPath: .s3cfg
# One memory-backed emptyDir per backup container, each mounted writable at
# /tmp, so the three containers do not share a scratch directory.
tmp-local:
  type: emptyDir
  medium: Memory
  advancedMounts:
    main:
      local:
        - path: /tmp
          readOnly: false

tmp-remote:
  type: emptyDir
  medium: Memory
  advancedMounts:
    main:
      remote:
        - path: /tmp
          readOnly: false

tmp-external:
  type: emptyDir
  medium: Memory
  advancedMounts:
    main:
      external:
        - path: /tmp
          readOnly: false
# One memory-backed emptyDir per backup container, each mounted writable at
# /.talos in exactly one of the local / remote / external containers.
talos-local:
  type: emptyDir
  medium: Memory
  advancedMounts:
    main:
      local:
        - path: /.talos
          readOnly: false

talos-remote:
  type: emptyDir
  medium: Memory
  advancedMounts:
    main:
      remote:
        - path: /.talos
          readOnly: false

talos-external:
  type: emptyDir
  medium: Memory
  advancedMounts:
    main:
      external:
        - path: /.talos
          readOnly: false
etcd-defrag:
global:
fullnameOverride: etcd-defrag
@@ -179,10 +347,6 @@ etcd-defrag:
env:
- name: TALOSCONFIG
value: /tmp/.talos/config
resources:
requests:
cpu: 100m
memory: 128Mi
defrag-2:
type: cronjob
pod:
@@ -216,10 +380,6 @@ etcd-defrag:
env:
- name: TALOSCONFIG
value: /tmp/.talos/config
resources:
requests:
cpu: 100m
memory: 128Mi
defrag-3:
type: cronjob
pod:
@@ -253,10 +413,6 @@ etcd-defrag:
env:
- name: TALOSCONFIG
value: /tmp/.talos/config
resources:
requests:
cpu: 100m
memory: 128Mi
persistence:
talos-config-1:
enabled: true