diff --git a/clusters/cl01tl/manifests/talos/ConfigMap-vault-backup-script.yaml b/clusters/cl01tl/manifests/talos/ConfigMap-vault-backup-script.yaml
new file mode 100644
index 000000000..7f4c6beab
--- /dev/null
+++ b/clusters/cl01tl/manifests/talos/ConfigMap-vault-backup-script.yaml
@@ -0,0 +1,120 @@
+apiVersion: v1
+kind: ConfigMap
+metadata:
+  name: vault-backup-script
+  namespace: talos
+  labels:
+    app.kubernetes.io/name: vault-backup-script
+    app.kubernetes.io/instance: talos
+    app.kubernetes.io/part-of: talos
+data:
+  # Exposed to the s3-prune-* containers of CronJob talos as /scripts/prune.sh.
+  backup.sh: |
+    #!/bin/sh
+    # Prune encrypted etcd snapshots older than DATE_RANGE_SECONDS from
+    # ${BUCKET}/cl01tl/etcd/ and alert ntfy when listing or deleting fails.
+    # Reads BUCKET, TARGET, DATE_RANGE_SECONDS, NTFY_TOKEN, NTFY_ENDPOINT and
+    # NTFY_TOPIC from the environment.
+
+    # $(( )) only does integer math, so reject values such as "1.2096e+06".
+    case "${DATE_RANGE_SECONDS}" in
+      ''|*[!0-9]*)
+        echo ">> ERROR: DATE_RANGE_SECONDS must be whole seconds, got '${DATE_RANGE_SECONDS}'"
+        exit 1
+        ;;
+    esac
+
+    # -u keeps the cutoff in UTC so it agrees with the literal "Z" suffix.
+    export DATE_RANGE=$(date -u -d @$(( $(date +%s) - ${DATE_RANGE_SECONDS} )) +%Y-%m-%dT%H:%M:%SZ);
+    export FILE_MATCH="${BUCKET}/cl01tl/etcd/cl01tl-${DATE_RANGE}.snap.age"
+    ERROR=false
+
+    echo ">> Running S3 prune for Talos backup repository ${TARGET} ..."
+
+    echo ">> Backups prior to '$DATE_RANGE' will be removed"
+    # List once; a failed listing is reported rather than read as "nothing to prune".
+    if LISTING=$(s3cmd ls --no-check-certificate "${BUCKET}/cl01tl/etcd/"); then
+      FILES=$(printf '%s\n' "$LISTING" |
+        awk -v file_match="$FILE_MATCH" '$4 < file_match {print $4}')
+    else
+      FILES=""
+      ERROR=true
+      echo ">> Detected error, will send message to ntfy"
+    fi
+
+    echo ">> Backups to be removed:"
+    printf '%s\n' "$FILES"
+
+    echo ">> Deleting ..."
+    # Feed the loop from a here-document instead of a pipe: a piped `while` runs
+    # in a subshell, so ERROR=true would be lost as soon as the loop ends.
+    while read -r file; do
+      # Skip blank entries (no matches, or listing rows without a fourth column).
+      [ -n "$file" ] || continue
+      if ! s3cmd del --no-check-certificate -v "$file"; then
+        ERROR=true
+        echo ">> Detected error, will send message to ntfy"
+      fi
+    done <<EOF
+    $FILES
+    EOF
+
+    if [ "$ERROR" = true ]; then
+
+      MAX_RETRIES=5
+      SUCCESS=false
+      MESSAGE="S3 prune for Talos backup repository ${TARGET} hit errors; see the job logs"
+
+      echo " "
+      echo ">> Sending message to ntfy using curl ..."
+
+      echo " "
+      echo ">> Verifying required commands ..."
+
+      if ! command -v curl >/dev/null 2>&1; then
+        # curl is missing: confirm the apk repositories answer before installing.
+        for i in $(seq 1 "$MAX_RETRIES"); do
+          if apk update >/dev/null 2>&1; then
+            echo ">> Attempt $i: Repositories are reachable";
+            SUCCESS=true;
+            break;
+          else
+            echo ">> Attempt $i: Connection failed, retrying in 5 seconds ...";
+            sleep 5;
+          fi;
+        done;
+
+        if [ "$SUCCESS" = false ]; then
+          echo ">> ERROR: Could not connect to apk repositories after $MAX_RETRIES attempts, exiting ...";
+          exit 1;
+        fi
+
+        echo ">> Command curl could not be found, installing";
+        if apk add --no-cache -q curl; then
+          echo ">> Installation successful";
+        else
+          # Capture the status first; any later command would overwrite $?.
+          APK_STATUS=$?
+          echo ">> Installation failed with exit code $APK_STATUS";
+          exit 1;
+        fi;
+      fi;
+
+      echo " "
+      echo ">> Sending to NTFY ..."
+      # --output /dev/null keeps the response body out of HTTP_STATUS.
+      HTTP_STATUS=$(curl \
+        --silent \
+        --output /dev/null \
+        --write-out '%{http_code}' \
+        -H "Authorization: Bearer ${NTFY_TOKEN}" \
+        -H "X-Priority: 5" \
+        -H "X-Tags: warning" \
+        -H "X-Title: Talos Backup Failed for ${TARGET}" \
+        -d "$MESSAGE" \
+        "${NTFY_ENDPOINT}/${NTFY_TOPIC}"
+      )
+      echo ">> HTTP Status Code: $HTTP_STATUS"
+    fi
+
+    echo ">> Completed S3 prune for Talos backup repository ${TARGET}"
diff --git a/clusters/cl01tl/manifests/talos/CronJob-etcd-defrag-defrag-1.yaml b/clusters/cl01tl/manifests/talos/CronJob-etcd-defrag-defrag-1.yaml
index 5d793b38f..d12e9e5a1 100644
--- a/clusters/cl01tl/manifests/talos/CronJob-etcd-defrag-defrag-1.yaml
+++ b/clusters/cl01tl/manifests/talos/CronJob-etcd-defrag-defrag-1.yaml
@@ -54,10 +54,6 @@ spec:
               image: ghcr.io/siderolabs/talosctl:v1.11.6
               imagePullPolicy: IfNotPresent
               name: main
-              resources:
-                requests:
-                  cpu: 100m
-                  memory: 128Mi
               volumeMounts:
                 - mountPath: /tmp/.talos/config
                   mountPropagation: None
diff --git a/clusters/cl01tl/manifests/talos/CronJob-etcd-defrag-defrag-2.yaml b/clusters/cl01tl/manifests/talos/CronJob-etcd-defrag-defrag-2.yaml
index 8ba4fef2a..66feed3b0 100644
--- a/clusters/cl01tl/manifests/talos/CronJob-etcd-defrag-defrag-2.yaml
+++ b/clusters/cl01tl/manifests/talos/CronJob-etcd-defrag-defrag-2.yaml
@@ -54,10 +54,6 @@ spec:
               image: ghcr.io/siderolabs/talosctl:v1.11.6
               imagePullPolicy: IfNotPresent
               name: main
-              resources:
-                requests:
-                  cpu: 100m
-                  memory: 128Mi
               volumeMounts:
                 - mountPath: /tmp/.talos/config
                   mountPropagation: None
diff --git a/clusters/cl01tl/manifests/talos/CronJob-etcd-defrag-defrag-3.yaml b/clusters/cl01tl/manifests/talos/CronJob-etcd-defrag-defrag-3.yaml
index 10515c9f2..6403d7f38 100644
--- a/clusters/cl01tl/manifests/talos/CronJob-etcd-defrag-defrag-3.yaml
+++ b/clusters/cl01tl/manifests/talos/CronJob-etcd-defrag-defrag-3.yaml
@@ -54,10 +54,6 @@ spec:
               image: ghcr.io/siderolabs/talosctl:v1.11.6
               imagePullPolicy: IfNotPresent
               name: main
-              resources:
-                requests:
-                  cpu: 100m
-                  memory: 128Mi
               volumeMounts:
                 - mountPath: /tmp/.talos/config
                   mountPropagation: None
diff --git a/clusters/cl01tl/manifests/talos/CronJob-talos.yaml b/clusters/cl01tl/manifests/talos/CronJob-talos.yaml
index e42fd2139..6024f9472 100644
--- a/clusters/cl01tl/manifests/talos/CronJob-talos.yaml
+++ b/clusters/cl01tl/manifests/talos/CronJob-talos.yaml
@@ -50,12 +50,12 @@ spec:
                   valueFrom:
                     secretKeyRef:
                       key: AWS_ACCESS_KEY_ID
-                      name: talos-etcd-backup-secret
+                      name: talos-etcd-backup-external-secret
                 - name: AWS_SECRET_ACCESS_KEY
                   valueFrom:
                     secretKeyRef:
                       key: AWS_SECRET_ACCESS_KEY
-                      name: talos-etcd-backup-secret
+                      name: talos-etcd-backup-external-secret
                 - name: AWS_REGION
                   value: nyc3
                 - name: CUSTOM_S3_ENDPOINT
@@ -70,16 +70,12 @@ spec:
                   valueFrom:
                     secretKeyRef:
                       key: AGE_X25519_PUBLIC_KEY
-                      name: talos-etcd-backup-secret
+                      name: talos-etcd-backup-external-secret
                 - name: USE_PATH_STYLE
                   value: "false"
               image: ghcr.io/siderolabs/talos-backup:v0.1.0-beta.3@sha256:05c86663b251a407551dc948097e32e163a345818117eb52c573b0447bd0c7a7
               imagePullPolicy: IfNotPresent
-              name: main
-              resources:
-                requests:
-                  cpu: 100m
-                  memory: 128Mi
+              name: external
               securityContext:
                 allowPrivilegeEscalation: false
                 capabilities:
@@ -91,63 +87,242 @@ spec:
                 seccompProfile:
                   type: RuntimeDefault
               volumeMounts:
+                # NOTE(review): talos-backup presumably reads its Talos API credentials here; the "secret" volume is otherwise unmounted.
                 - mountPath: /var/run/secrets/talos.dev
                   mountPropagation: None
                   name: secret
                   readOnly: true
                 - mountPath: /.talos
-                  name: talos
+                  name: talos-external
                 - mountPath: /tmp
-                  name: tmp
+                  name: tmp-external
+              workingDir: /tmp
+            - command:
+                - /talos-backup
+              env:
+                - name: AWS_ACCESS_KEY_ID
+                  valueFrom:
+                    secretKeyRef:
+                      key: AWS_ACCESS_KEY_ID
+                      name: talos-etcd-backup-local-secret
+                - name: AWS_SECRET_ACCESS_KEY
+                  valueFrom:
+                    secretKeyRef:
+                      key: AWS_SECRET_ACCESS_KEY
+                      name: talos-etcd-backup-local-secret
+                - name: AWS_REGION
+                  value: us-east-1
+                - name: CUSTOM_S3_ENDPOINT
+                  value: http://garage-main.garage:3900
+                - name: BUCKET
+                  value: talos-backups
+                - name: S3_PREFIX
+                  value: cl01tl/etcd
+                - name: CLUSTER_NAME
+                  value: cl01tl
+                - name: AGE_X25519_PUBLIC_KEY
+                  valueFrom:
+                    secretKeyRef:
+                      key: AGE_X25519_PUBLIC_KEY
+                      name: talos-etcd-backup-local-secret
+                - name: USE_PATH_STYLE
+                  value: "false"
+              image: ghcr.io/siderolabs/talos-backup:v0.1.0-beta.3@sha256:05c86663b251a407551dc948097e32e163a345818117eb52c573b0447bd0c7a7
+              imagePullPolicy: IfNotPresent
+              name: local
+              securityContext:
+                allowPrivilegeEscalation: false
+                capabilities:
+                  drop:
+                    - ALL
+                runAsGroup: 1000
+                runAsNonRoot: true
+                runAsUser: 1000
+                seccompProfile:
+                  type: RuntimeDefault
+              volumeMounts:
+                - mountPath: /var/run/secrets/talos.dev
+                  mountPropagation: None
+                  name: secret
+                  readOnly: true
+                - mountPath: /.talos
+                  name: talos-local
+                - mountPath: /tmp
+                  name: tmp-local
+              workingDir: /tmp
+            - command:
+                - /talos-backup
+              env:
+                - name: AWS_ACCESS_KEY_ID
+                  valueFrom:
+                    secretKeyRef:
+                      key: AWS_ACCESS_KEY_ID
+                      name: talos-etcd-backup-remote-secret
+                - name: AWS_SECRET_ACCESS_KEY
+                  valueFrom:
+                    secretKeyRef:
+                      key: AWS_SECRET_ACCESS_KEY
+                      name: talos-etcd-backup-remote-secret
+                - name: AWS_REGION
+                  value: us-east-1
+                - name: CUSTOM_S3_ENDPOINT
+                  value: https://garage-ps10rp.boreal-beaufort.ts.net:3900
+                - name: BUCKET
+                  value: talos-backups
+                - name: S3_PREFIX
+                  value: cl01tl/etcd
+                - name: CLUSTER_NAME
+                  value: cl01tl
+                - name: AGE_X25519_PUBLIC_KEY
+                  valueFrom:
+                    secretKeyRef:
+                      key: AGE_X25519_PUBLIC_KEY
+                      name: talos-etcd-backup-remote-secret
+                - name: USE_PATH_STYLE
+                  value: "false"
+              image: ghcr.io/siderolabs/talos-backup:v0.1.0-beta.3@sha256:05c86663b251a407551dc948097e32e163a345818117eb52c573b0447bd0c7a7
+              imagePullPolicy: IfNotPresent
+              name: remote
+              securityContext:
+                allowPrivilegeEscalation: false
+                capabilities:
+                  drop:
+                    - ALL
+                runAsGroup: 1000
+                runAsNonRoot: true
+                runAsUser: 1000
+                seccompProfile:
+                  type: RuntimeDefault
+              volumeMounts:
+                - mountPath: /var/run/secrets/talos.dev
+                  mountPropagation: None
+                  name: secret
+                  readOnly: true
+                - mountPath: /.talos
+                  name: talos-remote
+                - mountPath: /tmp
+                  name: tmp-remote
               workingDir: /tmp
             - args:
                 - -ec
-                - |
-                  export DATE_RANGE=$(date -d @$(( $(date +%s) - 1209600 )) +%Y-%m-%dT%H:%M:%SZ);
-                  export FILE_MATCH="$BUCKET/cl01tl/etcd/cl01tl-$DATE_RANGE.snap.age"
-                  echo ">> Running S3 prune for Talos backup repository"
-                  echo ">> Backups prior to '$DATE_RANGE' will be removed"
-                  echo ">> Backups to be removed:"
-                  s3cmd ls ${BUCKET}/cl01tl/etcd/ |
-                  awk -v file_match="$FILE_MATCH" '$4 < file_match {print $4}'
-                  echo ">> Deleting ..."
-                  s3cmd ls ${BUCKET}/cl01tl/etcd/ |
-                  awk -v file_match="$FILE_MATCH" '$4 < file_match {print $4}' |
-                  while read file; do
-                    s3cmd del "$file";
-                  done;
-                  echo ">> Completed S3 prune for Talos backup repository"
+                - /scripts/prune.sh
               command:
                 - /bin/sh
               env:
-                - name: BUCKET
-                  valueFrom:
-                    secretKeyRef:
-                      key: BUCKET
-                      name: talos-etcd-backup-secret
+                - name: TARGET
+                  value: External
+                # Plain integer seconds (14 days); the script feeds this to shell integer arithmetic.
+                - name: DATE_RANGE_SECONDS
+                  value: "1209600"
+              envFrom:
+                - secretRef:
+                    name: talos-etcd-backup-external-secret
+                - secretRef:
+                    name: talos-backup-ntfy-secret
               image: d3fk/s3cmd:latest@sha256:ed348a0fae5723d2e62636c175baf4dfaf732a790179ca675d1f24f863d0d68f
               imagePullPolicy: IfNotPresent
-              name: s3-prune
-              resources:
-                requests:
-                  cpu: 100m
-                  memory: 128Mi
-              volumeMounts:
-                - mountPath: /root/.s3cfg
-                  mountPropagation: None
-                  name: s3cmd-config
-                  readOnly: true
-                  subPath: .s3cfg
+              name: s3-prune-external
+              volumeMounts:
+                - mountPath: /root/.s3cfg
+                  mountPropagation: None
+                  name: s3cmd-config-external
+                  readOnly: true
+                  subPath: .s3cfg
+                - mountPath: /scripts
+                  name: prune-script
+                  readOnly: true
+            - args:
+                - -ec
+                - /scripts/prune.sh
+              command:
+                - /bin/sh
+              env:
+                - name: TARGET
+                  value: Local
+                # Plain integer seconds (28 days); the script feeds this to shell integer arithmetic.
+                - name: DATE_RANGE_SECONDS
+                  value: "2419200"
+              envFrom:
+                - secretRef:
+                    name: talos-etcd-backup-local-secret
+                - secretRef:
+                    name: talos-backup-ntfy-secret
+              image: d3fk/s3cmd:latest@sha256:ed348a0fae5723d2e62636c175baf4dfaf732a790179ca675d1f24f863d0d68f
+              imagePullPolicy: IfNotPresent
+              name: s3-prune-local
+              volumeMounts:
+                - mountPath: /root/.s3cfg
+                  mountPropagation: None
+                  name: s3cmd-config-local
+                  readOnly: true
+                  subPath: .s3cfg
+                - mountPath: /scripts
+                  name: prune-script
+                  readOnly: true
+            - args:
+                - -ec
+                - /scripts/prune.sh
+              command:
+                - /bin/sh
+              env:
+                - name: TARGET
+                  value: Remote
+                # Plain integer seconds (28 days); the script feeds this to shell integer arithmetic.
+                - name: DATE_RANGE_SECONDS
+                  value: "2419200"
+              envFrom:
+                - secretRef:
+                    name: talos-etcd-backup-remote-secret
+                - secretRef:
+                    name: talos-backup-ntfy-secret
+              image: d3fk/s3cmd:latest@sha256:ed348a0fae5723d2e62636c175baf4dfaf732a790179ca675d1f24f863d0d68f
+              imagePullPolicy: IfNotPresent
+              name: s3-prune-remote
+              volumeMounts:
+                - mountPath: /root/.s3cfg
+                  mountPropagation: None
+                  name: s3cmd-config-remote
+                  readOnly: true
+                  subPath: .s3cfg
+                - mountPath: /scripts
+                  name: prune-script
+                  readOnly: true
           volumes:
-            - name: s3cmd-config
+            # 493 is octal 0755: `sh -ec /scripts/prune.sh` executes the file, so it must be executable.
+            - configMap:
+                defaultMode: 493
+                items:
+                  - key: backup.sh
+                    path: prune.sh
+                name: vault-backup-script
+              name: prune-script
+            - name: s3cmd-config-external
               secret:
-                secretName: talos-etcd-backup-secret
+                secretName: talos-etcd-backup-external-secret
+            - name: s3cmd-config-local
+              secret:
+                secretName: talos-etcd-backup-local-secret
+            - name: s3cmd-config-remote
+              secret:
+                secretName: talos-etcd-backup-remote-secret
             - name: secret
               secret:
                 secretName: talos-backup-secrets
             - emptyDir:
                 medium: Memory
-              name: talos
+              name: talos-external
             - emptyDir:
                 medium: Memory
-              name: tmp
+              name: talos-local
+            - emptyDir:
+                medium: Memory
+              name: talos-remote
+            - emptyDir:
+                medium: Memory
+              name: tmp-external
+            - emptyDir:
+                medium: Memory
+              name: tmp-local
+            - emptyDir:
+                medium: Memory
+              name: tmp-remote
diff --git a/clusters/cl01tl/manifests/talos/ExternalSecret-talos-backup-ntfy-secret.yaml b/clusters/cl01tl/manifests/talos/ExternalSecret-talos-backup-ntfy-secret.yaml
new file mode 100644
index 000000000..8c1ddad52
--- /dev/null
+++ b/clusters/cl01tl/manifests/talos/ExternalSecret-talos-backup-ntfy-secret.yaml
@@ -0,0 +1,35 @@
+apiVersion: external-secrets.io/v1
+kind: ExternalSecret
+metadata:
+  name: talos-backup-ntfy-secret
+  namespace: talos
+  labels:
+    app.kubernetes.io/name: talos-backup-ntfy-secret
+    app.kubernetes.io/instance: talos
+    app.kubernetes.io/part-of: talos
+spec:
+  secretStoreRef:
+    kind: ClusterSecretStore
+    name: vault
+  data:
+    - secretKey: NTFY_TOKEN
+      remoteRef:
+        conversionStrategy: Default
+        decodingStrategy: None
+        key: /ntfy/user/cl01tl
+        metadataPolicy: None
+        property: token
+    - secretKey: NTFY_ENDPOINT
+      remoteRef:
+        conversionStrategy: Default
+        decodingStrategy: None
+        key: /ntfy/user/cl01tl
+        metadataPolicy: None
+        property: endpoint
+    - secretKey: NTFY_TOPIC
+      remoteRef:
+        conversionStrategy: Default
+        decodingStrategy: None
+        key: /cl01tl/talos/etcd-backup
+        metadataPolicy: None
+        property: NTFY_TOPIC
diff --git a/clusters/cl01tl/manifests/talos/ExternalSecret-talos-etcd-backup-secret.yaml b/clusters/cl01tl/manifests/talos/ExternalSecret-talos-etcd-backup-external-secret.yaml
similarity index 93%
rename from clusters/cl01tl/manifests/talos/ExternalSecret-talos-etcd-backup-secret.yaml
rename to clusters/cl01tl/manifests/talos/ExternalSecret-talos-etcd-backup-external-secret.yaml
index 4a3f53cb3..0325fcc2f 100644
--- a/clusters/cl01tl/manifests/talos/ExternalSecret-talos-etcd-backup-secret.yaml
+++ b/clusters/cl01tl/manifests/talos/ExternalSecret-talos-etcd-backup-external-secret.yaml
@@ -1,10 +1,10 @@
 apiVersion: external-secrets.io/v1
 kind: ExternalSecret
 metadata:
-  name: talos-etcd-backup-secret
+  name: talos-etcd-backup-external-secret
   namespace: talos
   labels:
-    app.kubernetes.io/name: talos-etcd-backup-secret
+    app.kubernetes.io/name: talos-etcd-backup-external-secret
     app.kubernetes.io/instance: talos
     app.kubernetes.io/part-of: talos
   annotations:
diff --git a/clusters/cl01tl/manifests/talos/ExternalSecret-talos-etcd-backup-local-secret.yaml b/clusters/cl01tl/manifests/talos/ExternalSecret-talos-etcd-backup-local-secret.yaml
new file mode 100644
index 000000000..50cca1b0d
--- /dev/null
+++ b/clusters/cl01tl/manifests/talos/ExternalSecret-talos-etcd-backup-local-secret.yaml
@@ -0,0 +1,51 @@
+apiVersion: external-secrets.io/v1
+kind: ExternalSecret
+metadata:
+  name: talos-etcd-backup-local-secret
+  namespace: talos
+  labels:
+    app.kubernetes.io/name: talos-etcd-backup-local-secret
+    app.kubernetes.io/instance: talos
+    app.kubernetes.io/part-of: talos
+  annotations:
+    kubernetes.io/service-account.name: talos-backup-secrets
+spec:
+  secretStoreRef:
+    kind: ClusterSecretStore
+    name: vault
+  data:
+    - secretKey: AWS_ACCESS_KEY_ID
+      remoteRef:
+        conversionStrategy: Default
+        decodingStrategy: None
+        key: /garage/home-infra/talos-backups
+        metadataPolicy: None
+        property: AWS_ACCESS_KEY_ID
+    - secretKey: AWS_SECRET_ACCESS_KEY
+      remoteRef:
+        conversionStrategy: Default
+        decodingStrategy: None
+        key: /garage/home-infra/talos-backups
+        metadataPolicy: None
+        property: AWS_SECRET_ACCESS_KEY
+    - secretKey: .s3cfg
+      remoteRef:
+        conversionStrategy: Default
+        decodingStrategy: None
+        key: /garage/home-infra/talos-backups
+        metadataPolicy: None
+        property: s3cfg-local
+    - secretKey: BUCKET
+      remoteRef:
+        conversionStrategy: Default
+        decodingStrategy: None
+        key: /garage/home-infra/talos-backups
+        metadataPolicy: None
+        property: BUCKET
+    - secretKey: AGE_X25519_PUBLIC_KEY
+      remoteRef:
+        conversionStrategy: Default
+        decodingStrategy: None
+        key: /cl01tl/talos/etcd-backup
+        metadataPolicy: None
+        property: AGE_X25519_PUBLIC_KEY
diff --git a/clusters/cl01tl/manifests/talos/ExternalSecret-talos-etcd-backup-remote-secret.yaml b/clusters/cl01tl/manifests/talos/ExternalSecret-talos-etcd-backup-remote-secret.yaml
new file mode 100644
index 000000000..187716b07
--- /dev/null
+++ b/clusters/cl01tl/manifests/talos/ExternalSecret-talos-etcd-backup-remote-secret.yaml
@@ -0,0 +1,51 @@
+apiVersion: external-secrets.io/v1
+kind: ExternalSecret
+metadata:
+  name: talos-etcd-backup-remote-secret
+  namespace: talos
+  labels:
+    app.kubernetes.io/name: talos-etcd-backup-remote-secret
+    app.kubernetes.io/instance: talos
+    app.kubernetes.io/part-of: talos
+  annotations:
+    kubernetes.io/service-account.name: talos-backup-secrets
+spec:
+  secretStoreRef:
+    kind: ClusterSecretStore
+    name: vault
+  data:
+    - secretKey: AWS_ACCESS_KEY_ID
+      remoteRef:
+        conversionStrategy: Default
+        decodingStrategy: None
+        key: /digital-ocean/home-infra/etcd-backup
+        metadataPolicy: None
+        property: AWS_ACCESS_KEY_ID
+    - secretKey: AWS_SECRET_ACCESS_KEY
+      remoteRef:
+        conversionStrategy: Default
+        decodingStrategy: None
+        key: /digital-ocean/home-infra/etcd-backup
+        metadataPolicy: None
+        property: AWS_SECRET_ACCESS_KEY
+    - secretKey: .s3cfg
+      remoteRef:
+        conversionStrategy: Default
+        decodingStrategy: None
+        key: /digital-ocean/home-infra/etcd-backup
+        metadataPolicy: None
+        property: s3cfg-remote
+    - secretKey: BUCKET
+      remoteRef:
+        conversionStrategy: Default
+        decodingStrategy: None
+        key: /digital-ocean/home-infra/etcd-backup
+        metadataPolicy: None
+        property: BUCKET
+    - secretKey: AGE_X25519_PUBLIC_KEY
+      remoteRef:
+        conversionStrategy: Default
+        decodingStrategy: None
+        key: /cl01tl/talos/etcd-backup
+        metadataPolicy: None
+        property: AGE_X25519_PUBLIC_KEY