1 Commits

Author SHA1 Message Date
9dac66b6b2 chore(deps): update ghcr.io/caronc/apprise docker tag to v1.4.0
All checks were successful
lint-test-helm / lint-helm (pull_request) Successful in 30s
lint-test-helm / validate-kubeconform (pull_request) Successful in 26s
render-manifests / render-manifests (pull_request) Successful in 3m32s
2026-04-26 17:11:50 +00:00
58 changed files with 1036 additions and 1264 deletions

View File

@@ -12,8 +12,8 @@ on:
jobs:
renovate:
runs-on: ubuntu-js
container: ghcr.io/renovatebot/renovate:43.147.0@sha256:a630e74f8b353f2cf9b3e4ef91eee3d30cf652fed60d2bab666fa6663ee4eb99
runs-on: ubuntu-latest
container: ghcr.io/renovatebot/renovate:43.141.6@sha256:077a2aada1c508923e4e36b68f7efe3ec013a797da8aed352afd98fb0e1b4c60
steps:
- name: Checkout
uses: actions/checkout@de0fac2e4500dabe0009e67214ff5f5447ce83dd # v6

View File

@@ -2,8 +2,5 @@ dependencies:
- name: app-template
repository: https://bjw-s-labs.github.io/helm-charts/
version: 4.6.2
- name: volsync-target
repository: oci://harbor.alexlebens.net/helm-charts
version: 1.0.0
digest: sha256:ee1ff98af82f76ddf0b672abf9f4973ae41faff3cd61d81849f496c089cfdbd3
generated: "2026-04-26T14:57:34.863614-05:00"
digest: sha256:1c04c187e6cf768117f7f91f3a3b082937ad5854c1cf6a681ad7c02687cd543d
generated: "2026-04-18T20:15:22.778699-05:00"

View File

@@ -1,108 +0,0 @@
apiVersion: monitoring.coreos.com/v1
kind: PrometheusRule
metadata:
name: haproxy
namespace: {{ .Release.Namespace }}
labels:
app.kubernetes.io/name: haproxy
{{- include "custom.labels" . | nindent 4 }}
spec:
groups:
- name: EmbeddedExporter
rules:
- alert: HAProxyHighHTTP4xxErrorRateBackend
expr: ((sum by (proxy) (rate(haproxy_server_http_responses_total{code="4xx"}[1m])) / sum by (proxy) (rate(haproxy_server_http_responses_total[1m]))) * 100) > 5 and sum by (proxy) (rate(haproxy_server_http_responses_total[1m])) > 0
for: 1m
labels:
severity: critical
annotations:
summary: HAProxy high HTTP 4xx error rate backend (instance {{ `{{ $labels.instance }}` }})
description: "Too many HTTP requests with status 4xx (> 5%) on backend {{ `{{ $labels.proxy }}` }}\n VALUE = {{ `{{ $value }}` }}\n LABELS = {{ `{{ $labels }}` }}"
- alert: HAProxyHighHTTP5xxErrorRateBackend
expr: ((sum by (proxy) (rate(haproxy_server_http_responses_total{code="5xx"}[1m])) / sum by (proxy) (rate(haproxy_server_http_responses_total[1m]))) * 100) > 5 and sum by (proxy) (rate(haproxy_server_http_responses_total[1m])) > 0
for: 1m
labels:
severity: critical
annotations:
summary: HAProxy high HTTP 5xx error rate backend (instance {{ `{{ $labels.instance }}` }})
description: "Too many HTTP requests with status 5xx (> 5%) on backend {{ `{{ $labels.proxy }}` }}\n VALUE = {{ `{{ $value }}` }}\n LABELS = {{ `{{ $labels }}` }}"
- alert: HAProxyHighHTTP4xxErrorRateServer
expr: ((sum by (server) (rate(haproxy_server_http_responses_total{code="4xx"}[1m])) / sum by (server) (rate(haproxy_server_http_responses_total[1m]))) * 100) > 5 and sum by (server) (rate(haproxy_server_http_responses_total[1m])) > 0
for: 1m
labels:
severity: critical
annotations:
summary: HAProxy high HTTP 4xx error rate server (instance {{ `{{ $labels.instance }}` }})
description: "Too many HTTP requests with status 4xx (> 5%) on server {{ `{{ $labels.server }}` }}\n VALUE = {{ `{{ $value }}` }}\n LABELS = {{ `{{ $labels }}` }}"
- alert: HAProxyHighHTTP5xxErrorRateServer
expr: ((sum by (server) (rate(haproxy_server_http_responses_total{code="5xx"}[1m])) / sum by (server) (rate(haproxy_server_http_responses_total[1m]))) * 100) > 5 and sum by (server) (rate(haproxy_server_http_responses_total[1m])) > 0
for: 1m
labels:
severity: critical
annotations:
summary: HAProxy high HTTP 5xx error rate server (instance {{ `{{ $labels.instance }}` }})
description: "Too many HTTP requests with status 5xx (> 5%) on server {{ `{{ $labels.server }}` }}\n VALUE = {{ `{{ $value }}` }}\n LABELS = {{ `{{ $labels }}` }}"
- alert: HAProxyServerResponseErrors
expr: (sum by (server) (rate(haproxy_server_response_errors_total[1m])) / sum by (server) (rate(haproxy_server_http_responses_total[1m]))) * 100 > 5 and sum by (server) (rate(haproxy_server_http_responses_total[1m])) > 0
for: 1m
labels:
severity: critical
annotations:
summary: HAProxy server response errors (instance {{ `{{ $labels.instance }}` }})
description: "Too many response errors to {{ `{{ $labels.server }}` }} server (> 5%).\n VALUE = {{ `{{ $value }}` }}\n LABELS = {{ `{{ $labels }}` }}"
- alert: HAProxyBackendConnectionErrors
expr: (sum by (proxy) (rate(haproxy_backend_connection_errors_total[1m]))) > 100
for: 1m
labels:
severity: critical
annotations:
summary: HAProxy backend connection errors (instance {{ `{{ $labels.instance }}` }})
description: "Too many connection errors to {{ `{{ $labels.proxy }}` }} backend (> 100 req/s). Request throughput may be too high.\n VALUE = {{ `{{ $value }}` }}\n LABELS = {{ `{{ $labels }}` }}"
- alert: HAProxyServerConnectionErrors
expr: (sum by (proxy) (rate(haproxy_server_connection_errors_total[1m]))) > 100
for: 0m
labels:
severity: critical
annotations:
summary: HAProxy server connection errors (instance {{ `{{ $labels.instance }}` }})
description: "Too many connection errors to {{ `{{ $labels.proxy }}` }} (> 100 req/s). Request throughput may be too high.\n VALUE = {{ `{{ $value }}` }}\n LABELS = {{ `{{ $labels }}` }}"
- alert: HAProxyBackendMaxActiveSession>80%
expr: (haproxy_backend_current_sessions / haproxy_backend_limit_sessions * 100) > 80 and haproxy_backend_limit_sessions > 0
for: 2m
labels:
severity: warning
annotations:
summary: HAProxy backend max active session > 80% (instance {{ `{{ $labels.instance }}` }})
description: "Session limit from backend {{ `{{ $labels.proxy }}` }} reached 80% of limit - {{ `{{ $value | printf \"%.2f\"}}` }}%\n VALUE = {{ `{{ $value }}` }}\n LABELS = {{ `{{ $labels }}` }}"
- alert: HAProxyPendingRequests
expr: sum by (proxy) (haproxy_backend_current_queue) > 0
for: 2m
labels:
severity: warning
annotations:
summary: HAProxy pending requests (instance {{ `{{ $labels.instance }}` }})
description: "Some HAProxy requests are pending on {{ `{{ $labels.proxy }}` }} - {{ `{{ $value | printf \"%.2f\"}}` }}\n VALUE = {{ `{{ $value }}` }}\n LABELS = {{ `{{ $labels }}` }}"
- alert: HAProxyRetryHigh
expr: sum by (proxy) (rate(haproxy_backend_retry_warnings_total[1m])) > 10
for: 2m
labels:
severity: warning
annotations:
summary: HAProxy retry high (instance {{ `{{ $labels.instance }}` }})
description: "High rate of retry on {{ `{{ $labels.proxy }}` }} - {{ `{{ $value | printf \"%.2f\"}}` }}\n VALUE = {{ `{{ $value }}` }}\n LABELS = {{ `{{ $labels }}` }}"
- alert: HAProxyFrontendSecurityBlockedRequests
expr: sum by (proxy) (rate(haproxy_frontend_denied_connections_total[2m])) > 10
for: 2m
labels:
severity: warning
annotations:
summary: HAProxy frontend security blocked requests (instance {{ `{{ $labels.instance }}` }})
description: "HAProxy is blocking requests for security reason\n VALUE = {{ `{{ $value }}` }}\n LABELS = {{ `{{ $labels }}` }}"
- alert: HAProxyServerHealthcheckFailure
expr: increase(haproxy_server_check_failures_total[1m]) > 2
for: 0m
labels:
severity: warning
annotations:
summary: HAProxy server healthcheck failure (instance {{ `{{ $labels.instance }}` }})
description: "Some server healthcheck are failing on {{ `{{ $labels.server }}` }} ({{ `{{ $value }}` }} in the last 1m)\n VALUE = {{ `{{ $value }}` }}\n LABELS = {{ `{{ $labels }}` }}"

View File

@@ -103,7 +103,7 @@ argo-cd:
enabled: true
image:
repository: haproxy
tag: 3.3.7-alpine@sha256:2afa53c856e4e9fcc7dfb35b807fcb189896d7e62b38d363f9bedea92bce7f9a
tag: 3.3.6-alpine@sha256:4f97a2cb7f02fd08402259e74a65ef12fcfa3dff1ef78fddecb5228a17b7f4ad
resources:
requests:
cpu: 5m

View File

@@ -4,9 +4,9 @@ dependencies:
version: 4.6.2
- name: volsync-target
repository: oci://harbor.alexlebens.net/helm-charts
version: 1.0.0
version: 0.8.0
- name: volsync-target
repository: oci://harbor.alexlebens.net/helm-charts
version: 1.0.0
digest: sha256:c6af4b1dd96410281d53ff8f63235bc79bd9a1d493d6da097d9e4ff088e09538
generated: "2026-04-26T14:57:40.219612-05:00"
version: 0.8.0
digest: sha256:7ee4cfdf7f908401c39b3cda0cf8783b25dcb9cf93e7c911609bab9e303ec5bf
generated: "2026-03-06T01:05:03.534042627Z"

View File

@@ -32,4 +32,4 @@ dependencies:
repository: oci://harbor.alexlebens.net/helm-charts
icon: https://cdn.jsdelivr.net/gh/selfhst/icons/png/audiobookshelf.png
# renovate: datasource=github-releases depName=advplyr/audiobookshelf
appVersion: 2.34.0
appVersion: 2.33.2

View File

@@ -12,7 +12,7 @@ audiobookshelf:
main:
image:
repository: ghcr.io/advplyr/audiobookshelf
tag: 2.34.0@sha256:4143292c530f6ac6700afd13360c04f477e4f1a81c1c97c4224b1c7e4330c5c4
tag: 2.33.2@sha256:a44ed89b3e845faa1f7d353f2cc89b2fcd8011737dd14075fa963cf9468da3a5
env:
- name: TZ
value: America/Chicago

View File

@@ -1,44 +0,0 @@
apiVersion: monitoring.coreos.com/v1
kind: PrometheusRule
metadata:
name: cert-manager
namespace: {{ .Release.Namespace }}
labels:
app.kubernetes.io/name: cert-manager
{{- include "custom.labels" . | nindent 4 }}
spec:
groups:
- name: EmbeddedExporter
rules:
- alert: Cert-ManagerAbsent
expr: absent(up{job="cert-manager"})
for: 10m
labels:
severity: critical
annotations:
summary: Cert-Manager absent (instance {{ `{{ $labels.instance }}` }})
description: "Cert-Manager has disappeared from Prometheus service discovery. New certificates will not be able to be minted, and existing ones can't be renewed until cert-manager is back.\n VALUE = {{ `{{ $value }}` }}\n LABELS = {{ `{{ $labels }}` }}"
- alert: Cert-ManagerCertificateExpiringSoon
expr: avg by (exported_namespace, namespace, name) (certmanager_certificate_expiration_timestamp_seconds - time()) < (21 * 24 * 3600)
for: 1h
labels:
severity: warning
annotations:
summary: Cert-Manager certificate expiring soon (instance {{ `{{ $labels.instance }}` }})
description: "The certificate {{ `{{ $labels.name }}` }} is expiring in less than 21 days.\n VALUE = {{ `{{ $value }}` }}\n LABELS = {{ `{{ $labels }}` }}"
- alert: Cert-ManagerCertificateNotReady
expr: max by (name, exported_namespace, namespace, condition) (certmanager_certificate_ready_status{condition!="True"} == 1)
for: 10m
labels:
severity: critical
annotations:
summary: Cert-Manager certificate not ready (instance {{ `{{ $labels.instance }}` }})
description: "The certificate {{ `{{ $labels.name }}` }} in namespace {{ `{{ $labels.exported_namespace }}` }} is not ready to serve traffic.\n VALUE = {{ `{{ $value }}` }}\n LABELS = {{ `{{ $labels }}` }}"
- alert: Cert-ManagerHittingACMERateLimits
expr: sum by (host) (rate(certmanager_acme_client_request_count{status="429"}[5m])) > 0
for: 5m
labels:
severity: critical
annotations:
summary: Cert-Manager hitting ACME rate limits (instance {{ `{{ $labels.instance }}` }})
description: "Cert-Manager is being rate-limited by the ACME provider. Certificate issuance and renewal may be blocked for up to a week.\n VALUE = {{ `{{ $value }}` }}\n LABELS = {{ `{{ $labels }}` }}"

View File

@@ -5,11 +5,5 @@ dependencies:
- name: plugin-barman-cloud
repository: https://cloudnative-pg.io/charts/
version: 0.6.0
- name: rclone-bucket
repository: oci://harbor.alexlebens.net/helm-charts
version: 0.4.3
- name: rclone-bucket
repository: oci://harbor.alexlebens.net/helm-charts
version: 0.4.3
digest: sha256:75d7078b7009082521a1bb8b49141e20b442343dabe7f76f5e7a16a352cfe205
generated: "2026-04-26T15:36:31.678086-05:00"
digest: sha256:48241acb753e635a01b306b90cfbce13ed3c0105a33ec7d36f159e3a7fe607f3
generated: "2026-04-14T09:03:10.332065288Z"

View File

@@ -13,7 +13,6 @@ sources:
- https://github.com/cloudnative-pg/postgres-containers/pkgs/container/postgresql
- https://github.com/cloudnative-pg/charts/tree/main/charts/cloudnative-pg
- https://github.com/cloudnative-pg/charts/tree/main/charts/plugin-barman-cloud
- https://gitea.alexlebens.dev/alexlebens/helm-charts/src/branch/main/charts/rclone-bucket
maintainers:
- name: alexlebens
dependencies:
@@ -23,14 +22,6 @@ dependencies:
- name: plugin-barman-cloud
version: 0.6.0
repository: https://cloudnative-pg.io/charts/
- name: rclone-bucket
alias: rclone-postgres-backups-remote
repository: oci://harbor.alexlebens.net/helm-charts
version: 0.4.3
- name: rclone-bucket
alias: rclone-postgres-backups-external
repository: oci://harbor.alexlebens.net/helm-charts
version: 0.4.3
icon: https://raw.githubusercontent.com/cloudnative-pg/cloudnative-pg.github.io/refs/heads/main/assets/images/hero_image.png
# renovate: datasource=github-releases depName=cloudnative-pg/cloudnative-pg
appVersion: 1.29.0

View File

@@ -14,62 +14,3 @@ plugin-barman-cloud:
requests:
cpu: 1m
memory: 20Mi
rclone-postgres-backups-remote:
nameOverride: postgres-backups-remote-rclone
cronJob:
suspend: false
schedule: 0 6 * * 6
rclone:
source:
bucketName: postgres-backups
destination:
bucketName: postgres-backups
prune:
enabled: true
ageToPrune: 45d
include: "/cl01tl/*/*/*/base/**"
exclude: "**/walls/**"
secret:
externalSecret:
source:
credentials:
path: /garage/home-infra/postgres-backups
config:
path: /garage/config
destination:
credentials:
path: /garage/home-infra/postgres-backups
config:
path: /garage/config
rclone-postgres-backups-external:
nameOverride: postgres-backups-external-rclone
cronJob:
suspend: true
schedule: 0 6 * * 6
rclone:
source:
bucketName: openbao-backups
destination:
bucketName: postgres-backups-ecc1010276b61716
providerType: DigitalOcean
prune:
enabled: true
ageToPrune: 45d
include: "/cl01tl/*/*/*/base/**"
exclude: "**/walls/**"
secret:
externalSecret:
source:
credentials:
path: /garage/home-infra/postgres-backups
config:
path: /garage/config
destination:
credentials:
path: /digital-ocean/home-infra/postgres-backups
keyIdProperty: AWS_ACCESS_KEY_ID
secretKeyProperty: AWS_SECRET_ACCESS_KEY
regionProperty: AWS_REGION
config:
path: /digital-ocean/config
endpointProperty: ENDPOINT

View File

@@ -42,4 +42,4 @@ dependencies:
repository: oci://harbor.alexlebens.net/helm-charts
icon: https://cdn.jsdelivr.net/gh/selfhst/icons@main/png/dawarich.png
# renovate: datasource=github-releases depName=Freika/dawarich
appVersion: 1.7.0
appVersion: 1.6.1

View File

@@ -15,18 +15,6 @@ spec:
remoteRef:
key: /cl01tl/dawarich/key
property: key
- secretKey: otp-primary-key
remoteRef:
key: /cl01tl/dawarich/key
property: otp-primary-key
- secretKey: otp-deterministic-key
remoteRef:
key: /cl01tl/dawarich/key
property: otp-deterministic-key
- secretKey: otp-derivation-salt
remoteRef:
key: /cl01tl/dawarich/key
property: otp-derivation-salt
---
apiVersion: external-secrets.io/v1

View File

@@ -8,7 +8,7 @@ dawarich:
main:
image:
repository: freikin/dawarich
tag: 1.7.0@sha256:7d5f99c61121fcfa4cbdd6a153392630d9f059ffb0156759278d3e049085ec62
tag: 1.6.1@sha256:a884f69f19ce0f66992f3872d24544d1e587e133b8a003e072711aafc1e02429
command:
- "web-entrypoint.sh"
args:
@@ -83,21 +83,6 @@ dawarich:
secretKeyRef:
name: dawarich-key
key: key
- name: OTP_ENCRYPTION_PRIMARY_KEY
valueFrom:
secretKeyRef:
name: dawarich-key
key: otp-primary-key
- name: OTP_ENCRYPTION_DETERMINISTIC_KEY
valueFrom:
secretKeyRef:
name: dawarich-key
key: otp-deterministic-key
- name: OTP_ENCRYPTION_KEY_DERIVATION_SALT
valueFrom:
secretKeyRef:
name: dawarich-key
key: otp-derivation-salt
- name: RAILS_LOG_TO_STDOUT
value: true
- name: SELF_HOSTED
@@ -126,7 +111,7 @@ dawarich:
sidekiq:
image:
repository: freikin/dawarich
tag: 1.7.0@sha256:7d5f99c61121fcfa4cbdd6a153392630d9f059ffb0156759278d3e049085ec62
tag: 1.6.1@sha256:a884f69f19ce0f66992f3872d24544d1e587e133b8a003e072711aafc1e02429
command:
- "sidekiq-entrypoint.sh"
args:
@@ -176,12 +161,12 @@ dawarich:
- name: OIDC_CLIENT_ID
valueFrom:
secretKeyRef:
name: dawarich-oidc-authentik
name: dawarich-oidc-secret
key: client
- name: OIDC_CLIENT_SECRET
valueFrom:
secretKeyRef:
name: dawarich-oidc-authentik
name: dawarich-oidc-secret
key: secret
- name: OIDC_PROVIDER_NAME
value: Authentik
@@ -196,23 +181,8 @@ dawarich:
- name: SECRET_KEY_BASE
valueFrom:
secretKeyRef:
name: dawarich-key
name: dawarich-key-secret
key: key
- name: OTP_ENCRYPTION_PRIMARY_KEY
valueFrom:
secretKeyRef:
name: dawarich-key
key: otp-primary-key
- name: OTP_ENCRYPTION_DETERMINISTIC_KEY
valueFrom:
secretKeyRef:
name: dawarich-key
key: otp-deterministic-key
- name: OTP_ENCRYPTION_KEY_DERIVATION_SALT
valueFrom:
secretKeyRef:
name: dawarich-key
key: otp-derivation-salt
- name: RAILS_LOG_TO_STDOUT
value: true
- name: SELF_HOSTED

View File

@@ -47,8 +47,6 @@ democratic-csi:
fsType: ext4
node:
hostPID: true
rbac:
enabled: true
driver:
extraEnv:
- name: ISCSIADM_HOST_STRATEGY

View File

@@ -10,6 +10,6 @@ dependencies:
version: 0.6.1
- name: rclone-bucket
repository: oci://harbor.alexlebens.net/helm-charts
version: 0.4.3
digest: sha256:df3b79c6b8868d749d98d232741fef4a26b73894bce3bf4588581340c15fc3da
generated: "2026-04-26T21:06:27.85398357Z"
version: 0.2.0
digest: sha256:b95c228173eb2e4914c37d5c8b3753ad644a90dc9f7f4357dbc1cbf15004961b
generated: "2026-04-25T20:59:03.456994-05:00"

View File

@@ -31,7 +31,7 @@ dependencies:
- name: rclone-bucket
alias: rclone-directus-assets-remote
repository: oci://harbor.alexlebens.net/helm-charts
version: 0.4.3
version: 0.2.0
icon: https://cdn.jsdelivr.net/gh/selfhst/icons/png/directus.png
# renovate: datasource=github-releases depName=directus/directus
appVersion: 11.17.3

View File

@@ -1,28 +0,0 @@
apiVersion: monitoring.coreos.com/v1
kind: PrometheusRule
metadata:
name: meilisearch
namespace: {{ .Release.Namespace }}
labels:
app.kubernetes.io/name: meilisearch
{{- include "custom.labels" . | nindent 4 }}
spec:
groups:
- name: EmbeddedExporter
rules:
- alert: MeilisearchIndexIsEmpty
expr: meilisearch_index_docs_count == 0
for: 0m
labels:
severity: warning
annotations:
summary: Meilisearch index is empty (instance {{ `{{ $labels.instance }}` }})
description: "Meilisearch index {{ `{{ $labels.index }}` }} has zero documents\n VALUE = {{ `{{ $value }}` }}\n LABELS = {{ `{{ $labels }}` }}"
- alert: MeilisearchHttpResponseTime
expr: meilisearch_http_response_time_seconds > 0.5
for: 0m
labels:
severity: warning
annotations:
summary: Meilisearch http response time (instance {{ `{{ $labels.instance }}` }})
description: "Meilisearch http response time is too high\n VALUE = {{ `{{ $value }}` }}\n LABELS = {{ `{{ $labels }}` }}"

View File

@@ -194,7 +194,7 @@ gitea-actions:
registry: docker.io
repository: gitea/act_runner
# renovate: datasource=docker depName=gitea/act_runner
tag: 0.5.0@sha256:9946000491cf19c3ed487c23e5da4f0c287010d791f495796c756e41e7a79cbe
tag: 0.4.1@sha256:696a59b51ad3d149521e3beb0229d5fb88f87295e1616f940199793274415b56
extraVolumeMounts:
- name: workspace-vol
mountPath: /workspace
@@ -206,8 +206,9 @@ gitea-actions:
runner:
labels:
- "ubuntu-latest:docker://harbor.alexlebens.net/proxy-hub.docker/gitea/runner-images:ubuntu-24.04"
- "ubuntu-latest-slim:docker://harbor.alexlebens.net/proxy-hub.docker/gitea/runner-images:ubuntu-24.04-slim"
- "ubuntu-js:docker://harbor.alexlebens.net/proxy-ghcr.io/catthehacker/ubuntu:js-24.04"
- "ubuntu-24.04:docker://harbor.alexlebens.net/proxy-hub.docker/gitea/runner-images:ubuntu-24.04"
- "ubuntu-22.04:docker://harbor.alexlebens.net/proxy-hub.docker/gitea/runner-images:ubuntu-22.04"
dind:
registry: docker.io
repository: docker

View File

@@ -1,6 +0,0 @@
dependencies:
- name: app-template
repository: https://bjw-s-labs.github.io/helm-charts/
version: 4.6.2
digest: sha256:65da71c32b4576a11e590f059d97dae22137448cb71049258d018cf5b7bb4a92
generated: "2026-04-26T14:59:16.326539-05:00"

View File

@@ -1,28 +0,0 @@
apiVersion: monitoring.coreos.com/v1
kind: PrometheusRule
metadata:
name: meilisearch
namespace: {{ .Release.Namespace }}
labels:
app.kubernetes.io/name: meilisearch
{{- include "custom.labels" . | nindent 4 }}
spec:
groups:
- name: EmbeddedExporter
rules:
- alert: MeilisearchIndexIsEmpty
expr: meilisearch_index_docs_count == 0
for: 0m
labels:
severity: warning
annotations:
summary: Meilisearch index is empty (instance {{ `{{ $labels.instance }}` }})
description: "Meilisearch index {{ `{{ $labels.index }}` }} has zero documents\n VALUE = {{ `{{ $value }}` }}\n LABELS = {{ `{{ $labels }}` }}"
- alert: MeilisearchHttpResponseTime
expr: meilisearch_http_response_time_seconds > 0.5
for: 0m
labels:
severity: warning
annotations:
summary: Meilisearch http response time (instance {{ `{{ $labels.instance }}` }})
description: "Meilisearch http response time is too high\n VALUE = {{ `{{ $value }}` }}\n LABELS = {{ `{{ $labels }}` }}"

View File

@@ -11,8 +11,5 @@ dependencies:
- name: volsync-target
repository: oci://harbor.alexlebens.net/helm-charts
version: 1.0.0
- name: rclone-bucket
repository: oci://harbor.alexlebens.net/helm-charts
version: 0.4.3
digest: sha256:376ee64d93cc959afc02c5cf5b308bbf12a0b5dfb339a6a853b3243e6033604c
generated: "2026-04-26T21:07:05.718924873Z"
digest: sha256:7e04fb96a89630d96605e1a6dec951191709af377560357f002af33365618c06
generated: "2026-04-24T22:52:57.309438139Z"

View File

@@ -15,7 +15,6 @@ sources:
- https://github.com/meilisearch/meilisearch-kubernetes/tree/main/charts/meilisearch
- https://gitea.alexlebens.dev/alexlebens/helm-charts/src/branch/main/charts/cloudflared
- https://gitea.alexlebens.dev/alexlebens/helm-charts/src/branch/main/charts/volsync-target
- https://gitea.alexlebens.dev/alexlebens/helm-charts/src/branch/main/charts/rclone-bucket
maintainers:
- name: alexlebens
dependencies:
@@ -33,10 +32,6 @@ dependencies:
alias: volsync-target-data
version: 1.0.0
repository: oci://harbor.alexlebens.net/helm-charts
- name: rclone-bucket
alias: rclone-karakeep-assets-remote
repository: oci://harbor.alexlebens.net/helm-charts
version: 0.4.3
icon: https://cdn.jsdelivr.net/gh/selfhst/icons@main/png/karakeep.png
# renovate: datasource=github-releases depName=karakeep-app/karakeep
appVersion: 0.31.0

View File

@@ -1,28 +0,0 @@
apiVersion: monitoring.coreos.com/v1
kind: PrometheusRule
metadata:
name: meilisearch
namespace: {{ .Release.Namespace }}
labels:
app.kubernetes.io/name: meilisearch
{{- include "custom.labels" . | nindent 4 }}
spec:
groups:
- name: EmbeddedExporter
rules:
- alert: MeilisearchIndexIsEmpty
expr: meilisearch_index_docs_count == 0
for: 0m
labels:
severity: warning
annotations:
summary: Meilisearch index is empty (instance {{ `{{ $labels.instance }}` }})
description: "Meilisearch index {{ `{{ $labels.index }}` }} has zero documents\n VALUE = {{ `{{ $value }}` }}\n LABELS = {{ `{{ $labels }}` }}"
- alert: MeilisearchHttpResponseTime
expr: meilisearch_http_response_time_seconds > 0.5
for: 0m
labels:
severity: warning
annotations:
summary: Meilisearch http response time (instance {{ `{{ $labels.instance }}` }})
description: "Meilisearch http response time is too high\n VALUE = {{ `{{ $value }}` }}\n LABELS = {{ `{{ $labels }}` }}"

View File

@@ -172,24 +172,3 @@ volsync-target-data:
external:
enabled: true
schedule: 30 10 * * *
rclone-karakeep-assets-remote:
cronJob:
suspend: false
schedule: 10 0 * * *
rclone:
source:
bucketName: karakeep-assets
destination:
bucketName: karakeep-assets
secret:
externalSecret:
source:
credentials:
path: /garage/home-infra/karakeep-assets
config:
path: /garage/config
destination:
credentials:
path: /garage/home-infra/karakeep-assets
config:
path: /garage/config

View File

@@ -1,7 +1,7 @@
dependencies:
- name: kube-prometheus-stack
repository: oci://ghcr.io/prometheus-community/charts
version: 84.3.0
version: 84.1.0
- name: prometheus-operator-crds
repository: oci://ghcr.io/prometheus-community/charts
version: 28.0.1
@@ -11,5 +11,5 @@ dependencies:
- name: valkey
repository: oci://harbor.alexlebens.net/helm-charts
version: 0.6.1
digest: sha256:88beedf9486adb9cb27b36c24021759401fcff106fc0e0cadbb3282d7e57d03c
generated: "2026-04-27T19:03:58.288039768Z"
digest: sha256:f7340793bc2c04e561d048b110cc7258fac0d5dc3d3b4ecdc6c2d8898445c5ab
generated: "2026-04-26T00:12:54.803217038Z"

View File

@@ -20,7 +20,7 @@ maintainers:
- name: alexlebens
dependencies:
- name: kube-prometheus-stack
version: 84.3.0
version: 84.1.0
repository: oci://ghcr.io/prometheus-community/charts
- name: prometheus-operator-crds
version: 28.0.1

View File

@@ -98,8 +98,8 @@ kube-prometheus-stack:
namespace: traefik
prometheusSpec:
scrapeInterval: 30s
retention: 60d
retentionSize: 450GiB
retention: 45d
retentionSize: 240GiB
externalUrl: https://prometheus.alexlebens.net
ruleSelectorNilUsesHelmValues: false
serviceMonitorSelectorNilUsesHelmValues: false
@@ -112,7 +112,7 @@ kube-prometheus-stack:
accessModes: ["ReadWriteOnce"]
resources:
requests:
storage: 500Gi
storage: 250Gi
ntfy-alertmanager:
global:
fullnameOverride: ntfy-alertmanager

View File

@@ -25,24 +25,24 @@ music-grabber:
- name: NAVIDROME_USER
valueFrom:
secretKeyRef:
name: music-grabber-config
name: music-grabber-config-secret
key: navidrome-user
- name: NAVIDROME_PASS
valueFrom:
secretKeyRef:
name: music-grabber-config
name: music-grabber-config-secret
key: navidrome-password
- name: SLSKD_URL
value: http://slskd.slskd:5030
- name: SLSKD_USER
valueFrom:
secretKeyRef:
name: music-grabber-config
name: music-grabber-config-secret
key: slskd-user
- name: SLSKD_PASS
valueFrom:
secretKeyRef:
name: music-grabber-config
name: music-grabber-config-secret
key: slskd-password
- name: SLSKD_DOWNLOADS_PATH
value: /mnt/store/slskd/Downloads

View File

@@ -5,8 +5,5 @@ dependencies:
- name: postgres-cluster
repository: oci://harbor.alexlebens.net/helm-charts
version: 7.12.1
- name: rclone-bucket
repository: oci://harbor.alexlebens.net/helm-charts
version: 0.4.3
digest: sha256:97ce7f765707305cb7ccf7020c3a0945a19cda7d7d54cb75ff341acdbf000a23
generated: "2026-04-26T21:07:46.221034664Z"
digest: sha256:1f4cf54fc4c52a2ef6fff3aae0f8af39b059d46a6e257add049310766ebc0a22
generated: "2026-04-24T21:55:36.889797295Z"

View File

@@ -10,7 +10,6 @@ sources:
- https://github.com/binwiederhier/ntfy
- https://hub.docker.com/r/binwiederhier/ntfy
- https://github.com/bjw-s-labs/helm-charts/tree/main/charts/other/app-template
- https://gitea.alexlebens.dev/alexlebens/helm-charts/src/branch/main/charts/rclone-bucket
maintainers:
- name: alexlebens
dependencies:
@@ -22,10 +21,6 @@ dependencies:
alias: postgres-18-cluster
version: 7.12.1
repository: oci://harbor.alexlebens.net/helm-charts
- name: rclone-bucket
alias: rclone-ntfy-attachments-remote
repository: oci://harbor.alexlebens.net/helm-charts
version: 0.4.3
icon: https://cdn.jsdelivr.net/gh/selfhst/icons/png/ntfy.png
# renovate: datasource=github-releases depName=binwiederhier/ntfy
appVersion: 2.22.0

View File

@@ -124,24 +124,3 @@ postgres-18-cluster:
immediate: true
schedule: "0 15 14 * * *"
backupName: garage-local
rclone-ntfy-attachments-remote:
cronJob:
suspend: false
schedule: 50 0 * * *
rclone:
source:
bucketName: ntfy-attachments
destination:
bucketName: ntfy-attachments
secret:
externalSecret:
source:
credentials:
path: /garage/home-infra/ntfy-attachments
config:
path: /garage/config
destination:
credentials:
path: /garage/home-infra/ntfy-attachments
config:
path: /garage/config

View File

@@ -1,15 +1,9 @@
dependencies:
- name: openbao
repository: https://openbao.github.io/openbao-helm
version: 0.27.2
version: 0.27.1
- name: app-template
repository: https://bjw-s-labs.github.io/helm-charts/
version: 4.6.2
- name: rclone-bucket
repository: oci://harbor.alexlebens.net/helm-charts
version: 0.4.3
- name: rclone-bucket
repository: oci://harbor.alexlebens.net/helm-charts
version: 0.4.3
digest: sha256:cbb61cd27ce6f613cd0fb07a3b9d380008732ed9e933eed45eda2d7e379fe279
generated: "2026-04-26T21:08:16.543052937Z"
digest: sha256:2a48dda8dad91d967fceeec4c50d3358f58b0255ba823e04bea726bf187f8f40
generated: "2026-04-15T19:55:47.720376-05:00"

View File

@@ -15,7 +15,6 @@ sources:
- https://github.com/lrstanley/vault-unseal/pkgs/container/vault-unseal
- https://github.com/openbao/openbao-helm/tree/main/charts/openbao
- https://github.com/bjw-s-labs/helm-charts/tree/main/charts/other/app-template
- https://gitea.alexlebens.dev/alexlebens/helm-charts/src/branch/main/charts/rclone-bucket
maintainers:
- name: alexlebens
dependencies:
@@ -26,14 +25,6 @@ dependencies:
alias: unseal
repository: https://bjw-s-labs.github.io/helm-charts/
version: 4.6.2
- name: rclone-bucket
alias: rclone-openbao-backups-remote
repository: oci://harbor.alexlebens.net/helm-charts
version: 0.4.3
- name: rclone-bucket
alias: rclone-openbao-backups-external
repository: oci://harbor.alexlebens.net/helm-charts
version: 0.4.3
icon: https://cdn.jsdelivr.net/gh/selfhst/icons@main/png/openbao.png
# renovate: datasource=github-releases depName=openbao/openbao
appVersion: v2.5.3

View File

@@ -120,52 +120,20 @@ openbao:
prometheusRules:
enabled: true
rules:
- alert: openBao-HighResponseTime
- alert: vault-HighResponseTime
annotations:
message: The response time of OpenBao is over 500ms on average over the last 5 minutes.
message: The response time of Vault is over 500ms on average over the last 5 minutes.
expr: vault_core_handle_request{quantile="0.5", namespace="mynamespace"} > 500
for: 5m
labels:
severity: warning
- alert: openBao-HighResponseTime
- alert: vault-HighResponseTime
annotations:
message: The response time of OpenBao is over 1s on average over the last 5 minutes.
message: The response time of Vault is over 1s on average over the last 5 minutes.
expr: vault_core_handle_request{quantile="0.5", namespace="mynamespace"} > 1000
for: 5m
labels:
severity: critical
- alert: openBao-Sealed
expr: vault_core_unsealed == 0
for: 1m
labels:
severity: critical
annotations:
summary: OpenBao sealed (instance {{ $labels.instance }})
description: "OpenBao instance is sealed on {{ $labels.instance }}\n VALUE = {{ $value }}\n LABELS = {{ $labels }}"
- alert: OpenBao-TooManyPendingTokens
expr: avg(vault_token_create_count - vault_token_store_count) > 0
for: 5m
labels:
severity: warning
annotations:
summary: OpenBao too many pending tokens (instance {{ $labels.instance }})
description: "Too many pending tokens on {{ $labels.instance }}: {{ $value }} tokens created but not yet stored.\n VALUE = {{ $value }}\n LABELS = {{ $labels }}"
- alert: OpenBao-TooManyInfinityTokens
expr: vault_token_count_by_ttl{creation_ttl="+Inf"} > 3
for: 5m
labels:
severity: warning
annotations:
summary: OpenBao too many infinity tokens (instance {{ $labels.instance }})
description: "Too many non-expiring tokens on {{ $labels.instance }}: {{ $value }} tokens with infinite TTL.\n VALUE = {{ $value }}\n LABELS = {{ $labels }}"
- alert: OpenBao-ClusterHealth
expr: sum(vault_core_active) / count(vault_core_active) <= 0.5 and count(vault_core_active) > 0
for: 0m
labels:
severity: critical
annotations:
summary: OpenBao cluster health (instance {{ $labels.instance }})
description: "OpenBao cluster is not healthy: only {{ $value | humanizePercentage }} of nodes are active.\n VALUE = {{ $value }}\n LABELS = {{ $labels }}"
snapshotAgent:
enabled: true
schedule: 0 4 * * *
@@ -239,55 +207,3 @@ unseal:
requests:
cpu: 1m
memory: 10Mi
rclone-openbao-backups-remote:
nameOverride: openbao-backups-remote-rclone
cronJob:
suspend: false
schedule: 0 1 * * *
rclone:
source:
bucketName: openbao-backups
destination:
bucketName: openbao-backups
prune:
enabled: true
ageToPrune: 90d
secret:
externalSecret:
source:
credentials:
path: /garage/home-infra/openbao-backups
config:
path: /garage/config
destination:
credentials:
path: /garage/home-infra/openbao-backups
config:
path: /garage/config
rclone-openbao-backups-external:
nameOverride: openbao-backups-external-rclone
cronJob:
suspend: false
schedule: 10 1 * * *
rclone:
source:
bucketName: openbao-backups
destination:
bucketName: openbao-backups-6e088aad5fad110b
providerType: DigitalOcean
prune:
enabled: true
ageToPrune: 90d
secret:
externalSecret:
source:
credentials:
path: /garage/home-infra/openbao-backups
config:
path: /garage/config
destination:
credentials:
path: /digital-ocean/home-infra/openbao-backups
config:
path: /digital-ocean/config
endpointProperty: ENDPOINT

View File

@@ -48,4 +48,4 @@ dependencies:
repository: oci://harbor.alexlebens.net/helm-charts
icon: https://cdn.jsdelivr.net/gh/selfhst/icons/png/paperless-ngx.png
# renovate: datasource=github-releases depName=paperless-ngx/paperless-ngx
appVersion: 2.20.15
appVersion: 2.20.14

View File

@@ -8,7 +8,7 @@ paperless-ngx:
main:
image:
repository: ghcr.io/paperless-ngx/paperless-ngx
tag: 2.20.15@sha256:6c86cad803970ea782683a8e80e7403444c5bf3cf70de63b4d3c8e87500db92f
tag: 2.20.14@sha256:b89f83345532cfba72690185257eb6c4f92fc2a782332a42abe19c07b7a6595f
env:
- name: PAPERLESS_REDIS
value: redis://paperless-ngx-valkey.paperless-ngx:6379

View File

@@ -20,4 +20,4 @@ dependencies:
version: 4.6.2
icon: https://cdn.jsdelivr.net/gh/selfhst/icons/png/plex.png
# renovate: datasource=github-releases depName=linuxserver/docker-plex
appVersion: 1.43.1.10611-1e34174b1-ls303
appVersion: 1.43.1.10611-1e34174b1-ls302

View File

@@ -22,7 +22,7 @@ plex:
main:
image:
repository: ghcr.io/linuxserver/plex
tag: 1.43.1.10611-1e34174b1-ls303@sha256:b785bdd60e781662f16e0526a6b54c07856739df95ab558a674a3c084dbde423
tag: 1.43.1.10611-1e34174b1-ls302@sha256:e5c7c283b242966416a4bed2d666acf6f3fb8f957c704be8333f8dc987364825
env:
- name: TZ
value: America/Chicago

View File

@@ -1,6 +1,6 @@
dependencies:
- name: rclone-bucket
repository: oci://harbor.alexlebens.net/helm-charts
version: 0.4.3
digest: sha256:7203c46d1617837cfaad5fc500277ff1ed8d5e310b3af65500f3fbbd3166abd6
generated: "2026-04-26T21:08:47.555855644Z"
- name: app-template
repository: https://bjw-s-labs.github.io/helm-charts/
version: 4.6.2
digest: sha256:8ed5a7025cbfee661770c4f525b6e1376f412114a7ab88cea1ab1de538eea500
generated: "2026-03-11T18:19:57.681245-05:00"

View File

@@ -9,14 +9,15 @@ keywords:
home: https://docs.alexlebens.dev/applications/rclone/
sources:
- https://github.com/rclone/rclone
- https://gitea.alexlebens.dev/alexlebens/helm-charts/src/branch/main/charts/rclone-bucket
- https://hub.docker.com/r/rclone/rclone
- https://github.com/bjw-s-labs/helm-charts/tree/main/charts/other/app-template
maintainers:
- name: alexlebens
dependencies:
- name: rclone-bucket
alias: rclone-web-assets-remote
repository: oci://harbor.alexlebens.net/helm-charts
version: 0.4.3
- name: app-template
alias: rclone
repository: https://bjw-s-labs.github.io/helm-charts/
version: 4.6.2
icon: https://cdn.jsdelivr.net/gh/selfhst/icons@main/png/rclone.png
# renovate: datasource=github-releases depName=rclone/rclone
appVersion: v1.73.5

View File

@@ -0,0 +1,235 @@
apiVersion: external-secrets.io/v1
kind: ExternalSecret
metadata:
name: garage-karakeep-secret
namespace: {{ .Release.Namespace }}
labels:
app.kubernetes.io/name: garage-karakeep-secret
{{- include "custom.labels" . | nindent 4 }}
spec:
secretStoreRef:
kind: ClusterSecretStore
name: openbao
data:
- secretKey: ACCESS_KEY_ID
remoteRef:
key: /garage/home-infra/karakeep-assets
property: ACCESS_KEY_ID
- secretKey: ACCESS_REGION
remoteRef:
key: /garage/home-infra/karakeep-assets
property: ACCESS_REGION
- secretKey: ACCESS_SECRET_KEY
remoteRef:
key: /garage/home-infra/karakeep-assets
property: ACCESS_SECRET_KEY
- secretKey: SRC_ENDPOINT
remoteRef:
key: /garage/config
property: ENDPOINT_LOCAL
- secretKey: DEST_ENDPOINT
remoteRef:
key: /garage/config
property: ENDPOINT_REMOTE
---
apiVersion: external-secrets.io/v1
kind: ExternalSecret
metadata:
name: garage-talos-backups-secret
namespace: {{ .Release.Namespace }}
labels:
app.kubernetes.io/name: garage-talos-backups-secret
{{- include "custom.labels" . | nindent 4 }}
spec:
secretStoreRef:
kind: ClusterSecretStore
name: openbao
data:
- secretKey: ACCESS_KEY_ID
remoteRef:
key: /garage/home-infra/talos-backups
property: ACCESS_KEY_ID
- secretKey: ACCESS_REGION
remoteRef:
key: /garage/home-infra/talos-backups
property: ACCESS_REGION
- secretKey: ACCESS_SECRET_KEY
remoteRef:
key: /garage/home-infra/talos-backups
property: ACCESS_SECRET_KEY
- secretKey: SRC_ENDPOINT
remoteRef:
key: /garage/config
property: ENDPOINT_LOCAL
- secretKey: DEST_ENDPOINT
remoteRef:
key: /garage/config
property: ENDPOINT_REMOTE
---
apiVersion: external-secrets.io/v1
kind: ExternalSecret
metadata:
name: garage-web-assets-secret
namespace: {{ .Release.Namespace }}
labels:
app.kubernetes.io/name: garage-web-assets-secret
{{- include "custom.labels" . | nindent 4 }}
spec:
secretStoreRef:
kind: ClusterSecretStore
name: openbao
data:
- secretKey: ACCESS_KEY_ID
remoteRef:
key: /garage/home-infra/web-assets
property: ACCESS_KEY_ID
- secretKey: ACCESS_REGION
remoteRef:
key: /garage/home-infra/web-assets
property: ACCESS_REGION
- secretKey: ACCESS_SECRET_KEY
remoteRef:
key: /garage/home-infra/web-assets
property: ACCESS_SECRET_KEY
- secretKey: SRC_ENDPOINT
remoteRef:
key: /garage/config
property: ENDPOINT_LOCAL
- secretKey: DEST_ENDPOINT
remoteRef:
key: /garage/config
property: ENDPOINT_REMOTE
---
apiVersion: external-secrets.io/v1
kind: ExternalSecret
metadata:
name: garage-postgres-backups-secret
namespace: {{ .Release.Namespace }}
labels:
app.kubernetes.io/name: garage-postgres-backups-secret
{{- include "custom.labels" . | nindent 4 }}
spec:
secretStoreRef:
kind: ClusterSecretStore
name: openbao
data:
- secretKey: ACCESS_KEY_ID
remoteRef:
key: /garage/home-infra/postgres-backups
property: ACCESS_KEY_ID
- secretKey: ACCESS_REGION
remoteRef:
key: /garage/home-infra/postgres-backups
property: ACCESS_REGION
- secretKey: ACCESS_SECRET_KEY
remoteRef:
key: /garage/home-infra/postgres-backups
property: ACCESS_SECRET_KEY
- secretKey: SRC_ENDPOINT
remoteRef:
key: /garage/config
property: ENDPOINT_LOCAL
- secretKey: DEST_ENDPOINT
remoteRef:
key: /garage/config
property: ENDPOINT_REMOTE
---
apiVersion: external-secrets.io/v1
kind: ExternalSecret
metadata:
name: garage-ntfy-attachments-secret
namespace: {{ .Release.Namespace }}
labels:
app.kubernetes.io/name: garage-ntfy-attachments-secret
{{- include "custom.labels" . | nindent 4 }}
spec:
secretStoreRef:
kind: ClusterSecretStore
name: openbao
data:
- secretKey: ACCESS_KEY_ID
remoteRef:
key: /garage/home-infra/ntfy-attachments
property: ACCESS_KEY_ID
- secretKey: ACCESS_REGION
remoteRef:
key: /garage/home-infra/ntfy-attachments
property: ACCESS_REGION
- secretKey: ACCESS_SECRET_KEY
remoteRef:
key: /garage/home-infra/ntfy-attachments
property: ACCESS_SECRET_KEY
- secretKey: SRC_ENDPOINT
remoteRef:
key: /garage/config
property: ENDPOINT_LOCAL
- secretKey: DEST_ENDPOINT
remoteRef:
key: /garage/config
property: ENDPOINT_REMOTE
---
apiVersion: external-secrets.io/v1
kind: ExternalSecret
metadata:
name: garage-openbao-backups-secret
namespace: {{ .Release.Namespace }}
labels:
app.kubernetes.io/name: garage-openbao-backups-secret
{{- include "custom.labels" . | nindent 4 }}
spec:
secretStoreRef:
kind: ClusterSecretStore
name: openbao
data:
- secretKey: ACCESS_KEY_ID
remoteRef:
key: /garage/home-infra/openbao-backups
property: ACCESS_KEY_ID
- secretKey: ACCESS_REGION
remoteRef:
key: /garage/home-infra/openbao-backups
property: ACCESS_REGION
- secretKey: ACCESS_SECRET_KEY
remoteRef:
key: /garage/home-infra/openbao-backups
property: ACCESS_SECRET_KEY
- secretKey: ENDPOINT_LOCAL
remoteRef:
key: /garage/config
property: ENDPOINT_LOCAL
- secretKey: ENDPOINT_REMOTE
remoteRef:
key: /garage/config
property: ENDPOINT_REMOTE
---
apiVersion: external-secrets.io/v1
kind: ExternalSecret
metadata:
name: external-openbao-backups-secret
namespace: {{ .Release.Namespace }}
labels:
app.kubernetes.io/name: external-openbao-backups-secret
{{- include "custom.labels" . | nindent 4 }}
spec:
secretStoreRef:
kind: ClusterSecretStore
name: openbao
data:
- secretKey: ACCESS_KEY_ID
remoteRef:
key: /digital-ocean/home-infra/openbao-backups
property: ACCESS_KEY_ID
- secretKey: ACCESS_REGION
remoteRef:
key: /digital-ocean/home-infra/openbao-backups
property: ACCESS_REGION
- secretKey: ACCESS_SECRET_KEY
remoteRef:
key: /digital-ocean/home-infra/openbao-backups
property: ACCESS_SECRET_KEY

View File

@@ -1,5 +1,280 @@
rclone:
controllers:
karakeep-assets:
type: cronjob
cronjob:
suspend: false
timeZone: America/Chicago
schedule: 10 0 * * *
backoffLimit: 3
parallelism: 1
containers:
sync:
image:
repository: rclone/rclone
tag: 1.73.5@sha256:1619a625f845e169c34b952cf40c483c0392965b821c5155cde8cbfd35254a96
args:
- sync
- src:karakeep-assets
- dest:karakeep-assets
- --s3-no-check-bucket
- --verbose
env:
- name: RCLONE_S3_PROVIDER
value: Other
- name: RCLONE_CONFIG_SRC_TYPE
value: s3
- name: RCLONE_CONFIG_SRC_PROVIDER
value: Other
- name: RCLONE_CONFIG_SRC_ENV_AUTH
value: false
- name: RCLONE_CONFIG_SRC_ACCESS_KEY_ID
valueFrom:
secretKeyRef:
name: garage-karakeep-secret
key: ACCESS_KEY_ID
- name: RCLONE_CONFIG_SRC_SECRET_ACCESS_KEY
valueFrom:
secretKeyRef:
name: garage-karakeep-secret
key: ACCESS_SECRET_KEY
- name: RCLONE_CONFIG_SRC_REGION
valueFrom:
secretKeyRef:
name: garage-karakeep-secret
key: ACCESS_REGION
- name: RCLONE_CONFIG_SRC_ENDPOINT
valueFrom:
secretKeyRef:
name: garage-karakeep-secret
key: SRC_ENDPOINT
- name: RCLONE_CONFIG_SRC_S3_FORCE_PATH_STYLE
value: true
- name: RCLONE_CONFIG_DEST_TYPE
value: s3
- name: RCLONE_CONFIG_DEST_PROVIDER
value: Other
- name: RCLONE_CONFIG_DEST_ENV_AUTH
value: false
- name: RCLONE_CONFIG_DEST_ACCESS_KEY_ID
valueFrom:
secretKeyRef:
name: garage-karakeep-secret
key: ACCESS_KEY_ID
- name: RCLONE_CONFIG_DEST_SECRET_ACCESS_KEY
valueFrom:
secretKeyRef:
name: garage-karakeep-secret
key: ACCESS_SECRET_KEY
- name: RCLONE_CONFIG_DEST_REGION
valueFrom:
secretKeyRef:
name: garage-karakeep-secret
key: ACCESS_REGION
- name: RCLONE_CONFIG_DEST_ENDPOINT
valueFrom:
secretKeyRef:
name: garage-karakeep-secret
key: DEST_ENDPOINT
- name: RCLONE_CONFIG_SRC_DEST_FORCE_PATH_STYLE
value: true
talos-backups:
type: cronjob
cronjob:
suspend: false
timeZone: America/Chicago
schedule: 20 0 * * *
backoffLimit: 3
parallelism: 1
containers:
sync:
image:
repository: rclone/rclone
tag: 1.73.5@sha256:1619a625f845e169c34b952cf40c483c0392965b821c5155cde8cbfd35254a96
args:
- sync
- src:talos-backups
- dest:talos-backups
- --s3-no-check-bucket
- --max-age
- 90d
- --verbose
env:
- name: RCLONE_S3_PROVIDER
value: Other
- name: RCLONE_CONFIG_SRC_TYPE
value: s3
- name: RCLONE_CONFIG_SRC_PROVIDER
value: Other
- name: RCLONE_CONFIG_SRC_ENV_AUTH
value: false
- name: RCLONE_CONFIG_SRC_ACCESS_KEY_ID
valueFrom:
secretKeyRef:
name: garage-talos-backups-secret
key: ACCESS_KEY_ID
- name: RCLONE_CONFIG_SRC_SECRET_ACCESS_KEY
valueFrom:
secretKeyRef:
name: garage-talos-backups-secret
key: ACCESS_SECRET_KEY
- name: RCLONE_CONFIG_SRC_REGION
valueFrom:
secretKeyRef:
name: garage-talos-backups-secret
key: ACCESS_REGION
- name: RCLONE_CONFIG_SRC_ENDPOINT
valueFrom:
secretKeyRef:
name: garage-talos-backups-secret
key: SRC_ENDPOINT
- name: RCLONE_CONFIG_SRC_S3_FORCE_PATH_STYLE
value: true
- name: RCLONE_CONFIG_DEST_TYPE
value: s3
- name: RCLONE_CONFIG_DEST_PROVIDER
value: Other
- name: RCLONE_CONFIG_DEST_ENV_AUTH
value: false
- name: RCLONE_CONFIG_DEST_ACCESS_KEY_ID
valueFrom:
secretKeyRef:
name: garage-talos-backups-secret
key: ACCESS_KEY_ID
- name: RCLONE_CONFIG_DEST_SECRET_ACCESS_KEY
valueFrom:
secretKeyRef:
name: garage-talos-backups-secret
key: ACCESS_SECRET_KEY
- name: RCLONE_CONFIG_DEST_REGION
valueFrom:
secretKeyRef:
name: garage-talos-backups-secret
key: ACCESS_REGION
- name: RCLONE_CONFIG_DEST_ENDPOINT
valueFrom:
secretKeyRef:
name: garage-talos-backups-secret
key: DEST_ENDPOINT
- name: RCLONE_CONFIG_SRC_DEST_FORCE_PATH_STYLE
value: true
prune:
image:
repository: rclone/rclone
tag: 1.73.5@sha256:1619a625f845e169c34b952cf40c483c0392965b821c5155cde8cbfd35254a96
args:
- delete
- dest:talos-backups
- --min-age
- 90d
- --verbose
env:
- name: RCLONE_CONFIG_DEST_TYPE
value: s3
- name: RCLONE_CONFIG_DEST_PROVIDER
value: Other
- name: RCLONE_CONFIG_DEST_ENV_AUTH
value: false
- name: RCLONE_CONFIG_DEST_ACCESS_KEY_ID
valueFrom:
secretKeyRef:
name: garage-talos-backups-secret
key: ACCESS_KEY_ID
- name: RCLONE_CONFIG_DEST_SECRET_ACCESS_KEY
valueFrom:
secretKeyRef:
name: garage-talos-backups-secret
key: ACCESS_SECRET_KEY
- name: RCLONE_CONFIG_DEST_REGION
valueFrom:
secretKeyRef:
name: garage-talos-backups-secret
key: ACCESS_REGION
- name: RCLONE_CONFIG_DEST_ENDPOINT
valueFrom:
secretKeyRef:
name: garage-talos-backups-secret
key: DEST_ENDPOINT
- name: RCLONE_CONFIG_SRC_S3_FORCE_PATH_STYLE
value: true
web-assets:
type: cronjob
cronjob:
suspend: false
timeZone: America/Chicago
schedule: 30 0 * * *
backoffLimit: 3
parallelism: 1
containers:
sync:
image:
repository: rclone/rclone
tag: 1.73.5@sha256:1619a625f845e169c34b952cf40c483c0392965b821c5155cde8cbfd35254a96
args:
- sync
- src:web-assets
- dest:web-assets
- --s3-no-check-bucket
- --verbose
env:
- name: RCLONE_S3_PROVIDER
value: Other
- name: RCLONE_CONFIG_SRC_TYPE
value: s3
- name: RCLONE_CONFIG_SRC_PROVIDER
value: Other
- name: RCLONE_CONFIG_SRC_ENV_AUTH
value: false
- name: RCLONE_CONFIG_SRC_ACCESS_KEY_ID
valueFrom:
secretKeyRef:
name: garage-web-assets-secret
key: ACCESS_KEY_ID
- name: RCLONE_CONFIG_SRC_SECRET_ACCESS_KEY
valueFrom:
secretKeyRef:
name: garage-web-assets-secret
key: ACCESS_SECRET_KEY
- name: RCLONE_CONFIG_SRC_REGION
valueFrom:
secretKeyRef:
name: garage-web-assets-secret
key: ACCESS_REGION
- name: RCLONE_CONFIG_SRC_ENDPOINT
valueFrom:
secretKeyRef:
name: garage-web-assets-secret
key: SRC_ENDPOINT
- name: RCLONE_CONFIG_SRC_S3_FORCE_PATH_STYLE
value: true
- name: RCLONE_CONFIG_DEST_TYPE
value: s3
- name: RCLONE_CONFIG_DEST_PROVIDER
value: Other
- name: RCLONE_CONFIG_DEST_ENV_AUTH
value: false
- name: RCLONE_CONFIG_DEST_ACCESS_KEY_ID
valueFrom:
secretKeyRef:
name: garage-web-assets-secret
key: ACCESS_KEY_ID
- name: RCLONE_CONFIG_DEST_SECRET_ACCESS_KEY
valueFrom:
secretKeyRef:
name: garage-web-assets-secret
key: ACCESS_SECRET_KEY
- name: RCLONE_CONFIG_DEST_REGION
valueFrom:
secretKeyRef:
name: garage-web-assets-secret
key: ACCESS_REGION
- name: RCLONE_CONFIG_DEST_ENDPOINT
valueFrom:
secretKeyRef:
name: garage-web-assets-secret
key: DEST_ENDPOINT
- name: RCLONE_CONFIG_DEST_S3_FORCE_PATH_STYLE
value: true
postgres-backups:
type: cronjob
cronjob:
@@ -123,24 +398,313 @@ rclone:
key: DEST_ENDPOINT
- name: RCLONE_CONFIG_SRC_S3_FORCE_PATH_STYLE
value: true
rclone-web-assets-remote:
cronJob:
suspend: false
schedule: 30 0 * * *
rclone:
source:
bucketName: web-assets
destination:
bucketName: web-assets
secret:
externalSecret:
source:
credentials:
path: /garage/home-infra/web-assets
config:
path: /garage/config
destination:
credentials:
path: /garage/home-infra/web-assets
config:
path: /garage/config
ntfy-attachments:
type: cronjob
cronjob:
suspend: false
timeZone: America/Chicago
schedule: 50 0 * * *
backoffLimit: 3
parallelism: 1
containers:
sync:
image:
repository: rclone/rclone
tag: 1.73.5@sha256:1619a625f845e169c34b952cf40c483c0392965b821c5155cde8cbfd35254a96
args:
- sync
- src:ntfy-attachments
- dest:ntfy-attachments
- --s3-no-check-bucket
- --verbose
env:
- name: RCLONE_S3_PROVIDER
value: Other
- name: RCLONE_CONFIG_SRC_TYPE
value: s3
- name: RCLONE_CONFIG_SRC_PROVIDER
value: Other
- name: RCLONE_CONFIG_SRC_ENV_AUTH
value: false
- name: RCLONE_CONFIG_SRC_ACCESS_KEY_ID
valueFrom:
secretKeyRef:
name: garage-ntfy-attachments-secret
key: ACCESS_KEY_ID
- name: RCLONE_CONFIG_SRC_SECRET_ACCESS_KEY
valueFrom:
secretKeyRef:
name: garage-ntfy-attachments-secret
key: ACCESS_SECRET_KEY
- name: RCLONE_CONFIG_SRC_REGION
valueFrom:
secretKeyRef:
name: garage-ntfy-attachments-secret
key: ACCESS_REGION
- name: RCLONE_CONFIG_SRC_ENDPOINT
valueFrom:
secretKeyRef:
name: garage-ntfy-attachments-secret
key: SRC_ENDPOINT
- name: RCLONE_CONFIG_SRC_S3_FORCE_PATH_STYLE
value: true
- name: RCLONE_CONFIG_DEST_TYPE
value: s3
- name: RCLONE_CONFIG_DEST_PROVIDER
value: Other
- name: RCLONE_CONFIG_DEST_ENV_AUTH
value: false
- name: RCLONE_CONFIG_DEST_ACCESS_KEY_ID
valueFrom:
secretKeyRef:
name: garage-ntfy-attachments-secret
key: ACCESS_KEY_ID
- name: RCLONE_CONFIG_DEST_SECRET_ACCESS_KEY
valueFrom:
secretKeyRef:
name: garage-ntfy-attachments-secret
key: ACCESS_SECRET_KEY
- name: RCLONE_CONFIG_DEST_REGION
valueFrom:
secretKeyRef:
name: garage-ntfy-attachments-secret
key: ACCESS_REGION
- name: RCLONE_CONFIG_DEST_ENDPOINT
valueFrom:
secretKeyRef:
name: garage-ntfy-attachments-secret
key: DEST_ENDPOINT
- name: RCLONE_CONFIG_SRC_DEST_FORCE_PATH_STYLE
value: true
openbao-backups-remote:
type: cronjob
cronjob:
suspend: false
timeZone: America/Chicago
schedule: 0 1 * * *
backoffLimit: 3
parallelism: 1
containers:
sync:
image:
repository: rclone/rclone
tag: 1.73.5@sha256:1619a625f845e169c34b952cf40c483c0392965b821c5155cde8cbfd35254a96
args:
- sync
- src:openbao-backups
- dest:openbao-backups
- --s3-no-check-bucket
- --max-age
- 90d
- --verbose
env:
- name: RCLONE_S3_PROVIDER
value: Other
- name: RCLONE_CONFIG_SRC_TYPE
value: s3
- name: RCLONE_CONFIG_SRC_PROVIDER
value: Other
- name: RCLONE_CONFIG_SRC_ENV_AUTH
value: false
- name: RCLONE_CONFIG_SRC_ACCESS_KEY_ID
valueFrom:
secretKeyRef:
name: garage-openbao-backups-secret
key: ACCESS_KEY_ID
- name: RCLONE_CONFIG_SRC_SECRET_ACCESS_KEY
valueFrom:
secretKeyRef:
name: garage-openbao-backups-secret
key: ACCESS_SECRET_KEY
- name: RCLONE_CONFIG_SRC_REGION
valueFrom:
secretKeyRef:
name: garage-openbao-backups-secret
key: ACCESS_REGION
- name: RCLONE_CONFIG_SRC_ENDPOINT
valueFrom:
secretKeyRef:
name: garage-openbao-backups-secret
key: ENDPOINT_LOCAL
- name: RCLONE_CONFIG_SRC_S3_FORCE_PATH_STYLE
value: true
- name: RCLONE_CONFIG_DEST_TYPE
value: s3
- name: RCLONE_CONFIG_DEST_PROVIDER
value: Other
- name: RCLONE_CONFIG_DEST_ENV_AUTH
value: false
- name: RCLONE_CONFIG_DEST_ACCESS_KEY_ID
valueFrom:
secretKeyRef:
name: garage-openbao-backups-secret
key: ACCESS_KEY_ID
- name: RCLONE_CONFIG_DEST_SECRET_ACCESS_KEY
valueFrom:
secretKeyRef:
name: garage-openbao-backups-secret
key: ACCESS_SECRET_KEY
- name: RCLONE_CONFIG_DEST_REGION
valueFrom:
secretKeyRef:
name: garage-openbao-backups-secret
key: ACCESS_REGION
- name: RCLONE_CONFIG_DEST_ENDPOINT
valueFrom:
secretKeyRef:
name: garage-openbao-backups-secret
key: ENDPOINT_REMOTE
- name: RCLONE_CONFIG_SRC_DEST_FORCE_PATH_STYLE
value: true
prune:
image:
repository: rclone/rclone
tag: 1.73.5@sha256:1619a625f845e169c34b952cf40c483c0392965b821c5155cde8cbfd35254a96
args:
- delete
- dest:openbao-backups
- --min-age
- 90d
- --verbose
env:
- name: RCLONE_CONFIG_DEST_TYPE
value: s3
- name: RCLONE_CONFIG_DEST_PROVIDER
value: Other
- name: RCLONE_CONFIG_DEST_ENV_AUTH
value: false
- name: RCLONE_CONFIG_DEST_ACCESS_KEY_ID
valueFrom:
secretKeyRef:
name: garage-openbao-backups-secret
key: ACCESS_KEY_ID
- name: RCLONE_CONFIG_DEST_SECRET_ACCESS_KEY
valueFrom:
secretKeyRef:
name: garage-openbao-backups-secret
key: ACCESS_SECRET_KEY
- name: RCLONE_CONFIG_DEST_REGION
valueFrom:
secretKeyRef:
name: garage-openbao-backups-secret
key: ACCESS_REGION
- name: RCLONE_CONFIG_DEST_ENDPOINT
valueFrom:
secretKeyRef:
name: garage-openbao-backups-secret
key: ENDPOINT_REMOTE
- name: RCLONE_CONFIG_SRC_S3_FORCE_PATH_STYLE
value: true
openbao-backups-external:
type: cronjob
cronjob:
suspend: false
timeZone: America/Chicago
schedule: 10 1 * * *
backoffLimit: 3
parallelism: 1
containers:
sync:
image:
repository: rclone/rclone
tag: 1.73.5@sha256:1619a625f845e169c34b952cf40c483c0392965b821c5155cde8cbfd35254a96
args:
- sync
- src:openbao-backups
- dest:openbao-backups-6e088aad5fad110b
- --s3-no-check-bucket
- --max-age
- 90d
- --verbose
env:
- name: RCLONE_S3_PROVIDER
value: Other
- name: RCLONE_CONFIG_SRC_TYPE
value: s3
- name: RCLONE_CONFIG_SRC_PROVIDER
value: Other
- name: RCLONE_CONFIG_SRC_ENV_AUTH
value: false
- name: RCLONE_CONFIG_SRC_ACCESS_KEY_ID
valueFrom:
secretKeyRef:
name: garage-openbao-backups-secret
key: ACCESS_KEY_ID
- name: RCLONE_CONFIG_SRC_SECRET_ACCESS_KEY
valueFrom:
secretKeyRef:
name: garage-openbao-backups-secret
key: ACCESS_SECRET_KEY
- name: RCLONE_CONFIG_SRC_REGION
valueFrom:
secretKeyRef:
name: garage-openbao-backups-secret
key: ACCESS_REGION
- name: RCLONE_CONFIG_SRC_ENDPOINT
valueFrom:
secretKeyRef:
name: garage-openbao-backups-secret
key: ENDPOINT_LOCAL
- name: RCLONE_CONFIG_SRC_S3_FORCE_PATH_STYLE
value: true
- name: RCLONE_CONFIG_DEST_TYPE
value: s3
- name: RCLONE_CONFIG_DEST_PROVIDER
value: DigitalOcean
- name: RCLONE_CONFIG_DEST_ENV_AUTH
value: false
- name: RCLONE_CONFIG_DEST_ACCESS_KEY_ID
valueFrom:
secretKeyRef:
name: external-openbao-backups-secret
key: ACCESS_KEY_ID
- name: RCLONE_CONFIG_DEST_SECRET_ACCESS_KEY
valueFrom:
secretKeyRef:
name: external-openbao-backups-secret
key: ACCESS_SECRET_KEY
- name: RCLONE_CONFIG_DEST_REGION
valueFrom:
secretKeyRef:
name: external-openbao-backups-secret
key: ACCESS_REGION
- name: RCLONE_CONFIG_DEST_ENDPOINT
value: https://nyc3.digitaloceanspaces.com
- name: RCLONE_CONFIG_DEST_S3_FORCE_PATH_STYLE
value: true
prune:
image:
repository: rclone/rclone
tag: 1.73.5@sha256:1619a625f845e169c34b952cf40c483c0392965b821c5155cde8cbfd35254a96
args:
- delete
- dest:openbao-backups-6e088aad5fad110b
- --min-age
- 90d
- --verbose
env:
- name: RCLONE_CONFIG_DEST_TYPE
value: s3
- name: RCLONE_CONFIG_DEST_PROVIDER
value: DigitalOcean
- name: RCLONE_CONFIG_DEST_ENV_AUTH
value: false
- name: RCLONE_CONFIG_DEST_ACCESS_KEY_ID
valueFrom:
secretKeyRef:
name: external-openbao-backups-secret
key: ACCESS_KEY_ID
- name: RCLONE_CONFIG_DEST_SECRET_ACCESS_KEY
valueFrom:
secretKeyRef:
name: external-openbao-backups-secret
key: ACCESS_SECRET_KEY
- name: RCLONE_CONFIG_DEST_REGION
valueFrom:
secretKeyRef:
name: external-openbao-backups-secret
key: ACCESS_REGION
- name: RCLONE_CONFIG_DEST_ENDPOINT
value: https://nyc3.digitaloceanspaces.com
- name: RCLONE_CONFIG_DEST_S3_FORCE_PATH_STYLE
value: true

View File

@@ -1,156 +0,0 @@
apiVersion: monitoring.coreos.com/v1
kind: PrometheusRule
metadata:
name: clickhouse
namespace: {{ .Release.Namespace }}
labels:
app.kubernetes.io/name: clickhouse
{{- include "custom.labels" . | nindent 4 }}
spec:
groups:
- name: EmbeddedExporter
rules:
- alert: ClickHouseNodeDown
expr: up{job="clickhouse"} == 0
for: 2m
labels:
severity: critical
annotations:
summary: ClickHouse node down (instance {{ `{{ $labels.instance }}` }})
description: "No metrics received from ClickHouse exporter for over 2 minutes.\n VALUE = {{ `{{ $value }}` }}\n LABELS = {{ `{{ $labels }}` }}"
- alert: ClickHouseMemoryUsageCritical
expr: ClickHouseAsyncMetrics_CGroupMemoryUsed / ClickHouseAsyncMetrics_CGroupMemoryTotal * 100 > 90 and ClickHouseAsyncMetrics_CGroupMemoryTotal > 0
for: 5m
labels:
severity: critical
annotations:
summary: ClickHouse Memory Usage Critical (instance {{ `{{ $labels.instance }}` }})
description: "Memory usage is critically high, over 90%.\n VALUE = {{ `{{ $value }}` }}\n LABELS = {{ `{{ $labels }}` }}"
- alert: ClickHouseMemoryUsageWarning
expr: ClickHouseAsyncMetrics_CGroupMemoryUsed / ClickHouseAsyncMetrics_CGroupMemoryTotal * 100 > 80 and ClickHouseAsyncMetrics_CGroupMemoryTotal > 0
for: 5m
labels:
severity: warning
annotations:
summary: ClickHouse Memory Usage Warning (instance {{ `{{ $labels.instance }}` }})
description: "Memory usage is over 80%.\n VALUE = {{ `{{ $value }}` }}\n LABELS = {{ `{{ $labels }}` }}"
- alert: ClickHouseDiskSpaceLowOnDefault
expr: ClickHouseAsyncMetrics_DiskAvailable_default / (ClickHouseAsyncMetrics_DiskAvailable_default + ClickHouseAsyncMetrics_DiskUsed_default) * 100 < 20 and (ClickHouseAsyncMetrics_DiskAvailable_default + ClickHouseAsyncMetrics_DiskUsed_default) > 0
for: 2m
labels:
severity: warning
annotations:
summary: ClickHouse Disk Space Low on Default (instance {{ `{{ $labels.instance }}` }})
description: "Disk space on default is below 20%.\n VALUE = {{ `{{ $value }}` }}\n LABELS = {{ `{{ $labels }}` }}"
- alert: ClickHouseDiskSpaceCriticalOnDefault
expr: ClickHouseAsyncMetrics_DiskAvailable_default / (ClickHouseAsyncMetrics_DiskAvailable_default + ClickHouseAsyncMetrics_DiskUsed_default) * 100 < 10 and (ClickHouseAsyncMetrics_DiskAvailable_default + ClickHouseAsyncMetrics_DiskUsed_default) > 0
for: 2m
labels:
severity: critical
annotations:
summary: ClickHouse Disk Space Critical on Default (instance {{ `{{ $labels.instance }}` }})
description: "Disk space on default disk is critically low, below 10%.\n VALUE = {{ `{{ $value }}` }}\n LABELS = {{ `{{ $labels }}` }}"
- alert: ClickHouseDiskSpaceLowOnBackups
expr: ClickHouseAsyncMetrics_DiskAvailable_backups / (ClickHouseAsyncMetrics_DiskAvailable_backups + ClickHouseAsyncMetrics_DiskUsed_backups) * 100 < 20 and (ClickHouseAsyncMetrics_DiskAvailable_backups + ClickHouseAsyncMetrics_DiskUsed_backups) > 0
for: 2m
labels:
severity: warning
annotations:
summary: ClickHouse Disk Space Low on Backups (instance {{ `{{ $labels.instance }}` }})
description: "Disk space on backups is below 20%.\n VALUE = {{ `{{ $value }}` }}\n LABELS = {{ `{{ $labels }}` }}"
- alert: ClickHouseReplicaErrors
expr: ClickHouseErrorMetric_ALL_REPLICAS_ARE_STALE == 1 or ClickHouseErrorMetric_ALL_REPLICAS_LOST == 1
for: 0m
labels:
severity: critical
annotations:
summary: ClickHouse Replica Errors (instance {{ `{{ $labels.instance }}` }})
description: "Critical replica errors detected, either all replicas are stale or lost.\n VALUE = {{ `{{ $value }}` }}\n LABELS = {{ `{{ $labels }}` }}"
- alert: ClickHouseNoAvailableReplicas
expr: ClickHouseErrorMetric_NO_AVAILABLE_REPLICA == 1
for: 0m
labels:
severity: critical
annotations:
summary: ClickHouse No Available Replicas (instance {{ `{{ $labels.instance }}` }})
description: "No available replicas in ClickHouse.\n VALUE = {{ `{{ $value }}` }}\n LABELS = {{ `{{ $labels }}` }}"
- alert: ClickHouseNoLiveReplicas
expr: ClickHouseErrorMetric_TOO_FEW_LIVE_REPLICAS == 1
for: 0m
labels:
severity: critical
annotations:
summary: ClickHouse No Live Replicas (instance {{ `{{ $labels.instance }}` }})
description: "There are too few live replicas available, risking data loss and service disruption.\n VALUE = {{ `{{ $value }}` }}\n LABELS = {{ `{{ $labels }}` }}"
- alert: ClickHouseHighTCPConnections
expr: ClickHouseMetrics_TCPConnection > 400
for: 5m
labels:
severity: warning
annotations:
summary: ClickHouse High TCP Connections (instance {{ `{{ $labels.instance }}` }})
description: "High number of TCP connections, indicating heavy client or inter-cluster communication.\n VALUE = {{ `{{ $value }}` }}\n LABELS = {{ `{{ $labels }}` }}"
- alert: ClickHouseInterserverConnectionIssues
expr: ClickHouseMetrics_InterserverConnection > 50
for: 5m
labels:
severity: warning
annotations:
summary: ClickHouse Interserver Connection Issues (instance {{ `{{ $labels.instance }}` }})
description: "High number of interserver connections may indicate replication or distributed query handling issues.\n VALUE = {{ `{{ $value }}` }}\n LABELS = {{ `{{ $labels }}` }}"
- alert: ClickHouseZooKeeperConnectionIssues
expr: ClickHouseMetrics_ZooKeeperSession != 1
for: 3m
labels:
severity: warning
annotations:
summary: ClickHouse ZooKeeper Connection Issues (instance {{ `{{ $labels.instance }}` }})
description: "ClickHouse is experiencing issues with ZooKeeper connections, which may affect cluster state and coordination.\n VALUE = {{ `{{ $value }}` }}\n LABELS = {{ `{{ $labels }}` }}"
- alert: ClickHouseAuthenticationFailures
expr: increase(ClickHouseErrorMetric_AUTHENTICATION_FAILED[5m]) > 3
for: 0m
labels:
severity: info
annotations:
summary: ClickHouse Authentication Failures (instance {{ `{{ $labels.instance }}` }})
description: "Authentication failures detected, indicating potential security issues or misconfiguration.\n VALUE = {{ `{{ $value }}` }}\n LABELS = {{ `{{ $labels }}` }}"
- alert: ClickHouseAccessDeniedErrors
expr: increase(ClickHouseErrorMetric_RESOURCE_ACCESS_DENIED[5m]) > 3
for: 0m
labels:
severity: info
annotations:
summary: ClickHouse Access Denied Errors (instance {{ `{{ $labels.instance }}` }})
description: "Access denied errors have been logged, which could indicate permission issues or unauthorized access attempts.\n VALUE = {{ `{{ $value }}` }}\n LABELS = {{ `{{ $labels }}` }}"
- alert: ClickHouseRejectedInsertQueries
expr: increase(ClickHouseProfileEvents_RejectedInserts[1m]) > 2
for: 1m
labels:
severity: warning
annotations:
summary: ClickHouse rejected insert queries (instance {{ `{{ $labels.instance }}` }})
description: "INSERTs rejected due to too many active data parts. Reduce insert frequency.\n VALUE = {{ `{{ $value }}` }}\n LABELS = {{ `{{ $labels }}` }}"
- alert: ClickHouseDelayedInsertQueries
expr: increase(ClickHouseProfileEvents_DelayedInserts[5m]) > 10
for: 2m
labels:
severity: warning
annotations:
summary: ClickHouse delayed insert queries (instance {{ `{{ $labels.instance }}` }})
description: "INSERTs delayed due to high number of active parts.\n VALUE = {{ `{{ $value }}` }}\n LABELS = {{ `{{ $labels }}` }}"
- alert: ClickHouseZookeeperHardwareException
expr: increase(ClickHouseProfileEvents_ZooKeeperHardwareExceptions[1m]) > 0
for: 1m
labels:
severity: critical
annotations:
summary: ClickHouse zookeeper hardware exception (instance {{ `{{ $labels.instance }}` }})
description: "Zookeeper hardware exception: network issues communicating with ZooKeeper\n VALUE = {{ `{{ $value }}` }}\n LABELS = {{ `{{ $labels }}` }}"
- alert: ClickHouseDistributedRejectedInserts
expr: increase(ClickHouseProfileEvents_DistributedRejectedInserts[5m]) > 3
for: 2m
labels:
severity: critical
annotations:
summary: ClickHouse distributed rejected inserts (instance {{ `{{ $labels.instance }}` }})
description: "INSERTs into Distributed tables rejected due to pending bytes limit.\n VALUE = {{ `{{ $value }}` }}\n LABELS = {{ `{{ $labels }}` }}"

View File

@@ -150,20 +150,19 @@ rybbit:
data:
network.xml: |
<clickhouse>
<listen_host>0.0.0.0</listen_host>
<http_port>8123</http_port>
<listen_host>0.0.0.0</listen_host>
</clickhouse>
enable_json.xml: |
<clickhouse>
<settings>
<enable_json_type>1</enable_json_type>
</settings>
<settings>
<enable_json_type>1</enable_json_type>
</settings>
</clickhouse>
logging_rules.xml: |
<clickhouse>
<logger>
<level>warning</level>
<console>true</console>
<level>warning</level>
<console>true</console>
</logger>
<query_thread_log remove="remove"/>
<query_log remove="remove"/>
@@ -186,17 +185,6 @@ rybbit:
</default>
</profiles>
</clickhouse>
metrics.xml: |
<clickhouse>
<prometheus>
<endpoint>/metrics</endpoint>
<port>9363</port>
<metrics>true</metrics>
<events>true</events>
<asynchronous_metrics>true</asynchronous_metrics>
<errors>true</errors>
</prometheus>
</clickhouse>
service:
backend:
controller: backend
@@ -216,21 +204,6 @@ rybbit:
http:
port: 8123
targetPort: 8123
metrics:
port: 9363
targetPort: 9363
serviceMonitor:
main:
selector:
matchLabels:
app.kubernetes.io/name: rybbit-clickhouse
app.kubernetes.io/instance: rybbit-clickhouse
serviceName: '{{ include "bjw-s.common.lib.chart.names.fullname" $ }}'
endpoints:
- port: metrics
interval: 30s
scrapeTimeout: 10s
path: /metrics
persistence:
clickhouse:
forceRename: clickhouse-data
@@ -265,10 +238,6 @@ rybbit:
readOnly: true
mountPropagation: None
subPath: user_logging.xml
- path: /etc/clickhouse-server/config.d/metrics.xml
readOnly: true
mountPropagation: None
subPath: metrics.xml
postgres-18-cluster:
mode: recovery
recovery:

View File

@@ -3,12 +3,12 @@ secrets-store-csi-driver:
enabled: true
image:
repository: registry.k8s.io/csi-secrets-store/driver
tag: v1.6.0@sha256:110344819630bfd41e0c6d3f215d325ad1a4d5d5b1d298f8af7d0edf4df64a4e
tag: v1.5.6@sha256:6df2b3b3817136d2ade3d53306dbbd98385c1c01e8b3c373192c0e5b8d183f7b
crds:
enabled: true
image:
repository: registry.k8s.io/csi-secrets-store/driver-crds
tag: v1.6.0@sha256:2419b318a1c17bd741686bf1994cd37cee7162039c019435b8f534f2846fe488
tag: v1.5.6@sha256:d40d9212beb62ee0f9f09b75d024ed807816879f38e75eca309497c3df89568c
driver:
resources:
limits:

View File

@@ -10,7 +10,7 @@ site-documentation:
main:
image:
repository: harbor.alexlebens.net/images/site-documentation
tag: 0.28.0@sha256:dabb2c9a8c306a01ccf1d85e797f6a5cc81d8d3b5db8d28ab1b5969f1b56cf74
tag: 0.27.1@sha256:a9e8659827375e7ee65ea8bc8550f4c0604316b48f39da7fa255fa9f3b5a17d6
resources:
requests:
cpu: 10m

View File

@@ -10,7 +10,7 @@ site-profile:
main:
image:
repository: harbor.alexlebens.net/images/site-profile
tag: 3.19.1@sha256:bf8f7f065867c605fe42955f12aaec68c5d1e667a3325bb30ad6d028b523bcd5
tag: 3.18.6@sha256:6aacdb7270d21b02d85cd593999014c91614e70c8f6f84774e532f9141237a6c
resources:
requests:
cpu: 10m

View File

@@ -1,169 +0,0 @@
apiVersion: monitoring.coreos.com/v1
kind: PrometheusRule
metadata:
name: elasticsearch
namespace: {{ .Release.Namespace }}
labels:
app.kubernetes.io/name: elasticsearch
{{- include "custom.labels" . | nindent 4 }}
spec:
groups:
- name: ElasticsearchExporter
rules:
- alert: ElasticsearchHeapUsageTooHigh
expr: (elasticsearch_jvm_memory_used_bytes{area="heap"} / elasticsearch_jvm_memory_max_bytes{area="heap"}) * 100 > 90 and elasticsearch_jvm_memory_max_bytes{area="heap"} > 0
for: 2m
labels:
severity: critical
annotations:
summary: Elasticsearch Heap Usage Too High (instance {{ `{{ $labels.instance }}` }})
description: "The heap usage is over 90%\n VALUE = {{ `{{ $value }}` }}\n LABELS = {{ `{{ $labels }}` }}"
- alert: ElasticsearchHeapUsageWarning
expr: (elasticsearch_jvm_memory_used_bytes{area="heap"} / elasticsearch_jvm_memory_max_bytes{area="heap"}) * 100 > 80 and elasticsearch_jvm_memory_max_bytes{area="heap"} > 0
for: 2m
labels:
severity: warning
annotations:
summary: Elasticsearch Heap Usage warning (instance {{ `{{ $labels.instance }}` }})
description: "The heap usage is over 80%\n VALUE = {{ `{{ $value }}` }}\n LABELS = {{ `{{ $labels }}` }}"
- alert: ElasticsearchDiskOutOfSpace
expr: elasticsearch_filesystem_data_available_bytes / elasticsearch_filesystem_data_size_bytes * 100 < 10 and elasticsearch_filesystem_data_size_bytes > 0
for: 0m
labels:
severity: critical
annotations:
summary: Elasticsearch disk out of space (instance {{ `{{ $labels.instance }}` }})
description: "The disk usage is over 90%\n VALUE = {{ `{{ $value }}` }}\n LABELS = {{ `{{ $labels }}` }}"
- alert: ElasticsearchDiskSpaceLow
expr: elasticsearch_filesystem_data_available_bytes / elasticsearch_filesystem_data_size_bytes * 100 < 20 and elasticsearch_filesystem_data_size_bytes > 0
for: 2m
labels:
severity: warning
annotations:
summary: Elasticsearch disk space low (instance {{ `{{ $labels.instance }}` }})
description: "The disk usage is over 80%\n VALUE = {{ `{{ $value }}` }}\n LABELS = {{ `{{ $labels }}` }}"
- alert: ElasticsearchClusterRed
expr: elasticsearch_cluster_health_status{color="red"} == 1
for: 0m
labels:
severity: critical
annotations:
summary: Elasticsearch Cluster Red (instance {{ `{{ $labels.instance }}` }})
description: "Elastic Cluster Red status\n VALUE = {{ `{{ $value }}` }}\n LABELS = {{ `{{ $labels }}` }}"
- alert: ElasticsearchClusterYellow
expr: elasticsearch_cluster_health_status{color="yellow"} == 1
for: 0m
labels:
severity: warning
annotations:
summary: Elasticsearch Cluster Yellow (instance {{ `{{ $labels.instance }}` }})
description: "Elastic Cluster Yellow status\n VALUE = {{ `{{ $value }}` }}\n LABELS = {{ `{{ $labels }}` }}"
# 1m delay allows a restart without triggering an alert.
- alert: ElasticsearchHealthyNodes
expr: elasticsearch_cluster_health_number_of_nodes < 3
for: 1m
labels:
severity: critical
annotations:
summary: Elasticsearch Healthy Nodes (instance {{ `{{ $labels.instance }}` }})
description: "Missing node in Elasticsearch cluster\n VALUE = {{ `{{ $value }}` }}\n LABELS = {{ `{{ $labels }}` }}"
# 1m delay allows a restart without triggering an alert.
- alert: ElasticsearchHealthyDataNodes
expr: elasticsearch_cluster_health_number_of_data_nodes < 3
for: 1m
labels:
severity: critical
annotations:
summary: Elasticsearch Healthy Data Nodes (instance {{ `{{ $labels.instance }}` }})
description: "Missing data node in Elasticsearch cluster\n VALUE = {{ `{{ $value }}` }}\n LABELS = {{ `{{ $labels }}` }}"
- alert: ElasticsearchRelocatingShards
expr: elasticsearch_cluster_health_relocating_shards > 0
for: 0m
labels:
severity: info
annotations:
summary: Elasticsearch relocating shards (instance {{ `{{ $labels.instance }}` }})
description: "Elasticsearch is relocating shards\n VALUE = {{ `{{ $value }}` }}\n LABELS = {{ `{{ $labels }}` }}"
- alert: ElasticsearchRelocatingShardsTooLong
expr: elasticsearch_cluster_health_relocating_shards > 0
for: 15m
labels:
severity: warning
annotations:
summary: Elasticsearch relocating shards too long (instance {{ `{{ $labels.instance }}` }})
description: "Elasticsearch has been relocating shards for 15min\n VALUE = {{ `{{ $value }}` }}\n LABELS = {{ `{{ $labels }}` }}"
- alert: ElasticsearchInitializingShards
expr: elasticsearch_cluster_health_initializing_shards > 0
for: 0m
labels:
severity: info
annotations:
summary: Elasticsearch initializing shards (instance {{ `{{ $labels.instance }}` }})
description: "Elasticsearch is initializing shards\n VALUE = {{ `{{ $value }}` }}\n LABELS = {{ `{{ $labels }}` }}"
- alert: ElasticsearchInitializingShardsTooLong
expr: elasticsearch_cluster_health_initializing_shards > 0
for: 15m
labels:
severity: warning
annotations:
summary: Elasticsearch initializing shards too long (instance {{ `{{ $labels.instance }}` }})
description: "Elasticsearch has been initializing shards for 15 min\n VALUE = {{ `{{ $value }}` }}\n LABELS = {{ `{{ $labels }}` }}"
- alert: ElasticsearchUnassignedShards
expr: elasticsearch_cluster_health_unassigned_shards > 0
for: 2m
labels:
severity: critical
annotations:
summary: Elasticsearch unassigned shards (instance {{ `{{ $labels.instance }}` }})
description: "Elasticsearch has unassigned shards\n VALUE = {{ `{{ $value }}` }}\n LABELS = {{ `{{ $labels }}` }}"
- alert: ElasticsearchPendingTasks
expr: elasticsearch_cluster_health_number_of_pending_tasks > 0
for: 15m
labels:
severity: warning
annotations:
summary: Elasticsearch pending tasks (instance {{ `{{ $labels.instance }}` }})
description: "Elasticsearch has pending tasks. Cluster works slowly.\n VALUE = {{ `{{ $value }}` }}\n LABELS = {{ `{{ $labels }}` }}"
- alert: ElasticsearchNoNewDocuments
expr: increase(elasticsearch_indices_indexing_index_total{es_data_node="true"}[10m]) < 1
for: 0m
labels:
severity: warning
annotations:
summary: Elasticsearch no new documents (instance {{ `{{ $labels.instance }}` }})
description: "No new documents for 10 min!\n VALUE = {{ `{{ $value }}` }}\n LABELS = {{ `{{ $labels }}` }}"
# Threshold of 10ms (0.01s) per indexing operation is a rough default. Adjust based on your document size and cluster performance.
- alert: ElasticsearchHighIndexingLatency
expr: rate(elasticsearch_indices_indexing_index_time_seconds_total[5m]) / rate(elasticsearch_indices_indexing_index_total[5m]) > 0.01 and rate(elasticsearch_indices_indexing_index_total[5m]) > 0
for: 10m
labels:
severity: warning
annotations:
summary: Elasticsearch High Indexing Latency (instance {{ `{{ $labels.instance }}` }})
description: "The indexing latency on Elasticsearch cluster is higher than the threshold (current value: {{ `{{ $value }}` }}s).\n VALUE = {{ `{{ $value }}` }}\n LABELS = {{ `{{ $labels }}` }}"
# Threshold of 10000 ops/s is a rough default. Adjust based on your cluster capacity and expected workload.
- alert: ElasticsearchHighIndexingRate
expr: sum(rate(elasticsearch_indices_indexing_index_total[1m]))> 10000
for: 5m
labels:
severity: warning
annotations:
summary: Elasticsearch High Indexing Rate (instance {{ `{{ $labels.instance }}` }})
description: "The indexing rate on Elasticsearch cluster is higher than the threshold.\n VALUE = {{ `{{ $value }}` }}\n LABELS = {{ `{{ $labels }}` }}"
# Threshold of 100 queries/s is very low for most production clusters. Adjust based on your expected query volume.
- alert: ElasticsearchHighQueryRate
expr: sum(rate(elasticsearch_indices_search_query_total[1m])) > 100
for: 5m
labels:
severity: warning
annotations:
summary: Elasticsearch High Query Rate (instance {{ `{{ $labels.instance }}` }})
description: "The query rate on Elasticsearch cluster is higher than the threshold.\n VALUE = {{ `{{ $value }}` }}\n LABELS = {{ `{{ $labels }}` }}"
- alert: ElasticsearchHighQueryLatency
expr: rate(elasticsearch_indices_search_query_time_seconds[1m]) / rate(elasticsearch_indices_search_query_total[1m]) > 1 and rate(elasticsearch_indices_search_query_total[1m]) > 0
for: 5m
labels:
severity: warning
annotations:
summary: Elasticsearch High Query Latency (instance {{ `{{ $labels.instance }}` }})
description: "The query latency on Elasticsearch cluster is higher than the threshold (current value: {{ `{{ $value }}` }}s).\n VALUE = {{ `{{ $value }}` }}\n LABELS = {{ `{{ $labels }}` }}"

View File

@@ -1,7 +1,6 @@
stalwart:
controllers:
main:
forceRename: stalwart
type: deployment
replicas: 1
strategy: Recreate
@@ -14,26 +13,9 @@ stalwart:
requests:
cpu: 10m
memory: 100Mi
metrics:
type: deployment
replicas: 1
strategy: Recreate
containers:
main:
image:
repository: quay.io/prometheuscommunity/elasticsearch-exporter
tag: v1.10.0@sha256:a6a4d4403f670faf6a94b8c7f9adbca3ead91f26dd64e5ccf95fa69025dc6e58
args:
- '--es.uri=https://elasticsearch-stalwart-es-http.tubearchivist:9200'
- '--es.ssl-skip-verify'
resources:
requests:
cpu: 1m
memory: 10Mi
service:
main:
controller: main
forceRename: stalwart
ports:
http:
port: 80
@@ -50,24 +32,6 @@ stalwart:
imaps:
port: 993
targetPort: 993
metrics:
controller: metrics
ports:
metrics:
port: 9114
targetPort: 9114
serviceMonitor:
main:
selector:
matchLabels:
app.kubernetes.io/name: stalwart-metrics
app.kubernetes.io/instance: stalwart-metrics
serviceName: '{{ include "bjw-s.common.lib.chart.names.fullname" $ }}'
endpoints:
- port: metrics
interval: 30s
scrapeTimeout: 10s
path: /metrics
route:
main:
kind: HTTPRoute

View File

@@ -12,3 +12,13 @@ Selector labels
app.kubernetes.io/instance: {{ .Release.Name }}
app.kubernetes.io/part-of: {{ .Release.Name }}
{{- end }}
{{/*
ServiceAccount names
*/}}
{{- define "custom.serviceAccountName" -}}
talos-backup
{{- end -}}
{{- define "custom.serviceAccountSecretsName" -}}
talos-backup-secrets
{{- end -}}

View File

@@ -10,7 +10,7 @@ spec:
provider: openbao
parameters:
baoAddress: "http://openbao-internal.openbao:8200"
roleName: talos-backup
roleName: {{ include "custom.serviceAccountName" . }}
objects: |
- objectName: .s3cfg
fileName: .s3cfg
@@ -30,7 +30,7 @@ spec:
provider: openbao
parameters:
baoAddress: "http://openbao-internal.openbao:8200"
roleName: talos-backup
roleName: {{ include "custom.serviceAccountName" . }}
objects: |
- objectName: .s3cfg
fileName: .s3cfg
@@ -50,7 +50,7 @@ spec:
provider: openbao
parameters:
baoAddress: "http://openbao-internal.openbao:8200"
roleName: talos-backup
roleName: {{ include "custom.serviceAccountName" . }}
objects: |
- objectName: .s3cfg
fileName: .s3cfg
@@ -70,7 +70,7 @@ spec:
provider: openbao
parameters:
baoAddress: "http://openbao-internal.openbao:8200"
roleName: talos-defrag
roleName: {{ include "custom.serviceAccountName" . }}
objects: |
- objectName: config
fileName: config

View File

@@ -1,31 +1,21 @@
apiVersion: v1
kind: ServiceAccount
metadata:
name: {{ include "custom.serviceAccountName" . }}
namespace: {{ .Release.Namespace }}
labels:
app.kubernetes.io/name: {{ include "custom.serviceAccountName" . }}
{{- include "custom.labels" . | nindent 4 }}
---
apiVersion: talos.dev/v1alpha1
kind: ServiceAccount
metadata:
name: talos-backup-secrets
name: {{ include "custom.serviceAccountSecretsName" . }}
namespace: {{ .Release.Namespace }}
labels:
app.kubernetes.io/name: talos-backup-secrets
app.kubernetes.io/name: {{ include "custom.serviceAccountSecretsName" . }}
{{- include "custom.labels" . | nindent 4 }}
spec:
roles:
- os:etcd:backup
---
apiVersion: v1
kind: ServiceAccount
metadata:
name: talos-backup
namespace: {{ .Release.Namespace }}
labels:
app.kubernetes.io/name: talos-backup
{{- include "custom.labels" . | nindent 4 }}
---
apiVersion: v1
kind: ServiceAccount
metadata:
name: talos-defrag
namespace: {{ .Release.Namespace }}
labels:
app.kubernetes.io/name: talos-defrag
{{- include "custom.labels" . | nindent 4 }}

View File

@@ -399,8 +399,6 @@ etcd-defrag:
schedule: 0 0 * * 0
backoffLimit: 3
parallelism: 1
serviceAccount:
name: talos-defrag
containers:
main:
image:
@@ -429,8 +427,6 @@ etcd-defrag:
schedule: 10 0 * * 0
backoffLimit: 3
parallelism: 1
serviceAccount:
name: talos-defrag
containers:
main:
image:
@@ -459,8 +455,6 @@ etcd-defrag:
schedule: 20 0 * * 0
backoffLimit: 3
parallelism: 1
serviceAccount:
name: talos-defrag
containers:
main:
image:

View File

@@ -1,169 +0,0 @@
apiVersion: monitoring.coreos.com/v1
kind: PrometheusRule
metadata:
name: elasticsearch
namespace: {{ .Release.Namespace }}
labels:
app.kubernetes.io/name: elasticsearch
{{- include "custom.labels" . | nindent 4 }}
spec:
groups:
- name: ElasticsearchExporter
rules:
- alert: ElasticsearchHeapUsageTooHigh
expr: (elasticsearch_jvm_memory_used_bytes{area="heap"} / elasticsearch_jvm_memory_max_bytes{area="heap"}) * 100 > 90 and elasticsearch_jvm_memory_max_bytes{area="heap"} > 0
for: 2m
labels:
severity: critical
annotations:
summary: Elasticsearch Heap Usage Too High (instance {{ `{{ $labels.instance }}` }})
description: "The heap usage is over 90%\n VALUE = {{ `{{ $value }}` }}\n LABELS = {{ `{{ $labels }}` }}"
- alert: ElasticsearchHeapUsageWarning
expr: (elasticsearch_jvm_memory_used_bytes{area="heap"} / elasticsearch_jvm_memory_max_bytes{area="heap"}) * 100 > 80 and elasticsearch_jvm_memory_max_bytes{area="heap"} > 0
for: 2m
labels:
severity: warning
annotations:
summary: Elasticsearch Heap Usage warning (instance {{ `{{ $labels.instance }}` }})
description: "The heap usage is over 80%\n VALUE = {{ `{{ $value }}` }}\n LABELS = {{ `{{ $labels }}` }}"
- alert: ElasticsearchDiskOutOfSpace
expr: elasticsearch_filesystem_data_available_bytes / elasticsearch_filesystem_data_size_bytes * 100 < 10 and elasticsearch_filesystem_data_size_bytes > 0
for: 0m
labels:
severity: critical
annotations:
summary: Elasticsearch disk out of space (instance {{ `{{ $labels.instance }}` }})
description: "The disk usage is over 90%\n VALUE = {{ `{{ $value }}` }}\n LABELS = {{ `{{ $labels }}` }}"
- alert: ElasticsearchDiskSpaceLow
expr: elasticsearch_filesystem_data_available_bytes / elasticsearch_filesystem_data_size_bytes * 100 < 20 and elasticsearch_filesystem_data_size_bytes > 0
for: 2m
labels:
severity: warning
annotations:
summary: Elasticsearch disk space low (instance {{ `{{ $labels.instance }}` }})
description: "The disk usage is over 80%\n VALUE = {{ `{{ $value }}` }}\n LABELS = {{ `{{ $labels }}` }}"
- alert: ElasticsearchClusterRed
expr: elasticsearch_cluster_health_status{color="red"} == 1
for: 0m
labels:
severity: critical
annotations:
summary: Elasticsearch Cluster Red (instance {{ `{{ $labels.instance }}` }})
description: "Elastic Cluster Red status\n VALUE = {{ `{{ $value }}` }}\n LABELS = {{ `{{ $labels }}` }}"
- alert: ElasticsearchClusterYellow
expr: elasticsearch_cluster_health_status{color="yellow"} == 1
for: 0m
labels:
severity: warning
annotations:
summary: Elasticsearch Cluster Yellow (instance {{ `{{ $labels.instance }}` }})
description: "Elastic Cluster Yellow status\n VALUE = {{ `{{ $value }}` }}\n LABELS = {{ `{{ $labels }}` }}"
# 1m delay allows a restart without triggering an alert.
- alert: ElasticsearchHealthyNodes
expr: elasticsearch_cluster_health_number_of_nodes < 3
for: 1m
labels:
severity: critical
annotations:
summary: Elasticsearch Healthy Nodes (instance {{ `{{ $labels.instance }}` }})
description: "Missing node in Elasticsearch cluster\n VALUE = {{ `{{ $value }}` }}\n LABELS = {{ `{{ $labels }}` }}"
# 1m delay allows a restart without triggering an alert.
- alert: ElasticsearchHealthyDataNodes
expr: elasticsearch_cluster_health_number_of_data_nodes < 3
for: 1m
labels:
severity: critical
annotations:
summary: Elasticsearch Healthy Data Nodes (instance {{ `{{ $labels.instance }}` }})
description: "Missing data node in Elasticsearch cluster\n VALUE = {{ `{{ $value }}` }}\n LABELS = {{ `{{ $labels }}` }}"
- alert: ElasticsearchRelocatingShards
expr: elasticsearch_cluster_health_relocating_shards > 0
for: 0m
labels:
severity: info
annotations:
summary: Elasticsearch relocating shards (instance {{ `{{ $labels.instance }}` }})
description: "Elasticsearch is relocating shards\n VALUE = {{ `{{ $value }}` }}\n LABELS = {{ `{{ $labels }}` }}"
- alert: ElasticsearchRelocatingShardsTooLong
expr: elasticsearch_cluster_health_relocating_shards > 0
for: 15m
labels:
severity: warning
annotations:
summary: Elasticsearch relocating shards too long (instance {{ `{{ $labels.instance }}` }})
description: "Elasticsearch has been relocating shards for 15min\n VALUE = {{ `{{ $value }}` }}\n LABELS = {{ `{{ $labels }}` }}"
- alert: ElasticsearchInitializingShards
expr: elasticsearch_cluster_health_initializing_shards > 0
for: 0m
labels:
severity: info
annotations:
summary: Elasticsearch initializing shards (instance {{ `{{ $labels.instance }}` }})
description: "Elasticsearch is initializing shards\n VALUE = {{ `{{ $value }}` }}\n LABELS = {{ `{{ $labels }}` }}"
- alert: ElasticsearchInitializingShardsTooLong
expr: elasticsearch_cluster_health_initializing_shards > 0
for: 15m
labels:
severity: warning
annotations:
summary: Elasticsearch initializing shards too long (instance {{ `{{ $labels.instance }}` }})
description: "Elasticsearch has been initializing shards for 15 min\n VALUE = {{ `{{ $value }}` }}\n LABELS = {{ `{{ $labels }}` }}"
- alert: ElasticsearchUnassignedShards
expr: elasticsearch_cluster_health_unassigned_shards > 0
for: 2m
labels:
severity: critical
annotations:
summary: Elasticsearch unassigned shards (instance {{ `{{ $labels.instance }}` }})
description: "Elasticsearch has unassigned shards\n VALUE = {{ `{{ $value }}` }}\n LABELS = {{ `{{ $labels }}` }}"
- alert: ElasticsearchPendingTasks
expr: elasticsearch_cluster_health_number_of_pending_tasks > 0
for: 15m
labels:
severity: warning
annotations:
summary: Elasticsearch pending tasks (instance {{ `{{ $labels.instance }}` }})
description: "Elasticsearch has pending tasks. Cluster works slowly.\n VALUE = {{ `{{ $value }}` }}\n LABELS = {{ `{{ $labels }}` }}"
- alert: ElasticsearchNoNewDocuments
expr: increase(elasticsearch_indices_indexing_index_total{es_data_node="true"}[10m]) < 1
for: 0m
labels:
severity: warning
annotations:
summary: Elasticsearch no new documents (instance {{ `{{ $labels.instance }}` }})
description: "No new documents for 10 min!\n VALUE = {{ `{{ $value }}` }}\n LABELS = {{ `{{ $labels }}` }}"
# Threshold of 10ms (0.01s) per indexing operation is a rough default. Adjust based on your document size and cluster performance.
- alert: ElasticsearchHighIndexingLatency
expr: rate(elasticsearch_indices_indexing_index_time_seconds_total[5m]) / rate(elasticsearch_indices_indexing_index_total[5m]) > 0.01 and rate(elasticsearch_indices_indexing_index_total[5m]) > 0
for: 10m
labels:
severity: warning
annotations:
summary: Elasticsearch High Indexing Latency (instance {{ `{{ $labels.instance }}` }})
description: "The indexing latency on Elasticsearch cluster is higher than the threshold (current value: {{ `{{ $value }}` }}s).\n VALUE = {{ `{{ $value }}` }}\n LABELS = {{ `{{ $labels }}` }}"
# Threshold of 10000 ops/s is a rough default. Adjust based on your cluster capacity and expected workload.
- alert: ElasticsearchHighIndexingRate
expr: sum(rate(elasticsearch_indices_indexing_index_total[1m]))> 10000
for: 5m
labels:
severity: warning
annotations:
summary: Elasticsearch High Indexing Rate (instance {{ `{{ $labels.instance }}` }})
description: "The indexing rate on Elasticsearch cluster is higher than the threshold.\n VALUE = {{ `{{ $value }}` }}\n LABELS = {{ `{{ $labels }}` }}"
# Threshold of 100 queries/s is very low for most production clusters. Adjust based on your expected query volume.
- alert: ElasticsearchHighQueryRate
expr: sum(rate(elasticsearch_indices_search_query_total[1m])) > 100
for: 5m
labels:
severity: warning
annotations:
summary: Elasticsearch High Query Rate (instance {{ `{{ $labels.instance }}` }})
description: "The query rate on Elasticsearch cluster is higher than the threshold.\n VALUE = {{ `{{ $value }}` }}\n LABELS = {{ `{{ $labels }}` }}"
- alert: ElasticsearchHighQueryLatency
expr: rate(elasticsearch_indices_search_query_time_seconds[1m]) / rate(elasticsearch_indices_search_query_total[1m]) > 1 and rate(elasticsearch_indices_search_query_total[1m]) > 0
for: 5m
labels:
severity: warning
annotations:
summary: Elasticsearch High Query Latency (instance {{ `{{ $labels.instance }}` }})
description: "The query latency on Elasticsearch cluster is higher than the threshold (current value: {{ `{{ $value }}` }}s).\n VALUE = {{ `{{ $value }}` }}\n LABELS = {{ `{{ $labels }}` }}"

View File

@@ -1,7 +1,6 @@
tubearchivist:
controllers:
main:
forceRename: tubearchivist
type: deployment
replicas: 1
strategy: Recreate
@@ -97,48 +96,13 @@ tubearchivist:
devic.es/tun: "1"
requests:
devic.es/tun: "1"
metrics:
type: deployment
replicas: 1
strategy: Recreate
containers:
main:
image:
repository: quay.io/prometheuscommunity/elasticsearch-exporter
tag: v1.10.0@sha256:a6a4d4403f670faf6a94b8c7f9adbca3ead91f26dd64e5ccf95fa69025dc6e58
args:
- '--es.uri=https://elasticsearch-tubearchivist-es-http.tubearchivist:9200'
- '--es.ssl-skip-verify'
resources:
requests:
cpu: 1m
memory: 10Mi
service:
main:
controller: main
forceRename: tubearchivist
ports:
http:
port: 80
targetPort: 24000
metrics:
controller: metrics
ports:
metrics:
port: 9114
targetPort: 9114
serviceMonitor:
main:
selector:
matchLabels:
app.kubernetes.io/name: tubearchivist-metrics
app.kubernetes.io/instance: tubearchivist-metrics
serviceName: '{{ include "bjw-s.common.lib.chart.names.fullname" $ }}'
endpoints:
- port: metrics
interval: 30s
scrapeTimeout: 10s
path: /metrics
route:
main:
kind: HTTPRoute

View File

@@ -9,29 +9,59 @@ metadata:
data:
snapshot.sh: |
DATE=$(date +"%Y%m%d-%H-%M")
MAX_RETRIES=5
SUCCESS=false
echo " "
echo ">> Running Vault Snapshot Script ..."
echo " "
echo ">> Fetching Vault token ..."
export VAULT_TOKEN=$(vault write -field=token auth/approle/login role_id=$VAULT_APPROLE_ROLE_ID secret_id=$VAULT_APPROLE_SECRET_ID)
echo ">> Verifying required commands ..."
echo " "
if [ -z "$VAULT_TOKEN" ]; then
echo ">> ERROR: Failed to fetch Vault token! Exiting..."
exit 1
for i in $(seq 1 "$MAX_RETRIES"); do
if apk update 2>&1 >/dev/null; then
echo ">> Attempt $i: Repositories are reachable";
SUCCESS=true;
break;
else
echo ">> Attempt $i: Connection failed, retrying in 5 seconds ...";
sleep 5;
fi;
done;
if [ "$SUCCESS" = false ]; then
echo ">> ERROR: Could not connect to apk repositories after $MAX_RETRIES attempts, exiting ...";
exit 1;
fi
echo " "
echo ">> Taking Vault snapshot ..."
if ! command -v jq 2>&1 >/dev/null; then
echo ">> Command jq could not be found, installing";
apk add --no-cache -q jq;
if [ $? -eq 0 ]; then
echo ">> Installation successful";
else
echo ">> Installation failed with exit code $?";
exit 1;
fi;
fi;
echo " ";
echo ">> Fetching Vault token ...";
export VAULT_TOKEN=$(vault write auth/approle/login role_id=$VAULT_APPROLE_ROLE_ID secret_id=$VAULT_APPROLE_SECRET_ID -format=json | jq -r .auth.client_token);
echo " ";
echo ">> Taking Vault snapsot ...";
vault operator raft snapshot save /opt/backup/vault-snapshot-$DATE.snap
echo " "
echo ">> Setting ownership of Vault snapshot ..."
echo " ";
echo ">> Setting ownership of Vault snapsot ...";
chown 100:1000 /opt/backup/vault-snapshot-$DATE.snap
echo " "
echo ">> Completed Vault snapshot"
echo " ";
echo ">> Completed Vault snapshot";
---
apiVersion: v1
@@ -47,3 +77,75 @@ data:
echo " ";
echo ">> Running S3 backup for Vault snapshot";
OUTPUT=$(s3cmd sync --no-check-certificate -v /opt/backup/* "${BUCKET}/cl01tl/cl01tl-vault-snapshots/" 2>&1)
STATUS=$?
if [ $STATUS -ne 0 ]; then
if echo "$OUTPUT" | grep -q "403 Forbidden"; then
MESSAGE="403 Authentication Error: Your keys are wrong or you don't have permission"
elif echo "$OUTPUT" | grep -q "404 Not Found"; then
MESSAGE="404 Error: The bucket or folder does not exist"
elif echo "$OUTPUT" | grep -q "Connection refused"; then
MESSAGE="Network Error: Cannot reach the S3 endpoint"
else
MESSAGE="Unknown Error"
echo " ";
echo ">> Unknown Error, output:"
echo " "
echo "$OUTPUT"
fi
MAX_RETRIES=5
SUCCESS=false
echo " "
echo ">> Sending message to ntfy using curl ..."
echo " "
echo ">> Verifying required commands ..."
for i in $(seq 1 "$MAX_RETRIES"); do
if apk update 2>&1 >/dev/null; then
echo ">> Attempt $i: Repositories are reachable";
SUCCESS=true;
break;
else
echo ">> Attempt $i: Connection failed, retrying in 5 seconds ...";
sleep 5;
fi;
done;
if [ "$SUCCESS" = false ]; then
echo ">> ERROR: Could not connect to apk repositories after $MAX_RETRIES attempts, exiting ...";
exit 1;
fi
if ! command -v curl 2>&1 >/dev/null; then
echo ">> Command curl could not be found, installing";
apk add --no-cache -q curl;
if [ $? -eq 0 ]; then
echo ">> Installation successful";
else
echo ">> Installation failed with exit code $?";
exit 1;
fi;
fi;
echo " "
echo ">> Sending to NTFY ..."
echo ">> Message: $MESSAGE"
HTTP_STATUS=$(curl \
--silent \
--write-out '%{http_code}' \
-H "Authorization: Bearer ${NTFY_TOKEN}" \
-H "X-Priority: 5" \
-H "X-Tags: warning" \
-H "X-Title: Vault Backup Failed for ${TARGET}" \
-d "$MESSAGE" \
${NTFY_ENDPOINT}/${NTFY_TOPIC}
)
echo ">> HTTP Status Code: $HTTP_STATUS"
else
echo " ";
echo ">> S3 Sync succeeded"
fi

View File

@@ -138,7 +138,8 @@
"matchPackageNames": [
"excalidraw/excalidraw",
"searxng/searxng",
"d3fk/s3cmd"
"d3fk/s3cmd",
"ghcr.io/linuxserver/lidarr"
],
"addLabels": [
"automerge"
@@ -159,17 +160,7 @@
"minimumReleaseAge": "3 days"
},
{
"description": "Disable automerge for ghcr docker dependencies, unsupported release age",
"matchDatasources": [
"docker"
],
"matchPackageNames": [
"/^ghcr\\.io//"
],
"automerge": false
},
{
"description": "Automerge images, specific packages, without release age",
"description": "Automerge images, specific packages",
"matchUpdateTypes": [
"minor"
],
@@ -181,7 +172,8 @@
"{{{datasource}}}",
"automerge"
],
"automerge": true
"automerge": true,
"minimumReleaseAge": "3 days"
}
]
}