chore: Update manifests after change
This commit is contained in:
@@ -0,0 +1,57 @@
|
|||||||
|
apiVersion: postgresql.cnpg.io/v1
|
||||||
|
kind: Cluster
|
||||||
|
metadata:
|
||||||
|
name: ntfy-postgresql-18-cluster
|
||||||
|
namespace: ntfy
|
||||||
|
labels:
|
||||||
|
app.kubernetes.io/name: ntfy-postgresql-18-cluster
|
||||||
|
helm.sh/chart: postgres-18-cluster-7.11.2
|
||||||
|
app.kubernetes.io/instance: ntfy
|
||||||
|
app.kubernetes.io/part-of: ntfy
|
||||||
|
app.kubernetes.io/version: "7.11.2"
|
||||||
|
app.kubernetes.io/managed-by: Helm
|
||||||
|
spec:
|
||||||
|
instances: 3
|
||||||
|
imageName: "ghcr.io/cloudnative-pg/postgresql:18.3-standard-trixie"
|
||||||
|
imagePullPolicy: IfNotPresent
|
||||||
|
postgresUID: 26
|
||||||
|
postgresGID: 26
|
||||||
|
storage:
|
||||||
|
size: 10Gi
|
||||||
|
storageClass: local-path
|
||||||
|
walStorage:
|
||||||
|
size: 2Gi
|
||||||
|
storageClass: local-path
|
||||||
|
resources:
|
||||||
|
limits:
|
||||||
|
hugepages-2Mi: 256Mi
|
||||||
|
requests:
|
||||||
|
cpu: 20m
|
||||||
|
memory: 80Mi
|
||||||
|
affinity:
|
||||||
|
enablePodAntiAffinity: true
|
||||||
|
topologyKey: kubernetes.io/hostname
|
||||||
|
primaryUpdateMethod: switchover
|
||||||
|
primaryUpdateStrategy: unsupervised
|
||||||
|
logLevel: info
|
||||||
|
enableSuperuserAccess: false
|
||||||
|
enablePDB: true
|
||||||
|
postgresql:
|
||||||
|
parameters:
|
||||||
|
hot_standby_feedback: "on"
|
||||||
|
max_slot_wal_keep_size: 2000MB
|
||||||
|
shared_buffers: 128MB
|
||||||
|
monitoring:
|
||||||
|
enablePodMonitor: true
|
||||||
|
disableDefaultQueries: false
|
||||||
|
plugins:
|
||||||
|
- name: barman-cloud.cloudnative-pg.io
|
||||||
|
enabled: true
|
||||||
|
isWALArchiver: true
|
||||||
|
parameters:
|
||||||
|
barmanObjectName: "ntfy-postgresql-18-backup-garage-local"
|
||||||
|
serverName: "ntfy-postgresql-18-backup-1"
|
||||||
|
bootstrap:
|
||||||
|
initdb:
|
||||||
|
database: app
|
||||||
|
owner: app
|
||||||
@@ -38,7 +38,7 @@ spec:
|
|||||||
- serve
|
- serve
|
||||||
env:
|
env:
|
||||||
- name: TZ
|
- name: TZ
|
||||||
value: US/Central
|
value: America/Chicago
|
||||||
- name: NTFY_BASE_URL
|
- name: NTFY_BASE_URL
|
||||||
value: https://ntfy.alexlebens.net
|
value: https://ntfy.alexlebens.net
|
||||||
- name: NTFY_LISTEN_HTTP
|
- name: NTFY_LISTEN_HTTP
|
||||||
@@ -57,13 +57,16 @@ spec:
|
|||||||
- name: NTFY_BEHIND_PROXY
|
- name: NTFY_BEHIND_PROXY
|
||||||
value: "true"
|
value: "true"
|
||||||
- name: NTFY_ATTACHMENT_CACHE_DIR
|
- name: NTFY_ATTACHMENT_CACHE_DIR
|
||||||
value: /var/cache/ntfy/attachments
|
valueFrom:
|
||||||
|
secretKeyRef:
|
||||||
|
key: attachment-cache-dir
|
||||||
|
name: ntfy-config-secret
|
||||||
- name: NTFY_ATTACHMENT_TOTAL_SIZE_LIMIT
|
- name: NTFY_ATTACHMENT_TOTAL_SIZE_LIMIT
|
||||||
value: 4G
|
value: 10G
|
||||||
- name: NTFY_ATTACHMENT_FILE_SIZE_LIMIT
|
- name: NTFY_ATTACHMENT_FILE_SIZE_LIMIT
|
||||||
value: 15M
|
value: 150M
|
||||||
- name: NTFY_ATTACHMENT_EXPIRY_DURATION
|
- name: NTFY_ATTACHMENT_EXPIRY_DURATION
|
||||||
value: 36h
|
value: 72h
|
||||||
- name: NTFY_ENABLE_SIGNUP
|
- name: NTFY_ENABLE_SIGNUP
|
||||||
value: "false"
|
value: "false"
|
||||||
- name: NTFY_ENABLE_LOGIN
|
- name: NTFY_ENABLE_LOGIN
|
||||||
@@ -78,13 +81,12 @@ spec:
|
|||||||
value: :9090
|
value: :9090
|
||||||
- name: NTFY_LOG_LEVEL
|
- name: NTFY_LOG_LEVEL
|
||||||
value: info
|
value: info
|
||||||
image: binwiederhier/ntfy:v2.21.0
|
image: binwiederhier/ntfy:v2.21.0@sha256:2b9e12d56a538f4402da51328eeca02696c4b207ab7fbe031c27e51a22ca9b86
|
||||||
imagePullPolicy: IfNotPresent
|
|
||||||
name: main
|
name: main
|
||||||
resources:
|
resources:
|
||||||
requests:
|
requests:
|
||||||
cpu: 10m
|
cpu: 10m
|
||||||
memory: 128Mi
|
memory: 40Mi
|
||||||
volumeMounts:
|
volumeMounts:
|
||||||
- mountPath: /var/cache/ntfy
|
- mountPath: /var/cache/ntfy
|
||||||
name: cache
|
name: cache
|
||||||
|
|||||||
@@ -0,0 +1,18 @@
|
|||||||
|
apiVersion: external-secrets.io/v1
|
||||||
|
kind: ExternalSecret
|
||||||
|
metadata:
|
||||||
|
name: ntfy-config-secret
|
||||||
|
namespace: ntfy
|
||||||
|
labels:
|
||||||
|
app.kubernetes.io/name: ntfy-config-secret
|
||||||
|
app.kubernetes.io/instance: ntfy
|
||||||
|
app.kubernetes.io/part-of: ntfy
|
||||||
|
spec:
|
||||||
|
secretStoreRef:
|
||||||
|
kind: ClusterSecretStore
|
||||||
|
name: vault
|
||||||
|
data:
|
||||||
|
- secretKey: attachment-cache-dir
|
||||||
|
remoteRef:
|
||||||
|
key: /garage/home-infra/ntfy-attachments
|
||||||
|
property: attachment-cache-dir
|
||||||
@@ -0,0 +1,38 @@
|
|||||||
|
apiVersion: external-secrets.io/v1
|
||||||
|
kind: ExternalSecret
|
||||||
|
metadata:
|
||||||
|
name: ntfy-postgresql-18-backup-garage-local-secret
|
||||||
|
namespace: ntfy
|
||||||
|
labels:
|
||||||
|
app.kubernetes.io/name: ntfy-postgresql-18-backup-garage-local-secret
|
||||||
|
helm.sh/chart: postgres-18-cluster-7.11.2
|
||||||
|
app.kubernetes.io/instance: ntfy
|
||||||
|
app.kubernetes.io/part-of: ntfy
|
||||||
|
app.kubernetes.io/version: "7.11.2"
|
||||||
|
app.kubernetes.io/managed-by: Helm
|
||||||
|
spec:
|
||||||
|
secretStoreRef:
|
||||||
|
kind: ClusterSecretStore
|
||||||
|
name: vault
|
||||||
|
data:
|
||||||
|
- secretKey: ACCESS_REGION
|
||||||
|
remoteRef:
|
||||||
|
conversionStrategy: Default
|
||||||
|
decodingStrategy: None
|
||||||
|
key: /garage/home-infra/postgres-backups
|
||||||
|
metadataPolicy: None
|
||||||
|
property: ACCESS_REGION
|
||||||
|
- secretKey: ACCESS_KEY_ID
|
||||||
|
remoteRef:
|
||||||
|
conversionStrategy: Default
|
||||||
|
decodingStrategy: None
|
||||||
|
key: /garage/home-infra/postgres-backups
|
||||||
|
metadataPolicy: None
|
||||||
|
property: ACCESS_KEY_ID
|
||||||
|
- secretKey: ACCESS_SECRET_KEY
|
||||||
|
remoteRef:
|
||||||
|
conversionStrategy: Default
|
||||||
|
decodingStrategy: None
|
||||||
|
key: /garage/home-infra/postgres-backups
|
||||||
|
metadataPolicy: None
|
||||||
|
property: ACCESS_SECRET_KEY
|
||||||
@@ -0,0 +1,38 @@
|
|||||||
|
apiVersion: external-secrets.io/v1
|
||||||
|
kind: ExternalSecret
|
||||||
|
metadata:
|
||||||
|
name: ntfy-postgresql-18-recovery-secret
|
||||||
|
namespace: ntfy
|
||||||
|
labels:
|
||||||
|
helm.sh/chart: postgres-18-cluster-7.11.2
|
||||||
|
app.kubernetes.io/instance: ntfy
|
||||||
|
app.kubernetes.io/part-of: ntfy
|
||||||
|
app.kubernetes.io/version: "7.11.2"
|
||||||
|
app.kubernetes.io/managed-by: Helm
|
||||||
|
app.kubernetes.io/name: ntfy-postgresql-18-recovery-secret
|
||||||
|
spec:
|
||||||
|
secretStoreRef:
|
||||||
|
kind: ClusterSecretStore
|
||||||
|
name: vault
|
||||||
|
data:
|
||||||
|
- secretKey: ACCESS_REGION
|
||||||
|
remoteRef:
|
||||||
|
conversionStrategy: Default
|
||||||
|
decodingStrategy: None
|
||||||
|
key: /garage/home-infra/postgres-backups
|
||||||
|
metadataPolicy: None
|
||||||
|
property: ACCESS_REGION
|
||||||
|
- secretKey: ACCESS_KEY_ID
|
||||||
|
remoteRef:
|
||||||
|
conversionStrategy: Default
|
||||||
|
decodingStrategy: None
|
||||||
|
key: /garage/home-infra/postgres-backups
|
||||||
|
metadataPolicy: None
|
||||||
|
property: ACCESS_KEY_ID
|
||||||
|
- secretKey: ACCESS_SECRET_KEY
|
||||||
|
remoteRef:
|
||||||
|
conversionStrategy: Default
|
||||||
|
decodingStrategy: None
|
||||||
|
key: /garage/home-infra/postgres-backups
|
||||||
|
metadataPolicy: None
|
||||||
|
property: ACCESS_SECRET_KEY
|
||||||
@@ -23,7 +23,7 @@ spec:
|
|||||||
name: ntfy
|
name: ntfy
|
||||||
namespace: ntfy
|
namespace: ntfy
|
||||||
port: 80
|
port: 80
|
||||||
weight: 100
|
weight: 1
|
||||||
matches:
|
matches:
|
||||||
- path:
|
- path:
|
||||||
type: PathPrefix
|
type: PathPrefix
|
||||||
|
|||||||
@@ -0,0 +1,33 @@
|
|||||||
|
apiVersion: barmancloud.cnpg.io/v1
|
||||||
|
kind: ObjectStore
|
||||||
|
metadata:
|
||||||
|
name: ntfy-postgresql-18-backup-garage-local
|
||||||
|
namespace: ntfy
|
||||||
|
labels:
|
||||||
|
app.kubernetes.io/name: ntfy-postgresql-18-backup-garage-local
|
||||||
|
helm.sh/chart: postgres-18-cluster-7.11.2
|
||||||
|
app.kubernetes.io/instance: ntfy
|
||||||
|
app.kubernetes.io/part-of: ntfy
|
||||||
|
app.kubernetes.io/version: "7.11.2"
|
||||||
|
app.kubernetes.io/managed-by: Helm
|
||||||
|
spec:
|
||||||
|
retentionPolicy: 7d
|
||||||
|
instanceSidecarConfiguration:
|
||||||
|
env:
|
||||||
|
- name: AWS_REQUEST_CHECKSUM_CALCULATION
|
||||||
|
value: when_required
|
||||||
|
- name: AWS_RESPONSE_CHECKSUM_VALIDATION
|
||||||
|
value: when_required
|
||||||
|
configuration:
|
||||||
|
destinationPath: s3://postgres-backups/cl01tl/ntfy/ntfy-postgresql-18-cluster
|
||||||
|
endpointURL: http://garage-main.garage:3900
|
||||||
|
s3Credentials:
|
||||||
|
accessKeyId:
|
||||||
|
name: ntfy-postgresql-18-backup-garage-local-secret
|
||||||
|
key: ACCESS_KEY_ID
|
||||||
|
secretAccessKey:
|
||||||
|
name: ntfy-postgresql-18-backup-garage-local-secret
|
||||||
|
key: ACCESS_SECRET_KEY
|
||||||
|
region:
|
||||||
|
name: ntfy-postgresql-18-backup-garage-local-secret
|
||||||
|
key: ACCESS_REGION
|
||||||
@@ -0,0 +1,32 @@
|
|||||||
|
apiVersion: barmancloud.cnpg.io/v1
|
||||||
|
kind: ObjectStore
|
||||||
|
metadata:
|
||||||
|
name: "ntfy-postgresql-18-recovery"
|
||||||
|
namespace: ntfy
|
||||||
|
labels:
|
||||||
|
helm.sh/chart: postgres-18-cluster-7.11.2
|
||||||
|
app.kubernetes.io/instance: ntfy
|
||||||
|
app.kubernetes.io/part-of: ntfy
|
||||||
|
app.kubernetes.io/version: "7.11.2"
|
||||||
|
app.kubernetes.io/managed-by: Helm
|
||||||
|
app.kubernetes.io/name: "ntfy-postgresql-18-recovery"
|
||||||
|
spec:
|
||||||
|
configuration:
|
||||||
|
destinationPath: s3://postgres-backups/cl01tl/ntfy/ntfy-postgresql-18-cluster
|
||||||
|
endpointURL: http://garage-main.garage:3900
|
||||||
|
wal:
|
||||||
|
compression: snappy
|
||||||
|
maxParallel: 1
|
||||||
|
data:
|
||||||
|
compression: snappy
|
||||||
|
jobs: 1
|
||||||
|
s3Credentials:
|
||||||
|
accessKeyId:
|
||||||
|
name: ntfy-postgresql-18-recovery-secret
|
||||||
|
key: ACCESS_KEY_ID
|
||||||
|
secretAccessKey:
|
||||||
|
name: ntfy-postgresql-18-recovery-secret
|
||||||
|
key: ACCESS_SECRET_KEY
|
||||||
|
region:
|
||||||
|
name: ntfy-postgresql-18-recovery-secret
|
||||||
|
key: ACCESS_REGION
|
||||||
@@ -0,0 +1,270 @@
|
|||||||
|
apiVersion: monitoring.coreos.com/v1
|
||||||
|
kind: PrometheusRule
|
||||||
|
metadata:
|
||||||
|
name: ntfy-postgresql-18-alert-rules
|
||||||
|
namespace: ntfy
|
||||||
|
labels:
|
||||||
|
app.kubernetes.io/name: ntfy-postgresql-18-alert-rules
|
||||||
|
helm.sh/chart: postgres-18-cluster-7.11.2
|
||||||
|
app.kubernetes.io/instance: ntfy
|
||||||
|
app.kubernetes.io/part-of: ntfy
|
||||||
|
app.kubernetes.io/version: "7.11.2"
|
||||||
|
app.kubernetes.io/managed-by: Helm
|
||||||
|
spec:
|
||||||
|
groups:
|
||||||
|
- name: cloudnative-pg/ntfy-postgresql-18
|
||||||
|
rules:
|
||||||
|
- alert: CNPGClusterBackendsWaitingWarning
|
||||||
|
annotations:
|
||||||
|
summary: CNPG Cluster has a backend waiting for longer than 5 minutes.
|
||||||
|
description: |-
|
||||||
|
Pod {{ $labels.pod }}
|
||||||
|
has been waiting for longer than 5 minutes
|
||||||
|
expr: |
|
||||||
|
cnpg_backends_waiting_total{namespace="ntfy"} > 300
|
||||||
|
for: 1m
|
||||||
|
labels:
|
||||||
|
severity: warning
|
||||||
|
namespace: ntfy
|
||||||
|
cnpg_cluster: ntfy-postgresql-18-cluster
|
||||||
|
- alert: CNPGClusterDatabaseDeadlockConflictsWarning
|
||||||
|
annotations:
|
||||||
|
summary: CNPG Cluster has over 10 deadlock conflicts.
|
||||||
|
description: |-
|
||||||
|
There are over 10 deadlock conflicts in
|
||||||
|
{{ $labels.pod }}
|
||||||
|
expr: |
|
||||||
|
cnpg_pg_stat_database_deadlocks{namespace="ntfy"} > 10
|
||||||
|
for: 1m
|
||||||
|
labels:
|
||||||
|
severity: warning
|
||||||
|
namespace: ntfy
|
||||||
|
cnpg_cluster: ntfy-postgresql-18-cluster
|
||||||
|
- alert: CNPGClusterHACritical
|
||||||
|
annotations:
|
||||||
|
summary: CNPG Cluster has no standby replicas!
|
||||||
|
description: |-
|
||||||
|
CloudNativePG Cluster "{{`{{`}} $labels.job {{`}}`}}" has no ready standby replicas. Your cluster is at a severe
|
||||||
|
risk of data loss and downtime if the primary instance fails.
|
||||||
|
|
||||||
|
The primary instance is still online and able to serve queries, although connections to the `-ro` endpoint
|
||||||
|
will fail. The `-r` endpoint is operating at reduced capacity and all traffic is being served by the main.
|
||||||
|
|
||||||
|
This can happen during a normal fail-over or automated minor version upgrades in a cluster with 2 or fewer
|
||||||
|
instances. The replaced instance may need some time to catch-up with the cluster primary instance.
|
||||||
|
|
||||||
|
This alarm will always trigger if your cluster is configured to run with only 1 instance. In this
|
||||||
|
case you may want to silence it.
|
||||||
|
runbook_url: https://github.com/cloudnative-pg/charts/blob/main/charts/cluster/docs/runbooks/CNPGClusterHACritical.md
|
||||||
|
expr: |
|
||||||
|
max by (job) (cnpg_pg_replication_streaming_replicas{namespace="ntfy"} - cnpg_pg_replication_is_wal_receiver_up{namespace="ntfy"}) < 1
|
||||||
|
for: 5m
|
||||||
|
labels:
|
||||||
|
severity: critical
|
||||||
|
namespace: ntfy
|
||||||
|
cnpg_cluster: ntfy-postgresql-18-cluster
|
||||||
|
- alert: CNPGClusterHAWarning
|
||||||
|
annotations:
|
||||||
|
summary: CNPG Cluster has fewer than 2 standby replicas.
|
||||||
|
description: |-
|
||||||
|
CloudNativePG Cluster "{{`{{`}} $labels.job {{`}}`}}" has only {{`{{`}} $value {{`}}`}} standby replicas, putting
|
||||||
|
your cluster at risk if another instance fails. The cluster is still able to operate normally, although
|
||||||
|
the `-ro` and `-r` endpoints operate at reduced capacity.
|
||||||
|
|
||||||
|
This can happen during a normal fail-over or automated minor version upgrades. The replaced instance may
|
||||||
|
need some time to catch-up with the cluster primary instance.
|
||||||
|
|
||||||
|
This alarm will be constantly triggered if your cluster is configured to run with less than 3 instances.
|
||||||
|
In this case you may want to silence it.
|
||||||
|
runbook_url: https://github.com/cloudnative-pg/charts/blob/main/charts/cluster/docs/runbooks/CNPGClusterHAWarning.md
|
||||||
|
expr: |
|
||||||
|
max by (job) (cnpg_pg_replication_streaming_replicas{namespace="ntfy"} - cnpg_pg_replication_is_wal_receiver_up{namespace="ntfy"}) < 2
|
||||||
|
for: 5m
|
||||||
|
labels:
|
||||||
|
severity: warning
|
||||||
|
namespace: ntfy
|
||||||
|
cnpg_cluster: ntfy-postgresql-18-cluster
|
||||||
|
- alert: CNPGClusterHighConnectionsCritical
|
||||||
|
annotations:
|
||||||
|
summary: CNPG Instance maximum number of connections critical!
|
||||||
|
description: |-
|
||||||
|
CloudNativePG Cluster "ntfy/ntfy-postgresql-18-cluster" instance {{`{{`}} $labels.pod {{`}}`}} is using {{`{{`}} $value {{`}}`}}% of
|
||||||
|
the maximum number of connections.
|
||||||
|
runbook_url: https://github.com/cloudnative-pg/charts/blob/main/charts/cluster/docs/runbooks/CNPGClusterHighConnectionsCritical.md
|
||||||
|
expr: |
|
||||||
|
sum by (pod) (cnpg_backends_total{namespace="ntfy", pod=~"ntfy-postgresql-18-cluster-([1-9][0-9]*)$"}) / max by (pod) (cnpg_pg_settings_setting{name="max_connections", namespace="ntfy", pod=~"ntfy-postgresql-18-cluster-([1-9][0-9]*)$"}) * 100 > 95
|
||||||
|
for: 5m
|
||||||
|
labels:
|
||||||
|
severity: critical
|
||||||
|
namespace: ntfy
|
||||||
|
cnpg_cluster: ntfy-postgresql-18-cluster
|
||||||
|
- alert: CNPGClusterHighConnectionsWarning
|
||||||
|
annotations:
|
||||||
|
summary: CNPG Instance is approaching the maximum number of connections.
|
||||||
|
description: |-
|
||||||
|
CloudNativePG Cluster "ntfy/ntfy-postgresql-18-cluster" instance {{`{{`}} $labels.pod {{`}}`}} is using {{`{{`}} $value {{`}}`}}% of
|
||||||
|
the maximum number of connections.
|
||||||
|
runbook_url: https://github.com/cloudnative-pg/charts/blob/main/charts/cluster/docs/runbooks/CNPGClusterHighConnectionsWarning.md
|
||||||
|
expr: |
|
||||||
|
sum by (pod) (cnpg_backends_total{namespace="ntfy", pod=~"ntfy-postgresql-18-cluster-([1-9][0-9]*)$"}) / max by (pod) (cnpg_pg_settings_setting{name="max_connections", namespace="ntfy", pod=~"ntfy-postgresql-18-cluster-([1-9][0-9]*)$"}) * 100 > 80
|
||||||
|
for: 5m
|
||||||
|
labels:
|
||||||
|
severity: warning
|
||||||
|
namespace: ntfy
|
||||||
|
cnpg_cluster: ntfy-postgresql-18-cluster
|
||||||
|
- alert: CNPGClusterHighReplicationLag
|
||||||
|
annotations:
|
||||||
|
summary: CNPG Cluster high replication lag
|
||||||
|
description: |-
|
||||||
|
CloudNativePG Cluster "ntfy/ntfy-postgresql-18-cluster" is experiencing a high replication lag of
|
||||||
|
{{`{{`}} $value {{`}}`}}ms.
|
||||||
|
|
||||||
|
High replication lag indicates network issues, busy instances, slow queries or suboptimal configuration.
|
||||||
|
runbook_url: https://github.com/cloudnative-pg/charts/blob/main/charts/cluster/docs/runbooks/CNPGClusterHighReplicationLag.md
|
||||||
|
expr: |
|
||||||
|
max(cnpg_pg_replication_lag{namespace="ntfy",pod=~"ntfy-postgresql-18-cluster-([1-9][0-9]*)$"}) * 1000 > 1000
|
||||||
|
for: 5m
|
||||||
|
labels:
|
||||||
|
severity: warning
|
||||||
|
namespace: ntfy
|
||||||
|
cnpg_cluster: ntfy-postgresql-18-cluster
|
||||||
|
- alert: CNPGClusterInstancesOnSameNode
|
||||||
|
annotations:
|
||||||
|
summary: CNPG Cluster instances are located on the same node.
|
||||||
|
description: |-
|
||||||
|
CloudNativePG Cluster "ntfy/ntfy-postgresql-18-cluster" has {{`{{`}} $value {{`}}`}}
|
||||||
|
instances on the same node {{`{{`}} $labels.node {{`}}`}}.
|
||||||
|
|
||||||
|
A failure or scheduled downtime of a single node will lead to a potential service disruption and/or data loss.
|
||||||
|
runbook_url: https://github.com/cloudnative-pg/charts/blob/main/charts/cluster/docs/runbooks/CNPGClusterInstancesOnSameNode.md
|
||||||
|
expr: |
|
||||||
|
count by (node) (kube_pod_info{namespace="ntfy", pod=~"ntfy-postgresql-18-cluster-([1-9][0-9]*)$"}) > 1
|
||||||
|
for: 5m
|
||||||
|
labels:
|
||||||
|
severity: warning
|
||||||
|
namespace: ntfy
|
||||||
|
cnpg_cluster: ntfy-postgresql-18-cluster
|
||||||
|
- alert: CNPGClusterLongRunningTransactionWarning
|
||||||
|
annotations:
|
||||||
|
summary: CNPG Cluster query is taking longer than 5 minutes.
|
||||||
|
description: |-
|
||||||
|
CloudNativePG Cluster Pod {{ $labels.pod }}
|
||||||
|
is taking more than 5 minutes (300 seconds) for a query.
|
||||||
|
expr: |-
|
||||||
|
cnpg_backends_max_tx_duration_seconds{namespace="ntfy"} > 300
|
||||||
|
for: 1m
|
||||||
|
labels:
|
||||||
|
severity: warning
|
||||||
|
namespace: ntfy
|
||||||
|
cnpg_cluster: ntfy-postgresql-18-cluster
|
||||||
|
- alert: CNPGClusterLowDiskSpaceCritical
|
||||||
|
annotations:
|
||||||
|
summary: CNPG Instance is running out of disk space!
|
||||||
|
description: |-
|
||||||
|
CloudNativePG Cluster "ntfy/ntfy-postgresql-18-cluster" is running extremely low on disk space. Check attached PVCs!
|
||||||
|
runbook_url: https://github.com/cloudnative-pg/charts/blob/main/charts/cluster/docs/runbooks/CNPGClusterLowDiskSpaceCritical.md
|
||||||
|
expr: |
|
||||||
|
max(max by(persistentvolumeclaim) (1 - kubelet_volume_stats_available_bytes{namespace="ntfy", persistentvolumeclaim=~"ntfy-postgresql-18-cluster-([1-9][0-9]*)$"} / kubelet_volume_stats_capacity_bytes{namespace="ntfy", persistentvolumeclaim=~"ntfy-postgresql-18-cluster-([1-9][0-9]*)$"})) > 0.9 OR
|
||||||
|
max(max by(persistentvolumeclaim) (1 - kubelet_volume_stats_available_bytes{namespace="ntfy", persistentvolumeclaim=~"ntfy-postgresql-18-cluster-([1-9][0-9]*)$-wal"} / kubelet_volume_stats_capacity_bytes{namespace="ntfy", persistentvolumeclaim=~"ntfy-postgresql-18-cluster-([1-9][0-9]*)$-wal"})) > 0.9 OR
|
||||||
|
max(sum by (namespace,persistentvolumeclaim) (kubelet_volume_stats_used_bytes{namespace="ntfy", persistentvolumeclaim=~"ntfy-postgresql-18-cluster-([1-9][0-9]*)$-tbs.*"})
|
||||||
|
/
|
||||||
|
sum by (namespace,persistentvolumeclaim) (kubelet_volume_stats_capacity_bytes{namespace="ntfy", persistentvolumeclaim=~"ntfy-postgresql-18-cluster-([1-9][0-9]*)$-tbs.*"})
|
||||||
|
*
|
||||||
|
on(namespace, persistentvolumeclaim) group_left(volume)
|
||||||
|
kube_pod_spec_volumes_persistentvolumeclaims_info{pod=~"ntfy-postgresql-18-cluster-([1-9][0-9]*)$"}
|
||||||
|
) > 0.9
|
||||||
|
for: 5m
|
||||||
|
labels:
|
||||||
|
severity: critical
|
||||||
|
namespace: ntfy
|
||||||
|
cnpg_cluster: ntfy-postgresql-18-cluster
|
||||||
|
- alert: CNPGClusterLowDiskSpaceWarning
|
||||||
|
annotations:
|
||||||
|
summary: CNPG Instance is running out of disk space.
|
||||||
|
description: |-
|
||||||
|
CloudNativePG Cluster "ntfy/ntfy-postgresql-18-cluster" is running low on disk space. Check attached PVCs.
|
||||||
|
runbook_url: https://github.com/cloudnative-pg/charts/blob/main/charts/cluster/docs/runbooks/CNPGClusterLowDiskSpaceWarning.md
|
||||||
|
expr: |
|
||||||
|
max(max by(persistentvolumeclaim) (1 - kubelet_volume_stats_available_bytes{namespace="ntfy", persistentvolumeclaim=~"ntfy-postgresql-18-cluster-([1-9][0-9]*)$"} / kubelet_volume_stats_capacity_bytes{namespace="ntfy", persistentvolumeclaim=~"ntfy-postgresql-18-cluster-([1-9][0-9]*)$"})) > 0.7 OR
|
||||||
|
max(max by(persistentvolumeclaim) (1 - kubelet_volume_stats_available_bytes{namespace="ntfy", persistentvolumeclaim=~"ntfy-postgresql-18-cluster-([1-9][0-9]*)$-wal"} / kubelet_volume_stats_capacity_bytes{namespace="ntfy", persistentvolumeclaim=~"ntfy-postgresql-18-cluster-([1-9][0-9]*)$-wal"})) > 0.7 OR
|
||||||
|
max(sum by (namespace,persistentvolumeclaim) (kubelet_volume_stats_used_bytes{namespace="ntfy", persistentvolumeclaim=~"ntfy-postgresql-18-cluster-([1-9][0-9]*)$-tbs.*"})
|
||||||
|
/
|
||||||
|
sum by (namespace,persistentvolumeclaim) (kubelet_volume_stats_capacity_bytes{namespace="ntfy", persistentvolumeclaim=~"ntfy-postgresql-18-cluster-([1-9][0-9]*)$-tbs.*"})
|
||||||
|
*
|
||||||
|
on(namespace, persistentvolumeclaim) group_left(volume)
|
||||||
|
kube_pod_spec_volumes_persistentvolumeclaims_info{pod=~"ntfy-postgresql-18-cluster-([1-9][0-9]*)$"}
|
||||||
|
) > 0.7
|
||||||
|
for: 5m
|
||||||
|
labels:
|
||||||
|
severity: warning
|
||||||
|
namespace: ntfy
|
||||||
|
cnpg_cluster: ntfy-postgresql-18-cluster
|
||||||
|
- alert: CNPGClusterOffline
|
||||||
|
annotations:
|
||||||
|
summary: CNPG Cluster has no running instances!
|
||||||
|
description: |-
|
||||||
|
CloudNativePG Cluster "ntfy/ntfy-postgresql-18-cluster" has no ready instances.
|
||||||
|
|
||||||
|
Having an offline cluster means your applications will not be able to access the database, leading to
|
||||||
|
potential service disruption and/or data loss.
|
||||||
|
runbook_url: https://github.com/cloudnative-pg/charts/blob/main/charts/cluster/docs/runbooks/CNPGClusterOffline.md
|
||||||
|
expr: |
|
||||||
|
(count(cnpg_collector_up{namespace="ntfy",pod=~"ntfy-postgresql-18-cluster-([1-9][0-9]*)$"}) OR on() vector(0)) == 0
|
||||||
|
for: 5m
|
||||||
|
labels:
|
||||||
|
severity: critical
|
||||||
|
namespace: ntfy
|
||||||
|
cnpg_cluster: ntfy-postgresql-18-cluster
|
||||||
|
- alert: CNPGClusterPGDatabaseXidAgeWarning
|
||||||
|
annotations:
|
||||||
|
summary: CNPG Cluster has a number of transactions from the frozen XID to the current one.
|
||||||
|
description: |-
|
||||||
|
Over 300,000,000 transactions from frozen xid
|
||||||
|
on pod {{ $labels.pod }}
|
||||||
|
expr: |
|
||||||
|
cnpg_pg_database_xid_age{namespace="ntfy"} > 300000000
|
||||||
|
for: 1m
|
||||||
|
labels:
|
||||||
|
severity: warning
|
||||||
|
namespace: ntfy
|
||||||
|
cnpg_cluster: ntfy-postgresql-18-cluster
|
||||||
|
- alert: CNPGClusterPGReplicationWarning
|
||||||
|
annotations:
|
||||||
|
summary: CNPG Cluster standby is lagging behind the primary.
|
||||||
|
description: |-
|
||||||
|
Standby is lagging behind by over 300 seconds (5 minutes)
|
||||||
|
expr: |
|
||||||
|
cnpg_pg_replication_lag{namespace="ntfy"} > 300
|
||||||
|
for: 1m
|
||||||
|
labels:
|
||||||
|
severity: warning
|
||||||
|
namespace: ntfy
|
||||||
|
cnpg_cluster: ntfy-postgresql-18-cluster
|
||||||
|
- alert: CNPGClusterReplicaFailingReplicationWarning
|
||||||
|
annotations:
|
||||||
|
summary: CNPG Cluster has a replica that is failing to replicate.
|
||||||
|
description: |-
|
||||||
|
Replica {{ $labels.pod }}
|
||||||
|
is failing to replicate
|
||||||
|
expr: |
|
||||||
|
cnpg_pg_replication_in_recovery{namespace="ntfy"} > cnpg_pg_replication_is_wal_receiver_up{namespace="ntfy"}
|
||||||
|
for: 1m
|
||||||
|
labels:
|
||||||
|
severity: warning
|
||||||
|
namespace: ntfy
|
||||||
|
cnpg_cluster: ntfy-postgresql-18-cluster
|
||||||
|
- alert: CNPGClusterZoneSpreadWarning
|
||||||
|
annotations:
|
||||||
|
summary: CNPG Cluster instances in the same zone.
|
||||||
|
description: |-
|
||||||
|
CloudNativePG Cluster "ntfy/ntfy-postgresql-18-cluster" has instances in the same availability zone.
|
||||||
|
|
||||||
|
A disaster in one availability zone will lead to a potential service disruption and/or data loss.
|
||||||
|
runbook_url: https://github.com/cloudnative-pg/charts/blob/main/charts/cluster/docs/runbooks/CNPGClusterZoneSpreadWarning.md
|
||||||
|
expr: |
|
||||||
|
3 > count(count by (label_topology_kubernetes_io_zone) (kube_pod_info{namespace="ntfy", pod=~"ntfy-postgresql-18-cluster-([1-9][0-9]*)$"} * on(node,instance) group_left(label_topology_kubernetes_io_zone) kube_node_labels)) < 3
|
||||||
|
for: 5m
|
||||||
|
labels:
|
||||||
|
severity: warning
|
||||||
|
namespace: ntfy
|
||||||
|
cnpg_cluster: ntfy-postgresql-18-cluster
|
||||||
@@ -0,0 +1,24 @@
|
|||||||
|
apiVersion: postgresql.cnpg.io/v1
|
||||||
|
kind: ScheduledBackup
|
||||||
|
metadata:
|
||||||
|
name: "ntfy-postgresql-18-scheduled-backup-live-backup"
|
||||||
|
namespace: ntfy
|
||||||
|
labels:
|
||||||
|
app.kubernetes.io/name: "ntfy-postgresql-18-scheduled-backup-live-backup"
|
||||||
|
helm.sh/chart: postgres-18-cluster-7.11.2
|
||||||
|
app.kubernetes.io/instance: ntfy
|
||||||
|
app.kubernetes.io/part-of: ntfy
|
||||||
|
app.kubernetes.io/version: "7.11.2"
|
||||||
|
app.kubernetes.io/managed-by: Helm
|
||||||
|
spec:
|
||||||
|
immediate: true
|
||||||
|
suspend: false
|
||||||
|
schedule: "0 15 14 * * *"
|
||||||
|
backupOwnerReference: self
|
||||||
|
cluster:
|
||||||
|
name: ntfy-postgresql-18-cluster
|
||||||
|
method: plugin
|
||||||
|
pluginConfiguration:
|
||||||
|
name: barman-cloud.cloudnative-pg.io
|
||||||
|
parameters:
|
||||||
|
barmanObjectName: "ntfy-postgresql-18-backup-garage-local"
|
||||||
@@ -0,0 +1,107 @@
|
|||||||
|
apiVersion: batch/v1
|
||||||
|
kind: CronJob
|
||||||
|
metadata:
|
||||||
|
name: rclone-ntfy-attachments
|
||||||
|
labels:
|
||||||
|
app.kubernetes.io/controller: ntfy-attachments
|
||||||
|
app.kubernetes.io/instance: rclone
|
||||||
|
app.kubernetes.io/managed-by: Helm
|
||||||
|
app.kubernetes.io/name: rclone
|
||||||
|
helm.sh/chart: rclone-4.6.2
|
||||||
|
namespace: rclone
|
||||||
|
spec:
|
||||||
|
suspend: false
|
||||||
|
concurrencyPolicy: Forbid
|
||||||
|
startingDeadlineSeconds: 90
|
||||||
|
timeZone: US/Central
|
||||||
|
schedule: "10 0 * * *"
|
||||||
|
successfulJobsHistoryLimit: 1
|
||||||
|
failedJobsHistoryLimit: 1
|
||||||
|
jobTemplate:
|
||||||
|
spec:
|
||||||
|
parallelism: 1
|
||||||
|
backoffLimit: 3
|
||||||
|
template:
|
||||||
|
metadata:
|
||||||
|
labels:
|
||||||
|
app.kubernetes.io/controller: ntfy-attachments
|
||||||
|
app.kubernetes.io/instance: rclone
|
||||||
|
app.kubernetes.io/name: rclone
|
||||||
|
spec:
|
||||||
|
enableServiceLinks: false
|
||||||
|
serviceAccountName: default
|
||||||
|
automountServiceAccountToken: true
|
||||||
|
hostIPC: false
|
||||||
|
hostNetwork: false
|
||||||
|
hostPID: false
|
||||||
|
dnsPolicy: ClusterFirst
|
||||||
|
restartPolicy: Never
|
||||||
|
containers:
|
||||||
|
- args:
|
||||||
|
- sync
|
||||||
|
- src:ntfy-attachments
|
||||||
|
- dest:ntfy-attachments
|
||||||
|
- --s3-no-check-bucket
|
||||||
|
- --verbose
|
||||||
|
env:
|
||||||
|
- name: RCLONE_S3_PROVIDER
|
||||||
|
value: Other
|
||||||
|
- name: RCLONE_CONFIG_SRC_TYPE
|
||||||
|
value: s3
|
||||||
|
- name: RCLONE_CONFIG_SRC_PROVIDER
|
||||||
|
value: Other
|
||||||
|
- name: RCLONE_CONFIG_SRC_ENV_AUTH
|
||||||
|
value: "false"
|
||||||
|
- name: RCLONE_CONFIG_SRC_ACCESS_KEY_ID
|
||||||
|
valueFrom:
|
||||||
|
secretKeyRef:
|
||||||
|
key: ACCESS_KEY_ID
|
||||||
|
name: garage-ntfy-attachments-secret
|
||||||
|
- name: RCLONE_CONFIG_SRC_SECRET_ACCESS_KEY
|
||||||
|
valueFrom:
|
||||||
|
secretKeyRef:
|
||||||
|
key: ACCESS_SECRET_KEY
|
||||||
|
name: garage-ntfy-attachments-secret
|
||||||
|
- name: RCLONE_CONFIG_SRC_REGION
|
||||||
|
valueFrom:
|
||||||
|
secretKeyRef:
|
||||||
|
key: ACCESS_REGION
|
||||||
|
name: garage-ntfy-attachments-secret
|
||||||
|
- name: RCLONE_CONFIG_SRC_ENDPOINT
|
||||||
|
valueFrom:
|
||||||
|
secretKeyRef:
|
||||||
|
key: SRC_ENDPOINT
|
||||||
|
name: garage-ntfy-attachments-secret
|
||||||
|
- name: RCLONE_CONFIG_SRC_S3_FORCE_PATH_STYLE
|
||||||
|
value: "true"
|
||||||
|
- name: RCLONE_CONFIG_DEST_TYPE
|
||||||
|
value: s3
|
||||||
|
- name: RCLONE_CONFIG_DEST_PROVIDER
|
||||||
|
value: Other
|
||||||
|
- name: RCLONE_CONFIG_DEST_ENV_AUTH
|
||||||
|
value: "false"
|
||||||
|
- name: RCLONE_CONFIG_DEST_ACCESS_KEY_ID
|
||||||
|
valueFrom:
|
||||||
|
secretKeyRef:
|
||||||
|
key: ACCESS_KEY_ID
|
||||||
|
name: garage-ntfy-attachments-secret
|
||||||
|
- name: RCLONE_CONFIG_DEST_SECRET_ACCESS_KEY
|
||||||
|
valueFrom:
|
||||||
|
secretKeyRef:
|
||||||
|
key: ACCESS_SECRET_KEY
|
||||||
|
name: garage-ntfy-attachments-secret
|
||||||
|
- name: RCLONE_CONFIG_DEST_REGION
|
||||||
|
valueFrom:
|
||||||
|
secretKeyRef:
|
||||||
|
key: ACCESS_REGION
|
||||||
|
name: garage-ntfy-attachments-secret
|
||||||
|
- name: RCLONE_CONFIG_DEST_ENDPOINT
|
||||||
|
valueFrom:
|
||||||
|
secretKeyRef:
|
||||||
|
key: DEST_ENDPOINT
|
||||||
|
name: garage-ntfy-attachments-secret
|
||||||
|
- name: RCLONE_CONFIG_SRC_DEST_FORCE_PATH_STYLE
|
||||||
|
value: "true"
|
||||||
|
image: rclone/rclone:1.73.3
|
||||||
|
imagePullPolicy: IfNotPresent
|
||||||
|
name: sync
|
||||||
@@ -0,0 +1,49 @@
|
|||||||
|
apiVersion: external-secrets.io/v1
|
||||||
|
kind: ExternalSecret
|
||||||
|
metadata:
|
||||||
|
name: garage-ntfy-attachments-secret
|
||||||
|
namespace: rclone
|
||||||
|
labels:
|
||||||
|
app.kubernetes.io/name: garage-ntfy-attachments-secret
|
||||||
|
app.kubernetes.io/instance: rclone
|
||||||
|
app.kubernetes.io/part-of: rclone
|
||||||
|
spec:
|
||||||
|
secretStoreRef:
|
||||||
|
kind: ClusterSecretStore
|
||||||
|
name: vault
|
||||||
|
data:
|
||||||
|
- secretKey: ACCESS_KEY_ID
|
||||||
|
remoteRef:
|
||||||
|
conversionStrategy: Default
|
||||||
|
decodingStrategy: None
|
||||||
|
key: /garage/home-infra/ntfy-attachments
|
||||||
|
metadataPolicy: None
|
||||||
|
property: ACCESS_KEY_ID
|
||||||
|
- secretKey: ACCESS_REGION
|
||||||
|
remoteRef:
|
||||||
|
conversionStrategy: Default
|
||||||
|
decodingStrategy: None
|
||||||
|
key: /garage/home-infra/ntfy-attachments
|
||||||
|
metadataPolicy: None
|
||||||
|
property: ACCESS_REGION
|
||||||
|
- secretKey: ACCESS_SECRET_KEY
|
||||||
|
remoteRef:
|
||||||
|
conversionStrategy: Default
|
||||||
|
decodingStrategy: None
|
||||||
|
key: /garage/home-infra/ntfy-attachments
|
||||||
|
metadataPolicy: None
|
||||||
|
property: ACCESS_SECRET_KEY
|
||||||
|
- secretKey: SRC_ENDPOINT
|
||||||
|
remoteRef:
|
||||||
|
conversionStrategy: Default
|
||||||
|
decodingStrategy: None
|
||||||
|
key: /garage/config/local
|
||||||
|
metadataPolicy: None
|
||||||
|
property: ENDPOINT
|
||||||
|
- secretKey: DEST_ENDPOINT
|
||||||
|
remoteRef:
|
||||||
|
conversionStrategy: Default
|
||||||
|
decodingStrategy: None
|
||||||
|
key: /garage/config/remote
|
||||||
|
metadataPolicy: None
|
||||||
|
property: ENDPOINT
|
||||||
Reference in New Issue
Block a user