add monitoring

This commit is contained in:
2025-03-02 23:04:31 -06:00
parent 02c99a4308
commit 69f48d0796
10 changed files with 405 additions and 0 deletions

View File

@@ -0,0 +1,23 @@
apiVersion: v2
name: kube-prometheus-stack
version: 1.0.0
description: Kube Prometheus Stack
keywords:
- kube-prometheus-stack
- prometheus
- alertmanager
- metrics
- alerts
- kubernetes
home: https://wiki.alexlebens.dev/doc/kube-prometheus-stack-pPGJlzAqur
sources:
- https://github.com/prometheus/prometheus
- https://github.com/prometheus-community/helm-charts/tree/main/charts/kube-prometheus-stack
maintainers:
- name: alexlebens
dependencies:
- name: kube-prometheus-stack
version: 69.6.0
repository: https://prometheus-community.github.io/helm-charts
icon: https://raw.githubusercontent.com/walkxcode/dashboard-icons/main/png/prometheus.png
appVersion: v0.79.2

View File

@@ -0,0 +1,37 @@
apiVersion: external-secrets.io/v1beta1
kind: ExternalSecret
metadata:
name: alertmanager-config-secret
namespace: {{ .Release.Namespace }}
labels:
app.kubernetes.io/name: {{ .Release.Name }}
app.kubernetes.io/instance: {{ .Release.Name }}
app.kubernetes.io/version: {{ .Chart.AppVersion }}
app.kubernetes.io/component: web
app.kubernetes.io/part-of: {{ .Release.Name }}
spec:
secretStoreRef:
kind: ClusterSecretStore
name: vault
data:
- secretKey: discord_webhook
remoteRef:
conversionStrategy: Default
decodingStrategy: None
key: /discord/webhook/alertmanager
metadataPolicy: None
property: webhook
- secretKey: pushover_token
remoteRef:
conversionStrategy: Default
decodingStrategy: None
key: /pushover/key
metadataPolicy: None
property: alertmanager_key
- secretKey: user_key
remoteRef:
conversionStrategy: Default
decodingStrategy: None
key: /pushover/key
metadataPolicy: None
property: user_key

View File

@@ -0,0 +1,8 @@
apiVersion: v1
kind: Namespace
metadata:
name: kube-prometheus-stack
labels:
pod-security.kubernetes.io/audit: privileged
pod-security.kubernetes.io/enforce: privileged
pod-security.kubernetes.io/warn: privileged

View File

@@ -0,0 +1,41 @@
apiVersion: monitoring.coreos.com/v1alpha1
kind: ScrapeConfig
metadata:
name: external-nodes-http
namespace: {{ .Release.Namespace }}
labels:
app.kubernetes.io/name: external-nodes
app.kubernetes.io/instance: {{ .Release.Name }}
app.kubernetes.io/version: {{ .Chart.AppVersion }}
app.kubernetes.io/component: metrics
app.kubernetes.io/part-of: {{ .Release.Name }}
spec:
staticConfigs:
- labels:
job: external-nodes
targets:
- ps08rp.alexlebens.net:9100
- ps09rp.alexlebens.net:9100
metricsPath: /metrics
scheme: HTTP
---
apiVersion: monitoring.coreos.com/v1alpha1
kind: ScrapeConfig
metadata:
name: external-nodes-https
namespace: {{ .Release.Namespace }}
labels:
app.kubernetes.io/name: external-nodes
app.kubernetes.io/instance: {{ .Release.Name }}
app.kubernetes.io/version: {{ .Chart.AppVersion }}
app.kubernetes.io/component: metrics
app.kubernetes.io/part-of: {{ .Release.Name }}
spec:
staticConfigs:
- labels:
job: external-nodes
targets:
- node-exporter-ps10rp.boreal-beaufort.ts.net
metricsPath: /metrics
scheme: HTTPS

View File

@@ -0,0 +1,16 @@
apiVersion: v1
kind: Service
metadata:
name: node-ps10rp
namespace: {{ .Release.Namespace }}
labels:
app.kubernetes.io/name: node-ps10rp
app.kubernetes.io/instance: {{ .Release.Name }}
app.kubernetes.io/version: {{ .Chart.AppVersion }}
app.kubernetes.io/component: tailscale
app.kubernetes.io/part-of: {{ .Release.Name }}
annotations:
tailscale.com/tailnet-fqdn: node-exporter-ps10rp.boreal-beaufort.ts.net
spec:
externalName: placeholder
type: ExternalName

View File

@@ -0,0 +1,149 @@
kube-prometheus-stack:
crds:
enabled: false
defaultRules:
create: true
rules:
kubeControllerManager: false
kubeSchedulerAlerting: false
kubeSchedulerRecording: false
global:
rbac:
create: true
createAggregateClusterRoles: true
alertmanager:
enabled: true
config:
route:
group_by: ["namespace", "alertname"]
group_wait: 30s
group_interval: 5m
repeat_interval: 24h
receiver: discord
routes:
- receiver: "null"
matchers:
- alertname = "Watchdog"
- receiver: "pushover"
group_wait: 10s
group_interval: 5m
repeat_interval: 24h
matchers:
- severity = "critical"
receivers:
- name: "null"
- name: discord
discord_configs:
- send_resolved: true
webhook_url_file: /etc/alertmanager/secrets/alertmanager-config-secret/discord_webhook
- name: pushover
pushover_configs:
- send_resolved: true
user_key_file: /etc/alertmanager/secrets/alertmanager-config-secret/user_key
token_file: /etc/alertmanager/secrets/alertmanager-config-secret/pushover_token
alertmanagerSpec:
secrets:
- alertmanager-config-secret
replicas: 1
grafana:
enabled: false
kubeApiServer:
tlsConfig:
insecureSkipVerify: true
kubeControllerManager:
enabled: false
kubeEtcd:
enabled: true
kubeScheduler:
enabled: false
kubeProxy:
enabled: false
kubeStateMetrics:
enabled: true
nodeExporter:
operatingSystems:
darwin:
enabled: false
prometheusOperator:
admissionWebhooks:
enabled: true
namespaces:
releaseNamespace: true
additional:
- kube-system
- kube-prometheus-stack
- argocd
- argo-workflows
- authentik
- blocky
- cert-manager
- cloudnative-pg
- descheduler
- directus
- external-dns
- freshrss
- generic-device-plugin
- gitea
- grafana
- harbor
- hoarder
- home-assistant
- immich
- jellystat
- komodo
- lidarr2
- linkwarden
- loki
- matrix-synapse
- ollama
- outline
- photoview
- qbittorrent
- radarr5
- radarr5-4k
- radarr5-anime
- radarr5-standup
- reloader
- rook-ceph
- roundcube
- slskd
- sonarr4
- sonarr4-4k
- sonarr4-anime
- speedtest-exporter
- spegel
- stalwart
- tdarr
- traefik
- trivy
- unpoller
- vault
- vaultwarden
- volsync
prometheus:
ingress:
enabled: true
ingressClassName: tailscale
labels:
tailscale.com/proxy-class: no-metrics
hosts:
- prometheus-cl01tl
tls:
- secretName: prometheus-cl01tl
hosts:
- prometheus-cl01tl
prometheusSpec:
scrapeInterval: 30s
retention: 30d
externalUrl: https://prometheus-cl01tl.boreal-beaufort.ts.net
serviceMonitorSelectorNilUsesHelmValues: false
podMonitorSelectorNilUsesHelmValues: false
scrapeConfigSelectorNilUsesHelmValues: false
storageSpec:
volumeClaimTemplate:
spec:
storageClassName: synology-iscsi-delete
accessModes: ["ReadWriteOnce"]
resources:
requests:
storage: 200Gi

View File

@@ -0,0 +1,23 @@
apiVersion: v2
name: unpoller
version: 1.0.0
description: Unpoller
keywords:
- unpoller
- ubiquiti
- unifi
- metrics
home: https://wiki.alexlebens.dev/doc/unpoller-ZG6iBCZATk
sources:
- https://github.com/unpoller/unpoller
- https://github.com/unpoller/unpoller/pkgs/container/unpoller
- https://github.com/bjw-s/helm-charts/blob/main/charts/other/app-template/values.yaml
maintainers:
- name: alexlebens
dependencies:
- name: app-template
alias: unpoller
repository: https://bjw-s.github.io/helm-charts/
version: 3.7.1
icon: https://camo.githubusercontent.com/c5d07a5b3acfeac8e1c25bf56f440ffe032b86e4e7f15de82357f022a43fc927/68747470733a2f2f756e706f6c6c65722e636f6d2f696d672f6c6f676f2e706e67
appVersion: v2.11.2

View File

@@ -0,0 +1,30 @@
apiVersion: external-secrets.io/v1beta1
kind: ExternalSecret
metadata:
name: unpoller-unifi-secret
namespace: {{ .Release.Namespace }}
labels:
app.kubernetes.io/name: unpoller-unifi-secret
app.kubernetes.io/instance: {{ .Release.Name }}
app.kubernetes.io/version: {{ .Chart.AppVersion }}
app.kubernetes.io/component: web
app.kubernetes.io/part-of: {{ .Release.Name }}
spec:
secretStoreRef:
kind: ClusterSecretStore
name: vault
data:
- secretKey: UP_UNIFI_CONTROLLER_0_USER
remoteRef:
conversionStrategy: Default
decodingStrategy: None
key: /unifi/auth/cl01tl
metadataPolicy: None
property: user
- secretKey: UP_UNIFI_CONTROLLER_0_PASS
remoteRef:
conversionStrategy: Default
decodingStrategy: None
key: /unifi/auth/cl01tl
metadataPolicy: None
property: password

View File

@@ -0,0 +1,21 @@
apiVersion: monitoring.coreos.com/v1
kind: ServiceMonitor
metadata:
name: unpoller
namespace: {{ .Release.Namespace }}
labels:
app.kubernetes.io/name: unpoller
app.kubernetes.io/instance: {{ .Release.Name }}
app.kubernetes.io/version: {{ .Chart.AppVersion }}
app.kubernetes.io/component: metrics
app.kubernetes.io/part-of: {{ .Release.Name }}
spec:
selector:
matchLabels:
app.kubernetes.io/name: unpoller
app.kubernetes.io/instance: {{ .Release.Name }}
endpoints:
- port: metrics
interval: 30s
scrapeTimeout: 10s
path: /metrics

View File

@@ -0,0 +1,57 @@
unpoller:
controllers:
main:
type: deployment
replicas: 1
strategy: Recreate
revisionHistoryLimit: 3
containers:
main:
image:
repository: ghcr.io/unpoller/unpoller
tag: v2.14.1
pullPolicy: IfNotPresent
env:
- name: UP_UNIFI_CONTROLLER_0_SAVE_ALARMS
value: 'false'
- name: UP_UNIFI_CONTROLLER_0_SAVE_ANOMALIES
value: 'false'
- name: UP_UNIFI_CONTROLLER_0_SAVE_DPI
value: 'false'
- name: UP_UNIFI_CONTROLLER_0_SAVE_EVENTS
value: 'false'
- name: UP_UNIFI_CONTROLLER_0_SAVE_IDS
value: 'false'
- name: UP_UNIFI_CONTROLLER_0_SAVE_SITES
value: 'true'
- name: UP_UNIFI_CONTROLLER_0_URL
value: https://unifi.alexlebens.net/
- name: UP_UNIFI_CONTROLLER_0_VERIFY_SSL
value: 'false'
- name: UP_INFLUXDB_DISABLE
value: 'true'
- name: UP_PROMETHEUS_HTTP_LISTEN
value: 0.0.0.0:9130
- name: UP_PROMETHEUS_NAMESPACE
value: unpoller
- name: UP_POLLER_DEBUG
value: 'false'
- name: UP_POLLER_QUIET
value: 'false'
envFrom:
- secretRef:
name: unpoller-unifi-secret
resources:
requests:
cpu: 10m
memory: 64Mi
serviceAccount:
create: true
service:
main:
controller: main
ports:
metrics:
port: 9130
targetPort: 9130
protocol: TCP