From 69f48d0796b73886a95c06ef3436ed1fba8b4f00 Mon Sep 17 00:00:00 2001
From: Alex Lebens
Date: Sun, 2 Mar 2025 23:04:31 -0600
Subject: [PATCH] add monitoring

---
Notes (text between the "---" marker and the diffstat is not part of the
commit message):
  Adds two wrapper Helm charts under clusters/cl01tl/monitoring:
  kube-prometheus-stack (chart dependency 69.6.0) and unpoller (app-template
  3.7.1 aliased as "unpoller"), together with their ExternalSecret,
  Namespace, ScrapeConfig, Service and ServiceMonitor templates.
  unpoller/Chart.yaml sets appVersion to v2.14.1 so that the
  app.kubernetes.io/version labels rendered from .Chart.AppVersion agree with
  the image tag in unpoller/values.yaml; the blob id on that file's index
  line predates this one-line change.

 .../kube-prometheus-stack/Chart.yaml          |  23 +++
 .../templates/external-secret.yaml            |  37 +++++
 .../templates/namespace.yaml                  |   8 +
 .../templates/scrape-config.yaml              |  41 +++++
 .../templates/service.yaml                    |  16 ++
 .../kube-prometheus-stack/values.yaml         | 149 ++++++++++++++++++
 .../cl01tl/monitoring/unpoller/Chart.yaml     |  23 +++
 .../unpoller/templates/external-secret.yaml   |  30 ++++
 .../unpoller/templates/service-monitor.yaml   |  21 +++
 .../cl01tl/monitoring/unpoller/values.yaml    |  57 +++++++
 10 files changed, 405 insertions(+)
 create mode 100644 clusters/cl01tl/monitoring/kube-prometheus-stack/Chart.yaml
 create mode 100644 clusters/cl01tl/monitoring/kube-prometheus-stack/templates/external-secret.yaml
 create mode 100644 clusters/cl01tl/monitoring/kube-prometheus-stack/templates/namespace.yaml
 create mode 100644 clusters/cl01tl/monitoring/kube-prometheus-stack/templates/scrape-config.yaml
 create mode 100644 clusters/cl01tl/monitoring/kube-prometheus-stack/templates/service.yaml
 create mode 100644 clusters/cl01tl/monitoring/kube-prometheus-stack/values.yaml
 create mode 100644 clusters/cl01tl/monitoring/unpoller/Chart.yaml
 create mode 100644 clusters/cl01tl/monitoring/unpoller/templates/external-secret.yaml
 create mode 100644 clusters/cl01tl/monitoring/unpoller/templates/service-monitor.yaml
 create mode 100644 clusters/cl01tl/monitoring/unpoller/values.yaml

diff --git a/clusters/cl01tl/monitoring/kube-prometheus-stack/Chart.yaml b/clusters/cl01tl/monitoring/kube-prometheus-stack/Chart.yaml
new file mode 100644
index 000000000..d9119787f
--- /dev/null
+++ b/clusters/cl01tl/monitoring/kube-prometheus-stack/Chart.yaml
@@ -0,0 +1,23 @@
+apiVersion: v2
+name: kube-prometheus-stack
+version: 1.0.0
+description: Kube Prometheus Stack
+keywords:
+  - kube-prometheus-stack
+  - prometheus
+  - alertmanager
+  - metrics
+  - alerts
+  - kubernetes
+home: https://wiki.alexlebens.dev/doc/kube-prometheus-stack-pPGJlzAqur
+sources:
+  - https://github.com/prometheus/prometheus
+  - https://github.com/prometheus-community/helm-charts/tree/main/charts/kube-prometheus-stack
+maintainers:
+  - name: alexlebens
+dependencies:
+  - name: kube-prometheus-stack
+    version: 69.6.0
+    repository: https://prometheus-community.github.io/helm-charts
+icon: https://raw.githubusercontent.com/walkxcode/dashboard-icons/main/png/prometheus.png
+appVersion: v0.79.2
diff --git a/clusters/cl01tl/monitoring/kube-prometheus-stack/templates/external-secret.yaml b/clusters/cl01tl/monitoring/kube-prometheus-stack/templates/external-secret.yaml
new file mode 100644
index 000000000..cab09add5
--- /dev/null
+++ b/clusters/cl01tl/monitoring/kube-prometheus-stack/templates/external-secret.yaml
@@ -0,0 +1,37 @@
+apiVersion: external-secrets.io/v1beta1
+kind: ExternalSecret
+metadata:
+  name: alertmanager-config-secret
+  namespace: {{ .Release.Namespace }}
+  labels:
+    app.kubernetes.io/name: {{ .Release.Name }}
+    app.kubernetes.io/instance: {{ .Release.Name }}
+    app.kubernetes.io/version: {{ .Chart.AppVersion }}
+    app.kubernetes.io/component: web
+    app.kubernetes.io/part-of: {{ .Release.Name }}
+spec:
+  secretStoreRef:
+    kind: ClusterSecretStore
+    name: vault
+  data:
+    - secretKey: discord_webhook
+      remoteRef:
+        conversionStrategy: Default
+        decodingStrategy: None
+        key: /discord/webhook/alertmanager
+        metadataPolicy: None
+        property: webhook
+    - secretKey: pushover_token
+      remoteRef:
+        conversionStrategy: Default
+        decodingStrategy: None
+        key: /pushover/key
+        metadataPolicy: None
+        property: alertmanager_key
+    - secretKey: user_key
+      remoteRef:
+        conversionStrategy: Default
+        decodingStrategy: None
+        key: /pushover/key
+        metadataPolicy: None
+        property: user_key
diff --git a/clusters/cl01tl/monitoring/kube-prometheus-stack/templates/namespace.yaml b/clusters/cl01tl/monitoring/kube-prometheus-stack/templates/namespace.yaml
new file mode 100644
index 000000000..94697b1e3
--- /dev/null
+++ b/clusters/cl01tl/monitoring/kube-prometheus-stack/templates/namespace.yaml
@@ -0,0 +1,8 @@
+apiVersion: v1
+kind: Namespace
+metadata:
+  name: kube-prometheus-stack
+  labels:
+    pod-security.kubernetes.io/audit: privileged
+    pod-security.kubernetes.io/enforce: privileged
+    pod-security.kubernetes.io/warn: privileged
diff --git a/clusters/cl01tl/monitoring/kube-prometheus-stack/templates/scrape-config.yaml b/clusters/cl01tl/monitoring/kube-prometheus-stack/templates/scrape-config.yaml
new file mode 100644
index 000000000..21c7038a4
--- /dev/null
+++ b/clusters/cl01tl/monitoring/kube-prometheus-stack/templates/scrape-config.yaml
@@ -0,0 +1,41 @@
+apiVersion: monitoring.coreos.com/v1alpha1
+kind: ScrapeConfig
+metadata:
+  name: external-nodes-http
+  namespace: {{ .Release.Namespace }}
+  labels:
+    app.kubernetes.io/name: external-nodes
+    app.kubernetes.io/instance: {{ .Release.Name }}
+    app.kubernetes.io/version: {{ .Chart.AppVersion }}
+    app.kubernetes.io/component: metrics
+    app.kubernetes.io/part-of: {{ .Release.Name }}
+spec:
+  staticConfigs:
+    - labels:
+        job: external-nodes
+      targets:
+        - ps08rp.alexlebens.net:9100
+        - ps09rp.alexlebens.net:9100
+  metricsPath: /metrics
+  scheme: HTTP
+
+---
+apiVersion: monitoring.coreos.com/v1alpha1
+kind: ScrapeConfig
+metadata:
+  name: external-nodes-https
+  namespace: {{ .Release.Namespace }}
+  labels:
+    app.kubernetes.io/name: external-nodes
+    app.kubernetes.io/instance: {{ .Release.Name }}
+    app.kubernetes.io/version: {{ .Chart.AppVersion }}
+    app.kubernetes.io/component: metrics
+    app.kubernetes.io/part-of: {{ .Release.Name }}
+spec:
+  staticConfigs:
+    - labels:
+        job: external-nodes
+      targets:
+        - node-exporter-ps10rp.boreal-beaufort.ts.net
+  metricsPath: /metrics
+  scheme: HTTPS
diff --git a/clusters/cl01tl/monitoring/kube-prometheus-stack/templates/service.yaml b/clusters/cl01tl/monitoring/kube-prometheus-stack/templates/service.yaml
new file mode 100644
index 000000000..aa75196ce
--- /dev/null
+++ b/clusters/cl01tl/monitoring/kube-prometheus-stack/templates/service.yaml
@@ -0,0 +1,16 @@
+apiVersion: v1
+kind: Service
+metadata:
+  name: node-ps10rp
+  namespace: {{ .Release.Namespace }}
+  labels:
+    app.kubernetes.io/name: node-ps10rp
+    app.kubernetes.io/instance: {{ .Release.Name }}
+    app.kubernetes.io/version: {{ .Chart.AppVersion }}
+    app.kubernetes.io/component: tailscale
+    app.kubernetes.io/part-of: {{ .Release.Name }}
+  annotations:
+    tailscale.com/tailnet-fqdn: node-exporter-ps10rp.boreal-beaufort.ts.net
+spec:
+  externalName: placeholder
+  type: ExternalName
diff --git a/clusters/cl01tl/monitoring/kube-prometheus-stack/values.yaml b/clusters/cl01tl/monitoring/kube-prometheus-stack/values.yaml
new file mode 100644
index 000000000..ba7c69608
--- /dev/null
+++ b/clusters/cl01tl/monitoring/kube-prometheus-stack/values.yaml
@@ -0,0 +1,149 @@
+kube-prometheus-stack:
+  crds:
+    enabled: false
+  defaultRules:
+    create: true
+    rules:
+      kubeControllerManager: false
+      kubeSchedulerAlerting: false
+      kubeSchedulerRecording: false
+  global:
+    rbac:
+      create: true
+      createAggregateClusterRoles: true
+  alertmanager:
+    enabled: true
+    config:
+      route:
+        group_by: ["namespace", "alertname"]
+        group_wait: 30s
+        group_interval: 5m
+        repeat_interval: 24h
+        receiver: discord
+        routes:
+          - receiver: "null"
+            matchers:
+              - alertname = "Watchdog"
+          - receiver: "pushover"
+            group_wait: 10s
+            group_interval: 5m
+            repeat_interval: 24h
+            matchers:
+              - severity = "critical"
+      receivers:
+        - name: "null"
+        - name: discord
+          discord_configs:
+            - send_resolved: true
+              webhook_url_file: /etc/alertmanager/secrets/alertmanager-config-secret/discord_webhook
+        - name: pushover
+          pushover_configs:
+            - send_resolved: true
+              user_key_file: /etc/alertmanager/secrets/alertmanager-config-secret/user_key
+              token_file: /etc/alertmanager/secrets/alertmanager-config-secret/pushover_token
+    alertmanagerSpec:
+      secrets:
+        - alertmanager-config-secret
+      replicas: 1
+  grafana:
+    enabled: false
+  kubeApiServer:
+    tlsConfig:
+      insecureSkipVerify: true
+  kubeControllerManager:
+    enabled: false
+  kubeEtcd:
+    enabled: true
+  kubeScheduler:
+    enabled: false
+  kubeProxy:
+    enabled: false
+  kubeStateMetrics:
+    enabled: true
+  nodeExporter:
+    operatingSystems:
+      darwin:
+        enabled: false
+  prometheusOperator:
+    admissionWebhooks:
+      enabled: true
+    namespaces:
+      releaseNamespace: true
+      additional:
+        - kube-system
+        - kube-prometheus-stack
+        - argocd
+        - argo-workflows
+        - authentik
+        - blocky
+        - cert-manager
+        - cloudnative-pg
+        - descheduler
+        - directus
+        - external-dns
+        - freshrss
+        - generic-device-plugin
+        - gitea
+        - grafana
+        - harbor
+        - hoarder
+        - home-assistant
+        - immich
+        - jellystat
+        - komodo
+        - lidarr2
+        - linkwarden
+        - loki
+        - matrix-synapse
+        - ollama
+        - outline
+        - photoview
+        - qbittorrent
+        - radarr5
+        - radarr5-4k
+        - radarr5-anime
+        - radarr5-standup
+        - reloader
+        - rook-ceph
+        - roundcube
+        - slskd
+        - sonarr4
+        - sonarr4-4k
+        - sonarr4-anime
+        - speedtest-exporter
+        - spegel
+        - stalwart
+        - tdarr
+        - traefik
+        - trivy
+        - unpoller
+        - vault
+        - vaultwarden
+        - volsync
+  prometheus:
+    ingress:
+      enabled: true
+      ingressClassName: tailscale
+      labels:
+        tailscale.com/proxy-class: no-metrics
+      hosts:
+        - prometheus-cl01tl
+      tls:
+        - secretName: prometheus-cl01tl
+          hosts:
+            - prometheus-cl01tl
+    prometheusSpec:
+      scrapeInterval: 30s
+      retention: 30d
+      externalUrl: https://prometheus-cl01tl.boreal-beaufort.ts.net
+      serviceMonitorSelectorNilUsesHelmValues: false
+      podMonitorSelectorNilUsesHelmValues: false
+      scrapeConfigSelectorNilUsesHelmValues: false
+      storageSpec:
+        volumeClaimTemplate:
+          spec:
+            storageClassName: synology-iscsi-delete
+            accessModes: ["ReadWriteOnce"]
+            resources:
+              requests:
+                storage: 200Gi
diff --git a/clusters/cl01tl/monitoring/unpoller/Chart.yaml b/clusters/cl01tl/monitoring/unpoller/Chart.yaml
new file mode 100644
index 000000000..9807b97ea
--- /dev/null
+++ b/clusters/cl01tl/monitoring/unpoller/Chart.yaml
@@ -0,0 +1,23 @@
+apiVersion: v2
+name: unpoller
+version: 1.0.0
+description: Unpoller
+keywords:
+  - unpoller
+  - ubiquiti
+  - unifi
+  - metrics
+home: https://wiki.alexlebens.dev/doc/unpoller-ZG6iBCZATk
+sources:
+  - https://github.com/unpoller/unpoller
+  - https://github.com/unpoller/unpoller/pkgs/container/unpoller
+  - https://github.com/bjw-s/helm-charts/blob/main/charts/other/app-template/values.yaml
+maintainers:
+  - name: alexlebens
+dependencies:
+  - name: app-template
+    alias: unpoller
+    repository: https://bjw-s.github.io/helm-charts/
+    version: 3.7.1
+icon: https://camo.githubusercontent.com/c5d07a5b3acfeac8e1c25bf56f440ffe032b86e4e7f15de82357f022a43fc927/68747470733a2f2f756e706f6c6c65722e636f6d2f696d672f6c6f676f2e706e67
+appVersion: v2.14.1
diff --git a/clusters/cl01tl/monitoring/unpoller/templates/external-secret.yaml b/clusters/cl01tl/monitoring/unpoller/templates/external-secret.yaml
new file mode 100644
index 000000000..2785c2ecf
--- /dev/null
+++ b/clusters/cl01tl/monitoring/unpoller/templates/external-secret.yaml
@@ -0,0 +1,30 @@
+apiVersion: external-secrets.io/v1beta1
+kind: ExternalSecret
+metadata:
+  name: unpoller-unifi-secret
+  namespace: {{ .Release.Namespace }}
+  labels:
+    app.kubernetes.io/name: unpoller-unifi-secret
+    app.kubernetes.io/instance: {{ .Release.Name }}
+    app.kubernetes.io/version: {{ .Chart.AppVersion }}
+    app.kubernetes.io/component: web
+    app.kubernetes.io/part-of: {{ .Release.Name }}
+spec:
+  secretStoreRef:
+    kind: ClusterSecretStore
+    name: vault
+  data:
+    - secretKey: UP_UNIFI_CONTROLLER_0_USER
+      remoteRef:
+        conversionStrategy: Default
+        decodingStrategy: None
+        key: /unifi/auth/cl01tl
+        metadataPolicy: None
+        property: user
+    - secretKey: UP_UNIFI_CONTROLLER_0_PASS
+      remoteRef:
+        conversionStrategy: Default
+        decodingStrategy: None
+        key: /unifi/auth/cl01tl
+        metadataPolicy: None
+        property: password
diff --git a/clusters/cl01tl/monitoring/unpoller/templates/service-monitor.yaml b/clusters/cl01tl/monitoring/unpoller/templates/service-monitor.yaml
new file mode 100644
index 000000000..f16f0fe6a
--- /dev/null
+++ b/clusters/cl01tl/monitoring/unpoller/templates/service-monitor.yaml
@@ -0,0 +1,21 @@
+apiVersion: monitoring.coreos.com/v1
+kind: ServiceMonitor
+metadata:
+  name: unpoller
+  namespace: {{ .Release.Namespace }}
+  labels:
+    app.kubernetes.io/name: unpoller
+    app.kubernetes.io/instance: {{ .Release.Name }}
+    app.kubernetes.io/version: {{ .Chart.AppVersion }}
+    app.kubernetes.io/component: metrics
+    app.kubernetes.io/part-of: {{ .Release.Name }}
+spec:
+  selector:
+    matchLabels:
+      app.kubernetes.io/name: unpoller
+      app.kubernetes.io/instance: {{ .Release.Name }}
+  endpoints:
+    - port: metrics
+      interval: 30s
+      scrapeTimeout: 10s
+      path: /metrics
diff --git a/clusters/cl01tl/monitoring/unpoller/values.yaml b/clusters/cl01tl/monitoring/unpoller/values.yaml
new file mode 100644
index 000000000..77aa830d5
--- /dev/null
+++ b/clusters/cl01tl/monitoring/unpoller/values.yaml
@@ -0,0 +1,57 @@
+unpoller:
+  controllers:
+    main:
+      type: deployment
+      replicas: 1
+      strategy: Recreate
+      revisionHistoryLimit: 3
+      containers:
+        main:
+          image:
+            repository: ghcr.io/unpoller/unpoller
+            tag: v2.14.1
+            pullPolicy: IfNotPresent
+          env:
+            - name: UP_UNIFI_CONTROLLER_0_SAVE_ALARMS
+              value: 'false'
+            - name: UP_UNIFI_CONTROLLER_0_SAVE_ANOMALIES
+              value: 'false'
+            - name: UP_UNIFI_CONTROLLER_0_SAVE_DPI
+              value: 'false'
+            - name: UP_UNIFI_CONTROLLER_0_SAVE_EVENTS
+              value: 'false'
+            - name: UP_UNIFI_CONTROLLER_0_SAVE_IDS
+              value: 'false'
+            - name: UP_UNIFI_CONTROLLER_0_SAVE_SITES
+              value: 'true'
+            - name: UP_UNIFI_CONTROLLER_0_URL
+              value: https://unifi.alexlebens.net/
+            - name: UP_UNIFI_CONTROLLER_0_VERIFY_SSL
+              value: 'false'
+            - name: UP_INFLUXDB_DISABLE
+              value: 'true'
+            - name: UP_PROMETHEUS_HTTP_LISTEN
+              value: 0.0.0.0:9130
+            - name: UP_PROMETHEUS_NAMESPACE
+              value: unpoller
+            - name: UP_POLLER_DEBUG
+              value: 'false'
+            - name: UP_POLLER_QUIET
+              value: 'false'
+          envFrom:
+            - secretRef:
+                name: unpoller-unifi-secret
+          resources:
+            requests:
+              cpu: 10m
+              memory: 64Mi
+  serviceAccount:
+    create: true
+  service:
+    main:
+      controller: main
+      ports:
+        metrics:
+          port: 9130
+          targetPort: 9130
+          protocol: TCP