add monitoring
This commit is contained in:
23
clusters/cl01tl/monitoring/kube-prometheus-stack/Chart.yaml
Normal file
23
clusters/cl01tl/monitoring/kube-prometheus-stack/Chart.yaml
Normal file
@@ -0,0 +1,23 @@
# Wrapper chart: declares the upstream kube-prometheus-stack chart as its
# only dependency and carries local metadata for this cluster.
apiVersion: v2
name: kube-prometheus-stack
version: 1.0.0
description: Kube Prometheus Stack
keywords:
  - kube-prometheus-stack
  - prometheus
  - alertmanager
  - metrics
  - alerts
  - kubernetes
home: https://wiki.alexlebens.dev/doc/kube-prometheus-stack-pPGJlzAqur
sources:
  - https://github.com/prometheus/prometheus
  - https://github.com/prometheus-community/helm-charts/tree/main/charts/kube-prometheus-stack
maintainers:
  - name: alexlebens
dependencies:
  # version/repository below belong to this dependency entry, not to the
  # wrapper chart itself (whose own version is set near the top).
  - name: kube-prometheus-stack
    version: 69.6.0
    repository: https://prometheus-community.github.io/helm-charts
icon: https://raw.githubusercontent.com/walkxcode/dashboard-icons/main/png/prometheus.png
appVersion: v0.79.2
@@ -0,0 +1,37 @@
# ExternalSecret that reads three values from the ClusterSecretStore named
# "vault" and exposes them under the keys discord_webhook, pushover_token
# and user_key.
apiVersion: external-secrets.io/v1beta1
kind: ExternalSecret
metadata:
  name: alertmanager-config-secret
  namespace: {{ .Release.Namespace }}
  labels:
    app.kubernetes.io/name: {{ .Release.Name }}
    app.kubernetes.io/instance: {{ .Release.Name }}
    app.kubernetes.io/version: {{ .Chart.AppVersion }}
    app.kubernetes.io/component: web
    app.kubernetes.io/part-of: {{ .Release.Name }}
spec:
  secretStoreRef:
    kind: ClusterSecretStore
    name: vault
  data:
    # Discord webhook URL.
    - secretKey: discord_webhook
      remoteRef:
        conversionStrategy: Default
        decodingStrategy: None
        key: /discord/webhook/alertmanager
        metadataPolicy: None
        property: webhook
    # Pushover token; read from the same remote key as user_key below,
    # but from a different property.
    - secretKey: pushover_token
      remoteRef:
        conversionStrategy: Default
        decodingStrategy: None
        key: /pushover/key
        metadataPolicy: None
        property: alertmanager_key
    # Pushover user key.
    - secretKey: user_key
      remoteRef:
        conversionStrategy: Default
        decodingStrategy: None
        key: /pushover/key
        metadataPolicy: None
        property: user_key
@@ -0,0 +1,8 @@
# Namespace for the monitoring stack. All three Pod Security admission
# modes (audit, enforce, warn) are set to the "privileged" level.
# NOTE(review): presumably required by host-level exporters - confirm.
apiVersion: v1
kind: Namespace
metadata:
  name: kube-prometheus-stack
  labels:
    pod-security.kubernetes.io/audit: privileged
    pod-security.kubernetes.io/enforce: privileged
    pod-security.kubernetes.io/warn: privileged
@@ -0,0 +1,41 @@
# Static scrape targets outside the cluster, reached over plain HTTP.
# Both ScrapeConfigs in this file attach the same job label
# ("external-nodes") to their targets.
apiVersion: monitoring.coreos.com/v1alpha1
kind: ScrapeConfig
metadata:
  name: external-nodes-http
  namespace: {{ .Release.Namespace }}
  labels:
    app.kubernetes.io/name: external-nodes
    app.kubernetes.io/instance: {{ .Release.Name }}
    app.kubernetes.io/version: {{ .Chart.AppVersion }}
    app.kubernetes.io/component: metrics
    app.kubernetes.io/part-of: {{ .Release.Name }}
spec:
  staticConfigs:
    - labels:
        job: external-nodes
      targets:
        - ps08rp.alexlebens.net:9100
        - ps09rp.alexlebens.net:9100
  metricsPath: /metrics
  scheme: HTTP

---
# Static scrape target reached over HTTPS; the target is given without an
# explicit port.
apiVersion: monitoring.coreos.com/v1alpha1
kind: ScrapeConfig
metadata:
  name: external-nodes-https
  namespace: {{ .Release.Namespace }}
  labels:
    app.kubernetes.io/name: external-nodes
    app.kubernetes.io/instance: {{ .Release.Name }}
    app.kubernetes.io/version: {{ .Chart.AppVersion }}
    app.kubernetes.io/component: metrics
    app.kubernetes.io/part-of: {{ .Release.Name }}
spec:
  staticConfigs:
    - labels:
        job: external-nodes
      targets:
        - node-exporter-ps10rp.boreal-beaufort.ts.net
  metricsPath: /metrics
  scheme: HTTPS
@@ -0,0 +1,16 @@
# ExternalName Service annotated with a tailnet FQDN.
# NOTE(review): externalName is the literal string "placeholder";
# presumably the Tailscale operator rewrites it based on the
# tailscale.com/tailnet-fqdn annotation - confirm.
apiVersion: v1
kind: Service
metadata:
  name: node-ps10rp
  namespace: {{ .Release.Namespace }}
  labels:
    app.kubernetes.io/name: node-ps10rp
    app.kubernetes.io/instance: {{ .Release.Name }}
    app.kubernetes.io/version: {{ .Chart.AppVersion }}
    app.kubernetes.io/component: tailscale
    app.kubernetes.io/part-of: {{ .Release.Name }}
  annotations:
    tailscale.com/tailnet-fqdn: node-exporter-ps10rp.boreal-beaufort.ts.net
spec:
  externalName: placeholder
  type: ExternalName
149
clusters/cl01tl/monitoring/kube-prometheus-stack/values.yaml
Normal file
149
clusters/cl01tl/monitoring/kube-prometheus-stack/values.yaml
Normal file
@@ -0,0 +1,149 @@
# Values for the kube-prometheus-stack dependency. Everything is nested
# under the dependency's name so Helm passes it to the subchart.
kube-prometheus-stack:
  crds:
    enabled: false

  defaultRules:
    create: true
    rules:
      # Rule groups switched off to match the components disabled below
      # (kubeControllerManager / kubeScheduler).
      kubeControllerManager: false
      kubeSchedulerAlerting: false
      kubeSchedulerRecording: false

  global:
    rbac:
      create: true
      createAggregateClusterRoles: true

  alertmanager:
    enabled: true
    config:
      route:
        group_by: ["namespace", "alertname"]
        group_wait: 30s
        group_interval: 5m
        repeat_interval: 24h
        # Default receiver for anything not matched by a child route.
        receiver: discord
        routes:
          # Watchdog alerts go to the receiver named "null", which has
          # no notifier configured.
          - receiver: "null"
            matchers:
              - alertname = "Watchdog"
          # Critical alerts go to Pushover with a shorter group_wait.
          - receiver: "pushover"
            group_wait: 10s
            group_interval: 5m
            repeat_interval: 24h
            matchers:
              - severity = "critical"
      receivers:
        - name: "null"
        # The *_file paths below point into the directory of the secret
        # listed under alertmanagerSpec.secrets.
        - name: discord
          discord_configs:
            - send_resolved: true
              webhook_url_file: /etc/alertmanager/secrets/alertmanager-config-secret/discord_webhook
        - name: pushover
          pushover_configs:
            - send_resolved: true
              user_key_file: /etc/alertmanager/secrets/alertmanager-config-secret/user_key
              token_file: /etc/alertmanager/secrets/alertmanager-config-secret/pushover_token
    alertmanagerSpec:
      secrets:
        - alertmanager-config-secret
      replicas: 1

  grafana:
    enabled: false

  kubeApiServer:
    tlsConfig:
      # NOTE(review): this skips TLS certificate verification for the
      # API server scrape - confirm it is intentional.
      insecureSkipVerify: true

  kubeControllerManager:
    enabled: false

  kubeEtcd:
    enabled: true

  kubeScheduler:
    enabled: false

  kubeProxy:
    enabled: false

  kubeStateMetrics:
    enabled: true

  nodeExporter:
    operatingSystems:
      darwin:
        enabled: false

  prometheusOperator:
    admissionWebhooks:
      enabled: true
    # Namespace list for the operator: the release namespace plus every
    # entry under "additional". New namespaces must be added here by hand.
    namespaces:
      releaseNamespace: true
      additional:
        - kube-system
        - kube-prometheus-stack
        - argocd
        - argo-workflows
        - authentik
        - blocky
        - cert-manager
        - cloudnative-pg
        - descheduler
        - directus
        - external-dns
        - freshrss
        - generic-device-plugin
        - gitea
        - grafana
        - harbor
        - hoarder
        - home-assistant
        - immich
        - jellystat
        - komodo
        - lidarr2
        - linkwarden
        - loki
        - matrix-synapse
        - ollama
        - outline
        - photoview
        - qbittorrent
        - radarr5
        - radarr5-4k
        - radarr5-anime
        - radarr5-standup
        - reloader
        - rook-ceph
        - roundcube
        - slskd
        - sonarr4
        - sonarr4-4k
        - sonarr4-anime
        - speedtest-exporter
        - spegel
        - stalwart
        - tdarr
        - traefik
        - trivy
        - unpoller
        - vault
        - vaultwarden
        - volsync

  prometheus:
    ingress:
      enabled: true
      ingressClassName: tailscale
      labels:
        tailscale.com/proxy-class: no-metrics
      hosts:
        - prometheus-cl01tl
      tls:
        - secretName: prometheus-cl01tl
          hosts:
            - prometheus-cl01tl
    prometheusSpec:
      scrapeInterval: 30s
      retention: 30d
      externalUrl: https://prometheus-cl01tl.boreal-beaufort.ts.net
      # NOTE(review): with these set to false the chart presumably stops
      # restricting selection to objects labelled for this release -
      # confirm against the chart's values documentation.
      serviceMonitorSelectorNilUsesHelmValues: false
      podMonitorSelectorNilUsesHelmValues: false
      scrapeConfigSelectorNilUsesHelmValues: false
      storageSpec:
        volumeClaimTemplate:
          spec:
            storageClassName: synology-iscsi-delete
            accessModes: ["ReadWriteOnce"]
            resources:
              requests:
                storage: 200Gi
Reference in New Issue
Block a user