This commit is contained in:
2023-09-26 18:14:36 -06:00
commit fb5a0fc542
443 changed files with 21892 additions and 0 deletions

110
Prometheus/.drone.yml Normal file
View File

@@ -0,0 +1,110 @@
---
# Drone CI pipeline for the Prometheus compose project.
# Flow: Tests -> Configuration -> Deploy, each followed by Discord
# notification steps. Configuration and Deploy only run on `main`.
kind: pipeline
type: docker
name: deploy

environment:
  PACKAGE: Prometheus
  # Space-separated hostnames; presumably read by the /scripts/*.sh helpers
  # shipped in the droneimage (not visible here) -- confirm.
  URL: prometheus.alexlebens.net alertmanager.alexlebens.net ps03fd.alexlebens.net cadvisor.alexlebens.net

steps:
  - name: Tests
    image: ps03fd.alexlebens.net:5000/droneimage:latest
    commands:
      - /scripts/tests.sh

  - name: 'Nofification > Discord | Tests - Failure'
    image: appleboy/drone-discord
    settings:
      webhook_id:
        from_secret: discord_webhook_id
      webhook_token:
        from_secret: discord_webhook_token
      username: DroneCI - ps03fd
      message: Docker compose validation for Prometheus failed.
    when:
      status:
        - failure

  - name: Configuration
    image: ps03fd.alexlebens.net:5000/droneimage:latest
    commands:
      # -p keeps the step from failing if the image already ships ~/.ssh.
      - mkdir -p ~/.ssh/
      - echo "$SSH_KEY" > ~/.ssh/id_rsa
      - chmod 600 ~/.ssh/id_rsa
      - ssh-keyscan -H $HOST_IP >> ~/.ssh/known_hosts
      - /scripts/configuration.sh
    environment:
      SSH_KEY:
        from_secret: ssh_key_ps03fd_drone
      DOCKER_HOST:
        from_secret: docker_host_ps03fd
      HOST_IP:
        from_secret: host_ip_ps03fd
      UPTIMEKUMA_NAME:
        from_secret: uptimekuma_name
      UPTIMEKUMA_PASSWORD:
        from_secret: uptimekuma_password
      UPTIMEKUMA_URL:
        from_secret: uptimekuma_url
    when:
      branch:
        - main

  - name: 'Nofification > Discord | Configuration - Failure'
    image: appleboy/drone-discord
    settings:
      webhook_id:
        from_secret: discord_webhook_id
      webhook_token:
        from_secret: discord_webhook_token
      username: DroneCI - ps03fd
      message: Configuration for Prometheus failed.
    when:
      status:
        - failure

  - name: Deploy
    image: ps03fd.alexlebens.net:5000/droneimage:latest
    commands:
      # -p keeps the step from failing if the image already ships ~/.ssh.
      - mkdir -p ~/.ssh/
      - echo "$SSH_KEY" > ~/.ssh/id_rsa
      - chmod 600 ~/.ssh/id_rsa
      - ssh-keyscan -H $HOST_IP >> ~/.ssh/known_hosts
      - /scripts/deploy.sh
    environment:
      SSH_KEY:
        from_secret: ssh_key_ps03fd_drone
      DOCKER_HOST:
        from_secret: docker_host_ps03fd
      HOST_IP:
        from_secret: host_ip_ps03fd
    when:
      branch:
        - main

  - name: 'Nofification > Discord | Deploy - Success'
    image: appleboy/drone-discord
    settings:
      webhook_id:
        from_secret: discord_webhook_id
      webhook_token:
        from_secret: discord_webhook_token
      username: DroneCI - ps03fd
      message: Docker compose deployment for Prometheus succeeded
    when:
      status:
        # Was misspelled "sucess", which is not a valid status value, so this
        # notification could never match.
        - success

  - name: 'Nofification > Discord | Deploy - Failure'
    image: appleboy/drone-discord
    settings:
      webhook_id:
        from_secret: discord_webhook_id
      webhook_token:
        from_secret: discord_webhook_token
      username: DroneCI - ps03fd
      message: Docker compose deployment for Prometheus failed.
    when:
      status:
        - failure

15
Prometheus/.env Normal file
View File

@@ -0,0 +1,15 @@
# Environment for the unpoller container; the compose file loads this file
# through `env_file: .env` on the `unpoller` service.
# NOTE(review): some values are wrapped in double quotes. Whether the quotes
# are stripped depends on the Compose version parsing the file -- confirm the
# container sees true/false rather than "true"/"false".
UP_INFLUXDB_DISABLE="true"
UP_POLLER_DEBUG="false"
UP_UNIFI_DYNAMIC="false"
# Listen address for the metrics endpoint; the port matches the
# `prometheus_unpoller:9130` scrape target in the Prometheus config.
UP_PROMETHEUS_HTTP_LISTEN=0.0.0.0:9130
UP_PROMETHEUS_NAMESPACE=unifipoller
# NOTE(review): controller password is committed in plain text. Rotate it and
# supply it from an untracked file or a secret store instead of this file.
UP_UNIFI_CONTROLLER_0_PASS=unifipoller123QAZ
# Per-controller collection toggles (all enabled).
UP_UNIFI_CONTROLLER_0_SAVE_ALARMS="true"
UP_UNIFI_CONTROLLER_0_SAVE_ANOMALIES="true"
UP_UNIFI_CONTROLLER_0_SAVE_DPI="true"
UP_UNIFI_CONTROLLER_0_SAVE_EVENTS="true"
UP_UNIFI_CONTROLLER_0_SAVE_IDS="true"
UP_UNIFI_CONTROLLER_0_SAVE_SITES="true"
# Controller endpoint and the account used to poll it.
UP_UNIFI_CONTROLLER_0_URL=https://unifi.alexlebens.net
UP_UNIFI_CONTROLLER_0_USER=unifipoller
TZ=America/Denver

View File

@@ -0,0 +1,38 @@
# Prometheus alerting rules.
# Alerts carry only a `severity` label here; the `service` label used for
# Alertmanager routing comes from the scrape target labels.
groups:
  - name: service
    rules:
      # Any scrape target that has been unreachable for 10 minutes.
      - alert: service_down
        expr: up == 0
        for: 10m
        labels:
          severity: critical
        annotations:
          summary: "Instance {{ $labels.job }} down"
          description: "{{ $labels.job }} has been down for more than 10 minutes."

  - name: infrastructure
    rules:
      # 1-minute load average above 8 for 10 minutes.
      - alert: high_load
        expr: node_load1 > 8
        for: 10m
        labels:
          severity: warning
        annotations:
          summary: "Instance {{ $labels.job }} under high load"
          description: "{{ $labels.job }} is under high load."

  - name: disk_space
    rules:
      # Linear extrapolation of the last 4h predicts free space hitting zero
      # within the next 4h.
      - alert: disk_will_fill
        expr: predict_linear(node_filesystem_free_bytes{job="host-ps03fd"}[4h], 4 * 3600) < 0
        for: 5m
        labels:
          severity: critical
        annotations:
          summary: "Instance {{ $labels.instance }} disk is predicted to fill"
          description: "{{ $labels.mountpoint }} on {{ $labels.instance }} is predicted to run out of space within 4 hours."
      # Previously this queried the recorded series
      # `node_exporter:node_filesystem_free_bytes:fs_used_percents`, which no
      # rule in this file defines, so the alert could never fire. It also
      # compared *used* percent while the description reported "% free".
      # Computing free percent directly makes the name, the threshold and the
      # description agree.
      - alert: disk_10_percent_free
        expr: node_filesystem_avail_bytes / node_filesystem_size_bytes * 100 <= 10
        labels:
          severity: critical
        annotations:
          summary: "Instance {{ $labels.instance }} is low on disk space"
          description: '{{ $labels.instance }} has only {{ printf "%.1f" $value }}% free.'

View File

@@ -0,0 +1,53 @@
# Alertmanager configuration.
# Default receiver is e-mail. For the infrastructure, backend and frontend
# services, alerts with severity="critical" are sent to Pushover instead.
# Alerts from any other service fall through to the default receiver.
route:
  receiver: email-self
  group_by: [alertname, service]
  group_wait: 30s
  group_interval: 5m
  repeat_interval: 3h
  routes:
    - matchers:
        - 'service="infrastructure"'
      receiver: email-self
      routes:
        - matchers:
            - 'severity="critical"'
          receiver: pushover-self
    - matchers:
        - 'service="backend"'
      receiver: email-self
      routes:
        - matchers:
            - 'severity="critical"'
          receiver: pushover-self
    - matchers:
        - 'service="frontend"'
      receiver: email-self
      routes:
        - matchers:
            - 'severity="critical"'
          receiver: pushover-self

# A critical alert mutes the warning with the same alertname and service.
inhibit_rules:
  - source_matchers: ['severity="critical"']
    target_matchers: ['severity="warning"']
    equal: [alertname, service]

# Credentials are read from files instead of being committed inline.
# The files must be provisioned under the directory mounted at /alertmanager
# (the compose `alertmanager` volume) before this config is deployed.
# The values that were previously committed here remain in git history and
# must be rotated.
receivers:
  - name: pushover-self
    pushover_configs:
      - token_file: /alertmanager/secrets/pushover_token
        user_key_file: /alertmanager/secrets/pushover_user_key
  - name: email-self
    email_configs:
      - to: alexanderlebens@gmail.com
        from: alexanderlebens@gmail.com
        smarthost: smtp.gmail.com:587
        auth_username: "alexanderlebens@gmail.com"
        auth_identity: "alexanderlebens@gmail.com"
        auth_password_file: /alertmanager/secrets/smtp_password

View File

@@ -0,0 +1,313 @@
# Defaults applied to every scrape job and rule group unless overridden.
global:
  evaluation_interval: 15s
  scrape_interval: 15s

# Alerting rules are read from the volume mounted at /alertmanager.
rule_files:
  - /alertmanager/alert.rules.yml

# Where fired alerts are delivered.
# NOTE(review): this goes through the public hostname, whose Traefik router
# has the `authentik@file` middleware attached in the compose file. Confirm
# that unauthenticated POSTs from Prometheus are allowed through; otherwise
# the in-network address `prometheus_alertmanager:9093` (already used by the
# "alertmanager" scrape job) is the safer target.
alerting:
  alertmanagers:
    - scheme: https
      static_configs:
        - targets:
            - alertmanager.alexlebens.net
# Scrape jobs. Every static target carries a `service` label
# (backend / infrastructure / frontend / host / device), which Alertmanager
# uses for grouping and routing.
#
# Basic-auth passwords are read from files under /etc/prometheus/secrets/
# (inside the compose `prometheus_config` volume) instead of being committed
# inline. Provision those files before deploying, and rotate the passwords
# that were previously committed here.
scrape_configs:
  # --- Monitoring stack itself -------------------------------------------
  - job_name: "prometheus"
    scrape_interval: 5s
    static_configs:
      - targets: ["localhost:9090"]
        labels:
          service: backend

  - job_name: "alertmanager"
    scheme: http
    static_configs:
      - targets: ["prometheus_alertmanager:9093"]
        labels:
          service: backend

  # --- Docker --------------------------------------------------------------
  - job_name: "docker-host"
    scrape_interval: 5s
    static_configs:
      - targets: ["alexlebens.net:9323"]
        labels:
          service: infrastructure

  # Containers discovered through the Docker socket. Only containers that
  # carry a `prometheus_job` label are kept, and every `prometheus_*`
  # container label is copied onto the target with the prefix removed.
  - job_name: "docker-containers"
    docker_sd_configs:
      - host: unix:///var/run/docker.sock
    relabel_configs:
      - source_labels: [__meta_docker_container_label_prometheus_job]
        regex: '.+'
        action: keep
      - regex: '__meta_docker_container_label_prometheus_(.+)'
        action: labelmap
        replacement: '$1'

  # --- Hosts ---------------------------------------------------------------
  # NOTE(review): the job names say "vd01wd" but the targets are "pd04wd" --
  # confirm which hostname is current.
  - job_name: "windows-vd01wd"
    scrape_interval: 5s
    static_configs:
      - targets: ["pd04wd.alexlebens.net:9182"]
        labels:
          service: host

  - job_name: "windows-vd01wd-gpu"
    scrape_interval: 5s
    static_configs:
      - targets: ["pd04wd.alexlebens.net:9835"]
        labels:
          service: host

  - job_name: "host-ps03fd"
    scrape_interval: 5s
    scheme: http
    static_configs:
      - targets: ["prometheus_node_exporter:9100"]
        labels:
          service: host

  - job_name: "folders-ps03fd"
    scrape_interval: 5m
    scrape_timeout: 60s
    scheme: http
    static_configs:
      - targets: ["ps03fd.alexlebens.net:9974"]
        labels:
          service: host
    basic_auth:
      username: ""
      password_file: /etc/prometheus/secrets/folders_ps03fd_password

  # --- Devices -------------------------------------------------------------
  - job_name: "synology"
    scrape_interval: 5s
    static_configs:
      - targets: ["synology.alexlebens.net:9100"]
        labels:
          service: device

  - job_name: "pikvm"
    metrics_path: "/api/export/prometheus/metrics"
    basic_auth:
      username: admin
      password_file: /etc/prometheus/secrets/pikvm_password
    scheme: https
    static_configs:
      - targets: ["pikvm.alexlebens.net"]
        labels:
          service: device
    tls_config:
      insecure_skip_verify: true  # For self-signed certificate

  # SNMP is scraped through the exporter: the device address becomes the
  # `target` query parameter and the `instance` label, while the request
  # itself is sent to prometheus_snmp_exporter:9116.
  - job_name: "synology-snmp"
    scrape_interval: 300s
    scrape_timeout: 240s
    scheme: http
    static_configs:
      - targets: ["synology.alexlebens.net"]
        labels:
          service: device
    metrics_path: /snmp
    params:
      module: [synology]
    relabel_configs:
      - source_labels: [__address__]
        target_label: __param_target
      - source_labels: [__param_target]
        target_label: instance
      - target_label: __address__
        replacement: prometheus_snmp_exporter:9116

  - job_name: "airgradient-br"
    metrics_path: /metrics
    scrape_interval: 30s
    static_configs:
      - targets: ["airgradientbr.alexlebens.net:9926"]
        labels:
          service: device

  # --- Backend / infrastructure services -----------------------------------
  - job_name: "traefik"
    scrape_interval: 5s
    scheme: https
    static_configs:
      - targets: ["traefik.alexlebens.net"]
        labels:
          service: backend

  - job_name: "pihole"
    scrape_interval: 30s
    scheme: http
    static_configs:
      - targets: ["pihole_exporter:9617"]
        labels:
          service: backend

  - job_name: "coredns"
    scheme: http
    static_configs:
      - targets: ["coredns:9153"]
        labels:
          service: backend

  - job_name: "cadvisor"
    scheme: http
    scrape_interval: 30s
    scrape_timeout: 20s
    static_configs:
      - targets: ["prometheus_cadvisor:8080"]
        labels:
          service: infrastructure

  - job_name: "unpoller"
    scheme: http
    static_configs:
      - targets: ["prometheus_unpoller:9130"]
        labels:
          service: infrastructure

  - job_name: "grafana"
    scheme: http
    static_configs:
      - targets: ["grafana:3000"]
        labels:
          service: backend

  - job_name: "loki"
    scheme: http
    static_configs:
      - targets: ["loki:3100"]
        labels:
          service: backend

  - job_name: "promtail"
    scheme: http
    static_configs:
      - targets: ["loki_promtail:9080"]
        labels:
          service: backend

  # NOTE(review): the bearer token is the literal string "token" -- confirm
  # this is the real API token and not a placeholder.
  - job_name: "watchtower"
    scheme: http
    metrics_path: /v1/metrics
    bearer_token: token
    static_configs:
      - targets: ["watchtower:8080"]
        labels:
          service: backend

  - job_name: "qbittorrent"
    scheme: http
    static_configs:
      - targets: ["qbittorrent_exporter:8000"]
        labels:
          service: frontend

  - job_name: "speedtest"
    scheme: http
    scrape_interval: 1h
    scrape_timeout: 1m
    static_configs:
      - targets: ["speedtest:9798"]
        labels:
          service: infrastructure

  - job_name: "authentik"
    scheme: http
    static_configs:
      - targets: ["authentik_server:9300"]
        labels:
          service: infrastructure

  - job_name: "uptimekuma"
    scheme: http
    static_configs:
      - targets: ["uptimekuma:3001"]
        labels:
          service: backend
    basic_auth:
      username: admin
      password_file: /etc/prometheus/secrets/uptimekuma_password

  # --- Media exporters -----------------------------------------------------
  - job_name: "plex"
    scheme: http
    static_configs:
      - targets: ["plex_exporter:9594"]
        labels:
          service: frontend

  - job_name: "radarr"
    scheme: http
    static_configs:
      - targets: ["radarr_exporter:9708"]
        labels:
          service: frontend

  - job_name: "radarr_anime"
    scheme: http
    static_configs:
      - targets: ["radarr_anime_exporter:9708"]
        labels:
          service: frontend

  - job_name: "radarr_art"
    scheme: http
    static_configs:
      - targets: ["radarr_art_exporter:9708"]
        labels:
          service: frontend

  - job_name: "radarr_documentaries"
    scheme: http
    static_configs:
      - targets: ["radarr_documentaries_exporter:9708"]
        labels:
          service: frontend

  - job_name: "radarr_standup"
    scheme: http
    static_configs:
      - targets: ["radarr_standup_exporter:9708"]
        labels:
          service: frontend

  - job_name: "sonarr"
    scheme: http
    static_configs:
      - targets: ["sonarr_exporter:9707"]
        labels:
          service: frontend

  - job_name: "sonarr_anime_exporter"
    scheme: http
    static_configs:
      - targets: ["sonarr_anime_exporter:9707"]
        labels:
          service: frontend

  # Target previously pointed at sonarr_anime_exporter (copy-paste), so the
  # anime exporter was scraped twice and the documentaries exporter never.
  # The new hostname follows the radarr_documentaries_exporter pattern;
  # confirm it matches the actual container name.
  - job_name: "sonarr_documentaries_exporter"
    scheme: http
    static_configs:
      - targets: ["sonarr_documentaries_exporter:9707"]
        labels:
          service: frontend

  - job_name: "lidarr"
    scheme: http
    static_configs:
      - targets: ["lidarr_exporter:9709"]
        labels:
          service: frontend

  - job_name: "readarr_books"
    scheme: http
    static_configs:
      - targets: ["readarr_books_exporter:9708"]
        labels:
          service: frontend

  - job_name: "readarr_audio"
    scheme: http
    static_configs:
      - targets: ["readarr_audio_exporter:9708"]
        labels:
          service: frontend

File diff suppressed because it is too large Load Diff

3
Prometheus/README.md Normal file
View File

@@ -0,0 +1,3 @@
# Prometheus
[![Build Status](https://drone.alexlebens.net/api/badges/alexlebens/Prometheus/status.svg)](https://drone.alexlebens.net/alexlebens/Prometheus)

View File

@@ -0,0 +1,17 @@
{
  "type": "docker",
  "name": "Prometheus - Docker",
  "interval": 60,
  "retryInterval": 20,
  "maxretries": 1,
  "notificationIDList": [3, 4],
  "url": "https://prometheus.alexlebens.net",
  "accepted_statuscodes": ["200-299"],
  "docker_container": "prometheus",
  "docker_host": 1
}

View File

@@ -0,0 +1,17 @@
{
  "type": "docker",
  "name": "Prometheus Alertmanager - Docker",
  "interval": 60,
  "retryInterval": 20,
  "maxretries": 1,
  "notificationIDList": [3, 4],
  "url": "https://alertmanager.alexlebens.net",
  "accepted_statuscodes": ["200-299"],
  "docker_container": "prometheus_alertmanager",
  "docker_host": 1
}

View File

@@ -0,0 +1,17 @@
{
  "type": "docker",
  "name": "Prometheus Node Exporter - Docker",
  "interval": 60,
  "retryInterval": 20,
  "maxretries": 1,
  "notificationIDList": [3, 4],
  "url": "https://ps03fd.alexlebens.net",
  "accepted_statuscodes": ["200-299"],
  "docker_container": "prometheus_node_exporter",
  "docker_host": 1
}

View File

@@ -0,0 +1,17 @@
{
  "type": "docker",
  "name": "Prometheus SNMP - Docker",
  "interval": 60,
  "retryInterval": 20,
  "maxretries": 1,
  "notificationIDList": [3, 4],
  "url": "https://snmp.alexlebens.net",
  "accepted_statuscodes": ["200-299"],
  "docker_container": "prometheus_snmp_exporter",
  "docker_host": 1
}

View File

@@ -0,0 +1,17 @@
{
  "type": "docker",
  "name": "Prometheus cAdvisor - Docker",
  "interval": 60,
  "retryInterval": 20,
  "maxretries": 1,
  "notificationIDList": [3, 4],
  "url": "https://cadvisor.alexlebens.net",
  "accepted_statuscodes": ["200-299"],
  "docker_container": "prometheus_cadvisor",
  "docker_host": 1
}

View File

@@ -0,0 +1,17 @@
{
  "type": "docker",
  "name": "Prometheus Unpoller - Docker",
  "interval": 60,
  "retryInterval": 20,
  "maxretries": 1,
  "notificationIDList": [3, 4],
  "url": "https://unpoller.alexlebens.net",
  "accepted_statuscodes": ["200-299"],
  "docker_container": "prometheus_unpoller",
  "docker_host": 1
}

View File

@@ -0,0 +1,15 @@
{
  "type": "http",
  "name": "Prometheus - Web",
  "interval": 60,
  "retryInterval": 20,
  "maxretries": 1,
  "notificationIDList": [3, 4],
  "url": "https://prometheus.alexlebens.net",
  "accepted_statuscodes": ["200-299"]
}

View File

@@ -0,0 +1,15 @@
{
  "type": "http",
  "name": "Prometheus Alertmanager - Web",
  "interval": 60,
  "retryInterval": 20,
  "maxretries": 1,
  "notificationIDList": [3, 4],
  "url": "https://alertmanager.alexlebens.net",
  "accepted_statuscodes": ["200-299"]
}

View File

@@ -0,0 +1,15 @@
{
  "type": "http",
  "name": "Prometheus cAdvisor - Web",
  "interval": 60,
  "retryInterval": 20,
  "maxretries": 1,
  "notificationIDList": [3, 4],
  "url": "https://cadvisor.alexlebens.net",
  "accepted_statuscodes": ["200-299"]
}

View File

@@ -0,0 +1,179 @@
# Prometheus monitoring stack. Every service joins the external `traefik`
# network; prometheus, alertmanager and cadvisor are also published through
# Traefik behind the `authentik@file` middleware.
#
# Label values are quoted: container labels are strings, and a bare `true`
# or port number is typed as a YAML boolean/integer, which stricter Compose
# schema validation rejects for label maps.
#
# NOTE(review): most images are pinned to `latest`, and several services run
# with `privileged: true` (prometheus and alertmanager additionally as root).
# Confirm each one actually needs that level of access.
services:
  prometheus:
    command:
      - --config.file=/etc/prometheus/prometheus.yml
      - --storage.tsdb.path=/prometheus
      - --web.console.libraries=/usr/share/prometheus/console_libraries
      - --web.console.templates=/usr/share/prometheus/consoles
      - --web.enable-lifecycle
    container_name: prometheus
    image: prom/prometheus:latest
    labels:
      traefik.docker.network: traefik
      traefik.enable: "true"
      traefik.http.routers.prometheus.entrypoints: websecure
      traefik.http.routers.prometheus.rule: 'Host(`prometheus.alexlebens.net`)'
      traefik.http.routers.prometheus.service: prometheus
      traefik.http.routers.prometheus.middlewares: authentik@file
      traefik.http.services.prometheus.loadbalancer.server.port: "9090"
    logging:
      driver: json-file
      options:
        max-size: 50m
        max-file: "3"
    networks:
      traefik: null
    privileged: true
    restart: always
    user: root
    volumes:
      # Docker socket is used by the "docker-containers" discovery job.
      - /var/run/docker.sock:/var/run/docker.sock
      - prometheus_config:/etc/prometheus
      - prometheus_data:/prometheus
      # Shared with alertmanager; the Prometheus config loads its rule file
      # from /alertmanager.
      - alertmanager:/alertmanager

  alertmanager:
    command:
      - --config.file=/alertmanager/alertmanager.yml
    container_name: prometheus_alertmanager
    depends_on:
      - prometheus
    image: prom/alertmanager:latest
    labels:
      traefik.docker.network: traefik
      traefik.enable: "true"
      traefik.http.routers.alertmanager.entrypoints: websecure
      traefik.http.routers.alertmanager.rule: 'Host(`alertmanager.alexlebens.net`)'
      traefik.http.routers.alertmanager.service: alertmanager
      traefik.http.routers.alertmanager.middlewares: authentik@file
      traefik.http.services.alertmanager.loadbalancer.server.port: "9093"
    logging:
      driver: json-file
      options:
        max-size: 50m
        max-file: "3"
    networks:
      traefik: null
    privileged: true
    restart: always
    user: root
    volumes:
      - alertmanager:/alertmanager

  node_exporter:
    container_name: prometheus_node_exporter
    image: prom/node-exporter:latest
    depends_on:
      - alertmanager
    logging:
      driver: json-file
      options:
        max-size: 50m
        max-file: "3"
    networks:
      traefik: null
    privileged: true
    restart: always
    volumes:
      # NOTE(review): this mounts the host directory /host, not the host
      # root filesystem -- confirm that is intended.
      - /host:/host
      - /var/lib/docker/volumes/partition:/partition

  snmp_exporter:
    command:
      - --config.file=/etc/snmp_exporter/snmp.yml
    container_name: prometheus_snmp_exporter
    depends_on:
      - node_exporter
    dns:
      - 192.168.1.15
    image: prom/snmp-exporter:latest
    logging:
      driver: json-file
      options:
        max-size: 50m
        max-file: "3"
    networks:
      traefik: null
    restart: unless-stopped
    volumes:
      - snmp_exporter:/etc/snmp_exporter

  cadvisor:
    container_name: prometheus_cadvisor
    image: gcr.io/cadvisor/cadvisor:v0.46.0
    depends_on:
      - snmp_exporter
    labels:
      traefik.docker.network: traefik
      traefik.enable: "true"
      traefik.http.routers.cadvisor.entrypoints: websecure
      traefik.http.routers.cadvisor.rule: 'Host(`cadvisor.alexlebens.net`)'
      traefik.http.routers.cadvisor.service: cadvisor
      traefik.http.routers.cadvisor.middlewares: authentik@file
      traefik.http.services.cadvisor.loadbalancer.server.port: "8080"
    logging:
      driver: json-file
      options:
        max-size: 50m
        max-file: "3"
    networks:
      traefik: null
    privileged: true
    restart: always
    volumes:
      # All host paths are mounted read-only.
      - /cgroup:/cgroup:ro
      - /dev/disk/:/dev/disk:ro
      - /:/rootfs:ro
      - /sys:/sys:ro
      - /var/lib/docker/:/var/lib/docker:ro
      - /var/run:/var/run:ro

  unpoller:
    container_name: prometheus_unpoller
    depends_on:
      - cadvisor
    env_file:
      - .env
    image: golift/unifi-poller:latest
    logging:
      driver: json-file
      options:
        max-size: 50m
        max-file: "3"
    networks:
      traefik: null
    restart: always
# The `traefik` network is created outside this project; it is only
# referenced here, never created or removed by this compose file.
networks:
  traefik:
    external: true
    name: traefik
# Named volumes backed by bind mounts of host directories under
# /var/lib/docker/volumes/partition (local driver, type none, option bind).
volumes:
  # Mounted at /etc/prometheus in the prometheus service.
  prometheus_config:
    driver: local
    driver_opts:
      device: /var/lib/docker/volumes/partition/prometheus_config
      o: bind
      type: none

  # Mounted at /prometheus (the TSDB storage path).
  prometheus_data:
    driver: local
    driver_opts:
      device: /var/lib/docker/volumes/partition/prometheus
      o: bind
      type: none

  # Mounted at /alertmanager in both prometheus and alertmanager.
  alertmanager:
    driver: local
    driver_opts:
      device: /var/lib/docker/volumes/partition/prometheus_alertmanager
      o: bind
      type: none

  # Mounted at /etc/snmp_exporter in the snmp_exporter service.
  snmp_exporter:
    driver: local
    driver_opts:
      device: /var/lib/docker/volumes/partition/prometheus_snmp_exporter
      o: bind
      type: none