Merge branch 'monitoring' of nemo/nebula into master
This commit is contained in:
commit
c035808849
|
@ -89,6 +89,7 @@ Issues I've faced/reported as a result of this project:
|
|||
6. `ubooquity` docker container doesn't let you set admin password: https://github.com/linuxserver/docker-ubooquity/issues/17. (Couldn't reproduce, closed) :white_check_mark:
|
||||
7. Traefik customresponseheaders can't contain colons on the docker backend: https://github.com/containous/traefik/issues/2517. Fixed with https://github.com/containous/traefik/pull/2509 :white_check_mark:
|
||||
8. Traefik Security headers don't overwrite upstream headers: https://github.com/containous/traefik/issues/2618
|
||||
9. Transmission exporter broke on differing data types while unmarshalling JSON in Go. I filed a PR: https://github.com/metalmatze/transmission-exporter/pull/2
|
||||
|
||||
# Plumbing
|
||||
|
||||
|
|
6
main.tf
6
main.tf
|
@ -28,3 +28,9 @@ module "radicale" {
|
|||
source = "radicale"
|
||||
domain = "radicale.bb8.fun"
|
||||
}
|
||||
|
||||
# Monitoring stack, loaded from the local "monitoring" module.
# It receives the Grafana admin password and the base domain.
module "monitoring" {
  source = "monitoring"

  domain                     = "bb8.fun"
  gf-security-admin-password = "${var.gf-security-admin-password}"
}
|
||||
|
|
|
@ -0,0 +1,19 @@
|
|||
# Alerting rules in the Prometheus 2.x YAML rule-group format.
# The 1.x "ALERT ... IF ..." syntax is not loaded by Prometheus 2.x, which is
# what the untagged prom/prometheus image resolves to.
groups:
  - name: "instance-health"
    rules:
      # Alert for any instance that is unreachable for >1 minute.
      - alert: InstanceDown
        expr: up == 0
        for: 1m
        labels:
          severity: "page"
        annotations:
          summary: "Instance {{ $labels.instance }} down"
          description: "{{ $labels.instance }} of job {{ $labels.job }} has been down for more than 1 minute."

      # Alert for any instance that is under high load for >1 minute.
      - alert: HighLoad
        expr: node_load1 > 0.85
        for: 1m
        labels:
          severity: "page"
        annotations:
          summary: "Instance {{ $labels.instance }} is under high load"
          description: "{{ $labels.instance }} of job {{ $labels.job }} has been under high load for more than 1 minute."
|
|
@ -0,0 +1,10 @@
|
|||
# Alertmanager configuration: every alert is routed to one Slack receiver.
# NOTE(review): the ALERT_SLACK_* values look like placeholders that are
# substituted before deployment; the substitution step is not visible here.
receivers:
  - name: "slack"
    slack_configs:
      - api_url: "ALERT_SLACK_INCOMING_WEBHOOK_URL"
        channel: "ALERT_SLACK_CHANNEL"
        username: "ALERT_SLACK_USERNAME"
        # Also notify when an alert stops firing.
        send_resolved: true

route:
  receiver: "slack"
|
|
@ -0,0 +1,27 @@
|
|||
# Prometheus server configuration.
global:
  # Default scrape interval; individual jobs below override it.
  scrape_interval: 15s
  external_labels:
    monitor: "docker-monitor"

# Alerting rules are loaded from this file.
rule_files:
  - "alert.rules"

scrape_configs:
  # Prometheus scraping its own metrics endpoint.
  - job_name: "prometheus"
    static_configs:
      - targets:
          - "localhost:9090"

  # Metrics from the nodeexporter container.
  - job_name: "node"
    scrape_interval: 5s
    static_configs:
      - targets:
          - "nodeexporter:9100"

  # Metrics from the cadvisor container.
  - job_name: "cadvisor"
    scrape_interval: 5s
    static_configs:
      - targets:
          - "cadvisor:8080"

  # Metrics from the transmission-exporter container.
  - job_name: "transmission"
    scrape_interval: 5s
    static_configs:
      - targets:
          - "transmission-exporter:19091"
|
|
@ -0,0 +1,19 @@
|
|||
# Registry lookup for the Grafana image; its digest drives pull_triggers
# on the matching docker_image resource.
data "docker_registry_image" "grafana" {
  name = "grafana/grafana"
}
|
||||
|
||||
# Registry lookup for the Prometheus image; its digest drives pull_triggers
# on the matching docker_image resource.
data "docker_registry_image" "prometheus" {
  name = "prom/prometheus"
}
|
||||
|
||||
# data "docker_registry_image" "alertmanager" {
|
||||
# name = "prom/alertmanager"
|
||||
# }
|
||||
|
||||
# Registry lookup for the node exporter image; its digest drives
# pull_triggers on the matching docker_image resource.
data "docker_registry_image" "nodeexporter" {
  name = "prom/node-exporter"
}
|
||||
|
||||
# Registry lookup for the Transmission exporter image; its digest drives
# pull_triggers on the matching docker_image resource.
data "docker_registry_image" "transmission-exporter" {
  name = "metalmatze/transmission-exporter"
}
|
|
@ -0,0 +1,19 @@
|
|||
# Local Grafana image, keyed on the digest reported by the registry lookup.
resource "docker_image" "grafana" {
  pull_triggers = ["${data.docker_registry_image.grafana.sha256_digest}"]
  name          = "${data.docker_registry_image.grafana.name}"
}
|
||||
|
||||
# Local Prometheus image, keyed on the digest reported by the registry lookup.
resource "docker_image" "prometheus" {
  pull_triggers = ["${data.docker_registry_image.prometheus.sha256_digest}"]
  name          = "${data.docker_registry_image.prometheus.name}"
}
|
||||
|
||||
# Local node exporter image, keyed on the digest reported by the registry lookup.
resource "docker_image" "nodeexporter" {
  pull_triggers = ["${data.docker_registry_image.nodeexporter.sha256_digest}"]
  name          = "${data.docker_registry_image.nodeexporter.name}"
}
|
||||
|
||||
# Local Transmission exporter image, keyed on the digest reported by the
# registry lookup.
resource "docker_image" "transmission-exporter" {
  pull_triggers = ["${data.docker_registry_image.transmission-exporter.sha256_digest}"]
  name          = "${data.docker_registry_image.transmission-exporter.name}"
}
|
|
@ -0,0 +1,91 @@
|
|||
# Grafana container, published through Traefik via the labels below.
resource "docker_container" "grafana" {
  name  = "grafana"
  image = "${docker_image.grafana.latest}"

  # Lifecycle settings.
  restart               = "unless-stopped"
  destroy_grace_seconds = 10
  must_run              = true

  # Lets Grafana reach the Prometheus container by name.
  links = ["prometheus"]

  # Admin password and public root URL are injected through the environment.
  env = [
    "GF_SECURITY_ADMIN_PASSWORD=${var.gf-security-admin-password}",
    "GF_SERVER_ROOT_URL=https://grafana.${var.domain}",
  ]

  # Grafana data directory kept on the host.
  volumes {
    container_path = "/var/lib/grafana"
    host_path      = "/mnt/xwing/data/grafana"
  }

  labels {
    # Routing.
    "traefik.enable" = "true"
    "traefik.port"   = 3000

    # Security headers.
    "traefik.frontend.headers.SSLTemporaryRedirect" = "true"
    "traefik.frontend.headers.STSSeconds"           = "2592000"
    "traefik.frontend.headers.STSIncludeSubdomains" = "false"
    "traefik.frontend.headers.contentTypeNosniff"   = "true"
    "traefik.frontend.headers.browserXSSFilter"     = "true"

    # Disabled labels, kept for reference.
    # "traefik.frontend.auth.basic" = "${var.basic_auth}"
    # "traefik.frontend.headers.customResponseHeaders" = "${var.xpoweredby}"
    # "traefik.frontend.headers.customFrameOptionsValue" = "${var.xfo_allow}"
  }
}
|
||||
|
||||
# Prometheus server container.
# The scrape configuration uploaded below addresses its targets by container
# name, so every scraped container has to appear in `links`.
resource docker_container "prometheus" {
  name  = "prometheus"
  image = "${docker_image.prometheus.latest}"

  # prometheus:prometheus
  # NOTE(review): presumably the uid:gid that owns the host data directory
  # mounted below; confirm on the host.
  user = "985:983"

  command = ["--config.file=/etc/prometheus/prometheus.yml"]

  # Time-series data kept on the host.
  volumes {
    host_path      = "/mnt/xwing/data/prometheus"
    container_path = "/prometheus"
  }

  # Scrape configuration shipped from this module's config directory.
  upload {
    content = "${file("${path.module}/config/prometheus.yml")}"
    file    = "/etc/prometheus/prometheus.yml"
  }

  # The "transmission" scrape job targets transmission-exporter:19091, so the
  # exporter must be linked as well. Referencing the resource (rather than a
  # bare string) also makes Terraform create the exporter first.
  links = [
    "nodeexporter",
    "cadvisor",
    "${docker_container.transmission-exporter.name}",
  ]

  restart               = "unless-stopped"
  destroy_grace_seconds = 10
  must_run              = true
}
|
||||
|
||||
# Prometheus node exporter container.
# Host /proc, /sys and / are mounted into the container so the exporter can
# read host-level metrics.
resource docker_container "nodeexporter" {
  name  = "nodeexporter"
  image = "${docker_image.nodeexporter.latest}"

  volumes {
    host_path      = "/proc"
    container_path = "/host/proc"
  }

  volumes {
    host_path      = "/sys"
    container_path = "/host/sys"
  }

  volumes {
    host_path      = "/"
    container_path = "/rootfs"
    read_only      = true
  }

  # Arguments are passed to the exporter verbatim (no shell), so the regex
  # must NOT be wrapped in quotes: a literal leading `"` before `^` would make
  # the pattern unmatchable and no mount point would be ignored. The `$$`
  # escape is a docker-compose convention and is not needed in Terraform.
  command = [
    "--path.procfs=/host/proc",
    "--path.sysfs=/host/sys",
    "--collector.filesystem.ignored-mount-points=^/(sys|proc|dev|host|etc)($|/)",
  ]

  restart               = "unless-stopped"
  destroy_grace_seconds = 10
  must_run              = true
}
|
|
@ -0,0 +1,16 @@
|
|||
# Prometheus exporter for Transmission.
# Upstream project: https://github.com/metalmatze/transmission-exporter
resource "docker_container" "transmission-exporter" {
  name  = "transmission-exporter"
  image = "${docker_image.transmission-exporter.latest}"

  # Lifecycle settings.
  restart               = "unless-stopped"
  destroy_grace_seconds = 10
  must_run              = true

  # The exporter reaches Transmission by container name at the address below.
  links = ["transmission"]

  env = [
    "TRANSMISSION_ADDR=http://transmission:9091",
  ]
}
|
|
@ -0,0 +1,19 @@
|
|||
# Grafana admin password; passed to the Grafana container as
# GF_SECURITY_ADMIN_PASSWORD. Required (no default).
variable "gf-security-admin-password" {
  type = "string"
}
|
||||
|
||||
# Base domain; Grafana's root URL is built as https://grafana.<domain>.
# Required (no default).
variable "domain" {
  type = "string"
}
|
||||
|
||||
# Slack username for alert notifications.
# NOTE(review): not referenced by any resource visible here; confirm usage.
variable "alert-slack-username" {
  default = "Prometheus"
}
|
||||
|
||||
# Slack channel for alert notifications.
# NOTE(review): not referenced by any resource visible here; confirm usage.
variable "alert-slack-channel" {
  default = "#notifications"
}
|
||||
|
||||
# Slack incoming-webhook URL for alert notifications.
# The default is a placeholder and must be overridden with a real webhook.
# NOTE(review): not referenced by any resource visible here; confirm usage.
variable "alert-slack-incoming-webhook" {
  default = "https://hooks.slack.com/whatever"
}
|
|
@ -32,3 +32,7 @@ variable "ips" {
|
|||
static = "139.59.48.222"
|
||||
}
|
||||
}
|
||||
|
||||
# Grafana admin password, forwarded to the "monitoring" module.
# Required (no default).
variable "gf-security-admin-password" {
  type = "string"
}
|
||||
|
|
Loading…
Reference in New Issue