diff --git a/README.md b/README.md
index 1004586..32af9d3 100644
--- a/README.md
+++ b/README.md
@@ -89,6 +89,7 @@ Issues I've faced/reported as a result of this project:
 6. `ubooquity` docker container doesn't let you set admin password: https://github.com/linuxserver/docker-ubooquity/issues/17. (Couldn't reproduce, closed) :white_check_mark:
 7. Traefik customresponseheaders can't contain colons on the docker backend: https://github.com/containous/traefik/issues/2517. Fixed with https://github.com/containous/traefik/pull/2509 :white_check_mark:
 8. Traefik Security headers don't overwrite upstream headers: https://github.com/containous/traefik/issues/2618
+9. Transmission exporter broke with different data types while unmarshalling JSON in go. I filed a PR https://github.com/metalmatze/transmission-exporter/pull/2
 
 # Plumbing
 
diff --git a/main.tf b/main.tf
index 9ad5848..d94433b 100644
--- a/main.tf
+++ b/main.tf
@@ -28,3 +28,9 @@ module "radicale" {
   source = "radicale"
   domain = "radicale.bb8.fun"
 }
+
+module "monitoring" {
+  source = "monitoring"
+  gf-security-admin-password = "${var.gf-security-admin-password}"
+  domain = "bb8.fun"
+}
diff --git a/monitoring/config/alert.rules b/monitoring/config/alert.rules
new file mode 100644
index 0000000..05be2cc
--- /dev/null
+++ b/monitoring/config/alert.rules
@@ -0,0 +1,19 @@
+# Alert for any instance that is unreachable for >1 minute.
+ALERT InstanceDown
+  IF up == 0
+  FOR 1m
+  LABELS { severity = "page" }
+  ANNOTATIONS {
+    summary = "Instance {{ $labels.instance }} down",
+    description = "{{ $labels.instance }} of job {{ $labels.job }} has been down for more than 1 minute.",
+  }
+
+# Alert for any instance that is under high load for >1 minute.
+ALERT HighLoad
+  IF node_load1 > 0.85
+  FOR 1m
+  LABELS { severity = "page" }
+  ANNOTATIONS {
+    summary = "Instance {{ $labels.instance }} is under high load",
+    description = "{{ $labels.instance }} of job {{ $labels.job }} has been under high load for more than 1 minute.",
+  }
diff --git a/monitoring/config/alertmanager.template.yml b/monitoring/config/alertmanager.template.yml
new file mode 100644
index 0000000..b8ee17c
--- /dev/null
+++ b/monitoring/config/alertmanager.template.yml
@@ -0,0 +1,10 @@
+route:
+  receiver: "slack"
+
+receivers:
+  - name: "slack"
+    slack_configs:
+      - send_resolved: true
+        username: "ALERT_SLACK_USERNAME"
+        channel: "ALERT_SLACK_CHANNEL"
+        api_url: "ALERT_SLACK_INCOMING_WEBHOOK_URL"
diff --git a/monitoring/config/prometheus.yml b/monitoring/config/prometheus.yml
new file mode 100644
index 0000000..5cfe42a
--- /dev/null
+++ b/monitoring/config/prometheus.yml
@@ -0,0 +1,27 @@
+global:
+  scrape_interval: 15s
+  external_labels:
+    monitor: "docker-monitor"
+
+scrape_configs:
+  - job_name: "prometheus"
+    static_configs:
+      - targets: ["localhost:9090"]
+
+  - job_name: "node"
+    scrape_interval: 5s
+    static_configs:
+      - targets: ["nodeexporter:9100"]
+
+  - job_name: 'cadvisor'
+    scrape_interval: 5s
+    static_configs:
+      - targets: ['cadvisor:8080']
+
+  - job_name: 'transmission'
+    scrape_interval: 5s
+    static_configs:
+      - targets: ['transmission-exporter:19091']
+
+rule_files:
+- "alert.rules"
diff --git a/monitoring/data.tf b/monitoring/data.tf
new file mode 100644
index 0000000..344dcc3
--- /dev/null
+++ b/monitoring/data.tf
@@ -0,0 +1,19 @@
+data "docker_registry_image" "grafana" {
+  name = "grafana/grafana"
+}
+
+data "docker_registry_image" "prometheus" {
+  name = "prom/prometheus"
+}
+
+# data "docker_registry_image" "alertmanager" {
+#   name = "prom/alertmanager"
+# }
+
+data "docker_registry_image" "nodeexporter" {
+  name = "prom/node-exporter"
+}
+
+data "docker_registry_image" "transmission-exporter" {
+  name = "metalmatze/transmission-exporter"
+}
diff --git a/monitoring/images.tf b/monitoring/images.tf
new file mode 100644
index 0000000..46b903d
--- /dev/null
+++ b/monitoring/images.tf
@@ -0,0 +1,19 @@
+resource "docker_image" "grafana" {
+  name = "${data.docker_registry_image.grafana.name}"
+  pull_triggers = ["${data.docker_registry_image.grafana.sha256_digest}"]
+}
+
+resource "docker_image" "prometheus" {
+  name = "${data.docker_registry_image.prometheus.name}"
+  pull_triggers = ["${data.docker_registry_image.prometheus.sha256_digest}"]
+}
+
+resource "docker_image" "nodeexporter" {
+  name = "${data.docker_registry_image.nodeexporter.name}"
+  pull_triggers = ["${data.docker_registry_image.nodeexporter.sha256_digest}"]
+}
+
+resource "docker_image" "transmission-exporter" {
+  name = "${data.docker_registry_image.transmission-exporter.name}"
+  pull_triggers = ["${data.docker_registry_image.transmission-exporter.sha256_digest}"]
+}
diff --git a/monitoring/main.tf b/monitoring/main.tf
new file mode 100644
index 0000000..9ed8ac5
--- /dev/null
+++ b/monitoring/main.tf
@@ -0,0 +1,95 @@
+resource docker_container "grafana" {
+  name = "grafana"
+  image = "${docker_image.grafana.latest}"
+
+  labels {
+    # "traefik.frontend.auth.basic" = "${var.basic_auth}"
+    "traefik.port" = 3000
+    "traefik.enable" = "true"
+    "traefik.frontend.headers.SSLTemporaryRedirect" = "true"
+    "traefik.frontend.headers.STSSeconds" = "2592000"
+    "traefik.frontend.headers.STSIncludeSubdomains" = "false"
+    "traefik.frontend.headers.contentTypeNosniff" = "true"
+    "traefik.frontend.headers.browserXSSFilter" = "true"
+
+    # "traefik.frontend.headers.customResponseHeaders" = "${var.xpoweredby}"
+    # "traefik.frontend.headers.customFrameOptionsValue" = "${var.xfo_allow}"
+  }
+
+  volumes {
+    host_path = "/mnt/xwing/data/grafana"
+    container_path = "/var/lib/grafana"
+  }
+
+  links = ["prometheus"]
+
+  env = [
+    "GF_SECURITY_ADMIN_PASSWORD=${var.gf-security-admin-password}",
+    "GF_SERVER_ROOT_URL=https://grafana.${var.domain}",
+  ]
+
+  restart = "unless-stopped"
+  destroy_grace_seconds = 10
+  must_run = true
+}
+
+resource docker_container "prometheus" {
+  name = "prometheus"
+  image = "${docker_image.prometheus.latest}"
+
+  # prometheus:prometheus
+  user = "985:983"
+
+  command = ["--config.file=/etc/prometheus/prometheus.yml"]
+
+  volumes {
+    host_path = "/mnt/xwing/data/prometheus"
+    container_path = "/prometheus"
+  }
+
+  upload {
+    content = "${file("${path.module}/config/prometheus.yml")}"
+    file = "/etc/prometheus/prometheus.yml"
+  }
+
+  # Every scrape target named in config/prometheus.yml has to be linked here.
+  # NOTE(review): assumes name resolution relies on links (default bridge network) - confirm.
+  links = ["nodeexporter", "cadvisor", "transmission-exporter"]
+
+  restart = "unless-stopped"
+  destroy_grace_seconds = 10
+  must_run = true
+}
+
+resource docker_container "nodeexporter" {
+  name = "nodeexporter"
+  image = "${docker_image.nodeexporter.latest}"
+
+  volumes {
+    host_path = "/proc"
+    container_path = "/host/proc"
+  }
+
+  volumes {
+    host_path = "/sys"
+    container_path = "/host/sys"
+  }
+
+  volumes {
+    host_path = "/"
+    container_path = "/rootfs"
+    read_only = true
+  }
+
+  # Arguments are handed over as-is (no shell strips quotes), so the regex must
+  # not be wrapped in literal quotes; a single "$" is the end-of-string anchor.
+  command = [
+    "--path.procfs=/host/proc",
+    "--path.sysfs=/host/sys",
+    "--collector.filesystem.ignored-mount-points=^/(sys|proc|dev|host|etc)($|/)",
+  ]
+
+  restart = "unless-stopped"
+  destroy_grace_seconds = 10
+  must_run = true
+}
diff --git a/monitoring/transmission.tf b/monitoring/transmission.tf
new file mode 100644
index 0000000..c3e0018
--- /dev/null
+++ b/monitoring/transmission.tf
@@ -0,0 +1,16 @@
+# Transmission Exporter for prometheus
+# https://github.com/metalmatze/transmission-exporter
+resource docker_container "transmission-exporter" {
+  name = "transmission-exporter"
+  image = "${docker_image.transmission-exporter.latest}"
+
+  links = ["transmission"]
+
+  env = [
+    "TRANSMISSION_ADDR=http://transmission:9091"
+  ]
+
+  restart = "unless-stopped"
+  destroy_grace_seconds = 10
+  must_run = true
+}
diff --git a/monitoring/variables.tf b/monitoring/variables.tf
new file mode 100644
index 0000000..7f698e3
--- /dev/null
+++ b/monitoring/variables.tf
@@ -0,0 +1,19 @@
+variable "gf-security-admin-password" {
+  type = "string"
+}
+
+variable "domain" {
+  type = "string"
+}
+
+variable "alert-slack-username" {
+  default = "Prometheus"
+}
+
+variable "alert-slack-channel" {
+  default = "#notifications"
+}
+
+variable "alert-slack-incoming-webhook" {
+  default = "https://hooks.slack.com/whatever"
+}
diff --git a/variables.tf b/variables.tf
index 1028573..2524023 100644
--- a/variables.tf
+++ b/variables.tf
@@ -32,3 +32,7 @@ variable "ips" {
     static = "139.59.48.222"
   }
 }
+
+variable "gf-security-admin-password" {
+  type = "string"
+}