Initial work on monitoring
- Using https://github.com/danguita/prometheus-monitoring-stack/blob/master/docker-compose.yml as base
This commit is contained in:
parent
63bca598d4
commit
5712a339ce
6
main.tf
6
main.tf
|
@ -24,7 +24,13 @@ module "docker" {
|
|||
domain = "bb8.fun"
|
||||
}
|
||||
|
||||
|
||||
# Instantiates the local "radicale" module and hands it its domain name.
module "radicale" {
  domain = "radicale.bb8.fun"
  source = "radicale"
}
|
||||
|
||||
# Instantiates the local "monitoring" module, forwarding the root-level
# Grafana admin password variable to it.
module "monitoring" {
  gf-security-admin-password = "${var.gf-security-admin-password}"
  source                     = "monitoring"
}
|
||||
|
|
|
@ -0,0 +1,19 @@
|
|||
# Fires once a target has reported `up == 0` continuously for one minute.
# Labelled severity "page"; annotations carry the instance and job names.
ALERT InstanceDown
  IF up == 0
  FOR 1m
  LABELS {
    severity = "page"
  }
  ANNOTATIONS {
    summary = "Instance {{ $labels.instance }} down",
    description = "{{ $labels.instance }} of job {{ $labels.job }} has been down for more than 1 minute.",
  }
|
||||
|
||||
# Fires once `node_load1` has stayed above 0.85 continuously for one minute.
# Labelled severity "page"; annotations carry the instance and job names.
ALERT HighLoad
  IF node_load1 > 0.85
  FOR 1m
  LABELS {
    severity = "page"
  }
  ANNOTATIONS {
    summary = "Instance {{ $labels.instance }} is under high load",
    description = "{{ $labels.instance }} of job {{ $labels.job }} has been under high load for more than 1 minute.",
  }
|
|
@ -0,0 +1,10 @@
|
|||
---
# Alertmanager configuration: every alert is routed to the single "slack"
# receiver defined below.
# NOTE(review): the ALERT_SLACK_* values look like placeholders that must be
# substituted before deployment -- confirm where that substitution happens.
route:
  receiver: 'slack'

receivers:
  - name: 'slack'
    slack_configs:
      - send_resolved: true
        username: 'ALERT_SLACK_USERNAME'
        channel: 'ALERT_SLACK_CHANNEL'
        api_url: 'ALERT_SLACK_INCOMING_WEBHOOK_URL'
|
|
@ -0,0 +1,17 @@
|
|||
---
# Prometheus server configuration: global defaults, scrape jobs and the
# alert rule files to load.
global:
  # Default scrape interval; individual jobs below may override it.
  scrape_interval: 15s
  external_labels:
    monitor: "docker-monitor"

scrape_configs:
  # Prometheus scraping its own metrics endpoint.
  - job_name: "prometheus"
    static_configs:
      - targets: ["prometheus:9090"]

  # Node exporter metrics, scraped more often than the global default.
  # The hostname must match the exporter container's name / link alias,
  # which is "nodeexporter" (no underscore).
  - job_name: "node"
    scrape_interval: 5s
    static_configs:
      - targets: ["nodeexporter:9100"]

rule_files:
  - "alert.rules"
|
|
@ -0,0 +1,16 @@
|
|||
# Registry lookup for the Grafana image (no tag given).
data "docker_registry_image" "grafana" {
  name = "grafana/grafana"
}
|
||||
|
||||
# Registry lookup for the Prometheus server image (no tag given).
data "docker_registry_image" "prometheus" {
  name = "prom/prometheus"
}
|
||||
|
||||
# Registry lookup for the Alertmanager image (no tag given).
data "docker_registry_image" "alertmanager" {
  name = "prom/alertmanager"
}
|
||||
|
||||
# Registry lookup for the Prometheus node exporter image (no tag given).
# The published repository name is hyphenated: "prom/node-exporter".
data "docker_registry_image" "nodeexporter" {
  name = "prom/node-exporter"
}
|
||||
|
|
@ -0,0 +1,18 @@
|
|||
# Local Grafana image; the registry digest is used as the pull trigger.
resource "docker_image" "grafana" {
  pull_triggers = ["${data.docker_registry_image.grafana.sha256_digest}"]
  name          = "${data.docker_registry_image.grafana.name}"
}
|
||||
|
||||
# Local Prometheus image; the registry digest is used as the pull trigger.
resource "docker_image" "prometheus" {
  pull_triggers = ["${data.docker_registry_image.prometheus.sha256_digest}"]
  name          = "${data.docker_registry_image.prometheus.name}"
}
|
||||
|
||||
# Local Alertmanager image; the registry digest is used as the pull trigger.
resource "docker_image" "alertmanager" {
  pull_triggers = ["${data.docker_registry_image.alertmanager.sha256_digest}"]
  name          = "${data.docker_registry_image.alertmanager.name}"
}
|
||||
# Local node exporter image; the registry digest is used as the pull trigger.
resource "docker_image" "nodeexporter" {
  pull_triggers = ["${data.docker_registry_image.nodeexporter.sha256_digest}"]
  name          = "${data.docker_registry_image.nodeexporter.name}"
}
|
|
@ -0,0 +1,85 @@
|
|||
# Grafana container. Traefik routing is enabled through the labels below,
# Grafana's data directory is kept on the host, and the container is linked
# to the "prometheus" container.
resource "docker_container" "grafana" {
  name  = "grafana"
  image = "${docker_image.grafana.latest}"

  # Pass the module's admin password variable into Grafana. Without this the
  # variable is declared and required but never reaches the container, so
  # Grafana would keep its built-in default admin credentials.
  env = [
    "GF_SECURITY_ADMIN_PASSWORD=${var.gf-security-admin-password}",
  ]

  labels {
    # "traefik.frontend.auth.basic" = "${var.basic_auth}"
    "traefik.port"                                  = 3000
    "traefik.enable"                                = "true"
    "traefik.frontend.headers.SSLTemporaryRedirect" = "true"
    "traefik.frontend.headers.STSSeconds"           = "2592000"
    "traefik.frontend.headers.STSIncludeSubdomains" = "false"
    "traefik.frontend.headers.contentTypeNosniff"   = "true"
    "traefik.frontend.headers.browserXSSFilter"     = "true"

    # "traefik.frontend.headers.customResponseHeaders" = "${var.xpoweredby}"
    # "traefik.frontend.headers.customFrameOptionsValue" = "${var.xfo_allow}"
  }

  # Host directory mounted as Grafana's data directory.
  volumes {
    host_path      = "/mnt/xwing/data/grafana"
    container_path = "/var/lib/grafana"
  }

  links = ["prometheus"]

  restart               = "unless-stopped"
  destroy_grace_seconds = 10
  must_run              = true
}
|
||||
|
||||
# Prometheus server container. Its configuration file is uploaded from this
# module's config/ directory, its /prometheus directory is mounted from the
# host, and it is linked to the "nodeexporter" container.
# NOTE(review): only prometheus.yml is uploaded here; if that config lists
# rule files, confirm how they reach the container.
resource "docker_container" "prometheus" {
  image = "${docker_image.prometheus.latest}"
  name  = "prometheus"

  links = ["nodeexporter"]

  # Points Prometheus at the uploaded configuration file below.
  command = ["-config.file=/etc/prometheus/prometheus.yml"]

  upload {
    file    = "/etc/prometheus/prometheus.yml"
    content = "${file("${path.module}/config/prometheus.yml")}"
  }

  volumes {
    container_path = "/prometheus"
    host_path      = "/mnt/xwing/data/prometheus"
  }

  must_run              = true
  destroy_grace_seconds = 10
  restart               = "unless-stopped"
}
|
||||
|
||||
# Node exporter container. The host's /proc, /sys and / are mounted inside
# the container and the collectors are pointed at those mount points.
resource "docker_container" "nodeexporter" {
  name  = "nodeexporter"
  image = "${docker_image.nodeexporter.latest}"

  volumes {
    host_path      = "/proc"
    container_path = "/host/proc"
  }

  volumes {
    host_path      = "/sys"
    container_path = "/host/sys"
  }

  volumes {
    host_path      = "/"
    container_path = "/rootfs"
    read_only      = true
  }

  # Single command definition. A second `command` carrying the Prometheus
  # server's "-config.file" flag was removed: it duplicated this attribute
  # and is not an exporter option.
  # The regex is passed without surrounding quote characters and with a
  # single "$": arguments are handed to the process as-is (no shell), so
  # literal quotes would become part of the pattern and stop it matching.
  command = [
    "-collector.procfs=/host/proc",
    "-collector.sysfs=/host/sys",
    "-collector.filesystem.ignored-mount-points=^/(sys|proc|dev|host|etc)($|/)",
  ]

  restart               = "unless-stopped"
  destroy_grace_seconds = 10
  must_run              = true
}
|
||||
|
|
@ -0,0 +1,20 @@
|
|||
# Grafana admin password; no default, so the caller must supply it.
variable "gf-security-admin-password" {
  type = "string"
}
|
||||
# variable "email" {
|
||||
# type = "string"
|
||||
# }
|
||||
|
||||
# variable "domain" {
|
||||
# type = "string"
|
||||
# }
|
||||
|
||||
# Username for Slack alert messages (defaults to "Prometheus").
variable "alert-slack-username" {
  default = "Prometheus"
}
|
||||
# Slack channel for alert messages (defaults to "#notifications").
variable "alert-slack-channel" {
  default = "#notifications"
}
|
||||
# Slack incoming-webhook URL for alerts.
# NOTE(review): the default looks like a placeholder -- confirm it is
# overridden with a real webhook before use.
variable "alert-slack-incoming-webhook" {
  default = "https://hooks.slack.com/whatever"
}
|
|
@ -32,3 +32,7 @@ variable "ips" {
|
|||
static = "139.59.48.222"
|
||||
}
|
||||
}
|
||||
|
||||
# Root-level Grafana admin password; no default, so it must be supplied.
variable "gf-security-admin-password" {
  type = "string"
}
|
||||
|
|
Loading…
Reference in New Issue