Initial work on monitoring
- Using https://github.com/danguita/prometheus-monitoring-stack/blob/master/docker-compose.yml as base
This commit is contained in:
parent
63bca598d4
commit
5712a339ce
6
main.tf
6
main.tf
|
@ -24,7 +24,13 @@ module "docker" {
|
||||||
domain = "bb8.fun"
|
domain = "bb8.fun"
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
||||||
module "radicale" {
|
module "radicale" {
|
||||||
source = "radicale"
|
source = "radicale"
|
||||||
domain = "radicale.bb8.fun"
|
domain = "radicale.bb8.fun"
|
||||||
}
|
}
|
||||||
|
|
||||||
|
# Monitoring stack, loaded from the local "monitoring" module directory.
module "monitoring" {
  source = "monitoring"

  # Forwarded unchanged from the root-level variable of the same name.
  gf-security-admin-password = "${var.gf-security-admin-password}"
}
|
||||||
|
|
|
@ -0,0 +1,19 @@
|
||||||
|
# InstanceDown: fires once a target has reported `up == 0` for a full minute.
# The alert carries severity "page"; summary and description are templated
# from the target's `instance` and `job` labels.
ALERT InstanceDown
  IF up == 0
  FOR 1m
  LABELS { severity = "page" }
  ANNOTATIONS {
    summary     = "Instance {{ $labels.instance }} down",
    description = "{{ $labels.instance }} of job {{ $labels.job }} has been down for more than 1 minute.",
  }
|
||||||
|
|
||||||
|
# HighLoad: fires once `node_load1` has stayed above 0.85 for a full minute.
# NOTE(review): the threshold is an absolute load value, not divided by the
# number of CPUs — confirm that is intended for the monitored hosts.
ALERT HighLoad
  IF node_load1 > 0.85
  FOR 1m
  LABELS { severity = "page" }
  ANNOTATIONS {
    summary     = "Instance {{ $labels.instance }} is under high load",
    description = "{{ $labels.instance }} of job {{ $labels.job }} has been under high load for more than 1 minute.",
  }
|
|
@ -0,0 +1,10 @@
|
||||||
|
---
# Alertmanager routing: every alert is delivered to the single "slack" receiver.
route:
  receiver: 'slack'

receivers:
  - name: 'slack'
    slack_configs:
      - send_resolved: true
        # The three values below are literal placeholder strings in this file.
        # NOTE(review): presumably substituted with real values before
        # Alertmanager loads the config — confirm where that happens.
        username: 'ALERT_SLACK_USERNAME'
        channel: 'ALERT_SLACK_CHANNEL'
        api_url: 'ALERT_SLACK_INCOMING_WEBHOOK_URL'
|
|
@ -0,0 +1,17 @@
|
||||||
|
---
# Prometheus server configuration.
global:
  # Default scrape interval; the "node" job below overrides it.
  scrape_interval: 15s
  external_labels:
    monitor: 'docker-monitor'

scrape_configs:
  # Prometheus scraping its own metrics endpoint. `localhost` is used because
  # the container definition sets no hostname or network alias that would let
  # the container resolve its own name.
  - job_name: 'prometheus'
    static_configs:
      - targets: ['localhost:9090']

  # Host metrics from the node exporter. The hostname must match the linked
  # container's name ("nodeexporter"); the previous "node_exporter" spelling
  # matched no container in this stack.
  - job_name: 'node'
    scrape_interval: 5s
    static_configs:
      - targets: ['nodeexporter:9100']

# Relative paths are resolved against the directory holding this config file.
rule_files:
  - 'alert.rules'
|
|
@ -0,0 +1,16 @@
|
||||||
|
# Registry lookup for the Grafana image (no explicit tag given).
data "docker_registry_image" "grafana" {
  name = "grafana/grafana"
}
|
||||||
|
|
||||||
|
# Registry lookup for the Prometheus server image (no explicit tag given).
data "docker_registry_image" "prometheus" {
  name = "prom/prometheus"
}
|
||||||
|
|
||||||
|
# Registry lookup for the Alertmanager image (no explicit tag given).
data "docker_registry_image" "alertmanager" {
  name = "prom/alertmanager"
}
|
||||||
|
|
||||||
|
# Registry lookup for the node exporter image (no explicit tag given).
# The published repository name is hyphenated: "prom/node-exporter".
# The Terraform resource name stays "nodeexporter" so existing references
# (data.docker_registry_image.nodeexporter.*) keep working.
data "docker_registry_image" "nodeexporter" {
  name = "prom/node-exporter"
}
|
||||||
|
|
|
@ -0,0 +1,18 @@
|
||||||
|
# Local copy of the Grafana image; re-pulled whenever the registry digest changes.
resource "docker_image" "grafana" {
  name          = "${data.docker_registry_image.grafana.name}"
  pull_triggers = ["${data.docker_registry_image.grafana.sha256_digest}"]
}
|
||||||
|
|
||||||
|
# Local copy of the Prometheus image; re-pulled whenever the registry digest changes.
resource "docker_image" "prometheus" {
  name          = "${data.docker_registry_image.prometheus.name}"
  pull_triggers = ["${data.docker_registry_image.prometheus.sha256_digest}"]
}
|
||||||
|
|
||||||
|
# Local copy of the Alertmanager image; re-pulled whenever the registry digest changes.
resource "docker_image" "alertmanager" {
  name          = "${data.docker_registry_image.alertmanager.name}"
  pull_triggers = ["${data.docker_registry_image.alertmanager.sha256_digest}"]
}
|
||||||
|
# Local copy of the node exporter image; re-pulled whenever the registry digest changes.
resource "docker_image" "nodeexporter" {
  name          = "${data.docker_registry_image.nodeexporter.name}"
  pull_triggers = ["${data.docker_registry_image.nodeexporter.sha256_digest}"]
}
|
|
@ -0,0 +1,85 @@
|
||||||
|
# Grafana container. Exposed through traefik via the labels below, with its
# data directory persisted on the host.
resource "docker_container" "grafana" {
  name  = "grafana"
  image = "${docker_image.grafana.latest}"

  # Hand the admin password to Grafana. The module declares and receives this
  # variable, but it was previously never passed to the container.
  env = ["GF_SECURITY_ADMIN_PASSWORD=${var.gf-security-admin-password}"]

  labels {
    # "traefik.frontend.auth.basic" = "${var.basic_auth}"
    "traefik.port"                                  = 3000
    "traefik.enable"                                = "true"
    "traefik.frontend.headers.SSLTemporaryRedirect" = "true"
    "traefik.frontend.headers.STSSeconds"           = "2592000"
    "traefik.frontend.headers.STSIncludeSubdomains" = "false"
    "traefik.frontend.headers.contentTypeNosniff"   = "true"
    "traefik.frontend.headers.browserXSSFilter"     = "true"

    # "traefik.frontend.headers.customResponseHeaders" = "${var.xpoweredby}"
    # "traefik.frontend.headers.customFrameOptionsValue" = "${var.xfo_allow}"
  }

  volumes {
    host_path      = "/mnt/xwing/data/grafana"
    container_path = "/var/lib/grafana"
  }

  # Referencing the resource (rather than the bare string "prometheus") yields
  # the same link name while making Terraform create the prometheus container
  # first, so the link target exists when this container starts.
  links = ["${docker_container.prometheus.name}"]

  restart               = "unless-stopped"
  destroy_grace_seconds = 10
  must_run              = true
}
|
||||||
|
|
||||||
|
# Prometheus server container. The config file is uploaded from this module
# and the time-series data directory is persisted on the host.
resource "docker_container" "prometheus" {
  name  = "prometheus"
  image = "${docker_image.prometheus.latest}"

  command = ["-config.file=/etc/prometheus/prometheus.yml"]

  volumes {
    host_path      = "/mnt/xwing/data/prometheus"
    container_path = "/prometheus"
  }

  upload {
    content = "${file("${path.module}/config/prometheus.yml")}"
    file    = "/etc/prometheus/prometheus.yml"
  }

  # NOTE(review): only prometheus.yml is uploaded here. If that config lists
  # rule files, they are not uploaded by this resource — confirm whether a
  # second upload block is needed.

  # Referencing the resource (rather than the bare string "nodeexporter")
  # yields the same link name while making Terraform create the node exporter
  # container first, so the link target exists when this container starts.
  links = ["${docker_container.nodeexporter.name}"]

  restart               = "unless-stopped"
  destroy_grace_seconds = 10
  must_run              = true
}
|
||||||
|
|
||||||
|
# Node exporter container. Reads host metrics through bind mounts of the
# host's /proc, /sys and root filesystem.
resource "docker_container" "nodeexporter" {
  name  = "nodeexporter"
  image = "${docker_image.nodeexporter.latest}"

  # The host mounts are only read by the exporter, so all are mounted read-only.
  volumes {
    host_path      = "/proc"
    container_path = "/host/proc"
    read_only      = true
  }

  volumes {
    host_path      = "/sys"
    container_path = "/host/sys"
    read_only      = true
  }

  volumes {
    host_path      = "/"
    container_path = "/rootfs"
    read_only      = true
  }

  # Single command definition. The earlier duplicate
  # `-config.file=/etc/prometheus/prometheus.yml` was a leftover from the
  # prometheus container and is not a node exporter flag.
  #
  # The mount-point regex is passed without embedded quote characters and with
  # a single `$`: arguments are handed to the process as-is (no shell strips
  # quotes), and `$$` is a docker-compose escape, not a Terraform one.
  command = [
    "-collector.procfs=/host/proc",
    "-collector.sysfs=/host/sys",
    "-collector.filesystem.ignored-mount-points=^/(sys|proc|dev|host|etc)($|/)"
  ]

  restart               = "unless-stopped"
  destroy_grace_seconds = 10
  must_run              = true
}
|
||||||
|
|
|
@ -0,0 +1,20 @@
|
||||||
|
# Required module input (no default): the Grafana admin password.
variable "gf-security-admin-password" {
  type = "string"
}
|
||||||
|
# variable "email" {
|
||||||
|
# type = "string"
|
||||||
|
# }
|
||||||
|
|
||||||
|
# variable "domain" {
|
||||||
|
# type = "string"
|
||||||
|
# }
|
||||||
|
|
||||||
|
# Optional module input: username for Slack alert messages.
variable "alert-slack-username" {
  default = "Prometheus"
}
|
||||||
|
# Optional module input: Slack channel for alert messages.
variable "alert-slack-channel" {
  default = "#notifications"
}
|
||||||
|
# Optional module input: Slack incoming-webhook URL for alert delivery.
# NOTE(review): the default looks like a placeholder, not a working webhook —
# confirm a real value is supplied by the caller.
variable "alert-slack-incoming-webhook" {
  default = "https://hooks.slack.com/whatever"
}
|
|
@ -32,3 +32,7 @@ variable "ips" {
|
||||||
static = "139.59.48.222"
|
static = "139.59.48.222"
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
# Required root-level input (no default): the Grafana admin password.
variable "gf-security-admin-password" {
  type = "string"
}
|
||||||
|
|
Loading…
Reference in New Issue