Initial work on monitoring

- Using
https://github.com/danguita/prometheus-monitoring-stack/blob/master/docker-compose.yml
as base
This commit is contained in:
Nemo 2017-12-28 17:59:54 +05:30
parent 63bca598d4
commit 5712a339ce
9 changed files with 195 additions and 0 deletions

View File

@ -24,7 +24,13 @@ module "docker" {
domain = "bb8.fun"
}
# Radicale module, loaded from the local "radicale" directory and given the
# hostname it should be served under.
module "radicale" {
  domain = "radicale.bb8.fun"
  source = "radicale"
}
# Monitoring stack module, loaded from the local "monitoring" directory.
# The Grafana admin password is forwarded from the root variable of the
# same name.
module "monitoring" {
  source                     = "monitoring"
  gf-security-admin-password = "${var.gf-security-admin-password}"
}

View File

@ -0,0 +1,19 @@
# InstanceDown: fires once a scrape target has reported `up == 0`
# continuously for one minute.
ALERT InstanceDown
  IF up == 0
  FOR 1m
  LABELS {
    severity = "page"
  }
  ANNOTATIONS {
    summary = "Instance {{ $labels.instance }} down",
    description = "{{ $labels.instance }} of job {{ $labels.job }} has been down for more than 1 minute.",
  }
# HighLoad: fires once `node_load1` has stayed above 0.85 continuously for
# one minute.
ALERT HighLoad
  IF node_load1 > 0.85
  FOR 1m
  LABELS {
    severity = "page"
  }
  ANNOTATIONS {
    summary = "Instance {{ $labels.instance }} is under high load",
    description = "{{ $labels.instance }} of job {{ $labels.job }} has been under high load for more than 1 minute.",
  }

View File

@ -0,0 +1,10 @@
# Alertmanager configuration: every alert is routed to the single "slack"
# receiver, which also posts a message when an alert resolves.
#
# NOTE(review): the ALERT_SLACK_* values are literal placeholders. Nothing
# visible alongside this file substitutes them -- confirm how they get
# replaced with real values before relying on these notifications.
route:
  receiver: "slack"

receivers:
  - name: "slack"
    slack_configs:
      - send_resolved: true
        username: "ALERT_SLACK_USERNAME"
        channel: "ALERT_SLACK_CHANNEL"
        api_url: "ALERT_SLACK_INCOMING_WEBHOOK_URL"

View File

@ -0,0 +1,17 @@
# Prometheus server configuration.
global:
  # Default scrape interval for jobs that do not set their own.
  scrape_interval: 15s
  external_labels:
    monitor: "docker-monitor"

scrape_configs:
  # Prometheus scraping its own metrics endpoint. localhost is used because
  # the container is not guaranteed to be able to resolve its own name.
  - job_name: "prometheus"
    static_configs:
      - targets: ["localhost:9090"]

  # Host metrics from the node exporter. The hostname must match the linked
  # container name ("nodeexporter"), otherwise the target never resolves.
  - job_name: "node"
    scrape_interval: 5s
    static_configs:
      - targets: ["nodeexporter:9100"]

# Relative paths are resolved against the directory of this config file.
rule_files:
  - "alert.rules"

16
monitoring/data.tf Normal file
View File

@ -0,0 +1,16 @@
# Registry lookups for the monitoring images. The digest each one exposes is
# used as a pull trigger by the matching docker_image resource.

# Grafana dashboard server image.
data "docker_registry_image" "grafana" {
  name = "grafana/grafana"
}

# Prometheus server image.
data "docker_registry_image" "prometheus" {
  name = "prom/prometheus"
}

# Alertmanager image.
data "docker_registry_image" "alertmanager" {
  name = "prom/alertmanager"
}
# Node exporter image. The published repository is "prom/node-exporter"
# (hyphenated); a lookup for "prom/nodeexporter" finds nothing and fails.
data "docker_registry_image" "nodeexporter" {
  name = "prom/node-exporter"
}

18
monitoring/images.tf Normal file
View File

@ -0,0 +1,18 @@
# Local images for the monitoring stack. Each image takes its name from the
# matching registry data source and is re-pulled whenever that data source
# reports a different sha256 digest.

resource "docker_image" "grafana" {
  pull_triggers = ["${data.docker_registry_image.grafana.sha256_digest}"]
  name          = "${data.docker_registry_image.grafana.name}"
}

resource "docker_image" "prometheus" {
  pull_triggers = ["${data.docker_registry_image.prometheus.sha256_digest}"]
  name          = "${data.docker_registry_image.prometheus.name}"
}

resource "docker_image" "alertmanager" {
  pull_triggers = ["${data.docker_registry_image.alertmanager.sha256_digest}"]
  name          = "${data.docker_registry_image.alertmanager.name}"
}

resource "docker_image" "nodeexporter" {
  pull_triggers = ["${data.docker_registry_image.nodeexporter.sha256_digest}"]
  name          = "${data.docker_registry_image.nodeexporter.name}"
}

85
monitoring/main.tf Normal file
View File

@ -0,0 +1,85 @@
# Grafana dashboard container, exposed through traefik on port 3000 and
# linked to the prometheus container so it can be used as a data source.
resource docker_container "grafana" {
  name  = "grafana"
  image = "${docker_image.grafana.latest}"

  # Pass the admin password the module is given; without this the variable
  # is declared and required but never reaches Grafana.
  # NOTE(review): Grafana presumably applies this only when it first
  # initialises its database -- confirm if /var/lib/grafana already has data.
  env = [
    "GF_SECURITY_ADMIN_PASSWORD=${var.gf-security-admin-password}"
  ]

  labels {
    # "traefik.frontend.auth.basic" = "${var.basic_auth}"
    "traefik.port"                                  = 3000
    "traefik.enable"                                = "true"
    "traefik.frontend.headers.SSLTemporaryRedirect" = "true"
    "traefik.frontend.headers.STSSeconds"           = "2592000"
    "traefik.frontend.headers.STSIncludeSubdomains" = "false"
    "traefik.frontend.headers.contentTypeNosniff"   = "true"
    "traefik.frontend.headers.browserXSSFilter"     = "true"
    # "traefik.frontend.headers.customResponseHeaders" = "${var.xpoweredby}"
    # "traefik.frontend.headers.customFrameOptionsValue" = "${var.xfo_allow}"
  }

  # Persist Grafana's data directory on the host.
  volumes {
    host_path      = "/mnt/xwing/data/grafana"
    container_path = "/var/lib/grafana"
  }

  links                 = ["prometheus"]
  restart               = "unless-stopped"
  destroy_grace_seconds = 10
  must_run              = true
}
# Prometheus server container. Its configuration file is uploaded from this
# module's config/ directory and its /prometheus directory is bind-mounted
# from the host.
#
# NOTE(review): the uploaded prometheus.yml lists "alert.rules" under
# rule_files, but no rules file is uploaded here -- confirm whether the
# alert rules are expected to be loaded.
resource docker_container "prometheus" {
  name                  = "prometheus"
  image                 = "${docker_image.prometheus.latest}"
  must_run              = true
  restart               = "unless-stopped"
  destroy_grace_seconds = 10

  # Linked so the nodeexporter container is reachable by name.
  links = ["nodeexporter"]

  command = ["-config.file=/etc/prometheus/prometheus.yml"]

  upload {
    file    = "/etc/prometheus/prometheus.yml"
    content = "${file("${path.module}/config/prometheus.yml")}"
  }

  volumes {
    container_path = "/prometheus"
    host_path      = "/mnt/xwing/data/prometheus"
  }
}
# Node exporter container: publishes host metrics collected from the
# bind-mounted /proc, /sys and root filesystem.
resource docker_container "nodeexporter" {
  name  = "nodeexporter"
  image = "${docker_image.nodeexporter.latest}"

  volumes {
    host_path      = "/proc"
    container_path = "/host/proc"
  }

  volumes {
    host_path      = "/sys"
    container_path = "/host/sys"
  }

  volumes {
    host_path      = "/"
    container_path = "/rootfs"
    read_only      = true
  }

  # `command` is set exactly once, with exporter flags only. A Prometheus
  # `-config.file=...` flag does not belong on the node exporter, and a
  # second `command` assignment would conflict with this one.
  #
  # The mount-point regex is passed without surrounding quote characters:
  # arguments are handed to the process as-is (no shell strips quotes), so
  # embedded quotes would become part of the pattern and stop it matching.
  #
  # NOTE(review): single-dash -collector.* flags are the older node_exporter
  # flag style; newer releases are understood to use --path.procfs /
  # --path.sysfs -- confirm against the image version actually pulled.
  command = [
    "-collector.procfs=/host/proc",
    "-collector.sysfs=/host/sys",
    "-collector.filesystem.ignored-mount-points=^/(sys|proc|dev|host|etc)($$|/)"
  ]

  restart               = "unless-stopped"
  destroy_grace_seconds = 10
  must_run              = true
}

20
monitoring/variables.tf Normal file
View File

@ -0,0 +1,20 @@
# Grafana admin password. Required: there is no default, so the caller of
# this module must supply it.
variable "gf-security-admin-password" {
  type = "string"
}
# variable "email" {
# type = "string"
# }
# variable "domain" {
# type = "string"
# }
# Slack alerting settings.
# NOTE(review): none of the resources visible in this module reference these
# variables yet -- confirm where they are meant to be consumed.

# Display name used for alert messages.
variable "alert-slack-username" {
  default = "Prometheus"
}

# Channel that receives alert messages.
variable "alert-slack-channel" {
  default = "#notifications"
}

# Incoming-webhook URL; the default is a placeholder, not a working endpoint.
variable "alert-slack-incoming-webhook" {
  default = "https://hooks.slack.com/whatever"
}

View File

@ -32,3 +32,7 @@ variable "ips" {
static = "139.59.48.222"
}
}
# Grafana admin password, forwarded to the monitoring module. Required: no
# default is provided.
variable "gf-security-admin-password" {
  type = "string"
}