Nemo
5712a339ce
- Using https://github.com/danguita/prometheus-monitoring-stack/blob/master/docker-compose.yml as base
20 lines
660 B
Plaintext
20 lines
660 B
Plaintext
# Alert for any instance that is unreachable for >1 minute.
#
# Fires once the expression `up == 0` has matched a target continuously for
# the FOR duration (1m). The alert carries the label severity="page"; how that
# label is routed is decided outside this file (presumably by Alertmanager --
# verify against the Alertmanager configuration).
# The annotations are templated from the `instance` and `job` labels of the
# series that triggered the alert.
ALERT InstanceDown
  IF up == 0
  FOR 1m
  LABELS { severity = "page" }
  ANNOTATIONS {
    summary = "Instance {{ $labels.instance }} down",
    description = "{{ $labels.instance }} of job {{ $labels.job }} has been down for more than 1 minute.",
  }

# Alert for any instance that is under high load for >1 minute.
#
# Fires once `node_load1 > 0.85` has held continuously for the FOR duration
# (1m), and tags the alert with severity="page".
# The annotations are templated from the `instance` and `job` labels of the
# series that triggered the alert.
#
# NOTE(review): 0.85 is an absolute threshold on `node_load1`; it is not
# divided by the number of CPUs, so a multi-core host would presumably page
# while still mostly idle. If that is not intended, consider normalizing, e.g.
#   node_load1 / count(node_cpu{mode="idle"}) by (instance, job) > 0.85
# (the per-CPU metric name depends on the node_exporter version -- TODO confirm
# before changing the expression).
ALERT HighLoad
  IF node_load1 > 0.85
  FOR 1m
  LABELS { severity = "page" }
  ANNOTATIONS {
    summary = "Instance {{ $labels.instance }} is under high load",
    description = "{{ $labels.instance }} of job {{ $labels.job }} has been under high load for more than 1 minute.",
  }