nebula/monitoring/config/alert.rules

20 lines
660 B
Plaintext

# InstanceDown: fires for every series whose `up` value equals 0, once that
# condition has held continuously for 1 minute. The alert carries the label
# severity="page"; the annotations interpolate the `instance` and `job`
# labels of the offending series.
ALERT InstanceDown
  IF up == 0
  FOR 1m
  LABELS {
    severity = "page"
  }
  ANNOTATIONS {
    summary = "Instance {{ $labels.instance }} down",
    description = "{{ $labels.instance }} of job {{ $labels.job }} has been down for more than 1 minute.",
  }
# HighLoad: fires for every series whose `node_load1` value is above 0.85,
# once that condition has held continuously for 1 minute. The alert carries
# the label severity="page"; the annotations interpolate the `instance` and
# `job` labels of the offending series.
# NOTE(review): the 0.85 threshold is compared against the raw node_load1
# value with no division by CPU count visible here -- confirm this is the
# intended threshold for multi-core hosts.
ALERT HighLoad
  IF node_load1 > 0.85
  FOR 1m
  LABELS {
    severity = "page"
  }
  ANNOTATIONS {
    summary = "Instance {{ $labels.instance }} is under high load",
    description = "{{ $labels.instance }} of job {{ $labels.job }} has been under high load for more than 1 minute.",
  }