# 20 lines · 660 B · Plaintext
# Alert for any instance that is unreachable for >1 minute.
#
# Fires once the expression `up == 0` has held continuously for the FOR
# duration (1m). The alert carries the label severity="page"; the annotation
# templates are filled in from the `instance` and `job` labels of the series
# that triggered it.
ALERT InstanceDown
  IF up == 0
  FOR 1m
  LABELS { severity = "page" }
  ANNOTATIONS {
    summary = "Instance {{ $labels.instance }} down",
    description = "{{ $labels.instance }} of job {{ $labels.job }} has been down for more than 1 minute.",
  }

# Alert for any instance that is under high load for >1 minute.
#
# Fires once `node_load1 > 0.85` has held continuously for the FOR duration
# (1m). The alert carries the label severity="page"; the annotation templates
# are filled in from the `instance` and `job` labels of the series that
# triggered it.
#
# NOTE(review): node_load1 is presumably the node exporter's 1-minute load
# average, which is not scaled by CPU count; confirm that an absolute
# threshold of 0.85 is intended for multi-core hosts.
ALERT HighLoad
  IF node_load1 > 0.85
  FOR 1m
  LABELS { severity = "page" }
  ANNOTATIONS {
    summary = "Instance {{ $labels.instance }} is under high load",
    description = "{{ $labels.instance }} of job {{ $labels.job }} has been under high load for more than 1 minute.",
  }