# Alerting rules, written in the Prometheus 1.x rule syntax
# (ALERT ... IF ... FOR ... LABELS ... ANNOTATIONS ...).
#
# Layout matters: `#` starts a comment that runs to the end of the line, so
# every ALERT statement must begin on its own line, after its comment.

# Alert for any instance that is unreachable for >1 minute.
# Fires once `up == 0` has held continuously for the FOR duration (1m).
ALERT InstanceDown
  IF up == 0
  FOR 1m
  LABELS { severity = "page" }
  ANNOTATIONS {
    summary = "Instance {{ $labels.instance }} down",
    description = "{{ $labels.instance }} of job {{ $labels.job }} has been down for more than 1 minute.",
  }

# Alert for any instance that is under high load for >1 minute.
# Fires once `node_load1 > 0.85` has held continuously for the FOR duration (1m).
# NOTE(review): node_load1 is presumably the node exporter's 1-minute load
# average; 0.85 is an absolute threshold, not normalised by CPU count, so
# multi-core hosts may trip it under normal load -- confirm this is intended.
ALERT HighLoad
  IF node_load1 > 0.85
  FOR 1m
  LABELS { severity = "page" }
  ANNOTATIONS {
    summary = "Instance {{ $labels.instance }} is under high load",
    description = "{{ $labels.instance }} of job {{ $labels.job }} has been under high load for more than 1 minute.",
  }