blob: 7b53cc9ff2aa3219569f7435a7ec0df837a3348b (
plain)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
|
#!/usr/bin/env bash
#
# Check the instance health of an AWS Elastic Load Balancer (queried through
# `aws elb describe-instance-health`) and report it as a monitoring-plugin
# style status line: OK / WARNING / CRITICAL with exit codes 0 / 1 / 2.
#
# Options:
#   -f FILE      export FILE as BOTO_CONFIG for the commands run below
#   -h ENDPOINT  DNS name of the load balancer, or a name whose CNAME chain
#                leads to one
#   -w PERCENT   out-of-service percentage that triggers WARNING  (default 20)
#   -c PERCENT   out-of-service percentage that triggers CRITICAL (default 33)
set -euo pipefail

outOfServicePercentWarn=20
outOfServicePercentCrit=33
endpoint=''

while [ $# -gt 0 ]; do
  case "$1" in
    -f | -h | -w | -c)
      # Every option takes a value. Check for it explicitly: with `set -u`
      # a missing value would otherwise abort on "$2: unbound variable".
      if [ $# -lt 2 ]; then
        echo "$0: option $1 requires an argument" >&2
        exit 1
      fi
      ;;
    *)
      echo "$0: unsupported argument: $1" >&2
      exit 1
      ;;
  esac

  case "$1" in
    -f) export BOTO_CONFIG="$2" ;;
    -h) endpoint="$2" ;;
    -w | -c)
      # The thresholds are used in integer arithmetic later on, so reject
      # anything that is not a plain non-negative integer right away.
      case "$2" in
        '' | *[!0-9]*)
          echo "$0: option $1 expects a non-negative integer, got '$2'" >&2
          exit 1
          ;;
      esac
      if [ "$1" = "-w" ]; then
        outOfServicePercentWarn="$2"
      else
        outOfServicePercentCrit="$2"
      fi
      ;;
  esac
  shift 2
done
# Base AWS CLI invocation; the region and load balancer name are appended
# once the endpoint has been resolved.
cmd=( aws elb describe-instance-health )

if [[ -z "$endpoint" ]]; then
  echo "$0: no endpoint given (use -h)" >&2
  exit 255
fi

# Follow CNAME records until the name looks like an ELB DNS name
# (<something>.<something>.elb.amazonaws.com...), giving up after 10 hops.
# $endpoint is overwritten on every hop and the positional parameters have
# already been consumed by option parsing, so keep the requested name around
# for the error message.
requestedEndpoint=$endpoint
c=0
while [[ "$endpoint" != *.*.elb.amazonaws.com* ]]; do
  # An empty answer means the chain ended without reaching an ELB name;
  # querying again could not change that.
  if [[ -z "$endpoint" ]] || (( ++c > 10 )); then
    echo "failed to resolve '$requestedEndpoint'" >&2
    exit 255
  fi
  endpoint=$(dig "$endpoint" CNAME +short)
done
# Split the resolved DNS name into region and load balancer label. The name
# is parsed backwards from ".elb.amazonaws.com": the label right before it is
# the region and the one before that identifies the load balancer. Names with
# extra leading labels (e.g. "dualstack.<name>-<id>.<region>.elb...") are
# therefore handled the same way as the plain "<name>-<id>.<region>.elb..."
# form.
lbHost=${endpoint%%.elb.amazonaws.com*}
region=${lbHost##*.}
lbLabel=${lbHost%.*}
lbLabel=${lbLabel##*.}
cmd+=( --region "$region" )

# Strip the optional "internal-" prefix and the trailing "-<digits>" suffix
# from the label to obtain the load balancer name; a label that does not have
# that shape is used unchanged.
lbLabelRe='^(internal-)?(.*)-[0-9]+$'
if [[ "$lbLabel" =~ $lbLabelRe ]]; then
  elbName=${BASH_REMATCH[2]}
else
  elbName=$lbLabel
fi
cmd+=( --load-balancer-name "$elbName" )

# Query AWS once and derive all figures from the captured JSON.
json=$("${cmd[@]}")
totalCount=$(jq -c '.InstanceStates | length' <<<"$json")
outOfServiceInfo=$(jq -c '.InstanceStates | map(select(.State == "OutOfService") | .InstanceId)' <<<"$json")
outOfServiceCount=$(jq -r 'length' <<<"$outOfServiceInfo")

# Absolute thresholds: taken from the environment when outOfServiceCountWarn /
# outOfServiceCountCrit are already set, otherwise derived from the percentage
# thresholds. The division is integer division, so the result is rounded down.
outOfServiceCountWarn=${outOfServiceCountWarn:-$(( totalCount * outOfServicePercentWarn / 100 ))}
outOfServiceCountCrit=${outOfServiceCountCrit:-$(( totalCount * outOfServicePercentCrit / 100 ))}

# Trailer appended after "|" in the status line: totals plus the out-of-service
# count with its warn/crit thresholds.
stat="total=$totalCount out_of_service=$outOfServiceCount;$outOfServiceCountWarn;$outOfServiceCountCrit"
outOfServiceInstances=$(jq -r 'join(", ")' <<<"$outOfServiceInfo")
# Choose the status label, message detail and exit code, then emit the single
# status line. Order matters: "nothing out of service" is checked first, then
# the critical threshold, then the warning threshold.
detail="$elbName - $outOfServiceCount/$totalCount out of service: $outOfServiceInstances|$stat"
if test "$outOfServiceCount" -eq 0; then
  label='OK'
  rc=0
  # With every instance in service only the total is reported.
  detail="$elbName - $totalCount instances|$stat"
elif test "$outOfServiceCount" -ge "$outOfServiceCountCrit"; then
  label='CRITICAL'
  rc=2
elif test "$outOfServiceCount" -ge "$outOfServiceCountWarn"; then
  label='WARNING'
  rc=1
else
  # Some instances are out of service, but fewer than either threshold.
  label='OK'
  rc=0
fi

echo "$label: $detail"
exit "$rc"
|