aboutsummaryrefslogtreecommitdiff
path: root/pkgs/check_aws_ec2_elb/check_aws_ec2_elb
blob: 7b53cc9ff2aa3219569f7435a7ec0df837a3348b (plain)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
#!/usr/bin/env bash

set -euo pipefail

# Default thresholds, expressed as a percentage of the instances registered
# with the load balancer that are out of service.
outOfServicePercentWarn=20
outOfServicePercentCrit=33
# Host name given with -h.
endpoint=''

#######################################
# Parse the command-line options.
# Globals:
#   BOTO_CONFIG             (exported) set by -f
#   endpoint                set by -h
#   outOfServicePercentWarn set by -w
#   outOfServicePercentCrit set by -c
# Arguments:
#   The script's command line ("$@").
# Outputs:
#   Diagnostics on stderr for an unsupported option or a missing value.
# Returns:
#   Exits the script with status 1 on a usage error.
#######################################
parse_args() {
  while [ $# -gt 0 ]; do
    # Validate first: every supported option takes exactly one value.
    # Checking the arity here reports a proper usage error instead of the
    # "unbound variable" failure that expanding "$2" triggers under `set -u`.
    case "$1" in
      -f | -h | -w | -c)
        if [ $# -lt 2 ]; then
          echo "$0: option $1 requires an argument" >&2
          exit 1
        fi
        ;;
      *)
        echo "$0: unsupported argument: $1" >&2
        exit 1
        ;;
    esac

    case "$1" in
      -f) export BOTO_CONFIG="$2" ;;
      -h) endpoint="$2" ;;
      -w) outOfServicePercentWarn="$2" ;;
      -c) outOfServicePercentCrit="$2" ;;
    esac
    shift 2
  done
}

parse_args "$@"

# Base AWS CLI invocation; further arguments are appended to this array
# before it is executed.
cmd=( aws elb describe-instance-health )

# Follow CNAME records (at most 10 lookups) until the name matches the
# pattern *.*.elb.amazonaws.com*.
# The name as originally given is kept for the error message; "$1" cannot be
# used for that because it does not hold the endpoint at this point (and is
# unset, which is fatal under `set -u`, once all options have been shifted).
requestedEndpoint=$endpoint
c=0
while [[ "$endpoint" != *.*.elb.amazonaws.com* ]]; do
  # An empty name (nothing given, or the previous lookup returned no CNAME)
  # can never resolve, so skip the lookup and fail right away below.
  if [[ -n "$endpoint" ]]; then
    endpoint=$(dig "$endpoint" CNAME +short)
  fi
  c=$(( c + 1 ))
  if [[ -z "$endpoint" ]] || (( c > 10 )); then
    echo "failed to resolve '$requestedEndpoint'" >&2
    exit 255
  fi
done

# Split the ELB DNS name into its first two dot-separated labels: the first
# carries the load balancer name, the second is passed as the AWS region.
elbLabel=${endpoint%%.*}
afterFirstDot=${endpoint#*.}
elbRegion=${afterFirstDot%%.*}

# Strip the optional "internal-" prefix and the trailing "-<digits>" suffix
# from the first label; if the label does not have that shape it is used
# unchanged.
if [[ "$elbLabel" =~ ^(internal-)?(.*)-[0-9]+$ ]]; then
  elbName=${BASH_REMATCH[2]}
else
  elbName=$elbLabel
fi

cmd+=( --region "$elbRegion" --load-balancer-name "$elbName" )

json=$("${cmd[@]}")

# Total number of instance states reported, and the ids of those whose
# State is "OutOfService" (kept as a compact JSON array).
totalCount=$(jq -c '.InstanceStates | length' <<<"$json")
outOfServiceInfo=$(jq -c '.InstanceStates | map(select(.State == "OutOfService") | .InstanceId)' <<<"$json")
outOfServiceCount=$(jq -r 'length' <<<"$outOfServiceInfo")

# Convert the percentage thresholds into instance counts, unless a count was
# already supplied through the environment.  The division rounds UP so that
# "count >= threshold" is exactly "count * 100 >= total * percent"; rounding
# down would raise the alert below the configured percentage (for example
# 1 of 5 instances, i.e. 20%, would already satisfy a 33% threshold).
outOfServiceCountWarn=${outOfServiceCountWarn:-$(( (totalCount * outOfServicePercentWarn + 99) / 100 ))}
outOfServiceCountCrit=${outOfServiceCountCrit:-$(( (totalCount * outOfServicePercentCrit + 99) / 100 ))}

# Performance data appended after the "|" separator of the status line.
stat="total=$totalCount out_of_service=$outOfServiceCount;$outOfServiceCountWarn;$outOfServiceCountCrit"
outOfServiceInstances=$(jq -r 'join(", ")' <<<"$outOfServiceInfo")

# Nothing out of service: always OK, whatever the computed thresholds are.
if [ "$outOfServiceCount" -eq 0 ]; then
  echo "OK: $elbName - $totalCount instances|$stat"
  exit 0
fi

# Otherwise pick the most severe state whose threshold has been reached.
# Exit status: 0 = OK, 1 = WARNING, 2 = CRITICAL.
if [ "$outOfServiceCount" -ge "$outOfServiceCountCrit" ]; then
  label=CRITICAL
  status=2
elif [ "$outOfServiceCount" -ge "$outOfServiceCountWarn" ]; then
  label=WARNING
  status=1
else
  label=OK
  status=0
fi

echo "$label: $elbName - $outOfServiceCount/$totalCount out of service: $outOfServiceInstances|$stat"
exit "$status"