#!/usr/bin/env bash
#
# Nagios-style check of instance health behind a classic AWS ELB.
#
# Usage:
#   <script> -h ENDPOINT [-f BOTO_CONFIG] [-w WARN_PERCENT] [-c CRIT_PERCENT]
#
# Options:
#   -h ENDPOINT      ELB DNS name, or a name whose CNAME chain leads to one
#                    (something matching *.*.elb.amazonaws.com*).
#   -f BOTO_CONFIG   value exported as BOTO_CONFIG for child processes.
#   -w WARN_PERCENT  percent of instances out of service that triggers
#                    WARNING (default 20).
#   -c CRIT_PERCENT  percent of instances out of service that triggers
#                    CRITICAL (default 33).
#
# Environment:
#   outOfServiceCountWarn / outOfServiceCountCrit
#                    if set and non-empty, used as absolute instance-count
#                    thresholds instead of the percentage-derived ones.
#
# Requires: dig, aws, jq.
#
# Output: one status line "<STATE>: <elb> - ...|<perfdata>" on stdout.
#
# Exit status:
#   0    OK
#   1    WARNING (also used for usage errors)
#   2    CRITICAL
#   255  the endpoint could not be resolved to an ELB hostname
set -euo pipefail

outOfServicePercentWarn=20
outOfServicePercentCrit=33
endpoint=''

while [ $# -gt 0 ]; do
  case "$1" in
    -f | -h | -w | -c)
      # Every supported option takes a value; report a missing one
      # explicitly rather than tripping `set -u` on an unbound $2.
      if [ $# -lt 2 ]; then
        echo "$0: option $1 requires an argument" >&2
        exit 1
      fi
      case "$1" in
        -f) export BOTO_CONFIG="$2" ;;
        -h) endpoint="$2" ;;
        -w) outOfServicePercentWarn="$2" ;;
        -c) outOfServicePercentCrit="$2" ;;
      esac
      shift 2
      ;;
    *)
      echo "$0: unsupported argument: $1" >&2
      exit 1
      ;;
  esac
done

if [[ -z "$endpoint" ]]; then
  echo "$0: missing endpoint (-h)" >&2
  exit 1
fi

# The percentages are fed into $(( )) below, which would evaluate arbitrary
# text as an arithmetic expression; accept plain non-negative integers only.
for percent in "$outOfServicePercentWarn" "$outOfServicePercentCrit"; do
  if ! [[ "$percent" =~ ^[0-9]+$ ]]; then
    echo "$0: threshold must be a non-negative integer: $percent" >&2
    exit 1
  fi
done

# Follow CNAME records until the name looks like an ELB hostname.
# The positional parameters are gone by now (shifted away by the option
# loop), so keep the originally requested name for the error message.
requestedEndpoint=$endpoint
hops=0
while [[ "$endpoint" != *.*.elb.amazonaws.com* ]]; do
  endpoint=$(dig "$endpoint" CNAME +short)
  hops=$((hops + 1))
  # An empty answer means there is no further CNAME to follow.
  if [[ -z "$endpoint" ]] || ((hops > 10)); then
    echo "failed to resolve '$requestedEndpoint'" >&2
    exit 255
  fi
done

# Hostname layout: <label>.<region>.elb.amazonaws.com
elbLabel=${endpoint%%.*}
afterLabel=${endpoint#*.}
region=${afterLabel%%.*}

# The load balancer name is the first label without an optional "internal-"
# prefix and without the trailing "-<digits>" suffix; a label that does not
# fit that shape is used unchanged.
elbName=$elbLabel
if [[ "$elbLabel" =~ ^(internal-)?(.*)-[0-9]+$ ]]; then
  elbName=${BASH_REMATCH[2]}
fi

cmd=(aws elb describe-instance-health --region "$region" --load-balancer-name "$elbName")
json=$("${cmd[@]}")

totalCount=$(jq -c '.InstanceStates | length' <<<"$json")
outOfServiceInfo=$(jq -c '.InstanceStates | map(select(.State == "OutOfService") | .InstanceId)' <<<"$json")
outOfServiceCount=$(jq -r 'length' <<<"$outOfServiceInfo")
outOfServiceInstances=$(jq -r 'join(", ")' <<<"$outOfServiceInfo")

# Absolute thresholds: environment override if present, otherwise the
# percentage of the total instance count (integer division, rounds down).
outOfServiceCountWarn=${outOfServiceCountWarn:-$((totalCount * outOfServicePercentWarn / 100))}
outOfServiceCountCrit=${outOfServiceCountCrit:-$((totalCount * outOfServicePercentCrit / 100))}

stat="total=$totalCount out_of_service=$outOfServiceCount;$outOfServiceCountWarn;$outOfServiceCountCrit"
detail="$elbName - $outOfServiceCount/$totalCount out of service: $outOfServiceInstances|$stat"

# Zero instances out of service is always OK, even when a threshold
# rounds down to 0.
if [ "$outOfServiceCount" -eq 0 ]; then
  echo "OK: $elbName - $totalCount instances|$stat"
  exit 0
elif [ "$outOfServiceCount" -ge "$outOfServiceCountCrit" ]; then
  echo "CRITICAL: $detail"
  exit 2
elif [ "$outOfServiceCount" -ge "$outOfServiceCountWarn" ]; then
  echo "WARNING: $detail"
  exit 1
else
  echo "OK: $detail"
  exit 0
fi