diff options
Diffstat (limited to 'apps/mariadb/slave-watchdog.nix')
-rw-r--r-- | apps/mariadb/slave-watchdog.nix | 103 |
1 files changed, 103 insertions, 0 deletions
# Reconstructed from the diff that adds apps/mariadb/slave-watchdog.nix
# (new file, mode 100644).
#
# Nix function producing the text of a bash watchdog for one MariaDB
# replication connection.
#
# Arguments (interpolated verbatim into the script):
#   changeMaster - command invoked with the connection name; its stdout is
#                  piped into mysql.
#   importDump   - command invoked with the connection name; its stdout is
#                  piped into mysql.
#
# The generated script takes the connection name as its first argument,
# classifies the connection state and reacts to it:
#   ok / unknown     -> log and exit with the status of that log command
#   none             -> wait up to 10 rounds, then reset, reconfigure, import
#   io_error:N       -> stop, reconfigure, start, exit 1
#   sql_error:1205   -> start again, exit 1
#   sql_error:N      -> escalate: pause/start, then stop/reconfigure/start,
#                       then reset and exit 1
# The mysql.* routines used below are invoked via CALL; they are presumably
# stored procedures installed elsewhere -- confirm against the rest of the
# repository.
{ changeMaster, importDump }: ''
set -euo pipefail

# Name of the replication connection to watch.
conn="$1"

# Scratch file holding the most recent SHOW SLAVE STATUS output.
status_file=$(mktemp)
trap 'rm -f "$status_file"' EXIT

# Invoke the routine mysql.<$1> for the watched connection.
# All mysql output is sent to stderr.
call_slave_proc () {
  mysql -v -N -e "CALL mysql.$1('$conn')" >&2
}

# Print a one-word classification of connection $1 on stdout:
#   unknown        - the server did not accept a trivial statement
#   none           - the status query failed or shows no non-zero
#                    Master_Server_Id
#   ok             - Last_IO_Errno and Last_SQL_Errno are both 0
#   sql_error:N    - Last_IO_Errno is 0, Last_SQL_Errno is N
#   io_error:N     - anything else; N is Last_IO_Errno
# Diagnostics (matched server id, error text) go to stderr.
slave_status () {
  local io_code sql_code codes

  # Connectivity probe.
  mysql -e ';' || { echo unknown; return; }

  # Dump the status with leading blanks stripped so fields start in column 1.
  if ! mysql -e "SHOW SLAVE '$1' STATUS\\G" | sed 's,^ *,,' > "$status_file"; then
    echo none
    return
  fi

  # The matched Master_Server_Id text is echoed to stderr for the log.
  if ! grep -oE '\bMaster_Server_Id:\s*[1-9][0-9]*' "$status_file" >&2; then
    echo none
    return
  fi

  io_code=$(awk '/Last_IO_Errno:/ {print $2}' "$status_file")
  sql_code=$(awk '/Last_SQL_Errno:/ {print $2}' "$status_file")
  codes="$io_code:$sql_code"

  if [[ "$codes" == 0:0 ]]; then
    echo ok
  elif [[ "$codes" == 0:* ]]; then
    awk '/Last_SQL_Error:/ {print $0}' "$status_file" >&2
    echo "sql_error:$sql_code"
  else
    awk '/Last_IO_Error:/ {print $0}' "$status_file" >&2
    echo "io_error:$io_code"
  fi
  return
}

sql_errors=0
none_count=0
while :; do
  state=$(slave_status "$conn")

  case "$state" in
    ok|unknown)
      echo "status: $state" >&2
      exit
      ;;

    none)
      # XXX existing slave might not be initialized yet after mariadb restarts
      none_count=$(( none_count + 1 ))
      echo "status: $state (count: $none_count)" >&2
      if (( none_count < 10 )); then
        sleep 1m
        continue
      fi

      # Still nothing after ten rounds: rebuild the connection from scratch.
      call_slave_proc resetSlave
      ${changeMaster} "$conn" | mysql
      if ! ${importDump} "$conn" | mysql; then
        echo 'Import failed. Starting over' >&2
        call_slave_proc resetSlave
        exit 1
      fi
      call_slave_proc startSlave
      exit
      ;;

    io_error:*)
      echo "status: $state" >&2
      call_slave_proc stopSlave
      ${changeMaster} "$conn" | mysql
      call_slave_proc startSlave
      exit 1
      ;;

    sql_error:1205) # Lock wait timeout exceeded
      echo "status: $state" >&2
      call_slave_proc startSlave
      exit 1
      ;;

    sql_error:*)
      sql_errors=$(( sql_errors + 1 ))
      echo "status: $state (count: $sql_errors)" >&2

      # Third strike: give up on this connection.
      if (( sql_errors > 2 )); then
        echo '!!! Resetting slave !!!' >&2
        call_slave_proc resetSlave
        exit 1
      fi

      if (( sql_errors <= 1 )); then
        # First strike: pause briefly and resume.
        call_slave_proc pauseSlave
        sleep 1s
        call_slave_proc startSlave
      else
        # Second strike: stop and reconfigure before resuming.
        call_slave_proc stopSlave
        # this *unlikely* *may* change replication option (ignore tables, etc.)
        ${changeMaster} "$conn" | mysql
        call_slave_proc startSlave
      fi
      sleep 2m
      ;;

    *)
      echo "BUG: $state" >&2
      exit 255
      ;;
  esac
  sleep 1s
done
''