about summary refs log tree commit diff
path: root/apps/mysqlbackup.nix
diff options
context:
space:
mode:
Diffstat (limited to 'apps/mysqlbackup.nix')
-rw-r--r--  apps/mysqlbackup.nix  428
1 files changed, 428 insertions, 0 deletions
diff --git a/apps/mysqlbackup.nix b/apps/mysqlbackup.nix
new file mode 100644
index 0000000..509e010
--- /dev/null
+++ b/apps/mysqlbackup.nix
@@ -0,0 +1,428 @@
+{ config, pkgs, lib, ... }:
+let
+ inherit (lib) mkOption mkIf mkDefault mapAttrsToList flatten hasPrefix filter
+ concatMapStringsSep concatStringsSep optionalString filterAttrs
+ splitString removeSuffix;
+ inherit (lib.types) bool str int path either enum nullOr listOf attrsOf submodule;
+ inherit (builtins) isString isBool isInt isList isPath toString length;
+
+  # Module configuration shortcut and the runtime directory that holds
+  # secrets (.cnf password includes, GPG homedir) and job symlinks.
+  cfg = config.nixsap.apps.mysqlbackup;
+  privateDir = "/run/mysqlbackup";
+
+  # Executables used by the generated scripts.
+  mysql = "${pkgs.mysql}/bin/mysql";
+  mysqldump = "${pkgs.mysql}/bin/mysqldump";
+  # s3cmd invocation; "-c <file>" is appended only when an explicit config is set.
+  s3cmd = "${pkgs.s3cmd}/bin/s3cmd ${optionalString (cfg.s3cfg != null) "-c '${cfg.s3cfg}'"}";
+
+  # cfg.encrypt may be a single path or a list of paths; normalize to a list.
+  gpgPubKeys = flatten [ cfg.encrypt ];
+  gpg = "${pkgs.gpg}/bin/gpg2";
+  # Public keyring built at evaluation time from the configured keys; it is
+  # symlinked into the runtime GPG homedir by the main script.
+  pubring = pkgs.runCommand "pubring.gpg" {} ''
+    ${gpg} --homedir . --import ${toString gpgPubKeys}
+    cp pubring.gpg $out
+  '';
+
+  # Option-declaration helpers:
+  #   default d t  - option of type t with default value d
+  #   explicit     - drop the _module attribute and null-valued options
+  #   mandatory t  - required option of type t (no default)
+  #   optional t   - option of type (nullOr t), defaulting to null
+  #   sub          - shorthand for a submodule with the given options
+  default = d: t: mkOption { type = t; default = d; };
+  explicit = filterAttrs (n: v: n != "_module" && v != null);
+  mandatory = type: mkOption { inherit type; };
+  optional = type: mkOption { type = nullOr type; default = null; };
+  sub = options: submodule { inherit options; } ;
+
+  # MySQL client connection options for one server. Every non-null option is
+  # rendered into that server's my.cnf-style defaults file (see defaultsFile);
+  # password-file is handled specially (via an !include of a runtime file)
+  # so the password never ends up in the Nix store.
+  connection = mkOption {
+    description = "Connection options used by mysqlbackup";
+    type = sub {
+      compress = default true bool;
+      host = mandatory str;
+      max-allowed-packet = optional int;
+      password-file = optional path;
+      port = optional int;
+      socket = optional path;
+      ssl = optional bool;
+      ssl-ca = optional path;
+      ssl-cert = optional path;
+      ssl-key = optional path;
+      ssl-verify-server-cert = optional bool;
+      user = optional str;
+    };
+  };
+
+  # Database/table selection shared by every server entry. "like"/"not-like"
+  # drive the SHOW DATABASES query (showDatabases); the two *-tables-like
+  # lists are expanded per database by listTables and fed to mysqldump.
+  databases = mkOption {
+    description = "What to dump and what to ignore";
+    default = {};
+    type = sub {
+      like = mkOption {
+        description = ''
+          Databases to dump. MySQL wildcards (_ and %) are supported.
+          Logical OR is applied to all entries.
+        '';
+        type = either str (listOf str);
+        default = "%";
+        example = [ "%\\_live\\_%" ];
+      };
+      not-like = mkOption {
+        description = ''
+          Databases to skip. MySQL wildcards (_ and %) are supported.
+          You don't need to specify `performance_schema` or `information_schema`
+          here, they are always ignored. Logical AND is applied to all entries.
+        '';
+        type = either str (listOf str);
+        default = [];
+        example = [ "tmp\\_%" "snap\\_%" ];
+      };
+      empty-tables-like = mkOption {
+        description = ''
+          Tables to ignore. MySQL wildcards (_ and %) are supported.
+          Note that the schemas of these tables will be dumped anyway.
+          Each table template can be prefixed with a database template.
+          In that case it will be applied to matching databases only,
+          instead of all databases'';
+        type = either str (listOf str);
+        default = [];
+        example = [ "bob%.alice\\_message" ];
+      };
+      skip-tables-like = mkOption {
+        description = ''
+          Tables to ignore. MySQL wildcards (_ and %) are supported.
+          Each table template can be prefixed with a database template.
+          In that case it will be applied to matching databases only,
+          instead of all databases'';
+        type = either str (listOf str);
+        default = [];
+        example = [ "tmp%" "%\\_backup" ];
+      };
+    };
+  };
+
+  # One entry of cfg.servers: connection options plus database selection.
+  # The attribute name doubles as the default MySQL host.
+  server = submodule ({ name, ... }:
+    {
+      options = { inherit connection databases; };
+      config.connection.host = mkDefault name;
+    }
+  );
+
+  # Collect every secret file referenced by the server connections, add the
+  # s3cmd config, and keep only those living under /run/keys/ — these are the
+  # ones handed to the deployment keyring for cfg.user.
+  connectionKeys = flatten (mapAttrsToList
+    (_: s: [ s.connection.password-file s.connection.ssl-key ])
+    cfg.servers);
+  keys = filter (f: f != null && hasPrefix "/run/keys/" f) (connectionKeys ++ [ cfg.s3cfg ]);
+
+  # SQL file listing the databases to dump on a given server: a few system
+  # schemas are always excluded, then the "like" patterns are OR'ed together
+  # and each "not-like" pattern is AND'ed on top.
+  showDatabases = name: server: pkgs.writeText "show-databases-${name}.sql" ''
+    SHOW DATABASES WHERE `Database` NOT IN ('information_schema', 'performance_schema', 'tmp', 'innodb')
+    AND (${concatMapStringsSep " OR " (e: "`Database` LIKE '${e}'") (flatten [server.databases.like])})
+    ${concatMapStringsSep " " (e: "AND `Database` NOT LIKE '${e}'") (flatten [server.databases.not-like])}
+    ;
+  '';
+
+  # Render a my.cnf "defaults file" for one server, used via --defaults-file.
+  # The password is NOT inlined (it would end up world-readable in the Nix
+  # store); instead, when password-file is set, the file pulls it in with
+  # "!include" from ${privateDir}/cnf/<name>, which the main script writes
+  # at runtime.
+  defaultsFile = name: server:
+    let
+      inc = optionalString (server.connection.password-file != null)
+        "!include ${privateDir}/cnf/${name}";
+      # Format one option value in my.cnf syntax.
+      # NOTE(review): plain strings are emitted unquoted — assumes no
+      # connection option value contains spaces or '#'; verify if str-typed
+      # options ever carry such values.
+      show = n: v:
+        if isBool v then (if v then "1" else "0")
+        else if isInt v then toString v
+        else if isString v then "${v}"
+        else if isPath v then "'${v}'"
+        else abort "Unrecognized option ${n}";
+    in pkgs.writeText "my-${name}.cnf"
+      ( concatStringsSep "\n" (
+        [ "[client]" ]
+        ++ mapAttrsToList (k: v: "${k} = ${show k v}")
+             (filterAttrs (k: _: k != "password-file") (explicit server.connection))
+        ++ [ "${inc}\n" ]
+        )
+      );
+
+  # Bash script that expands table templates into concrete "schema.table"
+  # names for the database passed as $1, by querying information_schema.
+  # A template without a dot applies to any database ("%." is prepended);
+  # a "db%.table%" template is restricted to matching databases. When the
+  # template list is empty the script body is empty and prints nothing.
+  listTables = name: server: tables:
+    let
+      anyDb = s: if 1 == length (splitString "." s)
+                 then "%.${s}" else s;
+      query = optionalString (0 < length tables) ''
+        set -euo pipefail
+        db="$1"
+        cat <<SQL | ${mysql} --defaults-file=${defaultsFile name server} -N
+        SELECT CONCAT(table_schema, '.', table_name) AS tables
+        FROM information_schema.tables HAVING tables LIKE '$db.%'
+        AND ( ${concatMapStringsSep " OR " (e: "tables LIKE '${e}'")
+               (map anyDb tables)} );
+        SQL
+      '';
+    in pkgs.writeBashScript "list-tables-${name}" query;
+
+  # Per-database dump job for one server. The script is symlinked as
+  # ${privateDir}/jobs/<server>/<db> (see mkJobs) and derives the database
+  # name from $0; it expects $DATE in the environment and the current backup
+  # set directory to exist. Pipeline: mysqldump (schema-only for
+  # empty-tables-like tables, full data for everything else, skipping
+  # skip-tables-like) -> pxz -> optional GPG encryption -> optional S3 upload.
+  # Dumps are written as *.tmp and renamed only after xz -t verifies them, so
+  # an interrupted run never leaves a truncated dump behind; existing targets
+  # are not re-dumped, making the job idempotent within one $DATE.
+  job = name: server: pkgs.writeBashScript "job-${name}" ''
+    set -euo pipefail
+    db=$(basename "$0")
+    cd "${cfg.dumpDir}/$DATE"
+
+    dump="$db@${name},$DATE.mysql.xz"
+    ${if (gpgPubKeys != []) then ''
+      aim="$dump.gpg"
+    '' else ''
+      aim="$dump"
+    ''}
+
+    if ! [ -r "$aim" ]; then
+      {
+        empty=()
+
+        empty+=( $(${listTables name server server.databases.empty-tables-like} "$db") )
+        if [ ''${#empty[@]} -gt 0 ]; then
+          tables=( "''${empty[@]/#*./}" )
+          ${mysqldump} --defaults-file=${defaultsFile name server} \
+            --skip-comments --force --single-transaction \
+            --no-data "$db" "''${tables[@]}"
+        fi
+
+        empty+=( $(${listTables name server server.databases.skip-tables-like} "$db") )
+
+        if [ ''${#empty[@]} -gt 0 ]; then
+          ignoretables+=( "''${empty[@]/#/--ignore-table=}" )
+        fi
+
+        ${mysqldump} --defaults-file=${defaultsFile name server} \
+          --skip-comments --force --single-transaction \
+          "''${ignoretables[@]:+''${ignoretables[@]}}" \
+          "$db"
+      } | ${pkgs.pxz}/bin/pxz -2 -T2 > "$dump".tmp
+      ${pkgs.xz}/bin/xz -t -v "$dump".tmp
+      mv "$dump".tmp "$dump"
+
+      ${optionalString (gpgPubKeys != []) ''
+        recipient=( $(${gpg} --homedir '${privateDir}/gnupg' -k --with-colons --fast-list-mode | \
+          ${pkgs.gawk}/bin/awk -F: '/^pub/{print $5}') )
+        r=( "''${recipient[@]/#/-r}" )
+        ${gpg} --homedir '${privateDir}/gnupg' --batch --no-tty --yes \
+          "''${r[@]}" --trust-model always \
+          --compress-algo none \
+          -v -e "$dump"
+        rm -f "$dump"
+      ''}
+    else
+      echo "$aim exists. Not dumping." >&2
+    fi
+    ${optionalString (cfg.s3uri != null) ''
+      remote="${removeSuffix "/" cfg.s3uri}/$DATE/$aim"
+      if ! ${s3cmd} ls "$remote" | ${pkgs.gnugrep}/bin/grep -qF "/$aim"; then
+        ${s3cmd} put "$aim" "$remote"
+      else
+        echo "$remote exists. Not uploading." >&2
+      fi
+    ''}
+  '';
+
+  # Script that (re)creates ${privateDir}/jobs/<server>/<db> symlinks, one per
+  # database reported by showDatabases, all pointing at that server's job
+  # script. The database list is shuffled so dump order varies between runs.
+  mkJobs = name: server: pkgs.writeBashScript "mkjobs-${name}" ''
+    set -euo pipefail
+    mkdir -p '${privateDir}/jobs/${name}'
+    for db in $(${mysql} --defaults-file=${defaultsFile name server} -N < ${showDatabases name server} | shuf)
+    do
+      ln -svf ${job name server} "${privateDir}/jobs/${name}/$db"
+    done
+  '';
+
+  # Runs as root before the service starts (PermissionsStartOnly = true):
+  # create the dump directory and hand it to the backup user, and recreate
+  # the private runtime directory from scratch so no stale credentials or
+  # job symlinks survive between runs.
+  preStart = ''
+    mkdir --mode=0750 -p '${cfg.dumpDir}'
+    chown -R ${cfg.user}:${cfg.user} '${cfg.dumpDir}'
+    chmod -R u=rwX,g=rX,o= '${cfg.dumpDir}'
+
+    rm -rf '${privateDir}'
+    mkdir --mode=0700 -p '${privateDir}'
+    chown ${cfg.user}:${cfg.user} '${privateDir}'
+  '';
+
+  # Top-level driver installed as bin/mysqlbackup and run by systemd as
+  # cfg.user. Steps:
+  #  1. delete leftover *.tmp files; rotate old backup sets, keeping at most
+  #     cfg.slots sets and staying under cfg.storage percent of the
+  #     filesystem (estimated as average-set-size * (sets + 1));
+  #  2. write per-server .cnf password includes and link the GPG pubring;
+  #  3. enumerate databases on all servers in parallel (mkJobs, best-effort);
+  #  4. run the per-database job symlinks via GNU parallel with a joblog
+  #     stored next to the dumps; exit non-zero if any job ultimately failed.
+  main = pkgs.writeBashScriptBin "mysqlbackup" ''
+    set -euo pipefail
+    umask 0027
+    DATE=$(date --iso-8601)
+    HOME='${privateDir}'
+    PARALLEL_SHELL=${pkgs.bash}/bin/bash
+    export DATE
+    export HOME
+    export PARALLEL_SHELL
+
+    clean() {
+      ${pkgs.findutils}/bin/find '${cfg.dumpDir}' -type f -name '*.tmp' -delete || true
+    }
+
+    listSets() {
+      ${pkgs.findutils}/bin/find '${cfg.dumpDir}' \
+        -maxdepth 1 -mindepth 1 -type d -name '????-??-??' \
+        | sort -V
+    }
+
+    enoughStorage() {
+      local n
+      local used
+      local total
+      local avg
+      local p
+      n=$(listSets | wc -l)
+      used=$(du -x -s --block-size=1M '${cfg.dumpDir}' | cut -f1)
+      total=$(df --output=size --block-size=1M '${cfg.dumpDir}' | tail -n 1)
+      if [ "$n" -eq 0 ]; then
+        echo "no sets" >&2
+        return 0
+      fi
+
+      avg=$(( used / n ))
+      p=$(( 100 * avg * (n + 1) / total ))
+      printf "estimated storage: %d of %d MiB (%d%%, max ${toString cfg.storage}%%)\n" \
+        "$((used + avg))" "$total" "$p" >&2
+      if [ "$p" -le ${toString cfg.storage} ]; then
+        return 0
+      else
+        return 1
+      fi
+    }
+
+    clean
+
+    listSets | head -n -${toString (cfg.slots - 1)} \
+      | ${pkgs.findutils}/bin/xargs --no-run-if-empty rm -rfv \
+      || true
+
+    while ! enoughStorage; do
+      listSets | head -n 1 \
+        | ${pkgs.findutils}/bin/xargs --no-run-if-empty rm -rfv \
+        || true
+    done
+
+    mkdir -p "${cfg.dumpDir}/$DATE"
+    mkdir -p '${privateDir}/cnf'
+    mkdir -p '${privateDir}/jobs'
+
+    ${optionalString (gpgPubKeys != []) ''
+      # shellcheck disable=SC2174
+      mkdir --mode=0700 -p '${privateDir}/gnupg'
+      ln -sf ${pubring} '${privateDir}/gnupg/pubring.gpg'
+    ''}
+
+    ${concatStringsSep "\n" (
+      mapAttrsToList (n: s: ''
+        printf '[client]\npassword=' > '${privateDir}/cnf/${n}'
+        cat '${s.connection.password-file}' >> '${privateDir}/cnf/${n}'
+      '') (filterAttrs (_: s: s.connection.password-file != null) cfg.servers)
+    )}
+
+    {
+      cat <<'LIST'
+    ${concatStringsSep "\n" (mapAttrsToList (mkJobs) cfg.servers)}
+    LIST
+    } | ${pkgs.parallel}/bin/parallel \
+      --halt-on-error 0 \
+      --jobs 100% \
+      --line-buffer \
+      --no-notice \
+      --no-run-if-empty \
+      --retries 2 \
+      --shuf \
+      --tagstr '* {}:' \
+      --timeout ${toString (10 * 60)} \
+      || true
+
+    failed=0
+    log="${cfg.dumpDir}/$DATE/joblog.txt"
+
+    {
+      cd '${privateDir}/jobs' && find -type l -printf '%P\n';
+    } | ${pkgs.parallel}/bin/parallel \
+      --halt-on-error 0 \
+      --joblog "$log" \
+      --jobs '${toString cfg.jobs}' \
+      --line-buffer \
+      --no-notice \
+      --no-run-if-empty \
+      --retries 2 \
+      --tagstr '* {}:' \
+      --timeout ${toString (6 * 60 * 60)} \
+      '${privateDir}/jobs/{}' || failed=$?
+
+    cat "$log"
+    clean
+
+    du -sh "${cfg.dumpDir}/$DATE" || true
+    exit "$failed"
+  '';
+
+in {
+  options.nixsap.apps.mysqlbackup = {
+    user = mkOption {
+      description = "User to run as";
+      default = "mysqlbackup";
+      type = str;
+    };
+
+    startAt = mkOption {
+      description = "Time to start (systemd format)";
+      default = "02:00";
+      type = str;
+    };
+
+    dumpDir = mkOption {
+      description = "Directory to save dumps in";
+      default = "/mysqlbackup";
+      type = path;
+    };
+
+    slots = mkOption {
+      description = ''
+        How many backup sets should be kept locally.
+        However, old sets will be removed anyway if storage
+        constraints apply.
+      '';
+      default = 60;
+      type = int;
+    };
+
+    storage = mkOption {
+      description = ''
+        Percent of storage backups can occupy.
+      '';
+      default = 75;
+      type = int;
+    };
+
+    encrypt = mkOption {
+      description = "Public GPG key(s) for encrypting the dumps";
+      default = [ ];
+      type = either path (listOf path);
+    };
+
+    # MySQL servers to back up, keyed by name; the key is the default host
+    # (see the `server` submodule above). The service is enabled iff this
+    # set is non-empty.
+    servers = mkOption {
+      default = {};
+      type = attrsOf server;
+    };
+
+    jobs = mkOption {
+      description = ''
+        Number of jobs (mysqldump) to run in parallel.
+        In the format of GNU Parallel, e. g. "100%", -1. +3, 7, etc.
+      '';
+      default = "50%";
+      type = either int str;
+    };
+
+    s3cfg = mkOption {
+      description = "s3cmd config file (secret)";
+      type = nullOr path;
+      default = null;
+    };
+
+    s3uri = mkOption {
+      description = "S3 bucket URI with prefix in s3cmd format";
+      type = nullOr str;
+      default = null;
+      example = "s3://backups/nightly";
+    };
+  };
+
+  # Only instantiate the service when at least one server is configured.
+  config = mkIf (cfg.servers != {}) {
+    # Dedicated system user, plus the /run/keys/ secrets it must be able to read.
+    nixsap.system.users.daemons = [ cfg.user ];
+    nixsap.deployment.keyrings.${cfg.user} = keys;
+    systemd.services.mysqlbackup = {
+      description = "MySQL backup";
+      after = [ "local-fs.target" "keys.target" "network.target" ];
+      wants = [ "keys.target" ];
+      startAt = cfg.startAt;
+      inherit preStart;
+      serviceConfig = {
+        ExecStart = "${main}/bin/mysqlbackup";
+        User = cfg.user;
+        # preStart must run as root (it chowns/chmods the dump and private
+        # dirs); only the main process drops to cfg.user.
+        PermissionsStartOnly = true;
+      };
+    };
+  };
+}