Merge pull request #48423 from charles-dyfis-net/bees

bees: init at 0.6.1; nixos/modules: services.bees init
This commit is contained in:
Silvan Mosberger 2018-12-02 18:38:47 +01:00 committed by GitHub
commit 4afae70e2b
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23
6 changed files with 473 additions and 0 deletions

View File

@ -340,6 +340,7 @@
./services/misc/apache-kafka.nix
./services/misc/autofs.nix
./services/misc/autorandr.nix
./services/misc/bees.nix
./services/misc/bepasty.nix
./services/misc/canto-daemon.nix
./services/misc/calibre-server.nix

View File

@ -0,0 +1,123 @@
{ config, lib, pkgs, ... }:
with lib;
let
cfg = config.services.beesd;
logLevels = { emerg = 0; alert = 1; crit = 2; err = 3; warning = 4; notice = 5; info = 6; debug = 7; };
fsOptions = with types; {
options.spec = mkOption {
type = str;
description = ''
Description of how to identify the filesystem to be duplicated by this
instance of bees. Note that deduplication crosses subvolumes; one must
not configure multiple instances for subvolumes of the same filesystem
(or block devices which are part of the same filesystem), but only for
completely independent btrfs filesystems.
</para>
<para>
This must be in a format usable by findmnt; that could be a key=value
pair, or a bare path to a mount point.
'';
example = "LABEL=MyBulkDataDrive";
};
options.hashTableSizeMB = mkOption {
type = types.addCheck types.int (n: mod n 16 == 0);
default = 1024; # 1GB; default from upstream beesd script
description = ''
Hash table size in MB; must be a multiple of 16.
</para>
<para>
A larger ratio of index size to storage size means smaller blocks of
duplicate content are recognized.
</para>
<para>
If you have 1TB of data, a 4GB hash table (which is to say, a value of
4096) will permit 4KB extents (the smallest possible size) to be
recognized, whereas a value of 1024 -- creating a 1GB hash table --
will recognize only aligned duplicate blocks of 16KB.
'';
};
options.verbosity = mkOption {
type = types.enum (attrNames logLevels ++ attrValues logLevels);
apply = v: if isString v then logLevels.${v} else v;
default = "info";
description = "Log verbosity (syslog keyword/level).";
};
options.workDir = mkOption {
type = str;
default = ".beeshome";
description = ''
Name (relative to the root of the filesystem) of the subvolume where
the hash table will be stored.
'';
};
options.extraOptions = mkOption {
type = listOf str;
default = [];
description = ''
Extra command-line options passed to the daemon. See upstream bees documentation.
'';
example = literalExample ''
[ "--thread-count" "4" ]
'';
};
};
in {
options.services.beesd = {
filesystems = mkOption {
type = with types; attrsOf (submodule fsOptions);
description = "BTRFS filesystems to run block-level deduplication on.";
default = { };
example = literalExample ''
{
root = {
spec = "LABEL=root";
hashTableSizeMB = 2048;
verbosity = "crit";
extraOptions = [ "--loadavg-target" "5.0" ];
};
}
'';
};
};
config = {
systemd.services = mapAttrs' (name: fs: nameValuePair "beesd@${name}" {
description = "Block-level BTRFS deduplication for %i";
after = [ "sysinit.target" ];
serviceConfig = let
configOpts = [
fs.spec
"verbosity=${toString fs.verbosity}"
"idxSizeMB=${toString fs.hashTableSizeMB}"
"workDir=${fs.workDir}"
];
configOptsStr = escapeShellArgs configOpts;
in {
# Values from https://github.com/Zygo/bees/blob/v0.6.1/scripts/beesd%40.service.in
ExecStart = "${pkgs.bees}/bin/bees-service-wrapper run ${configOptsStr} -- --no-timestamps ${escapeShellArgs fs.extraOptions}";
ExecStopPost = "${pkgs.bees}/bin/bees-service-wrapper cleanup ${configOptsStr}";
CPUAccounting = true;
CPUWeight = 12;
IOSchedulingClass = "idle";
IOSchedulingPriority = 7;
IOWeight = 10;
KillMode = "control-group";
KillSignal = "SIGTERM";
MemoryAccounting = true;
Nice = 19;
Restart = "on-abnormal";
StartupCPUWeight = 25;
StartupIOWeight = 25;
SyslogIdentifier = "bees"; # would otherwise be "bees-service-wrapper"
};
wantedBy = ["multi-user.target"];
}) cfg.filesystems;
};
}

55
nixos/tests/bees.nix Normal file
View File

@ -0,0 +1,55 @@
import ./make-test.nix ({ lib, ... }:
{
name = "bees";
machine = { config, pkgs, ... }: {
boot.initrd.postDeviceCommands = ''
${pkgs.btrfs-progs}/bin/mkfs.btrfs -f -L aux1 /dev/vdb
${pkgs.btrfs-progs}/bin/mkfs.btrfs -f -L aux2 /dev/vdc
'';
virtualisation.emptyDiskImages = [ 4096 4096 ];
fileSystems = lib.mkVMOverride {
"/aux1" = { # filesystem configured to be deduplicated
device = "/dev/disk/by-label/aux1";
fsType = "btrfs";
};
"/aux2" = { # filesystem not configured to be deduplicated
device = "/dev/disk/by-label/aux2";
fsType = "btrfs";
};
};
services.beesd.filesystems = {
aux1 = {
spec = "LABEL=aux1";
hashTableSizeMB = 16;
verbosity = "debug";
};
};
};
testScript =
let
withRetry = content: maxTests: sleepTime: ''
max_tests=${lib.escapeShellArg maxTests}; sleep_time=${lib.escapeShellArg sleepTime}; for ((i=0; i<max_tests; i++)); do ${content} && exit 0; sleep "$sleep_time"; done; exit 1;
'';
someContentIsShared = loc: ''[[ $(btrfs fi du -s --raw ${lib.escapeShellArg loc}/dedup-me-{1,2} | awk 'BEGIN { count=0; } NR>1 && $3 == 0 { count++ } END { print count }') -eq 0 ]]'';
in ''
# shut down the instance started by systemd at boot, so we can test our test procedure
$machine->succeed("systemctl stop beesd\@aux1.service");
$machine->succeed("dd if=/dev/urandom of=/aux1/dedup-me-1 bs=1M count=8");
$machine->succeed("cp --reflink=never /aux1/dedup-me-1 /aux1/dedup-me-2");
$machine->succeed("cp --reflink=never /aux1/* /aux2/");
$machine->succeed("sync");
$machine->fail(q(${someContentIsShared "/aux1"}));
$machine->fail(q(${someContentIsShared "/aux2"}));
$machine->succeed("systemctl start beesd\@aux1.service");
# assert that "Set Shared" column is nonzero
$machine->succeed(q(${withRetry (someContentIsShared "/aux1") 20 2}));
$machine->fail(q(${someContentIsShared "/aux2"}));
# assert that 16MB hash table size requested was honored
$machine->succeed(q([[ $(stat -c %s /aux1/.beeshome/beeshash.dat) = $(( 16 * 1024 * 1024)) ]]))
'';
})

View File

@ -0,0 +1,223 @@
#!@bash@/bin/bash
PATH=@bash@/bin:@coreutils@/bin:@utillinux@/bin:@btrfsProgs@/bin:$PATH
beesd_bin=@bees@/lib/bees/bees
# PLEASE KEEP NIX-ISMS ABOVE THIS LINE TO EASE UPSTREAM MERGE
#!/usr/bin/env bash
shopt -s extglob
# Upstream wrapper requires UUID to be used for configuration.
# However, when declaratively describing a host, we may not know its UUID, and
# shouldn't need to persist something that will differ between hosts built from
# the same configuration template.
# Thus, for using bees from NixOS, we have our own wrapper, which supports not
# just UUID but any specification permitted by findmnt
[[ $bees_debug ]] && { PS4=':${BASH_SOURCE##*/}:$LINENO+'; set -x; }
usage() {
cat >&2 <<EOF
Usage: ${BASH_SOURCE##*/} run|cleanup config-name|fsSpec [idxSizeMB=...] [verbosity=...] [workDir=...] [-- daemon-options...]
fsSpec should be in a format recognized by findmnt. Alternately,
"config-name" may refer to a file that exists in ${bees_config_dir:-/etc/bees}
with a .conf extension; if that file does not specify UUID, findmnt will be
used in addition.
Note that while config files may presently use shell arithmetic, use of this
functionality is not encouraged going forward: Setting ''idxSizeMB=4096'' is
preferred over ''DB_SIZE=$((1024*1024*1024*4))'' or ''DB_SIZE=$(( AL16M * 256 ))'',
although both of these are presently supported.
If fsSpec contains a /, it assumed to be a mount point to be looked up by
findmnt, not a config file name.
daemon-options are passed directly through to the daemon on startup, as
documented at https://github.com/Zygo/bees/blob/master/docs/options.md.
EOF
exit 1
}
die() { echo "$*" >&2; exit 1; }
allConfigNames=( blockdev fsSpec home idxSize idxSizeMB mntDir runDir status verbosity workDir )
# Alternate names for configuration values; "bees_" will always be prepended
declare -A altConfigNames=(
# from original bees wrapper
[BEESHOME]=home
[BEESSTATUS]=status
[MNT_DIR]=mntDir
[UUID]=uuid
[WORK_DIR]=runDir
[DB_SIZE]=idxSize
)
# legacy bees config files can be arbitrary shell scripts, so we need to actually evaluate them
sandboxedConfigFileEval() {
bash_exe=$(type -P bash) || exit
PATH=/var/empty ENV='' BASH_ENV='' AL128K="$((128*1024))" AL16M="$((16*1024*1024))" "$bash_exe" -r ${bees_debug+-x} \
-c 'eval "$(</dev/stdin)" >&2; for var; do [[ ${!var} ]] && printf "%q=%s\\0" "$var" "${!var}"; done' \
"${!altConfigNames[@]}" "${allConfigNames[@]}" \
<"$1"
}
readConfigFileIfExists() {
local line
[[ -s $1 ]] || return 1
while IFS= read -r -d '' line; do
line=${line%%+([[:space:]])"#"*}
[[ $line ]] || continue
[[ $line = *=* ]] || {
printf 'WARNING: Config file line not recognized: %q\n' "$line" >&2
continue
}
set_option "$line"
done < <(sandboxedConfigFileEval "$1")
}
set_option() {
local k v
k="${1%%=*}" v="${1#*=}"
[[ ${altConfigNames[$k]} ]] && k=${altConfigNames[$k]}
printf -v "bees_$k" %s "$v"
}
uuid_re='^[[:xdigit:]]{8}-[[:xdigit:]]{4}-[[:xdigit:]]{4}-[[:xdigit:]]{4}-[[:xdigit:]]{12}$'
# Shared code for setting configuration used by other operations.
#
# Reads from global associative array "opts" containing options passed in as
# key=value pairs on the command line, looks for config-file overrides, and
# sets individual global variables.
_setup() {
declare fstype
bees_fsSpec=$1; shift
# Look for file-based configuration, additional to honoring configuration on the command line
bees_config_dir="${bees_config_dir:-/etc/bees}"
if [[ $bees_fsSpec =~ $uuid_re ]]; then
bees_uuid=$bees_fsSpec
# If our spec looks like a bare UUID, and no config file exists in the new
# format, fall back to legacy config file search mechanism (grep; ewww).
if ! readConfigFileIfExists "$bees_config_dir/UUID=$bees_fsSpec.conf"; then
# Legacy approach to finding a config file: Grep for a *.conf file
# containing the UUID within its text. Permitting spaces around the "="
# appears to be a bug, but is retained for compatibility with the
# original upstream script.
allConfFiles=( "$bees_config_dir"/*.conf )
if (( ${#allConfFiles[@]} )); then
# in read or readarray with -d '', the NUL terminating the empty string is used as delimiter character.
readarray -d '' -t matchingConfFiles < <(grep -E -l -Z "^[^#]*UUID[[:space:]]*=[[:space:]]*" "${allConfFiles[@]}")
else
matchingConfFiles=( )
fi
if (( ${#matchingConfFiles[@]} == 1 )); then
# Exactly one configuration file exists in our target directory with a reference to the UUID given.
bees_config_file=${matchingConfFiles[0]}
readConfigFileIfExists "$bees_config_file"
echo "NOTE: Please consider renaming $bees_config_file to $bees_config_dir/UUID=$bees_fsSpec" >&2
echo " ...and passing UUID=$bees_fsSpec on startup." >&2
elif (( ${#matchingConfFiles[@]} > 1 )); then
# The legacy wrapper would silently use the first file and ignore
# others, but... no.
echo "ERROR: Passed a bare UUID, but multiple configuration files match it:" >&2
printf ' - %q\n' "${matchingConfFiles[@]}" >&2
die "Unable to continue."
fi
fi
else
# For a non-UUID fsSpec that is not a path, look only for a config file
# exactly matching its text.
#
# (Passing a mount point as a fsSpec is only supported with the new
# wrapper; all key=value pairs can be passed on the command line in this
# mode, so config file support is not needed).
[[ $bees_fsSpec = */* ]] || readConfigFileIfExists "$bees_config_dir/$bees_fsSpec.conf"
fi
[[ $bees_uuid ]] || {
# if bees_uuid is not in our .conf file, look it up with findmnt
read -r bees_uuid fstype < <(findmnt -n -o uuid,fstype "$bees_fsSpec") && [[ $fstype ]] || exit
[[ $fstype = btrfs ]] || die "Device type is $fstype, not btrfs"
}
[[ $bees_uuid = */* ]] || readConfigFileIfExists "$bees_config_dir/UUID=$bees_uuid.conf"
# Honor any values read from config files above; otherwise, set defaults.
bees_workDir="${bees_workDir:-.beeshome}"
bees_runDir="${bees_runDir:-/run/bees}"
bees_mntDir="${bees_mntDir:-$bees_runDir/mnt/$bees_uuid}"
bees_home="${bees_home:-$bees_mntDir/$bees_workDir}"
bees_status="${bees_status:-${bees_runDir}/$bees_uuid.status}"
bees_verbosity="${bees_verbosity:-6}"
bees_idxSizeMB="${bees_idxSizeMB:-1024}"
bees_idxSize=${bees_idxSize:-"$(( bees_idxSizeMB * 1024 * 1024 ))"}
bees_blockdev=${bees_blockdev:-"/dev/disk/by-uuid/$bees_uuid"}
[[ -b $bees_blockdev ]] || die "Block device $bees_blockdev missing"
(( bees_idxSize % (16 * 1024 * 1024) == 0 )) || die "DB size must be divisible by 16MB"
}
do_run() {
local db old_db_size
_setup "$1"; shift
mkdir -p -- "$bees_mntDir" || exit
# subvol id 5 is reserved for the root subvolume of a btrfs filesystem.
mountpoint -q "$bees_mntDir" || mount -osubvolid=5 -- "$bees_blockdev" "$bees_mntDir" || exit
if [[ -d $bees_home ]]; then
btrfs subvolume show "$bees_home" >/dev/null 2>&1 || die "$bees_home exists but is not a subvolume"
else
btrfs subvolume create "$bees_home" || exit
sync # workaround for Zygo/bees#93
fi
db=$bees_home/beeshash.dat
touch -- "$db"
old_db_size=$(stat -c %s -- "$db")
new_db_size=$bees_idxSize
if (( old_db_size != new_db_size )); then
rm -f -- "$bees_home"/beescrawl."$bees_uuid".dat
truncate -s "$new_db_size" -- "$db" || exit
fi
chmod 700 -- "$bees_home"
# BEESSTATUS and BEESHOME are the only variables handled by the legacy
# wrapper for which getenv() is called in C code.
BEESSTATUS=$bees_status BEESHOME=$bees_home exec "${beesd_bin:-/lib/bees/bees}" \
--verbose "$bees_verbosity" \
"$@" "$bees_mntDir" || exit
}
do_cleanup() {
_setup "$1"; shift
mountpoint -q "$bees_mntDir" && umount -l -- "$bees_mntDir" || exit
}
(( $# >= 2 )) || usage
declare -f "do_$1" >/dev/null 2>&1 || usage
mode=$1; shift # must be a do_* function; currently "run" or "cleanup"
declare -a args=( "$1" ); shift # pass first argument (config-name|fsSpec) through literally
# parse other arguments as key=value pairs, or pass them through literally if they do not match that form.
# similarly, any option after "--" will be passed through literally.
while (( $# )); do
if [[ $1 = *=* ]]; then
set_option "$1"
elif [[ $1 = -- ]]; then
shift
args+=( "$@" )
break
else
args+=( "$1" )
fi
shift
done
"do_$mode" "${args[@]}"

View File

@ -0,0 +1,69 @@
{ stdenv, runCommand, makeWrapper, fetchFromGitHub, bash, btrfs-progs, coreutils, pythonPackages, utillinux }:
let
version = "0.6.1";
sha256 = "0h7idclmhyp14mq6786x7f2237vqpn70gyi88ik4g70xl84yfgyh";
bees = stdenv.mkDerivation rec {
name = "bees-${version}";
inherit version;
src = fetchFromGitHub {
owner = "Zygo";
repo = "bees";
rev = "v${version}";
inherit sha256;
};
buildInputs = [
btrfs-progs # for btrfs/ioctl.h
utillinux # for uuid.h
];
nativeBuildInputs = [
pythonPackages.markdown # documentation build
];
preBuild = ''
git() { if [[ $1 = describe ]]; then echo ${version}; else command git "$@"; fi; }
export -f git
'';
postBuild = ''
unset -f git
'';
buildFlags = [
"ETC_PREFIX=/var/run/bees/configs"
];
makeFlags = [
"SHELL=bash"
"PREFIX=$(out)"
"ETC_PREFIX=$(out)/etc"
"BEES_VERSION=${version}"
"SYSTEMD_SYSTEM_UNIT_DIR=$(out)/etc/systemd/system"
];
meta = with stdenv.lib; {
homepage = "https://github.com/Zygo/bees";
description = "Block-oriented BTRFS deduplication service";
license = licenses.gpl3;
platforms = platforms.linux;
maintainers = with maintainers; [ chaduffy ];
longDescription = "Best-Effort Extent-Same: bees finds not just identical files, but also identical extents within files that differ";
};
};
in
runCommand "bees-service-${version}" {
inherit bash bees coreutils utillinux;
btrfsProgs = btrfs-progs; # needs to be a valid shell variable name
} ''
mkdir -p -- "$out/bin"
substituteAll ${./bees-service-wrapper} "$out"/bin/bees-service-wrapper
chmod +x "$out"/bin/bees-service-wrapper
ln -s ${bees}/bin/beesd "$out"/bin/beesd
''

View File

@ -21936,6 +21936,8 @@ with pkgs;
beep = callPackage ../misc/beep { };
bees = callPackage ../tools/filesystems/bees { };
blackbird = callPackage ../misc/themes/blackbird { };
bootil = callPackage ../development/libraries/bootil { };