Merge pull request #255025 from tweag/fileset.union

`lib.fileset.union`, `lib.fileset.unions`: init
This commit is contained in:
Robert Hensing 2023-09-21 11:49:57 +02:00 committed by GitHub
commit 5c97f01a9d
6 changed files with 612 additions and 177 deletions


@ -9,7 +9,7 @@ File sets are easy and safe to use, providing obvious and composable semantics w
These sections apply to the entire library.
See the [function reference](#sec-functions-library-fileset) for function-specific documentation.
The file set library is currently very limited but is being expanded to include more functions over time.
The file set library is currently somewhat limited but is being expanded to include more functions over time.
## Implicit coercion from paths to file sets {#sec-fileset-path-coercion}


@ -41,13 +41,21 @@ An attribute set with these values:
- `_type` (constant string `"fileset"`):
Tag to indicate this value is a file set.
- `_internalVersion` (constant string equal to the current version):
Version of the representation
- `_internalVersion` (constant `2`, the current version):
Version of the representation.
- `_internalBase` (path):
Any files outside of this path cannot influence the set of files.
This is always a directory.
- `_internalBaseRoot` (path):
The filesystem root of `_internalBase`, same as `(lib.path.splitRoot _internalBase).root`.
This is stored here because it needs to be computed anyway, and the computation shouldn't be duplicated.
- `_internalBaseComponents` (list of strings):
The path components of `_internalBase`, same as `lib.path.subpath.components (lib.path.splitRoot _internalBase).subpath`.
This is stored here because it needs to be computed anyway, and the computation shouldn't be duplicated.
- `_internalTree` ([filesetTree](#filesettree)):
A tree representation of all included files under `_internalBase`.
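For illustration, here is a hedged sketch of what a value with this representation could look like, assuming a hypothetical base path `/home/user/project` and a small tree:

```nix
{
  _type = "fileset";
  _internalVersion = 2;
  _internalBase = /home/user/project;
  _internalBaseRoot = /.;
  _internalBaseComponents = [ "home" "user" "project" ];
  _internalTree = {
    "default.nix" = "regular";
    "src" = "directory";
    "docs" = null;
  };
}
```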
@ -59,8 +67,8 @@ An attribute set with these values:
One of the following:
- `{ <name> = filesetTree; }`:
A directory with a nested `filesetTree` value for every directory entry.
Even entries that aren't included are present as `null` because it improves laziness and allows using this as a sort of `builtins.readDir` cache.
A directory with a nested `filesetTree` value for directory entries.
Entries not included may either be omitted or set to `null`, as necessary to improve efficiency or laziness.
- `"directory"`:
A directory with all its files included recursively, allowing early cutoff for some operations.
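As a hypothetical sketch combining these forms: a tree that includes everything under `src`, includes the single file `default.nix` (stored with its `builtins.readDir` file type), and excludes `docs` could look like this:

```nix
{
  "src" = "directory";
  "default.nix" = "regular";
  "docs" = null;
}
```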
@ -169,15 +177,9 @@ Arguments:
## To update in the future
Here's a list of places in the library that need to be updated in the future:
- > The file set library is currently very limited but is being expanded to include more functions over time.
- > The file set library is currently somewhat limited but is being expanded to include more functions over time.
in [the manual](../../doc/functions/fileset.section.md)
- > Currently the only way to construct file sets is using implicit coercion from paths.
in [the `toSource` reference](./default.nix)
- > For now filesets are always paths
in [the `toSource` implementation](./default.nix), also update the variable name there
- Once a tracing function exists, `__noEval` in [internal.nix](./internal.nix) should mention it
- If/Once a function to convert `lib.sources` values into file sets exists, the `_coerce` and `toSource` functions should be updated to mention that function in the error when such a value is passed
- If/Once a function exists that can optionally include a path depending on whether it exists, the error message for the path not existing in `_coerce` should mention the new function


@ -1,4 +1,6 @@
#!/usr/bin/env bash
#!/usr/bin/env nix-shell
#!nix-shell -i bash -p sta jq bc nix -I nixpkgs=../..
# shellcheck disable=SC2016
# Benchmarks lib.fileset
# Run:
@ -28,38 +30,6 @@ work="$tmp/work"
mkdir "$work"
cd "$work"
# Create a fairly populated tree
touch f{0..5}
mkdir d{0..5}
mkdir e{0..5}
touch d{0..5}/f{0..5}
mkdir -p d{0..5}/d{0..5}
mkdir -p e{0..5}/e{0..5}
touch d{0..5}/d{0..5}/f{0..5}
mkdir -p d{0..5}/d{0..5}/d{0..5}
mkdir -p e{0..5}/e{0..5}/e{0..5}
touch d{0..5}/d{0..5}/d{0..5}/f{0..5}
mkdir -p d{0..5}/d{0..5}/d{0..5}/d{0..5}
mkdir -p e{0..5}/e{0..5}/e{0..5}/e{0..5}
touch d{0..5}/d{0..5}/d{0..5}/d{0..5}/f{0..5}
bench() {
NIX_PATH=nixpkgs=$1 NIX_SHOW_STATS=1 NIX_SHOW_STATS_PATH=$tmp/stats.json \
nix-instantiate --eval --strict --show-trace >/dev/null \
--expr '(import <nixpkgs/lib>).fileset.toSource { root = ./.; fileset = ./.; }'
cat "$tmp/stats.json"
}
echo "Running benchmark on index" >&2
bench "$nixpkgs" > "$tmp/new.json"
(
echo "Checking out $compareTo" >&2
git -C "$nixpkgs" worktree add --quiet "$tmp/worktree" "$compareTo"
trap 'git -C "$nixpkgs" worktree remove "$tmp/worktree"' EXIT
echo "Running benchmark on $compareTo" >&2
bench "$tmp/worktree" > "$tmp/old.json"
)
declare -a stats=(
".envs.elements"
".envs.number"
@ -77,18 +47,94 @@ declare -a stats=(
".values.number"
)
different=0
for stat in "${stats[@]}"; do
oldValue=$(jq "$stat" "$tmp/old.json")
newValue=$(jq "$stat" "$tmp/new.json")
if (( oldValue != newValue )); then
percent=$(bc <<< "scale=100; result = 100/$oldValue*$newValue; scale=4; result / 1")
if (( oldValue < newValue )); then
echo -e "Statistic $stat ($newValue) is \e[0;31m$percent% (+$(( newValue - oldValue )))\e[0m of the old value $oldValue" >&2
else
echo -e "Statistic $stat ($newValue) is \e[0;32m$percent% (-$(( oldValue - newValue )))\e[0m of the old value $oldValue" >&2
runs=10
run() {
# Empty the file
: > cpuTimes
for i in $(seq 0 "$runs"); do
NIX_PATH=nixpkgs=$1 NIX_SHOW_STATS=1 NIX_SHOW_STATS_PATH=$tmp/stats.json \
nix-instantiate --eval --strict --show-trace >/dev/null \
--expr 'with import <nixpkgs/lib>; with fileset; '"$2"
# Only measure the time after the first run; the first one is a warmup
if (( i > 0 )); then
jq '.cpuTime' "$tmp/stats.json" >> cpuTimes
fi
(( different++ )) || true
fi
done
echo "$different stats differ between the current tree and $compareTo"
done
# Compute mean and standard deviation
read -r mean sd < <(sta --mean --sd --brief <cpuTimes)
jq --argjson mean "$mean" --argjson sd "$sd" \
'.cpuTimeMean = $mean | .cpuTimeSd = $sd' \
"$tmp/stats.json"
}
bench() {
echo "Benchmarking expression $1" >&2
#echo "Running benchmark on index" >&2
run "$nixpkgs" "$1" > "$tmp/new.json"
(
#echo "Checking out $compareTo" >&2
git -C "$nixpkgs" worktree add --quiet "$tmp/worktree" "$compareTo"
trap 'git -C "$nixpkgs" worktree remove "$tmp/worktree"' EXIT
#echo "Running benchmark on $compareTo" >&2
run "$tmp/worktree" "$1" > "$tmp/old.json"
)
read -r oldMean oldSd newMean newSd percentageMean percentageSd < \
<(jq -rn --slurpfile old "$tmp/old.json" --slurpfile new "$tmp/new.json" \
' $old[0].cpuTimeMean as $om
| $old[0].cpuTimeSd as $os
| $new[0].cpuTimeMean as $nm
| $new[0].cpuTimeSd as $ns
| (100 / $om * $nm) as $pm
# Copied from https://github.com/sharkdp/hyperfine/blob/b38d550b89b1dab85139eada01c91a60798db9cc/src/benchmark/relative_speed.rs#L46-L53
| ($pm * pow(pow($ns / $nm; 2) + pow($os / $om; 2); 0.5)) as $ps
| [ $om, $os, $nm, $ns, $pm, $ps ]
| @sh')
echo -e "Mean CPU time $newMean (σ = $newSd) for $runs runs is \e[0;33m$percentageMean% (σ = $percentageSd%)\e[0m of the old value $oldMean (σ = $oldSd)" >&2
different=0
for stat in "${stats[@]}"; do
oldValue=$(jq "$stat" "$tmp/old.json")
newValue=$(jq "$stat" "$tmp/new.json")
if (( oldValue != newValue )); then
percent=$(bc <<< "scale=100; result = 100/$oldValue*$newValue; scale=4; result / 1")
if (( oldValue < newValue )); then
echo -e "Statistic $stat ($newValue) is \e[0;31m$percent% (+$(( newValue - oldValue )))\e[0m of the old value $oldValue" >&2
else
echo -e "Statistic $stat ($newValue) is \e[0;32m$percent% (-$(( oldValue - newValue )))\e[0m of the old value $oldValue" >&2
fi
(( different++ )) || true
fi
done
echo "$different stats differ between the current tree and $compareTo"
echo ""
}
# Create a fairly populated tree
touch f{0..5}
mkdir d{0..5}
mkdir e{0..5}
touch d{0..5}/f{0..5}
mkdir -p d{0..5}/d{0..5}
mkdir -p e{0..5}/e{0..5}
touch d{0..5}/d{0..5}/f{0..5}
mkdir -p d{0..5}/d{0..5}/d{0..5}
mkdir -p e{0..5}/e{0..5}/e{0..5}
touch d{0..5}/d{0..5}/d{0..5}/f{0..5}
mkdir -p d{0..5}/d{0..5}/d{0..5}/d{0..5}
mkdir -p e{0..5}/e{0..5}/e{0..5}/e{0..5}
touch d{0..5}/d{0..5}/d{0..5}/d{0..5}/f{0..5}
bench 'toSource { root = ./.; fileset = ./.; }'
rm -rf -- *
touch {0..1000}
bench 'toSource { root = ./.; fileset = unions (mapAttrsToList (name: value: ./. + "/${name}") (builtins.readDir ./.)); }'
rm -rf -- *


@ -3,15 +3,22 @@ let
inherit (import ./internal.nix { inherit lib; })
_coerce
_coerceMany
_toSourceFilter
_unionMany
;
inherit (builtins)
isList
isPath
pathExists
typeOf
;
inherit (lib.lists)
imap0
;
inherit (lib.path)
hasPrefix
splitRoot
@ -29,6 +36,10 @@ let
cleanSourceWith
;
inherit (lib.trivial)
pipe
;
in {
/*
@ -51,16 +62,51 @@ in {
} -> SourceLike
Example:
# Import the current directory into the store but only include files under ./src
toSource { root = ./.; fileset = ./src; }
# Import the current directory into the store
# but only include files under ./src
toSource {
root = ./.;
fileset = ./src;
}
=> "/nix/store/...-source"
# The file set coerced from path ./bar could contain files outside the root ./foo, which is not allowed
toSource { root = ./foo; fileset = ./bar; }
# Import the current directory into the store
# but only include ./Makefile and all files under ./src
toSource {
root = ./.;
fileset = union
./Makefile
./src;
}
=> "/nix/store/...-source"
# Trying to include a file outside the root will fail
toSource {
root = ./.;
fileset = unions [
./Makefile
./src
../LICENSE
];
}
=> <error>
# The root needs to point to a directory that contains all the files
toSource {
root = ../.;
fileset = unions [
./Makefile
./src
../LICENSE
];
}
=> "/nix/store/...-source"
# The root has to be a local filesystem path
toSource { root = "/nix/store/...-source"; fileset = ./.; }
toSource {
root = "/nix/store/...-source";
fileset = ./.;
}
=> <error>
*/
toSource = {
@ -69,7 +115,7 @@ in {
Paths in [strings](https://nixos.org/manual/nix/stable/language/values.html#type-string), including Nix store paths, cannot be passed as `root`.
`root` has to be a directory.
<!-- Ignore the indentation here, this is a nixdoc rendering bug that needs to be fixed -->
<!-- Ignore the indentation here, this is a nixdoc rendering bug that needs to be fixed: https://github.com/nix-community/nixdoc/issues/75 -->
:::{.note}
Changing `root` only affects the directory structure of the resulting store path, it does not change which files are added to the store.
The only way to change which files get added to the store is by changing the `fileset` attribute.
@ -78,25 +124,32 @@ The only way to change which files get added to the store is by changing the `fi
root,
/*
(required) The file set whose files to import into the store.
Currently the only way to construct file sets is using [implicit coercion from paths](#sec-fileset-path-coercion).
If a directory does not recursively contain any file, it is omitted from the store path contents.
File sets can be created using other functions in this library.
This argument can also be a path,
which gets [implicitly coerced to a file set](#sec-fileset-path-coercion).
<!-- Ignore the indentation here, this is a nixdoc rendering bug that needs to be fixed: https://github.com/nix-community/nixdoc/issues/75 -->
:::{.note}
If a directory does not recursively contain any file, it is omitted from the store path contents.
:::
*/
fileset,
}:
let
# We cannot rename matched attribute arguments, so let's work around it with an extra `let in` expression
# For now filesets are always paths
filesetPath = fileset;
filesetArg = fileset;
in
let
fileset = _coerce "lib.fileset.toSource: `fileset`" filesetPath;
fileset = _coerce "lib.fileset.toSource: `fileset`" filesetArg;
rootFilesystemRoot = (splitRoot root).root;
filesetFilesystemRoot = (splitRoot fileset._internalBase).root;
sourceFilter = _toSourceFilter fileset;
in
if ! isPath root then
if isStringLike root then
throw ''
lib.fileset.toSource: `root` "${toString root}" is a string-like value, but it should be a path instead.
lib.fileset.toSource: `root` ("${toString root}") is a string-like value, but it should be a path instead.
Paths in strings are not supported by `lib.fileset`, use `lib.sources` or derivations instead.''
else
throw ''
@ -105,27 +158,124 @@ The only way to change which files get added to the store is by changing the `fi
# See also ../path/README.md
else if rootFilesystemRoot != filesetFilesystemRoot then
throw ''
lib.fileset.toSource: Filesystem roots are not the same for `fileset` and `root` "${toString root}":
lib.fileset.toSource: Filesystem roots are not the same for `fileset` and `root` ("${toString root}"):
`root`: root "${toString rootFilesystemRoot}"
`fileset`: root "${toString filesetFilesystemRoot}"
Different roots are not supported.''
else if ! pathExists root then
throw ''
lib.fileset.toSource: `root` ${toString root} does not exist.''
lib.fileset.toSource: `root` (${toString root}) does not exist.''
else if pathType root != "directory" then
throw ''
lib.fileset.toSource: `root` ${toString root} is a file, but it should be a directory instead. Potential solutions:
lib.fileset.toSource: `root` (${toString root}) is a file, but it should be a directory instead. Potential solutions:
- If you want to import the file into the store _without_ a containing directory, use string interpolation or `builtins.path` instead of this function.
- If you want to import the file into the store _with_ a containing directory, set `root` to the containing directory, such as ${toString (dirOf root)}, and set `fileset` to the file path.''
else if ! hasPrefix root fileset._internalBase then
throw ''
lib.fileset.toSource: `fileset` could contain files in ${toString fileset._internalBase}, which is not under the `root` ${toString root}. Potential solutions:
lib.fileset.toSource: `fileset` could contain files in ${toString fileset._internalBase}, which is not under the `root` (${toString root}). Potential solutions:
- Set `root` to ${toString fileset._internalBase} or any directory higher up. This changes the layout of the resulting store path.
- Set `fileset` to a file set that cannot contain files outside the `root` ${toString root}. This could change the files included in the result.''
- Set `fileset` to a file set that cannot contain files outside the `root` (${toString root}). This could change the files included in the result.''
else
builtins.seq sourceFilter
cleanSourceWith {
name = "source";
src = root;
filter = _toSourceFilter fileset;
filter = sourceFilter;
};
/*
The file set containing all files that are in either of two given file sets.
This is the same as [`unions`](#function-library-lib.fileset.unions),
but takes just two file sets instead of a list.
See also [Union (set theory)](https://en.wikipedia.org/wiki/Union_(set_theory)).
The given file sets are evaluated as lazily as possible,
with the first argument being evaluated first if needed.
Type:
union :: FileSet -> FileSet -> FileSet
Example:
# Create a file set containing the file `Makefile`
# and all files recursively in the `src` directory
union ./Makefile ./src
# Create a file set containing the file `Makefile`
# and the LICENSE file from the parent directory
union ./Makefile ../LICENSE
*/
union =
# The first file set.
# This argument can also be a path,
# which gets [implicitly coerced to a file set](#sec-fileset-path-coercion).
fileset1:
# The second file set.
# This argument can also be a path,
# which gets [implicitly coerced to a file set](#sec-fileset-path-coercion).
fileset2:
_unionMany
(_coerceMany "lib.fileset.union" [
{
context = "first argument";
value = fileset1;
}
{
context = "second argument";
value = fileset2;
}
]);
/*
The file set containing all files that are in any of the given file sets.
This is the same as [`union`](#function-library-lib.fileset.union),
but takes a list of file sets instead of just two.
See also [Union (set theory)](https://en.wikipedia.org/wiki/Union_(set_theory)).
The given file sets are evaluated as lazily as possible,
with earlier elements being evaluated first if needed.
Type:
unions :: [ FileSet ] -> FileSet
Example:
# Create a file set containing selected files
unions [
# Include the single file `Makefile` in the current directory
# This errors if the file doesn't exist
./Makefile
# Recursively include all files in the `src/code` directory
# If this directory is empty this has no effect
./src/code
# Include the files `run.sh` and `unit.c` from the `tests` directory
./tests/run.sh
./tests/unit.c
# Include the `LICENSE` file from the parent directory
../LICENSE
]
*/
unions =
# A list of file sets.
# Must contain at least 1 element.
# The elements can also be paths,
# which get [implicitly coerced to file sets](#sec-fileset-path-coercion).
filesets:
if ! isList filesets then
throw "lib.fileset.unions: Expected argument to be a list, but got a ${typeOf filesets}."
else if filesets == [ ] then
# TODO: This could be supported, but requires an extra internal representation for the empty file set, which would be special in that it has no base path.
throw "lib.fileset.unions: Expected argument to be a list with at least one element, but it contains no elements."
else
pipe filesets [
# Annotate the elements with context, used by _coerceMany for better errors
(imap0 (i: el: {
context = "element ${toString i}";
value = el;
}))
(_coerceMany "lib.fileset.unions")
_unionMany
];
}


@ -14,6 +14,8 @@ let
inherit (lib.attrsets)
attrValues
mapAttrs
setAttrByPath
zipAttrsWith
;
inherit (lib.filesystem)
@ -22,8 +24,16 @@ let
inherit (lib.lists)
all
commonPrefix
drop
elemAt
filter
findFirstIndex
foldl'
head
length
sublist
tail
;
inherit (lib.path)
@ -33,6 +43,7 @@ let
inherit (lib.path.subpath)
components
join
;
inherit (lib.strings)
@ -50,28 +61,61 @@ in
rec {
# If you change the internal representation, make sure to:
# - Update this version
# - Adjust _coerce to also accept and coerce older versions
# - Increment this version
# - Add an additional migration function below
# - Update the description of the internal representation in ./README.md
_currentVersion = 0;
_currentVersion = 2;
# Migrations between versions. The 0th element converts from v0 to v1, and so on
migrations = [
# Convert v0 into v1: Add the _internalBase{Root,Components} attributes
(
filesetV0:
let
parts = splitRoot filesetV0._internalBase;
in
filesetV0 // {
_internalVersion = 1;
_internalBaseRoot = parts.root;
_internalBaseComponents = components parts.subpath;
}
)
# Convert v1 into v2: filesetTree's can now also omit attributes to signal paths not being included
(
filesetV1:
# This change is backwards compatible (but not forwards compatible, so we still need a new version)
filesetV1 // {
_internalVersion = 2;
}
)
];
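# As a rough illustration of how these migrations compose (hypothetical v0 value),
# _coerce below applies them in order when it encounters an older version:
#   _coerce "<context>" { _type = "fileset"; _internalVersion = 0; _internalBase = ./.; _internalTree = null; }
#   => { _type = "fileset"; _internalVersion = 2; _internalBase = ./.;
#        _internalBaseRoot = /.; _internalBaseComponents = [ ... ]; _internalTree = null; }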
# Create a fileset, see ./README.md#fileset
# Type: path -> filesetTree -> fileset
_create = base: tree: {
_type = "fileset";
_create = base: tree:
let
# Decompose the base into its components
# See ../path/README.md for why we're not just using `toString`
parts = splitRoot base;
in
{
_type = "fileset";
_internalVersion = _currentVersion;
_internalBase = base;
_internalTree = tree;
_internalVersion = _currentVersion;
_internalBase = base;
_internalBaseRoot = parts.root;
_internalBaseComponents = components parts.subpath;
_internalTree = tree;
# Double __ to make it be evaluated and ordered first
__noEval = throw ''
lib.fileset: Directly evaluating a file set is not supported. Use `lib.fileset.toSource` to turn it into a usable source instead.'';
};
# Double __ to make it be evaluated and ordered first
__noEval = throw ''
lib.fileset: Directly evaluating a file set is not supported. Use `lib.fileset.toSource` to turn it into a usable source instead.'';
};
# Coerce a value to a fileset, erroring when the value cannot be coerced.
# The string gives the context for error messages.
# Type: String -> Path -> fileset
# Type: String -> (fileset | Path) -> fileset
_coerce = context: value:
if value._type or "" == "fileset" then
if value._internalVersion > _currentVersion then
@ -80,22 +124,53 @@ rec {
- Internal version of the file set: ${toString value._internalVersion}
- Internal version of the library: ${toString _currentVersion}
Make sure to update your Nixpkgs to have a newer version of `lib.fileset`.''
else if value._internalVersion < _currentVersion then
let
# Get all the migration functions necessary to convert from the old to the current version
migrationsToApply = sublist value._internalVersion (_currentVersion - value._internalVersion) migrations;
in
foldl' (value: migration: migration value) value migrationsToApply
else
value
else if ! isPath value then
if isStringLike value then
throw ''
${context} "${toString value}" is a string-like value, but it should be a path instead.
${context} ("${toString value}") is a string-like value, but it should be a path instead.
Paths represented as strings are not supported by `lib.fileset`, use `lib.sources` or derivations instead.''
else
throw ''
${context} is of type ${typeOf value}, but it should be a path instead.''
else if ! pathExists value then
throw ''
${context} ${toString value} does not exist.''
${context} (${toString value}) does not exist.''
else
_singleton value;
# Coerce many values to filesets, erroring when any value cannot be coerced,
# or if the filesystem root of the values doesn't match.
# Type: String -> [ { context :: String, value :: fileset | Path } ] -> [ fileset ]
_coerceMany = functionContext: list:
let
filesets = map ({ context, value }:
_coerce "${functionContext}: ${context}" value
) list;
firstBaseRoot = (head filesets)._internalBaseRoot;
# Finds the first element with a filesystem root different from that of the first element, if any
differentIndex = findFirstIndex (fileset:
firstBaseRoot != fileset._internalBaseRoot
) null filesets;
in
if differentIndex != null then
throw ''
${functionContext}: Filesystem roots are not the same:
${(head list).context}: root "${toString firstBaseRoot}"
${(elemAt list differentIndex).context}: root "${toString (elemAt filesets differentIndex)._internalBaseRoot}"
Different roots are not supported.''
else
filesets;
# Create a file set from a path.
# Type: Path -> fileset
_singleton = path:
@ -109,50 +184,23 @@ rec {
# - _internalBase: ./.
# - _internalTree: {
# "default.nix" = <type>;
# # Other directory entries
# <name> = null;
# }
# See ./README.md#single-files
_create (dirOf path)
(_nestTree
(dirOf path)
[ (baseNameOf path) ]
type
);
{
${baseNameOf path} = type;
};
/*
Nest a filesetTree under some extra components, while filling out all the other directory entries that aren't included with null
_nestTree ./. [ "foo" "bar" ] tree == {
foo = {
bar = tree;
<other-entries> = null;
}
<other-entries> = null;
}
Type: Path -> [ String ] -> filesetTree -> filesetTree
*/
_nestTree = targetBase: extraComponents: tree:
let
recurse = index: focusPath:
if index == length extraComponents then
tree
else
mapAttrs (_: _: null) (readDir focusPath)
// {
${elemAt extraComponents index} = recurse (index + 1) (append focusPath (elemAt extraComponents index));
};
in
recurse 0 targetBase;
# Expand "directory" filesetTree representation to the equivalent { <name> = filesetTree; }
# Expand a directory representation to an equivalent one in attribute set form.
# All directory entries are included in the result.
# Type: Path -> filesetTree -> { <name> = filesetTree; }
_directoryEntries = path: value:
if isAttrs value then
value
if value == "directory" then
readDir path
else
readDir path;
# Set all entries not present to null
mapAttrs (name: value: null) (readDir path)
// value;
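# As a rough illustration (hypothetical directory ./foo containing the entries bar and baz):
#   _directoryEntries ./foo "directory" == { bar = <type>; baz = <type>; }
#   _directoryEntries ./foo { bar = "regular"; } == { bar = "regular"; baz = null; }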
/*
Simplify a filesetTree recursively:
@ -193,17 +241,13 @@ rec {
# which has the effect that they aren't included in the result
tree = _simplifyTree fileset._internalBase fileset._internalTree;
# Decompose the base into its components
# See ../path/README.md for why we're not just using `toString`
baseComponents = components (splitRoot fileset._internalBase).subpath;
# The base path as a string with a single trailing slash
baseString =
if baseComponents == [] then
if fileset._internalBaseComponents == [] then
# Need to handle the filesystem root specially
"/"
else
"/" + concatStringsSep "/" baseComponents + "/";
"/" + concatStringsSep "/" fileset._internalBaseComponents + "/";
baseLength = stringLength baseString;
@ -266,9 +310,73 @@ rec {
in
# Special case because the code below assumes that the _internalBase is always included in the result
# which shouldn't be done when we have no files at all in the base
# This also forces the tree before returning the filter, which leads to earlier error messages
if tree == null then
empty
else
nonEmpty;
# Computes the union of a list of filesets.
# The filesets must already be coerced and validated to have the same filesystem root
# Type: [ Fileset ] -> Fileset
_unionMany = filesets:
let
first = head filesets;
# To be able to union filesetTree's together, they need to have the same base path.
# Base paths can be unioned by taking their common prefix,
# e.g. such that `union /foo/bar /foo/baz` has the base path `/foo`
# A list of path components common to all base paths.
# Note that commonPrefix can only be fully evaluated,
# so this cannot cause a stack overflow due to a build-up of unevaluated thunks.
commonBaseComponents = foldl'
(components: el: commonPrefix components el._internalBaseComponents)
first._internalBaseComponents
# We could also not do the `tail` here to avoid a list allocation,
# but then we'd have to pay for a potentially expensive
# but unnecessary `commonPrefix` call
(tail filesets);
# The common base path assembled from a filesystem root and the common components
commonBase = append first._internalBaseRoot (join commonBaseComponents);
# A list of filesetTree's that all have the same base path
# This is achieved by nesting the trees into the components they have over the common base path
# E.g. `union /foo/bar /foo/baz` has the base path /foo
# So the tree under `/foo/bar` gets nested under `{ bar = ...; ... }`,
# while the tree under `/foo/baz` gets nested under `{ baz = ...; ... }`
# Therefore allowing combined operations over them.
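# Continuing that example with hypothetical trees:
#   trees == [ { bar = <tree under /foo/bar>; } { baz = <tree under /foo/baz>; } ]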
trees = map (fileset:
setAttrByPath
(drop (length commonBaseComponents) fileset._internalBaseComponents)
fileset._internalTree
) filesets;
# Combines all trees into a single one using _unionTrees
# We do not use a binary fold here because it would cause a thunk build-up
# which could cause a stack overflow for a large number of trees
resultTree = _unionTrees trees;
in
_create commonBase resultTree;
# The union of multiple filesetTree's with the same base path.
# Later elements are only evaluated if necessary.
# Type: [ filesetTree ] -> filesetTree
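# As a hedged sketch of its behavior (hypothetical trees):
#   _unionTrees [ "directory" { a = null; } ] == "directory"
#   _unionTrees [ null null ] == null
#   _unionTrees [ { a = "regular"; b = null; } { b = "directory"; } ] == { a = "regular"; b = "directory"; }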
_unionTrees = trees:
let
stringIndex = findFirstIndex isString null trees;
withoutNull = filter (tree: tree != null) trees;
in
if stringIndex != null then
# If there's a string, it's always a fully included tree (dir or file),
# no need to look at other elements
elemAt trees stringIndex
else if withoutNull == [ ] then
# If all trees are null, then the resulting tree is also null
null
else
# The non-null elements have to be attribute sets representing partial trees
# We need to recurse into those
zipAttrsWith (name: _unionTrees) withoutNull;
}


@ -1,4 +1,5 @@
#!/usr/bin/env bash
# shellcheck disable=SC2016
# Tests lib.fileset
# Run:
@ -50,27 +51,37 @@ with lib;
with internal;
with lib.fileset;'
# Check that a nix expression evaluates successfully (strictly, coercing to json, read-write-mode).
# The expression has `lib.fileset` in scope.
# If a second argument is provided, the result is checked against it as a regex.
# Otherwise, the result is output.
# Usage: expectSuccess NIX [REGEX]
expectSuccess() {
local expr=$1
if [[ "$#" -gt 1 ]]; then
local expectedResultRegex=$2
# Check that two nix expression successfully evaluate to the same value.
# The expressions have `lib.fileset` in scope.
# Usage: expectEqual NIX NIX
expectEqual() {
local actualExpr=$1
local expectedExpr=$2
if ! actualResult=$(nix-instantiate --eval --strict --show-trace \
--expr "$prefixExpression ($actualExpr)"); then
die "$actualExpr failed to evaluate, but it was expected to succeed"
fi
if ! expectedResult=$(nix-instantiate --eval --strict --show-trace \
--expr "$prefixExpression ($expectedExpr)"); then
die "$expectedExpr failed to evaluate, but it was expected to succeed"
fi
if [[ "$actualResult" != "$expectedResult" ]]; then
die "$actualExpr should have evaluated to $expectedExpr:\n$expectedResult\n\nbut it evaluated to\n$actualResult"
fi
}
# Check that a nix expression evaluates successfully to a store path and returns it (without quotes).
# The expression has `lib.fileset` in scope.
# Usage: expectStorePath NIX
expectStorePath() {
local expr=$1
if ! result=$(nix-instantiate --eval --strict --json --read-write-mode --show-trace \
--expr "$prefixExpression $expr"); then
--expr "$prefixExpression ($expr)"); then
die "$expr failed to evaluate, but it was expected to succeed"
fi
if [[ -v expectedResultRegex ]]; then
if [[ ! "$result" =~ $expectedResultRegex ]]; then
die "$expr should have evaluated to this regex pattern:\n\n$expectedResultRegex\n\nbut this was the actual result:\n\n$result"
fi
else
echo "$result"
fi
# This is safe because we expect to get back a store path in a string
crudeUnquoteJSON <<< "$result"
}
# Check that a nix expression fails to evaluate (strictly, coercing to json, read-write-mode).
@ -114,18 +125,19 @@ checkFileset() (
local fileset=$1
# Process the tree into separate arrays for included paths, excluded paths and excluded files.
# Also create all the paths in the local directory
local -a included=()
local -a excluded=()
local -a excludedFiles=()
# Track which paths need to be created
local -a dirsToCreate=()
local -a filesToCreate=()
for p in "${!tree[@]}"; do
# If keys end with a `/` we treat them as directories, otherwise files
if [[ "$p" =~ /$ ]]; then
mkdir -p "$p"
dirsToCreate+=("$p")
isFile=
else
mkdir -p "$(dirname "$p")"
touch "$p"
filesToCreate+=("$p")
isFile=1
fi
case "${tree[$p]}" in
@ -143,6 +155,19 @@ checkFileset() (
esac
done
# Create all the necessary paths.
# This is done with only a fixed number of processes,
# in order not to be too slow.
# Though this does mean we're somewhat limited in how many files can be created
if (( ${#dirsToCreate[@]} != 0 )); then
mkdir -p "${dirsToCreate[@]}"
fi
if (( ${#filesToCreate[@]} != 0 )); then
readarray -d '' -t parentsToCreate < <(dirname -z "${filesToCreate[@]}")
mkdir -p "${parentsToCreate[@]}"
touch "${filesToCreate[@]}"
fi
# Start inotifywait in the background to monitor all excluded files (if any)
if [[ -n "$canMonitorFiles" ]] && (( "${#excludedFiles[@]}" != 0 )); then
coproc watcher {
@ -154,6 +179,7 @@ checkFileset() (
}
# This will trigger when this subshell exits, no matter if successful or not
# After exiting the subshell, the parent shell will continue executing
# shellcheck disable=SC2154
trap 'kill "${watcher_PID}"' exit
# Synchronously wait until inotifywait is ready
@ -164,8 +190,7 @@ checkFileset() (
# Call toSource with the fileset, triggering open events for all files that are added to the store
expression="toSource { root = ./.; fileset = $fileset; }"
# crudeUnquoteJSON is safe because we get back a store path in a string
storePath=$(expectSuccess "$expression" | crudeUnquoteJSON)
storePath=$(expectStorePath "$expression")
# Remove all files immediately after, triggering delete_self events for all of them
rm -rf -- *
@ -211,7 +236,7 @@ checkFileset() (
#### Error messages #####
# Absolute paths in strings cannot be passed as `root`
expectFailure 'toSource { root = "/nix/store/foobar"; fileset = ./.; }' 'lib.fileset.toSource: `root` "/nix/store/foobar" is a string-like value, but it should be a path instead.
expectFailure 'toSource { root = "/nix/store/foobar"; fileset = ./.; }' 'lib.fileset.toSource: `root` \("/nix/store/foobar"\) is a string-like value, but it should be a path instead.
\s*Paths in strings are not supported by `lib.fileset`, use `lib.sources` or derivations instead.'
# Only paths are accepted as `root`
@ -221,56 +246,65 @@ expectFailure 'toSource { root = 10; fileset = ./.; }' 'lib.fileset.toSource: `r
mkdir -p {foo,bar}/mock-root
expectFailure 'with ((import <nixpkgs/lib>).extend (import <nixpkgs/lib/fileset/mock-splitRoot.nix>)).fileset;
toSource { root = ./foo/mock-root; fileset = ./bar/mock-root; }
' 'lib.fileset.toSource: Filesystem roots are not the same for `fileset` and `root` "'"$work"'/foo/mock-root":
' 'lib.fileset.toSource: Filesystem roots are not the same for `fileset` and `root` \("'"$work"'/foo/mock-root"\):
\s*`root`: root "'"$work"'/foo/mock-root"
\s*`fileset`: root "'"$work"'/bar/mock-root"
\s*Different roots are not supported.'
rm -rf *
# `root` needs to exist
expectFailure 'toSource { root = ./a; fileset = ./.; }' 'lib.fileset.toSource: `root` '"$work"'/a does not exist.'
expectFailure 'toSource { root = ./a; fileset = ./.; }' 'lib.fileset.toSource: `root` \('"$work"'/a\) does not exist.'
# `root` needs to be a directory
touch a
expectFailure 'toSource { root = ./a; fileset = ./a; }' 'lib.fileset.toSource: `root` '"$work"'/a is a file, but it should be a directory instead. Potential solutions:
expectFailure 'toSource { root = ./a; fileset = ./a; }' 'lib.fileset.toSource: `root` \('"$work"'/a\) is a file, but it should be a directory instead. Potential solutions:
\s*- If you want to import the file into the store _without_ a containing directory, use string interpolation or `builtins.path` instead of this function.
\s*- If you want to import the file into the store _with_ a containing directory, set `root` to the containing directory, such as '"$work"', and set `fileset` to the file path.'
rm -rf *
# The fileset argument should be evaluated, even if the directory is empty
expectFailure 'toSource { root = ./.; fileset = abort "This should be evaluated"; }' 'evaluation aborted with the following error message: '\''This should be evaluated'\'
# Only paths under `root` should be able to influence the result
mkdir a
expectFailure 'toSource { root = ./a; fileset = ./.; }' 'lib.fileset.toSource: `fileset` could contain files in '"$work"', which is not under the `root` '"$work"'/a. Potential solutions:
expectFailure 'toSource { root = ./a; fileset = ./.; }' 'lib.fileset.toSource: `fileset` could contain files in '"$work"', which is not under the `root` \('"$work"'/a\). Potential solutions:
\s*- Set `root` to '"$work"' or any directory higher up. This changes the layout of the resulting store path.
\s*- Set `fileset` to a file set that cannot contain files outside the `root` '"$work"'/a. This could change the files included in the result.'
\s*- Set `fileset` to a file set that cannot contain files outside the `root` \('"$work"'/a\). This could change the files included in the result.'
rm -rf *
# Path coercion only works for paths
expectFailure 'toSource { root = ./.; fileset = 10; }' 'lib.fileset.toSource: `fileset` is of type int, but it should be a path instead.'
expectFailure 'toSource { root = ./.; fileset = "/some/path"; }' 'lib.fileset.toSource: `fileset` "/some/path" is a string-like value, but it should be a path instead.
expectFailure 'toSource { root = ./.; fileset = "/some/path"; }' 'lib.fileset.toSource: `fileset` \("/some/path"\) is a string-like value, but it should be a path instead.
\s*Paths represented as strings are not supported by `lib.fileset`, use `lib.sources` or derivations instead.'
# Path coercion errors for non-existent paths
expectFailure 'toSource { root = ./.; fileset = ./a; }' 'lib.fileset.toSource: `fileset` '"$work"'/a does not exist.'
expectFailure 'toSource { root = ./.; fileset = ./a; }' 'lib.fileset.toSource: `fileset` \('"$work"'/a\) does not exist.'
# File sets cannot be evaluated directly
expectFailure '_create ./. null' 'lib.fileset: Directly evaluating a file set is not supported. Use `lib.fileset.toSource` to turn it into a usable source instead.'
expectFailure 'union ./. ./.' 'lib.fileset: Directly evaluating a file set is not supported. Use `lib.fileset.toSource` to turn it into a usable source instead.'
# Past versions of the internal representation are supported
expectEqual '_coerce "<tests>: value" { _type = "fileset"; _internalVersion = 0; _internalBase = ./.; }' \
'{ _internalBase = ./.; _internalBaseComponents = path.subpath.components (path.splitRoot ./.).subpath; _internalBaseRoot = /.; _internalVersion = 2; _type = "fileset"; }'
expectEqual '_coerce "<tests>: value" { _type = "fileset"; _internalVersion = 1; }' \
'{ _type = "fileset"; _internalVersion = 2; }'
# Future versions of the internal representation are unsupported
expectFailure '_coerce "<tests>: value" { _type = "fileset"; _internalVersion = 1; }' '<tests>: value is a file set created from a future version of the file set library with a different internal representation:
\s*- Internal version of the file set: 1
\s*- Internal version of the library: 0
expectFailure '_coerce "<tests>: value" { _type = "fileset"; _internalVersion = 3; }' '<tests>: value is a file set created from a future version of the file set library with a different internal representation:
\s*- Internal version of the file set: 3
\s*- Internal version of the library: 2
\s*Make sure to update your Nixpkgs to have a newer version of `lib.fileset`.'
# _create followed by _coerce should give the inputs back without any validation
expectSuccess '{
inherit (_coerce "<test>" (_create "base" "tree"))
expectEqual '{
inherit (_coerce "<test>" (_create ./. "directory"))
_internalVersion _internalBase _internalTree;
}' '\{"_internalBase":"base","_internalTree":"tree","_internalVersion":0\}'
}' '{ _internalBase = ./.; _internalTree = "directory"; _internalVersion = 2; }'
#### Resulting store path ####
# The store path name should be "source"
expectSuccess 'toSource { root = ./.; fileset = ./.; }' '"'"${NIX_STORE_DIR:-/nix/store}"'/.*-source"'
expectEqual 'toSource { root = ./.; fileset = ./.; }' 'sources.cleanSourceWith { name = "source"; src = ./.; }'
# We should be able to import an empty directory and end up with an empty result
tree=(
@ -341,9 +375,104 @@ checkFileset './c'
# Test the source filter for the somewhat special case of files in the filesystem root
# We can't easily test this with the above functions because we can't write to the filesystem root and we don't want to make any assumptions about which files are there in the sandbox
expectSuccess '_toSourceFilter (_create /. null) "/foo" ""' 'false'
expectSuccess '_toSourceFilter (_create /. { foo = "regular"; }) "/foo" ""' 'true'
expectSuccess '_toSourceFilter (_create /. { foo = null; }) "/foo" ""' 'false'
expectEqual '_toSourceFilter (_create /. null) "/foo" ""' 'false'
expectEqual '_toSourceFilter (_create /. { foo = "regular"; }) "/foo" ""' 'true'
expectEqual '_toSourceFilter (_create /. { foo = null; }) "/foo" ""' 'false'
## lib.fileset.union, lib.fileset.unions
# Different filesystem roots among the arguments are not supported
mkdir -p {foo,bar}/mock-root
expectFailure 'with ((import <nixpkgs/lib>).extend (import <nixpkgs/lib/fileset/mock-splitRoot.nix>)).fileset;
toSource { root = ./.; fileset = union ./foo/mock-root ./bar/mock-root; }
' 'lib.fileset.union: Filesystem roots are not the same:
\s*first argument: root "'"$work"'/foo/mock-root"
\s*second argument: root "'"$work"'/bar/mock-root"
\s*Different roots are not supported.'
expectFailure 'with ((import <nixpkgs/lib>).extend (import <nixpkgs/lib/fileset/mock-splitRoot.nix>)).fileset;
toSource { root = ./.; fileset = unions [ ./foo/mock-root ./bar/mock-root ]; }
' 'lib.fileset.unions: Filesystem roots are not the same:
\s*element 0: root "'"$work"'/foo/mock-root"
\s*element 1: root "'"$work"'/bar/mock-root"
\s*Different roots are not supported.'
rm -rf *
# Coercion errors show the correct context
expectFailure 'toSource { root = ./.; fileset = union ./a ./.; }' 'lib.fileset.union: first argument \('"$work"'/a\) does not exist.'
expectFailure 'toSource { root = ./.; fileset = union ./. ./b; }' 'lib.fileset.union: second argument \('"$work"'/b\) does not exist.'
expectFailure 'toSource { root = ./.; fileset = unions [ ./a ./. ]; }' 'lib.fileset.unions: element 0 \('"$work"'/a\) does not exist.'
expectFailure 'toSource { root = ./.; fileset = unions [ ./. ./b ]; }' 'lib.fileset.unions: element 1 \('"$work"'/b\) does not exist.'
# unions needs a list with at least 1 element
expectFailure 'toSource { root = ./.; fileset = unions null; }' 'lib.fileset.unions: Expected argument to be a list, but got a null.'
expectFailure 'toSource { root = ./.; fileset = unions [ ]; }' 'lib.fileset.unions: Expected argument to be a list with at least one element, but it contains no elements.'
# The tree of later arguments should not be evaluated if an earlier argument already includes all files
tree=()
checkFileset 'union ./. (_create ./. (abort "This should not be used!"))'
checkFileset 'unions [ ./. (_create ./. (abort "This should not be used!")) ]'
# union doesn't include files that weren't specified
tree=(
[x]=1
[y]=1
[z]=0
)
checkFileset 'union ./x ./y'
checkFileset 'unions [ ./x ./y ]'
# Also for directories
tree=(
[x/a]=1
[x/b]=1
[y/a]=1
[y/b]=1
[z/a]=0
[z/b]=0
)
checkFileset 'union ./x ./y'
checkFileset 'unions [ ./x ./y ]'
# And for very specific paths
tree=(
[x/a]=1
[x/b]=0
[y/a]=0
[y/b]=1
[z/a]=0
[z/b]=0
)
checkFileset 'union ./x/a ./y/b'
checkFileset 'unions [ ./x/a ./y/b ]'
# unions or chained union calls can include more paths
tree=(
[x/a]=1
[x/b]=1
[y/a]=1
[y/b]=0
[z/a]=0
[z/b]=1
)
checkFileset 'unions [ ./x/a ./x/b ./y/a ./z/b ]'
checkFileset 'union (union ./x/a ./x/b) (union ./y/a ./z/b)'
checkFileset 'union (union (union ./x/a ./x/b) ./y/a) ./z/b'
# unions should not stack overflow, even if many elements are passed
tree=()
for i in $(seq 1000); do
tree[$i/a]=1
tree[$i/b]=0
done
(
# Locally limit the maximum stack size to 100 * 1024 bytes
# If unions was implemented recursively, this would stack overflow
ulimit -s 100
checkFileset 'unions (mapAttrsToList (name: _: ./. + "/${name}/a") (builtins.readDir ./.))'
)
# TODO: Once we have combinators and a property testing library, derive property tests from https://en.wikipedia.org/wiki/Algebra_of_sets