diff --git a/pkgs/build-support/kernel/make-initrd-ng-tool.nix b/pkgs/build-support/kernel/make-initrd-ng-tool.nix new file mode 100644 index 000000000000..66ffc09d43cf --- /dev/null +++ b/pkgs/build-support/kernel/make-initrd-ng-tool.nix @@ -0,0 +1,9 @@ +{ rustPlatform }: + +rustPlatform.buildRustPackage { + pname = "make-initrd-ng"; + version = "0.1.0"; + + src = ./make-initrd-ng; + cargoLock.lockFile = ./make-initrd-ng/Cargo.lock; +} diff --git a/pkgs/build-support/kernel/make-initrd-ng.nix b/pkgs/build-support/kernel/make-initrd-ng.nix new file mode 100644 index 000000000000..9fd202c44847 --- /dev/null +++ b/pkgs/build-support/kernel/make-initrd-ng.nix @@ -0,0 +1,79 @@ +let + # Some metadata on various compression programs, relevant to naming + # the initramfs file and, if applicable, generating a u-boot image + # from it. + compressors = import ./initrd-compressor-meta.nix; + # Get the basename of the actual compression program from the whole + # compression command, for the purpose of guessing the u-boot + # compression type and filename extension. + compressorName = fullCommand: builtins.elemAt (builtins.match "([^ ]*/)?([^ ]+).*" fullCommand) 1; +in +{ stdenvNoCC, perl, cpio, ubootTools, lib, pkgsBuildHost, makeInitrdNGTool, patchelf, runCommand, glibc +# Name of the derivation (not of the resulting file!) +, name ? "initrd" + +# Program used to compress the cpio archive; use "cat" for no compression. +# This can also be a function which takes a package set and returns the path to the compressor, +# such as `pkgs: "${pkgs.lzop}/bin/lzop"`. +, compressor ? "gzip" +, _compressorFunction ? + if lib.isFunction compressor then compressor + else if ! builtins.hasContext compressor && builtins.hasAttr compressor compressors then compressors.${compressor}.executable + else _: compressor +, _compressorExecutable ? _compressorFunction pkgsBuildHost +, _compressorName ? compressorName _compressorExecutable +, _compressorMeta ? 
compressors.${_compressorName} or {} + +# List of arguments to pass to the compressor program, or null to use its defaults +, compressorArgs ? null +, _compressorArgsReal ? if compressorArgs == null then _compressorMeta.defaultArgs or [] else compressorArgs + +# Filename extension to use for the compressed initramfs. This is +# included for clarity, but $out/initrd will always be a symlink to +# the final image. +# If this isn't guessed, you may want to complete the metadata above and send a PR :) +, extension ? _compressorMeta.extension or + (throw "Unrecognised compressor ${_compressorName}, please specify filename extension") + +# List of { object = path_or_derivation; symlink = "/path"; } +# The paths are copied into the initramfs in their nix store path +# form, then linked at the root according to `symlink`. +, contents + +# List of uncompressed cpio files to prepend to the initramfs. This +# can be used to add files in specified paths without them becoming +# symlinks to store paths. +, prepend ? [] + +# Whether to wrap the initramfs in a u-boot image. +, makeUInitrd ? stdenvNoCC.hostPlatform.linux-kernel.target == "uImage" + +# If generating a u-boot image, the architecture to use. The default +# guess may not align with u-boot's nomenclature correctly, so it can +# be overridden. +# See https://gitlab.denx.de/u-boot/u-boot/-/blob/9bfb567e5f1bfe7de8eb41f8c6d00f49d2b9a426/common/image.c#L81-106 for a list. +, uInitrdArch ? stdenvNoCC.hostPlatform.linuxArch + +# The name of the compression, as recognised by u-boot. +# See https://gitlab.denx.de/u-boot/u-boot/-/blob/9bfb567e5f1bfe7de8eb41f8c6d00f49d2b9a426/common/image.c#L195-204 for a list. +# If this isn't guessed, you may want to complete the metadata above and send a PR :) +, uInitrdCompression ? 
_compressorMeta.ubootName or + (throw "Unrecognised compressor ${_compressorName}, please specify uInitrdCompression") +}: runCommand name { + compress = "${_compressorExecutable} ${lib.escapeShellArgs _compressorArgsReal}"; + passthru = { + compressorExecutableFunction = _compressorFunction; + compressorArgs = _compressorArgsReal; + }; + + passAsFile = ["contents"]; + contents = lib.concatMapStringsSep "\n" ({ object, symlink, ... }: "${object}\n${if symlink == null then "" else symlink}") contents + "\n"; + + nativeBuildInputs = [makeInitrdNGTool patchelf glibc cpio]; +} '' + mkdir ./root + make-initrd-ng "$contentsPath" ./root + mkdir "$out" + (cd root && find * .[^.*] -exec touch -h -d '@1' '{}' +) + (cd root && find * .[^.*] -print0 | sort -z | cpio -o -H newc -R +0:+0 --reproducible --null | eval -- $compress >> "$out/initrd") +'' diff --git a/pkgs/build-support/kernel/make-initrd-ng/Cargo.lock b/pkgs/build-support/kernel/make-initrd-ng/Cargo.lock new file mode 100644 index 000000000000..75e732029b51 --- /dev/null +++ b/pkgs/build-support/kernel/make-initrd-ng/Cargo.lock @@ -0,0 +1,5 @@ +# This file is automatically @generated by Cargo. +# It is not intended for manual editing. 
+[[package]] +name = "make-initrd-ng" +version = "0.1.0" diff --git a/pkgs/build-support/kernel/make-initrd-ng/Cargo.toml b/pkgs/build-support/kernel/make-initrd-ng/Cargo.toml new file mode 100644 index 000000000000..9076f6b15617 --- /dev/null +++ b/pkgs/build-support/kernel/make-initrd-ng/Cargo.toml @@ -0,0 +1,9 @@ +[package] +name = "make-initrd-ng" +version = "0.1.0" +authors = ["Will Fancher "] +edition = "2018" + +# See more keys and their definitions at https://doc.rust-lang.org/cargo/reference/manifest.html + +[dependencies] diff --git a/pkgs/build-support/kernel/make-initrd-ng/README.md b/pkgs/build-support/kernel/make-initrd-ng/README.md new file mode 100644 index 000000000000..741eba67e43f --- /dev/null +++ b/pkgs/build-support/kernel/make-initrd-ng/README.md @@ -0,0 +1,79 @@ +# What is this for? + +NixOS's traditional initrd is generated by listing the paths that +should be included in initrd and copying the full runtime closure of +those paths into the archive. For most things, like almost any +executable, this involves copying the entirety of huge packages like +glibc, when only things like the shared library files are needed. To +solve this, NixOS does a variety of patchwork to edit the files being +copied in so they only refer to small, patched up paths. For instance, +executables and their shared library dependencies are copied into an +`extraUtils` derivation, and every ELF file is patched to refer to +files in that output. + +The problem with this is that it is often difficult to correctly patch +some things. For instance, systemd bakes the path to the `mount` +command into the binary, so patchelf is no help. Instead, it's very +often easier to simply copy the desired files to their original store +locations in initrd and not copy their entire runtime closure. This +does mean that it is the burden of the developer to ensure that all +necessary dependencies are copied in, as closures won't be +consulted. 
However, it is rare that full closures are actually +desirable, so in the traditional initrd, the developer was likely to +do manual work on patching the dependencies explicitly anyway. + +# How it works + +This program is similar to its inspiration (`find-libs` from the +traditional initrd), except that it also handles symlinks and +directories according to certain rules. As input, it receives a +sequence of pairs of paths. The first path is an object to copy into +initrd. The second path (if not empty) is the path to a symlink that +should be placed in the initrd, pointing to that object. How that +object is copied depends on its type. + +1. A regular file is copied directly to the same absolute path in the + initrd. + + - If it is *also* an ELF file, then all of its direct shared + library dependencies are also listed as objects to be copied. + +2. A directory's direct children are listed as objects to be copied, + and a directory at the same absolute path in the initrd is created. + +3. A symlink's target is listed as an object to be copied. + +There are a couple of quirks to mention here. First, the term "object" +refers to the final file path that the developer intends to have +copied into initrd. This means any parent directory is not considered +an object just because its child was listed as an object in the +program input; instead those intermediate directories are simply +created in support of the target object. Second, shared libraries, +directory children, and symlink targets aren't immediately recursed, +because they simply get listed as objects themselves, and are +therefore traversed when they themselves are processed. Finally, +symlinks in the intermediate directories leading to an object are +preserved, meaning an input object `/a/symlink/b` will just result in +initrd containing `/a/symlink -> /target/b` and `/target/b`, even if +`/target` has other children. Preserving symlinks in this manner is +important for things like systemd. 
// pkgs/build-support/kernel/make-initrd-ng/src/main.rs
//
// make-initrd-ng: reads (object, symlink) pairs from a file and copies each
// object into an output root at its original absolute path, queueing shared
// library dependencies, directory children and symlink targets as further
// objects.
//
// Usage: make-initrd-ng <contents-file> <output-root>
//
// From the accompanying README.md:
//
//   These rules automate the most important and obviously necessary
//   copying that needs to be done in most cases, allowing programs and
//   configuration files to go unpatched, while keeping the content of the
//   initrd to a minimum.
//
//   # Why Rust?
//
//   - A prototype of this logic was written in Bash, in an attempt to keep
//     with its `find-libs` ancestor, but that program was difficult to
//     write, and ended up taking several minutes to run. This program runs
//     in less than a second, and the code is substantially easier to work
//     with.
//
//   - This will not require end users to install a rust toolchain to use
//     NixOS, as long as this tool is cached by Hydra. And if you're
//     bootstrapping NixOS from source, rustc is already required anyway.
//
//   - Rust was favored over Python for its type system, and because if you
//     want to go fast, why not go *really fast*?
//
// NOTE(review): the generic parameter lists in this file were missing from the
// reviewed text (angle-bracketed spans had been stripped) and were
// reconstructed from how each item is used -- confirm against upstream.

use std::collections::{HashSet, VecDeque};
use std::env;
use std::ffi::OsStr;
use std::fs;
use std::hash::Hash;
use std::io::{BufRead, BufReader, Error, ErrorKind};
use std::os::unix;
use std::path::{Component, Path, PathBuf};
use std::process::{Command, Stdio};

/// FIFO queue that accepts each distinct value at most once over its whole
/// lifetime, even after that value has been popped.
struct NonRepeatingQueue<T> {
    /// Values waiting to be processed, in insertion order.
    queue: VecDeque<T>,
    /// Every value ever accepted by `push_back`.
    seen: HashSet<T>,
}

impl<T> NonRepeatingQueue<T> {
    /// Creates an empty queue with an empty "seen" set.
    fn new() -> NonRepeatingQueue<T> {
        NonRepeatingQueue {
            queue: VecDeque::new(),
            seen: HashSet::new(),
        }
    }
}

impl<T: Clone + Eq + Hash> NonRepeatingQueue<T> {
    /// Enqueues `value` unless it has been enqueued before.
    ///
    /// Returns `true` if the value was added, `false` if it was a repeat.
    fn push_back(&mut self, value: T) -> bool {
        if self.seen.contains(&value) {
            return false;
        }
        self.seen.insert(value.clone());
        self.queue.push_back(value);
        true
    }

    /// Removes and returns the oldest queued value, or `None` when empty.
    /// Popped values stay in the "seen" set.
    fn pop_front(&mut self) -> Option<T> {
        self.queue.pop_front()
    }
}

/// Runs `patchelf <mode> <path>` and returns its standard output.
///
/// patchelf's stderr is forwarded to this process's stderr.
///
/// # Errors
///
/// Fails if patchelf cannot be spawned, exits unsuccessfully, or prints
/// output that is not valid UTF-8.
fn patch_elf<S: AsRef<OsStr>, P: AsRef<OsStr>>(mode: S, path: P) -> Result<String, Error> {
    let output = Command::new("patchelf")
        .arg(&mode)
        .arg(&path)
        .stderr(Stdio::inherit())
        .output()?;
    if !output.status.success() {
        return Err(Error::new(
            ErrorKind::Other,
            format!("failed: patchelf {:?} {:?}", OsStr::new(&mode), OsStr::new(&path)),
        ));
    }
    // Report undecodable output as an error instead of panicking.
    String::from_utf8(output.stdout).map_err(|e| Error::new(ErrorKind::InvalidData, e))
}

/// Splits the output of `patchelf --print-rpath` into search directories.
///
/// Empty entries are dropped: joining a library name onto an empty path
/// would probe the current working directory and could queue a relative path.
fn parse_rpath(rpath: &str) -> Vec<PathBuf> {
    rpath
        .trim()
        .split(':')
        .filter(|dir| !dir.is_empty())
        .map(PathBuf::from)
        .collect()
}

/// Returns the first `<dir>/<name>` that exists, searching `rpath` in order.
fn find_in_rpath(rpath: &[PathBuf], name: &str) -> Option<PathBuf> {
    rpath.iter().map(|dir| dir.join(name)).find(|lib| lib.exists())
}

/// Copies the regular file `source` to `target` and, if `ldd` accepts it as a
/// dynamically linked object, queues its interpreter and every needed library
/// that can be found through its rpath.
///
/// # Errors
///
/// Fails if the copy fails, if `ldd` cannot be spawned, or if patchelf cannot
/// print the rpath or the needed libraries.
fn copy_file<P: AsRef<Path> + AsRef<OsStr>, S: AsRef<Path>>(
    source: P,
    target: S,
    queue: &mut NonRepeatingQueue<Box<Path>>,
) -> Result<(), Error> {
    fs::copy(&source, target)?;

    if !Command::new("ldd").arg(&source).output()?.status.success() {
        println!("{:?} is not dynamically linked. Not recursing.", OsStr::new(&source));
        return Ok(());
    }

    let rpath_string = patch_elf("--print-rpath", &source)?;
    let needed_string = patch_elf("--print-needed", &source)?;
    // Shared libraries don't have an interpreter
    if let Ok(interpreter_string) = patch_elf("--print-interpreter", &source) {
        queue.push_back(Box::from(Path::new(interpreter_string.trim())));
    }

    let rpath = parse_rpath(&rpath_string);

    for line in needed_string.lines() {
        match find_in_rpath(&rpath, line) {
            // No need to recurse. The queue will bring it back round.
            Some(lib) => {
                queue.push_back(lib.into_boxed_path());
            }
            // glibc makes it tricky to make this an error because
            // none of the files have a useful rpath.
            None => println!(
                "Warning: Couldn't satisfy dependency {} for {:?}",
                line,
                OsStr::new(&source)
            ),
        }
    }

    Ok(())
}

/// Queues every direct child of the directory `source`.
///
/// # Errors
///
/// Fails if the directory or one of its entries cannot be read.
fn queue_dir<P: AsRef<Path>>(
    source: P,
    queue: &mut NonRepeatingQueue<Box<Path>>,
) -> Result<(), Error> {
    for entry in fs::read_dir(source)? {
        // No need to recurse. The queue will bring us back round here on its own.
        queue.push_back(entry?.path().into_boxed_path());
    }

    Ok(())
}

/// Recreates the object `p` underneath `root`, one path component at a time.
///
/// * Intermediate directories are created but their contents are not copied.
/// * A regular file is copied unless something already exists at the target.
/// * A symlink is recreated with the same link text; the remaining components
///   are appended to the link's target, the result is queued if it exists on
///   the source side, and processing of `p` stops there.
/// * If the final component is a directory, its children are queued.
///
/// # Errors
///
/// Propagates any filesystem error and any error from `copy_file`.
///
/// # Panics
///
/// Panics if `p` contains a Windows path prefix.
fn handle_path(
    root: &Path,
    p: &Path,
    queue: &mut NonRepeatingQueue<Box<Path>>,
) -> Result<(), Error> {
    let mut source = PathBuf::new();
    let mut target = root.to_path_buf();
    let mut iter = p.components().peekable();
    while let Some(comp) = iter.next() {
        match comp {
            Component::Prefix(_) => panic!("This tool is not meant for Windows"),
            Component::RootDir => {
                target.clear();
                target.push(root);
                source.clear();
                source.push("/");
            }
            Component::CurDir => {}
            Component::ParentDir => {
                // Don't over-pop the target if the path has too many ParentDirs
                if source.pop() {
                    target.pop();
                }
            }
            Component::Normal(name) => {
                target.push(name);
                source.push(name);
                let typ = fs::symlink_metadata(&source)?.file_type();
                if typ.is_file() && !target.exists() {
                    copy_file(&source, &target, queue)?;
                } else if typ.is_symlink() {
                    let link_target = fs::read_link(&source)?;

                    // Create the link, then push its target to the queue.
                    // `Path::exists` follows symlinks, so a link that is
                    // already present but whose target has not been copied
                    // yet would look absent and be created a second time;
                    // test the link itself instead.
                    if fs::symlink_metadata(&target).is_err() {
                        unix::fs::symlink(&link_target, &target)?;
                    }
                    source.pop();
                    source.push(link_target);
                    for rest in iter.by_ref() {
                        source.push(rest);
                    }
                    if source.exists() {
                        queue.push_back(Box::from(source.as_path()));
                    }
                    break;
                } else if typ.is_dir() {
                    if !target.exists() {
                        fs::create_dir(&target)?;
                    }

                    // Only recursively copy if the directory is the target object
                    if iter.peek().is_none() {
                        queue_dir(&source, queue)?;
                    }
                }
            }
        }
    }

    Ok(())
}

/// Groups input lines into `(object, symlink)` pairs; the symlink may be an
/// empty string.
///
/// # Errors
///
/// Propagates read errors and returns `ErrorKind::InvalidData` when the final
/// object has no matching symlink line.
fn read_pairs<I>(mut lines: I) -> Result<Vec<(String, String)>, Error>
where
    I: Iterator<Item = Result<String, Error>>,
{
    let mut pairs = Vec::new();
    while let Some(obj) = lines.next() {
        // Lines should always come in pairs
        let obj = obj?;
        let sym = match lines.next() {
            Some(sym) => sym?,
            None => {
                return Err(Error::new(
                    ErrorKind::InvalidData,
                    format!("object {:?} has no symlink line; input lines must come in pairs", obj),
                ))
            }
        };
        pairs.push((obj, sym));
    }
    Ok(pairs)
}

/// Entry point: `make-initrd-ng <contents-file> <output-root>`.
///
/// Queues every listed object, creates the requested symlinks under the
/// output root, then drains the queue through `handle_path`.
fn main() -> Result<(), Error> {
    let args: Vec<String> = env::args().collect();
    if args.len() < 3 {
        return Err(Error::new(
            ErrorKind::InvalidInput,
            "usage: make-initrd-ng <contents-file> <output-root>",
        ));
    }
    let input = fs::File::open(&args[1])?;
    let output = &args[2];
    let out_path = Path::new(output);

    let mut queue = NonRepeatingQueue::<Box<Path>>::new();

    for (obj, sym) in read_pairs(BufReader::new(input).lines())? {
        let obj_path = Path::new(&obj);
        queue.push_back(Box::from(obj_path));
        if !sym.is_empty() {
            println!("{} -> {}", &sym, &obj);
            // We don't care about preserving symlink structure here
            // nearly as much as for the actual objects.
            // Built by string concatenation on purpose: joining an absolute
            // `sym` onto a path with `Path::join` would discard the root.
            let link_string = format!("{}/{}", output, sym);
            let link_path = Path::new(&link_string);
            if let Some(link_parent) = link_path.parent() {
                fs::create_dir_all(link_parent)?;
            }
            unix::fs::symlink(obj_path, link_path)?;
        }
    }
    while let Some(obj) = queue.pop_front() {
        println!("{:?}", obj);
        handle_path(out_path, &obj, &mut queue)?;
    }

    Ok(())
}

// Packaging, from the same patch (pkgs/top-level/all-packages.nix, hunk
// @@ -732,6 +732,9 @@, inserted after `makeInitrd` and before `makeWrapper`):
//
//   makeInitrdNG = callPackage ../build-support/kernel/make-initrd-ng.nix;
//   makeInitrdNGTool = callPackage ../build-support/kernel/make-initrd-ng-tool.nix {};