nixos/qemu-vm: refactor bootDisk generation using make-disk-image

This commit is contained in:
Raito Bezarius 2022-12-20 21:18:10 +01:00
parent 13ea5dc163
commit 76c7b656bf

View File

@ -55,6 +55,11 @@ let
};
# Pick the partition table layout name for the generated disk image:
# "none" when the default filesystems are not used, otherwise "efi" or
# "legacy" depending on whether EFI boot is requested.
selectPartitionTableLayout = { useEFIBoot, useDefaultFilesystems }:
  if !useDefaultFilesystems then "none"
  else if useEFIBoot then "efi"
  else "legacy";
driveCmdline = idx: { file, driveExtraOpts, deviceExtraOpts, ... }:
let
drvId = "drive${toString idx}";
@ -98,7 +103,6 @@ let
addDeviceNames =
imap1 (idx: drive: drive // { device = driveDeviceName idx; });
# Shell script to start the VM.
startVM =
''
@ -111,8 +115,23 @@ let
NIX_DISK_IMAGE=$(readlink -f "''${NIX_DISK_IMAGE:-${toString config.virtualisation.diskImage}}") || test -z "$NIX_DISK_IMAGE"
if test -n "$NIX_DISK_IMAGE" && ! test -e "$NIX_DISK_IMAGE"; then
${qemu}/bin/qemu-img create -f qcow2 "$NIX_DISK_IMAGE" \
${toString config.virtualisation.diskSize}M
echo "Disk image do not exist, creating the virtualisation disk image..."
# If we are using a bootloader and default filesystems layout.
# We have to reuse the system image layout as a backing image format (CoW)
# So we can write on the top of it.
# If we are not using the default FS layout, potentially, we are interested into
# performing operations in postDeviceCommands or at early boot on the raw device.
# We can still boot through QEMU direct kernel boot feature.
# CoW prevent size to be attributed to an image.
# FIXME: raise this issue to upstream.
${qemu}/bin/qemu-img create \
${concatStringsSep " \\\n" ([ "-f qcow2" ]
++ optional (cfg.useBootLoader && cfg.useDefaultFilesystems) "-F qcow2 -b ${systemImage}/nixos.qcow2"
++ optional (!(cfg.useBootLoader && cfg.useDefaultFilesystems)) "-o size=${toString config.virtualisation.diskSize}M"
++ [ "$NIX_DISK_IMAGE" ])}
echo "Virtualisation disk image created."
fi
# Create a directory for storing temporary data of the running VM.
@ -158,13 +177,13 @@ let
${qemu}/bin/qemu-img create -f qcow2 -F qcow2 -b ${bootDisk}/disk.img "$TMPDIR/disk.img"
fi
NIX_EFI_VARS=$(readlink -f "''${NIX_EFI_VARS:-${cfg.efiVars}}")
NIX_EFI_VARS=$(readlink -f "''${NIX_EFI_VARS:-${config.system.name}-efi-vars.fd}")
${lib.optionalString cfg.useEFIBoot
''
# VM needs writable EFI vars
if ! test -e "$NIX_EFI_VARS"; then
cp ${bootDisk}/efi-vars.fd "$NIX_EFI_VARS"
cp ${systemImage}/efi-vars.fd "$NIX_EFI_VARS"
chmod 0644 "$NIX_EFI_VARS"
fi
''}
@ -200,95 +219,29 @@ let
regInfo = pkgs.closureInfo { rootPaths = config.virtualisation.additionalPaths; };
# Generate a hard disk image containing a /boot partition and GRUB
# in the MBR. Used when the `useBootLoader' option is set.
# Uses `runInLinuxVM` to create the image in a throwaway VM.
# See note [Disk layout with `useBootLoader`].
# FIXME: use nixos/lib/make-disk-image.nix.
bootDisk =
pkgs.vmTools.runInLinuxVM (
pkgs.runCommand "nixos-boot-disk"
{ preVM =
''
mkdir $out
diskImage=$out/disk.img
${qemu}/bin/qemu-img create -f qcow2 $diskImage "120M"
${lib.optionalString cfg.useEFIBoot ''
efiVars=$out/efi-vars.fd
cp ${cfg.efi.variables} $efiVars
chmod 0644 $efiVars
''}
'';
buildInputs = [ pkgs.util-linux ];
QEMU_OPTS = "-nographic -serial stdio -monitor none"
+ lib.optionalString cfg.useEFIBoot (
" -drive if=pflash,format=raw,unit=0,readonly=on,file=${cfg.efi.firmware}"
+ " -drive if=pflash,format=raw,unit=1,file=$efiVars");
}
''
# Create a /boot EFI partition with 120M and arbitrary but fixed GUIDs for reproducibility
${pkgs.gptfdisk}/bin/sgdisk \
--set-alignment=1 --new=1:34:2047 --change-name=1:BIOSBootPartition --typecode=1:ef02 \
--set-alignment=512 --largest-new=2 --change-name=2:EFISystem --typecode=2:ef00 \
--attributes=1:set:1 \
--attributes=2:set:2 \
--disk-guid=97FD5997-D90B-4AA3-8D16-C1723AEA73C1 \
--partition-guid=1:1C06F03B-704E-4657-B9CD-681A087A2FDC \
--partition-guid=2:970C694F-AFD0-4B99-B750-CDB7A329AB6F \
--hybrid 2 \
--recompute-chs /dev/vda
${optionalString (config.boot.loader.grub.device != "/dev/vda")
# In this throwaway VM, we only have the /dev/vda disk, but the
# actual VM described by `config` (used by `switch-to-configuration`
# below) may set `boot.loader.grub.device` to a different device
# that's nonexistent in the throwaway VM.
# Create a symlink for that device, so that the `grub-install`
# by `switch-to-configuration` will hit /dev/vda anyway.
''
ln -s /dev/vda ${config.boot.loader.grub.device}
''
}
${pkgs.dosfstools}/bin/mkfs.fat -F16 /dev/vda2
export MTOOLS_SKIP_CHECK=1
${pkgs.mtools}/bin/mlabel -i /dev/vda2 ::boot
# Mount /boot; load necessary modules first.
${pkgs.kmod}/bin/insmod ${pkgs.linux}/lib/modules/*/kernel/fs/nls/nls_cp437.ko.xz || true
${pkgs.kmod}/bin/insmod ${pkgs.linux}/lib/modules/*/kernel/fs/nls/nls_iso8859-1.ko.xz || true
${pkgs.kmod}/bin/insmod ${pkgs.linux}/lib/modules/*/kernel/fs/fat/fat.ko.xz || true
${pkgs.kmod}/bin/insmod ${pkgs.linux}/lib/modules/*/kernel/fs/fat/vfat.ko.xz || true
${pkgs.kmod}/bin/insmod ${pkgs.linux}/lib/modules/*/kernel/fs/efivarfs/efivarfs.ko.xz || true
mkdir /boot
mount /dev/vda2 /boot
${optionalString config.boot.loader.efi.canTouchEfiVariables ''
mount -t efivarfs efivarfs /sys/firmware/efi/efivars
''}
# This is needed for GRUB 0.97, which doesn't know about virtio devices.
mkdir /boot/grub
echo '(hd0) /dev/vda' > /boot/grub/device.map
# This is needed for systemd-boot to find ESP, and udev is not available here to create this
mkdir -p /dev/block
ln -s /dev/vda2 /dev/block/254:2
# Set up system profile (normally done by nixos-rebuild / nix-env --set)
mkdir -p /nix/var/nix/profiles
ln -s ${config.system.build.toplevel} /nix/var/nix/profiles/system-1-link
ln -s /nix/var/nix/profiles/system-1-link /nix/var/nix/profiles/system
# Install bootloader
touch /etc/NIXOS
export NIXOS_INSTALL_BOOTLOADER=1
${config.system.build.toplevel}/bin/switch-to-configuration boot
umount /boot
'' # */
);
# The system image resembles a complete NixOS installation: it carries
# both a boot partition and a root partition.
systemImage = import ../../lib/make-disk-image.nix {
  inherit config lib pkgs;

  # Image contents.
  onlyNixStore = false;
  additionalPaths = [ regInfo ];
  copyChannel = false;

  # Image format and sizing.
  format = "qcow2";
  diskSize = "auto";
  additionalSpace = "0M";

  # Partition layout follows the EFI / default-filesystems settings.
  partitionTableType = selectPartitionTableLayout {
    inherit (cfg) useDefaultFilesystems useEFIBoot;
  };

  # The bootloader is installed on the system image only when we boot
  # through a bootloader AND the default filesystems are in use.
  # With a custom filesystem layout there may be no ESP at all, or a
  # partition table that is incompatible with the GRUB configuration,
  # which could make the disk image build fail. In that case the user is
  # expected to install the bootloader on their own ESP/disk, usually by
  # building their own disk image.
  # TODO: a finer-grained heuristic for `installBootLoader` that inspects
  # the actual contents of `cfg.fileSystems` would be welcome as a PR.
  installBootLoader = cfg.useBootLoader && cfg.useDefaultFilesystems;

  # EFI-specific inputs.
  touchEFIVars = cfg.useEFIBoot;
  OVMF = cfg.efi.OVMF;
};
storeImage = import ../../lib/make-disk-image.nix {
inherit pkgs config lib;
@ -297,17 +250,42 @@ let
onlyNixStore = true;
partitionTableType = "none";
installBootLoader = false;
touchEFIVars = false;
diskSize = "auto";
additionalSpace = "0M";
copyChannel = false;
};
# Symbolic name of the boot scenario, derived from the
# `useDefaultFilesystems`, `useBootLoader` and `useEFIBoot` settings.
# Anything that does not use the default filesystems is "custom".
bootConfiguration =
  if !cfg.useDefaultFilesystems then "custom"
  else if !cfg.useBootLoader then "direct_boot_with_default_fs"
  else if cfg.useEFIBoot then "efi_bootloading_with_default_fs"
  else "legacy_bootloading_with_default_fs";
# Root device suggested for the current `bootConfiguration`.
suggestedRootDevice =
  let
    disk = cfg.bootLoaderDevice;
    candidates = {
      # Bootloader + EFI: root is the 2nd partition of the boot loader disk.
      "efi_bootloading_with_default_fs" = "${disk}2";
      # Bootloader + legacy boot: root is the 1st partition.
      "legacy_bootloading_with_default_fs" = "${disk}1";
      # Direct boot: the whole disk is the root device.
      "direct_boot_with_default_fs" = disk;
      # `null` deliberately triggers a NixOS module type-checking error,
      # so the user is asked to set a rootDevice explicitly: after all
      # "computations" the definition ends up looking like
      # `rootDevice = lib.mkDefault null;`.
      "custom" = null;
    };
  in
  builtins.getAttr bootConfiguration candidates;
in
{
# Modules pulled in by this one: the QEMU guest profile, plus compatibility
# shims that turn uses of renamed/removed `virtualisation.*` options into
# explanatory evaluation messages.
imports = [
  ../profiles/qemu-guest.nix
  (mkRenamedOptionModule [ "virtualisation" "pathsInNixDB" ] [ "virtualisation" "additionalPaths" ])
  (mkRemovedOptionModule [ "virtualisation" "bootDevice" ] "This option was renamed to `virtualisation.rootDevice`, as it was incorrectly named and misleading. Take the time to review what you want to do and look at the new options like `virtualisation.{bootLoaderDevice, bootPartition}`, open an issue in case of issues.")
  (mkRemovedOptionModule [ "virtualisation" "efiVars" ] "This option was removed, it is possible to provide a template UEFI variable with `virtualisation.efi.variables` ; if this option is important to you, open an issue")
  # Note: the message below used to end with a stray, unbalanced backtick.
  (mkRemovedOptionModule [ "virtualisation" "persistBootDevice" ] "Boot device is always persisted if you use a bootloader through the root disk image ; if this does not work for your usecase, please examine carefully what `virtualisation.{bootLoaderDevice, rootDevice, bootPartition}` options offers you and open an issue explaining your need.")
];
options = {
@ -362,13 +340,47 @@ in
'';
};
virtualisation.bootDevice =
virtualisation.bootLoaderDevice =
mkOption {
type = types.path;
default = lookupDriveDeviceName "root" cfg.qemu.drives;
defaultText = literalExpression ''lookupDriveDeviceName "root" cfg.qemu.drives'';
example = "/dev/vda";
description =
lib.mdDoc ''
The disk to be used for the root filesystem.
The disk to be used for the boot filesystem.
By default, it is the same disk as the root filesystem.
'';
};
# Partition holding the /boot filesystem; nullable, and null by default
# unless EFI boot is enabled.
virtualisation.bootPartition =
  mkOption {
    description =
      lib.mdDoc ''
        The boot partition to be used to mount /boot filesystem.
        In legacy boots, this should be null.
        By default, in EFI boot, it is the first partition of the boot device.
      '';
    type = types.nullOr types.path;
    example = "/dev/vda1";
    default = if cfg.useEFIBoot then "${cfg.bootLoaderDevice}1" else null;
    defaultText = literalExpression ''if cfg.useEFIBoot then "''${cfg.bootLoaderDevice}1" else null'';
  };
# Disk or partition carrying the root filesystem. No `default` is declared
# in this option declaration itself.
virtualisation.rootDevice =
  mkOption {
    description =
      lib.mdDoc ''
        The disk or partition to be used for the root filesystem.
        By default (read the source code for more details):
        - under EFI with a bootloader: 2nd partition of the boot disk
        - in legacy boot with a bootloader: 1st partition of the boot disk
        - in direct boot (i.e. without a bootloader): whole disk
        In case you are not using a default boot device or a default filesystem, you have to set explicitly your root device.
      '';
    example = "/dev/vda2";
    type = types.nullOr types.path;
  };
@ -749,10 +761,22 @@ in
};
virtualisation.efi = {
# OVMF firmware package used for EFI boot; by default it is built with
# Secure Boot support matching `cfg.useSecureBoot`.
OVMF = mkOption {
  type = types.package;
  default = (pkgs.OVMF.override {
    secureBoot = cfg.useSecureBoot;
  }).fd;
  # Wrapped in `literalExpression`, like the sibling `firmware` and
  # `variables` options, so the default is rendered as Nix code in the docs.
  defaultText = literalExpression ''
    (pkgs.OVMF.override {
      secureBoot = cfg.useSecureBoot;
    }).fd
  '';
  description =
    lib.mdDoc "OVMF firmware package, defaults to OVMF configured with secure boot if needed.";
};
firmware = mkOption {
type = types.path;
default = pkgs.OVMF.firmware;
defaultText = literalExpression "pkgs.OVMF.firmware";
default = cfg.efi.OVMF.firmware;
defaultText = literalExpression "cfg.efi.OVMF.firmware";
description =
lib.mdDoc ''
Firmware binary for EFI implementation, defaults to OVMF.
@ -761,8 +785,8 @@ in
variables = mkOption {
type = types.path;
default = pkgs.OVMF.variables;
defaultText = literalExpression "pkgs.OVMF.variables";
default = cfg.efi.OVMF.variables;
defaultText = literalExpression "cfg.efi.OVMF.variables";
description =
lib.mdDoc ''
Platform-specific flash binary for EFI variables, implementation-dependent to the EFI firmware.
@ -786,18 +810,17 @@ in
'';
};
virtualisation.efiVars =
virtualisation.useSecureBoot =
mkOption {
type = types.str;
default = "./${config.system.name}-efi-vars.fd";
defaultText = literalExpression ''"./''${config.system.name}-efi-vars.fd"'';
type = types.bool;
default = false;
description =
lib.mdDoc ''
Path to nvram image containing UEFI variables. The will be created
on startup if it does not exist.
Enable Secure Boot support in the EFI firmware.
'';
};
virtualisation.bios =
mkOption {
type = types.nullOr types.package;
@ -853,30 +876,13 @@ in
${opt.writableStore} = false;
'';
# Note [Disk layout with `useBootLoader`]
#
# If `useBootLoader = true`, we configure 2 drives:
# `/dev/?da` for the root disk, and `/dev/?db` for the boot disk
# which has the `/boot` partition and the boot loader.
# Concretely:
#
# * The second drive's image `disk.img` is created in `bootDisk = ...`
# using a throwaway VM. Note that there the disk is always `/dev/vda`,
# even though in the final VM it will be at `/dev/*b`.
# * The disks are attached in `virtualisation.qemu.drives`.
# Their order makes them appear as devices `a`, `b`, etc.
# * `fileSystems."/boot"` is adjusted to be on device `b`.
# * The disk.img is recreated each time the VM is booted unless
# virtualisation.persistBootDevice is set.
# If `useBootLoader`, GRUB goes to the second disk, see
# note [Disk layout with `useBootLoader`].
boot.loader.grub.device = mkVMOverride (
if cfg.useBootLoader
then driveDeviceName 2 # second disk
else cfg.bootDevice
);
# In UEFI boot, we use an EFI-only partition table layout, so GRUB would fail when trying to install
# for both legacy and UEFI. To avoid this, we set the device to "nodev" to force a UEFI-only install.
# Otherwise, we set the proper bootloader device.
# FIXME: make sense of this mess w.r.t. multiple ESPs present in the system, probably use boot.efiSysMountpoint?
boot.loader.grub.device = mkVMOverride (if cfg.useEFIBoot then "nodev" else cfg.bootLoaderDevice);
boot.loader.grub.gfxmodeBios = with cfg.resolution; "${toString x}x${toString y}";
virtualisation.rootDevice = mkDefault suggestedRootDevice;
boot.initrd.kernelModules = optionals (cfg.useNixStoreImage && !cfg.writableStore) [ "erofs" ];
@ -890,10 +896,10 @@ in
''
# If the disk image appears to be empty, run mke2fs to
# initialise.
FSTYPE=$(blkid -o value -s TYPE ${cfg.bootDevice} || true)
PARTTYPE=$(blkid -o value -s PTTYPE ${cfg.bootDevice} || true)
FSTYPE=$(blkid -o value -s TYPE ${cfg.rootDevice} || true)
PARTTYPE=$(blkid -o value -s PTTYPE ${cfg.rootDevice} || true)
if test -z "$FSTYPE" -a -z "$PARTTYPE"; then
mke2fs -t ext4 ${cfg.bootDevice}
mke2fs -t ext4 ${cfg.rootDevice}
fi
'';
@ -939,8 +945,6 @@ in
optional cfg.writableStore "overlay"
++ optional (cfg.qemu.diskInterface == "scsi") "sym53c8xx";
virtualisation.bootDevice = mkDefault (driveDeviceName 1);
virtualisation.additionalPaths = [ config.system.build.toplevel ];
virtualisation.sharedDirectories = {
@ -997,7 +1001,7 @@ in
])
(mkIf cfg.useEFIBoot [
"-drive if=pflash,format=raw,unit=0,readonly=on,file=${cfg.efi.firmware}"
"-drive if=pflash,format=raw,unit=1,file=$NIX_EFI_VARS"
"-drive if=pflash,format=raw,unit=1,readonly=off,file=$NIX_EFI_VARS"
])
(mkIf (cfg.bios != null) [
"-bios ${cfg.bios}/bios.bin"
@ -1013,23 +1017,14 @@ in
file = ''"$NIX_DISK_IMAGE"'';
driveExtraOpts.cache = "writeback";
driveExtraOpts.werror = "report";
deviceExtraOpts.bootindex = "1";
}])
(mkIf cfg.useNixStoreImage [{
name = "nix-store";
file = ''"$TMPDIR"/store.img'';
deviceExtraOpts.bootindex = if cfg.useBootLoader then "3" else "2";
deviceExtraOpts.bootindex = "2";
driveExtraOpts.format = if cfg.writableStore then "qcow2" else "raw";
}])
(mkIf cfg.useBootLoader [
# The order of this list determines the device names, see
# note [Disk layout with `useBootLoader`].
{
name = "boot";
file = ''"$TMPDIR"/disk.img'';
driveExtraOpts.media = "disk";
deviceExtraOpts.bootindex = "1";
}
])
(imap0 (idx: _: {
file = "$(pwd)/empty${toString idx}.qcow2";
driveExtraOpts.werror = "report";
@ -1065,7 +1060,7 @@ in
device = "tmpfs";
fsType = "tmpfs";
} else {
device = cfg.bootDevice;
device = cfg.rootDevice;
fsType = "ext4";
autoFormat = true;
});
@ -1086,9 +1081,8 @@ in
options = [ "mode=0755" ];
neededForBoot = true;
};
# see note [Disk layout with `useBootLoader`]
"/boot" = lib.mkIf cfg.useBootLoader {
device = "${lookupDriveDeviceName "boot" cfg.qemu.drives}2"; # 2 for e.g. `vdb2`, as created in `bootDisk`
"/boot" = lib.mkIf (cfg.useBootLoader && cfg.bootPartition != null) {
device = cfg.bootPartition; # 1 for e.g. `vda1`, as created in `systemImage`
fsType = "vfat";
noCheck = true; # fsck fails on a r/o filesystem
};