summaryrefslogtreecommitdiffstats
path: root/src/vmspawn/vmspawn.c
diff options
context:
space:
mode:
author Daniel Baumann <daniel.baumann@progress-linux.org> 2024-06-12 03:50:40 +0000
committer Daniel Baumann <daniel.baumann@progress-linux.org> 2024-06-12 03:50:40 +0000
commit fc53809803cd2bc2434e312b19a18fa36776da12 (patch)
tree b4b43bd6538f51965ce32856e9c053d0f90919c8 /src/vmspawn/vmspawn.c
parent Adding upstream version 255.5. (diff)
download systemd-fc53809803cd2bc2434e312b19a18fa36776da12.tar.xz
systemd-fc53809803cd2bc2434e312b19a18fa36776da12.zip
Adding upstream version 256. upstream/256
Signed-off-by: Daniel Baumann <daniel.baumann@progress-linux.org>
Diffstat (limited to 'src/vmspawn/vmspawn.c')
-rw-r--r-- src/vmspawn/vmspawn.c 1960
1 files changed, 1747 insertions, 213 deletions
diff --git a/src/vmspawn/vmspawn.c b/src/vmspawn/vmspawn.c
index ebae681..326722d 100644
--- a/src/vmspawn/vmspawn.c
+++ b/src/vmspawn/vmspawn.c
@@ -1,59 +1,136 @@
/* SPDX-License-Identifier: LGPL-2.1-or-later */
+#include <net/if.h>
+#include <linux/if.h>
#include <getopt.h>
#include <stdint.h>
+#include <stdio.h>
#include <stdlib.h>
-#include <sys/wait.h>
+#include <string.h>
+#include <sys/stat.h>
#include <unistd.h>
+#include "sd-daemon.h"
+#include "sd-event.h"
+#include "sd-id128.h"
+
#include "alloc-util.h"
#include "architecture.h"
+#include "bootspec.h"
#include "build.h"
+#include "bus-internal.h"
+#include "bus-locator.h"
+#include "bus-wait-for-jobs.h"
+#include "chase.h"
#include "common-signal.h"
#include "copy.h"
#include "creds-util.h"
+#include "dirent-util.h"
+#include "discover-image.h"
+#include "dissect-image.h"
#include "escape.h"
+#include "ether-addr-util.h"
+#include "event-util.h"
+#include "extract-word.h"
+#include "fd-util.h"
#include "fileio.h"
#include "format-util.h"
#include "fs-util.h"
+#include "gpt.h"
#include "hexdecoct.h"
#include "hostname-util.h"
+#include "io-util.h"
+#include "kernel-image.h"
#include "log.h"
#include "machine-credential.h"
+#include "macro.h"
#include "main-func.h"
+#include "mkdir.h"
+#include "netif-util.h"
#include "pager.h"
#include "parse-argument.h"
#include "parse-util.h"
+#include "path-lookup.h"
#include "path-util.h"
+#include "pidref.h"
#include "pretty-print.h"
#include "process-util.h"
-#include "sd-event.h"
+#include "ptyfwd.h"
+#include "random-util.h"
+#include "rm-rf.h"
#include "signal-util.h"
#include "socket-util.h"
+#include "stat-util.h"
+#include "stdio-util.h"
+#include "string-util.h"
#include "strv.h"
+#include "time-util.h"
#include "tmpfile-util.h"
+#include "unit-name.h"
+#include "vmspawn-mount.h"
+#include "vmspawn-register.h"
+#include "vmspawn-scope.h"
#include "vmspawn-settings.h"
#include "vmspawn-util.h"
+#define VM_TAP_HASH_KEY SD_ID128_MAKE(01,d0,c6,4c,2b,df,24,fb,c0,f8,b2,09,7d,59,b2,93)
+
+typedef struct SSHInfo { /* Connection details handed to bus_open_in_machine() to reach the VM via ssh */
+ unsigned cid; /* VSOCK CID; becomes the "root@vsock/<cid>" ssh destination */
+ char *private_key_path; /* passed to ssh as "IdentityFile=<path>" */
+ unsigned port; /* passed to ssh via "-p" */
+} SSHInfo;
+
+static bool arg_quiet = false;
static PagerFlags arg_pager_flags = 0;
+static char *arg_directory = NULL;
static char *arg_image = NULL;
static char *arg_machine = NULL;
-static char *arg_qemu_smp = NULL;
-static uint64_t arg_qemu_mem = 2ULL * 1024ULL * 1024ULL * 1024ULL;
-static int arg_qemu_kvm = -1;
-static int arg_qemu_vsock = -1;
-static uint64_t arg_vsock_cid = UINT64_MAX;
-static bool arg_qemu_gui = false;
+static char *arg_cpus = NULL;
+static uint64_t arg_ram = UINT64_C(2) * U64_GB;
+static int arg_kvm = -1;
+static int arg_vsock = -1;
+static unsigned arg_vsock_cid = VMADDR_CID_ANY;
+static int arg_tpm = -1;
+static char *arg_linux = NULL;
+static char **arg_initrds = NULL;
+static ConsoleMode arg_console_mode = CONSOLE_INTERACTIVE;
+static NetworkStack arg_network_stack = NETWORK_STACK_NONE;
static int arg_secure_boot = -1;
-static MachineCredential *arg_credentials = NULL;
-static size_t arg_n_credentials = 0;
+static MachineCredentialContext arg_credentials = {};
+static uid_t arg_uid_shift = UID_INVALID, arg_uid_range = 0x10000U;
+static RuntimeMountContext arg_runtime_mounts = {};
static SettingsMask arg_settings_mask = 0;
-static char **arg_parameters = NULL;
-
+static char *arg_firmware = NULL;
+static char *arg_runtime_directory = NULL;
+static char *arg_forward_journal = NULL;
+static bool arg_runtime_directory_created = false;
+static bool arg_privileged = false;
+static bool arg_register = false;
+static sd_id128_t arg_uuid = {};
+static char **arg_kernel_cmdline_extra = NULL;
+static char **arg_extra_drives = NULL;
+static char *arg_background = NULL;
+static bool arg_pass_ssh_key = true;
+static char *arg_ssh_key_type = NULL;
+static bool arg_discard_disk = true;
+struct ether_addr arg_network_provided_mac = {};
+
+STATIC_DESTRUCTOR_REGISTER(arg_directory, freep);
STATIC_DESTRUCTOR_REGISTER(arg_image, freep);
STATIC_DESTRUCTOR_REGISTER(arg_machine, freep);
-STATIC_DESTRUCTOR_REGISTER(arg_qemu_smp, freep);
-STATIC_DESTRUCTOR_REGISTER(arg_parameters, strv_freep);
+STATIC_DESTRUCTOR_REGISTER(arg_cpus, freep);
+STATIC_DESTRUCTOR_REGISTER(arg_runtime_directory, freep);
+STATIC_DESTRUCTOR_REGISTER(arg_credentials, machine_credential_context_done);
+STATIC_DESTRUCTOR_REGISTER(arg_firmware, freep);
+STATIC_DESTRUCTOR_REGISTER(arg_linux, freep);
+STATIC_DESTRUCTOR_REGISTER(arg_initrds, strv_freep);
+STATIC_DESTRUCTOR_REGISTER(arg_runtime_mounts, runtime_mount_context_done);
+STATIC_DESTRUCTOR_REGISTER(arg_forward_journal, freep);
+STATIC_DESTRUCTOR_REGISTER(arg_kernel_cmdline_extra, strv_freep);
+STATIC_DESTRUCTOR_REGISTER(arg_extra_drives, strv_freep);
+STATIC_DESTRUCTOR_REGISTER(arg_background, freep);
+STATIC_DESTRUCTOR_REGISTER(arg_ssh_key_type, freep);
static int help(void) {
_cleanup_free_ char *link = NULL;
@@ -67,29 +144,56 @@ static int help(void) {
printf("%1$s [OPTIONS...] [ARGUMENTS...]\n\n"
"%5$sSpawn a command or OS in a virtual machine.%6$s\n\n"
- " -h --help Show this help\n"
- " --version Print version string\n"
- " --no-pager Do not pipe output into a pager\n\n"
- "%3$sImage:%4$s\n"
- " -i --image=PATH Root file system disk image (or device node) for\n"
- " the virtual machine\n\n"
- "%3$sHost Configuration:%4$s\n"
- " --qemu-smp=SMP Configure guest's SMP settings\n"
- " --qemu-mem=MEM Configure guest's RAM size\n"
- " --qemu-kvm=BOOL Configure whether to use KVM or not\n"
- " --qemu-vsock=BOOL Configure whether to use qemu with a vsock or not\n"
- " --vsock-cid= Specify the CID to use for the qemu guest's vsock\n"
- " --qemu-gui Start QEMU in graphical mode\n"
- " --secure-boot=BOOL Configure whether to search for firmware which\n"
- " supports Secure Boot\n\n"
- "%3$sSystem Identity:%4$s\n"
- " -M --machine=NAME Set the machine name for the container\n"
- "%3$sCredentials:%4$s\n"
+ " -h --help Show this help\n"
+ " --version Print version string\n"
+ " -q --quiet Do not show status information\n"
+ " --no-pager Do not pipe output into a pager\n"
+ "\n%3$sImage:%4$s\n"
+ " -D --directory=PATH Root directory for the VM\n"
+ " -i --image=FILE|DEVICE Root file system disk image or device for the VM\n"
+ "\n%3$sHost Configuration:%4$s\n"
+ " --cpus=CPUS Configure number of CPUs in guest\n"
+ " --ram=BYTES Configure guest's RAM size\n"
+ " --kvm=BOOL Enable use of KVM\n"
+ " --vsock=BOOL Override autodetection of VSOCK support\n"
+ " --vsock-cid=CID Specify the CID to use for the guest's VSOCK support\n"
+ " --tpm=BOOL Enable use of a virtual TPM\n"
+ " --linux=PATH Specify the linux kernel for direct kernel boot\n"
+ " --initrd=PATH Specify the initrd for direct kernel boot\n"
+ " -n --network-tap Create a TAP device for networking\n"
+ " --network-user-mode Use user mode networking\n"
+ " --secure-boot=BOOL Enable searching for firmware supporting SecureBoot\n"
+ " --firmware=PATH|list Select firmware definition file (or list available)\n"
+ " --discard-disk=BOOL Control processing of discard requests\n"
+ "\n%3$sSystem Identity:%4$s\n"
+ " -M --machine=NAME Set the machine name for the VM\n"
+ " --uuid=UUID Set a specific machine UUID for the VM\n"
+ "\n%3$sProperties:%4$s\n"
+ " --register=BOOLEAN Register VM with systemd-machined\n"
+ "\n%3$sUser Namespacing:%4$s\n"
+ " --private-users=UIDBASE[:NUIDS]\n"
+ " Configure the UID/GID range to map into the\n"
+ " virtiofsd namespace\n"
+ "\n%3$sMounts:%4$s\n"
+ " --bind=SOURCE[:TARGET]\n"
+ " Mount a file or directory from the host into the VM\n"
+ " --bind-ro=SOURCE[:TARGET]\n"
+ " Mount a file or directory, but read-only\n"
+ " --extra-drive=PATH Adds an additional disk to the virtual machine\n"
+ "\n%3$sIntegration:%4$s\n"
+ " --forward-journal=FILE|DIR\n"
+ " Forward the VM's journal to the host\n"
+ " --pass-ssh-key=BOOL Create an SSH key to access the VM\n"
+ " --ssh-key-type=TYPE Choose what type of SSH key to pass\n"
+ "\n%3$sInput/Output:%4$s\n"
+ " --console=MODE Console mode (interactive, native, gui)\n"
+ " --background=COLOR Set ANSI color for background\n"
+ "\n%3$sCredentials:%4$s\n"
" --set-credential=ID:VALUE\n"
- " Pass a credential with literal value to container.\n"
+ " Pass a credential with literal value to the VM\n"
" --load-credential=ID:PATH\n"
- " Load credential to pass to container from file or\n"
- " AF_UNIX stream socket.\n"
+ " Load credential for the VM from file or AF_UNIX\n"
+ " stream socket.\n"
"\nSee the %2$s for details.\n",
program_invocation_short_name,
link,
@@ -101,36 +205,91 @@ static int help(void) {
return 0;
}
+/* Picks up settings that are passed via the environment rather than the command line.
+ *
+ * Currently only $SYSTEMD_VMSPAWN_NETWORK_MAC is consulted: if set, it is parsed with
+ * parse_ether_addr() into arg_network_provided_mac.
+ *
+ * Returns 0 if the variable is unset or parsed fine, otherwise the (logged) negative error. */
+static int parse_environment(void) {
+ const char *mac = getenv("SYSTEMD_VMSPAWN_NETWORK_MAC");
+ int k;
+
+ /* Variable not set: nothing to apply */
+ if (!mac)
+ return 0;
+
+ k = parse_ether_addr(mac, &arg_network_provided_mac);
+ if (k >= 0)
+ return 0;
+
+ return log_error_errno(k, "Failed to parse provided MAC address via environment variable");
+}
+
static int parse_argv(int argc, char *argv[]) {
enum {
ARG_VERSION = 0x100,
ARG_NO_PAGER,
- ARG_QEMU_SMP,
- ARG_QEMU_MEM,
- ARG_QEMU_KVM,
- ARG_QEMU_VSOCK,
+ ARG_CPUS,
+ ARG_RAM,
+ ARG_KVM,
+ ARG_VSOCK,
ARG_VSOCK_CID,
+ ARG_TPM,
+ ARG_LINUX,
+ ARG_INITRD,
ARG_QEMU_GUI,
+ ARG_NETWORK_USER_MODE,
+ ARG_UUID,
+ ARG_REGISTER,
+ ARG_BIND,
+ ARG_BIND_RO,
+ ARG_EXTRA_DRIVE,
ARG_SECURE_BOOT,
+ ARG_PRIVATE_USERS,
+ ARG_FORWARD_JOURNAL,
+ ARG_PASS_SSH_KEY,
+ ARG_SSH_KEY_TYPE,
ARG_SET_CREDENTIAL,
ARG_LOAD_CREDENTIAL,
+ ARG_FIRMWARE,
+ ARG_DISCARD_DISK,
+ ARG_CONSOLE,
+ ARG_BACKGROUND,
};
static const struct option options[] = {
- { "help", no_argument, NULL, 'h' },
- { "version", no_argument, NULL, ARG_VERSION },
- { "no-pager", no_argument, NULL, ARG_NO_PAGER },
- { "image", required_argument, NULL, 'i' },
- { "machine", required_argument, NULL, 'M' },
- { "qemu-smp", required_argument, NULL, ARG_QEMU_SMP },
- { "qemu-mem", required_argument, NULL, ARG_QEMU_MEM },
- { "qemu-kvm", required_argument, NULL, ARG_QEMU_KVM },
- { "qemu-vsock", required_argument, NULL, ARG_QEMU_VSOCK },
- { "vsock-cid", required_argument, NULL, ARG_VSOCK_CID },
- { "qemu-gui", no_argument, NULL, ARG_QEMU_GUI },
- { "secure-boot", required_argument, NULL, ARG_SECURE_BOOT },
- { "set-credential", required_argument, NULL, ARG_SET_CREDENTIAL },
- { "load-credential", required_argument, NULL, ARG_LOAD_CREDENTIAL },
+ { "help", no_argument, NULL, 'h' },
+ { "version", no_argument, NULL, ARG_VERSION },
+ { "quiet", no_argument, NULL, 'q' },
+ { "no-pager", no_argument, NULL, ARG_NO_PAGER },
+ { "image", required_argument, NULL, 'i' },
+ { "directory", required_argument, NULL, 'D' },
+ { "machine", required_argument, NULL, 'M' },
+ { "cpus", required_argument, NULL, ARG_CPUS },
+ { "qemu-smp", required_argument, NULL, ARG_CPUS }, /* Compat alias */
+ { "ram", required_argument, NULL, ARG_RAM },
+ { "qemu-mem", required_argument, NULL, ARG_RAM }, /* Compat alias */
+ { "kvm", required_argument, NULL, ARG_KVM },
+ { "qemu-kvm", required_argument, NULL, ARG_KVM }, /* Compat alias */
+ { "vsock", required_argument, NULL, ARG_VSOCK },
+ { "qemu-vsock", required_argument, NULL, ARG_VSOCK }, /* Compat alias */
+ { "vsock-cid", required_argument, NULL, ARG_VSOCK_CID },
+ { "tpm", required_argument, NULL, ARG_TPM },
+ { "linux", required_argument, NULL, ARG_LINUX },
+ { "initrd", required_argument, NULL, ARG_INITRD },
+ { "console", required_argument, NULL, ARG_CONSOLE },
+ { "qemu-gui", no_argument, NULL, ARG_QEMU_GUI }, /* compat option */
+ { "network-tap", no_argument, NULL, 'n' },
+ { "network-user-mode", no_argument, NULL, ARG_NETWORK_USER_MODE },
+ { "uuid", required_argument, NULL, ARG_UUID },
+ { "register", required_argument, NULL, ARG_REGISTER },
+ { "bind", required_argument, NULL, ARG_BIND },
+ { "bind-ro", required_argument, NULL, ARG_BIND_RO },
+ { "extra-drive", required_argument, NULL, ARG_EXTRA_DRIVE },
+ { "secure-boot", required_argument, NULL, ARG_SECURE_BOOT },
+ { "private-users", required_argument, NULL, ARG_PRIVATE_USERS },
+ { "forward-journal", required_argument, NULL, ARG_FORWARD_JOURNAL },
+ { "pass-ssh-key", required_argument, NULL, ARG_PASS_SSH_KEY },
+ { "ssh-key-type", required_argument, NULL, ARG_SSH_KEY_TYPE },
+ { "set-credential", required_argument, NULL, ARG_SET_CREDENTIAL },
+ { "load-credential", required_argument, NULL, ARG_LOAD_CREDENTIAL },
+ { "firmware", required_argument, NULL, ARG_FIRMWARE },
+ { "discard-disk", required_argument, NULL, ARG_DISCARD_DISK },
+ { "background", required_argument, NULL, ARG_BACKGROUND },
{}
};
@@ -140,7 +299,7 @@ static int parse_argv(int argc, char *argv[]) {
assert(argv);
optind = 0;
- while ((c = getopt_long(argc, argv, "+hi:M", options, NULL)) >= 0)
+ while ((c = getopt_long(argc, argv, "+hD:i:M:nq", options, NULL)) >= 0)
switch (c) {
case 'h':
return help();
@@ -148,6 +307,18 @@ static int parse_argv(int argc, char *argv[]) {
case ARG_VERSION:
return version();
+ case 'q':
+ arg_quiet = true;
+ break;
+
+ case 'D':
+ r = parse_path_argument(optarg, /* suppress_root= */ false, &arg_directory);
+ if (r < 0)
+ return r;
+
+ arg_settings_mask |= SETTING_DIRECTORY;
+ break;
+
case 'i':
r = parse_path_argument(optarg, /* suppress_root= */ false, &arg_image);
if (r < 0)
@@ -174,57 +345,163 @@ static int parse_argv(int argc, char *argv[]) {
arg_pager_flags |= PAGER_DISABLE;
break;
- case ARG_QEMU_SMP:
- r = free_and_strdup_warn(&arg_qemu_smp, optarg);
+ case ARG_CPUS:
+ r = free_and_strdup_warn(&arg_cpus, optarg);
if (r < 0)
return r;
break;
- case ARG_QEMU_MEM:
- r = parse_size(optarg, 1024, &arg_qemu_mem);
+ case ARG_RAM:
+ r = parse_size(optarg, 1024, &arg_ram);
if (r < 0)
- return log_error_errno(r, "Failed to parse --qemu-mem=%s: %m", optarg);
+ return log_error_errno(r, "Failed to parse --ram=%s: %m", optarg);
break;
- case ARG_QEMU_KVM:
- r = parse_tristate(optarg, &arg_qemu_kvm);
+ case ARG_KVM:
+ r = parse_tristate(optarg, &arg_kvm);
if (r < 0)
- return log_error_errno(r, "Failed to parse --qemu-kvm=%s: %m", optarg);
+ return log_error_errno(r, "Failed to parse --kvm=%s: %m", optarg);
break;
- case ARG_QEMU_VSOCK:
- r = parse_tristate(optarg, &arg_qemu_vsock);
+ case ARG_VSOCK:
+ r = parse_tristate(optarg, &arg_vsock);
if (r < 0)
- return log_error_errno(r, "Failed to parse --qemu-vsock=%s: %m", optarg);
+ return log_error_errno(r, "Failed to parse --vsock=%s: %m", optarg);
break;
- case ARG_VSOCK_CID: {
- unsigned cid;
+ case ARG_VSOCK_CID:
if (isempty(optarg))
- cid = VMADDR_CID_ANY;
+ arg_vsock_cid = VMADDR_CID_ANY;
else {
- r = safe_atou_bounded(optarg, 3, UINT_MAX - 1, &cid);
- if (r == -ERANGE)
- return log_error_errno(r, "Invalid value for --vsock-cid=: %m");
+ unsigned cid;
+
+ r = vsock_parse_cid(optarg, &cid);
if (r < 0)
- return log_error_errno(r, "Failed to parse --vsock-cid=%s: %m", optarg);
+ return log_error_errno(r, "Failed to parse --vsock-cid: %s", optarg);
+ if (!VSOCK_CID_IS_REGULAR(cid))
+ return log_error_errno(SYNTHETIC_ERRNO(EINVAL), "Specified CID is not regular, refusing: %u", cid);
+
+ arg_vsock_cid = cid;
}
- arg_vsock_cid = (uint64_t)cid;
+ break;
+
+ case ARG_TPM:
+ r = parse_tristate(optarg, &arg_tpm);
+ if (r < 0)
+ return log_error_errno(r, "Failed to parse --tpm=%s: %m", optarg);
+ break;
+
+ case ARG_LINUX:
+ r = parse_path_argument(optarg, /* suppress_root= */ false, &arg_linux);
+ if (r < 0)
+ return r;
+ break;
+
+ case ARG_INITRD: {
+ _cleanup_free_ char *initrd_path = NULL;
+ r = parse_path_argument(optarg, /* suppress_root= */ false, &initrd_path);
+ if (r < 0)
+ return r;
+
+ r = strv_consume(&arg_initrds, TAKE_PTR(initrd_path));
+ if (r < 0)
+ return log_oom();
+
break;
}
+ case ARG_CONSOLE:
+ arg_console_mode = console_mode_from_string(optarg);
+ if (arg_console_mode < 0)
+ return log_error_errno(arg_console_mode, "Failed to parse specified console mode: %s", optarg);
+
+ break;
+
case ARG_QEMU_GUI:
- arg_qemu_gui = true;
+ arg_console_mode = CONSOLE_GUI;
+ break;
+
+ case 'n':
+ arg_network_stack = NETWORK_STACK_TAP;
break;
+ case ARG_NETWORK_USER_MODE:
+ arg_network_stack = NETWORK_STACK_USER;
+ break;
+
+ case ARG_UUID:
+ r = id128_from_string_nonzero(optarg, &arg_uuid);
+ if (r == -ENXIO)
+ return log_error_errno(SYNTHETIC_ERRNO(EINVAL), "Machine UUID may not be all zeroes.");
+ if (r < 0)
+ return log_error_errno(r, "Invalid UUID: %s", optarg);
+
+ arg_settings_mask |= SETTING_MACHINE_ID;
+ break;
+
+ case ARG_REGISTER:
+ r = parse_boolean_argument("--register=", optarg, &arg_register);
+ if (r < 0)
+ return r;
+ break;
+
+ case ARG_BIND:
+ case ARG_BIND_RO:
+ r = runtime_mount_parse(&arg_runtime_mounts, optarg, c == ARG_BIND_RO);
+ if (r < 0)
+ return log_error_errno(r, "Failed to parse --bind(-ro)= argument %s: %m", optarg);
+
+ arg_settings_mask |= SETTING_BIND_MOUNTS;
+ break;
+
+ case ARG_EXTRA_DRIVE: {
+ _cleanup_free_ char *drive_path = NULL;
+
+ r = parse_path_argument(optarg, /* suppress_root= */ false, &drive_path);
+ if (r < 0)
+ return r;
+
+ r = strv_consume(&arg_extra_drives, TAKE_PTR(drive_path));
+ if (r < 0)
+ return log_oom();
+ break;
+ }
+
case ARG_SECURE_BOOT:
r = parse_tristate(optarg, &arg_secure_boot);
if (r < 0)
return log_error_errno(r, "Failed to parse --secure-boot=%s: %m", optarg);
break;
+ case ARG_PRIVATE_USERS:
+ r = parse_userns_uid_range(optarg, &arg_uid_shift, &arg_uid_range);
+ if (r < 0)
+ return r;
+ break;
+
+ case ARG_FORWARD_JOURNAL:
+ r = parse_path_argument(optarg, /* suppress_root= */ false, &arg_forward_journal);
+ if (r < 0)
+ return r;
+ break;
+
+ case ARG_PASS_SSH_KEY:
+ r = parse_boolean_argument("--pass-ssh-key=", optarg, &arg_pass_ssh_key);
+ if (r < 0)
+ return r;
+ break;
+
+ case ARG_SSH_KEY_TYPE:
+ if (!string_is_safe(optarg))
+ return log_error_errno(SYNTHETIC_ERRNO(EINVAL), "Invalid value for --arg-ssh-key-type=: %s", optarg);
+
+ r = free_and_strdup_warn(&arg_ssh_key_type, optarg);
+ if (r < 0)
+ return r;
+ break;
+
case ARG_SET_CREDENTIAL: {
- r = machine_credential_set(&arg_credentials, &arg_n_credentials, optarg);
+ r = machine_credential_set(&arg_credentials, optarg);
if (r < 0)
return r;
arg_settings_mask |= SETTING_CREDENTIALS;
@@ -232,7 +509,7 @@ static int parse_argv(int argc, char *argv[]) {
}
case ARG_LOAD_CREDENTIAL: {
- r = machine_credential_load(&arg_credentials, &arg_n_credentials, optarg);
+ r = machine_credential_load(&arg_credentials, optarg);
if (r < 0)
return r;
@@ -240,6 +517,43 @@ static int parse_argv(int argc, char *argv[]) {
break;
}
+ case ARG_FIRMWARE:
+ if (streq(optarg, "list")) {
+ _cleanup_strv_free_ char **l = NULL;
+
+ r = list_ovmf_config(&l);
+ if (r < 0)
+ return log_error_errno(r, "Failed to list firmwares: %m");
+
+ bool nl = false;
+ fputstrv(stdout, l, "\n", &nl);
+ if (nl)
+ putchar('\n');
+
+ return 0;
+ }
+
+ if (!isempty(optarg) && !path_is_absolute(optarg) && !startswith(optarg, "./"))
+ return log_error_errno(SYNTHETIC_ERRNO(EINVAL), "Absolute path or path starting with './' required.");
+
+ r = parse_path_argument(optarg, /* suppress_root= */ false, &arg_firmware);
+ if (r < 0)
+ return r;
+
+ break;
+
+ case ARG_DISCARD_DISK:
+ r = parse_boolean_argument("--discard-disk=", optarg, &arg_discard_disk);
+ if (r < 0)
+ return r;
+ break;
+
+ case ARG_BACKGROUND:
+ r = free_and_strdup_warn(&arg_background, optarg);
+ if (r < 0)
+ return r;
+ break;
+
case '?':
return -EINVAL;
@@ -248,9 +562,8 @@ static int parse_argv(int argc, char *argv[]) {
}
if (argc > optind) {
- strv_free(arg_parameters);
- arg_parameters = strv_copy(argv + optind);
- if (!arg_parameters)
+ arg_kernel_cmdline_extra = strv_copy(argv + optind);
+ if (!arg_kernel_cmdline_extra)
return log_oom();
arg_settings_mask |= SETTING_START_MODE;
@@ -274,11 +587,11 @@ static int open_vsock(void) {
r = bind(vsock_fd, &bind_addr.sa, sizeof(bind_addr.vm));
if (r < 0)
- return log_error_errno(errno, "Failed to bind to vsock to address %u:%u: %m", bind_addr.vm.svm_cid, bind_addr.vm.svm_port);
+ return log_error_errno(errno, "Failed to bind to VSOCK address %u:%u: %m", bind_addr.vm.svm_cid, bind_addr.vm.svm_port);
r = listen(vsock_fd, SOMAXCONN_DELUXE);
if (r < 0)
- return log_error_errno(errno, "Failed to listen on vsock: %m");
+ return log_error_errno(errno, "Failed to listen on VSOCK: %m");
return TAKE_FD(vsock_fd);
}
@@ -352,13 +665,13 @@ static int vmspawn_dispatch_vsock_connections(sd_event_source *source, int fd, u
assert(userdata);
if (revents != EPOLLIN) {
- log_warning("Got unexpected poll event for vsock fd.");
+ log_warning("Got unexpected poll event for VSOCK fd.");
return 0;
}
conn_fd = accept4(fd, NULL, NULL, SOCK_CLOEXEC|SOCK_NONBLOCK);
if (conn_fd < 0) {
- log_warning_errno(errno, "Failed to accept connection from vsock fd (%m), ignoring...");
+ log_warning_errno(errno, "Failed to accept connection from VSOCK fd (%m), ignoring...");
return 0;
}
@@ -377,25 +690,84 @@ static int vmspawn_dispatch_vsock_connections(sd_event_source *source, int fd, u
return 0;
}
-static int setup_notify_parent(sd_event *event, int fd, int *exit_status, sd_event_source **notify_event_source) {
+static int setup_notify_parent(sd_event *event, int fd, int *exit_status, sd_event_source **ret_notify_event_source) {
int r;
- r = sd_event_add_io(event, notify_event_source, fd, EPOLLIN, vmspawn_dispatch_vsock_connections, exit_status);
+ assert(event);
+ assert(fd >= 0);
+ assert(exit_status);
+ assert(ret_notify_event_source);
+ /* Watch fd for EPOLLIN on the event loop; vmspawn_dispatch_vsock_connections() gets exit_status as
+ * its userdata, and the new event source is stored in *ret_notify_event_source. */
+ r = sd_event_add_io(event, ret_notify_event_source, fd, EPOLLIN, vmspawn_dispatch_vsock_connections, exit_status);
if (r < 0)
return log_error_errno(r, "Failed to allocate notify socket event source: %m");
- (void) sd_event_source_set_description(*notify_event_source, "vmspawn-notify-sock");
+ (void) sd_event_source_set_description(*ret_notify_event_source, "vmspawn-notify-sock"); /* result deliberately ignored */
+ /* Returns 0 on success; a failure to add the event source was logged and returned above. */
+ return 0;
+}
+
+/* Opens a D-Bus connection into the VM, tunnelled through ssh.
+ *
+ * A "unixexec:" bus address is assembled that runs ssh (or whatever $SYSTEMD_SSH names) with the
+ * given identity file and port against "root@vsock/<cid>", executing systemd-stdio-bridge on the
+ * other side. The bus is then started and stored in *ret.
+ *
+ * Returns 0 on success, -ENOMEM if a string could not be allocated, or the negative error from
+ * sd_bus_new()/sd_bus_start(). Nothing is logged here. */
+static int bus_open_in_machine(sd_bus **ret, unsigned cid, unsigned port, const char *private_key_path) {
+ _cleanup_(sd_bus_flush_close_unrefp) sd_bus *b = NULL;
+ _cleanup_free_ char *escaped_ssh = NULL, *address = NULL;
+ char port_buf[DECIMAL_STR_MAX(unsigned)], cid_buf[DECIMAL_STR_MAX(unsigned)];
+ const char *ssh_binary;
+ int r;
+
+ assert(ret);
+ assert(private_key_path);
+
+ r = sd_bus_new(&b);
+ if (r < 0)
+ return r;
+
+ /* Fall back to plain "ssh" unless the environment names another binary */
+ ssh_binary = secure_getenv("SYSTEMD_SSH");
+ if (!ssh_binary)
+ ssh_binary = "ssh";
+
+ escaped_ssh = bus_address_escape(ssh_binary);
+ if (!escaped_ssh)
+ return -ENOMEM;
+
+ xsprintf(port_buf, "%u", port);
+ xsprintf(cid_buf, "%u", cid);
+
+ address = strjoin(
+ "unixexec:path=", escaped_ssh,
+ /* -x: Disable X11 forwarding
+ * -T: Disable PTY allocation */
+ ",argv1=-xT",
+ ",argv2=-o,argv3=IdentitiesOnly yes",
+ ",argv4=-o,argv5=IdentityFile=", private_key_path,
+ ",argv6=-p,argv7=", port_buf,
+ ",argv8=--",
+ ",argv9=root@vsock/", cid_buf,
+ ",argv10=systemd-stdio-bridge"
+ );
+ if (!address)
+ return -ENOMEM;
+
+ /* Hand the address over to the bus object and mark it as a trusted, non-local system bus client */
+ free_and_replace(b->address, address);
+ b->is_local = false;
+ b->runtime_scope = RUNTIME_SCOPE_SYSTEM;
+ b->trusted = true;
+ b->bus_client = true;
+
+ r = sd_bus_start(b);
+ if (r < 0)
+ return r;
+
+ *ret = TAKE_PTR(b);
return 0;
}
static int on_orderly_shutdown(sd_event_source *s, const struct signalfd_siginfo *si, void *userdata) {
- pid_t pid;
+ PidRef *pidref = userdata;
+ int r;
- pid = PTR_TO_PID(userdata);
- if (pid > 0) {
- /* TODO: actually talk to qemu and ask the guest to shutdown here */
- if (kill(pid, SIGKILL) >= 0) {
+ /* Backup method to shut down the VM when D-BUS access over SSH is not available */
+
+ if (pidref) {
+ r = pidref_kill(pidref, SIGKILL);
+ if (r < 0)
+ log_warning_errno(r, "Failed to kill qemu, terminating: %m");
+ else {
log_info("Trying to halt qemu. Send SIGTERM again to trigger vmspawn to immediately terminate.");
sd_event_source_set_userdata(s, NULL);
return 0;
@@ -406,6 +778,61 @@ static int on_orderly_shutdown(sd_event_source *s, const struct signalfd_siginfo
return 0;
}
+/* Signal handler that relays the received signal to PID 1 inside the VM.
+ *
+ * Connects to the guest's bus through bus_open_in_machine() using the SSHInfo passed as userdata,
+ * asks the guest's manager which unit PID 1 belongs to (GetUnitByPID), and then requests delivery
+ * of si->ssi_signo through KillUnit.
+ *
+ * Returns 0 on success or a negative errno-style error if any step fails; connection and method
+ * call failures are logged here. */
+static int forward_signal_to_vm_pid1(sd_event_source *s, const struct signalfd_siginfo *si, void *userdata) {
+ _cleanup_(bus_wait_for_jobs_freep) BusWaitForJobs *w = NULL;
+ _cleanup_(sd_bus_flush_close_unrefp) sd_bus *bus = NULL;
+ _cleanup_(sd_bus_error_free) sd_bus_error error = SD_BUS_ERROR_NULL;
+ _cleanup_(sd_bus_message_unrefp) sd_bus_message *reply = NULL;
+ SSHInfo *ssh_info = ASSERT_PTR(userdata);
+ const char *vm_pid1;
+ int r;
+
+ assert(s);
+ assert(si);
+
+ r = bus_open_in_machine(&bus, ssh_info->cid, ssh_info->port, ssh_info->private_key_path);
+ if (r < 0)
+ return log_error_errno(r, "Failed to connect to VM to forward signal: %m");
+
+ r = bus_wait_for_jobs_new(bus, &w);
+ if (r < 0)
+ return log_error_errno(r, "Could not watch job: %m");
+
+ /* GetUnitByPID() takes the PID as a "u" argument and returns an object path. The reply must be
+ * requested and kept around: it is parsed right below and vm_pid1 points into it. Previously the
+ * call passed no PID and no reply slot, so the read below operated on a NULL message. */
+ r = bus_call_method(
+ bus,
+ bus_systemd_mgr,
+ "GetUnitByPID",
+ &error,
+ &reply,
+ "u",
+ (uint32_t) 1);
+ if (r < 0)
+ return log_error_errno(r, "Failed to get init process of VM: %s", bus_error_message(&error, r));
+
+ r = sd_bus_message_read(reply, "o", &vm_pid1);
+ if (r < 0)
+ return bus_log_parse_error(r);
+
+ /* NOTE(review): vm_pid1 is the unit object path returned above, not a job path - confirm that
+ * waiting on it here is intended. */
+ r = bus_wait_for_jobs_one(w, vm_pid1, /* quiet */ false, NULL);
+ if (r < 0)
+ return r;
+
+ /* NOTE(review): KillUnit is documented as taking a unit name and a 'whom' selector; verify that
+ * passing the object path and "leader" is accepted by the guest's manager. */
+ r = bus_call_method(
+ bus,
+ bus_systemd_mgr,
+ "KillUnit",
+ &error,
+ NULL,
+ "ssi",
+ vm_pid1,
+ "leader",
+ si->ssi_signo);
+ if (r < 0)
+ return log_error_errno(r, "Failed to forward signal to PID 1 of the VM: %s", bus_error_message(&error, r));
+ log_info("Sent signal %"PRIu32" to the VM's PID 1.", si->ssi_signo);
+
+ return 0;
+}
+
static int on_child_exit(sd_event_source *s, const siginfo_t *si, void *userdata) {
sd_event_exit(sd_event_source_get_event(s), 0);
return 0;
@@ -426,7 +853,6 @@ static int cmdline_add_vsock(char ***cmdline, int vsock_fd) {
assert(addr_len >= sizeof addr.vm);
assert(addr.vm.svm_family == AF_VSOCK);
- log_info("Using vsock-stream:%u:%u", (unsigned) VMADDR_CID_HOST, addr.vm.svm_port);
r = strv_extendf(cmdline, "type=11,value=io.systemd.credential:vmm.notify_socket=vsock-stream:%u:%u", (unsigned) VMADDR_CID_HOST, addr.vm.svm_port);
if (r < 0)
return r;
@@ -434,22 +860,497 @@ static int cmdline_add_vsock(char ***cmdline, int vsock_fd) {
return 0;
}
-static int run_virtual_machine(void) {
+/* Describes an swtpm instance for this VM as a SocketServicePair and passes it to
+ * start_socket_service_pair().
+ *
+ * The unit name prefix is the scope's prefix with "-tpm" appended; the state directory lives below
+ * arg_runtime_directory and the listening socket is "sock" inside it. swtpm_setup (looked up via
+ * find_executable()) is configured as ExecStartPre= equivalent, 'swtpm' itself as the main command.
+ *
+ * On success the state directory path is stored in *ret_state_tempdir (ownership moves to the caller)
+ * and 0 is returned. On failure a negative errno-style value is returned. */
+static int start_tpm(
+                sd_bus *bus,
+                const char *scope,
+                const char *swtpm,
+                char **ret_state_tempdir) {
+
+        _cleanup_(rm_rf_physical_and_freep) char *tpm_dir = NULL;
+        _cleanup_free_ char *prefix = NULL;
+        _cleanup_(socket_service_pair_done) SocketServicePair ssp = {
+                .socket_type = SOCK_STREAM,
+        };
+        _cleanup_free_ char *setup_binary = NULL, *dir_arg = NULL;
+        int r;
+
+        assert(bus);
+        assert(scope);
+        assert(swtpm);
+        assert(ret_state_tempdir);
+
+        r = unit_name_to_prefix(scope, &prefix);
+        if (r < 0)
+                return log_error_errno(r, "Failed to strip .scope suffix from scope: %m");
+
+        ssp.unit_name_prefix = strjoin(prefix, "-tpm");
+        if (!ssp.unit_name_prefix)
+                return log_oom();
+
+        tpm_dir = path_join(arg_runtime_directory, ssp.unit_name_prefix);
+        if (!tpm_dir)
+                return log_oom();
+
+        /* Only request a runtime directory for the unit if we had to create our own. */
+        if (arg_runtime_directory_created) {
+                ssp.runtime_directory = path_join("systemd/vmspawn", ssp.unit_name_prefix);
+                if (!ssp.runtime_directory)
+                        return log_oom();
+        }
+
+        ssp.listen_address = path_join(tpm_dir, "sock");
+        if (!ssp.listen_address)
+                return log_oom();
+
+        r = find_executable("swtpm_setup", &setup_binary);
+        if (r < 0)
+                return log_error_errno(r, "Failed to find swtpm_setup binary: %m");
+
+        ssp.exec_start_pre = strv_new(setup_binary, "--tpm-state", tpm_dir, "--tpm2", "--pcr-banks", "sha256");
+        if (!ssp.exec_start_pre)
+                return log_oom();
+
+        /* Assemble the full swtpm command line in one go. */
+        dir_arg = strjoin("dir=", tpm_dir);
+        if (!dir_arg)
+                return log_oom();
+
+        ssp.exec_start = strv_new(
+                        swtpm,
+                        "socket",
+                        "--tpm2",
+                        "--tpmstate", dir_arg,
+                        "--ctrl", "type=unixio,fd=3");
+        if (!ssp.exec_start)
+                return log_oom();
+
+        r = start_socket_service_pair(bus, scope, &ssp);
+        if (r < 0)
+                return r;
+
+        *ret_state_tempdir = TAKE_PTR(tpm_dir);
+        return 0;
+}
+
+/* Describes a systemd-journal-remote instance as a SocketServicePair and passes it to
+ * start_socket_service_pair().
+ *
+ * The unit name prefix is the scope's prefix with "-forward-journal" appended and the listen address
+ * is "vsock:2:<port>". Output goes to arg_forward_journal; the split mode is "none" if that path ends
+ * in ".journal" and "host" otherwise.
+ *
+ * If 'ret_listen_address' is non-NULL it receives the listen address string on success (ownership
+ * moves to the caller). Returns 0 on success, negative errno-style value on failure. */
+static int start_systemd_journal_remote(sd_bus *bus, const char *scope, unsigned port, const char *sd_journal_remote, char **ret_listen_address) {
+        _cleanup_free_ char *prefix = NULL;
+        _cleanup_(socket_service_pair_done) SocketServicePair ssp = {
+                .socket_type = SOCK_STREAM,
+        };
+        const char *split_mode;
+        int r;
+
+        assert(bus);
+        assert(scope);
+        assert(sd_journal_remote);
+
+        r = unit_name_to_prefix(scope, &prefix);
+        if (r < 0)
+                return log_error_errno(r, "Failed to strip .scope suffix from scope: %m");
+
+        ssp.unit_name_prefix = strjoin(prefix, "-forward-journal");
+        if (!ssp.unit_name_prefix)
+                return log_oom();
+
+        if (asprintf(&ssp.listen_address, "vsock:2:%u", port) < 0)
+                return log_oom();
+
+        if (endswith(arg_forward_journal, ".journal"))
+                split_mode = "none";
+        else
+                split_mode = "host";
+
+        ssp.exec_start = strv_new(
+                        sd_journal_remote,
+                        "--output", arg_forward_journal,
+                        "--split-mode", split_mode);
+        if (!ssp.exec_start)
+                return log_oom();
+
+        r = start_socket_service_pair(bus, scope, &ssp);
+        if (r < 0)
+                return r;
+
+        if (ret_listen_address)
+                *ret_listen_address = TAKE_PTR(ssp.listen_address);
+
+        return 0;
+}
+
+/* Dissects arg_image and synthesizes a kernel command line argument that points at its root file
+ * system: "root=PARTUUID=<uuid>" if a root partition was found, otherwise "mount.usr=PARTUUID=<uuid>"
+ * if a usr partition was found.
+ *
+ * On success stores the newly allocated argument in *ret and returns 0. Returns -ENOENT if neither
+ * partition exists, or another negative errno-style value on failure. */
+static int discover_root(char **ret) {
+        _cleanup_(dissected_image_unrefp) DissectedImage *image = NULL;
+        _cleanup_free_ char *arg = NULL;
+        const char *key;
+        sd_id128_t uuid;
+        int r;
+
+        assert(ret);
+
+        r = dissect_image_file_and_warn(
+                        arg_image,
+                        /* verity= */ NULL,
+                        /* mount_options= */ NULL,
+                        /* image_policy= */ NULL,
+                        /* flags= */ 0,
+                        &image);
+        if (r < 0)
+                return r;
+
+        /* A root partition takes precedence over a usr partition. */
+        if (image->partitions[PARTITION_ROOT].found) {
+                key = "root=PARTUUID=";
+                uuid = image->partitions[PARTITION_ROOT].uuid;
+        } else if (image->partitions[PARTITION_USR].found) {
+                key = "mount.usr=PARTUUID=";
+                uuid = image->partitions[PARTITION_USR].uuid;
+        } else
+                return log_error_errno(SYNTHETIC_ERRNO(ENOENT), "Cannot perform a direct kernel boot without a root or usr partition, refusing");
+
+        arg = strjoin(key, SD_ID128_TO_UUID_STRING(uuid));
+        if (!arg)
+                return log_oom();
+
+        *ret = TAKE_PTR(arg);
+        return 0;
+}
+
+/* Locates the virtiofsd binary. find_executable() is consulted first; if it reports -ENOENT the two
+ * fixed locations /usr/libexec/virtiofsd and /usr/lib/virtiofsd are probed for an executable file,
+ * in that order.
+ *
+ * On success stores the newly allocated path in *ret and returns 0. Returns -ENOENT if nothing was
+ * found, or another negative errno-style value on lookup errors. */
+static int find_virtiofsd(char **ret) {
+        _cleanup_free_ char *path = NULL;
+        int r;
+
+        assert(ret);
+
+        r = find_executable("virtiofsd", &path);
+        if (r < 0 && r != -ENOENT)
+                return log_error_errno(r, "Error while searching for virtiofsd: %m");
+
+        if (!path)
+                FOREACH_STRING(candidate, "/usr/libexec/virtiofsd", "/usr/lib/virtiofsd") {
+                        if (access(candidate, X_OK) < 0) {
+                                /* Missing or inaccessible candidates are skipped, anything else is fatal. */
+                                if (!IN_SET(errno, ENOENT, EACCES))
+                                        return log_error_errno(errno, "Error while searching for virtiofsd: %m");
+
+                                continue;
+                        }
+
+                        path = strdup(candidate);
+                        if (!path)
+                                return log_oom();
+
+                        break;
+                }
+
+        if (!path)
+                return log_error_errno(SYNTHETIC_ERRNO(ENOENT), "Failed to find virtiofsd binary.");
+
+        *ret = TAKE_PTR(path);
+        return 0;
+}
+
+/* Describes a virtiofsd instance sharing 'directory' as a SocketServicePair and passes it to
+ * start_socket_service_pair().
+ *
+ * Each call gets its own unit name prefix "<scope prefix>-virtiofsd-<n>", where <n> is a counter kept
+ * across calls, and its own state directory below arg_runtime_directory. The socket inside it is named
+ * "sock-<random hex>". If 'uidmap' is set and arg_uid_shift is valid, --uid-map/--gid-map arguments
+ * derived from arg_uid_shift and arg_uid_range are appended.
+ *
+ * On success stores the state directory in *ret_state_tempdir and the socket file name in
+ * *ret_sock_name (ownership of both moves to the caller) and returns 0. Returns a negative errno-style
+ * value on failure. */
+static int start_virtiofsd(sd_bus *bus, const char *scope, const char *directory, bool uidmap, char **ret_state_tempdir, char **ret_sock_name) {
+        _cleanup_(rm_rf_physical_and_freep) char *instance_dir = NULL;
+        _cleanup_free_ char *binary = NULL, *socket_name = NULL, *prefix = NULL;
+        _cleanup_(socket_service_pair_done) SocketServicePair ssp = {
+                .socket_type = SOCK_STREAM,
+        };
+        static unsigned n_instances = 0;
+        int r;
+
+        assert(bus);
+        assert(scope);
+        assert(directory);
+        assert(ret_state_tempdir);
+        assert(ret_sock_name);
+
+        r = find_virtiofsd(&binary);
+        if (r < 0)
+                return r;
+
+        r = unit_name_to_prefix(scope, &prefix);
+        if (r < 0)
+                return log_error_errno(r, "Failed to strip .scope suffix from scope: %m");
+
+        if (asprintf(&ssp.unit_name_prefix, "%s-virtiofsd-%u", prefix, n_instances++) < 0)
+                return log_oom();
+
+        instance_dir = path_join(arg_runtime_directory, ssp.unit_name_prefix);
+        if (!instance_dir)
+                return log_oom();
+
+        /* Only request a runtime directory for the unit if we had to create our own. */
+        if (arg_runtime_directory_created) {
+                ssp.runtime_directory = strjoin("systemd/vmspawn/", ssp.unit_name_prefix);
+                if (!ssp.runtime_directory)
+                        return log_oom();
+        }
+
+        if (asprintf(&socket_name, "sock-%"PRIx64, random_u64()) < 0)
+                return log_oom();
+
+        ssp.listen_address = path_join(instance_dir, socket_name);
+        if (!ssp.listen_address)
+                return log_oom();
+
+        /* QEMU doesn't support submounts so don't announce them */
+        ssp.exec_start = strv_new(
+                        binary,
+                        "--shared-dir", directory,
+                        "--xattr",
+                        "--fd", "3",
+                        "--no-announce-submounts");
+        if (!ssp.exec_start)
+                return log_oom();
+
+        if (uidmap && arg_uid_shift != UID_INVALID) {
+                if (strv_extend(&ssp.exec_start, "--uid-map") < 0)
+                        return log_oom();
+
+                if (strv_extendf(&ssp.exec_start, ":0:" UID_FMT ":" UID_FMT ":", arg_uid_shift, arg_uid_range) < 0)
+                        return log_oom();
+
+                if (strv_extend(&ssp.exec_start, "--gid-map") < 0)
+                        return log_oom();
+
+                if (strv_extendf(&ssp.exec_start, ":0:" GID_FMT ":" GID_FMT ":", arg_uid_shift, arg_uid_range) < 0)
+                        return log_oom();
+        }
+
+        r = start_socket_service_pair(bus, scope, &ssp);
+        if (r < 0)
+                return r;
+
+        *ret_state_tempdir = TAKE_PTR(instance_dir);
+        *ret_sock_name = TAKE_PTR(socket_name);
+
+        return 0;
+}
+
+/* Makes sure arg_kernel_cmdline_extra names a root file system: unless it already contains an entry
+ * starting with "root=" or "mount.usr=", one is determined via discover_root() and appended.
+ *
+ * Returns 0 on success (including when nothing had to be added), negative errno-style value on
+ * failure. */
+static int kernel_cmdline_maybe_append_root(void) {
+        _cleanup_free_ char *root_arg = NULL;
+        int r;
+
+        /* The user already told the kernel where to find its root, nothing to do. */
+        if (strv_find_startswith(arg_kernel_cmdline_extra, "root="))
+                return 0;
+        if (strv_find_startswith(arg_kernel_cmdline_extra, "mount.usr="))
+                return 0;
+
+        r = discover_root(&root_arg);
+        if (r < 0)
+                return r;
+
+        log_debug("Determined root file system %s from dissected image", root_arg);
+
+        if (strv_consume(&arg_kernel_cmdline_extra, TAKE_PTR(root_arg)) < 0)
+                return log_oom();
+
+        return 0;
+}
+
+/* Picks a boot entry from the boot loader configuration found below 'root' (ESP expected at
+ * <root>/efi, XBOOTLDR at <root>/boot) and reports the kernel and initrds it refers to.
+ *
+ * The default entry is used if it is a unified kernel image or a .conf entry. Otherwise the first
+ * unified entry is taken, and failing that the first .conf entry.
+ *
+ * On success stores the kernel path in *ret_linux and the (possibly NULL) list of initrd paths in
+ * *ret_initrds, both newly allocated and owned by the caller, and returns 0. Returns -ENOENT if no
+ * usable entry exists, or another negative errno-style value on failure. */
+static int discover_boot_entry(const char *root, char **ret_linux, char ***ret_initrds) {
+        _cleanup_(boot_config_free) BootConfig config = BOOT_CONFIG_NULL;
+        _cleanup_free_ char *esp = NULL, *xbootldr = NULL;
+        int r;
+
+        assert(root);
+        assert(ret_linux);
+        assert(ret_initrds);
+
+        esp = path_join(root, "efi");
+        if (!esp)
+                return log_oom();
+
+        xbootldr = path_join(root, "boot");
+        if (!xbootldr)
+                return log_oom();
+
+        r = boot_config_load(&config, esp, xbootldr);
+        if (r < 0)
+                return r;
+
+        r = boot_config_select_special_entries(&config, /* skip_efivars= */ true);
+        if (r < 0)
+                return log_error_errno(r, "Failed to find special boot config entries: %m");
+
+        const BootEntry *chosen = boot_config_default_entry(&config);
+
+        /* A default entry of any other type is of no use to us. */
+        if (chosen && !IN_SET(chosen->type, BOOT_ENTRY_UNIFIED, BOOT_ENTRY_CONF))
+                chosen = NULL;
+
+        /* If we cannot determine a default entry search for UKIs (Type #2 EFI Unified Kernel Images)
+         * then .conf files (Type #1 Boot Loader Specification Entries).
+         * https://uapi-group.org/specifications/specs/boot_loader_specification */
+        for (size_t i = 0; !chosen && i < config.n_entries; i++)
+                if (config.entries[i].type == BOOT_ENTRY_UNIFIED)
+                        chosen = config.entries + i;
+
+        for (size_t i = 0; !chosen && i < config.n_entries; i++)
+                if (config.entries[i].type == BOOT_ENTRY_CONF)
+                        chosen = config.entries + i;
+
+        if (!chosen)
+                return log_error_errno(SYNTHETIC_ERRNO(ENOENT), "Failed to discover any boot entries.");
+
+        log_debug("Discovered boot entry %s (%s)", chosen->id, boot_entry_type_to_string(chosen->type));
+
+        _cleanup_free_ char *kernel_path = NULL;
+        _cleanup_strv_free_ char **initrd_paths = NULL;
+
+        switch (chosen->type) {
+
+        case BOOT_ENTRY_UNIFIED:
+                kernel_path = path_join(chosen->root, chosen->kernel);
+                if (!kernel_path)
+                        return log_oom();
+                break;
+
+        case BOOT_ENTRY_CONF:
+                kernel_path = path_join(chosen->root, chosen->kernel);
+                if (!kernel_path)
+                        return log_oom();
+
+                /* Only .conf entries list separate initrds. */
+                STRV_FOREACH(i, chosen->initrd) {
+                        _cleanup_free_ char *p = path_join(chosen->root, *i);
+                        if (!p)
+                                return log_oom();
+
+                        if (strv_consume(&initrd_paths, TAKE_PTR(p)) < 0)
+                                return log_oom();
+                }
+                break;
+
+        default:
+                assert_not_reached();
+        }
+
+        *ret_linux = TAKE_PTR(kernel_path);
+        *ret_initrds = TAKE_PTR(initrd_paths);
+
+        return 0;
+}
+
+/* Concatenates all files listed in arg_initrds into one freshly created temporary file. Before each
+ * input is appended, the output offset is advanced to the next multiple of four bytes.
+ *
+ * On success stores the path of the merged file in *ret (ownership moves to the caller) and returns
+ * 0. On failure returns a negative errno-style value. */
+static int merge_initrds(char **ret) {
+        _cleanup_(rm_rf_physical_and_freep) char *merged_path = NULL;
+        _cleanup_close_ int out_fd = -EBADF;
+        int r;
+
+        assert(ret);
+
+        r = tempfn_random_child(NULL, "vmspawn-initrd-", &merged_path);
+        if (r < 0)
+                return log_error_errno(r, "Failed to create temporary file: %m");
+
+        out_fd = open(merged_path, O_WRONLY|O_CREAT|O_EXCL|O_CLOEXEC, 0600);
+        if (out_fd < 0)
+                return log_error_errno(errno, "Failed to create regular file %s: %m", merged_path);
+
+        STRV_FOREACH(initrd, arg_initrds) {
+                _cleanup_close_ int in_fd = -EBADF;
+                off_t pos, misalign;
+
+                pos = lseek(out_fd, 0, SEEK_CUR);
+                if (pos < 0)
+                        return log_error_errno(errno, "Failed to get file offset of %s: %m", merged_path);
+
+                /* seek to assure 4 byte alignment for each initrd */
+                misalign = pos % 4;
+                if (misalign != 0 && lseek(out_fd, 4 - misalign, SEEK_CUR) < 0)
+                        return log_error_errno(errno, "Failed to seek %s: %m", merged_path);
+
+                in_fd = open(*initrd, O_RDONLY|O_CLOEXEC);
+                if (in_fd < 0)
+                        return log_error_errno(errno, "Failed to open %s: %m", *initrd);
+
+                r = copy_bytes(in_fd, out_fd, UINT64_MAX, COPY_REFLINK);
+                if (r < 0)
+                        return log_error_errno(r, "Failed to copy bytes from %s to %s: %m", *initrd, merged_path);
+        }
+
+        *ret = TAKE_PTR(merged_path);
+        return 0;
+}
+
+/* Sets the title on the given PTY forwarder to "Virtual Machine <arg_machine>", followed by
+ * " on <hostname>" if the local hostname could be determined. If emoji are enabled, a green circle
+ * glyph plus a space is put in front of the title and also installed as title prefix. All failures
+ * are ignored. */
+static void set_window_title(PTYForward *f) {
+        _cleanup_free_ char *hostname = NULL, *glyph = NULL;
+        const char *lead;
+
+        assert(f);
+
+        (void) gethostname_strict(&hostname);
+
+        if (emoji_enabled())
+                glyph = strjoin(special_glyph(SPECIAL_GLYPH_GREEN_CIRCLE), " ");
+
+        /* Without a glyph (emoji disabled or allocation failed) the title simply has no lead-in. */
+        lead = strempty(glyph);
+
+        if (!hostname)
+                (void) pty_forward_set_titlef(f, "%sVirtual Machine %s", lead, arg_machine);
+        else
+                (void) pty_forward_set_titlef(f, "%sVirtual Machine %s on %s", lead, arg_machine, hostname);
+
+        if (glyph)
+                (void) pty_forward_set_title_prefix(f, glyph);
+}
+
+/* Runs ssh-keygen to create an unencrypted key pair at 'key_path'. If 'key_type' is non-NULL it is
+ * passed along via -t. The call blocks until the forked child has finished (FORK_WAIT).
+ *
+ * Returns 0 on success, negative errno-style value if ssh-keygen cannot be found, memory runs out or
+ * safe_fork() reports a failure. */
+static int generate_ssh_keypair(const char *key_path, const char *key_type) {
+        _cleanup_free_ char *keygen = NULL;
+        _cleanup_strv_free_ char **argv = NULL;
+        int r;
+
+        assert(key_path);
+
+        r = find_executable("ssh-keygen", &keygen);
+        if (r < 0)
+                return log_error_errno(r, "Failed to find ssh-keygen: %m");
+
+        if (key_type)
+                argv = strv_new(keygen, "-f", key_path, /* don't encrypt the key */ "-N", "", "-t", key_type);
+        else
+                argv = strv_new(keygen, "-f", key_path, /* don't encrypt the key */ "-N", "");
+        if (!argv)
+                return log_oom();
+
+        if (DEBUG_LOGGING) {
+                _cleanup_free_ char *printable = quote_command_line(argv, SHELL_ESCAPE_EMPTY);
+                if (!printable)
+                        return log_oom();
+
+                log_debug("Executing: %s", printable);
+        }
+
+        r = safe_fork(
+                        keygen,
+                        FORK_WAIT|FORK_RESET_SIGNALS|FORK_CLOSE_ALL_FDS|FORK_DEATHSIG_SIGTERM|FORK_LOG|FORK_RLIMIT_NOFILE_SAFE|FORK_REARRANGE_STDIO,
+                        NULL);
+        if (r < 0)
+                return r;
+        if (r > 0)
+                /* Parent: the child has already been waited for. */
+                return 0;
+
+        /* Child: only returns from execv() on failure. */
+        execv(keygen, argv);
+        log_error_errno(errno, "Failed to execve %s: %m", keygen);
+        _exit(EXIT_FAILURE);
+}
+
+static int run_virtual_machine(int kvm_device_fd, int vhost_device_fd) {
+ SSHInfo ssh_info; /* Used when talking to pid1 via SSH, but must survive until the function ends. */
_cleanup_(ovmf_config_freep) OvmfConfig *ovmf_config = NULL;
+ _cleanup_(sd_bus_flush_close_unrefp) sd_bus *bus = NULL;
+ _cleanup_free_ char *machine = NULL, *qemu_binary = NULL, *mem = NULL, *trans_scope = NULL, *kernel = NULL;
+ _cleanup_(rm_rf_physical_and_freep) char *ssh_private_key_path = NULL, *ssh_public_key_path = NULL;
+ _cleanup_close_ int notify_sock_fd = -EBADF;
_cleanup_strv_free_ char **cmdline = NULL;
- _cleanup_free_ char *machine = NULL, *qemu_binary = NULL, *mem = NULL;
+ _cleanup_free_ int *pass_fds = NULL;
+ size_t n_pass_fds = 0;
+ const char *accel, *shm;
int r;
- _cleanup_close_ int vsock_fd = -EBADF;
- bool use_kvm = arg_qemu_kvm > 0;
- if (arg_qemu_kvm < 0) {
+ if (arg_privileged)
+ r = sd_bus_default_system(&bus);
+ else
+ r = sd_bus_default_user(&bus);
+ if (r < 0)
+ return log_error_errno(r, "Failed to connect to systemd bus: %m");
+
+ r = start_transient_scope(bus, arg_machine, /* allow_pidfd= */ true, &trans_scope);
+ if (r < 0)
+ return r;
+
+ bool use_kvm = arg_kvm > 0;
+ if (arg_kvm < 0) {
r = qemu_check_kvm_support();
if (r < 0)
return log_error_errno(r, "Failed to check for KVM support: %m");
use_kvm = r;
}
- r = find_ovmf_config(arg_secure_boot, &ovmf_config);
+ if (arg_firmware)
+ r = load_ovmf_config(arg_firmware, &ovmf_config);
+ else
+ r = find_ovmf_config(arg_secure_boot, &ovmf_config);
if (r < 0)
return log_error_errno(r, "Failed to find OVMF config: %m");
@@ -458,114 +1359,279 @@ static int run_virtual_machine(void) {
log_warning("Couldn't find OVMF firmware blob with Secure Boot support, "
"falling back to OVMF firmware blobs without Secure Boot support.");
- const char *accel = use_kvm ? "kvm" : "tcg";
- if (IN_SET(native_architecture(), ARCHITECTURE_ARM64, ARCHITECTURE_ARM64_BE))
- machine = strjoin("type=virt,accel=", accel);
+ shm = arg_directory || arg_runtime_mounts.n_mounts != 0 ? ",memory-backend=mem" : "";
+ if (ARCHITECTURE_SUPPORTS_SMM)
+ machine = strjoin("type=" QEMU_MACHINE_TYPE ",smm=", on_off(ovmf_config->supports_sb), shm);
else
- machine = strjoin("type=q35,accel=", accel, ",smm=", on_off(ovmf_config->supports_sb));
+ machine = strjoin("type=" QEMU_MACHINE_TYPE, shm);
if (!machine)
return log_oom();
+ if (arg_linux) {
+ kernel = strdup(arg_linux);
+ if (!kernel)
+ return log_oom();
+ } else if (arg_directory) {
+ /* a kernel is required for directory type images so attempt to locate a UKI under /boot and /efi */
+ r = discover_boot_entry(arg_directory, &kernel, &arg_initrds);
+ if (r < 0)
+ return log_error_errno(r, "Failed to locate UKI in directory type image, please specify one with --linux=.");
+
+ log_debug("Discovered UKI image at %s", kernel);
+ }
+
r = find_qemu_binary(&qemu_binary);
if (r == -EOPNOTSUPP)
return log_error_errno(r, "Native architecture is not supported by qemu.");
if (r < 0)
return log_error_errno(r, "Failed to find QEMU binary: %m");
- if (asprintf(&mem, "%.4fM", (double)arg_qemu_mem / (1024.0 * 1024.0)) < 0)
+ if (asprintf(&mem, "%" PRIu64 "M", DIV_ROUND_UP(arg_ram, U64_MB)) < 0)
return log_oom();
cmdline = strv_new(
qemu_binary,
"-machine", machine,
- "-smp", arg_qemu_smp ?: "1",
+ "-smp", arg_cpus ?: "1",
"-m", mem,
"-object", "rng-random,filename=/dev/urandom,id=rng0",
"-device", "virtio-rng-pci,rng=rng0,id=rng-device0",
- "-nic", "user,model=virtio-net-pci"
+ "-device", "virtio-balloon,free-page-reporting=on"
);
if (!cmdline)
return log_oom();
- bool use_vsock = arg_qemu_vsock > 0 && ARCHITECTURE_SUPPORTS_SMBIOS;
- if (arg_qemu_vsock < 0) {
- r = qemu_check_vsock_support();
+ if (!sd_id128_is_null(arg_uuid))
+ if (strv_extend_many(&cmdline, "-uuid", SD_ID128_TO_UUID_STRING(arg_uuid)) < 0)
+ return log_oom();
+
+ /* Derive a vmgenid automatically from the invocation ID, in a deterministic way. */
+ sd_id128_t vmgenid;
+ r = sd_id128_get_invocation_app_specific(SD_ID128_MAKE(bd,84,6d,e3,e4,7d,4b,6c,a6,85,4a,87,0f,3c,a3,a0), &vmgenid);
+ if (r < 0) {
+ log_debug_errno(r, "Failed to get invocation ID, making up randomized vmgenid: %m");
+
+ r = sd_id128_randomize(&vmgenid);
if (r < 0)
- return log_error_errno(r, "Failed to check for VSock support: %m");
+ return log_error_errno(r, "Failed to make up randomized vmgenid: %m");
+ }
- use_vsock = r;
+ _cleanup_free_ char *vmgenid_device = NULL;
+ if (asprintf(&vmgenid_device, "vmgenid,guid=" SD_ID128_UUID_FORMAT_STR, SD_ID128_FORMAT_VAL(vmgenid)) < 0)
+ return log_oom();
+
+ if (strv_extend_many(&cmdline, "-device", vmgenid_device) < 0)
+ return log_oom();
+
+ /* if we are going to be starting any units with state then create our runtime dir */
+ if (arg_tpm != 0 || arg_directory || arg_runtime_mounts.n_mounts != 0) {
+ r = runtime_directory(&arg_runtime_directory, arg_privileged ? RUNTIME_SCOPE_SYSTEM : RUNTIME_SCOPE_USER, "systemd/vmspawn");
+ if (r < 0)
+ return log_error_errno(r, "Failed to lookup runtime directory: %m");
+ if (r) {
+ /* r > 0 means we need to create our own runtime dir */
+ r = mkdir_p(arg_runtime_directory, 0755);
+ if (r < 0)
+ return log_error_errno(r, "Failed to create runtime directory: %m");
+ arg_runtime_directory_created = true;
+ }
}
- unsigned child_cid = VMADDR_CID_ANY;
- _cleanup_close_ int child_vsock_fd = -EBADF;
- if (use_vsock) {
- if (arg_vsock_cid < UINT_MAX)
- child_cid = (unsigned)arg_vsock_cid;
+ if (arg_network_stack == NETWORK_STACK_TAP) {
+ _cleanup_free_ char *tap_name = NULL;
+ struct ether_addr mac_vm = {};
+
+ tap_name = strjoin("tp-", arg_machine);
+ if (!tap_name)
+ return log_oom();
+
+ (void) net_shorten_ifname(tap_name, /* check_naming_scheme= */ false);
+
+ if (ether_addr_is_null(&arg_network_provided_mac)){
+ r = net_generate_mac(arg_machine, &mac_vm, VM_TAP_HASH_KEY, 0);
+ if (r < 0)
+ return log_error_errno(r, "Failed to generate predictable MAC address for VM side: %m");
+ } else
+ mac_vm = arg_network_provided_mac;
- r = vsock_fix_child_cid(&child_cid, arg_machine, &child_vsock_fd);
+ r = strv_extend(&cmdline, "-nic");
if (r < 0)
- return log_error_errno(r, "Failed to fix CID for the guest vsock socket: %m");
+ return log_oom();
- r = strv_extend(&cmdline, "-device");
+ r = strv_extendf(&cmdline, "tap,ifname=%s,script=no,model=virtio-net-pci,mac=%s", tap_name, ETHER_ADDR_TO_STR(&mac_vm));
if (r < 0)
return log_oom();
+ } else if (arg_network_stack == NETWORK_STACK_USER)
+ r = strv_extend_many(&cmdline, "-nic", "user,model=virtio-net-pci");
+ else
+ r = strv_extend_many(&cmdline, "-nic", "none");
+ if (r < 0)
+ return log_oom();
- log_debug("vhost-vsock-pci,guest-cid=%u,vhostfd=%d", child_cid, child_vsock_fd);
- r = strv_extendf(&cmdline, "vhost-vsock-pci,guest-cid=%u,vhostfd=%d", child_cid, child_vsock_fd);
+ /* A shared memory backend might increase ram usage so only add one if actually necessary for virtiofsd. */
+ if (arg_directory || arg_runtime_mounts.n_mounts != 0) {
+ r = strv_extend(&cmdline, "-object");
if (r < 0)
return log_oom();
+
+ r = strv_extendf(&cmdline, "memory-backend-memfd,id=mem,size=%s,share=on", mem);
+ if (r < 0)
+ return log_oom();
+ }
+
+ bool use_vsock = arg_vsock > 0 && ARCHITECTURE_SUPPORTS_SMBIOS;
+ if (arg_vsock < 0) {
+ r = qemu_check_vsock_support();
+ if (r < 0)
+ return log_error_errno(r, "Failed to check for VSOCK support: %m");
+
+ use_vsock = r;
}
- r = strv_extend_strv(&cmdline, STRV_MAKE("-cpu", "max"), /* filter_duplicates= */ false);
+ if (!use_kvm && kvm_device_fd >= 0) {
+ log_warning("KVM is disabled but fd for /dev/kvm was passed, closing fd and ignoring");
+ kvm_device_fd = safe_close(kvm_device_fd);
+ }
+
+ if (use_kvm && kvm_device_fd >= 0) {
+ /* /dev/fdset/1 is magic string to tell qemu where to find the fd for /dev/kvm
+ * we use this so that we can take a fd to /dev/kvm and then give qemu that fd */
+ accel = "kvm,device=/dev/fdset/1";
+
+ r = strv_extend(&cmdline, "--add-fd");
+ if (r < 0)
+ return log_oom();
+
+ r = strv_extendf(&cmdline, "fd=%d,set=1,opaque=/dev/kvm", kvm_device_fd);
+ if (r < 0)
+ return log_oom();
+
+ if (!GREEDY_REALLOC(pass_fds, n_pass_fds + 1))
+ return log_oom();
+
+ pass_fds[n_pass_fds++] = kvm_device_fd;
+ } else if (use_kvm)
+ accel = "kvm";
+ else
+ accel = "tcg";
+
+ r = strv_extend_many(&cmdline, "-accel", accel);
if (r < 0)
return log_oom();
- if (arg_qemu_gui) {
- r = strv_extend_strv(&cmdline, STRV_MAKE("-vga", "virtio"), /* filter_duplicates= */ false);
+ _cleanup_close_ int child_vsock_fd = -EBADF;
+ unsigned child_cid = arg_vsock_cid;
+ if (use_vsock) {
+ int device_fd = vhost_device_fd;
+
+ if (device_fd < 0) {
+ child_vsock_fd = open("/dev/vhost-vsock", O_RDWR|O_CLOEXEC);
+ if (child_vsock_fd < 0)
+ return log_error_errno(errno, "Failed to open /dev/vhost-vsock as read/write: %m");
+
+ device_fd = child_vsock_fd;
+ }
+
+ r = vsock_fix_child_cid(device_fd, &child_cid, arg_machine);
+ if (r < 0)
+ return log_error_errno(r, "Failed to fix CID for the guest VSOCK socket: %m");
+
+ r = strv_extend(&cmdline, "-device");
if (r < 0)
return log_oom();
- } else {
- r = strv_extend_strv(&cmdline, STRV_MAKE(
- "-nographic",
- "-nodefaults",
- "-chardev", "stdio,mux=on,id=console,signal=off",
- "-serial", "chardev:console",
- "-mon", "console"
- ), /* filter_duplicates= */ false);
+
+ r = strv_extendf(&cmdline, "vhost-vsock-pci,guest-cid=%u,vhostfd=%d", child_cid, device_fd);
if (r < 0)
return log_oom();
+
+ if (!GREEDY_REALLOC(pass_fds, n_pass_fds + 1))
+ return log_oom();
+
+ pass_fds[n_pass_fds++] = device_fd;
}
- if (ARCHITECTURE_SUPPORTS_SMBIOS) {
- ssize_t n;
- FOREACH_ARRAY(cred, arg_credentials, arg_n_credentials) {
- _cleanup_free_ char *cred_data_b64 = NULL;
+ r = strv_extend_many(&cmdline, "-cpu",
+#ifdef __x86_64__
+ "max,hv_relaxed,hv-vapic,hv-time"
+#else
+ "max"
+#endif
+ );
+ if (r < 0)
+ return log_oom();
- n = base64mem(cred->data, cred->size, &cred_data_b64);
- if (n < 0)
- return log_oom();
+ _cleanup_close_ int master = -EBADF;
+ PTYForwardFlags ptyfwd_flags = 0;
+ switch (arg_console_mode) {
- r = strv_extend(&cmdline, "-smbios");
- if (r < 0)
- return log_oom();
+ case CONSOLE_READ_ONLY:
+ ptyfwd_flags |= PTY_FORWARD_READ_ONLY;
- r = strv_extendf(&cmdline, "type=11,value=io.systemd.credential.binary:%s=%s", cred->id, cred_data_b64);
- if (r < 0)
- return log_oom();
- }
+ _fallthrough_;
+
+ case CONSOLE_INTERACTIVE: {
+ _cleanup_free_ char *pty_path = NULL;
+
+ master = openpt_allocate(O_RDWR|O_NONBLOCK, &pty_path);
+ if (master < 0)
+ return log_error_errno(master, "Failed to setup pty: %m");
+
+ if (strv_extend_many(
+ &cmdline,
+ "-nographic",
+ "-nodefaults",
+ "-chardev") < 0)
+ return log_oom();
+
+ if (strv_extendf(&cmdline,
+ "serial,id=console,path=%s", pty_path) < 0)
+ return log_oom();
+
+ r = strv_extend_many(
+ &cmdline,
+ "-serial", "chardev:console");
+ break;
+ }
+
+ case CONSOLE_GUI:
+ r = strv_extend_many(
+ &cmdline,
+ "-vga",
+ "virtio");
+ break;
+
+ case CONSOLE_NATIVE:
+ r = strv_extend_many(
+ &cmdline,
+ "-nographic",
+ "-nodefaults",
+ "-chardev", "stdio,mux=on,id=console,signal=off",
+ "-serial", "chardev:console",
+ "-mon", "console");
+ break;
+
+ default:
+ assert_not_reached();
}
+ if (r < 0)
+ return log_oom();
r = strv_extend(&cmdline, "-drive");
if (r < 0)
return log_oom();
- r = strv_extendf(&cmdline, "if=pflash,format=raw,readonly=on,file=%s", ovmf_config->path);
+ _cleanup_free_ char *escaped_ovmf_config_path = escape_qemu_value(ovmf_config->path);
+ if (!escaped_ovmf_config_path)
+ return log_oom();
+
+ r = strv_extendf(&cmdline, "if=pflash,format=%s,readonly=on,file=%s", ovmf_config_format(ovmf_config), escaped_ovmf_config_path);
if (r < 0)
return log_oom();
_cleanup_(unlink_and_freep) char *ovmf_vars_to = NULL;
if (ovmf_config->supports_sb) {
const char *ovmf_vars_from = ovmf_config->vars;
+ _cleanup_free_ char *escaped_ovmf_vars_to = NULL;
_cleanup_close_ int source_fd = -EBADF, target_fd = -EBADF;
r = tempfn_random_child(NULL, "vmspawn-", &ovmf_vars_to);
@@ -589,63 +1655,413 @@ static int run_virtual_machine(void) {
(void) copy_access(source_fd, target_fd);
(void) copy_times(source_fd, target_fd, 0);
- r = strv_extend_strv(&cmdline, STRV_MAKE(
- "-global", "ICH9-LPC.disable_s3=1",
- "-global", "driver=cfi.pflash01,property=secure,value=on",
- "-drive"
- ), /* filter_duplicates= */ false);
+ r = strv_extend_many(
+ &cmdline,
+ "-global", "ICH9-LPC.disable_s3=1",
+ "-global", "driver=cfi.pflash01,property=secure,value=on",
+ "-drive");
if (r < 0)
return log_oom();
- r = strv_extendf(&cmdline, "file=%s,if=pflash,format=raw", ovmf_vars_to);
+ escaped_ovmf_vars_to = escape_qemu_value(ovmf_vars_to);
+ if (!escaped_ovmf_vars_to)
+ return log_oom();
+
+ r = strv_extendf(&cmdline, "file=%s,if=pflash,format=%s", escaped_ovmf_vars_to, ovmf_config_format(ovmf_config));
if (r < 0)
return log_oom();
}
- r = strv_extend(&cmdline, "-drive");
- if (r < 0)
- return log_oom();
+ STRV_FOREACH(drive, arg_extra_drives) {
+ _cleanup_free_ char *escaped_drive = NULL;
- r = strv_extendf(&cmdline, "if=none,id=mkosi,file=%s,format=raw", arg_image);
- if (r < 0)
- return log_oom();
+ r = strv_extend(&cmdline, "-drive");
+ if (r < 0)
+ return log_oom();
- r = strv_extend_strv(&cmdline, STRV_MAKE(
- "-device", "virtio-scsi-pci,id=scsi",
- "-device", "scsi-hd,drive=mkosi,bootindex=1"
- ), /* filter_duplicates= */ false);
+ escaped_drive = escape_qemu_value(*drive);
+ if (!escaped_drive)
+ return log_oom();
+
+ r = strv_extendf(&cmdline, "format=raw,cache=unsafe,file=%s", escaped_drive);
+ if (r < 0)
+ return log_oom();
+ }
+
+ if (kernel) {
+ r = strv_extend_many(&cmdline, "-kernel", kernel);
+ if (r < 0)
+ return log_oom();
+
+ /* We can't rely on gpt-auto-generator when direct kernel booting so synthesize a root=
+ * kernel argument instead. */
+ if (arg_image) {
+ r = kernel_cmdline_maybe_append_root();
+ if (r < 0)
+ return r;
+ }
+ }
+
+ if (arg_image) {
+ _cleanup_free_ char *escaped_image = NULL;
+
+ assert(!arg_directory);
+
+ r = strv_extend(&cmdline, "-drive");
+ if (r < 0)
+ return log_oom();
+
+ escaped_image = escape_qemu_value(arg_image);
+ if (!escaped_image)
+ log_oom();
+
+ r = strv_extendf(&cmdline, "if=none,id=mkosi,file=%s,format=raw,discard=%s", escaped_image, on_off(arg_discard_disk));
+ if (r < 0)
+ return log_oom();
+
+ r = strv_extend_many(&cmdline,
+ "-device", "virtio-scsi-pci,id=scsi",
+ "-device", "scsi-hd,drive=mkosi,bootindex=1");
+ if (r < 0)
+ return log_oom();
+ }
+
+ if (arg_directory) {
+ _cleanup_free_ char *sock_path = NULL, *sock_name = NULL, *escaped_sock_path = NULL;
+
+ r = start_virtiofsd(bus, trans_scope, arg_directory, /* uidmap= */ true, &sock_path, &sock_name);
+ if (r < 0)
+ return r;
+
+ escaped_sock_path = escape_qemu_value(sock_path);
+ if (!escaped_sock_path)
+ log_oom();
+
+ r = strv_extend(&cmdline, "-chardev");
+ if (r < 0)
+ return log_oom();
+
+ r = strv_extendf(&cmdline, "socket,id=%1$s,path=%2$s/%1$s", sock_name, escaped_sock_path);
+ if (r < 0)
+ return log_oom();
+
+ r = strv_extend(&cmdline, "-device");
+ if (r < 0)
+ return log_oom();
+
+ r = strv_extendf(&cmdline, "vhost-user-fs-pci,queue-size=1024,chardev=%s,tag=root", sock_name);
+ if (r < 0)
+ return log_oom();
+
+ r = strv_extend(&arg_kernel_cmdline_extra, "root=root rootfstype=virtiofs rw");
+ if (r < 0)
+ return log_oom();
+ }
+
+ r = strv_prepend(&arg_kernel_cmdline_extra, "console=" DEFAULT_SERIAL_TTY);
if (r < 0)
return log_oom();
- if (!strv_isempty(arg_parameters)) {
- if (ARCHITECTURE_SUPPORTS_SMBIOS) {
- _cleanup_free_ char *kcl = strv_join(arg_parameters, " ");
- if (!kcl)
+ FOREACH_ARRAY(mount, arg_runtime_mounts.mounts, arg_runtime_mounts.n_mounts) {
+ _cleanup_free_ char *sock_path = NULL, *sock_name = NULL, *clean_target = NULL, *escaped_sock_path = NULL;
+ r = start_virtiofsd(bus, trans_scope, mount->source, /* uidmap= */ false, &sock_path, &sock_name);
+ if (r < 0)
+ return r;
+
+ escaped_sock_path = escape_qemu_value(sock_path);
+ if (!escaped_sock_path)
+ log_oom();
+
+ r = strv_extend(&cmdline, "-chardev");
+ if (r < 0)
+ return log_oom();
+
+ r = strv_extendf(&cmdline, "socket,id=%1$s,path=%2$s/%1$s", sock_name, escaped_sock_path);
+ if (r < 0)
+ return log_oom();
+
+ r = strv_extend(&cmdline, "-device");
+ if (r < 0)
+ return log_oom();
+
+ r = strv_extendf(&cmdline, "vhost-user-fs-pci,queue-size=1024,chardev=%1$s,tag=%1$s", sock_name);
+ if (r < 0)
+ return log_oom();
+
+ clean_target = xescape(mount->target, "\":");
+ if (!clean_target)
+ return log_oom();
+
+ r = strv_extendf(&arg_kernel_cmdline_extra, "systemd.mount-extra=\"%s:%s:virtiofs:%s\"",
+ sock_name, clean_target, mount->read_only ? "ro" : "rw");
+ if (r < 0)
+ return log_oom();
+ }
+
+ if (ARCHITECTURE_SUPPORTS_SMBIOS) {
+ _cleanup_free_ char *kcl = strv_join(arg_kernel_cmdline_extra, " "), *escaped_kcl = NULL;
+ if (!kcl)
+ return log_oom();
+
+ if (kernel) {
+ r = strv_extend_many(&cmdline, "-append", kcl);
+ if (r < 0)
+ return log_oom();
+ } else {
+ if (ARCHITECTURE_SUPPORTS_SMBIOS) {
+ escaped_kcl = escape_qemu_value(kcl);
+ if (!escaped_kcl)
+ log_oom();
+
+ r = strv_extend(&cmdline, "-smbios");
+ if (r < 0)
+ return log_oom();
+
+ r = strv_extendf(&cmdline, "type=11,value=io.systemd.stub.kernel-cmdline-extra=%s", escaped_kcl);
+ if (r < 0)
+ return log_oom();
+
+ r = strv_extend(&cmdline, "-smbios");
+ if (r < 0)
+ return log_oom();
+
+ r = strv_extendf(&cmdline, "type=11,value=io.systemd.boot.kernel-cmdline-extra=%s", escaped_kcl);
+ if (r < 0)
+ return log_oom();
+ } else
+ log_warning("Cannot append extra args to kernel cmdline, native architecture doesn't support SMBIOS, ignoring");
+ }
+ } else
+ log_warning("Cannot append extra args to kernel cmdline, native architecture doesn't support SMBIOS");
+
+ /* disable TPM autodetection if the user's hardware doesn't support it */
+ if (!ARCHITECTURE_SUPPORTS_TPM) {
+ if (arg_tpm < 0) {
+ arg_tpm = 0;
+ log_debug("TPM not support on %s, disabling tpm autodetection and continuing", architecture_to_string(native_architecture()));
+ } else if (arg_tpm > 0)
+ return log_error_errno(SYNTHETIC_ERRNO(EOPNOTSUPP), "TPM not supported on %s, refusing", architecture_to_string(native_architecture()));
+ }
+
+ _cleanup_free_ char *swtpm = NULL;
+ if (arg_tpm != 0) {
+ r = find_executable("swtpm", &swtpm);
+ if (r < 0) {
+ /* log if the user asked for swtpm and we cannot find it */
+ if (arg_tpm > 0)
+ return log_error_errno(r, "Failed to find swtpm binary: %m");
+ /* also log if we got an error other than ENOENT from find_executable */
+ if (r != -ENOENT && arg_tpm < 0)
+ return log_error_errno(r, "Error detecting swtpm: %m");
+ }
+ }
+
+ _cleanup_free_ char *tpm_state_tempdir = NULL;
+ if (swtpm) {
+ r = start_tpm(bus, trans_scope, swtpm, &tpm_state_tempdir);
+ if (r < 0) {
+ /* only bail if the user asked for a tpm */
+ if (arg_tpm > 0)
+ return log_error_errno(r, "Failed to start tpm: %m");
+ log_debug_errno(r, "Failed to start tpm, ignoring: %m");
+ }
+ }
+
+ if (tpm_state_tempdir) {
+ _cleanup_free_ char *escaped_state_dir = NULL;
+
+ escaped_state_dir = escape_qemu_value(tpm_state_tempdir);
+ if (!escaped_state_dir)
+ log_oom();
+
+ r = strv_extend(&cmdline, "-chardev");
+ if (r < 0)
+ return log_oom();
+
+ r = strv_extendf(&cmdline, "socket,id=chrtpm,path=%s/sock", escaped_state_dir);
+ if (r < 0)
+ return log_oom();
+
+ r = strv_extend_many(&cmdline, "-tpmdev", "emulator,id=tpm0,chardev=chrtpm");
+ if (r < 0)
+ return log_oom();
+
+ if (native_architecture() == ARCHITECTURE_X86_64)
+ r = strv_extend_many(&cmdline, "-device", "tpm-tis,tpmdev=tpm0");
+ else if (IN_SET(native_architecture(), ARCHITECTURE_ARM64, ARCHITECTURE_ARM64_BE))
+ r = strv_extend_many(&cmdline, "-device", "tpm-tis-device,tpmdev=tpm0");
+ if (r < 0)
+ return log_oom();
+ }
+
+ char *initrd = NULL;
+ _cleanup_(rm_rf_physical_and_freep) char *merged_initrd = NULL;
+ size_t n_initrds = strv_length(arg_initrds);
+
+ if (n_initrds == 1)
+ initrd = arg_initrds[0];
+ else if (n_initrds > 1) {
+ r = merge_initrds(&merged_initrd);
+ if (r < 0)
+ return r;
+
+ initrd = merged_initrd;
+ }
+
+ if (initrd) {
+ r = strv_extend_many(&cmdline, "-initrd", initrd);
+ if (r < 0)
+ return log_oom();
+ }
+
+ if (arg_forward_journal) {
+ _cleanup_free_ char *sd_journal_remote = NULL, *listen_address = NULL, *cred = NULL;
+
+ r = find_executable_full(
+ "systemd-journal-remote",
+ /* root = */ NULL,
+ STRV_MAKE(LIBEXECDIR),
+ /* use_path_envvar = */ true, /* systemd-journal-remote should be installed in
+ * LIBEXECDIR, but for supporting fancy setups. */
+ &sd_journal_remote,
+ /* ret_fd = */ NULL);
+ if (r < 0)
+ return log_error_errno(r, "Failed to find systemd-journal-remote binary: %m");
+
+ r = start_systemd_journal_remote(bus, trans_scope, child_cid, sd_journal_remote, &listen_address);
+ if (r < 0)
+ return r;
+
+ cred = strjoin("journal.forward_to_socket:", listen_address);
+ if (!cred)
+ return log_oom();
+
+ r = machine_credential_set(&arg_credentials, cred);
+ if (r < 0)
+ return r;
+ }
+
+ if (arg_pass_ssh_key) {
+ _cleanup_free_ char *scope_prefix = NULL, *privkey_path = NULL, *pubkey_path = NULL;
+ const char *key_type = arg_ssh_key_type ?: "ed25519";
+
+ r = unit_name_to_prefix(trans_scope, &scope_prefix);
+ if (r < 0)
+ return log_error_errno(r, "Failed to strip .scope suffix from scope: %m");
+
+ privkey_path = strjoin(arg_runtime_directory, "/", scope_prefix, "-", key_type);
+ if (!privkey_path)
+ return log_oom();
+
+ pubkey_path = strjoin(privkey_path, ".pub");
+ if (!pubkey_path)
+ return log_oom();
+
+ r = generate_ssh_keypair(privkey_path, key_type);
+ if (r < 0)
+ return r;
+
+ ssh_private_key_path = TAKE_PTR(privkey_path);
+ ssh_public_key_path = TAKE_PTR(pubkey_path);
+ }
+
+ if (ssh_public_key_path && ssh_private_key_path) {
+ _cleanup_free_ char *scope_prefix = NULL, *cred_path = NULL;
+
+ cred_path = strjoin("ssh.ephemeral-authorized_keys-all:", ssh_public_key_path);
+ if (!cred_path)
+ return log_oom();
+
+ r = machine_credential_load(&arg_credentials, cred_path);
+ if (r < 0)
+ return log_error_errno(r, "Failed to load credential %s: %m", cred_path);
+
+ r = unit_name_to_prefix(trans_scope, &scope_prefix);
+ if (r < 0)
+ return log_error_errno(r, "Failed to strip .scope suffix from scope: %m");
+
+ /* on distros that provide their own sshd@.service file we need to provide a dropin which
+ * picks up our public key credential */
+ r = machine_credential_set(
+ &arg_credentials,
+ "systemd.unit-dropin.sshd-vsock@.service:"
+ "[Service]\n"
+ "ExecStart=\n"
+ "ExecStart=sshd -i -o 'AuthorizedKeysFile=%d/ssh.ephemeral-authorized_keys-all .ssh/authorized_keys'\n"
+ "ImportCredential=ssh.ephemeral-authorized_keys-all\n");
+ if (r < 0)
+ return log_error_errno(r, "Failed to set credential systemd.unit-dropin.sshd-vsock@.service: %m");
+ }
+
+ if (ARCHITECTURE_SUPPORTS_SMBIOS)
+ FOREACH_ARRAY(cred, arg_credentials.credentials, arg_credentials.n_credentials) {
+ _cleanup_free_ char *cred_data_b64 = NULL;
+ ssize_t n;
+
+ n = base64mem(cred->data, cred->size, &cred_data_b64);
+ if (n < 0)
return log_oom();
r = strv_extend(&cmdline, "-smbios");
if (r < 0)
return log_oom();
- r = strv_extendf(&cmdline, "type=11,value=io.systemd.stub.kernel-cmdline-extra=%s", kcl);
+ r = strv_extendf(&cmdline, "type=11,value=io.systemd.credential.binary:%s=%s", cred->id, cred_data_b64);
if (r < 0)
return log_oom();
- } else
- log_warning("Cannot append extra args to kernel cmdline, native architecture doesn't support SMBIOS");
- }
+ }
if (use_vsock) {
- vsock_fd = open_vsock();
- if (vsock_fd < 0)
- return log_error_errno(vsock_fd, "Failed to open vsock: %m");
+ notify_sock_fd = open_vsock();
+ if (notify_sock_fd < 0)
+ return log_error_errno(notify_sock_fd, "Failed to open VSOCK: %m");
- r = cmdline_add_vsock(&cmdline, vsock_fd);
+ r = cmdline_add_vsock(&cmdline, notify_sock_fd);
if (r == -ENOMEM)
return log_oom();
if (r < 0)
- return log_error_errno(r, "Failed to call getsockname on vsock: %m");
+ return log_error_errno(r, "Failed to call getsockname on VSOCK: %m");
+ }
+
+ const char *e = secure_getenv("SYSTEMD_VMSPAWN_QEMU_EXTRA");
+ if (e) {
+ _cleanup_strv_free_ char **extra = NULL;
+
+ r = strv_split_full(&extra, e, /* separator= */ NULL, EXTRACT_CUNESCAPE|EXTRACT_UNQUOTE);
+ if (r < 0)
+ return log_error_errno(r, "Failed to split $SYSTEMD_VMSPAWN_QEMU_EXTRA environment variable: %m");
+
+ if (strv_extend_strv(&cmdline, extra, /* filter_duplicates= */ false) < 0)
+ return log_oom();
+ }
+
+ if (DEBUG_LOGGING) {
+ _cleanup_free_ char *joined = quote_command_line(cmdline, SHELL_ESCAPE_EMPTY);
+ if (!joined)
+ return log_oom();
+
+ log_debug("Executing: %s", joined);
+ }
+
+ if (arg_register) {
+ char vm_address[STRLEN("vsock/") + DECIMAL_STR_MAX(unsigned)];
+
+ xsprintf(vm_address, "vsock/%u", child_cid);
+ r = register_machine(
+ bus,
+ arg_machine,
+ arg_uuid,
+ trans_scope,
+ arg_directory,
+ child_cid,
+ child_cid != VMADDR_CID_ANY ? vm_address : NULL,
+ ssh_private_key_path);
+ if (r < 0)
+ return r;
}
+ assert_se(sigprocmask_many(SIG_BLOCK, /* old_sigset=*/ NULL, SIGCHLD, SIGWINCH) >= 0);
+
_cleanup_(sd_event_source_unrefp) sd_event_source *notify_event_source = NULL;
_cleanup_(sd_event_unrefp) sd_event *event = NULL;
r = sd_event_new(&event);
@@ -654,15 +2070,16 @@ static int run_virtual_machine(void) {
(void) sd_event_set_watchdog(event, true);
- pid_t child_pid;
- r = safe_fork_full(
+ _cleanup_(pidref_done) PidRef child_pidref = PIDREF_NULL;
+
+ r = pidref_safe_fork_full(
qemu_binary,
- NULL,
- &child_vsock_fd, 1, /* pass the vsock fd to qemu */
- FORK_CLOEXEC_OFF,
- &child_pid);
+ /* stdio_fds= */ NULL,
+ pass_fds, n_pass_fds,
+ FORK_RESET_SIGNALS|FORK_CLOSE_ALL_FDS|FORK_DEATHSIG_SIGTERM|FORK_LOG|FORK_CLOEXEC_OFF|FORK_RLIMIT_NOFILE_SAFE,
+ &child_pidref);
if (r < 0)
- return log_error_errno(r, "Failed to fork off %s: %m", qemu_binary);
+ return r;
if (r == 0) {
/* set TERM and LANG if they are missing */
if (setenv("TERM", "vt220", 0) < 0)
@@ -671,35 +2088,72 @@ static int run_virtual_machine(void) {
if (setenv("LANG", "C.UTF-8", 0) < 0)
return log_oom();
- execve(qemu_binary, cmdline, environ);
+ execv(qemu_binary, cmdline);
log_error_errno(errno, "Failed to execve %s: %m", qemu_binary);
_exit(EXIT_FAILURE);
}
+ /* Close the vsock fd we passed to qemu in the parent. We don't need it anymore. */
+ child_vsock_fd = safe_close(child_vsock_fd);
int exit_status = INT_MAX;
if (use_vsock) {
- r = setup_notify_parent(event, vsock_fd, &exit_status, &notify_event_source);
+ r = setup_notify_parent(event, notify_sock_fd, &exit_status, &notify_event_source);
if (r < 0)
- return log_error_errno(r, "Failed to setup event loop to handle vsock notify events: %m");
+ return log_error_errno(r, "Failed to setup event loop to handle VSOCK notify events: %m");
}
- /* shutdown qemu when we are shutdown */
- (void) sd_event_add_signal(event, NULL, SIGINT, on_orderly_shutdown, PID_TO_PTR(child_pid));
- (void) sd_event_add_signal(event, NULL, SIGTERM, on_orderly_shutdown, PID_TO_PTR(child_pid));
+ /* If we have the vsock address and the SSH key, ask pid1 inside the guest to shutdown. */
+ if (child_cid != VMADDR_CID_ANY && ssh_private_key_path) {
+ ssh_info = (SSHInfo) {
+ .cid = child_cid,
+ .private_key_path = ssh_private_key_path,
+ .port = 22,
+ };
- (void) sd_event_add_signal(event, NULL, SIGRTMIN+18, sigrtmin18_handler, NULL);
+ (void) sd_event_add_signal(event, NULL, SIGINT | SD_EVENT_SIGNAL_PROCMASK, forward_signal_to_vm_pid1, &ssh_info);
+ (void) sd_event_add_signal(event, NULL, SIGTERM | SD_EVENT_SIGNAL_PROCMASK, forward_signal_to_vm_pid1, &ssh_info);
+ } else {
+ /* As a fallback in case SSH cannot be used, send a shutdown signal to the VMM instead. */
+ (void) sd_event_add_signal(event, NULL, SIGINT | SD_EVENT_SIGNAL_PROCMASK, on_orderly_shutdown, &child_pidref);
+ (void) sd_event_add_signal(event, NULL, SIGTERM | SD_EVENT_SIGNAL_PROCMASK, on_orderly_shutdown, &child_pidref);
+ }
+
+ (void) sd_event_add_signal(event, NULL, (SIGRTMIN+18) | SD_EVENT_SIGNAL_PROCMASK, sigrtmin18_handler, NULL);
/* Exit when the child exits */
- (void) sd_event_add_child(event, NULL, child_pid, WEXITED, on_child_exit, NULL);
+ (void) event_add_child_pidref(event, NULL, &child_pidref, WEXITED, on_child_exit, NULL);
+
+ _cleanup_(pty_forward_freep) PTYForward *forward = NULL;
+ if (master >= 0) {
+ r = pty_forward_new(event, master, ptyfwd_flags, &forward);
+ if (r < 0)
+ return log_error_errno(r, "Failed to create PTY forwarder: %m");
+
+ if (!arg_background && shall_tint_background()) {
+ _cleanup_free_ char *bg = NULL;
+
+ r = terminal_tint_color(130 /* green */, &bg);
+ if (r < 0)
+ log_debug_errno(r, "Failed to determine terminal background color, not tinting.");
+ else
+ (void) pty_forward_set_background_color(forward, bg);
+ } else if (!isempty(arg_background))
+ (void) pty_forward_set_background_color(forward, arg_background);
+
+ set_window_title(forward);
+ }
r = sd_event_loop(event);
if (r < 0)
return log_error_errno(r, "Failed to run event loop: %m");
+ if (arg_register)
+ (void) unregister_machine(bus, arg_machine);
+
if (use_vsock) {
if (exit_status == INT_MAX) {
- log_debug("Couldn't retrieve inner EXIT_STATUS from vsock");
+ log_debug("Couldn't retrieve inner EXIT_STATUS from VSOCK");
return EXIT_SUCCESS;
}
if (exit_status != 0)
@@ -713,20 +2167,52 @@ static int run_virtual_machine(void) {
static int determine_names(void) { /* Fills in whichever of arg_directory/arg_image and arg_machine were not supplied; returns 0 on success or the result of the failing log call. */
int r;
- if (!arg_image)
- return log_error_errno(SYNTHETIC_ERRNO(-EINVAL), "Missing required argument -i/--image=, quitting");
+ if (!arg_directory && !arg_image) { /* neither a directory nor an image was given: derive one */
+ if (arg_machine) { /* a machine name is known: look up an image registered under that name */
+ _cleanup_(image_unrefp) Image *i = NULL;
- if (!arg_machine) {
- char *e;
+ r = image_find(IMAGE_MACHINE, arg_machine, NULL, &i);
+ if (r == -ENOENT) /* -ENOENT gets its own message, distinct from other lookup failures */
+ return log_error_errno(r, "No image for machine '%s'.", arg_machine);
+ if (r < 0)
+ return log_error_errno(r, "Failed to find image for machine '%s': %m", arg_machine);
+ /* Raw/block images populate arg_image, directory/subvolume images populate arg_directory; any other type is treated as impossible. */
+ if (IN_SET(i->type, IMAGE_RAW, IMAGE_BLOCK))
+ r = free_and_strdup(&arg_image, i->path);
+ else if (IN_SET(i->type, IMAGE_DIRECTORY, IMAGE_SUBVOLUME))
+ r = free_and_strdup(&arg_directory, i->path);
+ else
+ assert_not_reached();
+ if (r < 0) /* checks the result of whichever free_and_strdup() call ran above */
+ return log_oom();
+ } else { /* no machine name either: fall back to the current working directory */
+ r = safe_getcwd(&arg_directory);
+ if (r < 0)
+ return log_error_errno(r, "Failed to determine current directory: %m");
+ }
+ }
- r = path_extract_filename(arg_image, &arg_machine);
- if (r < 0)
- return log_error_errno(r, "Failed to extract file name from '%s': %m", arg_image);
+ if (!arg_machine) { /* no machine name given: derive one from the directory or image */
+ if (arg_directory && path_equal(arg_directory, "/")) { /* directory is "/": use the name returned by gethostname_malloc() */
+ arg_machine = gethostname_malloc();
+ if (!arg_machine)
+ return log_oom();
+ } else if (arg_image) { /* image given: use its file name, minus a trailing ".raw" */
+ char *e;
- /* Truncate suffix if there is one */
- e = endswith(arg_machine, ".raw");
- if (e)
- *e = 0;
+ r = path_extract_filename(arg_image, &arg_machine);
+ if (r < 0)
+ return log_error_errno(r, "Failed to extract file name from '%s': %m", arg_image);
+
+ /* Truncate suffix if there is one */
+ e = endswith(arg_machine, ".raw");
+ if (e)
+ *e = 0;
+ } else { /* otherwise use the file name component of the directory path */
+ r = path_extract_filename(arg_directory, &arg_machine);
+ if (r < 0)
+ return log_error_errno(r, "Failed to extract file name from '%s': %m", arg_directory);
+ }
hostname_cleanup(arg_machine); /* the derived name is cleaned up and then checked with hostname_is_valid(); the handling of an invalid name is elided by the hunk boundary below */
if (!hostname_is_valid(arg_machine, 0))
@@ -736,31 +2222,79 @@ static int determine_names(void) {
return 0;
}
+static int verify_arguments(void) { /* Rejects option combinations that cannot work; returns 0 when every check passes, otherwise the result of log_error_errno() for the first failing check. */
+ if (arg_network_stack == NETWORK_STACK_TAP && !arg_privileged) /* arg_privileged is set in run() from getuid() == 0 */
+ return log_error_errno(SYNTHETIC_ERRNO(EPERM), "--network-tap requires root privileges, refusing.");
+ /* Initrds are only accepted together with arg_linux. */
+ if (!strv_isempty(arg_initrds) && !arg_linux)
+ return log_error_errno(SYNTHETIC_ERRNO(EINVAL), "Option --initrd= cannot be used without --linux=.");
+ /* arg_register defaults to arg_privileged in run(), so this fires only if registration was turned on later while unprivileged. */
+ if (arg_register && !arg_privileged)
+ return log_error_errno(SYNTHETIC_ERRNO(EPERM), "--register= requires root privileges, refusing.");
+
+ return 0;
+}
+
static int run(int argc, char *argv[]) { /* Entry point: sets defaults, parses environment and arguments, resolves names, validates options, collects passed-in fds and hands over to run_virtual_machine(). */
- int r, ret = EXIT_SUCCESS;
+ int r, kvm_device_fd = -EBADF, vhost_device_fd = -EBADF; /* both fds stay -EBADF unless a matching named fd is found in the loop below */
+ _cleanup_strv_free_ char **names = NULL;
log_setup();
+ arg_privileged = getuid() == 0; /* privileged means a real UID of 0 */
+
+ /* don't attempt to register as a machine when running as a user */
+ arg_register = arg_privileged;
+ /* These defaults are assigned before parse_environment() and parse_argv() run. */
+ r = parse_environment();
+ if (r < 0)
+ return r;
+
r = parse_argv(argc, argv);
if (r <= 0) /* zero or negative from parse_argv() ends the run here, returning that value */
- goto finish;
+ return r;
r = determine_names();
if (r < 0)
- goto finish;
+ return r;
+
+ r = verify_arguments();
+ if (r < 0)
+ return r;
- assert_se(sigprocmask_many(SIG_BLOCK, NULL, SIGCHLD, SIGTERM, SIGINT, SIGRTMIN+18, -1) >= 0);
+ if (!arg_quiet && arg_console_mode != CONSOLE_GUI) { /* startup banner, skipped when quiet or in GUI console mode */
+ _cleanup_free_ char *u = NULL;
+ const char *vm_path = arg_image ?: arg_directory; /* the image path is preferred over the directory for display */
+ (void) terminal_urlify_path(vm_path, vm_path, &u); /* result ignored on purpose: if u stays NULL the plain path is printed instead */
- r = run_virtual_machine();
- if (r > 0)
- ret = r;
-finish:
- machine_credential_free_all(arg_credentials, arg_n_credentials);
+ log_info("%s %sSpawning VM %s on %s.%s",
+ special_glyph(SPECIAL_GLYPH_LIGHT_SHADE), ansi_grey(), arg_machine, u ?: vm_path, ansi_normal());
+ if (arg_console_mode == CONSOLE_INTERACTIVE) /* the kill-key hint depends on the console mode */
+ log_info("%s %sPress %sCtrl-]%s three times within 1s to kill VM.%s",
+ special_glyph(SPECIAL_GLYPH_LIGHT_SHADE), ansi_grey(), ansi_highlight(), ansi_grey(), ansi_normal());
+ else if (arg_console_mode == CONSOLE_NATIVE)
+ log_info("%s %sPress %sCtrl-a x%s to kill VM.%s",
+ special_glyph(SPECIAL_GLYPH_LIGHT_SHADE), ansi_grey(), ansi_highlight(), ansi_grey(), ansi_normal());
+ }
+ /* Fetch the passed file descriptors together with their names; on success r is used as their count below. */
+ r = sd_listen_fds_with_names(true, &names);
if (r < 0)
- return r;
+ return log_error_errno(r, "Failed to get passed file descriptors: %m");
+ /* Passed fds are numbered consecutively from SD_LISTEN_FDS_START and matched by name. */
+ for (int i = 0; i < r; i++) {
+ int fd = SD_LISTEN_FDS_START + i;
+ if (streq(names[i], "kvm"))
+ kvm_device_fd = fd;
+ else if (streq(names[i], "vhost-vsock"))
+ vhost_device_fd = fd;
+ else { /* fds with any other name are closed after a notice */
+ log_notice("Couldn't recognize passed fd %d (%s), closing fd and ignoring...", fd, names[i]);
+ safe_close(fd);
+ }
+ }
- return ret;
+ return run_virtual_machine(kvm_device_fd, vhost_device_fd); /* its return value becomes the return value of run() */
}
DEFINE_MAIN_FUNCTION_WITH_POSITIVE_FAILURE(run); /* NOTE(review): presumably expands to main() wrapping run(), with positive return values used as the process exit status; the macro is defined outside this file, confirm there */