diff options
Diffstat (limited to 'arch/um')
221 files changed, 33866 insertions, 0 deletions
diff --git a/arch/um/.gitignore b/arch/um/.gitignore new file mode 100644 index 0000000000..d69ea5b562 --- /dev/null +++ b/arch/um/.gitignore @@ -0,0 +1,5 @@ +# SPDX-License-Identifier: GPL-2.0-only +kernel/config.c +kernel/config.tmp +kernel/vmlinux.lds +kernel/capflags.c diff --git a/arch/um/Kbuild b/arch/um/Kbuild new file mode 100644 index 0000000000..6cf0c1e592 --- /dev/null +++ b/arch/um/Kbuild @@ -0,0 +1,3 @@ +# SPDX-License-Identifier: GPL-2.0-only + +obj-y += kernel/ drivers/ os-Linux/ diff --git a/arch/um/Kconfig b/arch/um/Kconfig new file mode 100644 index 0000000000..b5e1793605 --- /dev/null +++ b/arch/um/Kconfig @@ -0,0 +1,252 @@ +# SPDX-License-Identifier: GPL-2.0 + +menu "UML-specific options" + +config UML + bool + default y + select ARCH_HAS_CPU_FINALIZE_INIT + select ARCH_HAS_FORTIFY_SOURCE + select ARCH_HAS_GCOV_PROFILE_ALL + select ARCH_HAS_KCOV + select ARCH_HAS_STRNCPY_FROM_USER + select ARCH_HAS_STRNLEN_USER + select ARCH_NO_PREEMPT + select HAVE_ARCH_AUDITSYSCALL + select HAVE_ARCH_KASAN if X86_64 + select HAVE_ARCH_KASAN_VMALLOC if HAVE_ARCH_KASAN + select HAVE_ARCH_SECCOMP_FILTER + select HAVE_ASM_MODVERSIONS + select HAVE_UID16 + select HAVE_DEBUG_KMEMLEAK + select HAVE_DEBUG_BUGVERBOSE + select NO_DMA if !UML_DMA_EMULATION + select OF_EARLY_FLATTREE if OF + select GENERIC_IRQ_SHOW + select GENERIC_CPU_DEVICES + select HAVE_GCC_PLUGINS + select ARCH_SUPPORTS_LTO_CLANG + select ARCH_SUPPORTS_LTO_CLANG_THIN + select TRACE_IRQFLAGS_SUPPORT + select TTY # Needed for line.c + select HAVE_ARCH_VMAP_STACK + select HAVE_RUST if X86_64 + +config MMU + bool + default y + +config UML_DMA_EMULATION + bool + +config NO_IOMEM + bool "disable IOMEM" if EXPERT + depends on !INDIRECT_IOMEM + default y + +config UML_IOMEM_EMULATION + bool + select INDIRECT_IOMEM + select GENERIC_PCI_IOMAP + select GENERIC_IOMAP + select NO_GENERIC_PCI_IOPORT_MAP + +config NO_IOPORT_MAP + def_bool y + +config ISA + bool + +config SBUS + bool + +config LOCKDEP_SUPPORT + bool + default y + +config STACKTRACE_SUPPORT + bool + default y + select STACKTRACE + +config GENERIC_CALIBRATE_DELAY + bool + default y + +config HZ + int + default 100 + +config NR_CPUS + int + range 1 1 + default 1 + +config ARCH_HAS_CACHE_LINE_SIZE + def_bool y + +source "arch/$(HEADER_ARCH)/um/Kconfig" + +config MAY_HAVE_RUNTIME_DEPS + bool + +config STATIC_LINK + bool "Force a static link" + depends on CC_CAN_LINK_STATIC_NO_RUNTIME_DEPS || !MAY_HAVE_RUNTIME_DEPS + help + This option gives you the ability to force a static link of UML. + Normally, UML is linked as a shared binary. This is inconvenient for + use in a chroot jail. So, if you intend to run UML inside a chroot, + you probably want to say Y here. + Additionally, this option enables using higher memory spaces (up to + 2.75G) for UML. + + NOTE: This option is incompatible with some networking features which + depend on features that require being dynamically loaded (like NSS). + +config LD_SCRIPT_STATIC + bool + default y + depends on STATIC_LINK + +config LD_SCRIPT_DYN + bool + default y + depends on !LD_SCRIPT_STATIC + +config LD_SCRIPT_DYN_RPATH + bool "set rpath in the binary" if EXPERT + default y + depends on LD_SCRIPT_DYN + help + Add /lib (and /lib64 for 64-bit) to the linux binary's rpath + explicitly. + + You may need to turn this off if compiling for nix systems + that have their libraries in random /nix directories and + might otherwise unexpected use libraries from /lib or /lib64 + instead of the desired ones. + +config HOSTFS + tristate "Host filesystem" + help + While the User-Mode Linux port uses its own root file system for + booting and normal file access, this module lets the UML user + access files stored on the host. It does not require any + network connection between the Host and UML. An example use of + this might be: + + mount none /tmp/fromhost -t hostfs -o /tmp/umlshare + + where /tmp/fromhost is an empty directory inside UML and + /tmp/umlshare is a directory on the host with files the UML user + wishes to access. + + For more information, see + <http://user-mode-linux.sourceforge.net/hostfs.html>. + + If you'd like to be able to work with files stored on the host, + say Y or M here; otherwise say N. + +config MCONSOLE + bool "Management console" + depends on PROC_FS + default y + help + The user mode linux management console is a low-level interface to + the kernel, somewhat like the i386 SysRq interface. Since there is + a full-blown operating system running under every user mode linux + instance, there is much greater flexibility possible than with the + SysRq mechanism. + + If you answer 'Y' to this option, to use this feature, you need the + mconsole client (called uml_mconsole) which is present in CVS in + 2.4.5-9um and later (path /tools/mconsole), and is also in the + distribution RPM package in 2.4.6 and later. + + It is safe to say 'Y' here. + +config MAGIC_SYSRQ + bool "Magic SysRq key" + depends on MCONSOLE + help + If you say Y here, you will have some control over the system even + if the system crashes for example during kernel debugging (e.g., you + will be able to flush the buffer cache to disk, reboot the system + immediately or dump some status information). A key for each of the + possible requests is provided. + + This is the feature normally accomplished by pressing a key + while holding SysRq (Alt+PrintScreen). + + On UML, this is accomplished by sending a "sysrq" command with + mconsole, followed by the letter for the requested command. + + The keys are documented in <file:Documentation/admin-guide/sysrq.rst>. Don't say Y + unless you really know what this hack does. + +config KERNEL_STACK_ORDER + int "Kernel stack size order" + default 2 if 64BIT + range 2 10 if 64BIT + default 1 if !64BIT + help + This option determines the size of UML kernel stacks. They will + be 1 << order pages. The default is OK unless you're running Valgrind + on UML, in which case, set this to 3. + It is possible to reduce the stack to 1 for 64BIT and 0 for 32BIT on + older (pre-2017) CPUs. It is not recommended on newer CPUs due to the + increase in the size of the state which needs to be saved when handling + signals. + +config MMAPPER + tristate "iomem emulation driver" + help + This driver allows a host file to be used as emulated IO memory inside + UML. + +config PGTABLE_LEVELS + int + default 3 if 3_LEVEL_PGTABLES + default 2 + +config UML_TIME_TRAVEL_SUPPORT + bool + prompt "Support time-travel mode (e.g. for test execution)" + # inf-cpu mode is incompatible with the benchmarking + depends on !RAID6_PQ_BENCHMARK + depends on !SMP + help + Enable this option to support time travel inside the UML instance. + + After enabling this option, two modes are accessible at runtime + (selected by the kernel command line), see the kernel's command- + line help for more details. + + It is safe to say Y, but you probably don't need this. + +config KASAN_SHADOW_OFFSET + hex + depends on KASAN + default 0x100000000000 + help + This is the offset at which the ~16TB of shadow memory is + mapped and used by KASAN for memory debugging. This can be any + address that has at least KASAN_SHADOW_SIZE (total address space divided + by 8) amount of space so that the KASAN shadow memory does not conflict + with anything. The default is 0x100000000000, which works even if mem is + set to a large value. On low-memory systems, try 0x7fff8000, as it fits + into the immediate of most instructions, improving performance. + +endmenu + +source "arch/um/drivers/Kconfig" + +config ARCH_SUSPEND_POSSIBLE + def_bool y + +menu "Power management options" + +source "kernel/power/Kconfig" + +endmenu diff --git a/arch/um/Kconfig.debug b/arch/um/Kconfig.debug new file mode 100644 index 0000000000..1dfb2959c7 --- /dev/null +++ b/arch/um/Kconfig.debug @@ -0,0 +1,38 @@ +# SPDX-License-Identifier: GPL-2.0 + +config GPROF + bool "Enable gprof support" + depends on DEBUG_INFO && FRAME_POINTER + help + This allows profiling of a User-Mode Linux kernel with the gprof + utility. + + See <http://user-mode-linux.sourceforge.net/old/gprof.html> for more + details. + + If you're involved in UML kernel development and want to use gprof, + say Y. If you're unsure, say N. + +config GCOV + bool "Enable gcov support" + depends on DEBUG_INFO + depends on !KCOV + depends on !MODULES + help + This option allows developers to retrieve coverage data from a UML + session. + + See <http://user-mode-linux.sourceforge.net/old/gprof.html> for more + details. + + If you're involved in UML kernel development and want to use gcov, + say Y. If you're unsure, say N. + +config EARLY_PRINTK + bool "Early printk" + default y + help + Write kernel log output directly to stdout. + + This is useful for kernel debugging when your machine crashes very + early before the console code is initialized. diff --git a/arch/um/Makefile b/arch/um/Makefile new file mode 100644 index 0000000000..82f05f2506 --- /dev/null +++ b/arch/um/Makefile @@ -0,0 +1,153 @@ +# +# This file is included by the global makefile so that you can add your own +# architecture-specific flags and dependencies. +# +# Copyright (C) 2002 - 2007 Jeff Dike (jdike@{addtoit,linux.intel}.com) +# Licensed under the GPL +# + +# select defconfig based on actual architecture +ifeq ($(SUBARCH),x86) + ifeq ($(shell uname -m),x86_64) + KBUILD_DEFCONFIG := x86_64_defconfig + else + KBUILD_DEFCONFIG := i386_defconfig + endif +else + KBUILD_DEFCONFIG := $(SUBARCH)_defconfig +endif + +ARCH_DIR := arch/um +# We require bash because the vmlinux link and loader script cpp use bash +# features. +SHELL := /bin/bash + +MODE_INCLUDE += -I$(srctree)/$(ARCH_DIR)/include/shared/skas + +HEADER_ARCH := $(SUBARCH) + +ifneq ($(filter $(SUBARCH),x86 x86_64 i386),) + HEADER_ARCH := x86 +endif + +ifdef CONFIG_64BIT + KBUILD_CFLAGS += -mcmodel=large +endif + +HOST_DIR := arch/$(HEADER_ARCH) + +include $(srctree)/$(ARCH_DIR)/Makefile-skas +include $(srctree)/$(HOST_DIR)/Makefile.um + +core-y += $(HOST_DIR)/um/ + +SHARED_HEADERS := $(ARCH_DIR)/include/shared +ARCH_INCLUDE := -I$(srctree)/$(SHARED_HEADERS) +ARCH_INCLUDE += -I$(srctree)/$(HOST_DIR)/um/shared +KBUILD_CPPFLAGS += -I$(srctree)/$(HOST_DIR)/um + +# -Dvmap=kernel_vmap prevents anything from referencing the libpcap.o symbol so +# named - it's a common symbol in libpcap, so we get a binary which crashes. +# +# Same things for in6addr_loopback and mktime - found in libc. For these two we +# only get link-time error, luckily. +# +# -Dlongjmp=kernel_longjmp prevents anything from referencing the libpthread.a +# embedded copy of longjmp, same thing for setjmp. +# +# These apply to USER_CFLAGS to. + +KBUILD_CFLAGS += $(CFLAGS) $(CFLAGS-y) -D__arch_um__ \ + $(ARCH_INCLUDE) $(MODE_INCLUDE) -Dvmap=kernel_vmap \ + -Dlongjmp=kernel_longjmp -Dsetjmp=kernel_setjmp \ + -Din6addr_loopback=kernel_in6addr_loopback \ + -Din6addr_any=kernel_in6addr_any -Dstrrchr=kernel_strrchr + +KBUILD_RUSTFLAGS += -Crelocation-model=pie + +KBUILD_AFLAGS += $(ARCH_INCLUDE) + +USER_CFLAGS = $(patsubst $(KERNEL_DEFINES),,$(patsubst -I%,,$(KBUILD_CFLAGS))) \ + $(ARCH_INCLUDE) $(MODE_INCLUDE) $(filter -I%,$(CFLAGS)) \ + -D_FILE_OFFSET_BITS=64 -idirafter $(srctree)/include \ + -idirafter $(objtree)/include -D__KERNEL__ -D__UM_HOST__ + +#This will adjust *FLAGS accordingly to the platform. +include $(srctree)/$(ARCH_DIR)/Makefile-os-Linux + +KBUILD_CPPFLAGS += -I$(srctree)/$(HOST_DIR)/include \ + -I$(srctree)/$(HOST_DIR)/include/uapi \ + -I$(objtree)/$(HOST_DIR)/include/generated \ + -I$(objtree)/$(HOST_DIR)/include/generated/uapi + +# -Derrno=kernel_errno - This turns all kernel references to errno into +# kernel_errno to separate them from the libc errno. This allows -fno-common +# in KBUILD_CFLAGS. Otherwise, it would cause ld to complain about the two different +# errnos. +# These apply to kernelspace only. +# +# strip leading and trailing whitespace to make the USER_CFLAGS removal of these +# defines more robust + +KERNEL_DEFINES = $(strip -Derrno=kernel_errno -Dsigprocmask=kernel_sigprocmask \ + -Dmktime=kernel_mktime $(ARCH_KERNEL_DEFINES)) +KBUILD_CFLAGS += $(KERNEL_DEFINES) + +PHONY += linux + +all: linux + +linux: vmlinux + @echo ' LINK $@' + $(Q)ln -f $< $@ + +define archhelp + echo '* linux - Binary kernel image (./linux) - for backward' + echo ' compatibility only, this creates a hard link to the' + echo ' real kernel binary, the "vmlinux" binary you' + echo ' find in the kernel root.' +endef + +archheaders: + $(Q)$(MAKE) -f $(srctree)/Makefile ARCH=$(HEADER_ARCH) asm-generic archheaders + +archprepare: + $(Q)$(MAKE) $(build)=$(HOST_DIR)/um include/generated/user_constants.h + +LINK-$(CONFIG_LD_SCRIPT_STATIC) += -static +LINK-$(CONFIG_LD_SCRIPT_DYN) += $(call cc-option, -no-pie) +LINK-$(CONFIG_LD_SCRIPT_DYN_RPATH) += -Wl,-rpath,/lib + +CFLAGS_NO_HARDENING := $(call cc-option, -fno-PIC,) $(call cc-option, -fno-pic,) \ + -fno-stack-protector $(call cc-option, -fno-stack-protector-all) + +# Options used by linker script +export LDS_START := $(START) +export LDS_ELF_ARCH := $(ELF_ARCH) +export LDS_ELF_FORMAT := $(ELF_FORMAT) + +# The wrappers will select whether using "malloc" or the kernel allocator. +LINK_WRAPS = -Wl,--wrap,malloc -Wl,--wrap,free -Wl,--wrap,calloc + +# Avoid binutils 2.39+ warnings by marking the stack non-executable and +# ignorning warnings for the kallsyms sections. +LDFLAGS_EXECSTACK = -z noexecstack +ifeq ($(CONFIG_LD_IS_BFD),y) +LDFLAGS_EXECSTACK += $(call ld-option,--no-warn-rwx-segments) +endif + +LD_FLAGS_CMDLINE = $(foreach opt,$(KBUILD_LDFLAGS) $(LDFLAGS_EXECSTACK),-Wl,$(opt)) + +# Used by link-vmlinux.sh which has special support for um link +export CFLAGS_vmlinux := $(LINK-y) $(LINK_WRAPS) $(LD_FLAGS_CMDLINE) $(CC_FLAGS_LTO) + +# When cleaning we don't include .config, so we don't include +# TT or skas makefiles and don't clean skas_ptregs.h. +CLEAN_FILES += linux x.i gmon.out +MRPROPER_FILES += $(HOST_DIR)/include/generated + +archclean: + @find . \( -name '*.bb' -o -name '*.bbg' -o -name '*.da' \ + -o -name '*.gcov' \) -type f -print | xargs rm -f + +export HEADER_ARCH SUBARCH USER_CFLAGS CFLAGS_NO_HARDENING DEV_NULL_PATH diff --git a/arch/um/Makefile-os-Linux b/arch/um/Makefile-os-Linux new file mode 100644 index 0000000000..2c8a598ec1 --- /dev/null +++ b/arch/um/Makefile-os-Linux @@ -0,0 +1,9 @@ +# +# Copyright (C) 2000 Jeff Dike (jdike@karaya.com) +# Licensed under the GPL +# + +# To get a definition of F_SETSIG +USER_CFLAGS += -D_GNU_SOURCE -D_LARGEFILE64_SOURCE +KBUILD_CFLAGS += -D_LARGEFILE64_SOURCE +DEV_NULL_PATH = \"/dev/null\" diff --git a/arch/um/Makefile-skas b/arch/um/Makefile-skas new file mode 100644 index 0000000000..ac35de5316 --- /dev/null +++ b/arch/um/Makefile-skas @@ -0,0 +1,12 @@ +# +# Copyright (C) 2002 Jeff Dike (jdike@karaya.com) +# Licensed under the GPL +# + +GPROF_OPT += -pg +GCOV_OPT += -fprofile-arcs -ftest-coverage + +CFLAGS-$(CONFIG_GCOV) += $(GCOV_OPT) +CFLAGS-$(CONFIG_GPROF) += $(GPROF_OPT) +LINK-$(CONFIG_GCOV) += $(GCOV_OPT) +LINK-$(CONFIG_GPROF) += $(GPROF_OPT) diff --git a/arch/um/configs/i386_defconfig b/arch/um/configs/i386_defconfig new file mode 100644 index 0000000000..e543cbac87 --- /dev/null +++ b/arch/um/configs/i386_defconfig @@ -0,0 +1,73 @@ +CONFIG_3_LEVEL_PGTABLES=y +# CONFIG_COMPACTION is not set +CONFIG_BINFMT_MISC=m +CONFIG_HOSTFS=y +CONFIG_MAGIC_SYSRQ=y +CONFIG_KERNEL_STACK_ORDER=1 +CONFIG_SYSVIPC=y +CONFIG_POSIX_MQUEUE=y +CONFIG_NO_HZ=y +CONFIG_HIGH_RES_TIMERS=y +CONFIG_BSD_PROCESS_ACCT=y +CONFIG_IKCONFIG=y +CONFIG_IKCONFIG_PROC=y +CONFIG_LOG_BUF_SHIFT=14 +CONFIG_CGROUPS=y +CONFIG_CGROUP_FREEZER=y +CONFIG_CGROUP_DEVICE=y +CONFIG_CPUSETS=y +CONFIG_CGROUP_CPUACCT=y +CONFIG_CGROUP_SCHED=y +CONFIG_BLK_CGROUP=y +# CONFIG_PID_NS is not set +CONFIG_SYSFS_DEPRECATED=y +CONFIG_CC_OPTIMIZE_FOR_SIZE=y +CONFIG_MODULES=y +CONFIG_MODULE_UNLOAD=y +# CONFIG_BLK_DEV_BSG is not set +CONFIG_IOSCHED_BFQ=m +CONFIG_SSL=y +CONFIG_NULL_CHAN=y +CONFIG_PORT_CHAN=y +CONFIG_PTY_CHAN=y +CONFIG_TTY_CHAN=y +CONFIG_XTERM_CHAN=y +CONFIG_CON_CHAN="pts" +CONFIG_SSL_CHAN="pts" +CONFIG_SOUND=m +CONFIG_UML_SOUND=m +CONFIG_DEVTMPFS=y +CONFIG_DEVTMPFS_MOUNT=y +CONFIG_BLK_DEV_UBD=y +CONFIG_BLK_DEV_LOOP=m +CONFIG_BLK_DEV_NBD=m +CONFIG_DUMMY=m +CONFIG_TUN=m +CONFIG_PPP=m +CONFIG_SLIP=m +CONFIG_LEGACY_PTY_COUNT=32 +# CONFIG_HW_RANDOM is not set +CONFIG_UML_RANDOM=y +CONFIG_NET=y +CONFIG_PACKET=y +CONFIG_UNIX=y +CONFIG_INET=y +# CONFIG_IPV6 is not set +CONFIG_UML_NET=y +CONFIG_UML_NET_ETHERTAP=y +CONFIG_UML_NET_TUNTAP=y +CONFIG_UML_NET_SLIP=y +CONFIG_UML_NET_DAEMON=y +CONFIG_UML_NET_MCAST=y +CONFIG_UML_NET_SLIRP=y +CONFIG_EXT4_FS=y +CONFIG_REISERFS_FS=y +CONFIG_QUOTA=y +CONFIG_AUTOFS_FS=m +CONFIG_ISO9660_FS=m +CONFIG_JOLIET=y +CONFIG_PROC_KCORE=y +CONFIG_TMPFS=y +CONFIG_NLS=y +CONFIG_DEBUG_INFO_DWARF_TOOLCHAIN_DEFAULT=y +CONFIG_DEBUG_KERNEL=y diff --git a/arch/um/configs/x86_64_defconfig b/arch/um/configs/x86_64_defconfig new file mode 100644 index 0000000000..939cb12318 --- /dev/null +++ b/arch/um/configs/x86_64_defconfig @@ -0,0 +1,72 @@ +# CONFIG_COMPACTION is not set +CONFIG_BINFMT_MISC=m +CONFIG_HOSTFS=y +CONFIG_MAGIC_SYSRQ=y +CONFIG_SYSVIPC=y +CONFIG_POSIX_MQUEUE=y +CONFIG_NO_HZ=y +CONFIG_HIGH_RES_TIMERS=y +CONFIG_BSD_PROCESS_ACCT=y +CONFIG_IKCONFIG=y +CONFIG_IKCONFIG_PROC=y +CONFIG_LOG_BUF_SHIFT=14 +CONFIG_CGROUPS=y +CONFIG_CGROUP_FREEZER=y +CONFIG_CGROUP_DEVICE=y +CONFIG_CPUSETS=y +CONFIG_CGROUP_CPUACCT=y +CONFIG_CGROUP_SCHED=y +CONFIG_BLK_CGROUP=y +# CONFIG_PID_NS is not set +CONFIG_SYSFS_DEPRECATED=y +CONFIG_CC_OPTIMIZE_FOR_SIZE=y +CONFIG_MODULES=y +CONFIG_MODULE_UNLOAD=y +# CONFIG_BLK_DEV_BSG is not set +CONFIG_IOSCHED_BFQ=m +CONFIG_SSL=y +CONFIG_NULL_CHAN=y +CONFIG_PORT_CHAN=y +CONFIG_PTY_CHAN=y +CONFIG_TTY_CHAN=y +CONFIG_XTERM_CHAN=y +CONFIG_CON_CHAN="pts" +CONFIG_SSL_CHAN="pts" +CONFIG_SOUND=m +CONFIG_UML_SOUND=m +CONFIG_DEVTMPFS=y +CONFIG_DEVTMPFS_MOUNT=y +CONFIG_BLK_DEV_UBD=y +CONFIG_BLK_DEV_LOOP=m +CONFIG_BLK_DEV_NBD=m +CONFIG_DUMMY=m +CONFIG_TUN=m +CONFIG_PPP=m +CONFIG_SLIP=m +CONFIG_LEGACY_PTY_COUNT=32 +# CONFIG_HW_RANDOM is not set +CONFIG_UML_RANDOM=y +CONFIG_NET=y +CONFIG_PACKET=y +CONFIG_UNIX=y +CONFIG_INET=y +# CONFIG_IPV6 is not set +CONFIG_UML_NET=y +CONFIG_UML_NET_ETHERTAP=y +CONFIG_UML_NET_TUNTAP=y +CONFIG_UML_NET_SLIP=y +CONFIG_UML_NET_DAEMON=y +CONFIG_UML_NET_MCAST=y +CONFIG_UML_NET_SLIRP=y +CONFIG_EXT4_FS=y +CONFIG_REISERFS_FS=y +CONFIG_QUOTA=y +CONFIG_AUTOFS_FS=m +CONFIG_ISO9660_FS=m +CONFIG_JOLIET=y +CONFIG_PROC_KCORE=y +CONFIG_TMPFS=y +CONFIG_NLS=y +CONFIG_DEBUG_INFO_DWARF_TOOLCHAIN_DEFAULT=y +CONFIG_FRAME_WARN=1024 +CONFIG_DEBUG_KERNEL=y diff --git a/arch/um/drivers/Kconfig b/arch/um/drivers/Kconfig new file mode 100644 index 0000000000..b94b2618e7 --- /dev/null +++ b/arch/um/drivers/Kconfig @@ -0,0 +1,385 @@ +# SPDX-License-Identifier: GPL-2.0 + +menu "UML Character Devices" + +config STDERR_CONSOLE + bool "stderr console" + default y + help + console driver which dumps all printk messages to stderr. + +config SSL + bool "Virtual serial line" + help + The User-Mode Linux environment allows you to create virtual serial + lines on the UML that are usually made to show up on the host as + ttys or ptys. + + See <http://user-mode-linux.sourceforge.net/old/input.html> for more + information and command line examples of how to use this facility. + + Unless you have a specific reason for disabling this, say Y. + +config NULL_CHAN + bool "null channel support" + help + This option enables support for attaching UML consoles and serial + lines to a device similar to /dev/null. Data written to it disappears + and there is never any data to be read. + +config PORT_CHAN + bool "port channel support" + help + This option enables support for attaching UML consoles and serial + lines to host portals. They may be accessed with 'telnet <host> + <port number>'. Any number of consoles and serial lines may be + attached to a single portal, although what UML device you get when + you telnet to that portal will be unpredictable. + It is safe to say 'Y' here. + +config PTY_CHAN + bool "pty channel support" + help + This option enables support for attaching UML consoles and serial + lines to host pseudo-terminals. Access to both traditional + pseudo-terminals (/dev/pty*) and pts pseudo-terminals are controlled + with this option. The assignment of UML devices to host devices + will be announced in the kernel message log. + It is safe to say 'Y' here. + +config TTY_CHAN + bool "tty channel support" + help + This option enables support for attaching UML consoles and serial + lines to host terminals. Access to both virtual consoles + (/dev/tty*) and the slave side of pseudo-terminals (/dev/ttyp* and + /dev/pts/*) are controlled by this option. + It is safe to say 'Y' here. + +config XTERM_CHAN + bool "xterm channel support" + help + This option enables support for attaching UML consoles and serial + lines to xterms. Each UML device so assigned will be brought up in + its own xterm. + It is safe to say 'Y' here. + +config XTERM_CHAN_DEFAULT_EMULATOR + string "xterm channel default terminal emulator" + depends on XTERM_CHAN + default "xterm" + help + This option allows changing the default terminal emulator. + +config NOCONFIG_CHAN + bool + default !(XTERM_CHAN && TTY_CHAN && PTY_CHAN && PORT_CHAN && NULL_CHAN) + +config CON_ZERO_CHAN + string "Default main console channel initialization" + default "fd:0,fd:1" + help + This is the string describing the channel to which the main console + will be attached by default. This value can be overridden from the + command line. The default value is "fd:0,fd:1", which attaches the + main console to stdin and stdout. + It is safe to leave this unchanged. + +config CON_CHAN + string "Default console channel initialization" + default "xterm" + help + This is the string describing the channel to which all consoles + except the main console will be attached by default. This value can + be overridden from the command line. The default value is "xterm", + which brings them up in xterms. + It is safe to leave this unchanged, although you may wish to change + this if you expect the UML that you build to be run in environments + which don't have X or xterm available. + +config SSL_CHAN + string "Default serial line channel initialization" + default "pty" + help + This is the string describing the channel to which the serial lines + will be attached by default. This value can be overridden from the + command line. The default value is "pty", which attaches them to + traditional pseudo-terminals. + It is safe to leave this unchanged, although you may wish to change + this if you expect the UML that you build to be run in environments + which don't have a set of /dev/pty* devices. + +config UML_SOUND + tristate "Sound support" + depends on SOUND + select SOUND_OSS_CORE + help + This option enables UML sound support. If enabled, it will pull in + the UML hostaudio relay, which acts as a intermediary + between the host's dsp and mixer devices and the UML sound system. + It is safe to say 'Y' here. + +endmenu + +menu "UML Network Devices" + depends on NET + +# UML virtual driver +config UML_NET + bool "Virtual network device" + help + While the User-Mode port cannot directly talk to any physical + hardware devices, this choice and the following transport options + provide one or more virtual network devices through which the UML + kernels can talk to each other, the host, and with the host's help, + machines on the outside world. + + For more information, including explanations of the networking and + sample configurations, see + <http://user-mode-linux.sourceforge.net/old/networking.html>. + + If you'd like to be able to enable networking in the User-Mode + linux environment, say Y; otherwise say N. Note that you must + enable at least one of the following transport options to actually + make use of UML networking. + +config UML_NET_ETHERTAP + bool "Ethertap transport (obsolete)" + depends on UML_NET + help + The Ethertap User-Mode Linux network transport allows a single + running UML to exchange packets with its host over one of the + host's Ethertap devices, such as /dev/tap0. Additional running + UMLs can use additional Ethertap devices, one per running UML. + While the UML believes it's on a (multi-device, broadcast) virtual + Ethernet network, it's in fact communicating over a point-to-point + link with the host. + + To use this, your host kernel must have support for Ethertap + devices. Also, if your host kernel is 2.4.x, it must have + CONFIG_NETLINK_DEV configured as Y or M. + + For more information, see + <http://user-mode-linux.sourceforge.net/old/networking.html> That site + has examples of the UML command line to use to enable Ethertap + networking. + + NOTE: THIS TRANSPORT IS DEPRECATED AND WILL BE REMOVED SOON!!! Please + migrate to UML_NET_VECTOR. + + If unsure, say N. + +config UML_NET_TUNTAP + bool "TUN/TAP transport (obsolete)" + depends on UML_NET + help + The UML TUN/TAP network transport allows a UML instance to exchange + packets with the host over a TUN/TAP device. This option will only + work with a 2.4 host, unless you've applied the TUN/TAP patch to + your 2.2 host kernel. + + To use this transport, your host kernel must have support for TUN/TAP + devices, either built-in or as a module. + + NOTE: THIS TRANSPORT IS DEPRECATED AND WILL BE REMOVED SOON!!! Please + migrate to UML_NET_VECTOR. + + If unsure, say N. + +config UML_NET_SLIP + bool "SLIP transport (obsolete)" + depends on UML_NET + help + The slip User-Mode Linux network transport allows a running UML to + network with its host over a point-to-point link. Unlike Ethertap, + which can carry any Ethernet frame (and hence even non-IP packets), + the slip transport can only carry IP packets. + + To use this, your host must support slip devices. + + For more information, see + <http://user-mode-linux.sourceforge.net/old/networking.html>. + has examples of the UML command line to use to enable slip + networking, and details of a few quirks with it. + + NOTE: THIS TRANSPORT IS DEPRECATED AND WILL BE REMOVED SOON!!! Please + migrate to UML_NET_VECTOR. + + If unsure, say N. + +config UML_NET_DAEMON + bool "Daemon transport (obsolete)" + depends on UML_NET + help + This User-Mode Linux network transport allows one or more running + UMLs on a single host to communicate with each other, but not to + the host. + + To use this form of networking, you'll need to run the UML + networking daemon on the host. + + For more information, see + <http://user-mode-linux.sourceforge.net/old/networking.html> That site + has examples of the UML command line to use to enable Daemon + networking. + + NOTE: THIS TRANSPORT IS DEPRECATED AND WILL BE REMOVED SOON!!! Please + migrate to UML_NET_VECTOR. + + If unsure, say N. + +config UML_NET_DAEMON_DEFAULT_SOCK + string "Default socket for daemon transport" + default "/tmp/uml.ctl" + depends on UML_NET_DAEMON + help + This option allows setting the default socket for the daemon + transport, normally it defaults to /tmp/uml.ctl. + +config UML_NET_VECTOR + bool "Vector I/O high performance network devices" + depends on UML_NET + select MAY_HAVE_RUNTIME_DEPS + help + This User-Mode Linux network driver uses multi-message send + and receive functions. The host running the UML guest must have + a linux kernel version above 3.0 and a libc version > 2.13. + This driver provides tap, raw, gre and l2tpv3 network transports + with up to 4 times higher network throughput than the UML network + drivers. + +config UML_NET_VDE + bool "VDE transport (obsolete)" + depends on UML_NET + depends on !MODVERSIONS + select MAY_HAVE_RUNTIME_DEPS + help + This User-Mode Linux network transport allows one or more running + UMLs on a single host to communicate with each other and also + with the rest of the world using Virtual Distributed Ethernet, + an improved fork of uml_switch. + + You must have libvdeplug installed in order to build the vde + transport into UML. + + To use this form of networking, you will need to run vde_switch + on the host. + + For more information, see <http://wiki.virtualsquare.org/> + That site has a good overview of what VDE is and also examples + of the UML command line to use to enable VDE networking. + + NOTE: THIS TRANSPORT IS DEPRECATED AND WILL BE REMOVED SOON!!! Please + migrate to UML_NET_VECTOR. + + If unsure, say N. + +config UML_NET_MCAST + bool "Multicast transport (obsolete)" + depends on UML_NET + help + This Multicast User-Mode Linux network transport allows multiple + UMLs (even ones running on different host machines!) to talk to + each other over a virtual ethernet network. However, it requires + at least one UML with one of the other transports to act as a + bridge if any of them need to be able to talk to their hosts or any + other IP machines. + + To use this, your host kernel(s) must support IP Multicasting. + + For more information, see + <http://user-mode-linux.sourceforge.net/old/networking.html> That site + has examples of the UML command line to use to enable Multicast + networking, and notes about the security of this approach. + + NOTE: THIS TRANSPORT IS DEPRECATED AND WILL BE REMOVED SOON!!! Please + migrate to UML_NET_VECTOR. + + If unsure, say N. + +config UML_NET_PCAP + bool "pcap transport (obsolete)" + depends on UML_NET + depends on !MODVERSIONS + select MAY_HAVE_RUNTIME_DEPS + help + The pcap transport makes a pcap packet stream on the host look + like an ethernet device inside UML. This is useful for making + UML act as a network monitor for the host. You must have libcap + installed in order to build the pcap transport into UML. + + For more information, see + <http://user-mode-linux.sourceforge.net/old/networking.html> That site + has examples of the UML command line to use to enable this option. + + NOTE: THIS TRANSPORT IS DEPRECATED AND WILL BE REMOVED SOON!!! Please + migrate to UML_NET_VECTOR. + + If unsure, say N. + +config UML_NET_SLIRP + bool "SLiRP transport (obsolete)" + depends on UML_NET + help + The SLiRP User-Mode Linux network transport allows a running UML + to network by invoking a program that can handle SLIP encapsulated + packets. This is commonly (but not limited to) the application + known as SLiRP, a program that can re-socket IP packets back onto + he host on which it is run. Only IP packets are supported, + unlike other network transports that can handle all Ethernet + frames. In general, slirp allows the UML the same IP connectivity + to the outside world that the host user is permitted, and unlike + other transports, SLiRP works without the need of root level + privileges, setuid binaries, or SLIP devices on the host. This + also means not every type of connection is possible, but most + situations can be accommodated with carefully crafted slirp + commands that can be passed along as part of the network device's + setup string. The effect of this transport on the UML is similar + that of a host behind a firewall that masquerades all network + connections passing through it (but is less secure). + + NOTE: THIS TRANSPORT IS DEPRECATED AND WILL BE REMOVED SOON!!! Please + migrate to UML_NET_VECTOR. + + If unsure, say N. + + Startup example: "eth0=slirp,FE:FD:01:02:03:04,/usr/local/bin/slirp" + +endmenu + +config VIRTIO_UML + bool "UML driver for virtio devices" + select VIRTIO + help + This driver provides support for virtio based paravirtual device + drivers over vhost-user sockets. + +config UML_RTC + bool "UML RTC driver" + depends on RTC_CLASS + # there's no use in this if PM_SLEEP isn't enabled ... + depends on PM_SLEEP + help + When PM_SLEEP is configured, it may be desirable to wake up using + rtcwake, especially in time-travel mode. This driver enables that + by providing a fake RTC clock that causes a wakeup at the right + time. + +config UML_PCI_OVER_VIRTIO + bool "Enable PCI over VIRTIO device simulation" + # in theory, just VIRTIO is enough, but that causes recursion + depends on VIRTIO_UML + select FORCE_PCI + select UML_IOMEM_EMULATION + select UML_DMA_EMULATION + select PCI_MSI + select PCI_LOCKLESS_CONFIG + +config UML_PCI_OVER_VIRTIO_DEVICE_ID + int "set the virtio device ID for PCI emulation" + default -1 + depends on UML_PCI_OVER_VIRTIO + help + There's no official device ID assigned (yet), set the one you + wish to use for experimentation here. The default of -1 is + not valid and will cause the driver to fail at probe. diff --git a/arch/um/drivers/Makefile b/arch/um/drivers/Makefile new file mode 100644 index 0000000000..0e6af81096 --- /dev/null +++ b/arch/um/drivers/Makefile @@ -0,0 +1,77 @@ +# SPDX-License-Identifier: GPL-2.0 +# +# Copyright (C) 2000, 2002, 2003 Jeff Dike (jdike@karaya.com) +# + +# pcap is broken in 2.5 because kbuild doesn't allow pcap.a to be linked +# in to pcap.o + +slip-objs := slip_kern.o slip_user.o +slirp-objs := slirp_kern.o slirp_user.o +daemon-objs := daemon_kern.o daemon_user.o +vector-objs := vector_kern.o vector_user.o vector_transports.o +umcast-objs := umcast_kern.o umcast_user.o +net-objs := net_kern.o net_user.o +mconsole-objs := mconsole_kern.o mconsole_user.o +hostaudio-objs := hostaudio_kern.o +ubd-objs := ubd_kern.o ubd_user.o +port-objs := port_kern.o port_user.o +harddog-objs := harddog_kern.o +harddog-builtin-$(CONFIG_UML_WATCHDOG) := harddog_user.o harddog_user_exp.o +rtc-objs := rtc_kern.o rtc_user.o + +LDFLAGS_pcap.o = $(shell $(CC) $(KBUILD_CFLAGS) -print-file-name=libpcap.a) + +LDFLAGS_vde.o = $(shell $(CC) $(CFLAGS) -print-file-name=libvdeplug.a) + +targets := pcap_kern.o pcap_user.o vde_kern.o vde_user.o + +$(obj)/pcap.o: $(obj)/pcap_kern.o $(obj)/pcap_user.o + $(LD) -r -dp -o $@ $^ $(ld_flags) + +$(obj)/vde.o: $(obj)/vde_kern.o $(obj)/vde_user.o + $(LD) -r -dp -o $@ $^ $(ld_flags) + +#XXX: The call below does not work because the flags are added before the +# object name, so nothing from the library gets linked. +#$(call if_changed,ld) + +# When the above is fixed, don't forget to add this too! +#targets += $(obj)/pcap.o + +obj-y := stdio_console.o fd.o chan_kern.o chan_user.o line.o +obj-$(CONFIG_SSL) += ssl.o +obj-$(CONFIG_STDERR_CONSOLE) += stderr_console.o + +obj-$(CONFIG_UML_NET_SLIP) += slip.o slip_common.o +obj-$(CONFIG_UML_NET_SLIRP) += slirp.o slip_common.o +obj-$(CONFIG_UML_NET_DAEMON) += daemon.o +obj-$(CONFIG_UML_NET_VECTOR) += vector.o +obj-$(CONFIG_UML_NET_VDE) += vde.o +obj-$(CONFIG_UML_NET_MCAST) += umcast.o +obj-$(CONFIG_UML_NET_PCAP) += pcap.o +obj-$(CONFIG_UML_NET) += net.o +obj-$(CONFIG_MCONSOLE) += mconsole.o +obj-$(CONFIG_MMAPPER) += mmapper_kern.o +obj-$(CONFIG_BLK_DEV_UBD) += ubd.o +obj-$(CONFIG_UML_SOUND) += hostaudio.o +obj-$(CONFIG_NULL_CHAN) += null.o +obj-$(CONFIG_PORT_CHAN) += port.o +obj-$(CONFIG_PTY_CHAN) += pty.o +obj-$(CONFIG_TTY_CHAN) += tty.o +obj-$(CONFIG_XTERM_CHAN) += xterm.o xterm_kern.o +obj-$(CONFIG_UML_WATCHDOG) += harddog.o +obj-y += $(harddog-builtin-y) $(harddog-builtin-m) +obj-$(CONFIG_BLK_DEV_COW_COMMON) += cow_user.o +obj-$(CONFIG_UML_RANDOM) += random.o +obj-$(CONFIG_VIRTIO_UML) += virtio_uml.o +obj-$(CONFIG_UML_RTC) += rtc.o +obj-$(CONFIG_UML_PCI_OVER_VIRTIO) += virt-pci.o + +# pcap_user.o must be added explicitly. +USER_OBJS := fd.o null.o pty.o tty.o xterm.o slip_common.o pcap_user.o vde_user.o vector_user.o +CFLAGS_null.o = -DDEV_NULL=$(DEV_NULL_PATH) + +CFLAGS_xterm.o += '-DCONFIG_XTERM_CHAN_DEFAULT_EMULATOR="$(CONFIG_XTERM_CHAN_DEFAULT_EMULATOR)"' + +include $(srctree)/arch/um/scripts/Makefile.rules diff --git a/arch/um/drivers/chan.h b/arch/um/drivers/chan.h new file mode 100644 index 0000000000..3fec3b8406 --- /dev/null +++ b/arch/um/drivers/chan.h @@ -0,0 +1,48 @@ +/* SPDX-License-Identifier: GPL-2.0 */ +/* + * Copyright (C) 2000, 2001 Jeff Dike (jdike@karaya.com) + */ + +#ifndef __CHAN_KERN_H__ +#define __CHAN_KERN_H__ + +#include <linux/tty.h> +#include <linux/list.h> +#include <linux/console.h> +#include "chan_user.h" +#include "line.h" + +struct chan { + struct list_head list; + struct list_head free_list; + struct line *line; + char *dev; + unsigned int primary:1; + unsigned int input:1; + unsigned int output:1; + unsigned int opened:1; + unsigned int enabled:1; + int fd; + const struct chan_ops *ops; + void *data; +}; + +extern void chan_interrupt(struct line *line, int irq); +extern int parse_chan_pair(char *str, struct line *line, int device, + const struct chan_opts *opts, char **error_out); +extern int write_chan(struct chan *chan, const char *buf, int len, + int write_irq); +extern int console_write_chan(struct chan *chan, const char *buf, + int len); +extern int console_open_chan(struct line *line, struct console *co); +extern void deactivate_chan(struct chan *chan, int irq); +extern void chan_enable_winch(struct chan *chan, struct tty_port *port); +extern int enable_chan(struct line *line); +extern void close_chan(struct line *line); +extern int chan_window_size(struct line *line, + unsigned short *rows_out, + unsigned short *cols_out); +extern int chan_config_string(struct line *line, char *str, int size, + char **error_out); + +#endif diff --git a/arch/um/drivers/chan_kern.c b/arch/um/drivers/chan_kern.c new file mode 100644 index 0000000000..26a702a065 --- /dev/null +++ b/arch/um/drivers/chan_kern.c @@ -0,0 +1,570 @@ +// SPDX-License-Identifier: GPL-2.0 +/* + * Copyright (C) 2000 - 2007 Jeff Dike (jdike@{linux.intel,addtoit}.com) + */ + +#include <linux/slab.h> +#include <linux/tty.h> +#include <linux/tty_flip.h> +#include "chan.h" +#include <os.h> +#include <irq_kern.h> + +#ifdef CONFIG_NOCONFIG_CHAN +static void *not_configged_init(char *str, int device, + const struct chan_opts *opts) +{ + printk(KERN_ERR "Using a channel type which is configured out of " + "UML\n"); + return NULL; +} + +static int not_configged_open(int input, int output, int primary, void *data, + char **dev_out) +{ + printk(KERN_ERR "Using a channel type which is configured out of " + "UML\n"); + return -ENODEV; +} + +static void not_configged_close(int fd, void *data) +{ + printk(KERN_ERR "Using a channel type which is configured out of " + "UML\n"); +} + +static int not_configged_read(int fd, char *c_out, void *data) +{ + printk(KERN_ERR "Using a channel type which is configured out of " + "UML\n"); + return -EIO; +} + +static int not_configged_write(int fd, const char *buf, int len, void *data) +{ + printk(KERN_ERR "Using a channel type which is configured out of " + "UML\n"); + return -EIO; +} + +static int not_configged_console_write(int fd, const char *buf, int len) +{ + printk(KERN_ERR "Using a channel type which is configured out of " + "UML\n"); + return -EIO; +} + +static int not_configged_window_size(int fd, void *data, unsigned short *rows, + unsigned short *cols) +{ + printk(KERN_ERR "Using a channel type which is configured out of " + "UML\n"); + return -ENODEV; +} + +static void not_configged_free(void *data) +{ + printk(KERN_ERR "Using a channel type which is configured out of " + "UML\n"); +} + +static const struct chan_ops not_configged_ops = { + .init = not_configged_init, + .open = not_configged_open, + .close = not_configged_close, + .read = not_configged_read, + .write = not_configged_write, + .console_write = not_configged_console_write, + .window_size = not_configged_window_size, + .free = not_configged_free, + .winch = 0, +}; +#endif /* CONFIG_NOCONFIG_CHAN */ + +static int open_one_chan(struct chan *chan) +{ + int fd, err; + + if (chan->opened) + return 0; + + if (chan->ops->open == NULL) + fd = 0; + else fd = (*chan->ops->open)(chan->input, chan->output, chan->primary, + chan->data, &chan->dev); + if (fd < 0) + return fd; + + err = os_set_fd_block(fd, 0); + if (err) { + (*chan->ops->close)(fd, chan->data); + return err; + } + + chan->fd = fd; + + chan->opened = 1; + return 0; +} + +static int open_chan(struct list_head *chans) +{ + struct list_head *ele; + struct chan *chan; + int ret, err = 0; + + list_for_each(ele, chans) { + chan = list_entry(ele, struct chan, list); + ret = open_one_chan(chan); + if (chan->primary) + err = ret; + } + return err; +} + +void chan_enable_winch(struct chan *chan, struct tty_port *port) +{ + if (chan && chan->primary && chan->ops->winch) + register_winch(chan->fd, port); +} + +static void line_timer_cb(struct work_struct *work) +{ + struct line *line = container_of(work, struct line, task.work); + + if (!line->throttled) + chan_interrupt(line, line->read_irq); +} + +int enable_chan(struct line *line) +{ + struct list_head *ele; + struct chan *chan; + int err; + + INIT_DELAYED_WORK(&line->task, line_timer_cb); + + list_for_each(ele, &line->chan_list) { + chan = list_entry(ele, struct chan, list); + err = open_one_chan(chan); + if (err) { + if (chan->primary) + goto out_close; + + continue; + } + + if (chan->enabled) + continue; + err = line_setup_irq(chan->fd, chan->input, chan->output, line, + chan); + if (err) + goto out_close; + + chan->enabled = 1; + } + + return 0; + + out_close: + close_chan(line); + return err; +} + +/* Items are added in IRQ context, when free_irq can't be called, and + * removed in process context, when it can. + * This handles interrupt sources which disappear, and which need to + * be permanently disabled. This is discovered in IRQ context, but + * the freeing of the IRQ must be done later. + */ +static DEFINE_SPINLOCK(irqs_to_free_lock); +static LIST_HEAD(irqs_to_free); + +void free_irqs(void) +{ + struct chan *chan; + LIST_HEAD(list); + struct list_head *ele; + unsigned long flags; + + spin_lock_irqsave(&irqs_to_free_lock, flags); + list_splice_init(&irqs_to_free, &list); + spin_unlock_irqrestore(&irqs_to_free_lock, flags); + + list_for_each(ele, &list) { + chan = list_entry(ele, struct chan, free_list); + + if (chan->input && chan->enabled) + um_free_irq(chan->line->read_irq, chan); + if (chan->output && chan->enabled) + um_free_irq(chan->line->write_irq, chan); + chan->enabled = 0; + } +} + +static void close_one_chan(struct chan *chan, int delay_free_irq) +{ + unsigned long flags; + + if (!chan->opened) + return; + + if (delay_free_irq) { + spin_lock_irqsave(&irqs_to_free_lock, flags); + list_add(&chan->free_list, &irqs_to_free); + spin_unlock_irqrestore(&irqs_to_free_lock, flags); + } else { + if (chan->input && chan->enabled) + um_free_irq(chan->line->read_irq, chan); + if (chan->output && chan->enabled) + um_free_irq(chan->line->write_irq, chan); + chan->enabled = 0; + } + if (chan->ops->close != NULL) + (*chan->ops->close)(chan->fd, chan->data); + + chan->opened = 0; + chan->fd = -1; +} + +void close_chan(struct line *line) +{ + struct chan *chan; + + /* Close in reverse order as open in case more than one of them + * refers to the same device and they save and restore that device's + * state. Then, the first one opened will have the original state, + * so it must be the last closed. + */ + list_for_each_entry_reverse(chan, &line->chan_list, list) { + close_one_chan(chan, 0); + } +} + +void deactivate_chan(struct chan *chan, int irq) +{ + if (chan && chan->enabled) + deactivate_fd(chan->fd, irq); +} + +int write_chan(struct chan *chan, const char *buf, int len, + int write_irq) +{ + int n, ret = 0; + + if (len == 0 || !chan || !chan->ops->write) + return 0; + + n = chan->ops->write(chan->fd, buf, len, chan->data); + if (chan->primary) { + ret = n; + } + return ret; +} + +int console_write_chan(struct chan *chan, const char *buf, int len) +{ + int n, ret = 0; + + if (!chan || !chan->ops->console_write) + return 0; + + n = chan->ops->console_write(chan->fd, buf, len); + if (chan->primary) + ret = n; + return ret; +} + +int console_open_chan(struct line *line, struct console *co) +{ + int err; + + err = open_chan(&line->chan_list); + if (err) + return err; + + printk(KERN_INFO "Console initialized on /dev/%s%d\n", co->name, + co->index); + return 0; +} + +int chan_window_size(struct line *line, unsigned short *rows_out, + unsigned short *cols_out) +{ + struct chan *chan; + + chan = line->chan_in; + if (chan && chan->primary) { + if (chan->ops->window_size == NULL) + return 0; + return chan->ops->window_size(chan->fd, chan->data, + rows_out, cols_out); + } + chan = line->chan_out; + if (chan && chan->primary) { + if (chan->ops->window_size == NULL) + return 0; + return chan->ops->window_size(chan->fd, chan->data, + rows_out, cols_out); + } + return 0; +} + +static void free_one_chan(struct chan *chan) +{ + list_del(&chan->list); + + close_one_chan(chan, 0); + + if (chan->ops->free != NULL) + (*chan->ops->free)(chan->data); + + if (chan->primary && chan->output) + ignore_sigio_fd(chan->fd); + kfree(chan); +} + +static void free_chan(struct list_head *chans) +{ + struct list_head *ele, *next; + struct chan *chan; + + list_for_each_safe(ele, next, chans) { + chan = list_entry(ele, struct chan, list); + free_one_chan(chan); + } +} + +static int one_chan_config_string(struct chan *chan, char *str, int size, + char **error_out) +{ + int n = 0; + + if (chan == NULL) { + CONFIG_CHUNK(str, size, n, "none", 1); + return n; + } + + CONFIG_CHUNK(str, size, n, chan->ops->type, 0); + + if (chan->dev == NULL) { + CONFIG_CHUNK(str, size, n, "", 1); + return n; + } + + CONFIG_CHUNK(str, size, n, ":", 0); + CONFIG_CHUNK(str, size, n, chan->dev, 0); + + return n; +} + +static int chan_pair_config_string(struct chan *in, struct chan *out, + char *str, int size, char **error_out) +{ + int n; + + n = one_chan_config_string(in, str, size, error_out); + str += n; + size -= n; + + if (in == out) { + CONFIG_CHUNK(str, size, n, "", 1); + return n; + } + + CONFIG_CHUNK(str, size, n, ",", 1); + n = one_chan_config_string(out, str, size, error_out); + str += n; + size -= n; + CONFIG_CHUNK(str, size, n, "", 1); + + return n; +} + +int chan_config_string(struct line *line, char *str, int size, + char **error_out) +{ + struct chan *in = line->chan_in, *out = line->chan_out; + + if (in && !in->primary) + in = NULL; + if (out && !out->primary) + out = NULL; + + return chan_pair_config_string(in, out, str, size, error_out); +} + +struct chan_type { + char *key; + const struct chan_ops *ops; +}; + +static const struct chan_type chan_table[] = { + { "fd", &fd_ops }, + +#ifdef CONFIG_NULL_CHAN + { "null", &null_ops }, +#else + { "null", ¬_configged_ops }, +#endif + +#ifdef CONFIG_PORT_CHAN + { "port", &port_ops }, +#else + { "port", ¬_configged_ops }, +#endif + +#ifdef CONFIG_PTY_CHAN + { "pty", &pty_ops }, + { "pts", &pts_ops }, +#else + { "pty", ¬_configged_ops }, + { "pts", ¬_configged_ops }, +#endif + +#ifdef CONFIG_TTY_CHAN + { "tty", &tty_ops }, +#else + { "tty", ¬_configged_ops }, +#endif + +#ifdef CONFIG_XTERM_CHAN + { "xterm", &xterm_ops }, +#else + { "xterm", ¬_configged_ops }, +#endif +}; + +static struct chan *parse_chan(struct line *line, char *str, int device, + const struct chan_opts *opts, char **error_out) +{ + const struct chan_type *entry; + const struct chan_ops *ops; + struct chan *chan; + void *data; + int i; + + ops = NULL; + data = NULL; + for(i = 0; i < ARRAY_SIZE(chan_table); i++) { + entry = &chan_table[i]; + if (!strncmp(str, entry->key, strlen(entry->key))) { + ops = entry->ops; + str += strlen(entry->key); + break; + } + } + if (ops == NULL) { + *error_out = "No match for configured backends"; + return NULL; + } + + data = (*ops->init)(str, device, opts); + if (data == NULL) { + *error_out = "Configuration failed"; + return NULL; + } + + chan = kmalloc(sizeof(*chan), GFP_ATOMIC); + if (chan == NULL) { + *error_out = "Memory allocation failed"; + return NULL; + } + *chan = ((struct chan) { .list = LIST_HEAD_INIT(chan->list), + .free_list = + LIST_HEAD_INIT(chan->free_list), + .line = line, + .primary = 1, + .input = 0, + .output = 0, + .opened = 0, + .enabled = 0, + .fd = -1, + .ops = ops, + .data = data }); + return chan; +} + +int parse_chan_pair(char *str, struct line *line, int device, + const struct chan_opts *opts, char **error_out) +{ + struct list_head *chans = &line->chan_list; + struct chan *new; + char *in, *out; + + if (!list_empty(chans)) { + line->chan_in = line->chan_out = NULL; + free_chan(chans); + INIT_LIST_HEAD(chans); + } + + if (!str) + return 0; + + out = strchr(str, ','); + if (out != NULL) { + in = str; + *out = '\0'; + out++; + new = parse_chan(line, in, device, opts, error_out); + if (new == NULL) + return -1; + + new->input = 1; + list_add(&new->list, chans); + line->chan_in = new; + + new = parse_chan(line, out, device, opts, error_out); + if (new == NULL) + return -1; + + list_add(&new->list, chans); + new->output = 1; + line->chan_out = new; + } + else { + new = parse_chan(line, str, device, opts, error_out); + if (new == NULL) + return -1; + + list_add(&new->list, chans); + new->input = 1; + new->output = 1; + line->chan_in = line->chan_out = new; + } + return 0; +} + +void chan_interrupt(struct line *line, int irq) +{ + struct tty_port *port = &line->port; + struct chan *chan = line->chan_in; + int err; + char c; + + if (!chan || !chan->ops->read) + goto out; + + do { + if (!tty_buffer_request_room(port, 1)) { + schedule_delayed_work(&line->task, 1); + goto out; + } + err = chan->ops->read(chan->fd, &c, chan->data); + if (err > 0) + tty_insert_flip_char(port, c, TTY_NORMAL); + } while (err > 0); + + if (err == -EIO) { + if (chan->primary) { + tty_port_tty_hangup(&line->port, false); + if (line->chan_out != chan) + close_one_chan(line->chan_out, 1); + } + close_one_chan(chan, 1); + if (chan->primary) + return; + } + out: + tty_flip_buffer_push(port); +} diff --git a/arch/um/drivers/chan_user.c b/arch/um/drivers/chan_user.c new file mode 100644 index 0000000000..25727ed648 --- /dev/null +++ b/arch/um/drivers/chan_user.c @@ -0,0 +1,303 @@ +// SPDX-License-Identifier: GPL-2.0 +/* + * Copyright (C) 2000 - 2007 Jeff Dike (jdike@{linux.intel,addtoit}.com) + */ + +#include <stdlib.h> +#include <unistd.h> +#include <errno.h> +#include <sched.h> +#include <signal.h> +#include <termios.h> +#include <sys/ioctl.h> +#include "chan_user.h" +#include <os.h> +#include <um_malloc.h> + +void generic_close(int fd, void *unused) +{ + close(fd); +} + +int generic_read(int fd, char *c_out, void *unused) +{ + int n; + + n = read(fd, c_out, sizeof(*c_out)); + if (n > 0) + return n; + else if (n == 0) + return -EIO; + else if (errno == EAGAIN) + return 0; + return -errno; +} + +/* XXX Trivial wrapper around write */ + +int generic_write(int fd, const char *buf, int n, void *unused) +{ + int err; + + err = write(fd, buf, n); + if (err > 0) + return err; + else if (errno == EAGAIN) + return 0; + else if (err == 0) + return -EIO; + return -errno; +} + +int generic_window_size(int fd, void *unused, unsigned short *rows_out, + unsigned short *cols_out) +{ + struct winsize size; + int ret; + + if (ioctl(fd, TIOCGWINSZ, &size) < 0) + return -errno; + + ret = ((*rows_out != size.ws_row) || (*cols_out != size.ws_col)); + + *rows_out = size.ws_row; + *cols_out = size.ws_col; + + return ret; +} + +void generic_free(void *data) +{ + kfree(data); +} + +int generic_console_write(int fd, const char *buf, int n) +{ + sigset_t old, no_sigio; + struct termios save, new; + int err; + + if (isatty(fd)) { + sigemptyset(&no_sigio); + sigaddset(&no_sigio, SIGIO); + if (sigprocmask(SIG_BLOCK, &no_sigio, &old)) + goto error; + + CATCH_EINTR(err = tcgetattr(fd, &save)); + if (err) + goto error; + new = save; + /* + * The terminal becomes a bit less raw, to handle \n also as + * "Carriage Return", not only as "New Line". Otherwise, the new + * line won't start at the first column. + */ + new.c_oflag |= OPOST; + CATCH_EINTR(err = tcsetattr(fd, TCSAFLUSH, &new)); + if (err) + goto error; + } + err = generic_write(fd, buf, n, NULL); + /* + * Restore raw mode, in any case; we *must* ignore any error apart + * EINTR, except for debug. + */ + if (isatty(fd)) { + CATCH_EINTR(tcsetattr(fd, TCSAFLUSH, &save)); + sigprocmask(SIG_SETMASK, &old, NULL); + } + + return err; +error: + return -errno; +} + +/* + * UML SIGWINCH handling + * + * The point of this is to handle SIGWINCH on consoles which have host + * ttys and relay them inside UML to whatever might be running on the + * console and cares about the window size (since SIGWINCH notifies + * about terminal size changes). + * + * So, we have a separate thread for each host tty attached to a UML + * device (side-issue - I'm annoyed that one thread can't have + * multiple controlling ttys for the purpose of handling SIGWINCH, but + * I imagine there are other reasons that doesn't make any sense). + * + * SIGWINCH can't be received synchronously, so you have to set up to + * receive it as a signal. That being the case, if you are going to + * wait for it, it is convenient to sit in sigsuspend() and wait for + * the signal to bounce you out of it (see below for how we make sure + * to exit only on SIGWINCH). + */ + +static void winch_handler(int sig) +{ +} + +struct winch_data { + int pty_fd; + int pipe_fd; +}; + +static int winch_thread(void *arg) +{ + struct winch_data *data = arg; + sigset_t sigs; + int pty_fd, pipe_fd; + int count; + char c = 1; + + pty_fd = data->pty_fd; + pipe_fd = data->pipe_fd; + count = write(pipe_fd, &c, sizeof(c)); + if (count != sizeof(c)) + printk(UM_KERN_ERR "winch_thread : failed to write " + "synchronization byte, err = %d\n", -count); + + /* + * We are not using SIG_IGN on purpose, so don't fix it as I thought to + * do! If using SIG_IGN, the sigsuspend() call below would not stop on + * SIGWINCH. + */ + + signal(SIGWINCH, winch_handler); + sigfillset(&sigs); + /* Block all signals possible. */ + if (sigprocmask(SIG_SETMASK, &sigs, NULL) < 0) { + printk(UM_KERN_ERR "winch_thread : sigprocmask failed, " + "errno = %d\n", errno); + exit(1); + } + /* In sigsuspend(), block anything else than SIGWINCH. */ + sigdelset(&sigs, SIGWINCH); + + if (setsid() < 0) { + printk(UM_KERN_ERR "winch_thread : setsid failed, errno = %d\n", + errno); + exit(1); + } + + if (ioctl(pty_fd, TIOCSCTTY, 0) < 0) { + printk(UM_KERN_ERR "winch_thread : TIOCSCTTY failed on " + "fd %d err = %d\n", pty_fd, errno); + exit(1); + } + + if (tcsetpgrp(pty_fd, os_getpid()) < 0) { + printk(UM_KERN_ERR "winch_thread : tcsetpgrp failed on " + "fd %d err = %d\n", pty_fd, errno); + exit(1); + } + + /* + * These are synchronization calls between various UML threads on the + * host - since they are not different kernel threads, we cannot use + * kernel semaphores. We don't use SysV semaphores because they are + * persistent. + */ + count = read(pipe_fd, &c, sizeof(c)); + if (count != sizeof(c)) + printk(UM_KERN_ERR "winch_thread : failed to read " + "synchronization byte, err = %d\n", errno); + + while(1) { + /* + * This will be interrupted by SIGWINCH only, since + * other signals are blocked. + */ + sigsuspend(&sigs); + + count = write(pipe_fd, &c, sizeof(c)); + if (count != sizeof(c)) + printk(UM_KERN_ERR "winch_thread : write failed, " + "err = %d\n", errno); + } +} + +static int winch_tramp(int fd, struct tty_port *port, int *fd_out, + unsigned long *stack_out) +{ + struct winch_data data; + int fds[2], n, err, pid; + char c; + + err = os_pipe(fds, 1, 1); + if (err < 0) { + printk(UM_KERN_ERR "winch_tramp : os_pipe failed, err = %d\n", + -err); + goto out; + } + + data = ((struct winch_data) { .pty_fd = fd, + .pipe_fd = fds[1] } ); + /* + * CLONE_FILES so this thread doesn't hold open files which are open + * now, but later closed in a different thread. This is a + * problem with /dev/net/tun, which if held open by this + * thread, prevents the TUN/TAP device from being reused. + */ + pid = run_helper_thread(winch_thread, &data, CLONE_FILES, stack_out); + if (pid < 0) { + err = pid; + printk(UM_KERN_ERR "fork of winch_thread failed - errno = %d\n", + -err); + goto out_close; + } + + *fd_out = fds[0]; + n = read(fds[0], &c, sizeof(c)); + if (n != sizeof(c)) { + printk(UM_KERN_ERR "winch_tramp : failed to read " + "synchronization byte\n"); + printk(UM_KERN_ERR "read failed, err = %d\n", errno); + printk(UM_KERN_ERR "fd %d will not support SIGWINCH\n", fd); + err = -EINVAL; + goto out_close; + } + + err = os_set_fd_block(*fd_out, 0); + if (err) { + printk(UM_KERN_ERR "winch_tramp: failed to set thread_fd " + "non-blocking.\n"); + goto out_close; + } + + return pid; + + out_close: + close(fds[1]); + close(fds[0]); + out: + return err; +} + +void register_winch(int fd, struct tty_port *port) +{ + unsigned long stack; + int pid, thread, count, thread_fd = -1; + char c = 1; + + if (!isatty(fd)) + return; + + pid = tcgetpgrp(fd); + if (is_skas_winch(pid, fd, port)) { + register_winch_irq(-1, fd, -1, port, 0); + return; + } + + if (pid == -1) { + thread = winch_tramp(fd, port, &thread_fd, &stack); + if (thread < 0) + return; + + register_winch_irq(thread_fd, fd, thread, port, stack); + + count = write(thread_fd, &c, sizeof(c)); + if (count != sizeof(c)) + printk(UM_KERN_ERR "register_winch : failed to write " + "synchronization byte, err = %d\n", errno); + } +} diff --git a/arch/um/drivers/chan_user.h b/arch/um/drivers/chan_user.h new file mode 100644 index 0000000000..4e51b85e2a --- /dev/null +++ b/arch/um/drivers/chan_user.h @@ -0,0 +1,53 @@ +/* SPDX-License-Identifier: GPL-2.0 */ +/* + * Copyright (C) 2000, 2001 Jeff Dike (jdike@karaya.com) + */ + +#ifndef __CHAN_USER_H__ +#define __CHAN_USER_H__ + +#include <init.h> + +struct chan_opts { + void (*const announce)(char *dev_name, int dev); + char *xterm_title; + int raw; +}; + +struct chan_ops { + char *type; + void *(*init)(char *, int, const struct chan_opts *); + int (*open)(int, int, int, void *, char **); + void (*close)(int, void *); + int (*read)(int, char *, void *); + int (*write)(int, const char *, int, void *); + int (*console_write)(int, const char *, int); + int (*window_size)(int, void *, unsigned short *, unsigned short *); + void (*free)(void *); + int winch; +}; + +extern const struct chan_ops fd_ops, null_ops, port_ops, pts_ops, pty_ops, + tty_ops, xterm_ops; + +extern void generic_close(int fd, void *unused); +extern int generic_read(int fd, char *c_out, void *unused); +extern int generic_write(int fd, const char *buf, int n, void *unused); +extern int generic_console_write(int fd, const char *buf, int n); +extern int generic_window_size(int fd, void *unused, unsigned short *rows_out, + unsigned short *cols_out); +extern void generic_free(void *data); + +struct tty_port; +extern void register_winch(int fd, struct tty_port *port); +extern void register_winch_irq(int fd, int tty_fd, int pid, + struct tty_port *port, unsigned long stack); + +#define __channel_help(fn, prefix) \ +__uml_help(fn, prefix "[0-9]*=<channel description>\n" \ +" Attach a console or serial line to a host channel. See\n" \ +" http://user-mode-linux.sourceforge.net/old/input.html for a complete\n" \ +" description of this switch.\n\n" \ +); + +#endif diff --git a/arch/um/drivers/cow.h b/arch/um/drivers/cow.h new file mode 100644 index 0000000000..9a67c01700 --- /dev/null +++ b/arch/um/drivers/cow.h @@ -0,0 +1,26 @@ +/* SPDX-License-Identifier: GPL-2.0 */ +#ifndef __COW_H__ +#define __COW_H__ + +#include <asm/types.h> + +extern int init_cow_file(int fd, char *cow_file, char *backing_file, + int sectorsize, int alignment, int *bitmap_offset_out, + unsigned long *bitmap_len_out, int *data_offset_out); + +extern int file_reader(__u64 offset, char *buf, int len, void *arg); +extern int read_cow_header(int (*reader)(__u64, char *, int, void *), + void *arg, __u32 *version_out, + char **backing_file_out, long long *mtime_out, + unsigned long long *size_out, int *sectorsize_out, + __u32 *align_out, int *bitmap_offset_out); + +extern int write_cow_header(char *cow_file, int fd, char *backing_file, + int sectorsize, int alignment, + unsigned long long *size); + +extern void cow_sizes(int version, __u64 size, int sectorsize, int align, + int bitmap_offset, unsigned long *bitmap_len_out, + int *data_offset_out); + +#endif diff --git a/arch/um/drivers/cow_sys.h b/arch/um/drivers/cow_sys.h new file mode 100644 index 0000000000..916811ef53 --- /dev/null +++ b/arch/um/drivers/cow_sys.h @@ -0,0 +1,41 @@ +/* SPDX-License-Identifier: GPL-2.0 */ +#ifndef __COW_SYS_H__ +#define __COW_SYS_H__ + +#include <kern_util.h> +#include <os.h> +#include <um_malloc.h> + +static inline void *cow_malloc(int size) +{ + return uml_kmalloc(size, UM_GFP_KERNEL); +} + +static inline void cow_free(void *ptr) +{ + kfree(ptr); +} + +#define cow_printf printk + +static inline char *cow_strdup(char *str) +{ + return uml_strdup(str); +} + +static inline int cow_seek_file(int fd, __u64 offset) +{ + return os_seek_file(fd, offset); +} + +static inline int cow_file_size(char *file, unsigned long long *size_out) +{ + return os_file_size(file, size_out); +} + +static inline int cow_write_file(int fd, void *buf, int size) +{ + return os_write_file(fd, buf, size); +} + +#endif diff --git a/arch/um/drivers/cow_user.c b/arch/um/drivers/cow_user.c new file mode 100644 index 0000000000..29b46581dd --- /dev/null +++ b/arch/um/drivers/cow_user.c @@ -0,0 +1,444 @@ +// SPDX-License-Identifier: GPL-2.0 +/* + * Copyright (C) 2007 Jeff Dike (jdike@{linux.intel,addtoit}.com) + */ + +/* + * _XOPEN_SOURCE is needed for pread, but we define _GNU_SOURCE, which defines + * that. + */ +#include <unistd.h> +#include <errno.h> +#include <string.h> +#include <arpa/inet.h> +#include <endian.h> +#include "cow.h" +#include "cow_sys.h" + +#define PATH_LEN_V1 256 + +/* unsigned time_t works until year 2106 */ +typedef __u32 time32_t; + +struct cow_header_v1 { + __s32 magic; + __s32 version; + char backing_file[PATH_LEN_V1]; + time32_t mtime; + __u64 size; + __s32 sectorsize; +} __attribute__((packed)); + +/* + * Define PATH_LEN_V3 as the usual value of MAXPATHLEN, just hard-code it in + * case other systems have different values for MAXPATHLEN. + * + * The same must hold for V2 - we want file format compatibility, not anything + * else. + */ +#define PATH_LEN_V3 4096 +#define PATH_LEN_V2 PATH_LEN_V3 + +struct cow_header_v2 { + __u32 magic; + __u32 version; + char backing_file[PATH_LEN_V2]; + time32_t mtime; + __u64 size; + __s32 sectorsize; +} __attribute__((packed)); + +/* + * Changes from V2 - + * PATH_LEN_V3 as described above + * Explicitly specify field bit lengths for systems with different + * lengths for the usual C types. Not sure whether char or + * time_t should be changed, this can be changed later without + * breaking compatibility + * Add alignment field so that different alignments can be used for the + * bitmap and data + * Add cow_format field to allow for the possibility of different ways + * of specifying the COW blocks. For now, the only value is 0, + * for the traditional COW bitmap. + * Move the backing_file field to the end of the header. This allows + * for the possibility of expanding it into the padding required + * by the bitmap alignment. + * The bitmap and data portions of the file will be aligned as specified + * by the alignment field. This is to allow COW files to be + * put on devices with restrictions on access alignments, such as + * /dev/raw, with a 512 byte alignment restriction. This also + * allows the data to be more aligned more strictly than on + * sector boundaries. This is needed for ubd-mmap, which needs + * the data to be page aligned. + * Fixed (finally!) the rounding bug + */ + +/* + * Until Dec2005, __attribute__((packed)) was left out from the below + * definition, leading on 64-bit systems to 4 bytes of padding after mtime, to + * align size to 8-byte alignment. This shifted all fields above (no padding + * was present on 32-bit, no other padding was added). + * + * However, this _can be detected_: it means that cow_format (always 0 until + * now) is shifted onto the first 4 bytes of backing_file, where it is otherwise + * impossible to find 4 zeros. -bb */ + +struct cow_header_v3 { + __u32 magic; + __u32 version; + __u32 mtime; + __u64 size; + __u32 sectorsize; + __u32 alignment; + __u32 cow_format; + char backing_file[PATH_LEN_V3]; +} __attribute__((packed)); + +/* This is the broken layout used by some 64-bit binaries. */ +struct cow_header_v3_broken { + __u32 magic; + __u32 version; + __s64 mtime; + __u64 size; + __u32 sectorsize; + __u32 alignment; + __u32 cow_format; + char backing_file[PATH_LEN_V3]; +}; + +/* COW format definitions - for now, we have only the usual COW bitmap */ +#define COW_BITMAP 0 + +union cow_header { + struct cow_header_v1 v1; + struct cow_header_v2 v2; + struct cow_header_v3 v3; + struct cow_header_v3_broken v3_b; +}; + +#define COW_MAGIC 0x4f4f4f4d /* MOOO */ +#define COW_VERSION 3 + +#define DIV_ROUND(x, len) (((x) + (len) - 1) / (len)) +#define ROUND_UP(x, align) DIV_ROUND(x, align) * (align) + +void cow_sizes(int version, __u64 size, int sectorsize, int align, + int bitmap_offset, unsigned long *bitmap_len_out, + int *data_offset_out) +{ + if (version < 3) { + *bitmap_len_out = (size + sectorsize - 1) / (8 * sectorsize); + + *data_offset_out = bitmap_offset + *bitmap_len_out; + *data_offset_out = (*data_offset_out + sectorsize - 1) / + sectorsize; + *data_offset_out *= sectorsize; + } + else { + *bitmap_len_out = DIV_ROUND(size, sectorsize); + *bitmap_len_out = DIV_ROUND(*bitmap_len_out, 8); + + *data_offset_out = bitmap_offset + *bitmap_len_out; + *data_offset_out = ROUND_UP(*data_offset_out, align); + } +} + +static int absolutize(char *to, int size, char *from) +{ + char save_cwd[256], *slash; + int remaining; + + if (getcwd(save_cwd, sizeof(save_cwd)) == NULL) { + cow_printf("absolutize : unable to get cwd - errno = %d\n", + errno); + return -1; + } + slash = strrchr(from, '/'); + if (slash != NULL) { + *slash = '\0'; + if (chdir(from)) { + *slash = '/'; + cow_printf("absolutize : Can't cd to '%s' - " + "errno = %d\n", from, errno); + return -1; + } + *slash = '/'; + if (getcwd(to, size) == NULL) { + cow_printf("absolutize : unable to get cwd of '%s' - " + "errno = %d\n", from, errno); + return -1; + } + remaining = size - strlen(to); + if (strlen(slash) + 1 > remaining) { + cow_printf("absolutize : unable to fit '%s' into %d " + "chars\n", from, size); + return -1; + } + strcat(to, slash); + } + else { + if (strlen(save_cwd) + 1 + strlen(from) + 1 > size) { + cow_printf("absolutize : unable to fit '%s' into %d " + "chars\n", from, size); + return -1; + } + strcpy(to, save_cwd); + strcat(to, "/"); + strcat(to, from); + } + if (chdir(save_cwd)) { + cow_printf("absolutize : Can't cd to '%s' - " + "errno = %d\n", save_cwd, errno); + return -1; + } + return 0; +} + +int write_cow_header(char *cow_file, int fd, char *backing_file, + int sectorsize, int alignment, unsigned long long *size) +{ + struct cow_header_v3 *header; + long long modtime; + int err; + + err = cow_seek_file(fd, 0); + if (err < 0) { + cow_printf("write_cow_header - lseek failed, err = %d\n", -err); + goto out; + } + + err = -ENOMEM; + header = cow_malloc(sizeof(*header)); + if (header == NULL) { + cow_printf("write_cow_header - failed to allocate COW V3 " + "header\n"); + goto out; + } + header->magic = htobe32(COW_MAGIC); + header->version = htobe32(COW_VERSION); + + err = -EINVAL; + if (strlen(backing_file) > sizeof(header->backing_file) - 1) { + /* Below, %zd is for a size_t value */ + cow_printf("Backing file name \"%s\" is too long - names are " + "limited to %zd characters\n", backing_file, + sizeof(header->backing_file) - 1); + goto out_free; + } + + if (absolutize(header->backing_file, sizeof(header->backing_file), + backing_file)) + goto out_free; + + err = os_file_modtime(header->backing_file, &modtime); + if (err < 0) { + cow_printf("write_cow_header - backing file '%s' mtime " + "request failed, err = %d\n", header->backing_file, + -err); + goto out_free; + } + + err = cow_file_size(header->backing_file, size); + if (err < 0) { + cow_printf("write_cow_header - couldn't get size of " + "backing file '%s', err = %d\n", + header->backing_file, -err); + goto out_free; + } + + header->mtime = htobe32(modtime); + header->size = htobe64(*size); + header->sectorsize = htobe32(sectorsize); + header->alignment = htobe32(alignment); + header->cow_format = COW_BITMAP; + + err = cow_write_file(fd, header, sizeof(*header)); + if (err != sizeof(*header)) { + cow_printf("write_cow_header - write of header to " + "new COW file '%s' failed, err = %d\n", cow_file, + -err); + goto out_free; + } + err = 0; + out_free: + cow_free(header); + out: + return err; +} + +int file_reader(__u64 offset, char *buf, int len, void *arg) +{ + int fd = *((int *) arg); + + return pread(fd, buf, len, offset); +} + +/* XXX Need to sanity-check the values read from the header */ + +int read_cow_header(int (*reader)(__u64, char *, int, void *), void *arg, + __u32 *version_out, char **backing_file_out, + long long *mtime_out, unsigned long long *size_out, + int *sectorsize_out, __u32 *align_out, + int *bitmap_offset_out) +{ + union cow_header *header; + char *file; + int err, n; + unsigned long version, magic; + + header = cow_malloc(sizeof(*header)); + if (header == NULL) { + cow_printf("read_cow_header - Failed to allocate header\n"); + return -ENOMEM; + } + err = -EINVAL; + n = (*reader)(0, (char *) header, sizeof(*header), arg); + if (n < offsetof(typeof(header->v1), backing_file)) { + cow_printf("read_cow_header - short header\n"); + goto out; + } + + magic = header->v1.magic; + if (magic == COW_MAGIC) + version = header->v1.version; + else if (magic == be32toh(COW_MAGIC)) + version = be32toh(header->v1.version); + /* No error printed because the non-COW case comes through here */ + else goto out; + + *version_out = version; + + if (version == 1) { + if (n < sizeof(header->v1)) { + cow_printf("read_cow_header - failed to read V1 " + "header\n"); + goto out; + } + *mtime_out = header->v1.mtime; + *size_out = header->v1.size; + *sectorsize_out = header->v1.sectorsize; + *bitmap_offset_out = sizeof(header->v1); + *align_out = *sectorsize_out; + file = header->v1.backing_file; + } + else if (version == 2) { + if (n < sizeof(header->v2)) { + cow_printf("read_cow_header - failed to read V2 " + "header\n"); + goto out; + } + *mtime_out = be32toh(header->v2.mtime); + *size_out = be64toh(header->v2.size); + *sectorsize_out = be32toh(header->v2.sectorsize); + *bitmap_offset_out = sizeof(header->v2); + *align_out = *sectorsize_out; + file = header->v2.backing_file; + } + /* This is very subtle - see above at union cow_header definition */ + else if (version == 3 && (*((int*)header->v3.backing_file) != 0)) { + if (n < sizeof(header->v3)) { + cow_printf("read_cow_header - failed to read V3 " + "header\n"); + goto out; + } + *mtime_out = be32toh(header->v3.mtime); + *size_out = be64toh(header->v3.size); + *sectorsize_out = be32toh(header->v3.sectorsize); + *align_out = be32toh(header->v3.alignment); + if (*align_out == 0) { + cow_printf("read_cow_header - invalid COW header, " + "align == 0\n"); + } + *bitmap_offset_out = ROUND_UP(sizeof(header->v3), *align_out); + file = header->v3.backing_file; + } + else if (version == 3) { + cow_printf("read_cow_header - broken V3 file with" + " 64-bit layout - recovering content.\n"); + + if (n < sizeof(header->v3_b)) { + cow_printf("read_cow_header - failed to read V3 " + "header\n"); + goto out; + } + + /* + * this was used until Dec2005 - 64bits are needed to represent + * 2106+. I.e. we can safely do this truncating cast. + * + * Additionally, we must use be32toh() instead of be64toh(), since + * the program used to use the former (tested - I got mtime + * mismatch "0 vs whatever"). + * + * Ever heard about bug-to-bug-compatibility ? ;-) */ + *mtime_out = (time32_t) be32toh(header->v3_b.mtime); + + *size_out = be64toh(header->v3_b.size); + *sectorsize_out = be32toh(header->v3_b.sectorsize); + *align_out = be32toh(header->v3_b.alignment); + if (*align_out == 0) { + cow_printf("read_cow_header - invalid COW header, " + "align == 0\n"); + } + *bitmap_offset_out = ROUND_UP(sizeof(header->v3_b), *align_out); + file = header->v3_b.backing_file; + } + else { + cow_printf("read_cow_header - invalid COW version\n"); + goto out; + } + err = -ENOMEM; + *backing_file_out = cow_strdup(file); + if (*backing_file_out == NULL) { + cow_printf("read_cow_header - failed to allocate backing " + "file\n"); + goto out; + } + err = 0; + out: + cow_free(header); + return err; +} + +int init_cow_file(int fd, char *cow_file, char *backing_file, int sectorsize, + int alignment, int *bitmap_offset_out, + unsigned long *bitmap_len_out, int *data_offset_out) +{ + unsigned long long size, offset; + char zero = 0; + int err; + + err = write_cow_header(cow_file, fd, backing_file, sectorsize, + alignment, &size); + if (err) + goto out; + + *bitmap_offset_out = ROUND_UP(sizeof(struct cow_header_v3), alignment); + cow_sizes(COW_VERSION, size, sectorsize, alignment, *bitmap_offset_out, + bitmap_len_out, data_offset_out); + + offset = *data_offset_out + size - sizeof(zero); + err = cow_seek_file(fd, offset); + if (err < 0) { + cow_printf("cow bitmap lseek failed : err = %d\n", -err); + goto out; + } + + /* + * does not really matter how much we write it is just to set EOF + * this also sets the entire COW bitmap + * to zero without having to allocate it + */ + err = cow_write_file(fd, &zero, sizeof(zero)); + if (err != sizeof(zero)) { + cow_printf("Write of bitmap to new COW file '%s' failed, " + "err = %d\n", cow_file, -err); + if (err >= 0) + err = -EINVAL; + goto out; + } + + return 0; + out: + return err; +} diff --git a/arch/um/drivers/daemon.h b/arch/um/drivers/daemon.h new file mode 100644 index 0000000000..1509cc7eb9 --- /dev/null +++ b/arch/um/drivers/daemon.h @@ -0,0 +1,29 @@ +/* SPDX-License-Identifier: GPL-2.0 */ +/* + * Copyright (C) 2001 - 2007 Jeff Dike (jdike@{addtoit,linux.intel}.com) + */ + +#ifndef __DAEMON_H__ +#define __DAEMON_H__ + +#include <net_user.h> + +#define SWITCH_VERSION 3 + +struct daemon_data { + char *sock_type; + char *ctl_sock; + void *ctl_addr; + void *data_addr; + void *local_addr; + int fd; + int control; + void *dev; +}; + +extern const struct net_user_info daemon_user_info; + +extern int daemon_user_write(int fd, void *buf, int len, + struct daemon_data *pri); + +#endif diff --git a/arch/um/drivers/daemon_kern.c b/arch/um/drivers/daemon_kern.c new file mode 100644 index 0000000000..afde1e82c0 --- /dev/null +++ b/arch/um/drivers/daemon_kern.c @@ -0,0 +1,95 @@ +// SPDX-License-Identifier: GPL-2.0 +/* + * Copyright (C) 2001 Lennert Buytenhek (buytenh@gnu.org) and + * James Leu (jleu@mindspring.net). + * Copyright (C) 2001 - 2007 Jeff Dike (jdike@{addtoit,linux.intel}.com) + * Copyright (C) 2001 by various other people who didn't put their name here. + */ + +#include <linux/init.h> +#include <linux/netdevice.h> +#include <net_kern.h> +#include "daemon.h" + +struct daemon_init { + char *sock_type; + char *ctl_sock; +}; + +static void daemon_init(struct net_device *dev, void *data) +{ + struct uml_net_private *pri; + struct daemon_data *dpri; + struct daemon_init *init = data; + + pri = netdev_priv(dev); + dpri = (struct daemon_data *) pri->user; + dpri->sock_type = init->sock_type; + dpri->ctl_sock = init->ctl_sock; + dpri->fd = -1; + dpri->control = -1; + dpri->dev = dev; + /* We will free this pointer. If it contains crap we're burned. */ + dpri->ctl_addr = NULL; + dpri->data_addr = NULL; + dpri->local_addr = NULL; + + printk("daemon backend (uml_switch version %d) - %s:%s", + SWITCH_VERSION, dpri->sock_type, dpri->ctl_sock); + printk("\n"); +} + +static int daemon_read(int fd, struct sk_buff *skb, struct uml_net_private *lp) +{ + return net_recvfrom(fd, skb_mac_header(skb), + skb->dev->mtu + ETH_HEADER_OTHER); +} + +static int daemon_write(int fd, struct sk_buff *skb, struct uml_net_private *lp) +{ + return daemon_user_write(fd, skb->data, skb->len, + (struct daemon_data *) &lp->user); +} + +static const struct net_kern_info daemon_kern_info = { + .init = daemon_init, + .protocol = eth_protocol, + .read = daemon_read, + .write = daemon_write, +}; + +static int daemon_setup(char *str, char **mac_out, void *data) +{ + struct daemon_init *init = data; + char *remain; + + *init = ((struct daemon_init) + { .sock_type = "unix", + .ctl_sock = CONFIG_UML_NET_DAEMON_DEFAULT_SOCK }); + + remain = split_if_spec(str, mac_out, &init->sock_type, &init->ctl_sock, + NULL); + if (remain != NULL) + printk(KERN_WARNING "daemon_setup : Ignoring data socket " + "specification\n"); + + return 1; +} + +static struct transport daemon_transport = { + .list = LIST_HEAD_INIT(daemon_transport.list), + .name = "daemon", + .setup = daemon_setup, + .user = &daemon_user_info, + .kern = &daemon_kern_info, + .private_size = sizeof(struct daemon_data), + .setup_size = sizeof(struct daemon_init), +}; + +static int register_daemon(void) +{ + register_transport(&daemon_transport); + return 0; +} + +late_initcall(register_daemon); diff --git a/arch/um/drivers/daemon_user.c b/arch/um/drivers/daemon_user.c new file mode 100644 index 0000000000..785baedc35 --- /dev/null +++ b/arch/um/drivers/daemon_user.c @@ -0,0 +1,194 @@ +// SPDX-License-Identifier: GPL-2.0 +/* + * Copyright (C) 2001 - 2007 Jeff Dike (jdike@{addtoit,linux.intel}.com) + * Copyright (C) 2001 Lennert Buytenhek (buytenh@gnu.org) and + * James Leu (jleu@mindspring.net). + * Copyright (C) 2001 by various other people who didn't put their name here. + */ + +#include <stdint.h> +#include <string.h> +#include <unistd.h> +#include <errno.h> +#include <sys/types.h> +#include <sys/socket.h> +#include <sys/time.h> +#include <sys/un.h> +#include "daemon.h" +#include <net_user.h> +#include <os.h> +#include <um_malloc.h> + +enum request_type { REQ_NEW_CONTROL }; + +#define SWITCH_MAGIC 0xfeedface + +struct request_v3 { + uint32_t magic; + uint32_t version; + enum request_type type; + struct sockaddr_un sock; +}; + +static struct sockaddr_un *new_addr(void *name, int len) +{ + struct sockaddr_un *sun; + + sun = uml_kmalloc(sizeof(struct sockaddr_un), UM_GFP_KERNEL); + if (sun == NULL) { + printk(UM_KERN_ERR "new_addr: allocation of sockaddr_un " + "failed\n"); + return NULL; + } + sun->sun_family = AF_UNIX; + memcpy(sun->sun_path, name, len); + return sun; +} + +static int connect_to_switch(struct daemon_data *pri) +{ + struct sockaddr_un *ctl_addr = pri->ctl_addr; + struct sockaddr_un *local_addr = pri->local_addr; + struct sockaddr_un *sun; + struct request_v3 req; + int fd, n, err; + + pri->control = socket(AF_UNIX, SOCK_STREAM, 0); + if (pri->control < 0) { + err = -errno; + printk(UM_KERN_ERR "daemon_open : control socket failed, " + "errno = %d\n", -err); + return err; + } + + if (connect(pri->control, (struct sockaddr *) ctl_addr, + sizeof(*ctl_addr)) < 0) { + err = -errno; + printk(UM_KERN_ERR "daemon_open : control connect failed, " + "errno = %d\n", -err); + goto out; + } + + fd = socket(AF_UNIX, SOCK_DGRAM, 0); + if (fd < 0) { + err = -errno; + printk(UM_KERN_ERR "daemon_open : data socket failed, " + "errno = %d\n", -err); + goto out; + } + if (bind(fd, (struct sockaddr *) local_addr, sizeof(*local_addr)) < 0) { + err = -errno; + printk(UM_KERN_ERR "daemon_open : data bind failed, " + "errno = %d\n", -err); + goto out_close; + } + + sun = uml_kmalloc(sizeof(struct sockaddr_un), UM_GFP_KERNEL); + if (sun == NULL) { + printk(UM_KERN_ERR "new_addr: allocation of sockaddr_un " + "failed\n"); + err = -ENOMEM; + goto out_close; + } + + req.magic = SWITCH_MAGIC; + req.version = SWITCH_VERSION; + req.type = REQ_NEW_CONTROL; + req.sock = *local_addr; + n = write(pri->control, &req, sizeof(req)); + if (n != sizeof(req)) { + printk(UM_KERN_ERR "daemon_open : control setup request " + "failed, err = %d\n", -errno); + err = -ENOTCONN; + goto out_free; + } + + n = read(pri->control, sun, sizeof(*sun)); + if (n != sizeof(*sun)) { + printk(UM_KERN_ERR "daemon_open : read of data socket failed, " + "err = %d\n", -errno); + err = -ENOTCONN; + goto out_free; + } + + pri->data_addr = sun; + return fd; + + out_free: + kfree(sun); + out_close: + close(fd); + out: + close(pri->control); + return err; +} + +static int daemon_user_init(void *data, void *dev) +{ + struct daemon_data *pri = data; + struct timeval tv; + struct { + char zero; + int pid; + int usecs; + } name; + + if (!strcmp(pri->sock_type, "unix")) + pri->ctl_addr = new_addr(pri->ctl_sock, + strlen(pri->ctl_sock) + 1); + name.zero = 0; + name.pid = os_getpid(); + gettimeofday(&tv, NULL); + name.usecs = tv.tv_usec; + pri->local_addr = new_addr(&name, sizeof(name)); + pri->dev = dev; + pri->fd = connect_to_switch(pri); + if (pri->fd < 0) { + kfree(pri->local_addr); + pri->local_addr = NULL; + return pri->fd; + } + + return 0; +} + +static int daemon_open(void *data) +{ + struct daemon_data *pri = data; + return pri->fd; +} + +static void daemon_remove(void *data) +{ + struct daemon_data *pri = data; + + close(pri->fd); + pri->fd = -1; + close(pri->control); + pri->control = -1; + + kfree(pri->data_addr); + pri->data_addr = NULL; + kfree(pri->ctl_addr); + pri->ctl_addr = NULL; + kfree(pri->local_addr); + pri->local_addr = NULL; +} + +int daemon_user_write(int fd, void *buf, int len, struct daemon_data *pri) +{ + struct sockaddr_un *data_addr = pri->data_addr; + + return net_sendto(fd, buf, len, data_addr, sizeof(*data_addr)); +} + +const struct net_user_info daemon_user_info = { + .init = daemon_user_init, + .open = daemon_open, + .close = NULL, + .remove = daemon_remove, + .add_address = NULL, + .delete_address = NULL, + .mtu = ETH_MAX_PACKET, + .max_packet = ETH_MAX_PACKET + ETH_HEADER_OTHER, +}; diff --git a/arch/um/drivers/fd.c b/arch/um/drivers/fd.c new file mode 100644 index 0000000000..082d739dc0 --- /dev/null +++ b/arch/um/drivers/fd.c @@ -0,0 +1,95 @@ +// SPDX-License-Identifier: GPL-2.0 +/* + * Copyright (C) 2001 - 2007 Jeff Dike (jdike@{linux.intel,addtoit}.com) + */ + +#include <stdio.h> +#include <stdlib.h> +#include <unistd.h> +#include <errno.h> +#include <termios.h> +#include "chan_user.h" +#include <os.h> +#include <um_malloc.h> + +struct fd_chan { + int fd; + int raw; + struct termios tt; + char str[sizeof("1234567890\0")]; +}; + +static void *fd_init(char *str, int device, const struct chan_opts *opts) +{ + struct fd_chan *data; + char *end; + int n; + + if (*str != ':') { + printk(UM_KERN_ERR "fd_init : channel type 'fd' must specify a " + "file descriptor\n"); + return NULL; + } + str++; + n = strtoul(str, &end, 0); + if ((*end != '\0') || (end == str)) { + printk(UM_KERN_ERR "fd_init : couldn't parse file descriptor " + "'%s'\n", str); + return NULL; + } + + data = uml_kmalloc(sizeof(*data), UM_GFP_KERNEL); + if (data == NULL) + return NULL; + + *data = ((struct fd_chan) { .fd = n, + .raw = opts->raw }); + return data; +} + +static int fd_open(int input, int output, int primary, void *d, char **dev_out) +{ + struct fd_chan *data = d; + int err; + + if (data->raw && isatty(data->fd)) { + CATCH_EINTR(err = tcgetattr(data->fd, &data->tt)); + if (err) + return err; + + err = raw(data->fd); + if (err) + return err; + } + sprintf(data->str, "%d", data->fd); + *dev_out = data->str; + return data->fd; +} + +static void fd_close(int fd, void *d) +{ + struct fd_chan *data = d; + int err; + + if (!data->raw || !isatty(fd)) + return; + + CATCH_EINTR(err = tcsetattr(fd, TCSAFLUSH, &data->tt)); + if (err) + printk(UM_KERN_ERR "Failed to restore terminal state - " + "errno = %d\n", -err); + data->raw = 0; +} + +const struct chan_ops fd_ops = { + .type = "fd", + .init = fd_init, + .open = fd_open, + .close = fd_close, + .read = generic_read, + .write = generic_write, + .console_write = generic_console_write, + .window_size = generic_window_size, + .free = generic_free, + .winch = 1, +}; diff --git a/arch/um/drivers/harddog.h b/arch/um/drivers/harddog.h new file mode 100644 index 0000000000..6d9ea60e71 --- /dev/null +++ b/arch/um/drivers/harddog.h @@ -0,0 +1,9 @@ +/* SPDX-License-Identifier: GPL-2.0 */ +#ifndef UM_WATCHDOG_H +#define UM_WATCHDOG_H + +int start_watchdog(int *in_fd_ret, int *out_fd_ret, char *sock); +void stop_watchdog(int in_fd, int out_fd); +int ping_watchdog(int fd); + +#endif /* UM_WATCHDOG_H */ diff --git a/arch/um/drivers/harddog_kern.c b/arch/um/drivers/harddog_kern.c new file mode 100644 index 0000000000..60d1c6cab8 --- /dev/null +++ b/arch/um/drivers/harddog_kern.c @@ -0,0 +1,174 @@ +/* UML hardware watchdog, shamelessly stolen from: + * + * SoftDog 0.05: A Software Watchdog Device + * + * (c) Copyright 1996 Alan Cox <alan@redhat.com>, All Rights Reserved. + * http://www.redhat.com + * + * This program is free software; you can redistribute it and/or + * modify it under the terms of the GNU General Public License + * as published by the Free Software Foundation; either version + * 2 of the License, or (at your option) any later version. + * + * Neither Alan Cox nor CymruNet Ltd. admit liability nor provide + * warranty for any of this software. This material is provided + * "AS-IS" and at no charge. + * + * (c) Copyright 1995 Alan Cox <alan@lxorguk.ukuu.org.uk> + * + * Software only watchdog driver. Unlike its big brother the WDT501P + * driver this won't always recover a failed machine. + * + * 03/96: Angelo Haritsis <ah@doc.ic.ac.uk> : + * Modularised. + * Added soft_margin; use upon insmod to change the timer delay. + * NB: uses same minor as wdt (WATCHDOG_MINOR); we could use separate + * minors. + * + * 19980911 Alan Cox + * Made SMP safe for 2.3.x + * + * 20011127 Joel Becker (jlbec@evilplan.org> + * Added soft_noboot; Allows testing the softdog trigger without + * requiring a recompile. + * Added WDIOC_GETTIMEOUT and WDIOC_SETTIMOUT. + */ + +#include <linux/module.h> +#include <linux/types.h> +#include <linux/kernel.h> +#include <linux/fs.h> +#include <linux/mm.h> +#include <linux/miscdevice.h> +#include <linux/watchdog.h> +#include <linux/reboot.h> +#include <linux/mutex.h> +#include <linux/init.h> +#include <linux/spinlock.h> +#include <linux/uaccess.h> +#include "mconsole.h" +#include "harddog.h" + +MODULE_LICENSE("GPL"); + +static DEFINE_MUTEX(harddog_mutex); +static DEFINE_SPINLOCK(lock); +static int timer_alive; +static int harddog_in_fd = -1; +static int harddog_out_fd = -1; + +/* + * Allow only one person to hold it open + */ + +static int harddog_open(struct inode *inode, struct file *file) +{ + int err = -EBUSY; + char *sock = NULL; + + mutex_lock(&harddog_mutex); + spin_lock(&lock); + if(timer_alive) + goto err; +#ifdef CONFIG_WATCHDOG_NOWAYOUT + __module_get(THIS_MODULE); +#endif + +#ifdef CONFIG_MCONSOLE + sock = mconsole_notify_socket(); +#endif + err = start_watchdog(&harddog_in_fd, &harddog_out_fd, sock); + if(err) + goto err; + + timer_alive = 1; + spin_unlock(&lock); + mutex_unlock(&harddog_mutex); + return stream_open(inode, file); +err: + spin_unlock(&lock); + mutex_unlock(&harddog_mutex); + return err; +} + +static int harddog_release(struct inode *inode, struct file *file) +{ + /* + * Shut off the timer. + */ + + spin_lock(&lock); + + stop_watchdog(harddog_in_fd, harddog_out_fd); + harddog_in_fd = -1; + harddog_out_fd = -1; + + timer_alive=0; + spin_unlock(&lock); + + return 0; +} + +static ssize_t harddog_write(struct file *file, const char __user *data, size_t len, + loff_t *ppos) +{ + /* + * Refresh the timer. + */ + if(len) + return ping_watchdog(harddog_out_fd); + return 0; +} + +static int harddog_ioctl_unlocked(struct file *file, + unsigned int cmd, unsigned long arg) +{ + void __user *argp= (void __user *)arg; + static struct watchdog_info ident = { + WDIOC_SETTIMEOUT, + 0, + "UML Hardware Watchdog" + }; + switch (cmd) { + default: + return -ENOTTY; + case WDIOC_GETSUPPORT: + if(copy_to_user(argp, &ident, sizeof(ident))) + return -EFAULT; + return 0; + case WDIOC_GETSTATUS: + case WDIOC_GETBOOTSTATUS: + return put_user(0,(int __user *)argp); + case WDIOC_KEEPALIVE: + return ping_watchdog(harddog_out_fd); + } +} + +static long harddog_ioctl(struct file *file, + unsigned int cmd, unsigned long arg) +{ + long ret; + + mutex_lock(&harddog_mutex); + ret = harddog_ioctl_unlocked(file, cmd, arg); + mutex_unlock(&harddog_mutex); + + return ret; +} + +static const struct file_operations harddog_fops = { + .owner = THIS_MODULE, + .write = harddog_write, + .unlocked_ioctl = harddog_ioctl, + .compat_ioctl = compat_ptr_ioctl, + .open = harddog_open, + .release = harddog_release, + .llseek = no_llseek, +}; + +static struct miscdevice harddog_miscdev = { + .minor = WATCHDOG_MINOR, + .name = "watchdog", + .fops = &harddog_fops, +}; +module_misc_device(harddog_miscdev); diff --git a/arch/um/drivers/harddog_user.c b/arch/um/drivers/harddog_user.c new file mode 100644 index 0000000000..9ed8930497 --- /dev/null +++ b/arch/um/drivers/harddog_user.c @@ -0,0 +1,128 @@ +// SPDX-License-Identifier: GPL-2.0 +/* + * Copyright (C) 2002 - 2007 Jeff Dike (jdike@{addtoit,linux.intel}.com) + */ + +#include <stdio.h> +#include <unistd.h> +#include <errno.h> +#include <os.h> +#include "harddog.h" + +struct dog_data { + int stdin_fd; + int stdout_fd; + int close_me[2]; +}; + +static void pre_exec(void *d) +{ + struct dog_data *data = d; + + dup2(data->stdin_fd, 0); + dup2(data->stdout_fd, 1); + dup2(data->stdout_fd, 2); + close(data->stdin_fd); + close(data->stdout_fd); + close(data->close_me[0]); + close(data->close_me[1]); +} + +int start_watchdog(int *in_fd_ret, int *out_fd_ret, char *sock) +{ + struct dog_data data; + int in_fds[2], out_fds[2], pid, n, err; + char pid_buf[sizeof("nnnnnnn\0")], c; + char *pid_args[] = { "/usr/bin/uml_watchdog", "-pid", pid_buf, NULL }; + char *mconsole_args[] = { "/usr/bin/uml_watchdog", "-mconsole", NULL, + NULL }; + char **args = NULL; + + err = os_pipe(in_fds, 1, 0); + if (err < 0) { + printk("harddog_open - os_pipe failed, err = %d\n", -err); + goto out; + } + + err = os_pipe(out_fds, 1, 0); + if (err < 0) { + printk("harddog_open - os_pipe failed, err = %d\n", -err); + goto out_close_in; + } + + data.stdin_fd = out_fds[0]; + data.stdout_fd = in_fds[1]; + data.close_me[0] = out_fds[1]; + data.close_me[1] = in_fds[0]; + + if (sock != NULL) { + mconsole_args[2] = sock; + args = mconsole_args; + } + else { + /* XXX The os_getpid() is not SMP correct */ + sprintf(pid_buf, "%d", os_getpid()); + args = pid_args; + } + + pid = run_helper(pre_exec, &data, args); + + close(out_fds[0]); + close(in_fds[1]); + + if (pid < 0) { + err = -pid; + printk("harddog_open - run_helper failed, errno = %d\n", -err); + goto out_close_out; + } + + n = read(in_fds[0], &c, sizeof(c)); + if (n == 0) { + printk("harddog_open - EOF on watchdog pipe\n"); + helper_wait(pid); + err = -EIO; + goto out_close_out; + } + else if (n < 0) { + printk("harddog_open - read of watchdog pipe failed, " + "err = %d\n", errno); + helper_wait(pid); + err = n; + goto out_close_out; + } + *in_fd_ret = in_fds[0]; + *out_fd_ret = out_fds[1]; + return 0; + + out_close_in: + close(in_fds[0]); + close(in_fds[1]); + out_close_out: + close(out_fds[0]); + close(out_fds[1]); + out: + return err; +} + +void stop_watchdog(int in_fd, int out_fd) +{ + close(in_fd); + close(out_fd); +} + +int ping_watchdog(int fd) +{ + int n; + char c = '\n'; + + n = write(fd, &c, sizeof(c)); + if (n != sizeof(c)) { + printk("ping_watchdog - write failed, ret = %d, err = %d\n", + n, errno); + if (n < 0) + return n; + return -EIO; + } + return 1; + +} diff --git a/arch/um/drivers/harddog_user_exp.c b/arch/um/drivers/harddog_user_exp.c new file mode 100644 index 0000000000..c74d4b815d --- /dev/null +++ b/arch/um/drivers/harddog_user_exp.c @@ -0,0 +1,9 @@ +// SPDX-License-Identifier: GPL-2.0 +#include <linux/export.h> +#include "harddog.h" + +#if IS_MODULE(CONFIG_UML_WATCHDOG) +EXPORT_SYMBOL(start_watchdog); +EXPORT_SYMBOL(stop_watchdog); +EXPORT_SYMBOL(ping_watchdog); +#endif diff --git a/arch/um/drivers/hostaudio_kern.c b/arch/um/drivers/hostaudio_kern.c new file mode 100644 index 0000000000..c42b793bce --- /dev/null +++ b/arch/um/drivers/hostaudio_kern.c @@ -0,0 +1,353 @@ +// SPDX-License-Identifier: GPL-2.0 +/* + * Copyright (C) 2002 Steve Schmidtke + */ + +#include <linux/fs.h> +#include <linux/module.h> +#include <linux/slab.h> +#include <linux/sound.h> +#include <linux/soundcard.h> +#include <linux/mutex.h> +#include <linux/uaccess.h> +#include <init.h> +#include <os.h> + +struct hostaudio_state { + int fd; +}; + +struct hostmixer_state { + int fd; +}; + +#define HOSTAUDIO_DEV_DSP "/dev/sound/dsp" +#define HOSTAUDIO_DEV_MIXER "/dev/sound/mixer" + +/* + * Changed either at boot time or module load time. At boot, this is + * single-threaded; at module load, multiple modules would each have + * their own copy of these variables. + */ +static char *dsp = HOSTAUDIO_DEV_DSP; +static char *mixer = HOSTAUDIO_DEV_MIXER; + +#define DSP_HELP \ +" This is used to specify the host dsp device to the hostaudio driver.\n" \ +" The default is \"" HOSTAUDIO_DEV_DSP "\".\n\n" + +#define MIXER_HELP \ +" This is used to specify the host mixer device to the hostaudio driver.\n"\ +" The default is \"" HOSTAUDIO_DEV_MIXER "\".\n\n" + +module_param(dsp, charp, 0644); +MODULE_PARM_DESC(dsp, DSP_HELP); +module_param(mixer, charp, 0644); +MODULE_PARM_DESC(mixer, MIXER_HELP); + +#ifndef MODULE +static int set_dsp(char *name, int *add) +{ + dsp = name; + return 0; +} + +__uml_setup("dsp=", set_dsp, "dsp=<dsp device>\n" DSP_HELP); + +static int set_mixer(char *name, int *add) +{ + mixer = name; + return 0; +} + +__uml_setup("mixer=", set_mixer, "mixer=<mixer device>\n" MIXER_HELP); +#endif + +static DEFINE_MUTEX(hostaudio_mutex); + +/* /dev/dsp file operations */ + +static ssize_t hostaudio_read(struct file *file, char __user *buffer, + size_t count, loff_t *ppos) +{ + struct hostaudio_state *state = file->private_data; + void *kbuf; + int err; + +#ifdef DEBUG + printk(KERN_DEBUG "hostaudio: read called, count = %d\n", count); +#endif + + kbuf = kmalloc(count, GFP_KERNEL); + if (kbuf == NULL) + return -ENOMEM; + + err = os_read_file(state->fd, kbuf, count); + if (err < 0) + goto out; + + if (copy_to_user(buffer, kbuf, err)) + err = -EFAULT; + +out: + kfree(kbuf); + return err; +} + +static ssize_t hostaudio_write(struct file *file, const char __user *buffer, + size_t count, loff_t *ppos) +{ + struct hostaudio_state *state = file->private_data; + void *kbuf; + int err; + +#ifdef DEBUG + printk(KERN_DEBUG "hostaudio: write called, count = %d\n", count); +#endif + + kbuf = memdup_user(buffer, count); + if (IS_ERR(kbuf)) + return PTR_ERR(kbuf); + + err = os_write_file(state->fd, kbuf, count); + if (err < 0) + goto out; + *ppos += err; + + out: + kfree(kbuf); + return err; +} + +static __poll_t hostaudio_poll(struct file *file, + struct poll_table_struct *wait) +{ +#ifdef DEBUG + printk(KERN_DEBUG "hostaudio: poll called (unimplemented)\n"); +#endif + + return 0; +} + +static long hostaudio_ioctl(struct file *file, + unsigned int cmd, unsigned long arg) +{ + struct hostaudio_state *state = file->private_data; + unsigned long data = 0; + int err; + +#ifdef DEBUG + printk(KERN_DEBUG "hostaudio: ioctl called, cmd = %u\n", cmd); +#endif + switch(cmd){ + case SNDCTL_DSP_SPEED: + case SNDCTL_DSP_STEREO: + case SNDCTL_DSP_GETBLKSIZE: + case SNDCTL_DSP_CHANNELS: + case SNDCTL_DSP_SUBDIVIDE: + case SNDCTL_DSP_SETFRAGMENT: + if (get_user(data, (int __user *) arg)) + return -EFAULT; + break; + default: + break; + } + + err = os_ioctl_generic(state->fd, cmd, (unsigned long) &data); + + switch(cmd){ + case SNDCTL_DSP_SPEED: + case SNDCTL_DSP_STEREO: + case SNDCTL_DSP_GETBLKSIZE: + case SNDCTL_DSP_CHANNELS: + case SNDCTL_DSP_SUBDIVIDE: + case SNDCTL_DSP_SETFRAGMENT: + if (put_user(data, (int __user *) arg)) + return -EFAULT; + break; + default: + break; + } + + return err; +} + +static int hostaudio_open(struct inode *inode, struct file *file) +{ + struct hostaudio_state *state; + int r = 0, w = 0; + int ret; + +#ifdef DEBUG + kernel_param_lock(THIS_MODULE); + printk(KERN_DEBUG "hostaudio: open called (host: %s)\n", dsp); + kernel_param_unlock(THIS_MODULE); +#endif + + state = kmalloc(sizeof(struct hostaudio_state), GFP_KERNEL); + if (state == NULL) + return -ENOMEM; + + if (file->f_mode & FMODE_READ) + r = 1; + if (file->f_mode & FMODE_WRITE) + w = 1; + + kernel_param_lock(THIS_MODULE); + mutex_lock(&hostaudio_mutex); + ret = os_open_file(dsp, of_set_rw(OPENFLAGS(), r, w), 0); + mutex_unlock(&hostaudio_mutex); + kernel_param_unlock(THIS_MODULE); + + if (ret < 0) { + kfree(state); + return ret; + } + state->fd = ret; + file->private_data = state; + return 0; +} + +static int hostaudio_release(struct inode *inode, struct file *file) +{ + struct hostaudio_state *state = file->private_data; + +#ifdef DEBUG + printk(KERN_DEBUG "hostaudio: release called\n"); +#endif + os_close_file(state->fd); + kfree(state); + + return 0; +} + +/* /dev/mixer file operations */ + +static long hostmixer_ioctl_mixdev(struct file *file, + unsigned int cmd, unsigned long arg) +{ + struct hostmixer_state *state = file->private_data; + +#ifdef DEBUG + printk(KERN_DEBUG "hostmixer: ioctl called\n"); +#endif + + return os_ioctl_generic(state->fd, cmd, arg); +} + +static int hostmixer_open_mixdev(struct inode *inode, struct file *file) +{ + struct hostmixer_state *state; + int r = 0, w = 0; + int ret; + +#ifdef DEBUG + printk(KERN_DEBUG "hostmixer: open called (host: %s)\n", mixer); +#endif + + state = kmalloc(sizeof(struct hostmixer_state), GFP_KERNEL); + if (state == NULL) + return -ENOMEM; + + if (file->f_mode & FMODE_READ) + r = 1; + if (file->f_mode & FMODE_WRITE) + w = 1; + + kernel_param_lock(THIS_MODULE); + mutex_lock(&hostaudio_mutex); + ret = os_open_file(mixer, of_set_rw(OPENFLAGS(), r, w), 0); + mutex_unlock(&hostaudio_mutex); + kernel_param_unlock(THIS_MODULE); + + if (ret < 0) { + kernel_param_lock(THIS_MODULE); + printk(KERN_ERR "hostaudio_open_mixdev failed to open '%s', " + "err = %d\n", dsp, -ret); + kernel_param_unlock(THIS_MODULE); + kfree(state); + return ret; + } + + file->private_data = state; + return 0; +} + +static int hostmixer_release(struct inode *inode, struct file *file) +{ + struct hostmixer_state *state = file->private_data; + +#ifdef DEBUG + printk(KERN_DEBUG "hostmixer: release called\n"); +#endif + + os_close_file(state->fd); + kfree(state); + + return 0; +} + +/* kernel module operations */ + +static const struct file_operations hostaudio_fops = { + .owner = THIS_MODULE, + .llseek = no_llseek, + .read = hostaudio_read, + .write = hostaudio_write, + .poll = hostaudio_poll, + .unlocked_ioctl = hostaudio_ioctl, + .compat_ioctl = compat_ptr_ioctl, + .mmap = NULL, + .open = hostaudio_open, + .release = hostaudio_release, +}; + +static const struct file_operations hostmixer_fops = { + .owner = THIS_MODULE, + .llseek = no_llseek, + .unlocked_ioctl = hostmixer_ioctl_mixdev, + .open = hostmixer_open_mixdev, + .release = hostmixer_release, +}; + +static struct { + int dev_audio; + int dev_mixer; +} module_data; + +MODULE_AUTHOR("Steve Schmidtke"); +MODULE_DESCRIPTION("UML Audio Relay"); +MODULE_LICENSE("GPL"); + +static int __init hostaudio_init_module(void) +{ + kernel_param_lock(THIS_MODULE); + printk(KERN_INFO "UML Audio Relay (host dsp = %s, host mixer = %s)\n", + dsp, mixer); + kernel_param_unlock(THIS_MODULE); + + module_data.dev_audio = register_sound_dsp(&hostaudio_fops, -1); + if (module_data.dev_audio < 0) { + printk(KERN_ERR "hostaudio: couldn't register DSP device!\n"); + return -ENODEV; + } + + module_data.dev_mixer = register_sound_mixer(&hostmixer_fops, -1); + if (module_data.dev_mixer < 0) { + printk(KERN_ERR "hostmixer: couldn't register mixer " + "device!\n"); + unregister_sound_dsp(module_data.dev_audio); + return -ENODEV; + } + + return 0; +} + +static void __exit hostaudio_cleanup_module (void) +{ + unregister_sound_mixer(module_data.dev_mixer); + unregister_sound_dsp(module_data.dev_audio); +} + +module_init(hostaudio_init_module); +module_exit(hostaudio_cleanup_module); diff --git a/arch/um/drivers/line.c b/arch/um/drivers/line.c new file mode 100644 index 0000000000..b98545f3ed --- /dev/null +++ b/arch/um/drivers/line.c @@ -0,0 +1,764 @@ +// SPDX-License-Identifier: GPL-2.0 +/* + * Copyright (C) 2001 - 2007 Jeff Dike (jdike@{addtoit,linux.intel}.com) + */ + +#include <linux/irqreturn.h> +#include <linux/kd.h> +#include <linux/sched/signal.h> +#include <linux/slab.h> + +#include "chan.h" +#include <irq_kern.h> +#include <irq_user.h> +#include <kern_util.h> +#include <os.h> + +#define LINE_BUFSIZE 4096 + +static irqreturn_t line_interrupt(int irq, void *data) +{ + struct chan *chan = data; + struct line *line = chan->line; + + if (line) + chan_interrupt(line, irq); + + return IRQ_HANDLED; +} + +/* + * Returns the free space inside the ring buffer of this line. + * + * Should be called while holding line->lock (this does not modify data). + */ +static unsigned int write_room(struct line *line) +{ + int n; + + if (line->buffer == NULL) + return LINE_BUFSIZE - 1; + + /* This is for the case where the buffer is wrapped! */ + n = line->head - line->tail; + + if (n <= 0) + n += LINE_BUFSIZE; /* The other case */ + return n - 1; +} + +unsigned int line_write_room(struct tty_struct *tty) +{ + struct line *line = tty->driver_data; + unsigned long flags; + unsigned int room; + + spin_lock_irqsave(&line->lock, flags); + room = write_room(line); + spin_unlock_irqrestore(&line->lock, flags); + + return room; +} + +unsigned int line_chars_in_buffer(struct tty_struct *tty) +{ + struct line *line = tty->driver_data; + unsigned long flags; + unsigned int ret; + + spin_lock_irqsave(&line->lock, flags); + /* write_room subtracts 1 for the needed NULL, so we readd it.*/ + ret = LINE_BUFSIZE - (write_room(line) + 1); + spin_unlock_irqrestore(&line->lock, flags); + + return ret; +} + +/* + * This copies the content of buf into the circular buffer associated with + * this line. + * The return value is the number of characters actually copied, i.e. the ones + * for which there was space: this function is not supposed to ever flush out + * the circular buffer. + * + * Must be called while holding line->lock! + */ +static int buffer_data(struct line *line, const char *buf, int len) +{ + int end, room; + + if (line->buffer == NULL) { + line->buffer = kmalloc(LINE_BUFSIZE, GFP_ATOMIC); + if (line->buffer == NULL) { + printk(KERN_ERR "buffer_data - atomic allocation " + "failed\n"); + return 0; + } + line->head = line->buffer; + line->tail = line->buffer; + } + + room = write_room(line); + len = (len > room) ? room : len; + + end = line->buffer + LINE_BUFSIZE - line->tail; + + if (len < end) { + memcpy(line->tail, buf, len); + line->tail += len; + } + else { + /* The circular buffer is wrapping */ + memcpy(line->tail, buf, end); + buf += end; + memcpy(line->buffer, buf, len - end); + line->tail = line->buffer + len - end; + } + + return len; +} + +/* + * Flushes the ring buffer to the output channels. That is, write_chan is + * called, passing it line->head as buffer, and an appropriate count. + * + * On exit, returns 1 when the buffer is empty, + * 0 when the buffer is not empty on exit, + * and -errno when an error occurred. + * + * Must be called while holding line->lock!*/ +static int flush_buffer(struct line *line) +{ + int n, count; + + if ((line->buffer == NULL) || (line->head == line->tail)) + return 1; + + if (line->tail < line->head) { + /* line->buffer + LINE_BUFSIZE is the end of the buffer! */ + count = line->buffer + LINE_BUFSIZE - line->head; + + n = write_chan(line->chan_out, line->head, count, + line->write_irq); + if (n < 0) + return n; + if (n == count) { + /* + * We have flushed from ->head to buffer end, now we + * must flush only from the beginning to ->tail. + */ + line->head = line->buffer; + } else { + line->head += n; + return 0; + } + } + + count = line->tail - line->head; + n = write_chan(line->chan_out, line->head, count, + line->write_irq); + + if (n < 0) + return n; + + line->head += n; + return line->head == line->tail; +} + +void line_flush_buffer(struct tty_struct *tty) +{ + struct line *line = tty->driver_data; + unsigned long flags; + + spin_lock_irqsave(&line->lock, flags); + flush_buffer(line); + spin_unlock_irqrestore(&line->lock, flags); +} + +/* + * We map both ->flush_chars and ->put_char (which go in pair) onto + * ->flush_buffer and ->write. Hope it's not that bad. + */ +void line_flush_chars(struct tty_struct *tty) +{ + line_flush_buffer(tty); +} + +ssize_t line_write(struct tty_struct *tty, const u8 *buf, size_t len) +{ + struct line *line = tty->driver_data; + unsigned long flags; + int n, ret = 0; + + spin_lock_irqsave(&line->lock, flags); + if (line->head != line->tail) + ret = buffer_data(line, buf, len); + else { + n = write_chan(line->chan_out, buf, len, + line->write_irq); + if (n < 0) { + ret = n; + goto out_up; + } + + len -= n; + ret += n; + if (len > 0) + ret += buffer_data(line, buf + n, len); + } +out_up: + spin_unlock_irqrestore(&line->lock, flags); + return ret; +} + +void line_throttle(struct tty_struct *tty) +{ + struct line *line = tty->driver_data; + + deactivate_chan(line->chan_in, line->read_irq); + line->throttled = 1; +} + +void line_unthrottle(struct tty_struct *tty) +{ + struct line *line = tty->driver_data; + + line->throttled = 0; + chan_interrupt(line, line->read_irq); +} + +static irqreturn_t line_write_interrupt(int irq, void *data) +{ + struct chan *chan = data; + struct line *line = chan->line; + int err; + + /* + * Interrupts are disabled here because genirq keep irqs disabled when + * calling the action handler. + */ + + spin_lock(&line->lock); + err = flush_buffer(line); + if (err == 0) { + spin_unlock(&line->lock); + return IRQ_NONE; + } else if ((err < 0) && (err != -EAGAIN)) { + line->head = line->buffer; + line->tail = line->buffer; + } + spin_unlock(&line->lock); + + tty_port_tty_wakeup(&line->port); + + return IRQ_HANDLED; +} + +int line_setup_irq(int fd, int input, int output, struct line *line, void *data) +{ + const struct line_driver *driver = line->driver; + int err; + + if (input) { + err = um_request_irq(UM_IRQ_ALLOC, fd, IRQ_READ, + line_interrupt, 0, + driver->read_irq_name, data); + if (err < 0) + return err; + + line->read_irq = err; + } + + if (output) { + err = um_request_irq(UM_IRQ_ALLOC, fd, IRQ_WRITE, + line_write_interrupt, 0, + driver->write_irq_name, data); + if (err < 0) + return err; + + line->write_irq = err; + } + + return 0; +} + +static int line_activate(struct tty_port *port, struct tty_struct *tty) +{ + int ret; + struct line *line = tty->driver_data; + + ret = enable_chan(line); + if (ret) + return ret; + + if (!line->sigio) { + chan_enable_winch(line->chan_out, port); + line->sigio = 1; + } + + chan_window_size(line, &tty->winsize.ws_row, + &tty->winsize.ws_col); + + return 0; +} + +static void unregister_winch(struct tty_struct *tty); + +static void line_destruct(struct tty_port *port) +{ + struct tty_struct *tty = tty_port_tty_get(port); + struct line *line = tty->driver_data; + + if (line->sigio) { + unregister_winch(tty); + line->sigio = 0; + } +} + +static const struct tty_port_operations line_port_ops = { + .activate = line_activate, + .destruct = line_destruct, +}; + +int line_open(struct tty_struct *tty, struct file *filp) +{ + struct line *line = tty->driver_data; + + return tty_port_open(&line->port, tty, filp); +} + +int line_install(struct tty_driver *driver, struct tty_struct *tty, + struct line *line) +{ + int ret; + + ret = tty_standard_install(driver, tty); + if (ret) + return ret; + + tty->driver_data = line; + + return 0; +} + +void line_close(struct tty_struct *tty, struct file * filp) +{ + struct line *line = tty->driver_data; + + tty_port_close(&line->port, tty, filp); +} + +void line_hangup(struct tty_struct *tty) +{ + struct line *line = tty->driver_data; + + tty_port_hangup(&line->port); +} + +void close_lines(struct line *lines, int nlines) +{ + int i; + + for(i = 0; i < nlines; i++) + close_chan(&lines[i]); +} + +int setup_one_line(struct line *lines, int n, char *init, + const struct chan_opts *opts, char **error_out) +{ + struct line *line = &lines[n]; + struct tty_driver *driver = line->driver->driver; + int err = -EINVAL; + + if (line->port.count) { + *error_out = "Device is already open"; + goto out; + } + + if (!strcmp(init, "none")) { + if (line->valid) { + line->valid = 0; + kfree(line->init_str); + tty_unregister_device(driver, n); + parse_chan_pair(NULL, line, n, opts, error_out); + err = 0; + } + } else { + char *new = kstrdup(init, GFP_KERNEL); + if (!new) { + *error_out = "Failed to allocate memory"; + return -ENOMEM; + } + if (line->valid) { + tty_unregister_device(driver, n); + kfree(line->init_str); + } + line->init_str = new; + line->valid = 1; + err = parse_chan_pair(new, line, n, opts, error_out); + if (!err) { + struct device *d = tty_port_register_device(&line->port, + driver, n, NULL); + if (IS_ERR(d)) { + *error_out = "Failed to register device"; + err = PTR_ERR(d); + parse_chan_pair(NULL, line, n, opts, error_out); + } + } + if (err) { + line->init_str = NULL; + line->valid = 0; + kfree(new); + } + } +out: + return err; +} + +/* + * Common setup code for both startup command line and mconsole initialization. + * @lines contains the array (of size @num) to modify; + * @init is the setup string; + * @error_out is an error string in the case of failure; + */ + +int line_setup(char **conf, unsigned int num, char **def, + char *init, char *name) +{ + char *error; + + if (*init == '=') { + /* + * We said con=/ssl= instead of con#=, so we are configuring all + * consoles at once. + */ + *def = init + 1; + } else { + char *end; + unsigned n = simple_strtoul(init, &end, 0); + + if (*end != '=') { + error = "Couldn't parse device number"; + goto out; + } + if (n >= num) { + error = "Device number out of range"; + goto out; + } + conf[n] = end + 1; + } + return 0; + +out: + printk(KERN_ERR "Failed to set up %s with " + "configuration string \"%s\" : %s\n", name, init, error); + return -EINVAL; +} + +int line_config(struct line *lines, unsigned int num, char *str, + const struct chan_opts *opts, char **error_out) +{ + char *end; + int n; + + if (*str == '=') { + *error_out = "Can't configure all devices from mconsole"; + return -EINVAL; + } + + n = simple_strtoul(str, &end, 0); + if (*end++ != '=') { + *error_out = "Couldn't parse device number"; + return -EINVAL; + } + if (n >= num) { + *error_out = "Device number out of range"; + return -EINVAL; + } + + return setup_one_line(lines, n, end, opts, error_out); +} + +int line_get_config(char *name, struct line *lines, unsigned int num, char *str, + int size, char **error_out) +{ + struct line *line; + char *end; + int dev, n = 0; + + dev = simple_strtoul(name, &end, 0); + if ((*end != '\0') || (end == name)) { + *error_out = "line_get_config failed to parse device number"; + return 0; + } + + if ((dev < 0) || (dev >= num)) { + *error_out = "device number out of range"; + return 0; + } + + line = &lines[dev]; + + if (!line->valid) + CONFIG_CHUNK(str, size, n, "none", 1); + else { + struct tty_struct *tty = tty_port_tty_get(&line->port); + if (tty == NULL) { + CONFIG_CHUNK(str, size, n, line->init_str, 1); + } else { + n = chan_config_string(line, str, size, error_out); + tty_kref_put(tty); + } + } + + return n; +} + +int line_id(char **str, int *start_out, int *end_out) +{ + char *end; + int n; + + n = simple_strtoul(*str, &end, 0); + if ((*end != '\0') || (end == *str)) + return -1; + + *str = end; + *start_out = n; + *end_out = n; + return n; +} + +int line_remove(struct line *lines, unsigned int num, int n, char **error_out) +{ + if (n >= num) { + *error_out = "Device number out of range"; + return -EINVAL; + } + return setup_one_line(lines, n, "none", NULL, error_out); +} + +int register_lines(struct line_driver *line_driver, + const struct tty_operations *ops, + struct line *lines, int nlines) +{ + struct tty_driver *driver; + int err; + int i; + + driver = tty_alloc_driver(nlines, TTY_DRIVER_REAL_RAW | + TTY_DRIVER_DYNAMIC_DEV); + if (IS_ERR(driver)) + return PTR_ERR(driver); + + driver->driver_name = line_driver->name; + driver->name = line_driver->device_name; + driver->major = line_driver->major; + driver->minor_start = line_driver->minor_start; + driver->type = line_driver->type; + driver->subtype = line_driver->subtype; + driver->init_termios = tty_std_termios; + + for (i = 0; i < nlines; i++) { + tty_port_init(&lines[i].port); + lines[i].port.ops = &line_port_ops; + spin_lock_init(&lines[i].lock); + lines[i].driver = line_driver; + INIT_LIST_HEAD(&lines[i].chan_list); + } + tty_set_operations(driver, ops); + + err = tty_register_driver(driver); + if (err) { + printk(KERN_ERR "register_lines : can't register %s driver\n", + line_driver->name); + tty_driver_kref_put(driver); + for (i = 0; i < nlines; i++) + tty_port_destroy(&lines[i].port); + return err; + } + + line_driver->driver = driver; + mconsole_register_dev(&line_driver->mc); + return 0; +} + +static DEFINE_SPINLOCK(winch_handler_lock); +static LIST_HEAD(winch_handlers); + +struct winch { + struct list_head list; + int fd; + int tty_fd; + int pid; + struct tty_port *port; + unsigned long stack; + struct work_struct work; +}; + +static void __free_winch(struct work_struct *work) +{ + struct winch *winch = container_of(work, struct winch, work); + um_free_irq(WINCH_IRQ, winch); + + if (winch->pid != -1) + os_kill_process(winch->pid, 1); + if (winch->stack != 0) + free_stack(winch->stack, 0); + kfree(winch); +} + +static void free_winch(struct winch *winch) +{ + int fd = winch->fd; + winch->fd = -1; + if (fd != -1) + os_close_file(fd); + __free_winch(&winch->work); +} + +static irqreturn_t winch_interrupt(int irq, void *data) +{ + struct winch *winch = data; + struct tty_struct *tty; + struct line *line; + int fd = winch->fd; + int err; + char c; + struct pid *pgrp; + + if (fd != -1) { + err = generic_read(fd, &c, NULL); + if (err < 0) { + if (err != -EAGAIN) { + winch->fd = -1; + list_del(&winch->list); + os_close_file(fd); + printk(KERN_ERR "winch_interrupt : " + "read failed, errno = %d\n", -err); + printk(KERN_ERR "fd %d is losing SIGWINCH " + "support\n", winch->tty_fd); + INIT_WORK(&winch->work, __free_winch); + schedule_work(&winch->work); + return IRQ_HANDLED; + } + goto out; + } + } + tty = tty_port_tty_get(winch->port); + if (tty != NULL) { + line = tty->driver_data; + if (line != NULL) { + chan_window_size(line, &tty->winsize.ws_row, + &tty->winsize.ws_col); + pgrp = tty_get_pgrp(tty); + if (pgrp) + kill_pgrp(pgrp, SIGWINCH, 1); + put_pid(pgrp); + } + tty_kref_put(tty); + } + out: + return IRQ_HANDLED; +} + +void register_winch_irq(int fd, int tty_fd, int pid, struct tty_port *port, + unsigned long stack) +{ + struct winch *winch; + + winch = kmalloc(sizeof(*winch), GFP_KERNEL); + if (winch == NULL) { + printk(KERN_ERR "register_winch_irq - kmalloc failed\n"); + goto cleanup; + } + + *winch = ((struct winch) { .list = LIST_HEAD_INIT(winch->list), + .fd = fd, + .tty_fd = tty_fd, + .pid = pid, + .port = port, + .stack = stack }); + + if (um_request_irq(WINCH_IRQ, fd, IRQ_READ, winch_interrupt, + IRQF_SHARED, "winch", winch) < 0) { + printk(KERN_ERR "register_winch_irq - failed to register " + "IRQ\n"); + goto out_free; + } + + spin_lock(&winch_handler_lock); + list_add(&winch->list, &winch_handlers); + spin_unlock(&winch_handler_lock); + + return; + + out_free: + kfree(winch); + cleanup: + os_kill_process(pid, 1); + os_close_file(fd); + if (stack != 0) + free_stack(stack, 0); +} + +static void unregister_winch(struct tty_struct *tty) +{ + struct list_head *ele, *next; + struct winch *winch; + struct tty_struct *wtty; + + spin_lock(&winch_handler_lock); + + list_for_each_safe(ele, next, &winch_handlers) { + winch = list_entry(ele, struct winch, list); + wtty = tty_port_tty_get(winch->port); + if (wtty == tty) { + list_del(&winch->list); + spin_unlock(&winch_handler_lock); + free_winch(winch); + break; + } + tty_kref_put(wtty); + } + spin_unlock(&winch_handler_lock); +} + +static void winch_cleanup(void) +{ + struct winch *winch; + + spin_lock(&winch_handler_lock); + while ((winch = list_first_entry_or_null(&winch_handlers, + struct winch, list))) { + list_del(&winch->list); + spin_unlock(&winch_handler_lock); + + free_winch(winch); + + spin_lock(&winch_handler_lock); + } + + spin_unlock(&winch_handler_lock); +} +__uml_exitcall(winch_cleanup); + +char *add_xterm_umid(char *base) +{ + char *umid, *title; + int len; + + umid = get_umid(); + if (*umid == '\0') + return base; + + len = strlen(base) + strlen(" ()") + strlen(umid) + 1; + title = kmalloc(len, GFP_KERNEL); + if (title == NULL) { + printk(KERN_ERR "Failed to allocate buffer for xterm title\n"); + return base; + } + + snprintf(title, len, "%s (%s)", base, umid); + return title; +} diff --git a/arch/um/drivers/line.h b/arch/um/drivers/line.h new file mode 100644 index 0000000000..e84fb9b416 --- /dev/null +++ b/arch/um/drivers/line.h @@ -0,0 +1,96 @@ +/* SPDX-License-Identifier: GPL-2.0 */ +/* + * Copyright (C) 2001, 2002 Jeff Dike (jdike@karaya.com) + */ + +#ifndef __LINE_H__ +#define __LINE_H__ + +#include <linux/list.h> +#include <linux/workqueue.h> +#include <linux/tty.h> +#include <linux/interrupt.h> +#include <linux/spinlock.h> +#include <linux/mutex.h> +#include "chan_user.h" +#include "mconsole_kern.h" + +/* There's only two modifiable fields in this - .mc.list and .driver */ +struct line_driver { + const char *name; + const char *device_name; + const short major; + const short minor_start; + const short type; + const short subtype; + const char *read_irq_name; + const char *write_irq_name; + struct mc_device mc; + struct tty_driver *driver; +}; + +struct line { + struct tty_port port; + int valid; + + int read_irq, write_irq; + + char *init_str; + struct list_head chan_list; + struct chan *chan_in, *chan_out; + + /*This lock is actually, mostly, local to*/ + spinlock_t lock; + int throttled; + /* Yes, this is a real circular buffer. + * XXX: And this should become a struct kfifo! + * + * buffer points to a buffer allocated on demand, of length + * LINE_BUFSIZE, head to the start of the ring, tail to the end.*/ + char *buffer; + char *head; + char *tail; + + int sigio; + struct delayed_work task; + const struct line_driver *driver; +}; + +extern void line_close(struct tty_struct *tty, struct file * filp); +extern int line_open(struct tty_struct *tty, struct file *filp); +extern int line_install(struct tty_driver *driver, struct tty_struct *tty, + struct line *line); +extern void line_cleanup(struct tty_struct *tty); +extern void line_hangup(struct tty_struct *tty); +extern int line_setup(char **conf, unsigned nlines, char **def, + char *init, char *name); +extern ssize_t line_write(struct tty_struct *tty, const u8 *buf, size_t len); +extern unsigned int line_chars_in_buffer(struct tty_struct *tty); +extern void line_flush_buffer(struct tty_struct *tty); +extern void line_flush_chars(struct tty_struct *tty); +extern unsigned int line_write_room(struct tty_struct *tty); +extern void line_throttle(struct tty_struct *tty); +extern void line_unthrottle(struct tty_struct *tty); + +extern char *add_xterm_umid(char *base); +extern int line_setup_irq(int fd, int input, int output, struct line *line, + void *data); +extern void line_close_chan(struct line *line); +extern int register_lines(struct line_driver *line_driver, + const struct tty_operations *driver, + struct line *lines, int nlines); +extern int setup_one_line(struct line *lines, int n, char *init, + const struct chan_opts *opts, char **error_out); +extern void close_lines(struct line *lines, int nlines); + +extern int line_config(struct line *lines, unsigned int sizeof_lines, + char *str, const struct chan_opts *opts, + char **error_out); +extern int line_id(char **str, int *start_out, int *end_out); +extern int line_remove(struct line *lines, unsigned int sizeof_lines, int n, + char **error_out); +extern int line_get_config(char *dev, struct line *lines, + unsigned int sizeof_lines, char *str, + int size, char **error_out); + +#endif diff --git a/arch/um/drivers/mconsole.h b/arch/um/drivers/mconsole.h new file mode 100644 index 0000000000..6356378304 --- /dev/null +++ b/arch/um/drivers/mconsole.h @@ -0,0 +1,98 @@ +/* SPDX-License-Identifier: GPL-2.0 */ +/* + * Copyright (C) 2001 Lennert Buytenhek (buytenh@gnu.org) + * Copyright (C) 2001 - 2007 Jeff Dike (jdike@{addtoit,linux.intel}.com) + */ + +#ifndef __MCONSOLE_H__ +#define __MCONSOLE_H__ + +#ifdef __UM_HOST__ +#include <stdint.h> +#define u32 uint32_t +#endif + +#include <sysdep/ptrace.h> + +#define MCONSOLE_MAGIC (0xcafebabe) +#define MCONSOLE_MAX_DATA (512) +#define MCONSOLE_VERSION 2 + +struct mconsole_request { + u32 magic; + u32 version; + u32 len; + char data[MCONSOLE_MAX_DATA]; +}; + +struct mconsole_reply { + u32 err; + u32 more; + u32 len; + char data[MCONSOLE_MAX_DATA]; +}; + +struct mconsole_notify { + u32 magic; + u32 version; + enum { MCONSOLE_SOCKET, MCONSOLE_PANIC, MCONSOLE_HANG, + MCONSOLE_USER_NOTIFY } type; + u32 len; + char data[MCONSOLE_MAX_DATA]; +}; + +struct mc_request; + +enum mc_context { MCONSOLE_INTR, MCONSOLE_PROC }; + +struct mconsole_command +{ + char *command; + void (*handler)(struct mc_request *req); + enum mc_context context; +}; + +struct mc_request +{ + int len; + int as_interrupt; + + int originating_fd; + unsigned int originlen; + unsigned char origin[128]; /* sockaddr_un */ + + struct mconsole_request request; + struct mconsole_command *cmd; + struct uml_pt_regs regs; +}; + +extern char mconsole_socket_name[]; + +extern int mconsole_unlink_socket(void); +extern int mconsole_reply_len(struct mc_request *req, const char *reply, + int len, int err, int more); +extern int mconsole_reply(struct mc_request *req, const char *str, int err, + int more); + +extern void mconsole_version(struct mc_request *req); +extern void mconsole_help(struct mc_request *req); +extern void mconsole_halt(struct mc_request *req); +extern void mconsole_reboot(struct mc_request *req); +extern void mconsole_config(struct mc_request *req); +extern void mconsole_remove(struct mc_request *req); +extern void mconsole_sysrq(struct mc_request *req); +extern void mconsole_cad(struct mc_request *req); +extern void mconsole_stop(struct mc_request *req); +extern void mconsole_go(struct mc_request *req); +extern void mconsole_log(struct mc_request *req); +extern void mconsole_proc(struct mc_request *req); +extern void mconsole_stack(struct mc_request *req); + +extern int mconsole_get_request(int fd, struct mc_request *req); +extern int mconsole_notify(char *sock_name, int type, const void *data, + int len); +extern char *mconsole_notify_socket(void); +extern void lock_notify(void); +extern void unlock_notify(void); + +#endif diff --git a/arch/um/drivers/mconsole_kern.c b/arch/um/drivers/mconsole_kern.c new file mode 100644 index 0000000000..ff4bda95b9 --- /dev/null +++ b/arch/um/drivers/mconsole_kern.c @@ -0,0 +1,871 @@ +// SPDX-License-Identifier: GPL-2.0 +/* + * Copyright (C) 2001 Lennert Buytenhek (buytenh@gnu.org) + * Copyright (C) 2001 - 2008 Jeff Dike (jdike@{addtoit,linux.intel}.com) + */ + +#include <linux/console.h> +#include <linux/ctype.h> +#include <linux/string.h> +#include <linux/interrupt.h> +#include <linux/list.h> +#include <linux/mm.h> +#include <linux/module.h> +#include <linux/notifier.h> +#include <linux/panic_notifier.h> +#include <linux/reboot.h> +#include <linux/sched/debug.h> +#include <linux/proc_fs.h> +#include <linux/slab.h> +#include <linux/syscalls.h> +#include <linux/utsname.h> +#include <linux/socket.h> +#include <linux/un.h> +#include <linux/workqueue.h> +#include <linux/mutex.h> +#include <linux/fs.h> +#include <linux/mount.h> +#include <linux/file.h> +#include <linux/uaccess.h> +#include <asm/switch_to.h> + +#include <init.h> +#include <irq_kern.h> +#include <irq_user.h> +#include <kern_util.h> +#include "mconsole.h" +#include "mconsole_kern.h" +#include <os.h> + +static struct vfsmount *proc_mnt = NULL; + +static int do_unlink_socket(struct notifier_block *notifier, + unsigned long what, void *data) +{ + return mconsole_unlink_socket(); +} + + +static struct notifier_block reboot_notifier = { + .notifier_call = do_unlink_socket, + .priority = 0, +}; + +/* Safe without explicit locking for now. Tasklets provide their own + * locking, and the interrupt handler is safe because it can't interrupt + * itself and it can only happen on CPU 0. + */ + +static LIST_HEAD(mc_requests); + +static void mc_work_proc(struct work_struct *unused) +{ + struct mconsole_entry *req; + unsigned long flags; + + while (!list_empty(&mc_requests)) { + local_irq_save(flags); + req = list_entry(mc_requests.next, struct mconsole_entry, list); + list_del(&req->list); + local_irq_restore(flags); + req->request.cmd->handler(&req->request); + kfree(req); + } +} + +static DECLARE_WORK(mconsole_work, mc_work_proc); + +static irqreturn_t mconsole_interrupt(int irq, void *dev_id) +{ + /* long to avoid size mismatch warnings from gcc */ + long fd; + struct mconsole_entry *new; + static struct mc_request req; /* that's OK */ + + fd = (long) dev_id; + while (mconsole_get_request(fd, &req)) { + if (req.cmd->context == MCONSOLE_INTR) + (*req.cmd->handler)(&req); + else { + new = kmalloc(sizeof(*new), GFP_NOWAIT); + if (new == NULL) + mconsole_reply(&req, "Out of memory", 1, 0); + else { + new->request = req; + new->request.regs = get_irq_regs()->regs; + list_add(&new->list, &mc_requests); + } + } + } + if (!list_empty(&mc_requests)) + schedule_work(&mconsole_work); + return IRQ_HANDLED; +} + +void mconsole_version(struct mc_request *req) +{ + char version[256]; + + sprintf(version, "%s %s %s %s %s", utsname()->sysname, + utsname()->nodename, utsname()->release, utsname()->version, + utsname()->machine); + mconsole_reply(req, version, 0, 0); +} + +void mconsole_log(struct mc_request *req) +{ + int len; + char *ptr = req->request.data; + + ptr += strlen("log "); + + len = req->len - (ptr - req->request.data); + printk(KERN_WARNING "%.*s", len, ptr); + mconsole_reply(req, "", 0, 0); +} + +void mconsole_proc(struct mc_request *req) +{ + struct vfsmount *mnt = proc_mnt; + char *buf; + int len; + struct file *file; + int first_chunk = 1; + char *ptr = req->request.data; + loff_t pos = 0; + + ptr += strlen("proc"); + ptr = skip_spaces(ptr); + + if (!mnt) { + mconsole_reply(req, "Proc not available", 1, 0); + goto out; + } + file = file_open_root_mnt(mnt, ptr, O_RDONLY, 0); + if (IS_ERR(file)) { + mconsole_reply(req, "Failed to open file", 1, 0); + printk(KERN_ERR "open /proc/%s: %ld\n", ptr, PTR_ERR(file)); + goto out; + } + + buf = kmalloc(PAGE_SIZE, GFP_KERNEL); + if (buf == NULL) { + mconsole_reply(req, "Failed to allocate buffer", 1, 0); + goto out_fput; + } + + do { + len = kernel_read(file, buf, PAGE_SIZE - 1, &pos); + if (len < 0) { + mconsole_reply(req, "Read of file failed", 1, 0); + goto out_free; + } + /* Begin the file content on his own line. */ + if (first_chunk) { + mconsole_reply(req, "\n", 0, 1); + first_chunk = 0; + } + buf[len] = '\0'; + mconsole_reply(req, buf, 0, (len > 0)); + } while (len > 0); + out_free: + kfree(buf); + out_fput: + fput(file); + out: ; +} + +#define UML_MCONSOLE_HELPTEXT \ +"Commands: \n\ + version - Get kernel version \n\ + help - Print this message \n\ + halt - Halt UML \n\ + reboot - Reboot UML \n\ + config <dev>=<config> - Add a new device to UML; \n\ + same syntax as command line \n\ + config <dev> - Query the configuration of a device \n\ + remove <dev> - Remove a device from UML \n\ + sysrq <letter> - Performs the SysRq action controlled by the letter \n\ + cad - invoke the Ctrl-Alt-Del handler \n\ + stop - pause the UML; it will do nothing until it receives a 'go' \n\ + go - continue the UML after a 'stop' \n\ + log <string> - make UML enter <string> into the kernel log\n\ + proc <file> - returns the contents of the UML's /proc/<file>\n\ + stack <pid> - returns the stack of the specified pid\n\ +" + +void mconsole_help(struct mc_request *req) +{ + mconsole_reply(req, UML_MCONSOLE_HELPTEXT, 0, 0); +} + +void mconsole_halt(struct mc_request *req) +{ + mconsole_reply(req, "", 0, 0); + machine_halt(); +} + +void mconsole_reboot(struct mc_request *req) +{ + mconsole_reply(req, "", 0, 0); + machine_restart(NULL); +} + +void mconsole_cad(struct mc_request *req) +{ + mconsole_reply(req, "", 0, 0); + ctrl_alt_del(); +} + +void mconsole_go(struct mc_request *req) +{ + mconsole_reply(req, "Not stopped", 1, 0); +} + +void mconsole_stop(struct mc_request *req) +{ + block_signals(); + os_set_fd_block(req->originating_fd, 1); + mconsole_reply(req, "stopped", 0, 0); + for (;;) { + if (!mconsole_get_request(req->originating_fd, req)) + continue; + if (req->cmd->handler == mconsole_go) + break; + if (req->cmd->handler == mconsole_stop) { + mconsole_reply(req, "Already stopped", 1, 0); + continue; + } + if (req->cmd->handler == mconsole_sysrq) { + struct pt_regs *old_regs; + old_regs = set_irq_regs((struct pt_regs *)&req->regs); + mconsole_sysrq(req); + set_irq_regs(old_regs); + continue; + } + (*req->cmd->handler)(req); + } + os_set_fd_block(req->originating_fd, 0); + mconsole_reply(req, "", 0, 0); + unblock_signals(); +} + +static DEFINE_SPINLOCK(mc_devices_lock); +static LIST_HEAD(mconsole_devices); + +void mconsole_register_dev(struct mc_device *new) +{ + spin_lock(&mc_devices_lock); + BUG_ON(!list_empty(&new->list)); + list_add(&new->list, &mconsole_devices); + spin_unlock(&mc_devices_lock); +} + +static struct mc_device *mconsole_find_dev(char *name) +{ + struct list_head *ele; + struct mc_device *dev; + + list_for_each(ele, &mconsole_devices) { + dev = list_entry(ele, struct mc_device, list); + if (!strncmp(name, dev->name, strlen(dev->name))) + return dev; + } + return NULL; +} + +#define UNPLUGGED_PER_PAGE \ + ((PAGE_SIZE - sizeof(struct list_head)) / sizeof(unsigned long)) + +struct unplugged_pages { + struct list_head list; + void *pages[UNPLUGGED_PER_PAGE]; +}; + +static DEFINE_MUTEX(plug_mem_mutex); +static unsigned long long unplugged_pages_count; +static LIST_HEAD(unplugged_pages); +static int unplug_index = UNPLUGGED_PER_PAGE; + +static int mem_config(char *str, char **error_out) +{ + unsigned long long diff; + int err = -EINVAL, i, add; + char *ret; + + if (str[0] != '=') { + *error_out = "Expected '=' after 'mem'"; + goto out; + } + + str++; + if (str[0] == '-') + add = 0; + else if (str[0] == '+') { + add = 1; + } + else { + *error_out = "Expected increment to start with '-' or '+'"; + goto out; + } + + str++; + diff = memparse(str, &ret); + if (*ret != '\0') { + *error_out = "Failed to parse memory increment"; + goto out; + } + + diff /= PAGE_SIZE; + + mutex_lock(&plug_mem_mutex); + for (i = 0; i < diff; i++) { + struct unplugged_pages *unplugged; + void *addr; + + if (add) { + if (list_empty(&unplugged_pages)) + break; + + unplugged = list_entry(unplugged_pages.next, + struct unplugged_pages, list); + if (unplug_index > 0) + addr = unplugged->pages[--unplug_index]; + else { + list_del(&unplugged->list); + addr = unplugged; + unplug_index = UNPLUGGED_PER_PAGE; + } + + free_page((unsigned long) addr); + unplugged_pages_count--; + } + else { + struct page *page; + + page = alloc_page(GFP_ATOMIC); + if (page == NULL) + break; + + unplugged = page_address(page); + if (unplug_index == UNPLUGGED_PER_PAGE) { + list_add(&unplugged->list, &unplugged_pages); + unplug_index = 0; + } + else { + struct list_head *entry = unplugged_pages.next; + addr = unplugged; + + unplugged = list_entry(entry, + struct unplugged_pages, + list); + err = os_drop_memory(addr, PAGE_SIZE); + if (err) { + printk(KERN_ERR "Failed to release " + "memory - errno = %d\n", err); + *error_out = "Failed to release memory"; + goto out_unlock; + } + unplugged->pages[unplug_index++] = addr; + } + + unplugged_pages_count++; + } + } + + err = 0; +out_unlock: + mutex_unlock(&plug_mem_mutex); +out: + return err; +} + +static int mem_get_config(char *name, char *str, int size, char **error_out) +{ + char buf[sizeof("18446744073709551615")]; + int len = 0; + + sprintf(buf, "%ld", uml_physmem); + CONFIG_CHUNK(str, size, len, buf, 1); + + return len; +} + +static int mem_id(char **str, int *start_out, int *end_out) +{ + *start_out = 0; + *end_out = 0; + + return 0; +} + +static int mem_remove(int n, char **error_out) +{ + *error_out = "Memory doesn't support the remove operation"; + return -EBUSY; +} + +static struct mc_device mem_mc = { + .list = LIST_HEAD_INIT(mem_mc.list), + .name = "mem", + .config = mem_config, + .get_config = mem_get_config, + .id = mem_id, + .remove = mem_remove, +}; + +static int __init mem_mc_init(void) +{ + if (can_drop_memory()) + mconsole_register_dev(&mem_mc); + else printk(KERN_ERR "Can't release memory to the host - memory " + "hotplug won't be supported\n"); + return 0; +} + +__initcall(mem_mc_init); + +#define CONFIG_BUF_SIZE 64 + +static void mconsole_get_config(int (*get_config)(char *, char *, int, + char **), + struct mc_request *req, char *name) +{ + char default_buf[CONFIG_BUF_SIZE], *error, *buf; + int n, size; + + if (get_config == NULL) { + mconsole_reply(req, "No get_config routine defined", 1, 0); + return; + } + + error = NULL; + size = ARRAY_SIZE(default_buf); + buf = default_buf; + + while (1) { + n = (*get_config)(name, buf, size, &error); + if (error != NULL) { + mconsole_reply(req, error, 1, 0); + goto out; + } + + if (n <= size) { + mconsole_reply(req, buf, 0, 0); + goto out; + } + + if (buf != default_buf) + kfree(buf); + + size = n; + buf = kmalloc(size, GFP_KERNEL); + if (buf == NULL) { + mconsole_reply(req, "Failed to allocate buffer", 1, 0); + return; + } + } + out: + if (buf != default_buf) + kfree(buf); +} + +void mconsole_config(struct mc_request *req) +{ + struct mc_device *dev; + char *ptr = req->request.data, *name, *error_string = ""; + int err; + + ptr += strlen("config"); + ptr = skip_spaces(ptr); + dev = mconsole_find_dev(ptr); + if (dev == NULL) { + mconsole_reply(req, "Bad configuration option", 1, 0); + return; + } + + name = &ptr[strlen(dev->name)]; + ptr = name; + while ((*ptr != '=') && (*ptr != '\0')) + ptr++; + + if (*ptr == '=') { + err = (*dev->config)(name, &error_string); + mconsole_reply(req, error_string, err, 0); + } + else mconsole_get_config(dev->get_config, req, name); +} + +void mconsole_remove(struct mc_request *req) +{ + struct mc_device *dev; + char *ptr = req->request.data, *err_msg = ""; + char error[256]; + int err, start, end, n; + + ptr += strlen("remove"); + ptr = skip_spaces(ptr); + dev = mconsole_find_dev(ptr); + if (dev == NULL) { + mconsole_reply(req, "Bad remove option", 1, 0); + return; + } + + ptr = &ptr[strlen(dev->name)]; + + err = 1; + n = (*dev->id)(&ptr, &start, &end); + if (n < 0) { + err_msg = "Couldn't parse device number"; + goto out; + } + else if ((n < start) || (n > end)) { + sprintf(error, "Invalid device number - must be between " + "%d and %d", start, end); + err_msg = error; + goto out; + } + + err_msg = NULL; + err = (*dev->remove)(n, &err_msg); + switch(err) { + case 0: + err_msg = ""; + break; + case -ENODEV: + if (err_msg == NULL) + err_msg = "Device doesn't exist"; + break; + case -EBUSY: + if (err_msg == NULL) + err_msg = "Device is currently open"; + break; + default: + break; + } +out: + mconsole_reply(req, err_msg, err, 0); +} + +struct mconsole_output { + struct list_head list; + struct mc_request *req; +}; + +static DEFINE_SPINLOCK(client_lock); +static LIST_HEAD(clients); +static char console_buf[MCONSOLE_MAX_DATA] __nonstring; + +static void console_write(struct console *console, const char *string, + unsigned int len) +{ + struct list_head *ele; + int n; + + if (list_empty(&clients)) + return; + + while (len > 0) { + n = min((size_t) len, ARRAY_SIZE(console_buf)); + memcpy(console_buf, string, n); + string += n; + len -= n; + + list_for_each(ele, &clients) { + struct mconsole_output *entry; + + entry = list_entry(ele, struct mconsole_output, list); + mconsole_reply_len(entry->req, console_buf, n, 0, 1); + } + } +} + +static struct console mc_console = { .name = "mc", + .write = console_write, + .flags = CON_ENABLED, + .index = -1 }; + +static int mc_add_console(void) +{ + register_console(&mc_console); + return 0; +} + +late_initcall(mc_add_console); + +static void with_console(struct mc_request *req, void (*proc)(void *), + void *arg) +{ + struct mconsole_output entry; + unsigned long flags; + + entry.req = req; + spin_lock_irqsave(&client_lock, flags); + list_add(&entry.list, &clients); + spin_unlock_irqrestore(&client_lock, flags); + + (*proc)(arg); + + mconsole_reply_len(req, "", 0, 0, 0); + + spin_lock_irqsave(&client_lock, flags); + list_del(&entry.list); + spin_unlock_irqrestore(&client_lock, flags); +} + +#ifdef CONFIG_MAGIC_SYSRQ + +#include <linux/sysrq.h> + +static void sysrq_proc(void *arg) +{ + char *op = arg; + handle_sysrq(*op); +} + +void mconsole_sysrq(struct mc_request *req) +{ + char *ptr = req->request.data; + + ptr += strlen("sysrq"); + ptr = skip_spaces(ptr); + + /* + * With 'b', the system will shut down without a chance to reply, + * so in this case, we reply first. + */ + if (*ptr == 'b') + mconsole_reply(req, "", 0, 0); + + with_console(req, sysrq_proc, ptr); +} +#else +void mconsole_sysrq(struct mc_request *req) +{ + mconsole_reply(req, "Sysrq not compiled in", 1, 0); +} +#endif + +static void stack_proc(void *arg) +{ + struct task_struct *task = arg; + + show_stack(task, NULL, KERN_INFO); +} + +/* + * Mconsole stack trace + * Added by Allan Graves, Jeff Dike + * Dumps a stacks registers to the linux console. + * Usage stack <pid>. + */ +void mconsole_stack(struct mc_request *req) +{ + char *ptr = req->request.data; + int pid_requested= -1; + struct task_struct *to = NULL; + + /* + * Would be nice: + * 1) Send showregs output to mconsole. + * 2) Add a way to stack dump all pids. + */ + + ptr += strlen("stack"); + ptr = skip_spaces(ptr); + + /* + * Should really check for multiple pids or reject bad args here + */ + /* What do the arguments in mconsole_reply mean? */ + if (sscanf(ptr, "%d", &pid_requested) == 0) { + mconsole_reply(req, "Please specify a pid", 1, 0); + return; + } + + to = find_task_by_pid_ns(pid_requested, &init_pid_ns); + if ((to == NULL) || (pid_requested == 0)) { + mconsole_reply(req, "Couldn't find that pid", 1, 0); + return; + } + with_console(req, stack_proc, to); +} + +static int __init mount_proc(void) +{ + struct file_system_type *proc_fs_type; + struct vfsmount *mnt; + + proc_fs_type = get_fs_type("proc"); + if (!proc_fs_type) + return -ENODEV; + + mnt = kern_mount(proc_fs_type); + put_filesystem(proc_fs_type); + if (IS_ERR(mnt)) + return PTR_ERR(mnt); + + proc_mnt = mnt; + return 0; +} + +/* + * Changed by mconsole_setup, which is __setup, and called before SMP is + * active. + */ +static char *notify_socket = NULL; + +static int __init mconsole_init(void) +{ + /* long to avoid size mismatch warnings from gcc */ + long sock; + int err; + char file[UNIX_PATH_MAX]; + + mount_proc(); + + if (umid_file_name("mconsole", file, sizeof(file))) + return -1; + snprintf(mconsole_socket_name, sizeof(file), "%s", file); + + sock = os_create_unix_socket(file, sizeof(file), 1); + if (sock < 0) { + printk(KERN_ERR "Failed to initialize management console\n"); + return 1; + } + if (os_set_fd_block(sock, 0)) + goto out; + + register_reboot_notifier(&reboot_notifier); + + err = um_request_irq(MCONSOLE_IRQ, sock, IRQ_READ, mconsole_interrupt, + IRQF_SHARED, "mconsole", (void *)sock); + if (err < 0) { + printk(KERN_ERR "Failed to get IRQ for management console\n"); + goto out; + } + + if (notify_socket != NULL) { + notify_socket = kstrdup(notify_socket, GFP_KERNEL); + if (notify_socket != NULL) + mconsole_notify(notify_socket, MCONSOLE_SOCKET, + mconsole_socket_name, + strlen(mconsole_socket_name) + 1); + else printk(KERN_ERR "mconsole_setup failed to strdup " + "string\n"); + } + + printk(KERN_INFO "mconsole (version %d) initialized on %s\n", + MCONSOLE_VERSION, mconsole_socket_name); + return 0; + + out: + os_close_file(sock); + return 1; +} + +__initcall(mconsole_init); + +static ssize_t mconsole_proc_write(struct file *file, + const char __user *buffer, size_t count, loff_t *pos) +{ + char *buf; + + buf = memdup_user_nul(buffer, count); + if (IS_ERR(buf)) + return PTR_ERR(buf); + + mconsole_notify(notify_socket, MCONSOLE_USER_NOTIFY, buf, count); + kfree(buf); + return count; +} + +static const struct proc_ops mconsole_proc_ops = { + .proc_write = mconsole_proc_write, + .proc_lseek = noop_llseek, +}; + +static int create_proc_mconsole(void) +{ + struct proc_dir_entry *ent; + + if (notify_socket == NULL) + return 0; + + ent = proc_create("mconsole", 0200, NULL, &mconsole_proc_ops); + if (ent == NULL) { + printk(KERN_INFO "create_proc_mconsole : proc_create failed\n"); + return 0; + } + return 0; +} + +static DEFINE_SPINLOCK(notify_spinlock); + +void lock_notify(void) +{ + spin_lock(¬ify_spinlock); +} + +void unlock_notify(void) +{ + spin_unlock(¬ify_spinlock); +} + +__initcall(create_proc_mconsole); + +#define NOTIFY "notify:" + +static int mconsole_setup(char *str) +{ + if (!strncmp(str, NOTIFY, strlen(NOTIFY))) { + str += strlen(NOTIFY); + notify_socket = str; + } + else printk(KERN_ERR "mconsole_setup : Unknown option - '%s'\n", str); + return 1; +} + +__setup("mconsole=", mconsole_setup); + +__uml_help(mconsole_setup, +"mconsole=notify:<socket>\n" +" Requests that the mconsole driver send a message to the named Unix\n" +" socket containing the name of the mconsole socket. This also serves\n" +" to notify outside processes when UML has booted far enough to respond\n" +" to mconsole requests.\n\n" +); + +static int notify_panic(struct notifier_block *self, unsigned long unused1, + void *ptr) +{ + char *message = ptr; + + if (notify_socket == NULL) + return 0; + + mconsole_notify(notify_socket, MCONSOLE_PANIC, message, + strlen(message) + 1); + return NOTIFY_DONE; +} + +static struct notifier_block panic_exit_notifier = { + .notifier_call = notify_panic, + .priority = INT_MAX, /* run as soon as possible */ +}; + +static int add_notifier(void) +{ + atomic_notifier_chain_register(&panic_notifier_list, + &panic_exit_notifier); + return 0; +} + +__initcall(add_notifier); + +char *mconsole_notify_socket(void) +{ + return notify_socket; +} + +EXPORT_SYMBOL(mconsole_notify_socket); diff --git a/arch/um/drivers/mconsole_kern.h b/arch/um/drivers/mconsole_kern.h new file mode 100644 index 0000000000..56d8d6a3ff --- /dev/null +++ b/arch/um/drivers/mconsole_kern.h @@ -0,0 +1,52 @@ +/* SPDX-License-Identifier: GPL-2.0 */ +/* + * Copyright (C) 2001, 2002 Jeff Dike (jdike@karaya.com) + */ + +#ifndef __MCONSOLE_KERN_H__ +#define __MCONSOLE_KERN_H__ + +#include <linux/list.h> +#include "mconsole.h" + +struct mconsole_entry { + struct list_head list; + struct mc_request request; +}; + +/* All these methods are called in process context. */ +struct mc_device { + struct list_head list; + char *name; + int (*config)(char *, char **); + int (*get_config)(char *, char *, int, char **); + int (*id)(char **, int *, int *); + int (*remove)(int, char **); +}; + +#define CONFIG_CHUNK(str, size, current, chunk, end) \ +do { \ + current += strlen(chunk); \ + if(current >= size) \ + str = NULL; \ + if(str != NULL){ \ + strcpy(str, chunk); \ + str += strlen(chunk); \ + } \ + if(end) \ + current++; \ +} while(0) + +#ifdef CONFIG_MCONSOLE + +extern void mconsole_register_dev(struct mc_device *new); + +#else + +static inline void mconsole_register_dev(struct mc_device *new) +{ +} + +#endif + +#endif diff --git a/arch/um/drivers/mconsole_user.c b/arch/um/drivers/mconsole_user.c new file mode 100644 index 0000000000..e24298a734 --- /dev/null +++ b/arch/um/drivers/mconsole_user.c @@ -0,0 +1,219 @@ +// SPDX-License-Identifier: GPL-2.0 +/* + * Copyright (C) 2001 Lennert Buytenhek (buytenh@gnu.org) + * Copyright (C) 2001 - 2007 Jeff Dike (jdike@{addtoit,linux.intel}.com) + */ + +#include <errno.h> +#include <string.h> +#include <unistd.h> +#include <sys/socket.h> +#include <sys/uio.h> +#include <sys/un.h> +#include "mconsole.h" + +static struct mconsole_command commands[] = { + /* + * With uts namespaces, uts information becomes process-specific, so + * we need a process context. If we try handling this in interrupt + * context, we may hit an exiting process without a valid uts + * namespace. + */ + { "version", mconsole_version, MCONSOLE_PROC }, + { "halt", mconsole_halt, MCONSOLE_PROC }, + { "reboot", mconsole_reboot, MCONSOLE_PROC }, + { "config", mconsole_config, MCONSOLE_PROC }, + { "remove", mconsole_remove, MCONSOLE_PROC }, + { "sysrq", mconsole_sysrq, MCONSOLE_INTR }, + { "help", mconsole_help, MCONSOLE_INTR }, + { "cad", mconsole_cad, MCONSOLE_INTR }, + { "stop", mconsole_stop, MCONSOLE_PROC }, + { "go", mconsole_go, MCONSOLE_INTR }, + { "log", mconsole_log, MCONSOLE_INTR }, + { "proc", mconsole_proc, MCONSOLE_PROC }, + { "stack", mconsole_stack, MCONSOLE_INTR }, +}; + +/* Initialized in mconsole_init, which is an initcall */ +char mconsole_socket_name[256]; + +static int mconsole_reply_v0(struct mc_request *req, char *reply) +{ + struct iovec iov; + struct msghdr msg; + + iov.iov_base = reply; + iov.iov_len = strlen(reply); + + msg.msg_name = &(req->origin); + msg.msg_namelen = req->originlen; + msg.msg_iov = &iov; + msg.msg_iovlen = 1; + msg.msg_control = NULL; + msg.msg_controllen = 0; + msg.msg_flags = 0; + + return sendmsg(req->originating_fd, &msg, 0); +} + +static struct mconsole_command *mconsole_parse(struct mc_request *req) +{ + struct mconsole_command *cmd; + int i; + + for (i = 0; i < ARRAY_SIZE(commands); i++) { + cmd = &commands[i]; + if (!strncmp(req->request.data, cmd->command, + strlen(cmd->command))) { + return cmd; + } + } + return NULL; +} + +#define MIN(a,b) ((a)<(b) ? (a):(b)) + +#define STRINGX(x) #x +#define STRING(x) STRINGX(x) + +int mconsole_get_request(int fd, struct mc_request *req) +{ + int len; + + req->originlen = sizeof(req->origin); + req->len = recvfrom(fd, &req->request, sizeof(req->request), 0, + (struct sockaddr *) req->origin, &req->originlen); + if (req->len < 0) + return 0; + + req->originating_fd = fd; + + if (req->request.magic != MCONSOLE_MAGIC) { + /* Unversioned request */ + len = MIN(sizeof(req->request.data) - 1, + strlen((char *) &req->request)); + memmove(req->request.data, &req->request, len); + req->request.data[len] = '\0'; + + req->request.magic = MCONSOLE_MAGIC; + req->request.version = 0; + req->request.len = len; + + mconsole_reply_v0(req, "ERR Version 0 mconsole clients are " + "not supported by this driver"); + return 0; + } + + if (req->request.len >= MCONSOLE_MAX_DATA) { + mconsole_reply(req, "Request too large", 1, 0); + return 0; + } + if (req->request.version != MCONSOLE_VERSION) { + mconsole_reply(req, "This driver only supports version " + STRING(MCONSOLE_VERSION) " clients", 1, 0); + } + + req->request.data[req->request.len] = '\0'; + req->cmd = mconsole_parse(req); + if (req->cmd == NULL) { + mconsole_reply(req, "Unknown command", 1, 0); + return 0; + } + + return 1; +} + +int mconsole_reply_len(struct mc_request *req, const char *str, int total, + int err, int more) +{ + /* + * XXX This is a stack consumption problem. It'd be nice to + * make it global and serialize access to it, but there are a + * ton of callers to this function. + */ + struct mconsole_reply reply; + int len, n; + + do { + reply.err = err; + + /* err can only be true on the first packet */ + err = 0; + + len = MIN(total, MCONSOLE_MAX_DATA - 1); + + if (len == total) reply.more = more; + else reply.more = 1; + + memcpy(reply.data, str, len); + reply.data[len] = '\0'; + total -= len; + str += len; + reply.len = len + 1; + + len = sizeof(reply) + reply.len - sizeof(reply.data); + + n = sendto(req->originating_fd, &reply, len, 0, + (struct sockaddr *) req->origin, req->originlen); + + if (n < 0) + return -errno; + } while (total > 0); + return 0; +} + +int mconsole_reply(struct mc_request *req, const char *str, int err, int more) +{ + return mconsole_reply_len(req, str, strlen(str), err, more); +} + + +int mconsole_unlink_socket(void) +{ + unlink(mconsole_socket_name); + return 0; +} + +static int notify_sock = -1; + +int mconsole_notify(char *sock_name, int type, const void *data, int len) +{ + struct sockaddr_un target; + struct mconsole_notify packet; + int n, err = 0; + + lock_notify(); + if (notify_sock < 0) { + notify_sock = socket(PF_UNIX, SOCK_DGRAM, 0); + if (notify_sock < 0) { + err = -errno; + printk(UM_KERN_ERR "mconsole_notify - socket failed, " + "errno = %d\n", errno); + } + } + unlock_notify(); + + if (err) + return err; + + target.sun_family = AF_UNIX; + strcpy(target.sun_path, sock_name); + + packet.magic = MCONSOLE_MAGIC; + packet.version = MCONSOLE_VERSION; + packet.type = type; + len = (len > sizeof(packet.data)) ? sizeof(packet.data) : len; + packet.len = len; + memcpy(packet.data, data, len); + + err = 0; + len = sizeof(packet) + packet.len - sizeof(packet.data); + n = sendto(notify_sock, &packet, len, 0, (struct sockaddr *) &target, + sizeof(target)); + if (n < 0) { + err = -errno; + printk(UM_KERN_ERR "mconsole_notify - sendto failed, " + "errno = %d\n", errno); + } + return err; +} diff --git a/arch/um/drivers/mmapper_kern.c b/arch/um/drivers/mmapper_kern.c new file mode 100644 index 0000000000..807cd33587 --- /dev/null +++ b/arch/um/drivers/mmapper_kern.c @@ -0,0 +1,135 @@ +// SPDX-License-Identifier: GPL-2.0-only +/* + * arch/um/drivers/mmapper_kern.c + * + * BRIEF MODULE DESCRIPTION + * + * Copyright (C) 2000 RidgeRun, Inc. + * Author: RidgeRun, Inc. + * Greg Lonnon glonnon@ridgerun.com or info@ridgerun.com + * + */ + +#include <linux/stddef.h> +#include <linux/types.h> +#include <linux/fs.h> +#include <linux/init.h> +#include <linux/miscdevice.h> +#include <linux/module.h> +#include <linux/mm.h> + +#include <linux/uaccess.h> +#include <mem_user.h> + +/* These are set in mmapper_init, which is called at boot time */ +static unsigned long mmapper_size; +static unsigned long p_buf; +static char *v_buf; + +static ssize_t mmapper_read(struct file *file, char __user *buf, size_t count, + loff_t *ppos) +{ + return simple_read_from_buffer(buf, count, ppos, v_buf, mmapper_size); +} + +static ssize_t mmapper_write(struct file *file, const char __user *buf, + size_t count, loff_t *ppos) +{ + if (*ppos > mmapper_size) + return -EINVAL; + + return simple_write_to_buffer(v_buf, mmapper_size, ppos, buf, count); +} + +static long mmapper_ioctl(struct file *file, unsigned int cmd, unsigned long arg) +{ + return -ENOIOCTLCMD; +} + +static int mmapper_mmap(struct file *file, struct vm_area_struct *vma) +{ + int ret = -EINVAL; + int size; + + if (vma->vm_pgoff != 0) + goto out; + + size = vma->vm_end - vma->vm_start; + if (size > mmapper_size) + return -EFAULT; + + /* + * XXX A comment above remap_pfn_range says it should only be + * called when the mm semaphore is held + */ + if (remap_pfn_range(vma, vma->vm_start, p_buf >> PAGE_SHIFT, size, + vma->vm_page_prot)) + goto out; + ret = 0; +out: + return ret; +} + +static int mmapper_open(struct inode *inode, struct file *file) +{ + return 0; +} + +static int mmapper_release(struct inode *inode, struct file *file) +{ + return 0; +} + +static const struct file_operations mmapper_fops = { + .owner = THIS_MODULE, + .read = mmapper_read, + .write = mmapper_write, + .unlocked_ioctl = mmapper_ioctl, + .mmap = mmapper_mmap, + .open = mmapper_open, + .release = mmapper_release, + .llseek = default_llseek, +}; + +/* + * No locking needed - only used (and modified) by below initcall and exitcall. + */ +static struct miscdevice mmapper_dev = { + .minor = MISC_DYNAMIC_MINOR, + .name = "mmapper", + .fops = &mmapper_fops +}; + +static int __init mmapper_init(void) +{ + int err; + + printk(KERN_INFO "Mapper v0.1\n"); + + v_buf = (char *) find_iomem("mmapper", &mmapper_size); + if (mmapper_size == 0) { + printk(KERN_ERR "mmapper_init - find_iomem failed\n"); + return -ENODEV; + } + p_buf = __pa(v_buf); + + err = misc_register(&mmapper_dev); + if (err) { + printk(KERN_ERR "mmapper - misc_register failed, err = %d\n", + err); + return err; + } + return 0; +} + +static void __exit mmapper_exit(void) +{ + misc_deregister(&mmapper_dev); +} + +module_init(mmapper_init); +module_exit(mmapper_exit); + +MODULE_AUTHOR("Greg Lonnon <glonnon@ridgerun.com>"); +MODULE_DESCRIPTION("DSPLinux simulator mmapper driver"); +MODULE_LICENSE("GPL"); diff --git a/arch/um/drivers/net_kern.c b/arch/um/drivers/net_kern.c new file mode 100644 index 0000000000..3d7836c465 --- /dev/null +++ b/arch/um/drivers/net_kern.c @@ -0,0 +1,892 @@ +// SPDX-License-Identifier: GPL-2.0 +/* + * Copyright (C) 2001 - 2007 Jeff Dike (jdike@{addtoit,linux.intel}.com) + * Copyright (C) 2001 Lennert Buytenhek (buytenh@gnu.org) and + * James Leu (jleu@mindspring.net). + * Copyright (C) 2001 by various other people who didn't put their name here. + */ + +#include <linux/memblock.h> +#include <linux/etherdevice.h> +#include <linux/ethtool.h> +#include <linux/inetdevice.h> +#include <linux/init.h> +#include <linux/list.h> +#include <linux/netdevice.h> +#include <linux/platform_device.h> +#include <linux/rtnetlink.h> +#include <linux/skbuff.h> +#include <linux/slab.h> +#include <linux/spinlock.h> +#include <init.h> +#include <irq_kern.h> +#include <irq_user.h> +#include "mconsole_kern.h" +#include <net_kern.h> +#include <net_user.h> + +#define DRIVER_NAME "uml-netdev" + +static DEFINE_SPINLOCK(opened_lock); +static LIST_HEAD(opened); + +/* + * The drop_skb is used when we can't allocate an skb. The + * packet is read into drop_skb in order to get the data off the + * connection to the host. + * It is reallocated whenever a maximum packet size is seen which is + * larger than any seen before. update_drop_skb is called from + * eth_configure when a new interface is added. + */ +static DEFINE_SPINLOCK(drop_lock); +static struct sk_buff *drop_skb; +static int drop_max; + +static int update_drop_skb(int max) +{ + struct sk_buff *new; + unsigned long flags; + int err = 0; + + spin_lock_irqsave(&drop_lock, flags); + + if (max <= drop_max) + goto out; + + err = -ENOMEM; + new = dev_alloc_skb(max); + if (new == NULL) + goto out; + + skb_put(new, max); + + kfree_skb(drop_skb); + drop_skb = new; + drop_max = max; + err = 0; +out: + spin_unlock_irqrestore(&drop_lock, flags); + + return err; +} + +static int uml_net_rx(struct net_device *dev) +{ + struct uml_net_private *lp = netdev_priv(dev); + int pkt_len; + struct sk_buff *skb; + + /* If we can't allocate memory, try again next round. */ + skb = dev_alloc_skb(lp->max_packet); + if (skb == NULL) { + drop_skb->dev = dev; + /* Read a packet into drop_skb and don't do anything with it. */ + (*lp->read)(lp->fd, drop_skb, lp); + dev->stats.rx_dropped++; + return 0; + } + + skb->dev = dev; + skb_put(skb, lp->max_packet); + skb_reset_mac_header(skb); + pkt_len = (*lp->read)(lp->fd, skb, lp); + + if (pkt_len > 0) { + skb_trim(skb, pkt_len); + skb->protocol = (*lp->protocol)(skb); + + dev->stats.rx_bytes += skb->len; + dev->stats.rx_packets++; + netif_rx(skb); + return pkt_len; + } + + kfree_skb(skb); + return pkt_len; +} + +static void uml_dev_close(struct work_struct *work) +{ + struct uml_net_private *lp = + container_of(work, struct uml_net_private, work); + dev_close(lp->dev); +} + +static irqreturn_t uml_net_interrupt(int irq, void *dev_id) +{ + struct net_device *dev = dev_id; + struct uml_net_private *lp = netdev_priv(dev); + int err; + + if (!netif_running(dev)) + return IRQ_NONE; + + spin_lock(&lp->lock); + while ((err = uml_net_rx(dev)) > 0) ; + if (err < 0) { + printk(KERN_ERR + "Device '%s' read returned %d, shutting it down\n", + dev->name, err); + /* dev_close can't be called in interrupt context, and takes + * again lp->lock. + * And dev_close() can be safely called multiple times on the + * same device, since it tests for (dev->flags & IFF_UP). So + * there's no harm in delaying the device shutdown. + * Furthermore, the workqueue will not re-enqueue an already + * enqueued work item. */ + schedule_work(&lp->work); + goto out; + } +out: + spin_unlock(&lp->lock); + return IRQ_HANDLED; +} + +static int uml_net_open(struct net_device *dev) +{ + struct uml_net_private *lp = netdev_priv(dev); + int err; + + if (lp->fd >= 0) { + err = -ENXIO; + goto out; + } + + lp->fd = (*lp->open)(&lp->user); + if (lp->fd < 0) { + err = lp->fd; + goto out; + } + + err = um_request_irq(dev->irq, lp->fd, IRQ_READ, uml_net_interrupt, + IRQF_SHARED, dev->name, dev); + if (err < 0) { + printk(KERN_ERR "uml_net_open: failed to get irq(%d)\n", err); + err = -ENETUNREACH; + goto out_close; + } + + netif_start_queue(dev); + + /* clear buffer - it can happen that the host side of the interface + * is full when we get here. In this case, new data is never queued, + * SIGIOs never arrive, and the net never works. + */ + while ((err = uml_net_rx(dev)) > 0) ; + + spin_lock(&opened_lock); + list_add(&lp->list, &opened); + spin_unlock(&opened_lock); + + return 0; +out_close: + if (lp->close != NULL) (*lp->close)(lp->fd, &lp->user); + lp->fd = -1; +out: + return err; +} + +static int uml_net_close(struct net_device *dev) +{ + struct uml_net_private *lp = netdev_priv(dev); + + netif_stop_queue(dev); + + um_free_irq(dev->irq, dev); + if (lp->close != NULL) + (*lp->close)(lp->fd, &lp->user); + lp->fd = -1; + + spin_lock(&opened_lock); + list_del(&lp->list); + spin_unlock(&opened_lock); + + return 0; +} + +static int uml_net_start_xmit(struct sk_buff *skb, struct net_device *dev) +{ + struct uml_net_private *lp = netdev_priv(dev); + unsigned long flags; + int len; + + netif_stop_queue(dev); + + spin_lock_irqsave(&lp->lock, flags); + + len = (*lp->write)(lp->fd, skb, lp); + skb_tx_timestamp(skb); + + if (len == skb->len) { + dev->stats.tx_packets++; + dev->stats.tx_bytes += skb->len; + netif_trans_update(dev); + netif_start_queue(dev); + + /* this is normally done in the interrupt when tx finishes */ + netif_wake_queue(dev); + } + else if (len == 0) { + netif_start_queue(dev); + dev->stats.tx_dropped++; + } + else { + netif_start_queue(dev); + printk(KERN_ERR "uml_net_start_xmit: failed(%d)\n", len); + } + + spin_unlock_irqrestore(&lp->lock, flags); + + dev_consume_skb_any(skb); + + return NETDEV_TX_OK; +} + +static void uml_net_set_multicast_list(struct net_device *dev) +{ + return; +} + +static void uml_net_tx_timeout(struct net_device *dev, unsigned int txqueue) +{ + netif_trans_update(dev); + netif_wake_queue(dev); +} + +#ifdef CONFIG_NET_POLL_CONTROLLER +static void uml_net_poll_controller(struct net_device *dev) +{ + disable_irq(dev->irq); + uml_net_interrupt(dev->irq, dev); + enable_irq(dev->irq); +} +#endif + +static void uml_net_get_drvinfo(struct net_device *dev, + struct ethtool_drvinfo *info) +{ + strscpy(info->driver, DRIVER_NAME, sizeof(info->driver)); +} + +static const struct ethtool_ops uml_net_ethtool_ops = { + .get_drvinfo = uml_net_get_drvinfo, + .get_link = ethtool_op_get_link, + .get_ts_info = ethtool_op_get_ts_info, +}; + +void uml_net_setup_etheraddr(struct net_device *dev, char *str) +{ + u8 addr[ETH_ALEN]; + char *end; + int i; + + if (str == NULL) + goto random; + + for (i = 0; i < 6; i++) { + addr[i] = simple_strtoul(str, &end, 16); + if ((end == str) || + ((*end != ':') && (*end != ',') && (*end != '\0'))) { + printk(KERN_ERR + "setup_etheraddr: failed to parse '%s' " + "as an ethernet address\n", str); + goto random; + } + str = end + 1; + } + if (is_multicast_ether_addr(addr)) { + printk(KERN_ERR + "Attempt to assign a multicast ethernet address to a " + "device disallowed\n"); + goto random; + } + if (!is_valid_ether_addr(addr)) { + printk(KERN_ERR + "Attempt to assign an invalid ethernet address to a " + "device disallowed\n"); + goto random; + } + if (!is_local_ether_addr(addr)) { + printk(KERN_WARNING + "Warning: Assigning a globally valid ethernet " + "address to a device\n"); + printk(KERN_WARNING "You should set the 2nd rightmost bit in " + "the first byte of the MAC,\n"); + printk(KERN_WARNING "i.e. %02x:%02x:%02x:%02x:%02x:%02x\n", + addr[0] | 0x02, addr[1], addr[2], addr[3], addr[4], + addr[5]); + } + eth_hw_addr_set(dev, addr); + return; + +random: + printk(KERN_INFO + "Choosing a random ethernet address for device %s\n", dev->name); + eth_hw_addr_random(dev); +} + +static DEFINE_SPINLOCK(devices_lock); +static LIST_HEAD(devices); + +static struct platform_driver uml_net_driver = { + .driver = { + .name = DRIVER_NAME, + }, +}; + +static void net_device_release(struct device *dev) +{ + struct uml_net *device = dev_get_drvdata(dev); + struct net_device *netdev = device->dev; + struct uml_net_private *lp = netdev_priv(netdev); + + if (lp->remove != NULL) + (*lp->remove)(&lp->user); + list_del(&device->list); + kfree(device); + free_netdev(netdev); +} + +static const struct net_device_ops uml_netdev_ops = { + .ndo_open = uml_net_open, + .ndo_stop = uml_net_close, + .ndo_start_xmit = uml_net_start_xmit, + .ndo_set_rx_mode = uml_net_set_multicast_list, + .ndo_tx_timeout = uml_net_tx_timeout, + .ndo_set_mac_address = eth_mac_addr, + .ndo_validate_addr = eth_validate_addr, +#ifdef CONFIG_NET_POLL_CONTROLLER + .ndo_poll_controller = uml_net_poll_controller, +#endif +}; + +/* + * Ensures that platform_driver_register is called only once by + * eth_configure. Will be set in an initcall. + */ +static int driver_registered; + +static void eth_configure(int n, void *init, char *mac, + struct transport *transport, gfp_t gfp_mask) +{ + struct uml_net *device; + struct net_device *dev; + struct uml_net_private *lp; + int err, size; + + size = transport->private_size + sizeof(struct uml_net_private); + + device = kzalloc(sizeof(*device), gfp_mask); + if (device == NULL) { + printk(KERN_ERR "eth_configure failed to allocate struct " + "uml_net\n"); + return; + } + + dev = alloc_etherdev(size); + if (dev == NULL) { + printk(KERN_ERR "eth_configure: failed to allocate struct " + "net_device for eth%d\n", n); + goto out_free_device; + } + + INIT_LIST_HEAD(&device->list); + device->index = n; + + /* If this name ends up conflicting with an existing registered + * netdevice, that is OK, register_netdev{,ice}() will notice this + * and fail. + */ + snprintf(dev->name, sizeof(dev->name), "eth%d", n); + + uml_net_setup_etheraddr(dev, mac); + + printk(KERN_INFO "Netdevice %d (%pM) : ", n, dev->dev_addr); + + lp = netdev_priv(dev); + /* This points to the transport private data. It's still clear, but we + * must memset it to 0 *now*. Let's help the drivers. */ + memset(lp, 0, size); + INIT_WORK(&lp->work, uml_dev_close); + + /* sysfs register */ + if (!driver_registered) { + platform_driver_register(¨_net_driver); + driver_registered = 1; + } + device->pdev.id = n; + device->pdev.name = DRIVER_NAME; + device->pdev.dev.release = net_device_release; + dev_set_drvdata(&device->pdev.dev, device); + if (platform_device_register(&device->pdev)) + goto out_free_netdev; + SET_NETDEV_DEV(dev,&device->pdev.dev); + + device->dev = dev; + + /* + * These just fill in a data structure, so there's no failure + * to be worried about. + */ + (*transport->kern->init)(dev, init); + + *lp = ((struct uml_net_private) + { .list = LIST_HEAD_INIT(lp->list), + .dev = dev, + .fd = -1, + .mac = { 0xfe, 0xfd, 0x0, 0x0, 0x0, 0x0}, + .max_packet = transport->user->max_packet, + .protocol = transport->kern->protocol, + .open = transport->user->open, + .close = transport->user->close, + .remove = transport->user->remove, + .read = transport->kern->read, + .write = transport->kern->write, + .add_address = transport->user->add_address, + .delete_address = transport->user->delete_address }); + + spin_lock_init(&lp->lock); + memcpy(lp->mac, dev->dev_addr, sizeof(lp->mac)); + + if ((transport->user->init != NULL) && + ((*transport->user->init)(&lp->user, dev) != 0)) + goto out_unregister; + + dev->mtu = transport->user->mtu; + dev->netdev_ops = ¨_netdev_ops; + dev->ethtool_ops = ¨_net_ethtool_ops; + dev->watchdog_timeo = (HZ >> 1); + dev->irq = UM_ETH_IRQ; + + err = update_drop_skb(lp->max_packet); + if (err) + goto out_undo_user_init; + + rtnl_lock(); + err = register_netdevice(dev); + rtnl_unlock(); + if (err) + goto out_undo_user_init; + + spin_lock(&devices_lock); + list_add(&device->list, &devices); + spin_unlock(&devices_lock); + + return; + +out_undo_user_init: + if (transport->user->remove != NULL) + (*transport->user->remove)(&lp->user); +out_unregister: + platform_device_unregister(&device->pdev); + return; /* platform_device_unregister frees dev and device */ +out_free_netdev: + free_netdev(dev); +out_free_device: + kfree(device); +} + +static struct uml_net *find_device(int n) +{ + struct uml_net *device; + struct list_head *ele; + + spin_lock(&devices_lock); + list_for_each(ele, &devices) { + device = list_entry(ele, struct uml_net, list); + if (device->index == n) + goto out; + } + device = NULL; + out: + spin_unlock(&devices_lock); + return device; +} + +static int eth_parse(char *str, int *index_out, char **str_out, + char **error_out) +{ + char *end; + int n, err = -EINVAL; + + n = simple_strtoul(str, &end, 0); + if (end == str) { + *error_out = "Bad device number"; + return err; + } + + str = end; + if (*str != '=') { + *error_out = "Expected '=' after device number"; + return err; + } + + str++; + if (find_device(n)) { + *error_out = "Device already configured"; + return err; + } + + *index_out = n; + *str_out = str; + return 0; +} + +struct eth_init { + struct list_head list; + char *init; + int index; +}; + +static DEFINE_SPINLOCK(transports_lock); +static LIST_HEAD(transports); + +/* Filled in during early boot */ +static LIST_HEAD(eth_cmd_line); + +static int check_transport(struct transport *transport, char *eth, int n, + void **init_out, char **mac_out, gfp_t gfp_mask) +{ + int len; + + len = strlen(transport->name); + if (strncmp(eth, transport->name, len)) + return 0; + + eth += len; + if (*eth == ',') + eth++; + else if (*eth != '\0') + return 0; + + *init_out = kmalloc(transport->setup_size, gfp_mask); + if (*init_out == NULL) + return 1; + + if (!transport->setup(eth, mac_out, *init_out)) { + kfree(*init_out); + *init_out = NULL; + } + return 1; +} + +void register_transport(struct transport *new) +{ + struct list_head *ele, *next; + struct eth_init *eth; + void *init; + char *mac = NULL; + int match; + + spin_lock(&transports_lock); + BUG_ON(!list_empty(&new->list)); + list_add(&new->list, &transports); + spin_unlock(&transports_lock); + + list_for_each_safe(ele, next, ð_cmd_line) { + eth = list_entry(ele, struct eth_init, list); + match = check_transport(new, eth->init, eth->index, &init, + &mac, GFP_KERNEL); + if (!match) + continue; + else if (init != NULL) { + eth_configure(eth->index, init, mac, new, GFP_KERNEL); + kfree(init); + } + list_del(ð->list); + } +} + +static int eth_setup_common(char *str, int index) +{ + struct list_head *ele; + struct transport *transport; + void *init; + char *mac = NULL; + int found = 0; + + spin_lock(&transports_lock); + list_for_each(ele, &transports) { + transport = list_entry(ele, struct transport, list); + if (!check_transport(transport, str, index, &init, + &mac, GFP_ATOMIC)) + continue; + if (init != NULL) { + eth_configure(index, init, mac, transport, GFP_ATOMIC); + kfree(init); + } + found = 1; + break; + } + + spin_unlock(&transports_lock); + return found; +} + +static int __init eth_setup(char *str) +{ + struct eth_init *new; + char *error; + int n, err; + + err = eth_parse(str, &n, &str, &error); + if (err) { + printk(KERN_ERR "eth_setup - Couldn't parse '%s' : %s\n", + str, error); + return 1; + } + + new = memblock_alloc(sizeof(*new), SMP_CACHE_BYTES); + if (!new) + panic("%s: Failed to allocate %zu bytes\n", __func__, + sizeof(*new)); + + INIT_LIST_HEAD(&new->list); + new->index = n; + new->init = str; + + list_add_tail(&new->list, ð_cmd_line); + return 1; +} + +__setup("eth", eth_setup); +__uml_help(eth_setup, +"eth[0-9]+=<transport>,<options>\n" +" Configure a network device.\n\n" +); + +static int net_config(char *str, char **error_out) +{ + int n, err; + + err = eth_parse(str, &n, &str, error_out); + if (err) + return err; + + /* This string is broken up and the pieces used by the underlying + * driver. So, it is freed only if eth_setup_common fails. + */ + str = kstrdup(str, GFP_KERNEL); + if (str == NULL) { + *error_out = "net_config failed to strdup string"; + return -ENOMEM; + } + err = !eth_setup_common(str, n); + if (err) + kfree(str); + return err; +} + +static int net_id(char **str, int *start_out, int *end_out) +{ + char *end; + int n; + + n = simple_strtoul(*str, &end, 0); + if ((*end != '\0') || (end == *str)) + return -1; + + *start_out = n; + *end_out = n; + *str = end; + return n; +} + +static int net_remove(int n, char **error_out) +{ + struct uml_net *device; + struct net_device *dev; + struct uml_net_private *lp; + + device = find_device(n); + if (device == NULL) + return -ENODEV; + + dev = device->dev; + lp = netdev_priv(dev); + if (lp->fd > 0) + return -EBUSY; + unregister_netdev(dev); + platform_device_unregister(&device->pdev); + + return 0; +} + +static struct mc_device net_mc = { + .list = LIST_HEAD_INIT(net_mc.list), + .name = "eth", + .config = net_config, + .get_config = NULL, + .id = net_id, + .remove = net_remove, +}; + +#ifdef CONFIG_INET +static int uml_inetaddr_event(struct notifier_block *this, unsigned long event, + void *ptr) +{ + struct in_ifaddr *ifa = ptr; + struct net_device *dev = ifa->ifa_dev->dev; + struct uml_net_private *lp; + void (*proc)(unsigned char *, unsigned char *, void *); + unsigned char addr_buf[4], netmask_buf[4]; + + if (dev->netdev_ops->ndo_open != uml_net_open) + return NOTIFY_DONE; + + lp = netdev_priv(dev); + + proc = NULL; + switch (event) { + case NETDEV_UP: + proc = lp->add_address; + break; + case NETDEV_DOWN: + proc = lp->delete_address; + break; + } + if (proc != NULL) { + memcpy(addr_buf, &ifa->ifa_address, sizeof(addr_buf)); + memcpy(netmask_buf, &ifa->ifa_mask, sizeof(netmask_buf)); + (*proc)(addr_buf, netmask_buf, &lp->user); + } + return NOTIFY_DONE; +} + +/* uml_net_init shouldn't be called twice on two CPUs at the same time */ +static struct notifier_block uml_inetaddr_notifier = { + .notifier_call = uml_inetaddr_event, +}; + +static void inet_register(void) +{ + struct list_head *ele; + struct uml_net_private *lp; + struct in_device *ip; + struct in_ifaddr *in; + + register_inetaddr_notifier(¨_inetaddr_notifier); + + /* Devices may have been opened already, so the uml_inetaddr_notifier + * didn't get a chance to run for them. This fakes it so that + * addresses which have already been set up get handled properly. + */ + spin_lock(&opened_lock); + list_for_each(ele, &opened) { + lp = list_entry(ele, struct uml_net_private, list); + ip = lp->dev->ip_ptr; + if (ip == NULL) + continue; + in = ip->ifa_list; + while (in != NULL) { + uml_inetaddr_event(NULL, NETDEV_UP, in); + in = in->ifa_next; + } + } + spin_unlock(&opened_lock); +} +#else +static inline void inet_register(void) +{ +} +#endif + +static int uml_net_init(void) +{ + mconsole_register_dev(&net_mc); + inet_register(); + return 0; +} + +__initcall(uml_net_init); + +static void close_devices(void) +{ + struct list_head *ele; + struct uml_net_private *lp; + + spin_lock(&opened_lock); + list_for_each(ele, &opened) { + lp = list_entry(ele, struct uml_net_private, list); + um_free_irq(lp->dev->irq, lp->dev); + if ((lp->close != NULL) && (lp->fd >= 0)) + (*lp->close)(lp->fd, &lp->user); + if (lp->remove != NULL) + (*lp->remove)(&lp->user); + } + spin_unlock(&opened_lock); +} + +__uml_exitcall(close_devices); + +void iter_addresses(void *d, void (*cb)(unsigned char *, unsigned char *, + void *), + void *arg) +{ + struct net_device *dev = d; + struct in_device *ip = dev->ip_ptr; + struct in_ifaddr *in; + unsigned char address[4], netmask[4]; + + if (ip == NULL) return; + in = ip->ifa_list; + while (in != NULL) { + memcpy(address, &in->ifa_address, sizeof(address)); + memcpy(netmask, &in->ifa_mask, sizeof(netmask)); + (*cb)(address, netmask, arg); + in = in->ifa_next; + } +} + +int dev_netmask(void *d, void *m) +{ + struct net_device *dev = d; + struct in_device *ip = dev->ip_ptr; + struct in_ifaddr *in; + __be32 *mask_out = m; + + if (ip == NULL) + return 1; + + in = ip->ifa_list; + if (in == NULL) + return 1; + + *mask_out = in->ifa_mask; + return 0; +} + +void *get_output_buffer(int *len_out) +{ + void *ret; + + ret = (void *) __get_free_pages(GFP_KERNEL, 0); + if (ret) *len_out = PAGE_SIZE; + else *len_out = 0; + return ret; +} + +void free_output_buffer(void *buffer) +{ + free_pages((unsigned long) buffer, 0); +} + +int tap_setup_common(char *str, char *type, char **dev_name, char **mac_out, + char **gate_addr) +{ + char *remain; + + remain = split_if_spec(str, dev_name, mac_out, gate_addr, NULL); + if (remain != NULL) { + printk(KERN_ERR "tap_setup_common - Extra garbage on " + "specification : '%s'\n", remain); + return 1; + } + + return 0; +} + +unsigned short eth_protocol(struct sk_buff *skb) +{ + return eth_type_trans(skb, skb->dev); +} diff --git a/arch/um/drivers/net_user.c b/arch/um/drivers/net_user.c new file mode 100644 index 0000000000..4c9576452a --- /dev/null +++ b/arch/um/drivers/net_user.c @@ -0,0 +1,271 @@ +// SPDX-License-Identifier: GPL-2.0 +/* + * Copyright (C) 2001 - 2007 Jeff Dike (jdike@{addtoit,linux.intel}.com) + */ + +#include <stdio.h> +#include <unistd.h> +#include <stdarg.h> +#include <errno.h> +#include <stddef.h> +#include <string.h> +#include <sys/socket.h> +#include <sys/wait.h> +#include <net_user.h> +#include <os.h> +#include <um_malloc.h> + +int tap_open_common(void *dev, char *gate_addr) +{ + int tap_addr[4]; + + if (gate_addr == NULL) + return 0; + if (sscanf(gate_addr, "%d.%d.%d.%d", &tap_addr[0], + &tap_addr[1], &tap_addr[2], &tap_addr[3]) != 4) { + printk(UM_KERN_ERR "Invalid tap IP address - '%s'\n", + gate_addr); + return -EINVAL; + } + return 0; +} + +void tap_check_ips(char *gate_addr, unsigned char *eth_addr) +{ + int tap_addr[4]; + + if ((gate_addr != NULL) && + (sscanf(gate_addr, "%d.%d.%d.%d", &tap_addr[0], + &tap_addr[1], &tap_addr[2], &tap_addr[3]) == 4) && + (eth_addr[0] == tap_addr[0]) && + (eth_addr[1] == tap_addr[1]) && + (eth_addr[2] == tap_addr[2]) && + (eth_addr[3] == tap_addr[3])) { + printk(UM_KERN_ERR "The tap IP address and the UML eth IP " + "address must be different\n"); + } +} + +/* Do reliable error handling as this fails frequently enough. */ +void read_output(int fd, char *output, int len) +{ + int remain, ret, expected; + char c; + char *str; + + if (output == NULL) { + output = &c; + len = sizeof(c); + } + + *output = '\0'; + ret = read(fd, &remain, sizeof(remain)); + + if (ret != sizeof(remain)) { + if (ret < 0) + ret = -errno; + expected = sizeof(remain); + str = "length"; + goto err; + } + + while (remain != 0) { + expected = (remain < len) ? remain : len; + ret = read(fd, output, expected); + if (ret != expected) { + if (ret < 0) + ret = -errno; + str = "data"; + goto err; + } + remain -= ret; + } + + return; + +err: + if (ret < 0) + printk(UM_KERN_ERR "read_output - read of %s failed, " + "errno = %d\n", str, -ret); + else + printk(UM_KERN_ERR "read_output - read of %s failed, read only " + "%d of %d bytes\n", str, ret, expected); +} + +int net_read(int fd, void *buf, int len) +{ + int n; + + n = read(fd, buf, len); + + if ((n < 0) && (errno == EAGAIN)) + return 0; + else if (n == 0) + return -ENOTCONN; + return n; +} + +int net_recvfrom(int fd, void *buf, int len) +{ + int n; + + CATCH_EINTR(n = recvfrom(fd, buf, len, 0, NULL, NULL)); + if (n < 0) { + if (errno == EAGAIN) + return 0; + return -errno; + } + else if (n == 0) + return -ENOTCONN; + return n; +} + +int net_write(int fd, void *buf, int len) +{ + int n; + + n = write(fd, buf, len); + + if ((n < 0) && (errno == EAGAIN)) + return 0; + else if (n == 0) + return -ENOTCONN; + return n; +} + +int net_send(int fd, void *buf, int len) +{ + int n; + + CATCH_EINTR(n = send(fd, buf, len, 0)); + if (n < 0) { + if (errno == EAGAIN) + return 0; + return -errno; + } + else if (n == 0) + return -ENOTCONN; + return n; +} + +int net_sendto(int fd, void *buf, int len, void *to, int sock_len) +{ + int n; + + CATCH_EINTR(n = sendto(fd, buf, len, 0, (struct sockaddr *) to, + sock_len)); + if (n < 0) { + if (errno == EAGAIN) + return 0; + return -errno; + } + else if (n == 0) + return -ENOTCONN; + return n; +} + +struct change_pre_exec_data { + int close_me; + int stdout_fd; +}; + +static void change_pre_exec(void *arg) +{ + struct change_pre_exec_data *data = arg; + + close(data->close_me); + dup2(data->stdout_fd, 1); +} + +static int change_tramp(char **argv, char *output, int output_len) +{ + int pid, fds[2], err; + struct change_pre_exec_data pe_data; + + err = os_pipe(fds, 1, 0); + if (err < 0) { + printk(UM_KERN_ERR "change_tramp - pipe failed, err = %d\n", + -err); + return err; + } + pe_data.close_me = fds[0]; + pe_data.stdout_fd = fds[1]; + pid = run_helper(change_pre_exec, &pe_data, argv); + + if (pid > 0) /* Avoid hang as we won't get data in failure case. */ + read_output(fds[0], output, output_len); + + close(fds[0]); + close(fds[1]); + + if (pid > 0) + helper_wait(pid); + return pid; +} + +static void change(char *dev, char *what, unsigned char *addr, + unsigned char *netmask) +{ + char addr_buf[sizeof("255.255.255.255\0")]; + char netmask_buf[sizeof("255.255.255.255\0")]; + char version[sizeof("nnnnn\0")]; + char *argv[] = { "uml_net", version, what, dev, addr_buf, + netmask_buf, NULL }; + char *output; + int output_len, pid; + + sprintf(version, "%d", UML_NET_VERSION); + sprintf(addr_buf, "%d.%d.%d.%d", addr[0], addr[1], addr[2], addr[3]); + sprintf(netmask_buf, "%d.%d.%d.%d", netmask[0], netmask[1], + netmask[2], netmask[3]); + + output_len = UM_KERN_PAGE_SIZE; + output = uml_kmalloc(output_len, UM_GFP_KERNEL); + if (output == NULL) + printk(UM_KERN_ERR "change : failed to allocate output " + "buffer\n"); + + pid = change_tramp(argv, output, output_len); + if (pid < 0) { + kfree(output); + return; + } + + if (output != NULL) { + printk("%s", output); + kfree(output); + } +} + +void open_addr(unsigned char *addr, unsigned char *netmask, void *arg) +{ + change(arg, "add", addr, netmask); +} + +void close_addr(unsigned char *addr, unsigned char *netmask, void *arg) +{ + change(arg, "del", addr, netmask); +} + +char *split_if_spec(char *str, ...) +{ + char **arg, *end, *ret = NULL; + va_list ap; + + va_start(ap, str); + while ((arg = va_arg(ap, char **)) != NULL) { + if (*str == '\0') + goto out; + end = strchr(str, ','); + if (end != str) + *arg = str; + if (end == NULL) + goto out; + *end++ = '\0'; + str = end; + } + ret = str; +out: + va_end(ap); + return ret; +} diff --git a/arch/um/drivers/null.c b/arch/um/drivers/null.c new file mode 100644 index 0000000000..87087763a4 --- /dev/null +++ b/arch/um/drivers/null.c @@ -0,0 +1,51 @@ +// SPDX-License-Identifier: GPL-2.0 +/* + * Copyright (C) 2002 - 2007 Jeff Dike (jdike@{linux.intel,addtoit}.com) + */ + +#include <stddef.h> +#include <errno.h> +#include <fcntl.h> +#include "chan_user.h" +#include <os.h> + +/* This address is used only as a unique identifier */ +static int null_chan; + +static void *null_init(char *str, int device, const struct chan_opts *opts) +{ + return &null_chan; +} + +static int null_open(int input, int output, int primary, void *d, + char **dev_out) +{ + int fd; + + *dev_out = NULL; + + fd = open(DEV_NULL, O_RDWR); + return (fd < 0) ? -errno : fd; +} + +static int null_read(int fd, char *c_out, void *unused) +{ + return -ENODEV; +} + +static void null_free(void *data) +{ +} + +const struct chan_ops null_ops = { + .type = "null", + .init = null_init, + .open = null_open, + .close = generic_close, + .read = null_read, + .write = generic_write, + .console_write = generic_console_write, + .window_size = generic_window_size, + .free = null_free, + .winch = 0, +}; diff --git a/arch/um/drivers/pcap_kern.c b/arch/um/drivers/pcap_kern.c new file mode 100644 index 0000000000..25ee2c97ca --- /dev/null +++ b/arch/um/drivers/pcap_kern.c @@ -0,0 +1,113 @@ +// SPDX-License-Identifier: GPL-2.0 +/* + * Copyright (C) 2002 - 2007 Jeff Dike (jdike@{addtoit,linux.intel}.com) + */ + +#include <linux/init.h> +#include <linux/netdevice.h> +#include <net_kern.h> +#include "pcap_user.h" + +struct pcap_init { + char *host_if; + int promisc; + int optimize; + char *filter; +}; + +void pcap_init_kern(struct net_device *dev, void *data) +{ + struct uml_net_private *pri; + struct pcap_data *ppri; + struct pcap_init *init = data; + + pri = netdev_priv(dev); + ppri = (struct pcap_data *) pri->user; + ppri->host_if = init->host_if; + ppri->promisc = init->promisc; + ppri->optimize = init->optimize; + ppri->filter = init->filter; + + printk("pcap backend, host interface %s\n", ppri->host_if); +} + +static int pcap_read(int fd, struct sk_buff *skb, struct uml_net_private *lp) +{ + return pcap_user_read(fd, skb_mac_header(skb), + skb->dev->mtu + ETH_HEADER_OTHER, + (struct pcap_data *) &lp->user); +} + +static int pcap_write(int fd, struct sk_buff *skb, struct uml_net_private *lp) +{ + return -EPERM; +} + +static const struct net_kern_info pcap_kern_info = { + .init = pcap_init_kern, + .protocol = eth_protocol, + .read = pcap_read, + .write = pcap_write, +}; + +int pcap_setup(char *str, char **mac_out, void *data) +{ + struct pcap_init *init = data; + char *remain, *host_if = NULL, *options[2] = { NULL, NULL }; + int i; + + *init = ((struct pcap_init) + { .host_if = "eth0", + .promisc = 1, + .optimize = 0, + .filter = NULL }); + + remain = split_if_spec(str, &host_if, &init->filter, + &options[0], &options[1], mac_out, NULL); + if (remain != NULL) { + printk(KERN_ERR "pcap_setup - Extra garbage on " + "specification : '%s'\n", remain); + return 0; + } + + if (host_if != NULL) + init->host_if = host_if; + + for (i = 0; i < ARRAY_SIZE(options); i++) { + if (options[i] == NULL) + continue; + if (!strcmp(options[i], "promisc")) + init->promisc = 1; + else if (!strcmp(options[i], "nopromisc")) + init->promisc = 0; + else if (!strcmp(options[i], "optimize")) + init->optimize = 1; + else if (!strcmp(options[i], "nooptimize")) + init->optimize = 0; + else { + printk(KERN_ERR "pcap_setup : bad option - '%s'\n", + options[i]); + return 0; + } + } + + return 1; +} + +static struct transport pcap_transport = { + .list = LIST_HEAD_INIT(pcap_transport.list), + .name = "pcap", + .setup = pcap_setup, + .user = &pcap_user_info, + .kern = &pcap_kern_info, + .private_size = sizeof(struct pcap_data), + .setup_size = sizeof(struct pcap_init), +}; + +static int register_pcap(void) +{ + register_transport(&pcap_transport); + return 0; +} + +late_initcall(register_pcap); diff --git a/arch/um/drivers/pcap_user.c b/arch/um/drivers/pcap_user.c new file mode 100644 index 0000000000..52ddda3e3b --- /dev/null +++ b/arch/um/drivers/pcap_user.c @@ -0,0 +1,137 @@ +// SPDX-License-Identifier: GPL-2.0 +/* + * Copyright (C) 2002 - 2007 Jeff Dike (jdike@{addtoit,linux.intel}.com) + */ + +#include <errno.h> +#include <pcap.h> +#include <string.h> +#include <asm/types.h> +#include <net_user.h> +#include "pcap_user.h" +#include <um_malloc.h> + +#define PCAP_FD(p) (*(int *)(p)) + +static int pcap_user_init(void *data, void *dev) +{ + struct pcap_data *pri = data; + pcap_t *p; + char errors[PCAP_ERRBUF_SIZE]; + + p = pcap_open_live(pri->host_if, ETH_MAX_PACKET + ETH_HEADER_OTHER, + pri->promisc, 0, errors); + if (p == NULL) { + printk(UM_KERN_ERR "pcap_user_init : pcap_open_live failed - " + "'%s'\n", errors); + return -EINVAL; + } + + pri->dev = dev; + pri->pcap = p; + return 0; +} + +static int pcap_user_open(void *data) +{ + struct pcap_data *pri = data; + __u32 netmask; + int err; + + if (pri->pcap == NULL) + return -ENODEV; + + if (pri->filter != NULL) { + err = dev_netmask(pri->dev, &netmask); + if (err < 0) { + printk(UM_KERN_ERR "pcap_user_open : dev_netmask failed\n"); + return -EIO; + } + + pri->compiled = uml_kmalloc(sizeof(struct bpf_program), + UM_GFP_KERNEL); + if (pri->compiled == NULL) { + printk(UM_KERN_ERR "pcap_user_open : kmalloc failed\n"); + return -ENOMEM; + } + + err = pcap_compile(pri->pcap, + (struct bpf_program *) pri->compiled, + pri->filter, pri->optimize, netmask); + if (err < 0) { + printk(UM_KERN_ERR "pcap_user_open : pcap_compile failed - " + "'%s'\n", pcap_geterr(pri->pcap)); + goto out; + } + + err = pcap_setfilter(pri->pcap, pri->compiled); + if (err < 0) { + printk(UM_KERN_ERR "pcap_user_open : pcap_setfilter " + "failed - '%s'\n", pcap_geterr(pri->pcap)); + goto out; + } + } + + return PCAP_FD(pri->pcap); + + out: + kfree(pri->compiled); + return -EIO; +} + +static void pcap_remove(void *data) +{ + struct pcap_data *pri = data; + + if (pri->compiled != NULL) + pcap_freecode(pri->compiled); + + if (pri->pcap != NULL) + pcap_close(pri->pcap); +} + +struct pcap_handler_data { + char *buffer; + int len; +}; + +static void handler(u_char *data, const struct pcap_pkthdr *header, + const u_char *packet) +{ + int len; + + struct pcap_handler_data *hdata = (struct pcap_handler_data *) data; + + len = hdata->len < header->caplen ? hdata->len : header->caplen; + memcpy(hdata->buffer, packet, len); + hdata->len = len; +} + +int pcap_user_read(int fd, void *buffer, int len, struct pcap_data *pri) +{ + struct pcap_handler_data hdata = ((struct pcap_handler_data) + { .buffer = buffer, + .len = len }); + int n; + + n = pcap_dispatch(pri->pcap, 1, handler, (u_char *) &hdata); + if (n < 0) { + printk(UM_KERN_ERR "pcap_dispatch failed - %s\n", + pcap_geterr(pri->pcap)); + return -EIO; + } + else if (n == 0) + return 0; + return hdata.len; +} + +const struct net_user_info pcap_user_info = { + .init = pcap_user_init, + .open = pcap_user_open, + .close = NULL, + .remove = pcap_remove, + .add_address = NULL, + .delete_address = NULL, + .mtu = ETH_MAX_PACKET, + .max_packet = ETH_MAX_PACKET + ETH_HEADER_OTHER, +}; diff --git a/arch/um/drivers/pcap_user.h b/arch/um/drivers/pcap_user.h new file mode 100644 index 0000000000..216246f5f0 --- /dev/null +++ b/arch/um/drivers/pcap_user.h @@ -0,0 +1,21 @@ +/* SPDX-License-Identifier: GPL-2.0 */ +/* + * Copyright (C) 2002 Jeff Dike (jdike@karaya.com) + */ + +#include <net_user.h> + +struct pcap_data { + char *host_if; + int promisc; + int optimize; + char *filter; + void *compiled; + void *pcap; + void *dev; +}; + +extern const struct net_user_info pcap_user_info; + +extern int pcap_user_read(int fd, void *buf, int len, struct pcap_data *pri); + diff --git a/arch/um/drivers/port.h b/arch/um/drivers/port.h new file mode 100644 index 0000000000..9085b336e6 --- /dev/null +++ b/arch/um/drivers/port.h @@ -0,0 +1,20 @@ +/* SPDX-License-Identifier: GPL-2.0 */ +/* + * Copyright (C) 2001 Jeff Dike (jdike@karaya.com) + */ + +#ifndef __PORT_H__ +#define __PORT_H__ + +extern void *port_data(int port); +extern int port_wait(void *data); +extern void port_kern_close(void *d); +extern int port_connection(int fd, int *socket_out, int *pid_out); +extern int port_listen_fd(int port); +extern void port_read(int fd, void *data); +extern void port_kern_free(void *d); +extern int port_rcv_fd(int fd); +extern void port_remove_dev(void *d); + +#endif + diff --git a/arch/um/drivers/port_kern.c b/arch/um/drivers/port_kern.c new file mode 100644 index 0000000000..c52b3ff3c0 --- /dev/null +++ b/arch/um/drivers/port_kern.c @@ -0,0 +1,303 @@ +// SPDX-License-Identifier: GPL-2.0 +/* + * Copyright (C) 2001 - 2007 Jeff Dike (jdike@{linux.intel,addtoit}.com) + */ + +#include <linux/completion.h> +#include <linux/interrupt.h> +#include <linux/list.h> +#include <linux/mutex.h> +#include <linux/slab.h> +#include <linux/workqueue.h> +#include <asm/atomic.h> +#include <init.h> +#include <irq_kern.h> +#include <os.h> +#include "port.h" + +struct port_list { + struct list_head list; + atomic_t wait_count; + int has_connection; + struct completion done; + int port; + int fd; + spinlock_t lock; + struct list_head pending; + struct list_head connections; +}; + +struct port_dev { + struct port_list *port; + int helper_pid; + int telnetd_pid; +}; + +struct connection { + struct list_head list; + int fd; + int helper_pid; + int socket[2]; + int telnetd_pid; + struct port_list *port; +}; + +static irqreturn_t pipe_interrupt(int irq, void *data) +{ + struct connection *conn = data; + int fd; + + fd = os_rcv_fd(conn->socket[0], &conn->helper_pid); + if (fd < 0) { + if (fd == -EAGAIN) + return IRQ_NONE; + + printk(KERN_ERR "pipe_interrupt : os_rcv_fd returned %d\n", + -fd); + os_close_file(conn->fd); + } + + list_del(&conn->list); + + conn->fd = fd; + list_add(&conn->list, &conn->port->connections); + + complete(&conn->port->done); + return IRQ_HANDLED; +} + +#define NO_WAITER_MSG \ + "****\n" \ + "There are currently no UML consoles waiting for port connections.\n" \ + "Either disconnect from one to make it available or activate some more\n" \ + "by enabling more consoles in the UML /etc/inittab.\n" \ + "****\n" + +static int port_accept(struct port_list *port) +{ + struct connection *conn; + int fd, socket[2], pid; + + fd = port_connection(port->fd, socket, &pid); + if (fd < 0) { + if (fd != -EAGAIN) + printk(KERN_ERR "port_accept : port_connection " + "returned %d\n", -fd); + goto out; + } + + conn = kmalloc(sizeof(*conn), GFP_ATOMIC); + if (conn == NULL) { + printk(KERN_ERR "port_accept : failed to allocate " + "connection\n"); + goto out_close; + } + *conn = ((struct connection) + { .list = LIST_HEAD_INIT(conn->list), + .fd = fd, + .socket = { socket[0], socket[1] }, + .telnetd_pid = pid, + .port = port }); + + if (um_request_irq(TELNETD_IRQ, socket[0], IRQ_READ, pipe_interrupt, + IRQF_SHARED, "telnetd", conn) < 0) { + printk(KERN_ERR "port_accept : failed to get IRQ for " + "telnetd\n"); + goto out_free; + } + + if (atomic_read(&port->wait_count) == 0) { + os_write_file(fd, NO_WAITER_MSG, sizeof(NO_WAITER_MSG)); + printk(KERN_ERR "No one waiting for port\n"); + } + list_add(&conn->list, &port->pending); + return 1; + + out_free: + kfree(conn); + out_close: + os_close_file(fd); + os_kill_process(pid, 1); + out: + return 0; +} + +static DEFINE_MUTEX(ports_mutex); +static LIST_HEAD(ports); + +static void port_work_proc(struct work_struct *unused) +{ + struct port_list *port; + struct list_head *ele; + unsigned long flags; + + local_irq_save(flags); + list_for_each(ele, &ports) { + port = list_entry(ele, struct port_list, list); + if (!port->has_connection) + continue; + + while (port_accept(port)) + ; + port->has_connection = 0; + } + local_irq_restore(flags); +} + +static DECLARE_WORK(port_work, port_work_proc); + +static irqreturn_t port_interrupt(int irq, void *data) +{ + struct port_list *port = data; + + port->has_connection = 1; + schedule_work(&port_work); + return IRQ_HANDLED; +} + +void *port_data(int port_num) +{ + struct list_head *ele; + struct port_list *port; + struct port_dev *dev = NULL; + int fd; + + mutex_lock(&ports_mutex); + list_for_each(ele, &ports) { + port = list_entry(ele, struct port_list, list); + if (port->port == port_num) + goto found; + } + port = kmalloc(sizeof(struct port_list), GFP_KERNEL); + if (port == NULL) { + printk(KERN_ERR "Allocation of port list failed\n"); + goto out; + } + + fd = port_listen_fd(port_num); + if (fd < 0) { + printk(KERN_ERR "binding to port %d failed, errno = %d\n", + port_num, -fd); + goto out_free; + } + + if (um_request_irq(ACCEPT_IRQ, fd, IRQ_READ, port_interrupt, + IRQF_SHARED, "port", port) < 0) { + printk(KERN_ERR "Failed to get IRQ for port %d\n", port_num); + goto out_close; + } + + *port = ((struct port_list) + { .list = LIST_HEAD_INIT(port->list), + .wait_count = ATOMIC_INIT(0), + .has_connection = 0, + .port = port_num, + .fd = fd, + .pending = LIST_HEAD_INIT(port->pending), + .connections = LIST_HEAD_INIT(port->connections) }); + spin_lock_init(&port->lock); + init_completion(&port->done); + list_add(&port->list, &ports); + + found: + dev = kmalloc(sizeof(struct port_dev), GFP_KERNEL); + if (dev == NULL) { + printk(KERN_ERR "Allocation of port device entry failed\n"); + goto out; + } + + *dev = ((struct port_dev) { .port = port, + .helper_pid = -1, + .telnetd_pid = -1 }); + goto out; + + out_close: + os_close_file(fd); + out_free: + kfree(port); + out: + mutex_unlock(&ports_mutex); + return dev; +} + +int port_wait(void *data) +{ + struct port_dev *dev = data; + struct connection *conn; + struct port_list *port = dev->port; + int fd; + + atomic_inc(&port->wait_count); + while (1) { + fd = -ERESTARTSYS; + if (wait_for_completion_interruptible(&port->done)) + goto out; + + spin_lock(&port->lock); + + conn = list_entry(port->connections.next, struct connection, + list); + list_del(&conn->list); + spin_unlock(&port->lock); + + os_shutdown_socket(conn->socket[0], 1, 1); + os_close_file(conn->socket[0]); + os_shutdown_socket(conn->socket[1], 1, 1); + os_close_file(conn->socket[1]); + + /* This is done here because freeing an IRQ can't be done + * within the IRQ handler. So, pipe_interrupt always ups + * the semaphore regardless of whether it got a successful + * connection. Then we loop here throwing out failed + * connections until a good one is found. + */ + um_free_irq(TELNETD_IRQ, conn); + + if (conn->fd >= 0) + break; + os_close_file(conn->fd); + kfree(conn); + } + + fd = conn->fd; + dev->helper_pid = conn->helper_pid; + dev->telnetd_pid = conn->telnetd_pid; + kfree(conn); + out: + atomic_dec(&port->wait_count); + return fd; +} + +void port_remove_dev(void *d) +{ + struct port_dev *dev = d; + + if (dev->helper_pid != -1) + os_kill_process(dev->helper_pid, 0); + if (dev->telnetd_pid != -1) + os_kill_process(dev->telnetd_pid, 1); + dev->helper_pid = -1; + dev->telnetd_pid = -1; +} + +void port_kern_free(void *d) +{ + struct port_dev *dev = d; + + port_remove_dev(dev); + kfree(dev); +} + +static void free_port(void) +{ + struct list_head *ele; + struct port_list *port; + + list_for_each(ele, &ports) { + port = list_entry(ele, struct port_list, list); + free_irq_by_fd(port->fd); + os_close_file(port->fd); + } +} + +__uml_exitcall(free_port); diff --git a/arch/um/drivers/port_user.c b/arch/um/drivers/port_user.c new file mode 100644 index 0000000000..3c62ae81df --- /dev/null +++ b/arch/um/drivers/port_user.c @@ -0,0 +1,217 @@ +// SPDX-License-Identifier: GPL-2.0 +/* + * Copyright (C) 2001 - 2007 Jeff Dike (jdike@{linux.intel,addtoit}.com) + */ + +#include <stdio.h> +#include <stdlib.h> +#include <string.h> +#include <errno.h> +#include <termios.h> +#include <unistd.h> +#include <netinet/in.h> +#include "chan_user.h" +#include <os.h> +#include "port.h" +#include <um_malloc.h> + +struct port_chan { + int raw; + struct termios tt; + void *kernel_data; + char dev[sizeof("32768\0")]; +}; + +static void *port_init(char *str, int device, const struct chan_opts *opts) +{ + struct port_chan *data; + void *kern_data; + char *end; + int port; + + if (*str != ':') { + printk(UM_KERN_ERR "port_init : channel type 'port' must " + "specify a port number\n"); + return NULL; + } + str++; + port = strtoul(str, &end, 0); + if ((*end != '\0') || (end == str)) { + printk(UM_KERN_ERR "port_init : couldn't parse port '%s'\n", + str); + return NULL; + } + + kern_data = port_data(port); + if (kern_data == NULL) + return NULL; + + data = uml_kmalloc(sizeof(*data), UM_GFP_KERNEL); + if (data == NULL) + goto err; + + *data = ((struct port_chan) { .raw = opts->raw, + .kernel_data = kern_data }); + sprintf(data->dev, "%d", port); + + return data; + err: + port_kern_free(kern_data); + return NULL; +} + +static void port_free(void *d) +{ + struct port_chan *data = d; + + port_kern_free(data->kernel_data); + kfree(data); +} + +static int port_open(int input, int output, int primary, void *d, + char **dev_out) +{ + struct port_chan *data = d; + int fd, err; + + fd = port_wait(data->kernel_data); + if ((fd >= 0) && data->raw) { + CATCH_EINTR(err = tcgetattr(fd, &data->tt)); + if (err) + return err; + + err = raw(fd); + if (err) + return err; + } + *dev_out = data->dev; + return fd; +} + +static void port_close(int fd, void *d) +{ + struct port_chan *data = d; + + port_remove_dev(data->kernel_data); + os_close_file(fd); +} + +const struct chan_ops port_ops = { + .type = "port", + .init = port_init, + .open = port_open, + .close = port_close, + .read = generic_read, + .write = generic_write, + .console_write = generic_console_write, + .window_size = generic_window_size, + .free = port_free, + .winch = 1, +}; + +int port_listen_fd(int port) +{ + struct sockaddr_in addr; + int fd, err, arg; + + fd = socket(PF_INET, SOCK_STREAM, 0); + if (fd == -1) + return -errno; + + arg = 1; + if (setsockopt(fd, SOL_SOCKET, SO_REUSEADDR, &arg, sizeof(arg)) < 0) { + err = -errno; + goto out; + } + + addr.sin_family = AF_INET; + addr.sin_port = htons(port); + addr.sin_addr.s_addr = htonl(INADDR_ANY); + if (bind(fd, (struct sockaddr *) &addr, sizeof(addr)) < 0) { + err = -errno; + goto out; + } + + if (listen(fd, 1) < 0) { + err = -errno; + goto out; + } + + err = os_set_fd_block(fd, 0); + if (err < 0) + goto out; + + return fd; + out: + close(fd); + return err; +} + +struct port_pre_exec_data { + int sock_fd; + int pipe_fd; +}; + +static void port_pre_exec(void *arg) +{ + struct port_pre_exec_data *data = arg; + + dup2(data->sock_fd, 0); + dup2(data->sock_fd, 1); + dup2(data->sock_fd, 2); + close(data->sock_fd); + dup2(data->pipe_fd, 3); + shutdown(3, SHUT_RD); + close(data->pipe_fd); +} + +int port_connection(int fd, int *socket, int *pid_out) +{ + int new, err; + char *env; + char *argv[] = { "in.telnetd", "-L", + OS_LIB_PATH "/uml/port-helper", NULL }; + struct port_pre_exec_data data; + + if ((env = getenv("UML_PORT_HELPER"))) + argv[2] = env; + + new = accept(fd, NULL, 0); + if (new < 0) + return -errno; + + err = os_access(argv[2], X_OK); + if (err < 0) { + printk(UM_KERN_ERR "port_connection : error accessing port-helper " + "executable at %s: %s\n", argv[2], strerror(-err)); + if (env == NULL) + printk(UM_KERN_ERR "Set UML_PORT_HELPER environment " + "variable to path to uml-utilities port-helper " + "binary\n"); + goto out_close; + } + + err = os_pipe(socket, 0, 0); + if (err < 0) + goto out_close; + + data = ((struct port_pre_exec_data) + { .sock_fd = new, + .pipe_fd = socket[1] }); + + err = run_helper(port_pre_exec, &data, argv); + if (err < 0) + goto out_shutdown; + + *pid_out = err; + return new; + + out_shutdown: + shutdown(socket[0], SHUT_RDWR); + close(socket[0]); + shutdown(socket[1], SHUT_RDWR); + close(socket[1]); + out_close: + close(new); + return err; +} diff --git a/arch/um/drivers/pty.c b/arch/um/drivers/pty.c new file mode 100644 index 0000000000..39c60068cf --- /dev/null +++ b/arch/um/drivers/pty.c @@ -0,0 +1,165 @@ +// SPDX-License-Identifier: GPL-2.0 +/* + * Copyright (C) 2001 - 2007 Jeff Dike (jdike@{addtoit,linux.intel}.com) + */ + +#include <stdio.h> +#include <stdlib.h> +#include <unistd.h> +#include <errno.h> +#include <fcntl.h> +#include <string.h> +#include <termios.h> +#include <sys/stat.h> +#include "chan_user.h" +#include <os.h> +#include <um_malloc.h> + +struct pty_chan { + void (*announce)(char *dev_name, int dev); + int dev; + int raw; + struct termios tt; + char dev_name[sizeof("/dev/pts/0123456\0")]; +}; + +static void *pty_chan_init(char *str, int device, const struct chan_opts *opts) +{ + struct pty_chan *data; + + data = uml_kmalloc(sizeof(*data), UM_GFP_KERNEL); + if (data == NULL) + return NULL; + + *data = ((struct pty_chan) { .announce = opts->announce, + .dev = device, + .raw = opts->raw }); + return data; +} + +static int pts_open(int input, int output, int primary, void *d, + char **dev_out) +{ + struct pty_chan *data = d; + char *dev; + int fd, err; + + fd = get_pty(); + if (fd < 0) { + err = -errno; + printk(UM_KERN_ERR "open_pts : Failed to open pts\n"); + return err; + } + + if (data->raw) { + CATCH_EINTR(err = tcgetattr(fd, &data->tt)); + if (err) + goto out_close; + + err = raw(fd); + if (err) + goto out_close; + } + + dev = ptsname(fd); + sprintf(data->dev_name, "%s", dev); + *dev_out = data->dev_name; + + if (data->announce) + (*data->announce)(dev, data->dev); + + return fd; + +out_close: + close(fd); + return err; +} + +static int getmaster(char *line) +{ + struct stat buf; + char *pty, *bank, *cp; + int master, err; + + pty = &line[strlen("/dev/ptyp")]; + for (bank = "pqrs"; *bank; bank++) { + line[strlen("/dev/pty")] = *bank; + *pty = '0'; + /* Did we hit the end ? */ + if ((stat(line, &buf) < 0) && (errno == ENOENT)) + break; + + for (cp = "0123456789abcdef"; *cp; cp++) { + *pty = *cp; + master = open(line, O_RDWR); + if (master >= 0) { + char *tp = &line[strlen("/dev/")]; + + /* verify slave side is usable */ + *tp = 't'; + err = access(line, R_OK | W_OK); + *tp = 'p'; + if (!err) + return master; + close(master); + } + } + } + + printk(UM_KERN_ERR "getmaster - no usable host pty devices\n"); + return -ENOENT; +} + +static int pty_open(int input, int output, int primary, void *d, + char **dev_out) +{ + struct pty_chan *data = d; + int fd, err; + char dev[sizeof("/dev/ptyxx\0")] = "/dev/ptyxx"; + + fd = getmaster(dev); + if (fd < 0) + return fd; + + if (data->raw) { + err = raw(fd); + if (err) { + close(fd); + return err; + } + } + + if (data->announce) + (*data->announce)(dev, data->dev); + + sprintf(data->dev_name, "%s", dev); + *dev_out = data->dev_name; + + return fd; +} + +const struct chan_ops pty_ops = { + .type = "pty", + .init = pty_chan_init, + .open = pty_open, + .close = generic_close, + .read = generic_read, + .write = generic_write, + .console_write = generic_console_write, + .window_size = generic_window_size, + .free = generic_free, + .winch = 0, +}; + +const struct chan_ops pts_ops = { + .type = "pts", + .init = pty_chan_init, + .open = pts_open, + .close = generic_close, + .read = generic_read, + .write = generic_write, + .console_write = generic_console_write, + .window_size = generic_window_size, + .free = generic_free, + .winch = 0, +}; diff --git a/arch/um/drivers/random.c b/arch/um/drivers/random.c new file mode 100644 index 0000000000..da985e0dc6 --- /dev/null +++ b/arch/um/drivers/random.c @@ -0,0 +1,121 @@ +/* Copyright (C) 2005 - 2008 Jeff Dike <jdike@{linux.intel,addtoit}.com> */ + +/* Much of this ripped from drivers/char/hw_random.c, see there for other + * copyright. + * + * This software may be used and distributed according to the terms + * of the GNU General Public License, incorporated herein by reference. + */ +#include <linux/sched/signal.h> +#include <linux/module.h> +#include <linux/fs.h> +#include <linux/interrupt.h> +#include <linux/miscdevice.h> +#include <linux/hw_random.h> +#include <linux/delay.h> +#include <linux/uaccess.h> +#include <init.h> +#include <irq_kern.h> +#include <os.h> + +/* + * core module information + */ +#define RNG_MODULE_NAME "hw_random" + +/* Changed at init time, in the non-modular case, and at module load + * time, in the module case. Presumably, the module subsystem + * protects against a module being loaded twice at the same time. + */ +static int random_fd = -1; +static struct hwrng hwrng; +static DECLARE_COMPLETION(have_data); + +static int rng_dev_read(struct hwrng *rng, void *buf, size_t max, bool block) +{ + int ret; + + for (;;) { + ret = os_read_file(random_fd, buf, max); + if (block && ret == -EAGAIN) { + add_sigio_fd(random_fd); + + ret = wait_for_completion_killable(&have_data); + + ignore_sigio_fd(random_fd); + deactivate_fd(random_fd, RANDOM_IRQ); + + if (ret < 0) + break; + } else { + break; + } + } + + return ret != -EAGAIN ? ret : 0; +} + +static irqreturn_t random_interrupt(int irq, void *data) +{ + complete(&have_data); + + return IRQ_HANDLED; +} + +/* + * rng_init - initialize RNG module + */ +static int __init rng_init (void) +{ + int err; + + err = os_open_file("/dev/random", of_read(OPENFLAGS()), 0); + if (err < 0) + goto out; + + random_fd = err; + err = um_request_irq(RANDOM_IRQ, random_fd, IRQ_READ, random_interrupt, + 0, "random", NULL); + if (err < 0) + goto err_out_cleanup_hw; + + sigio_broken(random_fd); + hwrng.name = RNG_MODULE_NAME; + hwrng.read = rng_dev_read; + + err = hwrng_register(&hwrng); + if (err) { + pr_err(RNG_MODULE_NAME " registering failed (%d)\n", err); + goto err_out_cleanup_hw; + } +out: + return err; + +err_out_cleanup_hw: + os_close_file(random_fd); + random_fd = -1; + goto out; +} + +/* + * rng_cleanup - shutdown RNG module + */ + +static void cleanup(void) +{ + free_irq_by_fd(random_fd); + os_close_file(random_fd); +} + +static void __exit rng_cleanup(void) +{ + hwrng_unregister(&hwrng); + os_close_file(random_fd); +} + +module_init (rng_init); +module_exit (rng_cleanup); +__uml_exitcall(cleanup); + +MODULE_DESCRIPTION("UML Host Random Number Generator (RNG) driver"); +MODULE_LICENSE("GPL"); diff --git a/arch/um/drivers/rtc.h b/arch/um/drivers/rtc.h new file mode 100644 index 0000000000..95e41c7d35 --- /dev/null +++ b/arch/um/drivers/rtc.h @@ -0,0 +1,15 @@ +/* SPDX-License-Identifier: GPL-2.0 */ +/* + * Copyright (C) 2020 Intel Corporation + * Author: Johannes Berg <johannes@sipsolutions.net> + */ +#ifndef __UM_RTC_H__ +#define __UM_RTC_H__ + +int uml_rtc_start(bool timetravel); +int uml_rtc_enable_alarm(unsigned long long delta_seconds); +void uml_rtc_disable_alarm(void); +void uml_rtc_stop(bool timetravel); +void uml_rtc_send_timetravel_alarm(void); + +#endif /* __UM_RTC_H__ */ diff --git a/arch/um/drivers/rtc_kern.c b/arch/um/drivers/rtc_kern.c new file mode 100644 index 0000000000..97ceb205cf --- /dev/null +++ b/arch/um/drivers/rtc_kern.c @@ -0,0 +1,211 @@ +// SPDX-License-Identifier: GPL-2.0 +/* + * Copyright (C) 2020 Intel Corporation + * Author: Johannes Berg <johannes@sipsolutions.net> + */ +#include <linux/platform_device.h> +#include <linux/time-internal.h> +#include <linux/suspend.h> +#include <linux/err.h> +#include <linux/rtc.h> +#include <kern_util.h> +#include <irq_kern.h> +#include <os.h> +#include "rtc.h" + +static time64_t uml_rtc_alarm_time; +static bool uml_rtc_alarm_enabled; +static struct rtc_device *uml_rtc; +static int uml_rtc_irq_fd, uml_rtc_irq; + +#ifdef CONFIG_UML_TIME_TRAVEL_SUPPORT + +static void uml_rtc_time_travel_alarm(struct time_travel_event *ev) +{ + uml_rtc_send_timetravel_alarm(); +} + +static struct time_travel_event uml_rtc_alarm_event = { + .fn = uml_rtc_time_travel_alarm, +}; +#endif + +static int uml_rtc_read_time(struct device *dev, struct rtc_time *tm) +{ + struct timespec64 ts; + + /* Use this to get correct time in time-travel mode */ + read_persistent_clock64(&ts); + rtc_time64_to_tm(timespec64_to_ktime(ts) / NSEC_PER_SEC, tm); + + return 0; +} + +static int uml_rtc_read_alarm(struct device *dev, struct rtc_wkalrm *alrm) +{ + rtc_time64_to_tm(uml_rtc_alarm_time, &alrm->time); + alrm->enabled = uml_rtc_alarm_enabled; + + return 0; +} + +static int uml_rtc_alarm_irq_enable(struct device *dev, unsigned int enable) +{ + unsigned long long secs; + + if (!enable && !uml_rtc_alarm_enabled) + return 0; + + uml_rtc_alarm_enabled = enable; + + secs = uml_rtc_alarm_time - ktime_get_real_seconds(); + + if (time_travel_mode == TT_MODE_OFF) { + if (!enable) { + uml_rtc_disable_alarm(); + return 0; + } + + /* enable or update */ + return uml_rtc_enable_alarm(secs); + } else { + time_travel_del_event(¨_rtc_alarm_event); + + if (enable) + time_travel_add_event_rel(¨_rtc_alarm_event, + secs * NSEC_PER_SEC); + } + + return 0; +} + +static int uml_rtc_set_alarm(struct device *dev, struct rtc_wkalrm *alrm) +{ + uml_rtc_alarm_irq_enable(dev, 0); + uml_rtc_alarm_time = rtc_tm_to_time64(&alrm->time); + uml_rtc_alarm_irq_enable(dev, alrm->enabled); + + return 0; +} + +static const struct rtc_class_ops uml_rtc_ops = { + .read_time = uml_rtc_read_time, + .read_alarm = uml_rtc_read_alarm, + .alarm_irq_enable = uml_rtc_alarm_irq_enable, + .set_alarm = uml_rtc_set_alarm, +}; + +static irqreturn_t uml_rtc_interrupt(int irq, void *data) +{ + unsigned long long c = 0; + + /* alarm triggered, it's now off */ + uml_rtc_alarm_enabled = false; + + os_read_file(uml_rtc_irq_fd, &c, sizeof(c)); + WARN_ON(c == 0); + + pm_system_wakeup(); + rtc_update_irq(uml_rtc, 1, RTC_IRQF | RTC_AF); + + return IRQ_HANDLED; +} + +static int uml_rtc_setup(void) +{ + int err; + + err = uml_rtc_start(time_travel_mode != TT_MODE_OFF); + if (WARN(err < 0, "err = %d\n", err)) + return err; + + uml_rtc_irq_fd = err; + + err = um_request_irq(UM_IRQ_ALLOC, uml_rtc_irq_fd, IRQ_READ, + uml_rtc_interrupt, 0, "rtc", NULL); + if (err < 0) { + uml_rtc_stop(time_travel_mode != TT_MODE_OFF); + return err; + } + + irq_set_irq_wake(err, 1); + + uml_rtc_irq = err; + return 0; +} + +static void uml_rtc_cleanup(void) +{ + um_free_irq(uml_rtc_irq, NULL); + uml_rtc_stop(time_travel_mode != TT_MODE_OFF); +} + +static int uml_rtc_probe(struct platform_device *pdev) +{ + int err; + + err = uml_rtc_setup(); + if (err) + return err; + + uml_rtc = devm_rtc_allocate_device(&pdev->dev); + if (IS_ERR(uml_rtc)) { + err = PTR_ERR(uml_rtc); + goto cleanup; + } + + uml_rtc->ops = ¨_rtc_ops; + + device_init_wakeup(&pdev->dev, 1); + + err = devm_rtc_register_device(uml_rtc); + if (err) + goto cleanup; + + return 0; +cleanup: + uml_rtc_cleanup(); + return err; +} + +static int uml_rtc_remove(struct platform_device *pdev) +{ + device_init_wakeup(&pdev->dev, 0); + uml_rtc_cleanup(); + return 0; +} + +static struct platform_driver uml_rtc_driver = { + .probe = uml_rtc_probe, + .remove = uml_rtc_remove, + .driver = { + .name = "uml-rtc", + }, +}; + +static int __init uml_rtc_init(void) +{ + struct platform_device *pdev; + int err; + + err = platform_driver_register(¨_rtc_driver); + if (err) + return err; + + pdev = platform_device_alloc("uml-rtc", 0); + if (!pdev) { + err = -ENOMEM; + goto unregister; + } + + err = platform_device_add(pdev); + if (err) + goto unregister; + return 0; + +unregister: + platform_device_put(pdev); + platform_driver_unregister(¨_rtc_driver); + return err; +} +device_initcall(uml_rtc_init); diff --git a/arch/um/drivers/rtc_user.c b/arch/um/drivers/rtc_user.c new file mode 100644 index 0000000000..7c3cec4c68 --- /dev/null +++ b/arch/um/drivers/rtc_user.c @@ -0,0 +1,81 @@ +// SPDX-License-Identifier: GPL-2.0 +/* + * Copyright (C) 2020 Intel Corporation + * Author: Johannes Berg <johannes@sipsolutions.net> + */ +#include <stdbool.h> +#include <os.h> +#include <errno.h> +#include <sched.h> +#include <unistd.h> +#include <kern_util.h> +#include <sys/select.h> +#include <stdio.h> +#include <sys/timerfd.h> +#include "rtc.h" + +static int uml_rtc_irq_fds[2]; + +void uml_rtc_send_timetravel_alarm(void) +{ + unsigned long long c = 1; + + CATCH_EINTR(write(uml_rtc_irq_fds[1], &c, sizeof(c))); +} + +int uml_rtc_start(bool timetravel) +{ + int err; + + if (timetravel) { + int err = os_pipe(uml_rtc_irq_fds, 1, 1); + if (err) + goto fail; + } else { + uml_rtc_irq_fds[0] = timerfd_create(CLOCK_REALTIME, TFD_CLOEXEC); + if (uml_rtc_irq_fds[0] < 0) { + err = -errno; + goto fail; + } + + /* apparently timerfd won't send SIGIO, use workaround */ + sigio_broken(uml_rtc_irq_fds[0]); + err = add_sigio_fd(uml_rtc_irq_fds[0]); + if (err < 0) { + close(uml_rtc_irq_fds[0]); + goto fail; + } + } + + return uml_rtc_irq_fds[0]; +fail: + uml_rtc_stop(timetravel); + return err; +} + +int uml_rtc_enable_alarm(unsigned long long delta_seconds) +{ + struct itimerspec it = { + .it_value = { + .tv_sec = delta_seconds, + }, + }; + + if (timerfd_settime(uml_rtc_irq_fds[0], 0, &it, NULL)) + return -errno; + return 0; +} + +void uml_rtc_disable_alarm(void) +{ + uml_rtc_enable_alarm(0); +} + +void uml_rtc_stop(bool timetravel) +{ + if (timetravel) + os_close_file(uml_rtc_irq_fds[1]); + else + ignore_sigio_fd(uml_rtc_irq_fds[0]); + os_close_file(uml_rtc_irq_fds[0]); +} diff --git a/arch/um/drivers/slip.h b/arch/um/drivers/slip.h new file mode 100644 index 0000000000..0f3b7ca994 --- /dev/null +++ b/arch/um/drivers/slip.h @@ -0,0 +1,21 @@ +/* SPDX-License-Identifier: GPL-2.0 */ +#ifndef __UM_SLIP_H +#define __UM_SLIP_H + +#include "slip_common.h" + +struct slip_data { + void *dev; + char name[sizeof("slnnnnn\0")]; + char *addr; + char *gate_addr; + int slave; + struct slip_proto slip; +}; + +extern const struct net_user_info slip_user_info; + +extern int slip_user_read(int fd, void *buf, int len, struct slip_data *pri); +extern int slip_user_write(int fd, void *buf, int len, struct slip_data *pri); + +#endif diff --git a/arch/um/drivers/slip_common.c b/arch/um/drivers/slip_common.c new file mode 100644 index 0000000000..20fe4f4274 --- /dev/null +++ b/arch/um/drivers/slip_common.c @@ -0,0 +1,55 @@ +// SPDX-License-Identifier: GPL-2.0 +#include <string.h> +#include "slip_common.h" +#include <net_user.h> + +int slip_proto_read(int fd, void *buf, int len, struct slip_proto *slip) +{ + int i, n, size, start; + + if(slip->more > 0){ + i = 0; + while(i < slip->more){ + size = slip_unesc(slip->ibuf[i++], slip->ibuf, + &slip->pos, &slip->esc); + if(size){ + memcpy(buf, slip->ibuf, size); + memmove(slip->ibuf, &slip->ibuf[i], + slip->more - i); + slip->more = slip->more - i; + return size; + } + } + slip->more = 0; + } + + n = net_read(fd, &slip->ibuf[slip->pos], + sizeof(slip->ibuf) - slip->pos); + if(n <= 0) + return n; + + start = slip->pos; + for(i = 0; i < n; i++){ + size = slip_unesc(slip->ibuf[start + i], slip->ibuf,&slip->pos, + &slip->esc); + if(size){ + memcpy(buf, slip->ibuf, size); + memmove(slip->ibuf, &slip->ibuf[start+i+1], + n - (i + 1)); + slip->more = n - (i + 1); + return size; + } + } + return 0; +} + +int slip_proto_write(int fd, void *buf, int len, struct slip_proto *slip) +{ + int actual, n; + + actual = slip_esc(buf, slip->obuf, len); + n = net_write(fd, slip->obuf, actual); + if(n < 0) + return n; + else return len; +} diff --git a/arch/um/drivers/slip_common.h b/arch/um/drivers/slip_common.h new file mode 100644 index 0000000000..d3798b5caf --- /dev/null +++ b/arch/um/drivers/slip_common.h @@ -0,0 +1,106 @@ +/* SPDX-License-Identifier: GPL-2.0 */ +#ifndef __UM_SLIP_COMMON_H +#define __UM_SLIP_COMMON_H + +#define BUF_SIZE 1500 + /* two bytes each for a (pathological) max packet of escaped chars + * + * terminating END char + initial END char */ +#define ENC_BUF_SIZE (2 * BUF_SIZE + 2) + +/* SLIP protocol characters. */ +#define SLIP_END 0300 /* indicates end of frame */ +#define SLIP_ESC 0333 /* indicates byte stuffing */ +#define SLIP_ESC_END 0334 /* ESC ESC_END means END 'data' */ +#define SLIP_ESC_ESC 0335 /* ESC ESC_ESC means ESC 'data' */ + +static inline int slip_unesc(unsigned char c, unsigned char *buf, int *pos, + int *esc) +{ + int ret; + + switch(c){ + case SLIP_END: + *esc = 0; + ret=*pos; + *pos=0; + return(ret); + case SLIP_ESC: + *esc = 1; + return(0); + case SLIP_ESC_ESC: + if(*esc){ + *esc = 0; + c = SLIP_ESC; + } + break; + case SLIP_ESC_END: + if(*esc){ + *esc = 0; + c = SLIP_END; + } + break; + } + buf[(*pos)++] = c; + return(0); +} + +static inline int slip_esc(unsigned char *s, unsigned char *d, int len) +{ + unsigned char *ptr = d; + unsigned char c; + + /* + * Send an initial END character to flush out any + * data that may have accumulated in the receiver + * due to line noise. + */ + + *ptr++ = SLIP_END; + + /* + * For each byte in the packet, send the appropriate + * character sequence, according to the SLIP protocol. + */ + + while (len-- > 0) { + switch(c = *s++) { + case SLIP_END: + *ptr++ = SLIP_ESC; + *ptr++ = SLIP_ESC_END; + break; + case SLIP_ESC: + *ptr++ = SLIP_ESC; + *ptr++ = SLIP_ESC_ESC; + break; + default: + *ptr++ = c; + break; + } + } + *ptr++ = SLIP_END; + return (ptr - d); +} + +struct slip_proto { + unsigned char ibuf[ENC_BUF_SIZE]; + unsigned char obuf[ENC_BUF_SIZE]; + int more; /* more data: do not read fd until ibuf has been drained */ + int pos; + int esc; +}; + +static inline void slip_proto_init(struct slip_proto * slip) +{ + memset(slip->ibuf, 0, sizeof(slip->ibuf)); + memset(slip->obuf, 0, sizeof(slip->obuf)); + slip->more = 0; + slip->pos = 0; + slip->esc = 0; +} + +extern int slip_proto_read(int fd, void *buf, int len, + struct slip_proto *slip); +extern int slip_proto_write(int fd, void *buf, int len, + struct slip_proto *slip); + +#endif diff --git a/arch/um/drivers/slip_kern.c b/arch/um/drivers/slip_kern.c new file mode 100644 index 0000000000..c58ccdcc16 --- /dev/null +++ b/arch/um/drivers/slip_kern.c @@ -0,0 +1,93 @@ +// SPDX-License-Identifier: GPL-2.0 +/* + * Copyright (C) 2007 Jeff Dike (jdike@{addtoit,linux.intel}.com) + */ + +#include <linux/if_arp.h> +#include <linux/init.h> +#include <linux/netdevice.h> +#include <net_kern.h> +#include "slip.h" + +struct slip_init { + char *gate_addr; +}; + +static void slip_init(struct net_device *dev, void *data) +{ + struct uml_net_private *private; + struct slip_data *spri; + struct slip_init *init = data; + + private = netdev_priv(dev); + spri = (struct slip_data *) private->user; + + memset(spri->name, 0, sizeof(spri->name)); + spri->addr = NULL; + spri->gate_addr = init->gate_addr; + spri->slave = -1; + spri->dev = dev; + + slip_proto_init(&spri->slip); + + dev->hard_header_len = 0; + dev->header_ops = NULL; + dev->addr_len = 0; + dev->type = ARPHRD_SLIP; + dev->tx_queue_len = 256; + dev->flags = IFF_NOARP; + printk("SLIP backend - SLIP IP = %s\n", spri->gate_addr); +} + +static unsigned short slip_protocol(struct sk_buff *skbuff) +{ + return htons(ETH_P_IP); +} + +static int slip_read(int fd, struct sk_buff *skb, struct uml_net_private *lp) +{ + return slip_user_read(fd, skb_mac_header(skb), skb->dev->mtu, + (struct slip_data *) &lp->user); +} + +static int slip_write(int fd, struct sk_buff *skb, struct uml_net_private *lp) +{ + return slip_user_write(fd, skb->data, skb->len, + (struct slip_data *) &lp->user); +} + +static const struct net_kern_info slip_kern_info = { + .init = slip_init, + .protocol = slip_protocol, + .read = slip_read, + .write = slip_write, +}; + +static int slip_setup(char *str, char **mac_out, void *data) +{ + struct slip_init *init = data; + + *init = ((struct slip_init) { .gate_addr = NULL }); + + if (str[0] != '\0') + init->gate_addr = str; + return 1; +} + +static struct transport slip_transport = { + .list = LIST_HEAD_INIT(slip_transport.list), + .name = "slip", + .setup = slip_setup, + .user = &slip_user_info, + .kern = &slip_kern_info, + .private_size = sizeof(struct slip_data), + .setup_size = sizeof(struct slip_init), +}; + +static int register_slip(void) +{ + register_transport(&slip_transport); + return 0; +} + +late_initcall(register_slip); diff --git a/arch/um/drivers/slip_user.c b/arch/um/drivers/slip_user.c new file mode 100644 index 0000000000..7334019c9e --- /dev/null +++ b/arch/um/drivers/slip_user.c @@ -0,0 +1,252 @@ +// SPDX-License-Identifier: GPL-2.0 +/* + * Copyright (C) 2007 Jeff Dike (jdike@{addtoit,linux.intel}.com) + */ + +#include <stdio.h> +#include <stdlib.h> +#include <unistd.h> +#include <errno.h> +#include <fcntl.h> +#include <string.h> +#include <termios.h> +#include <sys/wait.h> +#include <net_user.h> +#include <os.h> +#include "slip.h" +#include <um_malloc.h> + +static int slip_user_init(void *data, void *dev) +{ + struct slip_data *pri = data; + + pri->dev = dev; + return 0; +} + +static int set_up_tty(int fd) +{ + int i; + struct termios tios; + + if (tcgetattr(fd, &tios) < 0) { + printk(UM_KERN_ERR "could not get initial terminal " + "attributes\n"); + return -1; + } + + tios.c_cflag = CS8 | CREAD | HUPCL | CLOCAL; + tios.c_iflag = IGNBRK | IGNPAR; + tios.c_oflag = 0; + tios.c_lflag = 0; + for (i = 0; i < NCCS; i++) + tios.c_cc[i] = 0; + tios.c_cc[VMIN] = 1; + tios.c_cc[VTIME] = 0; + + cfsetospeed(&tios, B38400); + cfsetispeed(&tios, B38400); + + if (tcsetattr(fd, TCSAFLUSH, &tios) < 0) { + printk(UM_KERN_ERR "failed to set terminal attributes\n"); + return -1; + } + return 0; +} + +struct slip_pre_exec_data { + int stdin_fd; + int stdout_fd; + int close_me; +}; + +static void slip_pre_exec(void *arg) +{ + struct slip_pre_exec_data *data = arg; + + if (data->stdin_fd >= 0) + dup2(data->stdin_fd, 0); + dup2(data->stdout_fd, 1); + if (data->close_me >= 0) + close(data->close_me); +} + +static int slip_tramp(char **argv, int fd) +{ + struct slip_pre_exec_data pe_data; + char *output; + int pid, fds[2], err, output_len; + + err = os_pipe(fds, 1, 0); + if (err < 0) { + printk(UM_KERN_ERR "slip_tramp : pipe failed, err = %d\n", + -err); + goto out; + } + + err = 0; + pe_data.stdin_fd = fd; + pe_data.stdout_fd = fds[1]; + pe_data.close_me = fds[0]; + err = run_helper(slip_pre_exec, &pe_data, argv); + if (err < 0) + goto out_close; + pid = err; + + output_len = UM_KERN_PAGE_SIZE; + output = uml_kmalloc(output_len, UM_GFP_KERNEL); + if (output == NULL) { + printk(UM_KERN_ERR "slip_tramp : failed to allocate output " + "buffer\n"); + os_kill_process(pid, 1); + err = -ENOMEM; + goto out_close; + } + + close(fds[1]); + read_output(fds[0], output, output_len); + printk("%s", output); + + err = helper_wait(pid); + close(fds[0]); + + kfree(output); + return err; + +out_close: + close(fds[0]); + close(fds[1]); +out: + return err; +} + +static int slip_open(void *data) +{ + struct slip_data *pri = data; + char version_buf[sizeof("nnnnn\0")]; + char gate_buf[sizeof("nnn.nnn.nnn.nnn\0")]; + char *argv[] = { "uml_net", version_buf, "slip", "up", gate_buf, + NULL }; + int sfd, mfd, err; + + err = get_pty(); + if (err < 0) { + printk(UM_KERN_ERR "slip-open : Failed to open pty, err = %d\n", + -err); + goto out; + } + mfd = err; + + err = open(ptsname(mfd), O_RDWR, 0); + if (err < 0) { + printk(UM_KERN_ERR "Couldn't open tty for slip line, " + "err = %d\n", -err); + goto out_close; + } + sfd = err; + + err = set_up_tty(sfd); + if (err) + goto out_close2; + + pri->slave = sfd; + pri->slip.pos = 0; + pri->slip.esc = 0; + if (pri->gate_addr != NULL) { + sprintf(version_buf, "%d", UML_NET_VERSION); + strcpy(gate_buf, pri->gate_addr); + + err = slip_tramp(argv, sfd); + + if (err < 0) { + printk(UM_KERN_ERR "slip_tramp failed - err = %d\n", + -err); + goto out_close2; + } + err = os_get_ifname(pri->slave, pri->name); + if (err < 0) { + printk(UM_KERN_ERR "get_ifname failed, err = %d\n", + -err); + goto out_close2; + } + iter_addresses(pri->dev, open_addr, pri->name); + } + else { + err = os_set_slip(sfd); + if (err < 0) { + printk(UM_KERN_ERR "Failed to set slip discipline " + "encapsulation - err = %d\n", -err); + goto out_close2; + } + } + return mfd; +out_close2: + close(sfd); +out_close: + close(mfd); +out: + return err; +} + +static void slip_close(int fd, void *data) +{ + struct slip_data *pri = data; + char version_buf[sizeof("nnnnn\0")]; + char *argv[] = { "uml_net", version_buf, "slip", "down", pri->name, + NULL }; + int err; + + if (pri->gate_addr != NULL) + iter_addresses(pri->dev, close_addr, pri->name); + + sprintf(version_buf, "%d", UML_NET_VERSION); + + err = slip_tramp(argv, pri->slave); + + if (err != 0) + printk(UM_KERN_ERR "slip_tramp failed - errno = %d\n", -err); + close(fd); + close(pri->slave); + pri->slave = -1; +} + +int slip_user_read(int fd, void *buf, int len, struct slip_data *pri) +{ + return slip_proto_read(fd, buf, len, &pri->slip); +} + +int slip_user_write(int fd, void *buf, int len, struct slip_data *pri) +{ + return slip_proto_write(fd, buf, len, &pri->slip); +} + +static void slip_add_addr(unsigned char *addr, unsigned char *netmask, + void *data) +{ + struct slip_data *pri = data; + + if (pri->slave < 0) + return; + open_addr(addr, netmask, pri->name); +} + +static void slip_del_addr(unsigned char *addr, unsigned char *netmask, + void *data) +{ + struct slip_data *pri = data; + + if (pri->slave < 0) + return; + close_addr(addr, netmask, pri->name); +} + +const struct net_user_info slip_user_info = { + .init = slip_user_init, + .open = slip_open, + .close = slip_close, + .remove = NULL, + .add_address = slip_add_addr, + .delete_address = slip_del_addr, + .mtu = BUF_SIZE, + .max_packet = BUF_SIZE, +}; diff --git a/arch/um/drivers/slirp.h b/arch/um/drivers/slirp.h new file mode 100644 index 0000000000..4aef2b8824 --- /dev/null +++ b/arch/um/drivers/slirp.h @@ -0,0 +1,34 @@ +/* SPDX-License-Identifier: GPL-2.0 */ +#ifndef __UM_SLIRP_H +#define __UM_SLIRP_H + +#include "slip_common.h" + +#define SLIRP_MAX_ARGS 100 +/* + * XXX this next definition is here because I don't understand why this + * initializer doesn't work in slirp_kern.c: + * + * argv : { init->argv[ 0 ... SLIRP_MAX_ARGS-1 ] }, + * + * or why I can't typecast like this: + * + * argv : (char* [SLIRP_MAX_ARGS])(init->argv), + */ +struct arg_list_dummy_wrapper { char *argv[SLIRP_MAX_ARGS]; }; + +struct slirp_data { + void *dev; + struct arg_list_dummy_wrapper argw; + int pid; + int slave; + struct slip_proto slip; +}; + +extern const struct net_user_info slirp_user_info; + +extern int slirp_user_read(int fd, void *buf, int len, struct slirp_data *pri); +extern int slirp_user_write(int fd, void *buf, int len, + struct slirp_data *pri); + +#endif diff --git a/arch/um/drivers/slirp_kern.c b/arch/um/drivers/slirp_kern.c new file mode 100644 index 0000000000..0a6151ee95 --- /dev/null +++ b/arch/um/drivers/slirp_kern.c @@ -0,0 +1,120 @@ +// SPDX-License-Identifier: GPL-2.0 +/* + * Copyright (C) 2007 Jeff Dike (jdike@{addtoit,linux.intel}.com) + */ + +#include <linux/if_arp.h> +#include <linux/init.h> +#include <linux/netdevice.h> +#include <linux/string.h> +#include <net_kern.h> +#include <net_user.h> +#include "slirp.h" + +struct slirp_init { + struct arg_list_dummy_wrapper argw; /* XXX should be simpler... */ +}; + +static void slirp_init(struct net_device *dev, void *data) +{ + struct uml_net_private *private; + struct slirp_data *spri; + struct slirp_init *init = data; + int i; + + private = netdev_priv(dev); + spri = (struct slirp_data *) private->user; + + spri->argw = init->argw; + spri->pid = -1; + spri->slave = -1; + spri->dev = dev; + + slip_proto_init(&spri->slip); + + dev->hard_header_len = 0; + dev->header_ops = NULL; + dev->addr_len = 0; + dev->type = ARPHRD_SLIP; + dev->tx_queue_len = 256; + dev->flags = IFF_NOARP; + printk("SLIRP backend - command line:"); + for (i = 0; spri->argw.argv[i] != NULL; i++) + printk(" '%s'",spri->argw.argv[i]); + printk("\n"); +} + +static unsigned short slirp_protocol(struct sk_buff *skbuff) +{ + return htons(ETH_P_IP); +} + +static int slirp_read(int fd, struct sk_buff *skb, struct uml_net_private *lp) +{ + return slirp_user_read(fd, skb_mac_header(skb), skb->dev->mtu, + (struct slirp_data *) &lp->user); +} + +static int slirp_write(int fd, struct sk_buff *skb, struct uml_net_private *lp) +{ + return slirp_user_write(fd, skb->data, skb->len, + (struct slirp_data *) &lp->user); +} + +const struct net_kern_info slirp_kern_info = { + .init = slirp_init, + .protocol = slirp_protocol, + .read = slirp_read, + .write = slirp_write, +}; + +static int slirp_setup(char *str, char **mac_out, void *data) +{ + struct slirp_init *init = data; + int i=0; + + *init = ((struct slirp_init) { .argw = { { "slirp", NULL } } }); + + str = split_if_spec(str, mac_out, NULL); + + if (str == NULL) /* no command line given after MAC addr */ + return 1; + + do { + if (i >= SLIRP_MAX_ARGS - 1) { + printk(KERN_WARNING "slirp_setup: truncating slirp " + "arguments\n"); + break; + } + init->argw.argv[i++] = str; + while(*str && *str!=',') { + if (*str == '_') + *str=' '; + str++; + } + if (*str != ',') + break; + *str++ = '\0'; + } while (1); + + init->argw.argv[i] = NULL; + return 1; +} + +static struct transport slirp_transport = { + .list = LIST_HEAD_INIT(slirp_transport.list), + .name = "slirp", + .setup = slirp_setup, + .user = &slirp_user_info, + .kern = &slirp_kern_info, + .private_size = sizeof(struct slirp_data), + .setup_size = sizeof(struct slirp_init), +}; + +static int register_slirp(void) +{ + register_transport(&slirp_transport); + return 0; +} + +late_initcall(register_slirp); diff --git a/arch/um/drivers/slirp_user.c b/arch/um/drivers/slirp_user.c new file mode 100644 index 0000000000..8f633e2e5f --- /dev/null +++ b/arch/um/drivers/slirp_user.c @@ -0,0 +1,125 @@ +// SPDX-License-Identifier: GPL-2.0 +/* + * Copyright (C) 2007 Jeff Dike (jdike@{addtoit,linux.intel}.com) + */ + +#include <unistd.h> +#include <errno.h> +#include <string.h> +#include <sys/wait.h> +#include <net_user.h> +#include <os.h> +#include "slirp.h" + +static int slirp_user_init(void *data, void *dev) +{ + struct slirp_data *pri = data; + + pri->dev = dev; + return 0; +} + +struct slirp_pre_exec_data { + int stdin_fd; + int stdout_fd; +}; + +static void slirp_pre_exec(void *arg) +{ + struct slirp_pre_exec_data *data = arg; + + if (data->stdin_fd != -1) + dup2(data->stdin_fd, 0); + if (data->stdout_fd != -1) + dup2(data->stdout_fd, 1); +} + +static int slirp_tramp(char **argv, int fd) +{ + struct slirp_pre_exec_data pe_data; + int pid; + + pe_data.stdin_fd = fd; + pe_data.stdout_fd = fd; + pid = run_helper(slirp_pre_exec, &pe_data, argv); + + return pid; +} + +static int slirp_open(void *data) +{ + struct slirp_data *pri = data; + int fds[2], pid, err; + + err = os_pipe(fds, 1, 1); + if (err) + return err; + + err = slirp_tramp(pri->argw.argv, fds[1]); + if (err < 0) { + printk(UM_KERN_ERR "slirp_tramp failed - errno = %d\n", -err); + goto out; + } + pid = err; + + pri->slave = fds[1]; + pri->slip.pos = 0; + pri->slip.esc = 0; + pri->pid = err; + + return fds[0]; +out: + close(fds[0]); + close(fds[1]); + return err; +} + +static void slirp_close(int fd, void *data) +{ + struct slirp_data *pri = data; + int err; + + close(fd); + close(pri->slave); + + pri->slave = -1; + + if (pri->pid<1) { + printk(UM_KERN_ERR "slirp_close: no child process to shut " + "down\n"); + return; + } + +#if 0 + if (kill(pri->pid, SIGHUP)<0) { + printk(UM_KERN_ERR "slirp_close: sending hangup to %d failed " + "(%d)\n", pri->pid, errno); + } +#endif + err = helper_wait(pri->pid); + if (err < 0) + return; + + pri->pid = -1; +} + +int slirp_user_read(int fd, void *buf, int len, struct slirp_data *pri) +{ + return slip_proto_read(fd, buf, len, &pri->slip); +} + +int slirp_user_write(int fd, void *buf, int len, struct slirp_data *pri) +{ + return slip_proto_write(fd, buf, len, &pri->slip); +} + +const struct net_user_info slirp_user_info = { + .init = slirp_user_init, + .open = slirp_open, + .close = slirp_close, + .remove = NULL, + .add_address = NULL, + .delete_address = NULL, + .mtu = BUF_SIZE, + .max_packet = BUF_SIZE, +}; diff --git a/arch/um/drivers/ssl.c b/arch/um/drivers/ssl.c new file mode 100644 index 0000000000..277cea3d30 --- /dev/null +++ b/arch/um/drivers/ssl.c @@ -0,0 +1,202 @@ +// SPDX-License-Identifier: GPL-2.0 +/* + * Copyright (C) 2000, 2002 Jeff Dike (jdike@karaya.com) + */ + +#include <linux/fs.h> +#include <linux/tty.h> +#include <linux/tty_driver.h> +#include <linux/major.h> +#include <linux/mm.h> +#include <linux/init.h> +#include <linux/console.h> +#include <asm/termbits.h> +#include <asm/irq.h> +#include "chan.h" +#include <init.h> +#include <irq_user.h> +#include "mconsole_kern.h" + +static const int ssl_version = 1; + +#define NR_PORTS 64 + +static void ssl_announce(char *dev_name, int dev) +{ + printk(KERN_INFO "Serial line %d assigned device '%s'\n", dev, + dev_name); +} + +/* Almost const, except that xterm_title may be changed in an initcall */ +static struct chan_opts opts = { + .announce = ssl_announce, + .xterm_title = "Serial Line #%d", + .raw = 1, +}; + +static int ssl_config(char *str, char **error_out); +static int ssl_get_config(char *dev, char *str, int size, char **error_out); +static int ssl_remove(int n, char **error_out); + + +/* Const, except for .mc.list */ +static struct line_driver driver = { + .name = "UML serial line", + .device_name = "ttyS", + .major = TTY_MAJOR, + .minor_start = 64, + .type = TTY_DRIVER_TYPE_SERIAL, + .subtype = 0, + .read_irq_name = "ssl", + .write_irq_name = "ssl-write", + .mc = { + .list = LIST_HEAD_INIT(driver.mc.list), + .name = "ssl", + .config = ssl_config, + .get_config = ssl_get_config, + .id = line_id, + .remove = ssl_remove, + }, +}; + +/* The array is initialized by line_init, at initcall time. The + * elements are locked individually as needed. + */ +static char *conf[NR_PORTS]; +static char *def_conf = CONFIG_SSL_CHAN; +static struct line serial_lines[NR_PORTS]; + +static int ssl_config(char *str, char **error_out) +{ + return line_config(serial_lines, ARRAY_SIZE(serial_lines), str, &opts, + error_out); +} + +static int ssl_get_config(char *dev, char *str, int size, char **error_out) +{ + return line_get_config(dev, serial_lines, ARRAY_SIZE(serial_lines), str, + size, error_out); +} + +static int ssl_remove(int n, char **error_out) +{ + return line_remove(serial_lines, ARRAY_SIZE(serial_lines), n, + error_out); +} + +static int ssl_install(struct tty_driver *driver, struct tty_struct *tty) +{ + return line_install(driver, tty, &serial_lines[tty->index]); +} + +static const struct tty_operations ssl_ops = { + .open = line_open, + .close = line_close, + .write = line_write, + .write_room = line_write_room, + .chars_in_buffer = line_chars_in_buffer, + .flush_buffer = line_flush_buffer, + .flush_chars = line_flush_chars, + .throttle = line_throttle, + .unthrottle = line_unthrottle, + .install = ssl_install, + .hangup = line_hangup, +}; + +/* Changed by ssl_init and referenced by ssl_exit, which are both serialized + * by being an initcall and exitcall, respectively. + */ +static int ssl_init_done; + +static void ssl_console_write(struct console *c, const char *string, + unsigned len) +{ + struct line *line = &serial_lines[c->index]; + unsigned long flags; + + spin_lock_irqsave(&line->lock, flags); + console_write_chan(line->chan_out, string, len); + spin_unlock_irqrestore(&line->lock, flags); +} + +static struct tty_driver *ssl_console_device(struct console *c, int *index) +{ + *index = c->index; + return driver.driver; +} + +static int ssl_console_setup(struct console *co, char *options) +{ + struct line *line = &serial_lines[co->index]; + + return console_open_chan(line, co); +} + +/* No locking for register_console call - relies on single-threaded initcalls */ +static struct console ssl_cons = { + .name = "ttyS", + .write = ssl_console_write, + .device = ssl_console_device, + .setup = ssl_console_setup, + .flags = CON_PRINTBUFFER|CON_ANYTIME, + .index = -1, +}; + +static int ssl_init(void) +{ + char *new_title; + int err; + int i; + + printk(KERN_INFO "Initializing software serial port version %d\n", + ssl_version); + + err = register_lines(&driver, &ssl_ops, serial_lines, + ARRAY_SIZE(serial_lines)); + if (err) + return err; + + new_title = add_xterm_umid(opts.xterm_title); + if (new_title != NULL) + opts.xterm_title = new_title; + + for (i = 0; i < NR_PORTS; i++) { + char *error; + char *s = conf[i]; + if (!s) + s = def_conf; + if (setup_one_line(serial_lines, i, s, &opts, &error)) + printk(KERN_ERR "setup_one_line failed for " + "device %d : %s\n", i, error); + } + + ssl_init_done = 1; + register_console(&ssl_cons); + return 0; +} +late_initcall(ssl_init); + +static void ssl_exit(void) +{ + if (!ssl_init_done) + return; + close_lines(serial_lines, ARRAY_SIZE(serial_lines)); +} +__uml_exitcall(ssl_exit); + +static int ssl_chan_setup(char *str) +{ + line_setup(conf, NR_PORTS, &def_conf, str, "serial line"); + return 1; +} + +__setup("ssl", ssl_chan_setup); +__channel_help(ssl_chan_setup, "ssl"); + +static int ssl_non_raw_setup(char *str) +{ + opts.raw = 0; + return 1; +} +__setup("ssl-non-raw", ssl_non_raw_setup); +__channel_help(ssl_non_raw_setup, "set serial lines to non-raw mode"); diff --git a/arch/um/drivers/stderr_console.c b/arch/um/drivers/stderr_console.c new file mode 100644 index 0000000000..ecc3a58149 --- /dev/null +++ b/arch/um/drivers/stderr_console.c @@ -0,0 +1,63 @@ +// SPDX-License-Identifier: GPL-2.0 +#include <linux/kernel.h> +#include <linux/init.h> +#include <linux/console.h> + +#include "chan_user.h" + +/* ----------------------------------------------------------------------------- */ +/* trivial console driver -- simply dump everything to stderr */ + +/* + * Don't register by default -- as this registers very early in the + * boot process it becomes the default console. + * + * Initialized at init time. + */ +static int use_stderr_console = 0; + +static void stderr_console_write(struct console *console, const char *string, + unsigned len) +{ + generic_write(2 /* stderr */, string, len, NULL); +} + +static struct console stderr_console = { + .name = "stderr", + .write = stderr_console_write, + .flags = CON_PRINTBUFFER, +}; + +static int __init stderr_console_init(void) +{ + if (use_stderr_console) + register_console(&stderr_console); + return 0; +} +console_initcall(stderr_console_init); + +static int stderr_setup(char *str) +{ + if (!str) + return 0; + use_stderr_console = simple_strtoul(str,&str,0); + return 1; +} +__setup("stderr=", stderr_setup); + +/* The previous behavior of not unregistering led to /dev/console being + * impossible to open. My FC5 filesystem started having init die, and the + * system panicing because of this. Unregistering causes the real + * console to become the default console, and /dev/console can then be + * opened. Making this an initcall makes this happen late enough that + * there is no added value in dumping everything to stderr, and the + * normal console is good enough to show you all available output. + */ +static int __init unregister_stderr(void) +{ + unregister_console(&stderr_console); + + return 0; +} + +__initcall(unregister_stderr); diff --git a/arch/um/drivers/stdio_console.c b/arch/um/drivers/stdio_console.c new file mode 100644 index 0000000000..1c239737d8 --- /dev/null +++ b/arch/um/drivers/stdio_console.c @@ -0,0 +1,198 @@ +// SPDX-License-Identifier: GPL-2.0 +/* + * Copyright (C) 2000, 2001 Jeff Dike (jdike@karaya.com) + */ + +#include <linux/posix_types.h> +#include <linux/tty.h> +#include <linux/tty_flip.h> +#include <linux/types.h> +#include <linux/major.h> +#include <linux/kdev_t.h> +#include <linux/console.h> +#include <linux/string.h> +#include <linux/sched.h> +#include <linux/list.h> +#include <linux/init.h> +#include <linux/interrupt.h> +#include <linux/slab.h> +#include <linux/hardirq.h> +#include <asm/current.h> +#include <asm/irq.h> +#include "stdio_console.h" +#include "chan.h" +#include <irq_user.h> +#include "mconsole_kern.h" +#include <init.h> + +#define MAX_TTYS (16) + +static void stdio_announce(char *dev_name, int dev) +{ + printk(KERN_INFO "Virtual console %d assigned device '%s'\n", dev, + dev_name); +} + +/* Almost const, except that xterm_title may be changed in an initcall */ +static struct chan_opts opts = { + .announce = stdio_announce, + .xterm_title = "Virtual Console #%d", + .raw = 1, +}; + +static int con_config(char *str, char **error_out); +static int con_get_config(char *dev, char *str, int size, char **error_out); +static int con_remove(int n, char **con_remove); + + +/* Const, except for .mc.list */ +static struct line_driver driver = { + .name = "UML console", + .device_name = "tty", + .major = TTY_MAJOR, + .minor_start = 0, + .type = TTY_DRIVER_TYPE_CONSOLE, + .subtype = SYSTEM_TYPE_CONSOLE, + .read_irq_name = "console", + .write_irq_name = "console-write", + .mc = { + .list = LIST_HEAD_INIT(driver.mc.list), + .name = "con", + .config = con_config, + .get_config = con_get_config, + .id = line_id, + .remove = con_remove, + }, +}; + +/* The array is initialized by line_init, at initcall time. The + * elements are locked individually as needed. + */ +static char *vt_conf[MAX_TTYS]; +static char *def_conf; +static struct line vts[MAX_TTYS]; + +static int con_config(char *str, char **error_out) +{ + return line_config(vts, ARRAY_SIZE(vts), str, &opts, error_out); +} + +static int con_get_config(char *dev, char *str, int size, char **error_out) +{ + return line_get_config(dev, vts, ARRAY_SIZE(vts), str, size, error_out); +} + +static int con_remove(int n, char **error_out) +{ + return line_remove(vts, ARRAY_SIZE(vts), n, error_out); +} + +/* Set in an initcall, checked in an exitcall */ +static int con_init_done; + +static int con_install(struct tty_driver *driver, struct tty_struct *tty) +{ + return line_install(driver, tty, &vts[tty->index]); +} + +static const struct tty_operations console_ops = { + .open = line_open, + .install = con_install, + .close = line_close, + .write = line_write, + .write_room = line_write_room, + .chars_in_buffer = line_chars_in_buffer, + .flush_buffer = line_flush_buffer, + .flush_chars = line_flush_chars, + .throttle = line_throttle, + .unthrottle = line_unthrottle, + .hangup = line_hangup, +}; + +static void uml_console_write(struct console *console, const char *string, + unsigned len) +{ + struct line *line = &vts[console->index]; + unsigned long flags; + + spin_lock_irqsave(&line->lock, flags); + console_write_chan(line->chan_out, string, len); + spin_unlock_irqrestore(&line->lock, flags); +} + +static struct tty_driver *uml_console_device(struct console *c, int *index) +{ + *index = c->index; + return driver.driver; +} + +static int uml_console_setup(struct console *co, char *options) +{ + struct line *line = &vts[co->index]; + + return console_open_chan(line, co); +} + +/* No locking for register_console call - relies on single-threaded initcalls */ +static struct console stdiocons = { + .name = "tty", + .write = uml_console_write, + .device = uml_console_device, + .setup = uml_console_setup, + .flags = CON_PRINTBUFFER|CON_ANYTIME, + .index = -1, +}; + +static int stdio_init(void) +{ + char *new_title; + int err; + int i; + + err = register_lines(&driver, &console_ops, vts, + ARRAY_SIZE(vts)); + if (err) + return err; + + printk(KERN_INFO "Initialized stdio console driver\n"); + + new_title = add_xterm_umid(opts.xterm_title); + if(new_title != NULL) + opts.xterm_title = new_title; + + for (i = 0; i < MAX_TTYS; i++) { + char *error; + char *s = vt_conf[i]; + if (!s) + s = def_conf; + if (!s) + s = i ? CONFIG_CON_CHAN : CONFIG_CON_ZERO_CHAN; + if (setup_one_line(vts, i, s, &opts, &error)) + printk(KERN_ERR "setup_one_line failed for " + "device %d : %s\n", i, error); + } + + con_init_done = 1; + register_console(&stdiocons); + return 0; +} +late_initcall(stdio_init); + +static void console_exit(void) +{ + if (!con_init_done) + return; + close_lines(vts, ARRAY_SIZE(vts)); +} +__uml_exitcall(console_exit); + +static int console_chan_setup(char *str) +{ + if (!strncmp(str, "sole=", 5)) /* console= option specifies tty */ + return 0; + + line_setup(vt_conf, MAX_TTYS, &def_conf, str, "console"); + return 1; +} +__setup("con", console_chan_setup); +__channel_help(console_chan_setup, "con"); diff --git a/arch/um/drivers/stdio_console.h b/arch/um/drivers/stdio_console.h new file mode 100644 index 0000000000..3a409ec23d --- /dev/null +++ b/arch/um/drivers/stdio_console.h @@ -0,0 +1,11 @@ +/* SPDX-License-Identifier: GPL-2.0 */ +/* + * Copyright (C) 2000 Jeff Dike (jdike@karaya.com) + */ + +#ifndef __STDIO_CONSOLE_H +#define __STDIO_CONSOLE_H + +extern void save_console_flags(void); +#endif + diff --git a/arch/um/drivers/tty.c b/arch/um/drivers/tty.c new file mode 100644 index 0000000000..884a762d21 --- /dev/null +++ b/arch/um/drivers/tty.c @@ -0,0 +1,81 @@ +// SPDX-License-Identifier: GPL-2.0 +/* + * Copyright (C) 2001 - 2007 Jeff Dike (jdike@{linux.intel,addtoit}.com) + */ + +#include <errno.h> +#include <fcntl.h> +#include <termios.h> +#include "chan_user.h" +#include <os.h> +#include <um_malloc.h> + +struct tty_chan { + char *dev; + int raw; + struct termios tt; +}; + +static void *tty_chan_init(char *str, int device, const struct chan_opts *opts) +{ + struct tty_chan *data; + + if (*str != ':') { + printk(UM_KERN_ERR "tty_init : channel type 'tty' must specify " + "a device\n"); + return NULL; + } + str++; + + data = uml_kmalloc(sizeof(*data), UM_GFP_KERNEL); + if (data == NULL) + return NULL; + *data = ((struct tty_chan) { .dev = str, + .raw = opts->raw }); + + return data; +} + +static int tty_open(int input, int output, int primary, void *d, + char **dev_out) +{ + struct tty_chan *data = d; + int fd, err, mode = 0; + + if (input && output) + mode = O_RDWR; + else if (input) + mode = O_RDONLY; + else if (output) + mode = O_WRONLY; + + fd = open(data->dev, mode); + if (fd < 0) + return -errno; + + if (data->raw) { + CATCH_EINTR(err = tcgetattr(fd, &data->tt)); + if (err) + return err; + + err = raw(fd); + if (err) + return err; + } + + *dev_out = data->dev; + return fd; +} + +const struct chan_ops tty_ops = { + .type = "tty", + .init = tty_chan_init, + .open = tty_open, + .close = generic_close, + .read = generic_read, + .write = generic_write, + .console_write = generic_console_write, + .window_size = generic_window_size, + .free = generic_free, + .winch = 0, +}; diff --git a/arch/um/drivers/ubd.h b/arch/um/drivers/ubd.h new file mode 100644 index 0000000000..f016fe1549 --- /dev/null +++ b/arch/um/drivers/ubd.h @@ -0,0 +1,20 @@ +/* SPDX-License-Identifier: GPL-2.0 */ +/* + * Copyright (C) 2000 Jeff Dike (jdike@karaya.com) + * Copyright (C) 2001 RidgeRun, Inc (glonnon@ridgerun.com) + */ + +#ifndef __UM_UBD_USER_H +#define __UM_UBD_USER_H + +extern int start_io_thread(unsigned long sp, int *fds_out); +extern int io_thread(void *arg); +extern int kernel_fd; + +extern int ubd_read_poll(int timeout); +extern int ubd_write_poll(int timeout); + +#define UBD_REQ_BUFFER_SIZE 64 + +#endif + diff --git a/arch/um/drivers/ubd_kern.c b/arch/um/drivers/ubd_kern.c new file mode 100644 index 0000000000..50206feac5 --- /dev/null +++ b/arch/um/drivers/ubd_kern.c @@ -0,0 +1,1599 @@ +// SPDX-License-Identifier: GPL-2.0 +/* + * Copyright (C) 2018 Cambridge Greys Ltd + * Copyright (C) 2015-2016 Anton Ivanov (aivanov@brocade.com) + * Copyright (C) 2000 Jeff Dike (jdike@karaya.com) + */ + +/* 2001-09-28...2002-04-17 + * Partition stuff by James_McMechan@hotmail.com + * old style ubd by setting UBD_SHIFT to 0 + * 2002-09-27...2002-10-18 massive tinkering for 2.5 + * partitions have changed in 2.5 + * 2003-01-29 more tinkering for 2.5.59-1 + * This should now address the sysfs problems and has + * the symlink for devfs to allow for booting with + * the common /dev/ubd/discX/... names rather than + * only /dev/ubdN/discN this version also has lots of + * clean ups preparing for ubd-many. + * James McMechan + */ + +#define UBD_SHIFT 4 + +#include <linux/module.h> +#include <linux/init.h> +#include <linux/blkdev.h> +#include <linux/blk-mq.h> +#include <linux/ata.h> +#include <linux/hdreg.h> +#include <linux/major.h> +#include <linux/cdrom.h> +#include <linux/proc_fs.h> +#include <linux/seq_file.h> +#include <linux/ctype.h> +#include <linux/slab.h> +#include <linux/vmalloc.h> +#include <linux/platform_device.h> +#include <linux/scatterlist.h> +#include <asm/tlbflush.h> +#include <kern_util.h> +#include "mconsole_kern.h" +#include <init.h> +#include <irq_kern.h> +#include "ubd.h" +#include <os.h> +#include "cow.h" + +/* Max request size is determined by sector mask - 32K */ +#define UBD_MAX_REQUEST (8 * sizeof(long)) + +struct io_desc { + char *buffer; + unsigned long length; + unsigned long sector_mask; + unsigned long long cow_offset; + unsigned long bitmap_words[2]; +}; + +struct io_thread_req { + struct request *req; + int fds[2]; + unsigned long offsets[2]; + unsigned long long offset; + int sectorsize; + int error; + + int desc_cnt; + /* io_desc has to be the last element of the struct */ + struct io_desc io_desc[]; +}; + + +static struct io_thread_req * (*irq_req_buffer)[]; +static struct io_thread_req *irq_remainder; +static int irq_remainder_size; + +static struct io_thread_req * (*io_req_buffer)[]; +static struct io_thread_req *io_remainder; +static int io_remainder_size; + + + +static inline int ubd_test_bit(__u64 bit, unsigned char *data) +{ + __u64 n; + int bits, off; + + bits = sizeof(data[0]) * 8; + n = bit / bits; + off = bit % bits; + return (data[n] & (1 << off)) != 0; +} + +static inline void ubd_set_bit(__u64 bit, unsigned char *data) +{ + __u64 n; + int bits, off; + + bits = sizeof(data[0]) * 8; + n = bit / bits; + off = bit % bits; + data[n] |= (1 << off); +} +/*End stuff from ubd_user.h*/ + +#define DRIVER_NAME "uml-blkdev" + +static DEFINE_MUTEX(ubd_lock); +static DEFINE_MUTEX(ubd_mutex); /* replaces BKL, might not be needed */ + +static int ubd_open(struct gendisk *disk, blk_mode_t mode); +static void ubd_release(struct gendisk *disk); +static int ubd_ioctl(struct block_device *bdev, blk_mode_t mode, + unsigned int cmd, unsigned long arg); +static int ubd_getgeo(struct block_device *bdev, struct hd_geometry *geo); + +#define MAX_DEV (16) + +static const struct block_device_operations ubd_blops = { + .owner = THIS_MODULE, + .open = ubd_open, + .release = ubd_release, + .ioctl = ubd_ioctl, + .compat_ioctl = blkdev_compat_ptr_ioctl, + .getgeo = ubd_getgeo, +}; + +/* Protected by ubd_lock */ +static struct gendisk *ubd_gendisk[MAX_DEV]; + +#ifdef CONFIG_BLK_DEV_UBD_SYNC +#define OPEN_FLAGS ((struct openflags) { .r = 1, .w = 1, .s = 1, .c = 0, \ + .cl = 1 }) +#else +#define OPEN_FLAGS ((struct openflags) { .r = 1, .w = 1, .s = 0, .c = 0, \ + .cl = 1 }) +#endif +static struct openflags global_openflags = OPEN_FLAGS; + +struct cow { + /* backing file name */ + char *file; + /* backing file fd */ + int fd; + unsigned long *bitmap; + unsigned long bitmap_len; + int bitmap_offset; + int data_offset; +}; + +#define MAX_SG 64 + +struct ubd { + /* name (and fd, below) of the file opened for writing, either the + * backing or the cow file. */ + char *file; + char *serial; + int count; + int fd; + __u64 size; + struct openflags boot_openflags; + struct openflags openflags; + unsigned shared:1; + unsigned no_cow:1; + unsigned no_trim:1; + struct cow cow; + struct platform_device pdev; + struct request_queue *queue; + struct blk_mq_tag_set tag_set; + spinlock_t lock; +}; + +#define DEFAULT_COW { \ + .file = NULL, \ + .fd = -1, \ + .bitmap = NULL, \ + .bitmap_offset = 0, \ + .data_offset = 0, \ +} + +#define DEFAULT_UBD { \ + .file = NULL, \ + .serial = NULL, \ + .count = 0, \ + .fd = -1, \ + .size = -1, \ + .boot_openflags = OPEN_FLAGS, \ + .openflags = OPEN_FLAGS, \ + .no_cow = 0, \ + .no_trim = 0, \ + .shared = 0, \ + .cow = DEFAULT_COW, \ + .lock = __SPIN_LOCK_UNLOCKED(ubd_devs.lock), \ +} + +/* Protected by ubd_lock */ +static struct ubd ubd_devs[MAX_DEV] = { [0 ... MAX_DEV - 1] = DEFAULT_UBD }; + +static blk_status_t ubd_queue_rq(struct blk_mq_hw_ctx *hctx, + const struct blk_mq_queue_data *bd); + +static int fake_ide_setup(char *str) +{ + pr_warn("The fake_ide option has been removed\n"); + return 1; +} +__setup("fake_ide", fake_ide_setup); + +__uml_help(fake_ide_setup, +"fake_ide\n" +" Obsolete stub.\n\n" +); + +static int parse_unit(char **ptr) +{ + char *str = *ptr, *end; + int n = -1; + + if(isdigit(*str)) { + n = simple_strtoul(str, &end, 0); + if(end == str) + return -1; + *ptr = end; + } + else if (('a' <= *str) && (*str <= 'z')) { + n = *str - 'a'; + str++; + *ptr = str; + } + return n; +} + +/* If *index_out == -1 at exit, the passed option was a general one; + * otherwise, the str pointer is used (and owned) inside ubd_devs array, so it + * should not be freed on exit. + */ +static int ubd_setup_common(char *str, int *index_out, char **error_out) +{ + struct ubd *ubd_dev; + struct openflags flags = global_openflags; + char *file, *backing_file, *serial; + int n, err = 0, i; + + if(index_out) *index_out = -1; + n = *str; + if(n == '='){ + str++; + if(!strcmp(str, "sync")){ + global_openflags = of_sync(global_openflags); + return err; + } + + pr_warn("fake major not supported any more\n"); + return 0; + } + + n = parse_unit(&str); + if(n < 0){ + *error_out = "Couldn't parse device number"; + return -EINVAL; + } + if(n >= MAX_DEV){ + *error_out = "Device number out of range"; + return 1; + } + + err = -EBUSY; + mutex_lock(&ubd_lock); + + ubd_dev = &ubd_devs[n]; + if(ubd_dev->file != NULL){ + *error_out = "Device is already configured"; + goto out; + } + + if (index_out) + *index_out = n; + + err = -EINVAL; + for (i = 0; i < sizeof("rscdt="); i++) { + switch (*str) { + case 'r': + flags.w = 0; + break; + case 's': + flags.s = 1; + break; + case 'd': + ubd_dev->no_cow = 1; + break; + case 'c': + ubd_dev->shared = 1; + break; + case 't': + ubd_dev->no_trim = 1; + break; + case '=': + str++; + goto break_loop; + default: + *error_out = "Expected '=' or flag letter " + "(r, s, c, t or d)"; + goto out; + } + str++; + } + + if (*str == '=') + *error_out = "Too many flags specified"; + else + *error_out = "Missing '='"; + goto out; + +break_loop: + file = strsep(&str, ",:"); + if (*file == '\0') + file = NULL; + + backing_file = strsep(&str, ",:"); + if (backing_file && *backing_file == '\0') + backing_file = NULL; + + serial = strsep(&str, ",:"); + if (serial && *serial == '\0') + serial = NULL; + + if (backing_file && ubd_dev->no_cow) { + *error_out = "Can't specify both 'd' and a cow file"; + goto out; + } + + err = 0; + ubd_dev->file = file; + ubd_dev->cow.file = backing_file; + ubd_dev->serial = serial; + ubd_dev->boot_openflags = flags; +out: + mutex_unlock(&ubd_lock); + return err; +} + +static int ubd_setup(char *str) +{ + char *error; + int err; + + err = ubd_setup_common(str, NULL, &error); + if(err) + printk(KERN_ERR "Failed to initialize device with \"%s\" : " + "%s\n", str, error); + return 1; +} + +__setup("ubd", ubd_setup); +__uml_help(ubd_setup, +"ubd<n><flags>=<filename>[(:|,)<filename2>][(:|,)<serial>]\n" +" This is used to associate a device with a file in the underlying\n" +" filesystem. When specifying two filenames, the first one is the\n" +" COW name and the second is the backing file name. As separator you can\n" +" use either a ':' or a ',': the first one allows writing things like;\n" +" ubd0=~/Uml/root_cow:~/Uml/root_backing_file\n" +" while with a ',' the shell would not expand the 2nd '~'.\n" +" When using only one filename, UML will detect whether to treat it like\n" +" a COW file or a backing file. To override this detection, add the 'd'\n" +" flag:\n" +" ubd0d=BackingFile\n" +" Usually, there is a filesystem in the file, but \n" +" that's not required. Swap devices containing swap files can be\n" +" specified like this. Also, a file which doesn't contain a\n" +" filesystem can have its contents read in the virtual \n" +" machine by running 'dd' on the device. <n> must be in the range\n" +" 0 to 7. Appending an 'r' to the number will cause that device\n" +" to be mounted read-only. For example ubd1r=./ext_fs. Appending\n" +" an 's' will cause data to be written to disk on the host immediately.\n" +" 'c' will cause the device to be treated as being shared between multiple\n" +" UMLs and file locking will be turned off - this is appropriate for a\n" +" cluster filesystem and inappropriate at almost all other times.\n\n" +" 't' will disable trim/discard support on the device (enabled by default).\n\n" +" An optional device serial number can be exposed using the serial parameter\n" +" on the cmdline which is exposed as a sysfs entry. This is particularly\n" +" useful when a unique number should be given to the device. Note when\n" +" specifying a label, the filename2 must be also presented. It can be\n" +" an empty string, in which case the backing file is not used:\n" +" ubd0=File,,Serial\n" +); + +static int udb_setup(char *str) +{ + printk("udb%s specified on command line is almost certainly a ubd -> " + "udb TYPO\n", str); + return 1; +} + +__setup("udb", udb_setup); +__uml_help(udb_setup, +"udb\n" +" This option is here solely to catch ubd -> udb typos, which can be\n" +" to impossible to catch visually unless you specifically look for\n" +" them. The only result of any option starting with 'udb' is an error\n" +" in the boot output.\n\n" +); + +/* Only changed by ubd_init, which is an initcall. */ +static int thread_fd = -1; + +/* Function to read several request pointers at a time +* handling fractional reads if (and as) needed +*/ + +static int bulk_req_safe_read( + int fd, + struct io_thread_req * (*request_buffer)[], + struct io_thread_req **remainder, + int *remainder_size, + int max_recs + ) +{ + int n = 0; + int res = 0; + + if (*remainder_size > 0) { + memmove( + (char *) request_buffer, + (char *) remainder, *remainder_size + ); + n = *remainder_size; + } + + res = os_read_file( + fd, + ((char *) request_buffer) + *remainder_size, + sizeof(struct io_thread_req *)*max_recs + - *remainder_size + ); + if (res > 0) { + n += res; + if ((n % sizeof(struct io_thread_req *)) > 0) { + /* + * Read somehow returned not a multiple of dword + * theoretically possible, but never observed in the + * wild, so read routine must be able to handle it + */ + *remainder_size = n % sizeof(struct io_thread_req *); + WARN(*remainder_size > 0, "UBD IPC read returned a partial result"); + memmove( + remainder, + ((char *) request_buffer) + + (n/sizeof(struct io_thread_req *))*sizeof(struct io_thread_req *), + *remainder_size + ); + n = n - *remainder_size; + } + } else { + n = res; + } + return n; +} + +/* Called without dev->lock held, and only in interrupt context. */ +static void ubd_handler(void) +{ + int n; + int count; + + while(1){ + n = bulk_req_safe_read( + thread_fd, + irq_req_buffer, + &irq_remainder, + &irq_remainder_size, + UBD_REQ_BUFFER_SIZE + ); + if (n < 0) { + if(n == -EAGAIN) + break; + printk(KERN_ERR "spurious interrupt in ubd_handler, " + "err = %d\n", -n); + return; + } + for (count = 0; count < n/sizeof(struct io_thread_req *); count++) { + struct io_thread_req *io_req = (*irq_req_buffer)[count]; + + if ((io_req->error == BLK_STS_NOTSUPP) && (req_op(io_req->req) == REQ_OP_DISCARD)) { + blk_queue_max_discard_sectors(io_req->req->q, 0); + blk_queue_max_write_zeroes_sectors(io_req->req->q, 0); + } + blk_mq_end_request(io_req->req, io_req->error); + kfree(io_req); + } + } +} + +static irqreturn_t ubd_intr(int irq, void *dev) +{ + ubd_handler(); + return IRQ_HANDLED; +} + +/* Only changed by ubd_init, which is an initcall. */ +static int io_pid = -1; + +static void kill_io_thread(void) +{ + if(io_pid != -1) + os_kill_process(io_pid, 1); +} + +__uml_exitcall(kill_io_thread); + +static inline int ubd_file_size(struct ubd *ubd_dev, __u64 *size_out) +{ + char *file; + int fd; + int err; + + __u32 version; + __u32 align; + char *backing_file; + time64_t mtime; + unsigned long long size; + int sector_size; + int bitmap_offset; + + if (ubd_dev->file && ubd_dev->cow.file) { + file = ubd_dev->cow.file; + + goto out; + } + + fd = os_open_file(ubd_dev->file, of_read(OPENFLAGS()), 0); + if (fd < 0) + return fd; + + err = read_cow_header(file_reader, &fd, &version, &backing_file, \ + &mtime, &size, §or_size, &align, &bitmap_offset); + os_close_file(fd); + + if(err == -EINVAL) + file = ubd_dev->file; + else + file = backing_file; + +out: + return os_file_size(file, size_out); +} + +static int read_cow_bitmap(int fd, void *buf, int offset, int len) +{ + int err; + + err = os_pread_file(fd, buf, len, offset); + if (err < 0) + return err; + + return 0; +} + +static int backing_file_mismatch(char *file, __u64 size, time64_t mtime) +{ + time64_t modtime; + unsigned long long actual; + int err; + + err = os_file_modtime(file, &modtime); + if (err < 0) { + printk(KERN_ERR "Failed to get modification time of backing " + "file \"%s\", err = %d\n", file, -err); + return err; + } + + err = os_file_size(file, &actual); + if (err < 0) { + printk(KERN_ERR "Failed to get size of backing file \"%s\", " + "err = %d\n", file, -err); + return err; + } + + if (actual != size) { + /*__u64 can be a long on AMD64 and with %lu GCC complains; so + * the typecast.*/ + printk(KERN_ERR "Size mismatch (%llu vs %llu) of COW header " + "vs backing file\n", (unsigned long long) size, actual); + return -EINVAL; + } + if (modtime != mtime) { + printk(KERN_ERR "mtime mismatch (%lld vs %lld) of COW header vs " + "backing file\n", mtime, modtime); + return -EINVAL; + } + return 0; +} + +static int path_requires_switch(char *from_cmdline, char *from_cow, char *cow) +{ + struct uml_stat buf1, buf2; + int err; + + if (from_cmdline == NULL) + return 0; + if (!strcmp(from_cmdline, from_cow)) + return 0; + + err = os_stat_file(from_cmdline, &buf1); + if (err < 0) { + printk(KERN_ERR "Couldn't stat '%s', err = %d\n", from_cmdline, + -err); + return 0; + } + err = os_stat_file(from_cow, &buf2); + if (err < 0) { + printk(KERN_ERR "Couldn't stat '%s', err = %d\n", from_cow, + -err); + return 1; + } + if ((buf1.ust_dev == buf2.ust_dev) && (buf1.ust_ino == buf2.ust_ino)) + return 0; + + printk(KERN_ERR "Backing file mismatch - \"%s\" requested, " + "\"%s\" specified in COW header of \"%s\"\n", + from_cmdline, from_cow, cow); + return 1; +} + +static int open_ubd_file(char *file, struct openflags *openflags, int shared, + char **backing_file_out, int *bitmap_offset_out, + unsigned long *bitmap_len_out, int *data_offset_out, + int *create_cow_out) +{ + time64_t mtime; + unsigned long long size; + __u32 version, align; + char *backing_file; + int fd, err, sectorsize, asked_switch, mode = 0644; + + fd = os_open_file(file, *openflags, mode); + if (fd < 0) { + if ((fd == -ENOENT) && (create_cow_out != NULL)) + *create_cow_out = 1; + if (!openflags->w || + ((fd != -EROFS) && (fd != -EACCES))) + return fd; + openflags->w = 0; + fd = os_open_file(file, *openflags, mode); + if (fd < 0) + return fd; + } + + if (shared) + printk(KERN_INFO "Not locking \"%s\" on the host\n", file); + else { + err = os_lock_file(fd, openflags->w); + if (err < 0) { + printk(KERN_ERR "Failed to lock '%s', err = %d\n", + file, -err); + goto out_close; + } + } + + /* Successful return case! */ + if (backing_file_out == NULL) + return fd; + + err = read_cow_header(file_reader, &fd, &version, &backing_file, &mtime, + &size, §orsize, &align, bitmap_offset_out); + if (err && (*backing_file_out != NULL)) { + printk(KERN_ERR "Failed to read COW header from COW file " + "\"%s\", errno = %d\n", file, -err); + goto out_close; + } + if (err) + return fd; + + asked_switch = path_requires_switch(*backing_file_out, backing_file, + file); + + /* Allow switching only if no mismatch. */ + if (asked_switch && !backing_file_mismatch(*backing_file_out, size, + mtime)) { + printk(KERN_ERR "Switching backing file to '%s'\n", + *backing_file_out); + err = write_cow_header(file, fd, *backing_file_out, + sectorsize, align, &size); + if (err) { + printk(KERN_ERR "Switch failed, errno = %d\n", -err); + goto out_close; + } + } else { + *backing_file_out = backing_file; + err = backing_file_mismatch(*backing_file_out, size, mtime); + if (err) + goto out_close; + } + + cow_sizes(version, size, sectorsize, align, *bitmap_offset_out, + bitmap_len_out, data_offset_out); + + return fd; + out_close: + os_close_file(fd); + return err; +} + +static int create_cow_file(char *cow_file, char *backing_file, + struct openflags flags, + int sectorsize, int alignment, int *bitmap_offset_out, + unsigned long *bitmap_len_out, int *data_offset_out) +{ + int err, fd; + + flags.c = 1; + fd = open_ubd_file(cow_file, &flags, 0, NULL, NULL, NULL, NULL, NULL); + if (fd < 0) { + err = fd; + printk(KERN_ERR "Open of COW file '%s' failed, errno = %d\n", + cow_file, -err); + goto out; + } + + err = init_cow_file(fd, cow_file, backing_file, sectorsize, alignment, + bitmap_offset_out, bitmap_len_out, + data_offset_out); + if (!err) + return fd; + os_close_file(fd); + out: + return err; +} + +static void ubd_close_dev(struct ubd *ubd_dev) +{ + os_close_file(ubd_dev->fd); + if(ubd_dev->cow.file == NULL) + return; + + os_close_file(ubd_dev->cow.fd); + vfree(ubd_dev->cow.bitmap); + ubd_dev->cow.bitmap = NULL; +} + +static int ubd_open_dev(struct ubd *ubd_dev) +{ + struct openflags flags; + char **back_ptr; + int err, create_cow, *create_ptr; + int fd; + + ubd_dev->openflags = ubd_dev->boot_openflags; + create_cow = 0; + create_ptr = (ubd_dev->cow.file != NULL) ? &create_cow : NULL; + back_ptr = ubd_dev->no_cow ? NULL : &ubd_dev->cow.file; + + fd = open_ubd_file(ubd_dev->file, &ubd_dev->openflags, ubd_dev->shared, + back_ptr, &ubd_dev->cow.bitmap_offset, + &ubd_dev->cow.bitmap_len, &ubd_dev->cow.data_offset, + create_ptr); + + if((fd == -ENOENT) && create_cow){ + fd = create_cow_file(ubd_dev->file, ubd_dev->cow.file, + ubd_dev->openflags, SECTOR_SIZE, PAGE_SIZE, + &ubd_dev->cow.bitmap_offset, + &ubd_dev->cow.bitmap_len, + &ubd_dev->cow.data_offset); + if(fd >= 0){ + printk(KERN_INFO "Creating \"%s\" as COW file for " + "\"%s\"\n", ubd_dev->file, ubd_dev->cow.file); + } + } + + if(fd < 0){ + printk("Failed to open '%s', errno = %d\n", ubd_dev->file, + -fd); + return fd; + } + ubd_dev->fd = fd; + + if(ubd_dev->cow.file != NULL){ + blk_queue_max_hw_sectors(ubd_dev->queue, 8 * sizeof(long)); + + err = -ENOMEM; + ubd_dev->cow.bitmap = vmalloc(ubd_dev->cow.bitmap_len); + if(ubd_dev->cow.bitmap == NULL){ + printk(KERN_ERR "Failed to vmalloc COW bitmap\n"); + goto error; + } + flush_tlb_kernel_vm(); + + err = read_cow_bitmap(ubd_dev->fd, ubd_dev->cow.bitmap, + ubd_dev->cow.bitmap_offset, + ubd_dev->cow.bitmap_len); + if(err < 0) + goto error; + + flags = ubd_dev->openflags; + flags.w = 0; + err = open_ubd_file(ubd_dev->cow.file, &flags, ubd_dev->shared, NULL, + NULL, NULL, NULL, NULL); + if(err < 0) goto error; + ubd_dev->cow.fd = err; + } + if (ubd_dev->no_trim == 0) { + ubd_dev->queue->limits.discard_granularity = SECTOR_SIZE; + blk_queue_max_discard_sectors(ubd_dev->queue, UBD_MAX_REQUEST); + blk_queue_max_write_zeroes_sectors(ubd_dev->queue, UBD_MAX_REQUEST); + } + blk_queue_flag_set(QUEUE_FLAG_NONROT, ubd_dev->queue); + return 0; + error: + os_close_file(ubd_dev->fd); + return err; +} + +static void ubd_device_release(struct device *dev) +{ + struct ubd *ubd_dev = dev_get_drvdata(dev); + + blk_mq_free_tag_set(&ubd_dev->tag_set); + *ubd_dev = ((struct ubd) DEFAULT_UBD); +} + +static ssize_t serial_show(struct device *dev, + struct device_attribute *attr, char *buf) +{ + struct gendisk *disk = dev_to_disk(dev); + struct ubd *ubd_dev = disk->private_data; + + if (!ubd_dev) + return 0; + + return sprintf(buf, "%s", ubd_dev->serial); +} + +static DEVICE_ATTR_RO(serial); + +static struct attribute *ubd_attrs[] = { + &dev_attr_serial.attr, + NULL, +}; + +static umode_t ubd_attrs_are_visible(struct kobject *kobj, + struct attribute *a, int n) +{ + return a->mode; +} + +static const struct attribute_group ubd_attr_group = { + .attrs = ubd_attrs, + .is_visible = ubd_attrs_are_visible, +}; + +static const struct attribute_group *ubd_attr_groups[] = { + &ubd_attr_group, + NULL, +}; + +static int ubd_disk_register(int major, u64 size, int unit, + struct gendisk *disk) +{ + disk->major = major; + disk->first_minor = unit << UBD_SHIFT; + disk->minors = 1 << UBD_SHIFT; + disk->fops = &ubd_blops; + set_capacity(disk, size / 512); + sprintf(disk->disk_name, "ubd%c", 'a' + unit); + + ubd_devs[unit].pdev.id = unit; + ubd_devs[unit].pdev.name = DRIVER_NAME; + ubd_devs[unit].pdev.dev.release = ubd_device_release; + dev_set_drvdata(&ubd_devs[unit].pdev.dev, &ubd_devs[unit]); + platform_device_register(&ubd_devs[unit].pdev); + + disk->private_data = &ubd_devs[unit]; + disk->queue = ubd_devs[unit].queue; + return device_add_disk(&ubd_devs[unit].pdev.dev, disk, ubd_attr_groups); +} + +#define ROUND_BLOCK(n) ((n + (SECTOR_SIZE - 1)) & (-SECTOR_SIZE)) + +static const struct blk_mq_ops ubd_mq_ops = { + .queue_rq = ubd_queue_rq, +}; + +static int ubd_add(int n, char **error_out) +{ + struct ubd *ubd_dev = &ubd_devs[n]; + struct gendisk *disk; + int err = 0; + + if(ubd_dev->file == NULL) + goto out; + + err = ubd_file_size(ubd_dev, &ubd_dev->size); + if(err < 0){ + *error_out = "Couldn't determine size of device's file"; + goto out; + } + + ubd_dev->size = ROUND_BLOCK(ubd_dev->size); + + ubd_dev->tag_set.ops = &ubd_mq_ops; + ubd_dev->tag_set.queue_depth = 64; + ubd_dev->tag_set.numa_node = NUMA_NO_NODE; + ubd_dev->tag_set.flags = BLK_MQ_F_SHOULD_MERGE; + ubd_dev->tag_set.driver_data = ubd_dev; + ubd_dev->tag_set.nr_hw_queues = 1; + + err = blk_mq_alloc_tag_set(&ubd_dev->tag_set); + if (err) + goto out; + + disk = blk_mq_alloc_disk(&ubd_dev->tag_set, ubd_dev); + if (IS_ERR(disk)) { + err = PTR_ERR(disk); + goto out_cleanup_tags; + } + ubd_dev->queue = disk->queue; + + blk_queue_write_cache(ubd_dev->queue, true, false); + blk_queue_max_segments(ubd_dev->queue, MAX_SG); + blk_queue_segment_boundary(ubd_dev->queue, PAGE_SIZE - 1); + err = ubd_disk_register(UBD_MAJOR, ubd_dev->size, n, disk); + if (err) + goto out_cleanup_disk; + + ubd_gendisk[n] = disk; + return 0; + +out_cleanup_disk: + put_disk(disk); +out_cleanup_tags: + blk_mq_free_tag_set(&ubd_dev->tag_set); +out: + return err; +} + +static int ubd_config(char *str, char **error_out) +{ + int n, ret; + + /* This string is possibly broken up and stored, so it's only + * freed if ubd_setup_common fails, or if only general options + * were set. + */ + str = kstrdup(str, GFP_KERNEL); + if (str == NULL) { + *error_out = "Failed to allocate memory"; + return -ENOMEM; + } + + ret = ubd_setup_common(str, &n, error_out); + if (ret) + goto err_free; + + if (n == -1) { + ret = 0; + goto err_free; + } + + mutex_lock(&ubd_lock); + ret = ubd_add(n, error_out); + if (ret) + ubd_devs[n].file = NULL; + mutex_unlock(&ubd_lock); + +out: + return ret; + +err_free: + kfree(str); + goto out; +} + +static int ubd_get_config(char *name, char *str, int size, char **error_out) +{ + struct ubd *ubd_dev; + int n, len = 0; + + n = parse_unit(&name); + if((n >= MAX_DEV) || (n < 0)){ + *error_out = "ubd_get_config : device number out of range"; + return -1; + } + + ubd_dev = &ubd_devs[n]; + mutex_lock(&ubd_lock); + + if(ubd_dev->file == NULL){ + CONFIG_CHUNK(str, size, len, "", 1); + goto out; + } + + CONFIG_CHUNK(str, size, len, ubd_dev->file, 0); + + if(ubd_dev->cow.file != NULL){ + CONFIG_CHUNK(str, size, len, ",", 0); + CONFIG_CHUNK(str, size, len, ubd_dev->cow.file, 1); + } + else CONFIG_CHUNK(str, size, len, "", 1); + + out: + mutex_unlock(&ubd_lock); + return len; +} + +static int ubd_id(char **str, int *start_out, int *end_out) +{ + int n; + + n = parse_unit(str); + *start_out = 0; + *end_out = MAX_DEV - 1; + return n; +} + +static int ubd_remove(int n, char **error_out) +{ + struct gendisk *disk = ubd_gendisk[n]; + struct ubd *ubd_dev; + int err = -ENODEV; + + mutex_lock(&ubd_lock); + + ubd_dev = &ubd_devs[n]; + + if(ubd_dev->file == NULL) + goto out; + + /* you cannot remove a open disk */ + err = -EBUSY; + if(ubd_dev->count > 0) + goto out; + + ubd_gendisk[n] = NULL; + if(disk != NULL){ + del_gendisk(disk); + put_disk(disk); + } + + err = 0; + platform_device_unregister(&ubd_dev->pdev); +out: + mutex_unlock(&ubd_lock); + return err; +} + +/* All these are called by mconsole in process context and without + * ubd-specific locks. The structure itself is const except for .list. + */ +static struct mc_device ubd_mc = { + .list = LIST_HEAD_INIT(ubd_mc.list), + .name = "ubd", + .config = ubd_config, + .get_config = ubd_get_config, + .id = ubd_id, + .remove = ubd_remove, +}; + +static int __init ubd_mc_init(void) +{ + mconsole_register_dev(&ubd_mc); + return 0; +} + +__initcall(ubd_mc_init); + +static int __init ubd0_init(void) +{ + struct ubd *ubd_dev = &ubd_devs[0]; + + mutex_lock(&ubd_lock); + if(ubd_dev->file == NULL) + ubd_dev->file = "root_fs"; + mutex_unlock(&ubd_lock); + + return 0; +} + +__initcall(ubd0_init); + +/* Used in ubd_init, which is an initcall */ +static struct platform_driver ubd_driver = { + .driver = { + .name = DRIVER_NAME, + }, +}; + +static int __init ubd_init(void) +{ + char *error; + int i, err; + + if (register_blkdev(UBD_MAJOR, "ubd")) + return -1; + + irq_req_buffer = kmalloc_array(UBD_REQ_BUFFER_SIZE, + sizeof(struct io_thread_req *), + GFP_KERNEL + ); + irq_remainder = 0; + + if (irq_req_buffer == NULL) { + printk(KERN_ERR "Failed to initialize ubd buffering\n"); + return -1; + } + io_req_buffer = kmalloc_array(UBD_REQ_BUFFER_SIZE, + sizeof(struct io_thread_req *), + GFP_KERNEL + ); + + io_remainder = 0; + + if (io_req_buffer == NULL) { + printk(KERN_ERR "Failed to initialize ubd buffering\n"); + return -1; + } + platform_driver_register(&ubd_driver); + mutex_lock(&ubd_lock); + for (i = 0; i < MAX_DEV; i++){ + err = ubd_add(i, &error); + if(err) + printk(KERN_ERR "Failed to initialize ubd device %d :" + "%s\n", i, error); + } + mutex_unlock(&ubd_lock); + return 0; +} + +late_initcall(ubd_init); + +static int __init ubd_driver_init(void){ + unsigned long stack; + int err; + + /* Set by CONFIG_BLK_DEV_UBD_SYNC or ubd=sync.*/ + if(global_openflags.s){ + printk(KERN_INFO "ubd: Synchronous mode\n"); + /* Letting ubd=sync be like using ubd#s= instead of ubd#= is + * enough. So use anyway the io thread. */ + } + stack = alloc_stack(0, 0); + io_pid = start_io_thread(stack + PAGE_SIZE, &thread_fd); + if(io_pid < 0){ + printk(KERN_ERR + "ubd : Failed to start I/O thread (errno = %d) - " + "falling back to synchronous I/O\n", -io_pid); + io_pid = -1; + return 0; + } + err = um_request_irq(UBD_IRQ, thread_fd, IRQ_READ, ubd_intr, + 0, "ubd", ubd_devs); + if(err < 0) + printk(KERN_ERR "um_request_irq failed - errno = %d\n", -err); + return 0; +} + +device_initcall(ubd_driver_init); + +static int ubd_open(struct gendisk *disk, blk_mode_t mode) +{ + struct ubd *ubd_dev = disk->private_data; + int err = 0; + + mutex_lock(&ubd_mutex); + if(ubd_dev->count == 0){ + err = ubd_open_dev(ubd_dev); + if(err){ + printk(KERN_ERR "%s: Can't open \"%s\": errno = %d\n", + disk->disk_name, ubd_dev->file, -err); + goto out; + } + } + ubd_dev->count++; + set_disk_ro(disk, !ubd_dev->openflags.w); +out: + mutex_unlock(&ubd_mutex); + return err; +} + +static void ubd_release(struct gendisk *disk) +{ + struct ubd *ubd_dev = disk->private_data; + + mutex_lock(&ubd_mutex); + if(--ubd_dev->count == 0) + ubd_close_dev(ubd_dev); + mutex_unlock(&ubd_mutex); +} + +static void cowify_bitmap(__u64 io_offset, int length, unsigned long *cow_mask, + __u64 *cow_offset, unsigned long *bitmap, + __u64 bitmap_offset, unsigned long *bitmap_words, + __u64 bitmap_len) +{ + __u64 sector = io_offset >> SECTOR_SHIFT; + int i, update_bitmap = 0; + + for (i = 0; i < length >> SECTOR_SHIFT; i++) { + if(cow_mask != NULL) + ubd_set_bit(i, (unsigned char *) cow_mask); + if(ubd_test_bit(sector + i, (unsigned char *) bitmap)) + continue; + + update_bitmap = 1; + ubd_set_bit(sector + i, (unsigned char *) bitmap); + } + + if(!update_bitmap) + return; + + *cow_offset = sector / (sizeof(unsigned long) * 8); + + /* This takes care of the case where we're exactly at the end of the + * device, and *cow_offset + 1 is off the end. So, just back it up + * by one word. Thanks to Lynn Kerby for the fix and James McMechan + * for the original diagnosis. + */ + if (*cow_offset == (DIV_ROUND_UP(bitmap_len, + sizeof(unsigned long)) - 1)) + (*cow_offset)--; + + bitmap_words[0] = bitmap[*cow_offset]; + bitmap_words[1] = bitmap[*cow_offset + 1]; + + *cow_offset *= sizeof(unsigned long); + *cow_offset += bitmap_offset; +} + +static void cowify_req(struct io_thread_req *req, struct io_desc *segment, + unsigned long offset, unsigned long *bitmap, + __u64 bitmap_offset, __u64 bitmap_len) +{ + __u64 sector = offset >> SECTOR_SHIFT; + int i; + + if (segment->length > (sizeof(segment->sector_mask) * 8) << SECTOR_SHIFT) + panic("Operation too long"); + + if (req_op(req->req) == REQ_OP_READ) { + for (i = 0; i < segment->length >> SECTOR_SHIFT; i++) { + if(ubd_test_bit(sector + i, (unsigned char *) bitmap)) + ubd_set_bit(i, (unsigned char *) + &segment->sector_mask); + } + } else { + cowify_bitmap(offset, segment->length, &segment->sector_mask, + &segment->cow_offset, bitmap, bitmap_offset, + segment->bitmap_words, bitmap_len); + } +} + +static void ubd_map_req(struct ubd *dev, struct io_thread_req *io_req, + struct request *req) +{ + struct bio_vec bvec; + struct req_iterator iter; + int i = 0; + unsigned long byte_offset = io_req->offset; + enum req_op op = req_op(req); + + if (op == REQ_OP_WRITE_ZEROES || op == REQ_OP_DISCARD) { + io_req->io_desc[0].buffer = NULL; + io_req->io_desc[0].length = blk_rq_bytes(req); + } else { + rq_for_each_segment(bvec, req, iter) { + BUG_ON(i >= io_req->desc_cnt); + + io_req->io_desc[i].buffer = bvec_virt(&bvec); + io_req->io_desc[i].length = bvec.bv_len; + i++; + } + } + + if (dev->cow.file) { + for (i = 0; i < io_req->desc_cnt; i++) { + cowify_req(io_req, &io_req->io_desc[i], byte_offset, + dev->cow.bitmap, dev->cow.bitmap_offset, + dev->cow.bitmap_len); + byte_offset += io_req->io_desc[i].length; + } + + } +} + +static struct io_thread_req *ubd_alloc_req(struct ubd *dev, struct request *req, + int desc_cnt) +{ + struct io_thread_req *io_req; + int i; + + io_req = kmalloc(sizeof(*io_req) + + (desc_cnt * sizeof(struct io_desc)), + GFP_ATOMIC); + if (!io_req) + return NULL; + + io_req->req = req; + if (dev->cow.file) + io_req->fds[0] = dev->cow.fd; + else + io_req->fds[0] = dev->fd; + io_req->error = 0; + io_req->sectorsize = SECTOR_SIZE; + io_req->fds[1] = dev->fd; + io_req->offset = (u64) blk_rq_pos(req) << SECTOR_SHIFT; + io_req->offsets[0] = 0; + io_req->offsets[1] = dev->cow.data_offset; + + for (i = 0 ; i < desc_cnt; i++) { + io_req->io_desc[i].sector_mask = 0; + io_req->io_desc[i].cow_offset = -1; + } + + return io_req; +} + +static int ubd_submit_request(struct ubd *dev, struct request *req) +{ + int segs = 0; + struct io_thread_req *io_req; + int ret; + enum req_op op = req_op(req); + + if (op == REQ_OP_FLUSH) + segs = 0; + else if (op == REQ_OP_WRITE_ZEROES || op == REQ_OP_DISCARD) + segs = 1; + else + segs = blk_rq_nr_phys_segments(req); + + io_req = ubd_alloc_req(dev, req, segs); + if (!io_req) + return -ENOMEM; + + io_req->desc_cnt = segs; + if (segs) + ubd_map_req(dev, io_req, req); + + ret = os_write_file(thread_fd, &io_req, sizeof(io_req)); + if (ret != sizeof(io_req)) { + if (ret != -EAGAIN) + pr_err("write to io thread failed: %d\n", -ret); + kfree(io_req); + } + return ret; +} + +static blk_status_t ubd_queue_rq(struct blk_mq_hw_ctx *hctx, + const struct blk_mq_queue_data *bd) +{ + struct ubd *ubd_dev = hctx->queue->queuedata; + struct request *req = bd->rq; + int ret = 0, res = BLK_STS_OK; + + blk_mq_start_request(req); + + spin_lock_irq(&ubd_dev->lock); + + switch (req_op(req)) { + case REQ_OP_FLUSH: + case REQ_OP_READ: + case REQ_OP_WRITE: + case REQ_OP_DISCARD: + case REQ_OP_WRITE_ZEROES: + ret = ubd_submit_request(ubd_dev, req); + break; + default: + WARN_ON_ONCE(1); + res = BLK_STS_NOTSUPP; + } + + spin_unlock_irq(&ubd_dev->lock); + + if (ret < 0) { + if (ret == -ENOMEM) + res = BLK_STS_RESOURCE; + else + res = BLK_STS_DEV_RESOURCE; + } + + return res; +} + +static int ubd_getgeo(struct block_device *bdev, struct hd_geometry *geo) +{ + struct ubd *ubd_dev = bdev->bd_disk->private_data; + + geo->heads = 128; + geo->sectors = 32; + geo->cylinders = ubd_dev->size / (128 * 32 * 512); + return 0; +} + +static int ubd_ioctl(struct block_device *bdev, blk_mode_t mode, + unsigned int cmd, unsigned long arg) +{ + struct ubd *ubd_dev = bdev->bd_disk->private_data; + u16 ubd_id[ATA_ID_WORDS]; + + switch (cmd) { + struct cdrom_volctrl volume; + case HDIO_GET_IDENTITY: + memset(&ubd_id, 0, ATA_ID_WORDS * 2); + ubd_id[ATA_ID_CYLS] = ubd_dev->size / (128 * 32 * 512); + ubd_id[ATA_ID_HEADS] = 128; + ubd_id[ATA_ID_SECTORS] = 32; + if(copy_to_user((char __user *) arg, (char *) &ubd_id, + sizeof(ubd_id))) + return -EFAULT; + return 0; + + case CDROMVOLREAD: + if(copy_from_user(&volume, (char __user *) arg, sizeof(volume))) + return -EFAULT; + volume.channel0 = 255; + volume.channel1 = 255; + volume.channel2 = 255; + volume.channel3 = 255; + if(copy_to_user((char __user *) arg, &volume, sizeof(volume))) + return -EFAULT; + return 0; + } + return -EINVAL; +} + +static int map_error(int error_code) +{ + switch (error_code) { + case 0: + return BLK_STS_OK; + case ENOSYS: + case EOPNOTSUPP: + return BLK_STS_NOTSUPP; + case ENOSPC: + return BLK_STS_NOSPC; + } + return BLK_STS_IOERR; +} + +/* + * Everything from here onwards *IS NOT PART OF THE KERNEL* + * + * The following functions are part of UML hypervisor code. + * All functions from here onwards are executed as a helper + * thread and are not allowed to execute any kernel functions. + * + * Any communication must occur strictly via shared memory and IPC. + * + * Do not add printks, locks, kernel memory operations, etc - it + * will result in unpredictable behaviour and/or crashes. + */ + +static int update_bitmap(struct io_thread_req *req, struct io_desc *segment) +{ + int n; + + if (segment->cow_offset == -1) + return map_error(0); + + n = os_pwrite_file(req->fds[1], &segment->bitmap_words, + sizeof(segment->bitmap_words), segment->cow_offset); + if (n != sizeof(segment->bitmap_words)) + return map_error(-n); + + return map_error(0); +} + +static void do_io(struct io_thread_req *req, struct io_desc *desc) +{ + char *buf = NULL; + unsigned long len; + int n, nsectors, start, end, bit; + __u64 off; + + /* FLUSH is really a special case, we cannot "case" it with others */ + + if (req_op(req->req) == REQ_OP_FLUSH) { + /* fds[0] is always either the rw image or our cow file */ + req->error = map_error(-os_sync_file(req->fds[0])); + return; + } + + nsectors = desc->length / req->sectorsize; + start = 0; + do { + bit = ubd_test_bit(start, (unsigned char *) &desc->sector_mask); + end = start; + while((end < nsectors) && + (ubd_test_bit(end, (unsigned char *) &desc->sector_mask) == bit)) + end++; + + off = req->offset + req->offsets[bit] + + start * req->sectorsize; + len = (end - start) * req->sectorsize; + if (desc->buffer != NULL) + buf = &desc->buffer[start * req->sectorsize]; + + switch (req_op(req->req)) { + case REQ_OP_READ: + n = 0; + do { + buf = &buf[n]; + len -= n; + n = os_pread_file(req->fds[bit], buf, len, off); + if (n < 0) { + req->error = map_error(-n); + return; + } + } while((n < len) && (n != 0)); + if (n < len) memset(&buf[n], 0, len - n); + break; + case REQ_OP_WRITE: + n = os_pwrite_file(req->fds[bit], buf, len, off); + if(n != len){ + req->error = map_error(-n); + return; + } + break; + case REQ_OP_DISCARD: + n = os_falloc_punch(req->fds[bit], off, len); + if (n) { + req->error = map_error(-n); + return; + } + break; + case REQ_OP_WRITE_ZEROES: + n = os_falloc_zeroes(req->fds[bit], off, len); + if (n) { + req->error = map_error(-n); + return; + } + break; + default: + WARN_ON_ONCE(1); + req->error = BLK_STS_NOTSUPP; + return; + } + + start = end; + } while(start < nsectors); + + req->offset += len; + req->error = update_bitmap(req, desc); +} + +/* Changed in start_io_thread, which is serialized by being called only + * from ubd_init, which is an initcall. + */ +int kernel_fd = -1; + +/* Only changed by the io thread. XXX: currently unused. */ +static int io_count; + +int io_thread(void *arg) +{ + int n, count, written, res; + + os_fix_helper_signals(); + + while(1){ + n = bulk_req_safe_read( + kernel_fd, + io_req_buffer, + &io_remainder, + &io_remainder_size, + UBD_REQ_BUFFER_SIZE + ); + if (n <= 0) { + if (n == -EAGAIN) + ubd_read_poll(-1); + + continue; + } + + for (count = 0; count < n/sizeof(struct io_thread_req *); count++) { + struct io_thread_req *req = (*io_req_buffer)[count]; + int i; + + io_count++; + for (i = 0; !req->error && i < req->desc_cnt; i++) + do_io(req, &(req->io_desc[i])); + + } + + written = 0; + + do { + res = os_write_file(kernel_fd, + ((char *) io_req_buffer) + written, + n - written); + if (res >= 0) { + written += res; + } + if (written < n) { + ubd_write_poll(-1); + } + } while (written < n); + } + + return 0; +} diff --git a/arch/um/drivers/ubd_user.c b/arch/um/drivers/ubd_user.c new file mode 100644 index 0000000000..a1afe414ce --- /dev/null +++ b/arch/um/drivers/ubd_user.c @@ -0,0 +1,78 @@ +// SPDX-License-Identifier: GPL-2.0 +/* + * Copyright (C) 2016 Anton Ivanov (aivanov@brocade.com) + * Copyright (C) 2000, 2001, 2002 Jeff Dike (jdike@karaya.com) + * Copyright (C) 2001 Ridgerun,Inc (glonnon@ridgerun.com) + */ + +#include <stddef.h> +#include <unistd.h> +#include <errno.h> +#include <sched.h> +#include <signal.h> +#include <string.h> +#include <netinet/in.h> +#include <sys/time.h> +#include <sys/socket.h> +#include <sys/mman.h> +#include <sys/param.h> +#include <endian.h> +#include <byteswap.h> + +#include "ubd.h" +#include <os.h> +#include <poll.h> + +struct pollfd kernel_pollfd; + +int start_io_thread(unsigned long sp, int *fd_out) +{ + int pid, fds[2], err; + + err = os_pipe(fds, 1, 1); + if(err < 0){ + printk("start_io_thread - os_pipe failed, err = %d\n", -err); + goto out; + } + + kernel_fd = fds[0]; + kernel_pollfd.fd = kernel_fd; + kernel_pollfd.events = POLLIN; + *fd_out = fds[1]; + + err = os_set_fd_block(*fd_out, 0); + err = os_set_fd_block(kernel_fd, 0); + if (err) { + printk("start_io_thread - failed to set nonblocking I/O.\n"); + goto out_close; + } + + pid = clone(io_thread, (void *) sp, CLONE_FILES | CLONE_VM, NULL); + if(pid < 0){ + err = -errno; + printk("start_io_thread - clone failed : errno = %d\n", errno); + goto out_close; + } + + return(pid); + + out_close: + os_close_file(fds[0]); + os_close_file(fds[1]); + kernel_fd = -1; + *fd_out = -1; + out: + return err; +} + +int ubd_read_poll(int timeout) +{ + kernel_pollfd.events = POLLIN; + return poll(&kernel_pollfd, 1, timeout); +} +int ubd_write_poll(int timeout) +{ + kernel_pollfd.events = POLLOUT; + return poll(&kernel_pollfd, 1, timeout); +} + diff --git a/arch/um/drivers/umcast.h b/arch/um/drivers/umcast.h new file mode 100644 index 0000000000..fe39bee1e3 --- /dev/null +++ b/arch/um/drivers/umcast.h @@ -0,0 +1,27 @@ +/* SPDX-License-Identifier: GPL-2.0 */ +/* + * Copyright (C) 2001 - 2007 Jeff Dike (jdike@{addtoit,linux.intel}.com) + */ + +#ifndef __DRIVERS_UMCAST_H +#define __DRIVERS_UMCAST_H + +#include <net_user.h> + +struct umcast_data { + char *addr; + unsigned short lport; + unsigned short rport; + void *listen_addr; + void *remote_addr; + int ttl; + int unicast; + void *dev; +}; + +extern const struct net_user_info umcast_user_info; + +extern int umcast_user_write(int fd, void *buf, int len, + struct umcast_data *pri); + +#endif diff --git a/arch/um/drivers/umcast_kern.c b/arch/um/drivers/umcast_kern.c new file mode 100644 index 0000000000..595a54f2b9 --- /dev/null +++ b/arch/um/drivers/umcast_kern.c @@ -0,0 +1,188 @@ +// SPDX-License-Identifier: GPL-2.0 +/* + * user-mode-linux networking multicast transport + * Copyright (C) 2001 by Harald Welte <laforge@gnumonks.org> + * Copyright (C) 2001 - 2007 Jeff Dike (jdike@{addtoit,linux.intel}.com) + * + * based on the existing uml-networking code, which is + * Copyright (C) 2001 Lennert Buytenhek (buytenh@gnu.org) and + * James Leu (jleu@mindspring.net). + * Copyright (C) 2001 by various other people who didn't put their name here. + * + */ + +#include <linux/init.h> +#include <linux/netdevice.h> +#include "umcast.h" +#include <net_kern.h> + +struct umcast_init { + char *addr; + int lport; + int rport; + int ttl; + bool unicast; +}; + +static void umcast_init(struct net_device *dev, void *data) +{ + struct uml_net_private *pri; + struct umcast_data *dpri; + struct umcast_init *init = data; + + pri = netdev_priv(dev); + dpri = (struct umcast_data *) pri->user; + dpri->addr = init->addr; + dpri->lport = init->lport; + dpri->rport = init->rport; + dpri->unicast = init->unicast; + dpri->ttl = init->ttl; + dpri->dev = dev; + + if (dpri->unicast) { + printk(KERN_INFO "ucast backend address: %s:%u listen port: " + "%u\n", dpri->addr, dpri->rport, dpri->lport); + } else { + printk(KERN_INFO "mcast backend multicast address: %s:%u, " + "TTL:%u\n", dpri->addr, dpri->lport, dpri->ttl); + } +} + +static int umcast_read(int fd, struct sk_buff *skb, struct uml_net_private *lp) +{ + return net_recvfrom(fd, skb_mac_header(skb), + skb->dev->mtu + ETH_HEADER_OTHER); +} + +static int umcast_write(int fd, struct sk_buff *skb, struct uml_net_private *lp) +{ + return umcast_user_write(fd, skb->data, skb->len, + (struct umcast_data *) &lp->user); +} + +static const struct net_kern_info umcast_kern_info = { + .init = umcast_init, + .protocol = eth_protocol, + .read = umcast_read, + .write = umcast_write, +}; + +static int mcast_setup(char *str, char **mac_out, void *data) +{ + struct umcast_init *init = data; + char *port_str = NULL, *ttl_str = NULL, *remain; + char *last; + + *init = ((struct umcast_init) + { .addr = "239.192.168.1", + .lport = 1102, + .ttl = 1 }); + + remain = split_if_spec(str, mac_out, &init->addr, &port_str, &ttl_str, + NULL); + if (remain != NULL) { + printk(KERN_ERR "mcast_setup - Extra garbage on " + "specification : '%s'\n", remain); + return 0; + } + + if (port_str != NULL) { + init->lport = simple_strtoul(port_str, &last, 10); + if ((*last != '\0') || (last == port_str)) { + printk(KERN_ERR "mcast_setup - Bad port : '%s'\n", + port_str); + return 0; + } + } + + if (ttl_str != NULL) { + init->ttl = simple_strtoul(ttl_str, &last, 10); + if ((*last != '\0') || (last == ttl_str)) { + printk(KERN_ERR "mcast_setup - Bad ttl : '%s'\n", + ttl_str); + return 0; + } + } + + init->unicast = false; + init->rport = init->lport; + + printk(KERN_INFO "Configured mcast device: %s:%u-%u\n", init->addr, + init->lport, init->ttl); + + return 1; +} + +static int ucast_setup(char *str, char **mac_out, void *data) +{ + struct umcast_init *init = data; + char *lport_str = NULL, *rport_str = NULL, *remain; + char *last; + + *init = ((struct umcast_init) + { .addr = "", + .lport = 1102, + .rport = 1102 }); + + remain = split_if_spec(str, mac_out, &init->addr, + &lport_str, &rport_str, NULL); + if (remain != NULL) { + printk(KERN_ERR "ucast_setup - Extra garbage on " + "specification : '%s'\n", remain); + return 0; + } + + if (lport_str != NULL) { + init->lport = simple_strtoul(lport_str, &last, 10); + if ((*last != '\0') || (last == lport_str)) { + printk(KERN_ERR "ucast_setup - Bad listen port : " + "'%s'\n", lport_str); + return 0; + } + } + + if (rport_str != NULL) { + init->rport = simple_strtoul(rport_str, &last, 10); + if ((*last != '\0') || (last == rport_str)) { + printk(KERN_ERR "ucast_setup - Bad remote port : " + "'%s'\n", rport_str); + return 0; + } + } + + init->unicast = true; + + printk(KERN_INFO "Configured ucast device: :%u -> %s:%u\n", + init->lport, init->addr, init->rport); + + return 1; +} + +static struct transport mcast_transport = { + .list = LIST_HEAD_INIT(mcast_transport.list), + .name = "mcast", + .setup = mcast_setup, + .user = &umcast_user_info, + .kern = &umcast_kern_info, + .private_size = sizeof(struct umcast_data), + .setup_size = sizeof(struct umcast_init), +}; + +static struct transport ucast_transport = { + .list = LIST_HEAD_INIT(ucast_transport.list), + .name = "ucast", + .setup = ucast_setup, + .user = &umcast_user_info, + .kern = &umcast_kern_info, + .private_size = sizeof(struct umcast_data), + .setup_size = sizeof(struct umcast_init), +}; + +static int register_umcast(void) +{ + register_transport(&mcast_transport); + register_transport(&ucast_transport); + return 0; +} + +late_initcall(register_umcast); diff --git a/arch/um/drivers/umcast_user.c b/arch/um/drivers/umcast_user.c new file mode 100644 index 0000000000..b50b13cff0 --- /dev/null +++ b/arch/um/drivers/umcast_user.c @@ -0,0 +1,184 @@ +// SPDX-License-Identifier: GPL-2.0 +/* + * user-mode-linux networking multicast transport + * Copyright (C) 2001 - 2007 Jeff Dike (jdike@{addtoit,linux.intel}.com) + * Copyright (C) 2001 by Harald Welte <laforge@gnumonks.org> + * + * based on the existing uml-networking code, which is + * Copyright (C) 2001 Lennert Buytenhek (buytenh@gnu.org) and + * James Leu (jleu@mindspring.net). + * Copyright (C) 2001 by various other people who didn't put their name here. + * + * + */ + +#include <unistd.h> +#include <errno.h> +#include <netinet/in.h> +#include "umcast.h" +#include <net_user.h> +#include <um_malloc.h> + +static struct sockaddr_in *new_addr(char *addr, unsigned short port) +{ + struct sockaddr_in *sin; + + sin = uml_kmalloc(sizeof(struct sockaddr_in), UM_GFP_KERNEL); + if (sin == NULL) { + printk(UM_KERN_ERR "new_addr: allocation of sockaddr_in " + "failed\n"); + return NULL; + } + sin->sin_family = AF_INET; + if (addr) + sin->sin_addr.s_addr = in_aton(addr); + else + sin->sin_addr.s_addr = INADDR_ANY; + sin->sin_port = htons(port); + return sin; +} + +static int umcast_user_init(void *data, void *dev) +{ + struct umcast_data *pri = data; + + pri->remote_addr = new_addr(pri->addr, pri->rport); + if (pri->unicast) + pri->listen_addr = new_addr(NULL, pri->lport); + else + pri->listen_addr = pri->remote_addr; + pri->dev = dev; + return 0; +} + +static void umcast_remove(void *data) +{ + struct umcast_data *pri = data; + + kfree(pri->listen_addr); + if (pri->unicast) + kfree(pri->remote_addr); + pri->listen_addr = pri->remote_addr = NULL; +} + +static int umcast_open(void *data) +{ + struct umcast_data *pri = data; + struct sockaddr_in *lsin = pri->listen_addr; + struct sockaddr_in *rsin = pri->remote_addr; + struct ip_mreq mreq; + int fd, yes = 1, err = -EINVAL; + + + if ((!pri->unicast && lsin->sin_addr.s_addr == 0) || + (rsin->sin_addr.s_addr == 0) || + (lsin->sin_port == 0) || (rsin->sin_port == 0)) + goto out; + + fd = socket(AF_INET, SOCK_DGRAM, 0); + + if (fd < 0) { + err = -errno; + printk(UM_KERN_ERR "umcast_open : data socket failed, " + "errno = %d\n", errno); + goto out; + } + + if (setsockopt(fd, SOL_SOCKET, SO_REUSEADDR, &yes, sizeof(yes)) < 0) { + err = -errno; + printk(UM_KERN_ERR "umcast_open: SO_REUSEADDR failed, " + "errno = %d\n", errno); + goto out_close; + } + + if (!pri->unicast) { + /* set ttl according to config */ + if (setsockopt(fd, SOL_IP, IP_MULTICAST_TTL, &pri->ttl, + sizeof(pri->ttl)) < 0) { + err = -errno; + printk(UM_KERN_ERR "umcast_open: IP_MULTICAST_TTL " + "failed, error = %d\n", errno); + goto out_close; + } + + /* set LOOP, so data does get fed back to local sockets */ + if (setsockopt(fd, SOL_IP, IP_MULTICAST_LOOP, + &yes, sizeof(yes)) < 0) { + err = -errno; + printk(UM_KERN_ERR "umcast_open: IP_MULTICAST_LOOP " + "failed, error = %d\n", errno); + goto out_close; + } + } + + /* bind socket to the address */ + if (bind(fd, (struct sockaddr *) lsin, sizeof(*lsin)) < 0) { + err = -errno; + printk(UM_KERN_ERR "umcast_open : data bind failed, " + "errno = %d\n", errno); + goto out_close; + } + + if (!pri->unicast) { + /* subscribe to the multicast group */ + mreq.imr_multiaddr.s_addr = lsin->sin_addr.s_addr; + mreq.imr_interface.s_addr = 0; + if (setsockopt(fd, SOL_IP, IP_ADD_MEMBERSHIP, + &mreq, sizeof(mreq)) < 0) { + err = -errno; + printk(UM_KERN_ERR "umcast_open: IP_ADD_MEMBERSHIP " + "failed, error = %d\n", errno); + printk(UM_KERN_ERR "There appears not to be a " + "multicast-capable network interface on the " + "host.\n"); + printk(UM_KERN_ERR "eth0 should be configured in order " + "to use the multicast transport.\n"); + goto out_close; + } + } + + return fd; + + out_close: + close(fd); + out: + return err; +} + +static void umcast_close(int fd, void *data) +{ + struct umcast_data *pri = data; + + if (!pri->unicast) { + struct ip_mreq mreq; + struct sockaddr_in *lsin = pri->listen_addr; + + mreq.imr_multiaddr.s_addr = lsin->sin_addr.s_addr; + mreq.imr_interface.s_addr = 0; + if (setsockopt(fd, SOL_IP, IP_DROP_MEMBERSHIP, + &mreq, sizeof(mreq)) < 0) { + printk(UM_KERN_ERR "umcast_close: IP_DROP_MEMBERSHIP " + "failed, error = %d\n", errno); + } + } + + close(fd); +} + +int umcast_user_write(int fd, void *buf, int len, struct umcast_data *pri) +{ + struct sockaddr_in *data_addr = pri->remote_addr; + + return net_sendto(fd, buf, len, data_addr, sizeof(*data_addr)); +} + +const struct net_user_info umcast_user_info = { + .init = umcast_user_init, + .open = umcast_open, + .close = umcast_close, + .remove = umcast_remove, + .add_address = NULL, + .delete_address = NULL, + .mtu = ETH_MAX_PACKET, + .max_packet = ETH_MAX_PACKET + ETH_HEADER_OTHER, +}; diff --git a/arch/um/drivers/vde.h b/arch/um/drivers/vde.h new file mode 100644 index 0000000000..cab0379e61 --- /dev/null +++ b/arch/um/drivers/vde.h @@ -0,0 +1,32 @@ +/* SPDX-License-Identifier: GPL-2.0 */ +/* + * Copyright (C) 2007 Luca Bigliardi (shammash@artha.org). + */ + +#ifndef __UM_VDE_H__ +#define __UM_VDE_H__ + +struct vde_data { + char *vde_switch; + char *descr; + void *args; + void *conn; + void *dev; +}; + +struct vde_init { + char *vde_switch; + char *descr; + int port; + char *group; + int mode; +}; + +extern const struct net_user_info vde_user_info; + +extern void vde_init_libstuff(struct vde_data *vpri, struct vde_init *init); + +extern int vde_user_read(void *conn, void *buf, int len); +extern int vde_user_write(void *conn, void *buf, int len); + +#endif diff --git a/arch/um/drivers/vde_kern.c b/arch/um/drivers/vde_kern.c new file mode 100644 index 0000000000..bc6f22cbfb --- /dev/null +++ b/arch/um/drivers/vde_kern.c @@ -0,0 +1,129 @@ +// SPDX-License-Identifier: GPL-2.0 +/* + * Copyright (C) 2007 Luca Bigliardi (shammash@artha.org). + * + * Transport usage: + * ethN=vde,<vde_switch>,<mac addr>,<port>,<group>,<mode>,<description> + * + */ + +#include <linux/init.h> +#include <linux/netdevice.h> +#include <net_kern.h> +#include <net_user.h> +#include "vde.h" + +static void vde_init(struct net_device *dev, void *data) +{ + struct vde_init *init = data; + struct uml_net_private *pri; + struct vde_data *vpri; + + pri = netdev_priv(dev); + vpri = (struct vde_data *) pri->user; + + vpri->vde_switch = init->vde_switch; + vpri->descr = init->descr ? init->descr : "UML vde_transport"; + vpri->args = NULL; + vpri->conn = NULL; + vpri->dev = dev; + + printk("vde backend - %s, ", vpri->vde_switch ? + vpri->vde_switch : "(default socket)"); + + vde_init_libstuff(vpri, init); + + printk("\n"); +} + +static int vde_read(int fd, struct sk_buff *skb, struct uml_net_private *lp) +{ + struct vde_data *pri = (struct vde_data *) &lp->user; + + if (pri->conn != NULL) + return vde_user_read(pri->conn, skb_mac_header(skb), + skb->dev->mtu + ETH_HEADER_OTHER); + + printk(KERN_ERR "vde_read - we have no VDECONN to read from"); + return -EBADF; +} + +static int vde_write(int fd, struct sk_buff *skb, struct uml_net_private *lp) +{ + struct vde_data *pri = (struct vde_data *) &lp->user; + + if (pri->conn != NULL) + return vde_user_write((void *)pri->conn, skb->data, + skb->len); + + printk(KERN_ERR "vde_write - we have no VDECONN to write to"); + return -EBADF; +} + +static const struct net_kern_info vde_kern_info = { + .init = vde_init, + .protocol = eth_protocol, + .read = vde_read, + .write = vde_write, +}; + +static int vde_setup(char *str, char **mac_out, void *data) +{ + struct vde_init *init = data; + char *remain, *port_str = NULL, *mode_str = NULL, *last; + + *init = ((struct vde_init) + { .vde_switch = NULL, + .descr = NULL, + .port = 0, + .group = NULL, + .mode = 0 }); + + remain = split_if_spec(str, &init->vde_switch, mac_out, &port_str, + &init->group, &mode_str, &init->descr, NULL); + + if (remain != NULL) + printk(KERN_WARNING "vde_setup - Ignoring extra data :" + "'%s'\n", remain); + + if (port_str != NULL) { + init->port = simple_strtoul(port_str, &last, 10); + if ((*last != '\0') || (last == port_str)) { + printk(KERN_ERR "vde_setup - Bad port : '%s'\n", + port_str); + return 0; + } + } + + if (mode_str != NULL) { + init->mode = simple_strtoul(mode_str, &last, 8); + if ((*last != '\0') || (last == mode_str)) { + printk(KERN_ERR "vde_setup - Bad mode : '%s'\n", + mode_str); + return 0; + } + } + + printk(KERN_INFO "Configured vde device: %s\n", init->vde_switch ? + init->vde_switch : "(default socket)"); + + return 1; +} + +static struct transport vde_transport = { + .list = LIST_HEAD_INIT(vde_transport.list), + .name = "vde", + .setup = vde_setup, + .user = &vde_user_info, + .kern = &vde_kern_info, + .private_size = sizeof(struct vde_data), + .setup_size = sizeof(struct vde_init), +}; + +static int register_vde(void) +{ + register_transport(&vde_transport); + return 0; +} + +late_initcall(register_vde); diff --git a/arch/um/drivers/vde_user.c b/arch/um/drivers/vde_user.c new file mode 100644 index 0000000000..bc7dc4e1e4 --- /dev/null +++ b/arch/um/drivers/vde_user.c @@ -0,0 +1,125 @@ +// SPDX-License-Identifier: GPL-2.0 +/* + * Copyright (C) 2007 Luca Bigliardi (shammash@artha.org). + */ + +#include <stddef.h> +#include <errno.h> +#include <libvdeplug.h> +#include <net_user.h> +#include <um_malloc.h> +#include "vde.h" + +static int vde_user_init(void *data, void *dev) +{ + struct vde_data *pri = data; + VDECONN *conn = NULL; + int err = -EINVAL; + + pri->dev = dev; + + conn = vde_open(pri->vde_switch, pri->descr, pri->args); + + if (conn == NULL) { + err = -errno; + printk(UM_KERN_ERR "vde_user_init: vde_open failed, " + "errno = %d\n", errno); + return err; + } + + printk(UM_KERN_INFO "vde backend - connection opened\n"); + + pri->conn = conn; + + return 0; +} + +static int vde_user_open(void *data) +{ + struct vde_data *pri = data; + + if (pri->conn != NULL) + return vde_datafd(pri->conn); + + printk(UM_KERN_WARNING "vde_open - we have no VDECONN to open"); + return -EINVAL; +} + +static void vde_remove(void *data) +{ + struct vde_data *pri = data; + + if (pri->conn != NULL) { + printk(UM_KERN_INFO "vde backend - closing connection\n"); + vde_close(pri->conn); + pri->conn = NULL; + kfree(pri->args); + pri->args = NULL; + return; + } + + printk(UM_KERN_WARNING "vde_remove - we have no VDECONN to remove"); +} + +const struct net_user_info vde_user_info = { + .init = vde_user_init, + .open = vde_user_open, + .close = NULL, + .remove = vde_remove, + .add_address = NULL, + .delete_address = NULL, + .mtu = ETH_MAX_PACKET, + .max_packet = ETH_MAX_PACKET + ETH_HEADER_OTHER, +}; + +void vde_init_libstuff(struct vde_data *vpri, struct vde_init *init) +{ + struct vde_open_args *args; + + vpri->args = uml_kmalloc(sizeof(struct vde_open_args), UM_GFP_KERNEL); + if (vpri->args == NULL) { + printk(UM_KERN_ERR "vde_init_libstuff - vde_open_args " + "allocation failed"); + return; + } + + args = vpri->args; + + args->port = init->port; + args->group = init->group; + args->mode = init->mode ? init->mode : 0700; + + args->port ? printk("port %d", args->port) : + printk("undefined port"); +} + +int vde_user_read(void *conn, void *buf, int len) +{ + VDECONN *vconn = conn; + int rv; + + if (vconn == NULL) + return 0; + + rv = vde_recv(vconn, buf, len, 0); + if (rv < 0) { + if (errno == EAGAIN) + return 0; + return -errno; + } + else if (rv == 0) + return -ENOTCONN; + + return rv; +} + +int vde_user_write(void *conn, void *buf, int len) +{ + VDECONN *vconn = conn; + + if (vconn == NULL) + return 0; + + return vde_send(vconn, buf, len, 0); +} + diff --git a/arch/um/drivers/vector_kern.c b/arch/um/drivers/vector_kern.c new file mode 100644 index 0000000000..131b7cb295 --- /dev/null +++ b/arch/um/drivers/vector_kern.c @@ -0,0 +1,1766 @@ +// SPDX-License-Identifier: GPL-2.0 +/* + * Copyright (C) 2017 - 2019 Cambridge Greys Limited + * Copyright (C) 2011 - 2014 Cisco Systems Inc + * Copyright (C) 2001 - 2007 Jeff Dike (jdike@{addtoit,linux.intel}.com) + * Copyright (C) 2001 Lennert Buytenhek (buytenh@gnu.org) and + * James Leu (jleu@mindspring.net). + * Copyright (C) 2001 by various other people who didn't put their name here. + */ + +#include <linux/memblock.h> +#include <linux/etherdevice.h> +#include <linux/ethtool.h> +#include <linux/inetdevice.h> +#include <linux/init.h> +#include <linux/list.h> +#include <linux/netdevice.h> +#include <linux/platform_device.h> +#include <linux/rtnetlink.h> +#include <linux/skbuff.h> +#include <linux/slab.h> +#include <linux/interrupt.h> +#include <linux/firmware.h> +#include <linux/fs.h> +#include <uapi/linux/filter.h> +#include <init.h> +#include <irq_kern.h> +#include <irq_user.h> +#include <net_kern.h> +#include <os.h> +#include "mconsole_kern.h" +#include "vector_user.h" +#include "vector_kern.h" + +/* + * Adapted from network devices with the following major changes: + * All transports are static - simplifies the code significantly + * Multiple FDs/IRQs per device + * Vector IO optionally used for read/write, falling back to legacy + * based on configuration and/or availability + * Configuration is no longer positional - L2TPv3 and GRE require up to + * 10 parameters, passing this as positional is not fit for purpose. + * Only socket transports are supported + */ + + +#define DRIVER_NAME "uml-vector" +struct vector_cmd_line_arg { + struct list_head list; + int unit; + char *arguments; +}; + +struct vector_device { + struct list_head list; + struct net_device *dev; + struct platform_device pdev; + int unit; + int opened; +}; + +static LIST_HEAD(vec_cmd_line); + +static DEFINE_SPINLOCK(vector_devices_lock); +static LIST_HEAD(vector_devices); + +static int driver_registered; + +static void vector_eth_configure(int n, struct arglist *def); +static int vector_mmsg_rx(struct vector_private *vp, int budget); + +/* Argument accessors to set variables (and/or set default values) + * mtu, buffer sizing, default headroom, etc + */ + +#define DEFAULT_HEADROOM 2 +#define SAFETY_MARGIN 32 +#define DEFAULT_VECTOR_SIZE 64 +#define TX_SMALL_PACKET 128 +#define MAX_IOV_SIZE (MAX_SKB_FRAGS + 1) + +static const struct { + const char string[ETH_GSTRING_LEN]; +} ethtool_stats_keys[] = { + { "rx_queue_max" }, + { "rx_queue_running_average" }, + { "tx_queue_max" }, + { "tx_queue_running_average" }, + { "rx_encaps_errors" }, + { "tx_timeout_count" }, + { "tx_restart_queue" }, + { "tx_kicks" }, + { "tx_flow_control_xon" }, + { "tx_flow_control_xoff" }, + { "rx_csum_offload_good" }, + { "rx_csum_offload_errors"}, + { "sg_ok"}, + { "sg_linearized"}, +}; + +#define VECTOR_NUM_STATS ARRAY_SIZE(ethtool_stats_keys) + +static void vector_reset_stats(struct vector_private *vp) +{ + vp->estats.rx_queue_max = 0; + vp->estats.rx_queue_running_average = 0; + vp->estats.tx_queue_max = 0; + vp->estats.tx_queue_running_average = 0; + vp->estats.rx_encaps_errors = 0; + vp->estats.tx_timeout_count = 0; + vp->estats.tx_restart_queue = 0; + vp->estats.tx_kicks = 0; + vp->estats.tx_flow_control_xon = 0; + vp->estats.tx_flow_control_xoff = 0; + vp->estats.sg_ok = 0; + vp->estats.sg_linearized = 0; +} + +static int get_mtu(struct arglist *def) +{ + char *mtu = uml_vector_fetch_arg(def, "mtu"); + long result; + + if (mtu != NULL) { + if (kstrtoul(mtu, 10, &result) == 0) + if ((result < (1 << 16) - 1) && (result >= 576)) + return result; + } + return ETH_MAX_PACKET; +} + +static char *get_bpf_file(struct arglist *def) +{ + return uml_vector_fetch_arg(def, "bpffile"); +} + +static bool get_bpf_flash(struct arglist *def) +{ + char *allow = uml_vector_fetch_arg(def, "bpfflash"); + long result; + + if (allow != NULL) { + if (kstrtoul(allow, 10, &result) == 0) + return (allow > 0); + } + return false; +} + +static int get_depth(struct arglist *def) +{ + char *mtu = uml_vector_fetch_arg(def, "depth"); + long result; + + if (mtu != NULL) { + if (kstrtoul(mtu, 10, &result) == 0) + return result; + } + return DEFAULT_VECTOR_SIZE; +} + +static int get_headroom(struct arglist *def) +{ + char *mtu = uml_vector_fetch_arg(def, "headroom"); + long result; + + if (mtu != NULL) { + if (kstrtoul(mtu, 10, &result) == 0) + return result; + } + return DEFAULT_HEADROOM; +} + +static int get_req_size(struct arglist *def) +{ + char *gro = uml_vector_fetch_arg(def, "gro"); + long result; + + if (gro != NULL) { + if (kstrtoul(gro, 10, &result) == 0) { + if (result > 0) + return 65536; + } + } + return get_mtu(def) + ETH_HEADER_OTHER + + get_headroom(def) + SAFETY_MARGIN; +} + + +static int get_transport_options(struct arglist *def) +{ + char *transport = uml_vector_fetch_arg(def, "transport"); + char *vector = uml_vector_fetch_arg(def, "vec"); + + int vec_rx = VECTOR_RX; + int vec_tx = VECTOR_TX; + long parsed; + int result = 0; + + if (transport == NULL) + return -EINVAL; + + if (vector != NULL) { + if (kstrtoul(vector, 10, &parsed) == 0) { + if (parsed == 0) { + vec_rx = 0; + vec_tx = 0; + } + } + } + + if (get_bpf_flash(def)) + result = VECTOR_BPF_FLASH; + + if (strncmp(transport, TRANS_TAP, TRANS_TAP_LEN) == 0) + return result; + if (strncmp(transport, TRANS_HYBRID, TRANS_HYBRID_LEN) == 0) + return (result | vec_rx | VECTOR_BPF); + if (strncmp(transport, TRANS_RAW, TRANS_RAW_LEN) == 0) + return (result | vec_rx | vec_tx | VECTOR_QDISC_BYPASS); + return (result | vec_rx | vec_tx); +} + + +/* A mini-buffer for packet drop read + * All of our supported transports are datagram oriented and we always + * read using recvmsg or recvmmsg. If we pass a buffer which is smaller + * than the packet size it still counts as full packet read and will + * clean the incoming stream to keep sigio/epoll happy + */ + +#define DROP_BUFFER_SIZE 32 + +static char *drop_buffer; + +/* Array backed queues optimized for bulk enqueue/dequeue and + * 1:N (small values of N) or 1:1 enqueuer/dequeuer ratios. + * For more details and full design rationale see + * http://foswiki.cambridgegreys.com/Main/EatYourTailAndEnjoyIt + */ + + +/* + * Advance the mmsg queue head by n = advance. Resets the queue to + * maximum enqueue/dequeue-at-once capacity if possible. Called by + * dequeuers. Caller must hold the head_lock! + */ + +static int vector_advancehead(struct vector_queue *qi, int advance) +{ + int queue_depth; + + qi->head = + (qi->head + advance) + % qi->max_depth; + + + spin_lock(&qi->tail_lock); + qi->queue_depth -= advance; + + /* we are at 0, use this to + * reset head and tail so we can use max size vectors + */ + + if (qi->queue_depth == 0) { + qi->head = 0; + qi->tail = 0; + } + queue_depth = qi->queue_depth; + spin_unlock(&qi->tail_lock); + return queue_depth; +} + +/* Advance the queue tail by n = advance. + * This is called by enqueuers which should hold the + * head lock already + */ + +static int vector_advancetail(struct vector_queue *qi, int advance) +{ + int queue_depth; + + qi->tail = + (qi->tail + advance) + % qi->max_depth; + spin_lock(&qi->head_lock); + qi->queue_depth += advance; + queue_depth = qi->queue_depth; + spin_unlock(&qi->head_lock); + return queue_depth; +} + +static int prep_msg(struct vector_private *vp, + struct sk_buff *skb, + struct iovec *iov) +{ + int iov_index = 0; + int nr_frags, frag; + skb_frag_t *skb_frag; + + nr_frags = skb_shinfo(skb)->nr_frags; + if (nr_frags > MAX_IOV_SIZE) { + if (skb_linearize(skb) != 0) + goto drop; + } + if (vp->header_size > 0) { + iov[iov_index].iov_len = vp->header_size; + vp->form_header(iov[iov_index].iov_base, skb, vp); + iov_index++; + } + iov[iov_index].iov_base = skb->data; + if (nr_frags > 0) { + iov[iov_index].iov_len = skb->len - skb->data_len; + vp->estats.sg_ok++; + } else + iov[iov_index].iov_len = skb->len; + iov_index++; + for (frag = 0; frag < nr_frags; frag++) { + skb_frag = &skb_shinfo(skb)->frags[frag]; + iov[iov_index].iov_base = skb_frag_address_safe(skb_frag); + iov[iov_index].iov_len = skb_frag_size(skb_frag); + iov_index++; + } + return iov_index; +drop: + return -1; +} +/* + * Generic vector enqueue with support for forming headers using transport + * specific callback. Allows GRE, L2TPv3, RAW and other transports + * to use a common enqueue procedure in vector mode + */ + +static int vector_enqueue(struct vector_queue *qi, struct sk_buff *skb) +{ + struct vector_private *vp = netdev_priv(qi->dev); + int queue_depth; + int packet_len; + struct mmsghdr *mmsg_vector = qi->mmsg_vector; + int iov_count; + + spin_lock(&qi->tail_lock); + spin_lock(&qi->head_lock); + queue_depth = qi->queue_depth; + spin_unlock(&qi->head_lock); + + if (skb) + packet_len = skb->len; + + if (queue_depth < qi->max_depth) { + + *(qi->skbuff_vector + qi->tail) = skb; + mmsg_vector += qi->tail; + iov_count = prep_msg( + vp, + skb, + mmsg_vector->msg_hdr.msg_iov + ); + if (iov_count < 1) + goto drop; + mmsg_vector->msg_hdr.msg_iovlen = iov_count; + mmsg_vector->msg_hdr.msg_name = vp->fds->remote_addr; + mmsg_vector->msg_hdr.msg_namelen = vp->fds->remote_addr_size; + queue_depth = vector_advancetail(qi, 1); + } else + goto drop; + spin_unlock(&qi->tail_lock); + return queue_depth; +drop: + qi->dev->stats.tx_dropped++; + if (skb != NULL) { + packet_len = skb->len; + dev_consume_skb_any(skb); + netdev_completed_queue(qi->dev, 1, packet_len); + } + spin_unlock(&qi->tail_lock); + return queue_depth; +} + +static int consume_vector_skbs(struct vector_queue *qi, int count) +{ + struct sk_buff *skb; + int skb_index; + int bytes_compl = 0; + + for (skb_index = qi->head; skb_index < qi->head + count; skb_index++) { + skb = *(qi->skbuff_vector + skb_index); + /* mark as empty to ensure correct destruction if + * needed + */ + bytes_compl += skb->len; + *(qi->skbuff_vector + skb_index) = NULL; + dev_consume_skb_any(skb); + } + qi->dev->stats.tx_bytes += bytes_compl; + qi->dev->stats.tx_packets += count; + netdev_completed_queue(qi->dev, count, bytes_compl); + return vector_advancehead(qi, count); +} + +/* + * Generic vector deque via sendmmsg with support for forming headers + * using transport specific callback. Allows GRE, L2TPv3, RAW and + * other transports to use a common dequeue procedure in vector mode + */ + + +static int vector_send(struct vector_queue *qi) +{ + struct vector_private *vp = netdev_priv(qi->dev); + struct mmsghdr *send_from; + int result = 0, send_len, queue_depth = qi->max_depth; + + if (spin_trylock(&qi->head_lock)) { + if (spin_trylock(&qi->tail_lock)) { + /* update queue_depth to current value */ + queue_depth = qi->queue_depth; + spin_unlock(&qi->tail_lock); + while (queue_depth > 0) { + /* Calculate the start of the vector */ + send_len = queue_depth; + send_from = qi->mmsg_vector; + send_from += qi->head; + /* Adjust vector size if wraparound */ + if (send_len + qi->head > qi->max_depth) + send_len = qi->max_depth - qi->head; + /* Try to TX as many packets as possible */ + if (send_len > 0) { + result = uml_vector_sendmmsg( + vp->fds->tx_fd, + send_from, + send_len, + 0 + ); + vp->in_write_poll = + (result != send_len); + } + /* For some of the sendmmsg error scenarios + * we may end being unsure in the TX success + * for all packets. It is safer to declare + * them all TX-ed and blame the network. + */ + if (result < 0) { + if (net_ratelimit()) + netdev_err(vp->dev, "sendmmsg err=%i\n", + result); + vp->in_error = true; + result = send_len; + } + if (result > 0) { + queue_depth = + consume_vector_skbs(qi, result); + /* This is equivalent to an TX IRQ. + * Restart the upper layers to feed us + * more packets. + */ + if (result > vp->estats.tx_queue_max) + vp->estats.tx_queue_max = result; + vp->estats.tx_queue_running_average = + (vp->estats.tx_queue_running_average + result) >> 1; + } + netif_wake_queue(qi->dev); + /* if TX is busy, break out of the send loop, + * poll write IRQ will reschedule xmit for us + */ + if (result != send_len) { + vp->estats.tx_restart_queue++; + break; + } + } + } + spin_unlock(&qi->head_lock); + } + return queue_depth; +} + +/* Queue destructor. Deliberately stateless so we can use + * it in queue cleanup if initialization fails. + */ + +static void destroy_queue(struct vector_queue *qi) +{ + int i; + struct iovec *iov; + struct vector_private *vp = netdev_priv(qi->dev); + struct mmsghdr *mmsg_vector; + + if (qi == NULL) + return; + /* deallocate any skbuffs - we rely on any unused to be + * set to NULL. + */ + if (qi->skbuff_vector != NULL) { + for (i = 0; i < qi->max_depth; i++) { + if (*(qi->skbuff_vector + i) != NULL) + dev_kfree_skb_any(*(qi->skbuff_vector + i)); + } + kfree(qi->skbuff_vector); + } + /* deallocate matching IOV structures including header buffs */ + if (qi->mmsg_vector != NULL) { + mmsg_vector = qi->mmsg_vector; + for (i = 0; i < qi->max_depth; i++) { + iov = mmsg_vector->msg_hdr.msg_iov; + if (iov != NULL) { + if ((vp->header_size > 0) && + (iov->iov_base != NULL)) + kfree(iov->iov_base); + kfree(iov); + } + mmsg_vector++; + } + kfree(qi->mmsg_vector); + } + kfree(qi); +} + +/* + * Queue constructor. Create a queue with a given side. + */ +static struct vector_queue *create_queue( + struct vector_private *vp, + int max_size, + int header_size, + int num_extra_frags) +{ + struct vector_queue *result; + int i; + struct iovec *iov; + struct mmsghdr *mmsg_vector; + + result = kmalloc(sizeof(struct vector_queue), GFP_KERNEL); + if (result == NULL) + return NULL; + result->max_depth = max_size; + result->dev = vp->dev; + result->mmsg_vector = kmalloc( + (sizeof(struct mmsghdr) * max_size), GFP_KERNEL); + if (result->mmsg_vector == NULL) + goto out_mmsg_fail; + result->skbuff_vector = kmalloc( + (sizeof(void *) * max_size), GFP_KERNEL); + if (result->skbuff_vector == NULL) + goto out_skb_fail; + + /* further failures can be handled safely by destroy_queue*/ + + mmsg_vector = result->mmsg_vector; + for (i = 0; i < max_size; i++) { + /* Clear all pointers - we use non-NULL as marking on + * what to free on destruction + */ + *(result->skbuff_vector + i) = NULL; + mmsg_vector->msg_hdr.msg_iov = NULL; + mmsg_vector++; + } + mmsg_vector = result->mmsg_vector; + result->max_iov_frags = num_extra_frags; + for (i = 0; i < max_size; i++) { + if (vp->header_size > 0) + iov = kmalloc_array(3 + num_extra_frags, + sizeof(struct iovec), + GFP_KERNEL + ); + else + iov = kmalloc_array(2 + num_extra_frags, + sizeof(struct iovec), + GFP_KERNEL + ); + if (iov == NULL) + goto out_fail; + mmsg_vector->msg_hdr.msg_iov = iov; + mmsg_vector->msg_hdr.msg_iovlen = 1; + mmsg_vector->msg_hdr.msg_control = NULL; + mmsg_vector->msg_hdr.msg_controllen = 0; + mmsg_vector->msg_hdr.msg_flags = MSG_DONTWAIT; + mmsg_vector->msg_hdr.msg_name = NULL; + mmsg_vector->msg_hdr.msg_namelen = 0; + if (vp->header_size > 0) { + iov->iov_base = kmalloc(header_size, GFP_KERNEL); + if (iov->iov_base == NULL) + goto out_fail; + iov->iov_len = header_size; + mmsg_vector->msg_hdr.msg_iovlen = 2; + iov++; + } + iov->iov_base = NULL; + iov->iov_len = 0; + mmsg_vector++; + } + spin_lock_init(&result->head_lock); + spin_lock_init(&result->tail_lock); + result->queue_depth = 0; + result->head = 0; + result->tail = 0; + return result; +out_skb_fail: + kfree(result->mmsg_vector); +out_mmsg_fail: + kfree(result); + return NULL; +out_fail: + destroy_queue(result); + return NULL; +} + +/* + * We do not use the RX queue as a proper wraparound queue for now + * This is not necessary because the consumption via napi_gro_receive() + * happens in-line. While we can try using the return code of + * netif_rx() for flow control there are no drivers doing this today. + * For this RX specific use we ignore the tail/head locks and + * just read into a prepared queue filled with skbuffs. + */ + +static struct sk_buff *prep_skb( + struct vector_private *vp, + struct user_msghdr *msg) +{ + int linear = vp->max_packet + vp->headroom + SAFETY_MARGIN; + struct sk_buff *result; + int iov_index = 0, len; + struct iovec *iov = msg->msg_iov; + int err, nr_frags, frag; + skb_frag_t *skb_frag; + + if (vp->req_size <= linear) + len = linear; + else + len = vp->req_size; + result = alloc_skb_with_frags( + linear, + len - vp->max_packet, + 3, + &err, + GFP_ATOMIC + ); + if (vp->header_size > 0) + iov_index++; + if (result == NULL) { + iov[iov_index].iov_base = NULL; + iov[iov_index].iov_len = 0; + goto done; + } + skb_reserve(result, vp->headroom); + result->dev = vp->dev; + skb_put(result, vp->max_packet); + result->data_len = len - vp->max_packet; + result->len += len - vp->max_packet; + skb_reset_mac_header(result); + result->ip_summed = CHECKSUM_NONE; + iov[iov_index].iov_base = result->data; + iov[iov_index].iov_len = vp->max_packet; + iov_index++; + + nr_frags = skb_shinfo(result)->nr_frags; + for (frag = 0; frag < nr_frags; frag++) { + skb_frag = &skb_shinfo(result)->frags[frag]; + iov[iov_index].iov_base = skb_frag_address_safe(skb_frag); + if (iov[iov_index].iov_base != NULL) + iov[iov_index].iov_len = skb_frag_size(skb_frag); + else + iov[iov_index].iov_len = 0; + iov_index++; + } +done: + msg->msg_iovlen = iov_index; + return result; +} + + +/* Prepare queue for recvmmsg one-shot rx - fill with fresh sk_buffs*/ + +static void prep_queue_for_rx(struct vector_queue *qi) +{ + struct vector_private *vp = netdev_priv(qi->dev); + struct mmsghdr *mmsg_vector = qi->mmsg_vector; + void **skbuff_vector = qi->skbuff_vector; + int i; + + if (qi->queue_depth == 0) + return; + for (i = 0; i < qi->queue_depth; i++) { + /* it is OK if allocation fails - recvmmsg with NULL data in + * iov argument still performs an RX, just drops the packet + * This allows us stop faffing around with a "drop buffer" + */ + + *skbuff_vector = prep_skb(vp, &mmsg_vector->msg_hdr); + skbuff_vector++; + mmsg_vector++; + } + qi->queue_depth = 0; +} + +static struct vector_device *find_device(int n) +{ + struct vector_device *device; + struct list_head *ele; + + spin_lock(&vector_devices_lock); + list_for_each(ele, &vector_devices) { + device = list_entry(ele, struct vector_device, list); + if (device->unit == n) + goto out; + } + device = NULL; + out: + spin_unlock(&vector_devices_lock); + return device; +} + +static int vector_parse(char *str, int *index_out, char **str_out, + char **error_out) +{ + int n, len, err; + char *start = str; + + len = strlen(str); + + while ((*str != ':') && (strlen(str) > 1)) + str++; + if (*str != ':') { + *error_out = "Expected ':' after device number"; + return -EINVAL; + } + *str = '\0'; + + err = kstrtouint(start, 0, &n); + if (err < 0) { + *error_out = "Bad device number"; + return err; + } + + str++; + if (find_device(n)) { + *error_out = "Device already configured"; + return -EINVAL; + } + + *index_out = n; + *str_out = str; + return 0; +} + +static int vector_config(char *str, char **error_out) +{ + int err, n; + char *params; + struct arglist *parsed; + + err = vector_parse(str, &n, ¶ms, error_out); + if (err != 0) + return err; + + /* This string is broken up and the pieces used by the underlying + * driver. We should copy it to make sure things do not go wrong + * later. + */ + + params = kstrdup(params, GFP_KERNEL); + if (params == NULL) { + *error_out = "vector_config failed to strdup string"; + return -ENOMEM; + } + + parsed = uml_parse_vector_ifspec(params); + + if (parsed == NULL) { + *error_out = "vector_config failed to parse parameters"; + kfree(params); + return -EINVAL; + } + + vector_eth_configure(n, parsed); + return 0; +} + +static int vector_id(char **str, int *start_out, int *end_out) +{ + char *end; + int n; + + n = simple_strtoul(*str, &end, 0); + if ((*end != '\0') || (end == *str)) + return -1; + + *start_out = n; + *end_out = n; + *str = end; + return n; +} + +static int vector_remove(int n, char **error_out) +{ + struct vector_device *vec_d; + struct net_device *dev; + struct vector_private *vp; + + vec_d = find_device(n); + if (vec_d == NULL) + return -ENODEV; + dev = vec_d->dev; + vp = netdev_priv(dev); + if (vp->fds != NULL) + return -EBUSY; + unregister_netdev(dev); + platform_device_unregister(&vec_d->pdev); + return 0; +} + +/* + * There is no shared per-transport initialization code, so + * we will just initialize each interface one by one and + * add them to a list + */ + +static struct platform_driver uml_net_driver = { + .driver = { + .name = DRIVER_NAME, + }, +}; + + +static void vector_device_release(struct device *dev) +{ + struct vector_device *device = dev_get_drvdata(dev); + struct net_device *netdev = device->dev; + + list_del(&device->list); + kfree(device); + free_netdev(netdev); +} + +/* Bog standard recv using recvmsg - not used normally unless the user + * explicitly specifies not to use recvmmsg vector RX. + */ + +static int vector_legacy_rx(struct vector_private *vp) +{ + int pkt_len; + struct user_msghdr hdr; + struct iovec iov[2 + MAX_IOV_SIZE]; /* header + data use case only */ + int iovpos = 0; + struct sk_buff *skb; + int header_check; + + hdr.msg_name = NULL; + hdr.msg_namelen = 0; + hdr.msg_iov = (struct iovec *) &iov; + hdr.msg_control = NULL; + hdr.msg_controllen = 0; + hdr.msg_flags = 0; + + if (vp->header_size > 0) { + iov[0].iov_base = vp->header_rxbuffer; + iov[0].iov_len = vp->header_size; + } + + skb = prep_skb(vp, &hdr); + + if (skb == NULL) { + /* Read a packet into drop_buffer and don't do + * anything with it. + */ + iov[iovpos].iov_base = drop_buffer; + iov[iovpos].iov_len = DROP_BUFFER_SIZE; + hdr.msg_iovlen = 1; + vp->dev->stats.rx_dropped++; + } + + pkt_len = uml_vector_recvmsg(vp->fds->rx_fd, &hdr, 0); + if (pkt_len < 0) { + vp->in_error = true; + return pkt_len; + } + + if (skb != NULL) { + if (pkt_len > vp->header_size) { + if (vp->header_size > 0) { + header_check = vp->verify_header( + vp->header_rxbuffer, skb, vp); + if (header_check < 0) { + dev_kfree_skb_irq(skb); + vp->dev->stats.rx_dropped++; + vp->estats.rx_encaps_errors++; + return 0; + } + if (header_check > 0) { + vp->estats.rx_csum_offload_good++; + skb->ip_summed = CHECKSUM_UNNECESSARY; + } + } + pskb_trim(skb, pkt_len - vp->rx_header_size); + skb->protocol = eth_type_trans(skb, skb->dev); + vp->dev->stats.rx_bytes += skb->len; + vp->dev->stats.rx_packets++; + napi_gro_receive(&vp->napi, skb); + } else { + dev_kfree_skb_irq(skb); + } + } + return pkt_len; +} + +/* + * Packet at a time TX which falls back to vector TX if the + * underlying transport is busy. + */ + + + +static int writev_tx(struct vector_private *vp, struct sk_buff *skb) +{ + struct iovec iov[3 + MAX_IOV_SIZE]; + int iov_count, pkt_len = 0; + + iov[0].iov_base = vp->header_txbuffer; + iov_count = prep_msg(vp, skb, (struct iovec *) &iov); + + if (iov_count < 1) + goto drop; + + pkt_len = uml_vector_writev( + vp->fds->tx_fd, + (struct iovec *) &iov, + iov_count + ); + + if (pkt_len < 0) + goto drop; + + netif_trans_update(vp->dev); + netif_wake_queue(vp->dev); + + if (pkt_len > 0) { + vp->dev->stats.tx_bytes += skb->len; + vp->dev->stats.tx_packets++; + } else { + vp->dev->stats.tx_dropped++; + } + consume_skb(skb); + return pkt_len; +drop: + vp->dev->stats.tx_dropped++; + consume_skb(skb); + if (pkt_len < 0) + vp->in_error = true; + return pkt_len; +} + +/* + * Receive as many messages as we can in one call using the special + * mmsg vector matched to an skb vector which we prepared earlier. + */ + +static int vector_mmsg_rx(struct vector_private *vp, int budget) +{ + int packet_count, i; + struct vector_queue *qi = vp->rx_queue; + struct sk_buff *skb; + struct mmsghdr *mmsg_vector = qi->mmsg_vector; + void **skbuff_vector = qi->skbuff_vector; + int header_check; + + /* Refresh the vector and make sure it is with new skbs and the + * iovs are updated to point to them. + */ + + prep_queue_for_rx(qi); + + /* Fire the Lazy Gun - get as many packets as we can in one go. */ + + if (budget > qi->max_depth) + budget = qi->max_depth; + + packet_count = uml_vector_recvmmsg( + vp->fds->rx_fd, qi->mmsg_vector, qi->max_depth, 0); + + if (packet_count < 0) + vp->in_error = true; + + if (packet_count <= 0) + return packet_count; + + /* We treat packet processing as enqueue, buffer refresh as dequeue + * The queue_depth tells us how many buffers have been used and how + * many do we need to prep the next time prep_queue_for_rx() is called. + */ + + qi->queue_depth = packet_count; + + for (i = 0; i < packet_count; i++) { + skb = (*skbuff_vector); + if (mmsg_vector->msg_len > vp->header_size) { + if (vp->header_size > 0) { + header_check = vp->verify_header( + mmsg_vector->msg_hdr.msg_iov->iov_base, + skb, + vp + ); + if (header_check < 0) { + /* Overlay header failed to verify - discard. + * We can actually keep this skb and reuse it, + * but that will make the prep logic too + * complex. + */ + dev_kfree_skb_irq(skb); + vp->estats.rx_encaps_errors++; + continue; + } + if (header_check > 0) { + vp->estats.rx_csum_offload_good++; + skb->ip_summed = CHECKSUM_UNNECESSARY; + } + } + pskb_trim(skb, + mmsg_vector->msg_len - vp->rx_header_size); + skb->protocol = eth_type_trans(skb, skb->dev); + /* + * We do not need to lock on updating stats here + * The interrupt loop is non-reentrant. + */ + vp->dev->stats.rx_bytes += skb->len; + vp->dev->stats.rx_packets++; + napi_gro_receive(&vp->napi, skb); + } else { + /* Overlay header too short to do anything - discard. + * We can actually keep this skb and reuse it, + * but that will make the prep logic too complex. + */ + if (skb != NULL) + dev_kfree_skb_irq(skb); + } + (*skbuff_vector) = NULL; + /* Move to the next buffer element */ + mmsg_vector++; + skbuff_vector++; + } + if (packet_count > 0) { + if (vp->estats.rx_queue_max < packet_count) + vp->estats.rx_queue_max = packet_count; + vp->estats.rx_queue_running_average = + (vp->estats.rx_queue_running_average + packet_count) >> 1; + } + return packet_count; +} + +static int vector_net_start_xmit(struct sk_buff *skb, struct net_device *dev) +{ + struct vector_private *vp = netdev_priv(dev); + int queue_depth = 0; + + if (vp->in_error) { + deactivate_fd(vp->fds->rx_fd, vp->rx_irq); + if ((vp->fds->rx_fd != vp->fds->tx_fd) && (vp->tx_irq != 0)) + deactivate_fd(vp->fds->tx_fd, vp->tx_irq); + return NETDEV_TX_BUSY; + } + + if ((vp->options & VECTOR_TX) == 0) { + writev_tx(vp, skb); + return NETDEV_TX_OK; + } + + /* We do BQL only in the vector path, no point doing it in + * packet at a time mode as there is no device queue + */ + + netdev_sent_queue(vp->dev, skb->len); + queue_depth = vector_enqueue(vp->tx_queue, skb); + + if (queue_depth < vp->tx_queue->max_depth && netdev_xmit_more()) { + mod_timer(&vp->tl, vp->coalesce); + return NETDEV_TX_OK; + } else { + queue_depth = vector_send(vp->tx_queue); + if (queue_depth > 0) + napi_schedule(&vp->napi); + } + + return NETDEV_TX_OK; +} + +static irqreturn_t vector_rx_interrupt(int irq, void *dev_id) +{ + struct net_device *dev = dev_id; + struct vector_private *vp = netdev_priv(dev); + + if (!netif_running(dev)) + return IRQ_NONE; + napi_schedule(&vp->napi); + return IRQ_HANDLED; + +} + +static irqreturn_t vector_tx_interrupt(int irq, void *dev_id) +{ + struct net_device *dev = dev_id; + struct vector_private *vp = netdev_priv(dev); + + if (!netif_running(dev)) + return IRQ_NONE; + /* We need to pay attention to it only if we got + * -EAGAIN or -ENOBUFFS from sendmmsg. Otherwise + * we ignore it. In the future, it may be worth + * it to improve the IRQ controller a bit to make + * tweaking the IRQ mask less costly + */ + + napi_schedule(&vp->napi); + return IRQ_HANDLED; + +} + +static int irq_rr; + +static int vector_net_close(struct net_device *dev) +{ + struct vector_private *vp = netdev_priv(dev); + unsigned long flags; + + netif_stop_queue(dev); + del_timer(&vp->tl); + + if (vp->fds == NULL) + return 0; + + /* Disable and free all IRQS */ + if (vp->rx_irq > 0) { + um_free_irq(vp->rx_irq, dev); + vp->rx_irq = 0; + } + if (vp->tx_irq > 0) { + um_free_irq(vp->tx_irq, dev); + vp->tx_irq = 0; + } + napi_disable(&vp->napi); + netif_napi_del(&vp->napi); + if (vp->fds->rx_fd > 0) { + if (vp->bpf) + uml_vector_detach_bpf(vp->fds->rx_fd, vp->bpf); + os_close_file(vp->fds->rx_fd); + vp->fds->rx_fd = -1; + } + if (vp->fds->tx_fd > 0) { + os_close_file(vp->fds->tx_fd); + vp->fds->tx_fd = -1; + } + if (vp->bpf != NULL) + kfree(vp->bpf->filter); + kfree(vp->bpf); + vp->bpf = NULL; + kfree(vp->fds->remote_addr); + kfree(vp->transport_data); + kfree(vp->header_rxbuffer); + kfree(vp->header_txbuffer); + if (vp->rx_queue != NULL) + destroy_queue(vp->rx_queue); + if (vp->tx_queue != NULL) + destroy_queue(vp->tx_queue); + kfree(vp->fds); + vp->fds = NULL; + spin_lock_irqsave(&vp->lock, flags); + vp->opened = false; + vp->in_error = false; + spin_unlock_irqrestore(&vp->lock, flags); + return 0; +} + +static int vector_poll(struct napi_struct *napi, int budget) +{ + struct vector_private *vp = container_of(napi, struct vector_private, napi); + int work_done = 0; + int err; + bool tx_enqueued = false; + + if ((vp->options & VECTOR_TX) != 0) + tx_enqueued = (vector_send(vp->tx_queue) > 0); + if ((vp->options & VECTOR_RX) > 0) + err = vector_mmsg_rx(vp, budget); + else { + err = vector_legacy_rx(vp); + if (err > 0) + err = 1; + } + if (err > 0) + work_done += err; + + if (tx_enqueued || err > 0) + napi_schedule(napi); + if (work_done < budget) + napi_complete_done(napi, work_done); + return work_done; +} + +static void vector_reset_tx(struct work_struct *work) +{ + struct vector_private *vp = + container_of(work, struct vector_private, reset_tx); + netdev_reset_queue(vp->dev); + netif_start_queue(vp->dev); + netif_wake_queue(vp->dev); +} + +static int vector_net_open(struct net_device *dev) +{ + struct vector_private *vp = netdev_priv(dev); + unsigned long flags; + int err = -EINVAL; + struct vector_device *vdevice; + + spin_lock_irqsave(&vp->lock, flags); + if (vp->opened) { + spin_unlock_irqrestore(&vp->lock, flags); + return -ENXIO; + } + vp->opened = true; + spin_unlock_irqrestore(&vp->lock, flags); + + vp->bpf = uml_vector_user_bpf(get_bpf_file(vp->parsed)); + + vp->fds = uml_vector_user_open(vp->unit, vp->parsed); + + if (vp->fds == NULL) + goto out_close; + + if (build_transport_data(vp) < 0) + goto out_close; + + if ((vp->options & VECTOR_RX) > 0) { + vp->rx_queue = create_queue( + vp, + get_depth(vp->parsed), + vp->rx_header_size, + MAX_IOV_SIZE + ); + vp->rx_queue->queue_depth = get_depth(vp->parsed); + } else { + vp->header_rxbuffer = kmalloc( + vp->rx_header_size, + GFP_KERNEL + ); + if (vp->header_rxbuffer == NULL) + goto out_close; + } + if ((vp->options & VECTOR_TX) > 0) { + vp->tx_queue = create_queue( + vp, + get_depth(vp->parsed), + vp->header_size, + MAX_IOV_SIZE + ); + } else { + vp->header_txbuffer = kmalloc(vp->header_size, GFP_KERNEL); + if (vp->header_txbuffer == NULL) + goto out_close; + } + + netif_napi_add_weight(vp->dev, &vp->napi, vector_poll, + get_depth(vp->parsed)); + napi_enable(&vp->napi); + + /* READ IRQ */ + err = um_request_irq( + irq_rr + VECTOR_BASE_IRQ, vp->fds->rx_fd, + IRQ_READ, vector_rx_interrupt, + IRQF_SHARED, dev->name, dev); + if (err < 0) { + netdev_err(dev, "vector_open: failed to get rx irq(%d)\n", err); + err = -ENETUNREACH; + goto out_close; + } + vp->rx_irq = irq_rr + VECTOR_BASE_IRQ; + dev->irq = irq_rr + VECTOR_BASE_IRQ; + irq_rr = (irq_rr + 1) % VECTOR_IRQ_SPACE; + + /* WRITE IRQ - we need it only if we have vector TX */ + if ((vp->options & VECTOR_TX) > 0) { + err = um_request_irq( + irq_rr + VECTOR_BASE_IRQ, vp->fds->tx_fd, + IRQ_WRITE, vector_tx_interrupt, + IRQF_SHARED, dev->name, dev); + if (err < 0) { + netdev_err(dev, + "vector_open: failed to get tx irq(%d)\n", err); + err = -ENETUNREACH; + goto out_close; + } + vp->tx_irq = irq_rr + VECTOR_BASE_IRQ; + irq_rr = (irq_rr + 1) % VECTOR_IRQ_SPACE; + } + + if ((vp->options & VECTOR_QDISC_BYPASS) != 0) { + if (!uml_raw_enable_qdisc_bypass(vp->fds->rx_fd)) + vp->options |= VECTOR_BPF; + } + if (((vp->options & VECTOR_BPF) != 0) && (vp->bpf == NULL)) + vp->bpf = uml_vector_default_bpf(dev->dev_addr); + + if (vp->bpf != NULL) + uml_vector_attach_bpf(vp->fds->rx_fd, vp->bpf); + + netif_start_queue(dev); + vector_reset_stats(vp); + + /* clear buffer - it can happen that the host side of the interface + * is full when we get here. In this case, new data is never queued, + * SIGIOs never arrive, and the net never works. + */ + + napi_schedule(&vp->napi); + + vdevice = find_device(vp->unit); + vdevice->opened = 1; + + if ((vp->options & VECTOR_TX) != 0) + add_timer(&vp->tl); + return 0; +out_close: + vector_net_close(dev); + return err; +} + + +static void vector_net_set_multicast_list(struct net_device *dev) +{ + /* TODO: - we can do some BPF games here */ + return; +} + +static void vector_net_tx_timeout(struct net_device *dev, unsigned int txqueue) +{ + struct vector_private *vp = netdev_priv(dev); + + vp->estats.tx_timeout_count++; + netif_trans_update(dev); + schedule_work(&vp->reset_tx); +} + +static netdev_features_t vector_fix_features(struct net_device *dev, + netdev_features_t features) +{ + features &= ~(NETIF_F_IP_CSUM|NETIF_F_IPV6_CSUM); + return features; +} + +static int vector_set_features(struct net_device *dev, + netdev_features_t features) +{ + struct vector_private *vp = netdev_priv(dev); + /* Adjust buffer sizes for GSO/GRO. Unfortunately, there is + * no way to negotiate it on raw sockets, so we can change + * only our side. + */ + if (features & NETIF_F_GRO) + /* All new frame buffers will be GRO-sized */ + vp->req_size = 65536; + else + /* All new frame buffers will be normal sized */ + vp->req_size = vp->max_packet + vp->headroom + SAFETY_MARGIN; + return 0; +} + +#ifdef CONFIG_NET_POLL_CONTROLLER +static void vector_net_poll_controller(struct net_device *dev) +{ + disable_irq(dev->irq); + vector_rx_interrupt(dev->irq, dev); + enable_irq(dev->irq); +} +#endif + +static void vector_net_get_drvinfo(struct net_device *dev, + struct ethtool_drvinfo *info) +{ + strscpy(info->driver, DRIVER_NAME, sizeof(info->driver)); +} + +static int vector_net_load_bpf_flash(struct net_device *dev, + struct ethtool_flash *efl) +{ + struct vector_private *vp = netdev_priv(dev); + struct vector_device *vdevice; + const struct firmware *fw; + int result = 0; + + if (!(vp->options & VECTOR_BPF_FLASH)) { + netdev_err(dev, "loading firmware not permitted: %s\n", efl->data); + return -1; + } + + spin_lock(&vp->lock); + + if (vp->bpf != NULL) { + if (vp->opened) + uml_vector_detach_bpf(vp->fds->rx_fd, vp->bpf); + kfree(vp->bpf->filter); + vp->bpf->filter = NULL; + } else { + vp->bpf = kmalloc(sizeof(struct sock_fprog), GFP_ATOMIC); + if (vp->bpf == NULL) { + netdev_err(dev, "failed to allocate memory for firmware\n"); + goto flash_fail; + } + } + + vdevice = find_device(vp->unit); + + if (request_firmware(&fw, efl->data, &vdevice->pdev.dev)) + goto flash_fail; + + vp->bpf->filter = kmemdup(fw->data, fw->size, GFP_ATOMIC); + if (!vp->bpf->filter) + goto free_buffer; + + vp->bpf->len = fw->size / sizeof(struct sock_filter); + release_firmware(fw); + + if (vp->opened) + result = uml_vector_attach_bpf(vp->fds->rx_fd, vp->bpf); + + spin_unlock(&vp->lock); + + return result; + +free_buffer: + release_firmware(fw); + +flash_fail: + spin_unlock(&vp->lock); + if (vp->bpf != NULL) + kfree(vp->bpf->filter); + kfree(vp->bpf); + vp->bpf = NULL; + return -1; +} + +static void vector_get_ringparam(struct net_device *netdev, + struct ethtool_ringparam *ring, + struct kernel_ethtool_ringparam *kernel_ring, + struct netlink_ext_ack *extack) +{ + struct vector_private *vp = netdev_priv(netdev); + + ring->rx_max_pending = vp->rx_queue->max_depth; + ring->tx_max_pending = vp->tx_queue->max_depth; + ring->rx_pending = vp->rx_queue->max_depth; + ring->tx_pending = vp->tx_queue->max_depth; +} + +static void vector_get_strings(struct net_device *dev, u32 stringset, u8 *buf) +{ + switch (stringset) { + case ETH_SS_TEST: + *buf = '\0'; + break; + case ETH_SS_STATS: + memcpy(buf, ðtool_stats_keys, sizeof(ethtool_stats_keys)); + break; + default: + WARN_ON(1); + break; + } +} + +static int vector_get_sset_count(struct net_device *dev, int sset) +{ + switch (sset) { + case ETH_SS_TEST: + return 0; + case ETH_SS_STATS: + return VECTOR_NUM_STATS; + default: + return -EOPNOTSUPP; + } +} + +static void vector_get_ethtool_stats(struct net_device *dev, + struct ethtool_stats *estats, + u64 *tmp_stats) +{ + struct vector_private *vp = netdev_priv(dev); + + memcpy(tmp_stats, &vp->estats, sizeof(struct vector_estats)); +} + +static int vector_get_coalesce(struct net_device *netdev, + struct ethtool_coalesce *ec, + struct kernel_ethtool_coalesce *kernel_coal, + struct netlink_ext_ack *extack) +{ + struct vector_private *vp = netdev_priv(netdev); + + ec->tx_coalesce_usecs = (vp->coalesce * 1000000) / HZ; + return 0; +} + +static int vector_set_coalesce(struct net_device *netdev, + struct ethtool_coalesce *ec, + struct kernel_ethtool_coalesce *kernel_coal, + struct netlink_ext_ack *extack) +{ + struct vector_private *vp = netdev_priv(netdev); + + vp->coalesce = (ec->tx_coalesce_usecs * HZ) / 1000000; + if (vp->coalesce == 0) + vp->coalesce = 1; + return 0; +} + +static const struct ethtool_ops vector_net_ethtool_ops = { + .supported_coalesce_params = ETHTOOL_COALESCE_TX_USECS, + .get_drvinfo = vector_net_get_drvinfo, + .get_link = ethtool_op_get_link, + .get_ts_info = ethtool_op_get_ts_info, + .get_ringparam = vector_get_ringparam, + .get_strings = vector_get_strings, + .get_sset_count = vector_get_sset_count, + .get_ethtool_stats = vector_get_ethtool_stats, + .get_coalesce = vector_get_coalesce, + .set_coalesce = vector_set_coalesce, + .flash_device = vector_net_load_bpf_flash, +}; + + +static const struct net_device_ops vector_netdev_ops = { + .ndo_open = vector_net_open, + .ndo_stop = vector_net_close, + .ndo_start_xmit = vector_net_start_xmit, + .ndo_set_rx_mode = vector_net_set_multicast_list, + .ndo_tx_timeout = vector_net_tx_timeout, + .ndo_set_mac_address = eth_mac_addr, + .ndo_validate_addr = eth_validate_addr, + .ndo_fix_features = vector_fix_features, + .ndo_set_features = vector_set_features, +#ifdef CONFIG_NET_POLL_CONTROLLER + .ndo_poll_controller = vector_net_poll_controller, +#endif +}; + +static void vector_timer_expire(struct timer_list *t) +{ + struct vector_private *vp = from_timer(vp, t, tl); + + vp->estats.tx_kicks++; + napi_schedule(&vp->napi); +} + + + +static void vector_eth_configure( + int n, + struct arglist *def + ) +{ + struct vector_device *device; + struct net_device *dev; + struct vector_private *vp; + int err; + + device = kzalloc(sizeof(*device), GFP_KERNEL); + if (device == NULL) { + printk(KERN_ERR "eth_configure failed to allocate struct " + "vector_device\n"); + return; + } + dev = alloc_etherdev(sizeof(struct vector_private)); + if (dev == NULL) { + printk(KERN_ERR "eth_configure: failed to allocate struct " + "net_device for vec%d\n", n); + goto out_free_device; + } + + dev->mtu = get_mtu(def); + + INIT_LIST_HEAD(&device->list); + device->unit = n; + + /* If this name ends up conflicting with an existing registered + * netdevice, that is OK, register_netdev{,ice}() will notice this + * and fail. + */ + snprintf(dev->name, sizeof(dev->name), "vec%d", n); + uml_net_setup_etheraddr(dev, uml_vector_fetch_arg(def, "mac")); + vp = netdev_priv(dev); + + /* sysfs register */ + if (!driver_registered) { + platform_driver_register(¨_net_driver); + driver_registered = 1; + } + device->pdev.id = n; + device->pdev.name = DRIVER_NAME; + device->pdev.dev.release = vector_device_release; + dev_set_drvdata(&device->pdev.dev, device); + if (platform_device_register(&device->pdev)) + goto out_free_netdev; + SET_NETDEV_DEV(dev, &device->pdev.dev); + + device->dev = dev; + + *vp = ((struct vector_private) + { + .list = LIST_HEAD_INIT(vp->list), + .dev = dev, + .unit = n, + .options = get_transport_options(def), + .rx_irq = 0, + .tx_irq = 0, + .parsed = def, + .max_packet = get_mtu(def) + ETH_HEADER_OTHER, + /* TODO - we need to calculate headroom so that ip header + * is 16 byte aligned all the time + */ + .headroom = get_headroom(def), + .form_header = NULL, + .verify_header = NULL, + .header_rxbuffer = NULL, + .header_txbuffer = NULL, + .header_size = 0, + .rx_header_size = 0, + .rexmit_scheduled = false, + .opened = false, + .transport_data = NULL, + .in_write_poll = false, + .coalesce = 2, + .req_size = get_req_size(def), + .in_error = false, + .bpf = NULL + }); + + dev->features = dev->hw_features = (NETIF_F_SG | NETIF_F_FRAGLIST); + INIT_WORK(&vp->reset_tx, vector_reset_tx); + + timer_setup(&vp->tl, vector_timer_expire, 0); + spin_lock_init(&vp->lock); + + /* FIXME */ + dev->netdev_ops = &vector_netdev_ops; + dev->ethtool_ops = &vector_net_ethtool_ops; + dev->watchdog_timeo = (HZ >> 1); + /* primary IRQ - fixme */ + dev->irq = 0; /* we will adjust this once opened */ + + rtnl_lock(); + err = register_netdevice(dev); + rtnl_unlock(); + if (err) + goto out_undo_user_init; + + spin_lock(&vector_devices_lock); + list_add(&device->list, &vector_devices); + spin_unlock(&vector_devices_lock); + + return; + +out_undo_user_init: + return; +out_free_netdev: + free_netdev(dev); +out_free_device: + kfree(device); +} + + + + +/* + * Invoked late in the init + */ + +static int __init vector_init(void) +{ + struct list_head *ele; + struct vector_cmd_line_arg *def; + struct arglist *parsed; + + list_for_each(ele, &vec_cmd_line) { + def = list_entry(ele, struct vector_cmd_line_arg, list); + parsed = uml_parse_vector_ifspec(def->arguments); + if (parsed != NULL) + vector_eth_configure(def->unit, parsed); + } + return 0; +} + + +/* Invoked at initial argument parsing, only stores + * arguments until a proper vector_init is called + * later + */ + +static int __init vector_setup(char *str) +{ + char *error; + int n, err; + struct vector_cmd_line_arg *new; + + err = vector_parse(str, &n, &str, &error); + if (err) { + printk(KERN_ERR "vector_setup - Couldn't parse '%s' : %s\n", + str, error); + return 1; + } + new = memblock_alloc(sizeof(*new), SMP_CACHE_BYTES); + if (!new) + panic("%s: Failed to allocate %zu bytes\n", __func__, + sizeof(*new)); + INIT_LIST_HEAD(&new->list); + new->unit = n; + new->arguments = str; + list_add_tail(&new->list, &vec_cmd_line); + return 1; +} + +__setup("vec", vector_setup); +__uml_help(vector_setup, +"vec[0-9]+:<option>=<value>,<option>=<value>\n" +" Configure a vector io network device.\n\n" +); + +late_initcall(vector_init); + +static struct mc_device vector_mc = { + .list = LIST_HEAD_INIT(vector_mc.list), + .name = "vec", + .config = vector_config, + .get_config = NULL, + .id = vector_id, + .remove = vector_remove, +}; + +#ifdef CONFIG_INET +static int vector_inetaddr_event( + struct notifier_block *this, + unsigned long event, + void *ptr) +{ + return NOTIFY_DONE; +} + +static struct notifier_block vector_inetaddr_notifier = { + .notifier_call = vector_inetaddr_event, +}; + +static void inet_register(void) +{ + register_inetaddr_notifier(&vector_inetaddr_notifier); +} +#else +static inline void inet_register(void) +{ +} +#endif + +static int vector_net_init(void) +{ + mconsole_register_dev(&vector_mc); + inet_register(); + return 0; +} + +__initcall(vector_net_init); + + + diff --git a/arch/um/drivers/vector_kern.h b/arch/um/drivers/vector_kern.h new file mode 100644 index 0000000000..2a1fa8e0f3 --- /dev/null +++ b/arch/um/drivers/vector_kern.h @@ -0,0 +1,138 @@ +/* SPDX-License-Identifier: GPL-2.0 */ +/* + * Copyright (C) 2002 2007 Jeff Dike (jdike@{addtoit,linux.intel}.com) + */ + +#ifndef __UM_VECTOR_KERN_H +#define __UM_VECTOR_KERN_H + +#include <linux/netdevice.h> +#include <linux/platform_device.h> +#include <linux/skbuff.h> +#include <linux/socket.h> +#include <linux/list.h> +#include <linux/ctype.h> +#include <linux/workqueue.h> +#include <linux/interrupt.h> + +#include "vector_user.h" + +/* Queue structure specially adapted for multiple enqueue/dequeue + * in a mmsgrecv/mmsgsend context + */ + +/* Dequeue method */ + +#define QUEUE_SENDMSG 0 +#define QUEUE_SENDMMSG 1 + +#define VECTOR_RX 1 +#define VECTOR_TX (1 << 1) +#define VECTOR_BPF (1 << 2) +#define VECTOR_QDISC_BYPASS (1 << 3) +#define VECTOR_BPF_FLASH (1 << 4) + +#define ETH_MAX_PACKET 1500 +#define ETH_HEADER_OTHER 32 /* just in case someone decides to go mad on QnQ */ + +#define MAX_FILTER_PROG (2 << 16) + +struct vector_queue { + struct mmsghdr *mmsg_vector; + void **skbuff_vector; + /* backlink to device which owns us */ + struct net_device *dev; + spinlock_t head_lock; + spinlock_t tail_lock; + int queue_depth, head, tail, max_depth, max_iov_frags; + short options; +}; + +struct vector_estats { + uint64_t rx_queue_max; + uint64_t rx_queue_running_average; + uint64_t tx_queue_max; + uint64_t tx_queue_running_average; + uint64_t rx_encaps_errors; + uint64_t tx_timeout_count; + uint64_t tx_restart_queue; + uint64_t tx_kicks; + uint64_t tx_flow_control_xon; + uint64_t tx_flow_control_xoff; + uint64_t rx_csum_offload_good; + uint64_t rx_csum_offload_errors; + uint64_t sg_ok; + uint64_t sg_linearized; +}; + +#define VERIFY_HEADER_NOK -1 +#define VERIFY_HEADER_OK 0 +#define VERIFY_CSUM_OK 1 + +struct vector_private { + struct list_head list; + spinlock_t lock; + struct net_device *dev; + struct napi_struct napi ____cacheline_aligned; + + int unit; + + /* Timeout timer in TX */ + + struct timer_list tl; + + /* Scheduled "remove device" work */ + struct work_struct reset_tx; + struct vector_fds *fds; + + struct vector_queue *rx_queue; + struct vector_queue *tx_queue; + + int rx_irq; + int tx_irq; + + struct arglist *parsed; + + void *transport_data; /* transport specific params if needed */ + + int max_packet; + int req_size; /* different from max packet - used for TSO */ + int headroom; + + int options; + + /* remote address if any - some transports will leave this as null */ + + int header_size; + int rx_header_size; + int coalesce; + + void *header_rxbuffer; + void *header_txbuffer; + + int (*form_header)(uint8_t *header, + struct sk_buff *skb, struct vector_private *vp); + int (*verify_header)(uint8_t *header, + struct sk_buff *skb, struct vector_private *vp); + + spinlock_t stats_lock; + + bool rexmit_scheduled; + bool opened; + bool in_write_poll; + bool in_error; + + /* guest allowed to use ethtool flash to load bpf */ + bool bpf_via_flash; + + /* ethtool stats */ + + struct vector_estats estats; + struct sock_fprog *bpf; + + char user[]; +}; + +extern int build_transport_data(struct vector_private *vp); + +#endif diff --git a/arch/um/drivers/vector_transports.c b/arch/um/drivers/vector_transports.c new file mode 100644 index 0000000000..0794d23f07 --- /dev/null +++ b/arch/um/drivers/vector_transports.c @@ -0,0 +1,495 @@ +// SPDX-License-Identifier: GPL-2.0 +/* + * Copyright (C) 2017 - Cambridge Greys Limited + * Copyright (C) 2011 - 2014 Cisco Systems Inc + */ + +#include <linux/etherdevice.h> +#include <linux/netdevice.h> +#include <linux/skbuff.h> +#include <linux/slab.h> +#include <asm/byteorder.h> +#include <uapi/linux/ip.h> +#include <uapi/linux/virtio_net.h> +#include <linux/virtio_net.h> +#include <linux/virtio_byteorder.h> +#include <linux/netdev_features.h> +#include "vector_user.h" +#include "vector_kern.h" + +#define GOOD_LINEAR 512 +#define GSO_ERROR "Incoming GSO frames and GRO disabled on the interface" + +struct gre_minimal_header { + uint16_t header; + uint16_t arptype; +}; + + +struct uml_gre_data { + uint32_t rx_key; + uint32_t tx_key; + uint32_t sequence; + + bool ipv6; + bool has_sequence; + bool pin_sequence; + bool checksum; + bool key; + struct gre_minimal_header expected_header; + + uint32_t checksum_offset; + uint32_t key_offset; + uint32_t sequence_offset; + +}; + +struct uml_l2tpv3_data { + uint64_t rx_cookie; + uint64_t tx_cookie; + uint64_t rx_session; + uint64_t tx_session; + uint32_t counter; + + bool udp; + bool ipv6; + bool has_counter; + bool pin_counter; + bool cookie; + bool cookie_is_64; + + uint32_t cookie_offset; + uint32_t session_offset; + uint32_t counter_offset; +}; + +static int l2tpv3_form_header(uint8_t *header, + struct sk_buff *skb, struct vector_private *vp) +{ + struct uml_l2tpv3_data *td = vp->transport_data; + uint32_t *counter; + + if (td->udp) + *(uint32_t *) header = cpu_to_be32(L2TPV3_DATA_PACKET); + (*(uint32_t *) (header + td->session_offset)) = td->tx_session; + + if (td->cookie) { + if (td->cookie_is_64) + (*(uint64_t *)(header + td->cookie_offset)) = + td->tx_cookie; + else + (*(uint32_t *)(header + td->cookie_offset)) = + td->tx_cookie; + } + if (td->has_counter) { + counter = (uint32_t *)(header + td->counter_offset); + if (td->pin_counter) { + *counter = 0; + } else { + td->counter++; + *counter = cpu_to_be32(td->counter); + } + } + return 0; +} + +static int gre_form_header(uint8_t *header, + struct sk_buff *skb, struct vector_private *vp) +{ + struct uml_gre_data *td = vp->transport_data; + uint32_t *sequence; + *((uint32_t *) header) = *((uint32_t *) &td->expected_header); + if (td->key) + (*(uint32_t *) (header + td->key_offset)) = td->tx_key; + if (td->has_sequence) { + sequence = (uint32_t *)(header + td->sequence_offset); + if (td->pin_sequence) + *sequence = 0; + else + *sequence = cpu_to_be32(++td->sequence); + } + return 0; +} + +static int raw_form_header(uint8_t *header, + struct sk_buff *skb, struct vector_private *vp) +{ + struct virtio_net_hdr *vheader = (struct virtio_net_hdr *) header; + + virtio_net_hdr_from_skb( + skb, + vheader, + virtio_legacy_is_little_endian(), + false, + 0 + ); + + return 0; +} + +static int l2tpv3_verify_header( + uint8_t *header, struct sk_buff *skb, struct vector_private *vp) +{ + struct uml_l2tpv3_data *td = vp->transport_data; + uint32_t *session; + uint64_t cookie; + + if ((!td->udp) && (!td->ipv6)) + header += sizeof(struct iphdr) /* fix for ipv4 raw */; + + /* we do not do a strict check for "data" packets as per + * the RFC spec because the pure IP spec does not have + * that anyway. + */ + + if (td->cookie) { + if (td->cookie_is_64) + cookie = *(uint64_t *)(header + td->cookie_offset); + else + cookie = *(uint32_t *)(header + td->cookie_offset); + if (cookie != td->rx_cookie) { + if (net_ratelimit()) + netdev_err(vp->dev, "uml_l2tpv3: unknown cookie id"); + return -1; + } + } + session = (uint32_t *) (header + td->session_offset); + if (*session != td->rx_session) { + if (net_ratelimit()) + netdev_err(vp->dev, "uml_l2tpv3: session mismatch"); + return -1; + } + return 0; +} + +static int gre_verify_header( + uint8_t *header, struct sk_buff *skb, struct vector_private *vp) +{ + + uint32_t key; + struct uml_gre_data *td = vp->transport_data; + + if (!td->ipv6) + header += sizeof(struct iphdr) /* fix for ipv4 raw */; + + if (*((uint32_t *) header) != *((uint32_t *) &td->expected_header)) { + if (net_ratelimit()) + netdev_err(vp->dev, "header type disagreement, expecting %0x, got %0x", + *((uint32_t *) &td->expected_header), + *((uint32_t *) header) + ); + return -1; + } + + if (td->key) { + key = (*(uint32_t *)(header + td->key_offset)); + if (key != td->rx_key) { + if (net_ratelimit()) + netdev_err(vp->dev, "unknown key id %0x, expecting %0x", + key, td->rx_key); + return -1; + } + } + return 0; +} + +static int raw_verify_header( + uint8_t *header, struct sk_buff *skb, struct vector_private *vp) +{ + struct virtio_net_hdr *vheader = (struct virtio_net_hdr *) header; + + if ((vheader->gso_type != VIRTIO_NET_HDR_GSO_NONE) && + (vp->req_size != 65536)) { + if (net_ratelimit()) + netdev_err( + vp->dev, + GSO_ERROR + ); + } + if ((vheader->flags & VIRTIO_NET_HDR_F_DATA_VALID) > 0) + return 1; + + virtio_net_hdr_to_skb(skb, vheader, virtio_legacy_is_little_endian()); + return 0; +} + +static bool get_uint_param( + struct arglist *def, char *param, unsigned int *result) +{ + char *arg = uml_vector_fetch_arg(def, param); + + if (arg != NULL) { + if (kstrtoint(arg, 0, result) == 0) + return true; + } + return false; +} + +static bool get_ulong_param( + struct arglist *def, char *param, unsigned long *result) +{ + char *arg = uml_vector_fetch_arg(def, param); + + if (arg != NULL) { + if (kstrtoul(arg, 0, result) == 0) + return true; + return true; + } + return false; +} + +static int build_gre_transport_data(struct vector_private *vp) +{ + struct uml_gre_data *td; + int temp_int; + int temp_rx; + int temp_tx; + + vp->transport_data = kmalloc(sizeof(struct uml_gre_data), GFP_KERNEL); + if (vp->transport_data == NULL) + return -ENOMEM; + td = vp->transport_data; + td->sequence = 0; + + td->expected_header.arptype = GRE_IRB; + td->expected_header.header = 0; + + vp->form_header = &gre_form_header; + vp->verify_header = &gre_verify_header; + vp->header_size = 4; + td->key_offset = 4; + td->sequence_offset = 4; + td->checksum_offset = 4; + + td->ipv6 = false; + if (get_uint_param(vp->parsed, "v6", &temp_int)) { + if (temp_int > 0) + td->ipv6 = true; + } + td->key = false; + if (get_uint_param(vp->parsed, "rx_key", &temp_rx)) { + if (get_uint_param(vp->parsed, "tx_key", &temp_tx)) { + td->key = true; + td->expected_header.header |= GRE_MODE_KEY; + td->rx_key = cpu_to_be32(temp_rx); + td->tx_key = cpu_to_be32(temp_tx); + vp->header_size += 4; + td->sequence_offset += 4; + } else { + return -EINVAL; + } + } + + td->sequence = false; + if (get_uint_param(vp->parsed, "sequence", &temp_int)) { + if (temp_int > 0) { + vp->header_size += 4; + td->has_sequence = true; + td->expected_header.header |= GRE_MODE_SEQUENCE; + if (get_uint_param( + vp->parsed, "pin_sequence", &temp_int)) { + if (temp_int > 0) + td->pin_sequence = true; + } + } + } + vp->rx_header_size = vp->header_size; + if (!td->ipv6) + vp->rx_header_size += sizeof(struct iphdr); + return 0; +} + +static int build_l2tpv3_transport_data(struct vector_private *vp) +{ + + struct uml_l2tpv3_data *td; + int temp_int, temp_rxs, temp_txs; + unsigned long temp_rx; + unsigned long temp_tx; + + vp->transport_data = kmalloc( + sizeof(struct uml_l2tpv3_data), GFP_KERNEL); + + if (vp->transport_data == NULL) + return -ENOMEM; + + td = vp->transport_data; + + vp->form_header = &l2tpv3_form_header; + vp->verify_header = &l2tpv3_verify_header; + td->counter = 0; + + vp->header_size = 4; + td->session_offset = 0; + td->cookie_offset = 4; + td->counter_offset = 4; + + + td->ipv6 = false; + if (get_uint_param(vp->parsed, "v6", &temp_int)) { + if (temp_int > 0) + td->ipv6 = true; + } + + if (get_uint_param(vp->parsed, "rx_session", &temp_rxs)) { + if (get_uint_param(vp->parsed, "tx_session", &temp_txs)) { + td->tx_session = cpu_to_be32(temp_txs); + td->rx_session = cpu_to_be32(temp_rxs); + } else { + return -EINVAL; + } + } else { + return -EINVAL; + } + + td->cookie_is_64 = false; + if (get_uint_param(vp->parsed, "cookie64", &temp_int)) { + if (temp_int > 0) + td->cookie_is_64 = true; + } + td->cookie = false; + if (get_ulong_param(vp->parsed, "rx_cookie", &temp_rx)) { + if (get_ulong_param(vp->parsed, "tx_cookie", &temp_tx)) { + td->cookie = true; + if (td->cookie_is_64) { + td->rx_cookie = cpu_to_be64(temp_rx); + td->tx_cookie = cpu_to_be64(temp_tx); + vp->header_size += 8; + td->counter_offset += 8; + } else { + td->rx_cookie = cpu_to_be32(temp_rx); + td->tx_cookie = cpu_to_be32(temp_tx); + vp->header_size += 4; + td->counter_offset += 4; + } + } else { + return -EINVAL; + } + } + + td->has_counter = false; + if (get_uint_param(vp->parsed, "counter", &temp_int)) { + if (temp_int > 0) { + td->has_counter = true; + vp->header_size += 4; + if (get_uint_param( + vp->parsed, "pin_counter", &temp_int)) { + if (temp_int > 0) + td->pin_counter = true; + } + } + } + + if (get_uint_param(vp->parsed, "udp", &temp_int)) { + if (temp_int > 0) { + td->udp = true; + vp->header_size += 4; + td->counter_offset += 4; + td->session_offset += 4; + td->cookie_offset += 4; + } + } + + vp->rx_header_size = vp->header_size; + if ((!td->ipv6) && (!td->udp)) + vp->rx_header_size += sizeof(struct iphdr); + + return 0; +} + +static int build_raw_transport_data(struct vector_private *vp) +{ + if (uml_raw_enable_vnet_headers(vp->fds->rx_fd)) { + if (!uml_raw_enable_vnet_headers(vp->fds->tx_fd)) + return -1; + vp->form_header = &raw_form_header; + vp->verify_header = &raw_verify_header; + vp->header_size = sizeof(struct virtio_net_hdr); + vp->rx_header_size = sizeof(struct virtio_net_hdr); + vp->dev->hw_features |= (NETIF_F_TSO | NETIF_F_GRO); + vp->dev->features |= + (NETIF_F_RXCSUM | NETIF_F_HW_CSUM | + NETIF_F_TSO | NETIF_F_GRO); + netdev_info( + vp->dev, + "raw: using vnet headers for tso and tx/rx checksum" + ); + } + return 0; +} + +static int build_hybrid_transport_data(struct vector_private *vp) +{ + if (uml_raw_enable_vnet_headers(vp->fds->rx_fd)) { + vp->form_header = &raw_form_header; + vp->verify_header = &raw_verify_header; + vp->header_size = sizeof(struct virtio_net_hdr); + vp->rx_header_size = sizeof(struct virtio_net_hdr); + vp->dev->hw_features |= + (NETIF_F_TSO | NETIF_F_GSO | NETIF_F_GRO); + vp->dev->features |= + (NETIF_F_RXCSUM | NETIF_F_HW_CSUM | + NETIF_F_TSO | NETIF_F_GSO | NETIF_F_GRO); + netdev_info( + vp->dev, + "tap/raw hybrid: using vnet headers for tso and tx/rx checksum" + ); + } else { + return 0; /* do not try to enable tap too if raw failed */ + } + if (uml_tap_enable_vnet_headers(vp->fds->tx_fd)) + return 0; + return -1; +} + +static int build_tap_transport_data(struct vector_private *vp) +{ + /* "Pure" tap uses the same fd for rx and tx */ + if (uml_tap_enable_vnet_headers(vp->fds->tx_fd)) { + vp->form_header = &raw_form_header; + vp->verify_header = &raw_verify_header; + vp->header_size = sizeof(struct virtio_net_hdr); + vp->rx_header_size = sizeof(struct virtio_net_hdr); + vp->dev->hw_features |= + (NETIF_F_TSO | NETIF_F_GSO | NETIF_F_GRO); + vp->dev->features |= + (NETIF_F_RXCSUM | NETIF_F_HW_CSUM | + NETIF_F_TSO | NETIF_F_GSO | NETIF_F_GRO); + netdev_info( + vp->dev, + "tap: using vnet headers for tso and tx/rx checksum" + ); + return 0; + } + return -1; +} + + +static int build_bess_transport_data(struct vector_private *vp) +{ + vp->form_header = NULL; + vp->verify_header = NULL; + vp->header_size = 0; + vp->rx_header_size = 0; + return 0; +} + +int build_transport_data(struct vector_private *vp) +{ + char *transport = uml_vector_fetch_arg(vp->parsed, "transport"); + + if (strncmp(transport, TRANS_GRE, TRANS_GRE_LEN) == 0) + return build_gre_transport_data(vp); + if (strncmp(transport, TRANS_L2TPV3, TRANS_L2TPV3_LEN) == 0) + return build_l2tpv3_transport_data(vp); + if (strncmp(transport, TRANS_RAW, TRANS_RAW_LEN) == 0) + return build_raw_transport_data(vp); + if (strncmp(transport, TRANS_TAP, TRANS_TAP_LEN) == 0) + return build_tap_transport_data(vp); + if (strncmp(transport, TRANS_HYBRID, TRANS_HYBRID_LEN) == 0) + return build_hybrid_transport_data(vp); + if (strncmp(transport, TRANS_BESS, TRANS_BESS_LEN) == 0) + return build_bess_transport_data(vp); + return 0; +} + diff --git a/arch/um/drivers/vector_user.c b/arch/um/drivers/vector_user.c new file mode 100644 index 0000000000..c719e1ec46 --- /dev/null +++ b/arch/um/drivers/vector_user.c @@ -0,0 +1,858 @@ +// SPDX-License-Identifier: GPL-2.0 +/* + * Copyright (C) 2001 - 2007 Jeff Dike (jdike@{addtoit,linux.intel}.com) + */ + +#include <stdbool.h> +#include <stdio.h> +#include <unistd.h> +#include <stdarg.h> +#include <errno.h> +#include <stddef.h> +#include <string.h> +#include <sys/ioctl.h> +#include <net/if.h> +#include <linux/if_tun.h> +#include <arpa/inet.h> +#include <sys/types.h> +#include <sys/stat.h> +#include <fcntl.h> +#include <sys/socket.h> +#include <sys/un.h> +#include <netinet/ip.h> +#include <linux/if_ether.h> +#include <linux/if_packet.h> +#include <sys/wait.h> +#include <sys/uio.h> +#include <linux/virtio_net.h> +#include <netdb.h> +#include <stdlib.h> +#include <os.h> +#include <limits.h> +#include <um_malloc.h> +#include "vector_user.h" + +#define ID_GRE 0 +#define ID_L2TPV3 1 +#define ID_BESS 2 +#define ID_MAX 2 + +#define TOKEN_IFNAME "ifname" +#define TOKEN_SCRIPT "ifup" + +#define TRANS_RAW "raw" +#define TRANS_RAW_LEN strlen(TRANS_RAW) + +#define TRANS_FD "fd" +#define TRANS_FD_LEN strlen(TRANS_FD) + +#define VNET_HDR_FAIL "could not enable vnet headers on fd %d" +#define TUN_GET_F_FAIL "tapraw: TUNGETFEATURES failed: %s" +#define L2TPV3_BIND_FAIL "l2tpv3_open : could not bind socket err=%i" +#define UNIX_BIND_FAIL "unix_open : could not bind socket err=%i" +#define BPF_ATTACH_FAIL "Failed to attach filter size %d prog %px to %d, err %d\n" +#define BPF_DETACH_FAIL "Failed to detach filter size %d prog %px to %d, err %d\n" + +#define MAX_UN_LEN 107 + +static const char padchar[] = "0123456789ABCDEFGHIJKLMNOPQRSTUVWXYZ"; +static const char *template = "tapXXXXXX"; + +/* This is very ugly and brute force lookup, but it is done + * only once at initialization so not worth doing hashes or + * anything more intelligent + */ + +char *uml_vector_fetch_arg(struct arglist *ifspec, char *token) +{ + int i; + + for (i = 0; i < ifspec->numargs; i++) { + if (strcmp(ifspec->tokens[i], token) == 0) + return ifspec->values[i]; + } + return NULL; + +} + +struct arglist *uml_parse_vector_ifspec(char *arg) +{ + struct arglist *result; + int pos, len; + bool parsing_token = true, next_starts = true; + + if (arg == NULL) + return NULL; + result = uml_kmalloc(sizeof(struct arglist), UM_GFP_KERNEL); + if (result == NULL) + return NULL; + result->numargs = 0; + len = strlen(arg); + for (pos = 0; pos < len; pos++) { + if (next_starts) { + if (parsing_token) { + result->tokens[result->numargs] = arg + pos; + } else { + result->values[result->numargs] = arg + pos; + result->numargs++; + } + next_starts = false; + } + if (*(arg + pos) == '=') { + if (parsing_token) + parsing_token = false; + else + goto cleanup; + next_starts = true; + (*(arg + pos)) = '\0'; + } + if (*(arg + pos) == ',') { + parsing_token = true; + next_starts = true; + (*(arg + pos)) = '\0'; + } + } + return result; +cleanup: + printk(UM_KERN_ERR "vector_setup - Couldn't parse '%s'\n", arg); + kfree(result); + return NULL; +} + +/* + * Socket/FD configuration functions. These return an structure + * of rx and tx descriptors to cover cases where these are not + * the same (f.e. read via raw socket and write via tap). + */ + +#define PATH_NET_TUN "/dev/net/tun" + + +static int create_tap_fd(char *iface) +{ + struct ifreq ifr; + int fd = -1; + int err = -ENOMEM, offload; + + fd = open(PATH_NET_TUN, O_RDWR); + if (fd < 0) { + printk(UM_KERN_ERR "uml_tap: failed to open tun device\n"); + goto tap_fd_cleanup; + } + memset(&ifr, 0, sizeof(ifr)); + ifr.ifr_flags = IFF_TAP | IFF_NO_PI | IFF_VNET_HDR; + strscpy(ifr.ifr_name, iface, sizeof(ifr.ifr_name)); + + err = ioctl(fd, TUNSETIFF, (void *) &ifr); + if (err != 0) { + printk(UM_KERN_ERR "uml_tap: failed to select tap interface\n"); + goto tap_fd_cleanup; + } + + offload = TUN_F_CSUM | TUN_F_TSO4 | TUN_F_TSO6; + ioctl(fd, TUNSETOFFLOAD, offload); + return fd; +tap_fd_cleanup: + if (fd >= 0) + os_close_file(fd); + return err; +} + +static int create_raw_fd(char *iface, int flags, int proto) +{ + struct ifreq ifr; + int fd = -1; + struct sockaddr_ll sock; + int err = -ENOMEM; + + fd = socket(AF_PACKET, SOCK_RAW, flags); + if (fd == -1) { + err = -errno; + goto raw_fd_cleanup; + } + memset(&ifr, 0, sizeof(ifr)); + strscpy(ifr.ifr_name, iface, sizeof(ifr.ifr_name)); + if (ioctl(fd, SIOCGIFINDEX, (void *) &ifr) < 0) { + err = -errno; + goto raw_fd_cleanup; + } + + sock.sll_family = AF_PACKET; + sock.sll_protocol = htons(proto); + sock.sll_ifindex = ifr.ifr_ifindex; + + if (bind(fd, + (struct sockaddr *) &sock, sizeof(struct sockaddr_ll)) < 0) { + err = -errno; + goto raw_fd_cleanup; + } + return fd; +raw_fd_cleanup: + printk(UM_KERN_ERR "user_init_raw: init failed, error %d", err); + if (fd >= 0) + os_close_file(fd); + return err; +} + + +static struct vector_fds *user_init_tap_fds(struct arglist *ifspec) +{ + int fd = -1, i; + char *iface; + struct vector_fds *result = NULL; + bool dynamic = false; + char dynamic_ifname[IFNAMSIZ]; + char *argv[] = {NULL, NULL, NULL, NULL}; + + iface = uml_vector_fetch_arg(ifspec, TOKEN_IFNAME); + if (iface == NULL) { + dynamic = true; + iface = dynamic_ifname; + srand(getpid()); + } + + result = uml_kmalloc(sizeof(struct vector_fds), UM_GFP_KERNEL); + if (result == NULL) { + printk(UM_KERN_ERR "uml_tap: failed to allocate file descriptors\n"); + goto tap_cleanup; + } + result->rx_fd = -1; + result->tx_fd = -1; + result->remote_addr = NULL; + result->remote_addr_size = 0; + + /* TAP */ + do { + if (dynamic) { + strcpy(iface, template); + for (i = 0; i < strlen(iface); i++) { + if (iface[i] == 'X') { + iface[i] = padchar[rand() % strlen(padchar)]; + } + } + } + fd = create_tap_fd(iface); + if ((fd < 0) && (!dynamic)) { + printk(UM_KERN_ERR "uml_tap: failed to create tun interface\n"); + goto tap_cleanup; + } + result->tx_fd = fd; + result->rx_fd = fd; + } while (fd < 0); + + argv[0] = uml_vector_fetch_arg(ifspec, TOKEN_SCRIPT); + if (argv[0]) { + argv[1] = iface; + run_helper(NULL, NULL, argv); + } + + return result; +tap_cleanup: + printk(UM_KERN_ERR "user_init_tap: init failed, error %d", fd); + kfree(result); + return NULL; +} + +static struct vector_fds *user_init_hybrid_fds(struct arglist *ifspec) +{ + char *iface; + struct vector_fds *result = NULL; + char *argv[] = {NULL, NULL, NULL, NULL}; + + iface = uml_vector_fetch_arg(ifspec, TOKEN_IFNAME); + if (iface == NULL) { + printk(UM_KERN_ERR "uml_tap: failed to parse interface spec\n"); + goto hybrid_cleanup; + } + + result = uml_kmalloc(sizeof(struct vector_fds), UM_GFP_KERNEL); + if (result == NULL) { + printk(UM_KERN_ERR "uml_tap: failed to allocate file descriptors\n"); + goto hybrid_cleanup; + } + result->rx_fd = -1; + result->tx_fd = -1; + result->remote_addr = NULL; + result->remote_addr_size = 0; + + /* TAP */ + + result->tx_fd = create_tap_fd(iface); + if (result->tx_fd < 0) { + printk(UM_KERN_ERR "uml_tap: failed to create tun interface: %i\n", result->tx_fd); + goto hybrid_cleanup; + } + + /* RAW */ + + result->rx_fd = create_raw_fd(iface, ETH_P_ALL, ETH_P_ALL); + if (result->rx_fd == -1) { + printk(UM_KERN_ERR + "uml_tap: failed to create paired raw socket: %i\n", result->rx_fd); + goto hybrid_cleanup; + } + + argv[0] = uml_vector_fetch_arg(ifspec, TOKEN_SCRIPT); + if (argv[0]) { + argv[1] = iface; + run_helper(NULL, NULL, argv); + } + return result; +hybrid_cleanup: + printk(UM_KERN_ERR "user_init_hybrid: init failed"); + kfree(result); + return NULL; +} + +static struct vector_fds *user_init_unix_fds(struct arglist *ifspec, int id) +{ + int fd = -1; + int socktype; + char *src, *dst; + struct vector_fds *result = NULL; + struct sockaddr_un *local_addr = NULL, *remote_addr = NULL; + + src = uml_vector_fetch_arg(ifspec, "src"); + dst = uml_vector_fetch_arg(ifspec, "dst"); + result = uml_kmalloc(sizeof(struct vector_fds), UM_GFP_KERNEL); + if (result == NULL) { + printk(UM_KERN_ERR "unix open:cannot allocate remote addr"); + goto unix_cleanup; + } + remote_addr = uml_kmalloc(sizeof(struct sockaddr_un), UM_GFP_KERNEL); + if (remote_addr == NULL) { + printk(UM_KERN_ERR "unix open:cannot allocate remote addr"); + goto unix_cleanup; + } + + switch (id) { + case ID_BESS: + socktype = SOCK_SEQPACKET; + if ((src != NULL) && (strlen(src) <= MAX_UN_LEN)) { + local_addr = uml_kmalloc(sizeof(struct sockaddr_un), UM_GFP_KERNEL); + if (local_addr == NULL) { + printk(UM_KERN_ERR "bess open:cannot allocate local addr"); + goto unix_cleanup; + } + local_addr->sun_family = AF_UNIX; + memcpy(local_addr->sun_path, src, strlen(src) + 1); + } + if ((dst == NULL) || (strlen(dst) > MAX_UN_LEN)) + goto unix_cleanup; + remote_addr->sun_family = AF_UNIX; + memcpy(remote_addr->sun_path, dst, strlen(dst) + 1); + break; + default: + printk(KERN_ERR "Unsupported unix socket type\n"); + return NULL; + } + + fd = socket(AF_UNIX, socktype, 0); + if (fd == -1) { + printk(UM_KERN_ERR + "unix open: could not open socket, error = %d", + -errno + ); + goto unix_cleanup; + } + if (local_addr != NULL) { + if (bind(fd, (struct sockaddr *) local_addr, sizeof(struct sockaddr_un))) { + printk(UM_KERN_ERR UNIX_BIND_FAIL, errno); + goto unix_cleanup; + } + } + switch (id) { + case ID_BESS: + if (connect(fd, (const struct sockaddr *) remote_addr, sizeof(struct sockaddr_un)) < 0) { + printk(UM_KERN_ERR "bess open:cannot connect to %s %i", remote_addr->sun_path, -errno); + goto unix_cleanup; + } + break; + } + result->rx_fd = fd; + result->tx_fd = fd; + result->remote_addr_size = sizeof(struct sockaddr_un); + result->remote_addr = remote_addr; + return result; +unix_cleanup: + if (fd >= 0) + os_close_file(fd); + kfree(remote_addr); + kfree(result); + return NULL; +} + +static int strtofd(const char *nptr) +{ + long fd; + char *endptr; + + if (nptr == NULL) + return -1; + + errno = 0; + fd = strtol(nptr, &endptr, 10); + if (nptr == endptr || + errno != 0 || + *endptr != '\0' || + fd < 0 || + fd > INT_MAX) { + return -1; + } + return fd; +} + +static struct vector_fds *user_init_fd_fds(struct arglist *ifspec) +{ + int fd = -1; + char *fdarg = NULL; + struct vector_fds *result = NULL; + + fdarg = uml_vector_fetch_arg(ifspec, "fd"); + fd = strtofd(fdarg); + if (fd == -1) { + printk(UM_KERN_ERR "fd open: bad or missing fd argument"); + goto fd_cleanup; + } + + result = uml_kmalloc(sizeof(struct vector_fds), UM_GFP_KERNEL); + if (result == NULL) { + printk(UM_KERN_ERR "fd open: allocation failed"); + goto fd_cleanup; + } + + result->rx_fd = fd; + result->tx_fd = fd; + result->remote_addr_size = 0; + result->remote_addr = NULL; + return result; + +fd_cleanup: + if (fd >= 0) + os_close_file(fd); + kfree(result); + return NULL; +} + +static struct vector_fds *user_init_raw_fds(struct arglist *ifspec) +{ + int rxfd = -1, txfd = -1; + int err = -ENOMEM; + char *iface; + struct vector_fds *result = NULL; + char *argv[] = {NULL, NULL, NULL, NULL}; + + iface = uml_vector_fetch_arg(ifspec, TOKEN_IFNAME); + if (iface == NULL) + goto raw_cleanup; + + rxfd = create_raw_fd(iface, ETH_P_ALL, ETH_P_ALL); + if (rxfd == -1) { + err = -errno; + goto raw_cleanup; + } + txfd = create_raw_fd(iface, 0, ETH_P_IP); /* Turn off RX on this fd */ + if (txfd == -1) { + err = -errno; + goto raw_cleanup; + } + result = uml_kmalloc(sizeof(struct vector_fds), UM_GFP_KERNEL); + if (result != NULL) { + result->rx_fd = rxfd; + result->tx_fd = txfd; + result->remote_addr = NULL; + result->remote_addr_size = 0; + } + argv[0] = uml_vector_fetch_arg(ifspec, TOKEN_SCRIPT); + if (argv[0]) { + argv[1] = iface; + run_helper(NULL, NULL, argv); + } + return result; +raw_cleanup: + printk(UM_KERN_ERR "user_init_raw: init failed, error %d", err); + kfree(result); + return NULL; +} + + +bool uml_raw_enable_qdisc_bypass(int fd) +{ + int optval = 1; + + if (setsockopt(fd, + SOL_PACKET, PACKET_QDISC_BYPASS, + &optval, sizeof(optval)) != 0) { + return false; + } + return true; +} + +bool uml_raw_enable_vnet_headers(int fd) +{ + int optval = 1; + + if (setsockopt(fd, + SOL_PACKET, PACKET_VNET_HDR, + &optval, sizeof(optval)) != 0) { + printk(UM_KERN_INFO VNET_HDR_FAIL, fd); + return false; + } + return true; +} +bool uml_tap_enable_vnet_headers(int fd) +{ + unsigned int features; + int len = sizeof(struct virtio_net_hdr); + + if (ioctl(fd, TUNGETFEATURES, &features) == -1) { + printk(UM_KERN_INFO TUN_GET_F_FAIL, strerror(errno)); + return false; + } + if ((features & IFF_VNET_HDR) == 0) { + printk(UM_KERN_INFO "tapraw: No VNET HEADER support"); + return false; + } + ioctl(fd, TUNSETVNETHDRSZ, &len); + return true; +} + +static struct vector_fds *user_init_socket_fds(struct arglist *ifspec, int id) +{ + int err = -ENOMEM; + int fd = -1, gairet; + struct addrinfo srchints; + struct addrinfo dsthints; + bool v6, udp; + char *value; + char *src, *dst, *srcport, *dstport; + struct addrinfo *gairesult = NULL; + struct vector_fds *result = NULL; + + + value = uml_vector_fetch_arg(ifspec, "v6"); + v6 = false; + udp = false; + if (value != NULL) { + if (strtol((const char *) value, NULL, 10) > 0) + v6 = true; + } + + value = uml_vector_fetch_arg(ifspec, "udp"); + if (value != NULL) { + if (strtol((const char *) value, NULL, 10) > 0) + udp = true; + } + src = uml_vector_fetch_arg(ifspec, "src"); + dst = uml_vector_fetch_arg(ifspec, "dst"); + srcport = uml_vector_fetch_arg(ifspec, "srcport"); + dstport = uml_vector_fetch_arg(ifspec, "dstport"); + + memset(&dsthints, 0, sizeof(dsthints)); + + if (v6) + dsthints.ai_family = AF_INET6; + else + dsthints.ai_family = AF_INET; + + switch (id) { + case ID_GRE: + dsthints.ai_socktype = SOCK_RAW; + dsthints.ai_protocol = IPPROTO_GRE; + break; + case ID_L2TPV3: + if (udp) { + dsthints.ai_socktype = SOCK_DGRAM; + dsthints.ai_protocol = 0; + } else { + dsthints.ai_socktype = SOCK_RAW; + dsthints.ai_protocol = IPPROTO_L2TP; + } + break; + default: + printk(KERN_ERR "Unsupported socket type\n"); + return NULL; + } + memcpy(&srchints, &dsthints, sizeof(struct addrinfo)); + + gairet = getaddrinfo(src, srcport, &dsthints, &gairesult); + if ((gairet != 0) || (gairesult == NULL)) { + printk(UM_KERN_ERR + "socket_open : could not resolve src, error = %s", + gai_strerror(gairet) + ); + return NULL; + } + fd = socket(gairesult->ai_family, + gairesult->ai_socktype, gairesult->ai_protocol); + if (fd == -1) { + printk(UM_KERN_ERR + "socket_open : could not open socket, error = %d", + -errno + ); + goto cleanup; + } + if (bind(fd, + (struct sockaddr *) gairesult->ai_addr, + gairesult->ai_addrlen)) { + printk(UM_KERN_ERR L2TPV3_BIND_FAIL, errno); + goto cleanup; + } + + if (gairesult != NULL) + freeaddrinfo(gairesult); + + gairesult = NULL; + + gairet = getaddrinfo(dst, dstport, &dsthints, &gairesult); + if ((gairet != 0) || (gairesult == NULL)) { + printk(UM_KERN_ERR + "socket_open : could not resolve dst, error = %s", + gai_strerror(gairet) + ); + return NULL; + } + + result = uml_kmalloc(sizeof(struct vector_fds), UM_GFP_KERNEL); + if (result != NULL) { + result->rx_fd = fd; + result->tx_fd = fd; + result->remote_addr = uml_kmalloc( + gairesult->ai_addrlen, UM_GFP_KERNEL); + if (result->remote_addr == NULL) + goto cleanup; + result->remote_addr_size = gairesult->ai_addrlen; + memcpy( + result->remote_addr, + gairesult->ai_addr, + gairesult->ai_addrlen + ); + } + freeaddrinfo(gairesult); + return result; +cleanup: + if (gairesult != NULL) + freeaddrinfo(gairesult); + printk(UM_KERN_ERR "user_init_socket: init failed, error %d", err); + if (fd >= 0) + os_close_file(fd); + if (result != NULL) { + kfree(result->remote_addr); + kfree(result); + } + return NULL; +} + +struct vector_fds *uml_vector_user_open( + int unit, + struct arglist *parsed +) +{ + char *transport; + + if (parsed == NULL) { + printk(UM_KERN_ERR "no parsed config for unit %d\n", unit); + return NULL; + } + transport = uml_vector_fetch_arg(parsed, "transport"); + if (transport == NULL) { + printk(UM_KERN_ERR "missing transport for unit %d\n", unit); + return NULL; + } + if (strncmp(transport, TRANS_RAW, TRANS_RAW_LEN) == 0) + return user_init_raw_fds(parsed); + if (strncmp(transport, TRANS_HYBRID, TRANS_HYBRID_LEN) == 0) + return user_init_hybrid_fds(parsed); + if (strncmp(transport, TRANS_TAP, TRANS_TAP_LEN) == 0) + return user_init_tap_fds(parsed); + if (strncmp(transport, TRANS_GRE, TRANS_GRE_LEN) == 0) + return user_init_socket_fds(parsed, ID_GRE); + if (strncmp(transport, TRANS_L2TPV3, TRANS_L2TPV3_LEN) == 0) + return user_init_socket_fds(parsed, ID_L2TPV3); + if (strncmp(transport, TRANS_BESS, TRANS_BESS_LEN) == 0) + return user_init_unix_fds(parsed, ID_BESS); + if (strncmp(transport, TRANS_FD, TRANS_FD_LEN) == 0) + return user_init_fd_fds(parsed); + return NULL; +} + + +int uml_vector_sendmsg(int fd, void *hdr, int flags) +{ + int n; + + CATCH_EINTR(n = sendmsg(fd, (struct msghdr *) hdr, flags)); + if ((n < 0) && (errno == EAGAIN)) + return 0; + if (n >= 0) + return n; + else + return -errno; +} + +int uml_vector_recvmsg(int fd, void *hdr, int flags) +{ + int n; + struct msghdr *msg = (struct msghdr *) hdr; + + CATCH_EINTR(n = readv(fd, msg->msg_iov, msg->msg_iovlen)); + if ((n < 0) && (errno == EAGAIN)) + return 0; + if (n >= 0) + return n; + else + return -errno; +} + +int uml_vector_writev(int fd, void *hdr, int iovcount) +{ + int n; + + CATCH_EINTR(n = writev(fd, (struct iovec *) hdr, iovcount)); + if ((n < 0) && ((errno == EAGAIN) || (errno == ENOBUFS))) + return 0; + if (n >= 0) + return n; + else + return -errno; +} + +int uml_vector_sendmmsg( + int fd, + void *msgvec, + unsigned int vlen, + unsigned int flags) +{ + int n; + + CATCH_EINTR(n = sendmmsg(fd, (struct mmsghdr *) msgvec, vlen, flags)); + if ((n < 0) && ((errno == EAGAIN) || (errno == ENOBUFS))) + return 0; + if (n >= 0) + return n; + else + return -errno; +} + +int uml_vector_recvmmsg( + int fd, + void *msgvec, + unsigned int vlen, + unsigned int flags) +{ + int n; + + CATCH_EINTR( + n = recvmmsg(fd, (struct mmsghdr *) msgvec, vlen, flags, 0)); + if ((n < 0) && (errno == EAGAIN)) + return 0; + if (n >= 0) + return n; + else + return -errno; +} +int uml_vector_attach_bpf(int fd, void *bpf) +{ + struct sock_fprog *prog = bpf; + + int err = setsockopt(fd, SOL_SOCKET, SO_ATTACH_FILTER, bpf, sizeof(struct sock_fprog)); + + if (err < 0) + printk(KERN_ERR BPF_ATTACH_FAIL, prog->len, prog->filter, fd, -errno); + return err; +} + +int uml_vector_detach_bpf(int fd, void *bpf) +{ + struct sock_fprog *prog = bpf; + + int err = setsockopt(fd, SOL_SOCKET, SO_DETACH_FILTER, bpf, sizeof(struct sock_fprog)); + if (err < 0) + printk(KERN_ERR BPF_DETACH_FAIL, prog->len, prog->filter, fd, -errno); + return err; +} +void *uml_vector_default_bpf(const void *mac) +{ + struct sock_filter *bpf; + uint32_t *mac1 = (uint32_t *)(mac + 2); + uint16_t *mac2 = (uint16_t *) mac; + struct sock_fprog *bpf_prog; + + bpf_prog = uml_kmalloc(sizeof(struct sock_fprog), UM_GFP_KERNEL); + if (bpf_prog) { + bpf_prog->len = DEFAULT_BPF_LEN; + bpf_prog->filter = NULL; + } else { + return NULL; + } + bpf = uml_kmalloc( + sizeof(struct sock_filter) * DEFAULT_BPF_LEN, UM_GFP_KERNEL); + if (bpf) { + bpf_prog->filter = bpf; + /* ld [8] */ + bpf[0] = (struct sock_filter){ 0x20, 0, 0, 0x00000008 }; + /* jeq #0xMAC[2-6] jt 2 jf 5*/ + bpf[1] = (struct sock_filter){ 0x15, 0, 3, ntohl(*mac1)}; + /* ldh [6] */ + bpf[2] = (struct sock_filter){ 0x28, 0, 0, 0x00000006 }; + /* jeq #0xMAC[0-1] jt 4 jf 5 */ + bpf[3] = (struct sock_filter){ 0x15, 0, 1, ntohs(*mac2)}; + /* ret #0 */ + bpf[4] = (struct sock_filter){ 0x6, 0, 0, 0x00000000 }; + /* ret #0x40000 */ + bpf[5] = (struct sock_filter){ 0x6, 0, 0, 0x00040000 }; + } else { + kfree(bpf_prog); + bpf_prog = NULL; + } + return bpf_prog; +} + +/* Note - this function requires a valid mac being passed as an arg */ + +void *uml_vector_user_bpf(char *filename) +{ + struct sock_filter *bpf; + struct sock_fprog *bpf_prog; + struct stat statbuf; + int res, ffd = -1; + + if (filename == NULL) + return NULL; + + if (stat(filename, &statbuf) < 0) { + printk(KERN_ERR "Error %d reading bpf file", -errno); + return false; + } + bpf_prog = uml_kmalloc(sizeof(struct sock_fprog), UM_GFP_KERNEL); + if (bpf_prog == NULL) { + printk(KERN_ERR "Failed to allocate bpf prog buffer"); + return NULL; + } + bpf_prog->len = statbuf.st_size / sizeof(struct sock_filter); + bpf_prog->filter = NULL; + ffd = os_open_file(filename, of_read(OPENFLAGS()), 0); + if (ffd < 0) { + printk(KERN_ERR "Error %d opening bpf file", -errno); + goto bpf_failed; + } + bpf = uml_kmalloc(statbuf.st_size, UM_GFP_KERNEL); + if (bpf == NULL) { + printk(KERN_ERR "Failed to allocate bpf buffer"); + goto bpf_failed; + } + bpf_prog->filter = bpf; + res = os_read_file(ffd, bpf, statbuf.st_size); + if (res < statbuf.st_size) { + printk(KERN_ERR "Failed to read bpf program %s, error %d", filename, res); + kfree(bpf); + goto bpf_failed; + } + os_close_file(ffd); + return bpf_prog; +bpf_failed: + if (ffd > 0) + os_close_file(ffd); + kfree(bpf_prog); + return NULL; +} diff --git a/arch/um/drivers/vector_user.h b/arch/um/drivers/vector_user.h new file mode 100644 index 0000000000..59ed5f9e6e --- /dev/null +++ b/arch/um/drivers/vector_user.h @@ -0,0 +1,107 @@ +/* SPDX-License-Identifier: GPL-2.0 */ +/* + * Copyright (C) 2002 2007 Jeff Dike (jdike@{addtoit,linux.intel}.com) + */ + +#ifndef __UM_VECTOR_USER_H +#define __UM_VECTOR_USER_H + +#define MAXVARGS 20 + +#define TOKEN_IFNAME "ifname" + +#define TRANS_RAW "raw" +#define TRANS_RAW_LEN strlen(TRANS_RAW) + +#define TRANS_TAP "tap" +#define TRANS_TAP_LEN strlen(TRANS_TAP) + +#define TRANS_GRE "gre" +#define TRANS_GRE_LEN strlen(TRANS_GRE) + +#define TRANS_L2TPV3 "l2tpv3" +#define TRANS_L2TPV3_LEN strlen(TRANS_L2TPV3) + +#define TRANS_HYBRID "hybrid" +#define TRANS_HYBRID_LEN strlen(TRANS_HYBRID) + +#define TRANS_BESS "bess" +#define TRANS_BESS_LEN strlen(TRANS_BESS) + +#define DEFAULT_BPF_LEN 6 + +#ifndef IPPROTO_GRE +#define IPPROTO_GRE 0x2F +#endif + +#define GRE_MODE_CHECKSUM cpu_to_be16(8 << 12) /* checksum */ +#define GRE_MODE_RESERVED cpu_to_be16(4 << 12) /* unused */ +#define GRE_MODE_KEY cpu_to_be16(2 << 12) /* KEY present */ +#define GRE_MODE_SEQUENCE cpu_to_be16(1 << 12) /* sequence */ + +#define GRE_IRB cpu_to_be16(0x6558) + +#define L2TPV3_DATA_PACKET 0x30000 + +/* IANA-assigned IP protocol ID for L2TPv3 */ + +#ifndef IPPROTO_L2TP +#define IPPROTO_L2TP 0x73 +#endif + +struct arglist { + int numargs; + char *tokens[MAXVARGS]; + char *values[MAXVARGS]; +}; + +/* Separating read and write FDs allows us to have different + * rx and tx method. Example - read tap via raw socket using + * recvmmsg, write using legacy tap write calls + */ + +struct vector_fds { + int rx_fd; + int tx_fd; + void *remote_addr; + int remote_addr_size; +}; + +#define VECTOR_READ 1 + +extern struct arglist *uml_parse_vector_ifspec(char *arg); + +extern struct vector_fds *uml_vector_user_open( + int unit, + struct arglist *parsed +); + +extern char *uml_vector_fetch_arg( + struct arglist *ifspec, + char *token +); + +extern int uml_vector_recvmsg(int fd, void *hdr, int flags); +extern int uml_vector_sendmsg(int fd, void *hdr, int flags); +extern int uml_vector_writev(int fd, void *hdr, int iovcount); +extern int uml_vector_sendmmsg( + int fd, void *msgvec, + unsigned int vlen, + unsigned int flags +); +extern int uml_vector_recvmmsg( + int fd, + void *msgvec, + unsigned int vlen, + unsigned int flags +); +extern void *uml_vector_default_bpf(const void *mac); +extern void *uml_vector_user_bpf(char *filename); +extern int uml_vector_attach_bpf(int fd, void *bpf); +extern int uml_vector_detach_bpf(int fd, void *bpf); +extern bool uml_raw_enable_qdisc_bypass(int fd); +extern bool uml_raw_enable_vnet_headers(int fd); +extern bool uml_tap_enable_vnet_headers(int fd); + + +#endif diff --git a/arch/um/drivers/vhost_user.h b/arch/um/drivers/vhost_user.h new file mode 100644 index 0000000000..6f147cd3c9 --- /dev/null +++ b/arch/um/drivers/vhost_user.h @@ -0,0 +1,121 @@ +// SPDX-License-Identifier: GPL-2.0-or-later +/* Vhost-user protocol */ + +#ifndef __VHOST_USER_H__ +#define __VHOST_USER_H__ + +/* Message flags */ +#define VHOST_USER_FLAG_REPLY BIT(2) +#define VHOST_USER_FLAG_NEED_REPLY BIT(3) +/* Feature bits */ +#define VHOST_USER_F_PROTOCOL_FEATURES 30 +/* Protocol feature bits */ +#define VHOST_USER_PROTOCOL_F_REPLY_ACK 3 +#define VHOST_USER_PROTOCOL_F_SLAVE_REQ 5 +#define VHOST_USER_PROTOCOL_F_CONFIG 9 +#define VHOST_USER_PROTOCOL_F_INBAND_NOTIFICATIONS 14 +/* Vring state index masks */ +#define VHOST_USER_VRING_INDEX_MASK 0xff +#define VHOST_USER_VRING_POLL_MASK BIT(8) + +/* Supported version */ +#define VHOST_USER_VERSION 1 +/* Supported transport features */ +#define VHOST_USER_SUPPORTED_F BIT_ULL(VHOST_USER_F_PROTOCOL_FEATURES) +/* Supported protocol features */ +#define VHOST_USER_SUPPORTED_PROTOCOL_F (BIT_ULL(VHOST_USER_PROTOCOL_F_REPLY_ACK) | \ + BIT_ULL(VHOST_USER_PROTOCOL_F_SLAVE_REQ) | \ + BIT_ULL(VHOST_USER_PROTOCOL_F_CONFIG) | \ + BIT_ULL(VHOST_USER_PROTOCOL_F_INBAND_NOTIFICATIONS)) + +enum vhost_user_request { + VHOST_USER_GET_FEATURES = 1, + VHOST_USER_SET_FEATURES = 2, + VHOST_USER_SET_OWNER = 3, + VHOST_USER_RESET_OWNER = 4, + VHOST_USER_SET_MEM_TABLE = 5, + VHOST_USER_SET_LOG_BASE = 6, + VHOST_USER_SET_LOG_FD = 7, + VHOST_USER_SET_VRING_NUM = 8, + VHOST_USER_SET_VRING_ADDR = 9, + VHOST_USER_SET_VRING_BASE = 10, + VHOST_USER_GET_VRING_BASE = 11, + VHOST_USER_SET_VRING_KICK = 12, + VHOST_USER_SET_VRING_CALL = 13, + VHOST_USER_SET_VRING_ERR = 14, + VHOST_USER_GET_PROTOCOL_FEATURES = 15, + VHOST_USER_SET_PROTOCOL_FEATURES = 16, + VHOST_USER_GET_QUEUE_NUM = 17, + VHOST_USER_SET_VRING_ENABLE = 18, + VHOST_USER_SEND_RARP = 19, + VHOST_USER_NET_SEND_MTU = 20, + VHOST_USER_SET_SLAVE_REQ_FD = 21, + VHOST_USER_IOTLB_MSG = 22, + VHOST_USER_SET_VRING_ENDIAN = 23, + VHOST_USER_GET_CONFIG = 24, + VHOST_USER_SET_CONFIG = 25, + VHOST_USER_VRING_KICK = 35, +}; + +enum vhost_user_slave_request { + VHOST_USER_SLAVE_IOTLB_MSG = 1, + VHOST_USER_SLAVE_CONFIG_CHANGE_MSG = 2, + VHOST_USER_SLAVE_VRING_HOST_NOTIFIER_MSG = 3, + VHOST_USER_SLAVE_VRING_CALL = 4, +}; + +struct vhost_user_header { + /* + * Use enum vhost_user_request for outgoing messages, + * uses enum vhost_user_slave_request for incoming ones. + */ + u32 request; + u32 flags; + u32 size; +} __packed; + +struct vhost_user_config { + u32 offset; + u32 size; + u32 flags; + u8 payload[]; /* Variable length */ +} __packed; + +struct vhost_user_vring_state { + u32 index; + u32 num; +} __packed; + +struct vhost_user_vring_addr { + u32 index; + u32 flags; + u64 desc, used, avail, log; +} __packed; + +struct vhost_user_mem_region { + u64 guest_addr; + u64 size; + u64 user_addr; + u64 mmap_offset; +} __packed; + +struct vhost_user_mem_regions { + u32 num; + u32 padding; + struct vhost_user_mem_region regions[2]; /* Currently supporting 2 */ +} __packed; + +union vhost_user_payload { + u64 integer; + struct vhost_user_config config; + struct vhost_user_vring_state vring_state; + struct vhost_user_vring_addr vring_addr; + struct vhost_user_mem_regions mem_regions; +}; + +struct vhost_user_msg { + struct vhost_user_header header; + union vhost_user_payload payload; +} __packed; + +#endif diff --git a/arch/um/drivers/virt-pci.c b/arch/um/drivers/virt-pci.c new file mode 100644 index 0000000000..97a37c0629 --- /dev/null +++ b/arch/um/drivers/virt-pci.c @@ -0,0 +1,1082 @@ +// SPDX-License-Identifier: GPL-2.0 +/* + * Copyright (C) 2020 Intel Corporation + * Author: Johannes Berg <johannes@sipsolutions.net> + */ +#include <linux/module.h> +#include <linux/pci.h> +#include <linux/virtio.h> +#include <linux/virtio_config.h> +#include <linux/logic_iomem.h> +#include <linux/of_platform.h> +#include <linux/irqdomain.h> +#include <linux/virtio_pcidev.h> +#include <linux/virtio-uml.h> +#include <linux/delay.h> +#include <linux/msi.h> +#include <asm/unaligned.h> +#include <irq_kern.h> + +#define MAX_DEVICES 8 +#define MAX_MSI_VECTORS 32 +#define CFG_SPACE_SIZE 4096 + +/* for MSI-X we have a 32-bit payload */ +#define MAX_IRQ_MSG_SIZE (sizeof(struct virtio_pcidev_msg) + sizeof(u32)) +#define NUM_IRQ_MSGS 10 + +#define HANDLE_NO_FREE(ptr) ((void *)((unsigned long)(ptr) | 1)) +#define HANDLE_IS_NO_FREE(ptr) ((unsigned long)(ptr) & 1) + +struct um_pci_device { + struct virtio_device *vdev; + + /* for now just standard BARs */ + u8 resptr[PCI_STD_NUM_BARS]; + + struct virtqueue *cmd_vq, *irq_vq; + +#define UM_PCI_STAT_WAITING 0 + unsigned long status; + + int irq; + + bool platform; +}; + +struct um_pci_device_reg { + struct um_pci_device *dev; + void __iomem *iomem; +}; + +static struct pci_host_bridge *bridge; +static DEFINE_MUTEX(um_pci_mtx); +static struct um_pci_device *um_pci_platform_device; +static struct um_pci_device_reg um_pci_devices[MAX_DEVICES]; +static struct fwnode_handle *um_pci_fwnode; +static struct irq_domain *um_pci_inner_domain; +static struct irq_domain *um_pci_msi_domain; +static unsigned long um_pci_msi_used[BITS_TO_LONGS(MAX_MSI_VECTORS)]; + +static unsigned int um_pci_max_delay_us = 40000; +module_param_named(max_delay_us, um_pci_max_delay_us, uint, 0644); + +struct um_pci_message_buffer { + struct virtio_pcidev_msg hdr; + u8 data[8]; +}; + +static struct um_pci_message_buffer __percpu *um_pci_msg_bufs; + +static int um_pci_send_cmd(struct um_pci_device *dev, + struct virtio_pcidev_msg *cmd, + unsigned int cmd_size, + const void *extra, unsigned int extra_size, + void *out, unsigned int out_size) +{ + struct scatterlist out_sg, extra_sg, in_sg; + struct scatterlist *sgs_list[] = { + [0] = &out_sg, + [1] = extra ? &extra_sg : &in_sg, + [2] = extra ? &in_sg : NULL, + }; + struct um_pci_message_buffer *buf; + int delay_count = 0; + int ret, len; + bool posted; + + if (WARN_ON(cmd_size < sizeof(*cmd) || cmd_size > sizeof(*buf))) + return -EINVAL; + + switch (cmd->op) { + case VIRTIO_PCIDEV_OP_CFG_WRITE: + case VIRTIO_PCIDEV_OP_MMIO_WRITE: + case VIRTIO_PCIDEV_OP_MMIO_MEMSET: + /* in PCI, writes are posted, so don't wait */ + posted = !out; + WARN_ON(!posted); + break; + default: + posted = false; + break; + } + + buf = get_cpu_var(um_pci_msg_bufs); + if (buf) + memcpy(buf, cmd, cmd_size); + + if (posted) { + u8 *ncmd = kmalloc(cmd_size + extra_size, GFP_ATOMIC); + + if (ncmd) { + memcpy(ncmd, cmd, cmd_size); + if (extra) + memcpy(ncmd + cmd_size, extra, extra_size); + cmd = (void *)ncmd; + cmd_size += extra_size; + extra = NULL; + extra_size = 0; + } else { + /* try without allocating memory */ + posted = false; + cmd = (void *)buf; + } + } else { + cmd = (void *)buf; + } + + sg_init_one(&out_sg, cmd, cmd_size); + if (extra) + sg_init_one(&extra_sg, extra, extra_size); + if (out) + sg_init_one(&in_sg, out, out_size); + + /* add to internal virtio queue */ + ret = virtqueue_add_sgs(dev->cmd_vq, sgs_list, + extra ? 2 : 1, + out ? 1 : 0, + posted ? cmd : HANDLE_NO_FREE(cmd), + GFP_ATOMIC); + if (ret) { + if (posted) + kfree(cmd); + goto out; + } + + if (posted) { + virtqueue_kick(dev->cmd_vq); + ret = 0; + goto out; + } + + /* kick and poll for getting a response on the queue */ + set_bit(UM_PCI_STAT_WAITING, &dev->status); + virtqueue_kick(dev->cmd_vq); + + while (1) { + void *completed = virtqueue_get_buf(dev->cmd_vq, &len); + + if (completed == HANDLE_NO_FREE(cmd)) + break; + + if (completed && !HANDLE_IS_NO_FREE(completed)) + kfree(completed); + + if (WARN_ONCE(virtqueue_is_broken(dev->cmd_vq) || + ++delay_count > um_pci_max_delay_us, + "um virt-pci delay: %d", delay_count)) { + ret = -EIO; + break; + } + udelay(1); + } + clear_bit(UM_PCI_STAT_WAITING, &dev->status); + +out: + put_cpu_var(um_pci_msg_bufs); + return ret; +} + +static unsigned long um_pci_cfgspace_read(void *priv, unsigned int offset, + int size) +{ + struct um_pci_device_reg *reg = priv; + struct um_pci_device *dev = reg->dev; + struct virtio_pcidev_msg hdr = { + .op = VIRTIO_PCIDEV_OP_CFG_READ, + .size = size, + .addr = offset, + }; + /* buf->data is maximum size - we may only use parts of it */ + struct um_pci_message_buffer *buf; + u8 *data; + unsigned long ret = ULONG_MAX; + size_t bytes = sizeof(buf->data); + + if (!dev) + return ULONG_MAX; + + buf = get_cpu_var(um_pci_msg_bufs); + data = buf->data; + + if (buf) + memset(data, 0xff, bytes); + + switch (size) { + case 1: + case 2: + case 4: +#ifdef CONFIG_64BIT + case 8: +#endif + break; + default: + WARN(1, "invalid config space read size %d\n", size); + goto out; + } + + if (um_pci_send_cmd(dev, &hdr, sizeof(hdr), NULL, 0, data, bytes)) + goto out; + + switch (size) { + case 1: + ret = data[0]; + break; + case 2: + ret = le16_to_cpup((void *)data); + break; + case 4: + ret = le32_to_cpup((void *)data); + break; +#ifdef CONFIG_64BIT + case 8: + ret = le64_to_cpup((void *)data); + break; +#endif + default: + break; + } + +out: + put_cpu_var(um_pci_msg_bufs); + return ret; +} + +static void um_pci_cfgspace_write(void *priv, unsigned int offset, int size, + unsigned long val) +{ + struct um_pci_device_reg *reg = priv; + struct um_pci_device *dev = reg->dev; + struct { + struct virtio_pcidev_msg hdr; + /* maximum size - we may only use parts of it */ + u8 data[8]; + } msg = { + .hdr = { + .op = VIRTIO_PCIDEV_OP_CFG_WRITE, + .size = size, + .addr = offset, + }, + }; + + if (!dev) + return; + + switch (size) { + case 1: + msg.data[0] = (u8)val; + break; + case 2: + put_unaligned_le16(val, (void *)msg.data); + break; + case 4: + put_unaligned_le32(val, (void *)msg.data); + break; +#ifdef CONFIG_64BIT + case 8: + put_unaligned_le64(val, (void *)msg.data); + break; +#endif + default: + WARN(1, "invalid config space write size %d\n", size); + return; + } + + WARN_ON(um_pci_send_cmd(dev, &msg.hdr, sizeof(msg), NULL, 0, NULL, 0)); +} + +static const struct logic_iomem_ops um_pci_device_cfgspace_ops = { + .read = um_pci_cfgspace_read, + .write = um_pci_cfgspace_write, +}; + +static void um_pci_bar_copy_from(void *priv, void *buffer, + unsigned int offset, int size) +{ + u8 *resptr = priv; + struct um_pci_device *dev = container_of(resptr - *resptr, + struct um_pci_device, + resptr[0]); + struct virtio_pcidev_msg hdr = { + .op = VIRTIO_PCIDEV_OP_MMIO_READ, + .bar = *resptr, + .size = size, + .addr = offset, + }; + + memset(buffer, 0xff, size); + + um_pci_send_cmd(dev, &hdr, sizeof(hdr), NULL, 0, buffer, size); +} + +static unsigned long um_pci_bar_read(void *priv, unsigned int offset, + int size) +{ + /* buf->data is maximum size - we may only use parts of it */ + struct um_pci_message_buffer *buf; + u8 *data; + unsigned long ret = ULONG_MAX; + + buf = get_cpu_var(um_pci_msg_bufs); + data = buf->data; + + switch (size) { + case 1: + case 2: + case 4: +#ifdef CONFIG_64BIT + case 8: +#endif + break; + default: + WARN(1, "invalid config space read size %d\n", size); + goto out; + } + + um_pci_bar_copy_from(priv, data, offset, size); + + switch (size) { + case 1: + ret = data[0]; + break; + case 2: + ret = le16_to_cpup((void *)data); + break; + case 4: + ret = le32_to_cpup((void *)data); + break; +#ifdef CONFIG_64BIT + case 8: + ret = le64_to_cpup((void *)data); + break; +#endif + default: + break; + } + +out: + put_cpu_var(um_pci_msg_bufs); + return ret; +} + +static void um_pci_bar_copy_to(void *priv, unsigned int offset, + const void *buffer, int size) +{ + u8 *resptr = priv; + struct um_pci_device *dev = container_of(resptr - *resptr, + struct um_pci_device, + resptr[0]); + struct virtio_pcidev_msg hdr = { + .op = VIRTIO_PCIDEV_OP_MMIO_WRITE, + .bar = *resptr, + .size = size, + .addr = offset, + }; + + um_pci_send_cmd(dev, &hdr, sizeof(hdr), buffer, size, NULL, 0); +} + +static void um_pci_bar_write(void *priv, unsigned int offset, int size, + unsigned long val) +{ + /* maximum size - we may only use parts of it */ + u8 data[8]; + + switch (size) { + case 1: + data[0] = (u8)val; + break; + case 2: + put_unaligned_le16(val, (void *)data); + break; + case 4: + put_unaligned_le32(val, (void *)data); + break; +#ifdef CONFIG_64BIT + case 8: + put_unaligned_le64(val, (void *)data); + break; +#endif + default: + WARN(1, "invalid config space write size %d\n", size); + return; + } + + um_pci_bar_copy_to(priv, offset, data, size); +} + +static void um_pci_bar_set(void *priv, unsigned int offset, u8 value, int size) +{ + u8 *resptr = priv; + struct um_pci_device *dev = container_of(resptr - *resptr, + struct um_pci_device, + resptr[0]); + struct { + struct virtio_pcidev_msg hdr; + u8 data; + } msg = { + .hdr = { + .op = VIRTIO_PCIDEV_OP_CFG_WRITE, + .bar = *resptr, + .size = size, + .addr = offset, + }, + .data = value, + }; + + um_pci_send_cmd(dev, &msg.hdr, sizeof(msg), NULL, 0, NULL, 0); +} + +static const struct logic_iomem_ops um_pci_device_bar_ops = { + .read = um_pci_bar_read, + .write = um_pci_bar_write, + .set = um_pci_bar_set, + .copy_from = um_pci_bar_copy_from, + .copy_to = um_pci_bar_copy_to, +}; + +static void __iomem *um_pci_map_bus(struct pci_bus *bus, unsigned int devfn, + int where) +{ + struct um_pci_device_reg *dev; + unsigned int busn = bus->number; + + if (busn > 0) + return NULL; + + /* not allowing functions for now ... */ + if (devfn % 8) + return NULL; + + if (devfn / 8 >= ARRAY_SIZE(um_pci_devices)) + return NULL; + + dev = &um_pci_devices[devfn / 8]; + if (!dev) + return NULL; + + return (void __iomem *)((unsigned long)dev->iomem + where); +} + +static struct pci_ops um_pci_ops = { + .map_bus = um_pci_map_bus, + .read = pci_generic_config_read, + .write = pci_generic_config_write, +}; + +static void um_pci_rescan(void) +{ + pci_lock_rescan_remove(); + pci_rescan_bus(bridge->bus); + pci_unlock_rescan_remove(); +} + +static void um_pci_irq_vq_addbuf(struct virtqueue *vq, void *buf, bool kick) +{ + struct scatterlist sg[1]; + + sg_init_one(sg, buf, MAX_IRQ_MSG_SIZE); + if (virtqueue_add_inbuf(vq, sg, 1, buf, GFP_ATOMIC)) + kfree(buf); + else if (kick) + virtqueue_kick(vq); +} + +static void um_pci_handle_irq_message(struct virtqueue *vq, + struct virtio_pcidev_msg *msg) +{ + struct virtio_device *vdev = vq->vdev; + struct um_pci_device *dev = vdev->priv; + + if (!dev->irq) + return; + + /* we should properly chain interrupts, but on ARCH=um we don't care */ + + switch (msg->op) { + case VIRTIO_PCIDEV_OP_INT: + generic_handle_irq(dev->irq); + break; + case VIRTIO_PCIDEV_OP_MSI: + /* our MSI message is just the interrupt number */ + if (msg->size == sizeof(u32)) + generic_handle_irq(le32_to_cpup((void *)msg->data)); + else + generic_handle_irq(le16_to_cpup((void *)msg->data)); + break; + case VIRTIO_PCIDEV_OP_PME: + /* nothing to do - we already woke up due to the message */ + break; + default: + dev_err(&vdev->dev, "unexpected virt-pci message %d\n", msg->op); + break; + } +} + +static void um_pci_cmd_vq_cb(struct virtqueue *vq) +{ + struct virtio_device *vdev = vq->vdev; + struct um_pci_device *dev = vdev->priv; + void *cmd; + int len; + + if (test_bit(UM_PCI_STAT_WAITING, &dev->status)) + return; + + while ((cmd = virtqueue_get_buf(vq, &len))) { + if (WARN_ON(HANDLE_IS_NO_FREE(cmd))) + continue; + kfree(cmd); + } +} + +static void um_pci_irq_vq_cb(struct virtqueue *vq) +{ + struct virtio_pcidev_msg *msg; + int len; + + while ((msg = virtqueue_get_buf(vq, &len))) { + if (len >= sizeof(*msg)) + um_pci_handle_irq_message(vq, msg); + + /* recycle the message buffer */ + um_pci_irq_vq_addbuf(vq, msg, true); + } +} + +#ifdef CONFIG_OF +/* Copied from arch/x86/kernel/devicetree.c */ +struct device_node *pcibios_get_phb_of_node(struct pci_bus *bus) +{ + struct device_node *np; + + for_each_node_by_type(np, "pci") { + const void *prop; + unsigned int bus_min; + + prop = of_get_property(np, "bus-range", NULL); + if (!prop) + continue; + bus_min = be32_to_cpup(prop); + if (bus->number == bus_min) + return np; + } + return NULL; +} +#endif + +static int um_pci_init_vqs(struct um_pci_device *dev) +{ + struct virtqueue *vqs[2]; + static const char *const names[2] = { "cmd", "irq" }; + vq_callback_t *cbs[2] = { um_pci_cmd_vq_cb, um_pci_irq_vq_cb }; + int err, i; + + err = virtio_find_vqs(dev->vdev, 2, vqs, cbs, names, NULL); + if (err) + return err; + + dev->cmd_vq = vqs[0]; + dev->irq_vq = vqs[1]; + + virtio_device_ready(dev->vdev); + + for (i = 0; i < NUM_IRQ_MSGS; i++) { + void *msg = kzalloc(MAX_IRQ_MSG_SIZE, GFP_KERNEL); + + if (msg) + um_pci_irq_vq_addbuf(dev->irq_vq, msg, false); + } + + virtqueue_kick(dev->irq_vq); + + return 0; +} + +static void __um_pci_virtio_platform_remove(struct virtio_device *vdev, + struct um_pci_device *dev) +{ + virtio_reset_device(vdev); + vdev->config->del_vqs(vdev); + + mutex_lock(&um_pci_mtx); + um_pci_platform_device = NULL; + mutex_unlock(&um_pci_mtx); + + kfree(dev); +} + +static int um_pci_virtio_platform_probe(struct virtio_device *vdev, + struct um_pci_device *dev) +{ + int ret; + + dev->platform = true; + + mutex_lock(&um_pci_mtx); + + if (um_pci_platform_device) { + mutex_unlock(&um_pci_mtx); + ret = -EBUSY; + goto out_free; + } + + ret = um_pci_init_vqs(dev); + if (ret) { + mutex_unlock(&um_pci_mtx); + goto out_free; + } + + um_pci_platform_device = dev; + + mutex_unlock(&um_pci_mtx); + + ret = of_platform_default_populate(vdev->dev.of_node, NULL, &vdev->dev); + if (ret) + __um_pci_virtio_platform_remove(vdev, dev); + + return ret; + +out_free: + kfree(dev); + return ret; +} + +static int um_pci_virtio_probe(struct virtio_device *vdev) +{ + struct um_pci_device *dev; + int i, free = -1; + int err = -ENOSPC; + + dev = kzalloc(sizeof(*dev), GFP_KERNEL); + if (!dev) + return -ENOMEM; + + dev->vdev = vdev; + vdev->priv = dev; + + if (of_device_is_compatible(vdev->dev.of_node, "simple-bus")) + return um_pci_virtio_platform_probe(vdev, dev); + + mutex_lock(&um_pci_mtx); + for (i = 0; i < MAX_DEVICES; i++) { + if (um_pci_devices[i].dev) + continue; + free = i; + break; + } + + if (free < 0) + goto error; + + err = um_pci_init_vqs(dev); + if (err) + goto error; + + dev->irq = irq_alloc_desc(numa_node_id()); + if (dev->irq < 0) { + err = dev->irq; + goto err_reset; + } + um_pci_devices[free].dev = dev; + vdev->priv = dev; + + mutex_unlock(&um_pci_mtx); + + device_set_wakeup_enable(&vdev->dev, true); + + /* + * In order to do suspend-resume properly, don't allow VQs + * to be suspended. + */ + virtio_uml_set_no_vq_suspend(vdev, true); + + um_pci_rescan(); + return 0; +err_reset: + virtio_reset_device(vdev); + vdev->config->del_vqs(vdev); +error: + mutex_unlock(&um_pci_mtx); + kfree(dev); + return err; +} + +static void um_pci_virtio_remove(struct virtio_device *vdev) +{ + struct um_pci_device *dev = vdev->priv; + int i; + + if (dev->platform) { + of_platform_depopulate(&vdev->dev); + __um_pci_virtio_platform_remove(vdev, dev); + return; + } + + device_set_wakeup_enable(&vdev->dev, false); + + mutex_lock(&um_pci_mtx); + for (i = 0; i < MAX_DEVICES; i++) { + if (um_pci_devices[i].dev != dev) + continue; + + um_pci_devices[i].dev = NULL; + irq_free_desc(dev->irq); + + break; + } + mutex_unlock(&um_pci_mtx); + + if (i < MAX_DEVICES) { + struct pci_dev *pci_dev; + + pci_dev = pci_get_slot(bridge->bus, i); + if (pci_dev) + pci_stop_and_remove_bus_device_locked(pci_dev); + } + + /* Stop all virtqueues */ + virtio_reset_device(vdev); + dev->cmd_vq = NULL; + dev->irq_vq = NULL; + vdev->config->del_vqs(vdev); + + kfree(dev); +} + +static struct virtio_device_id id_table[] = { + { CONFIG_UML_PCI_OVER_VIRTIO_DEVICE_ID, VIRTIO_DEV_ANY_ID }, + { 0 }, +}; +MODULE_DEVICE_TABLE(virtio, id_table); + +static struct virtio_driver um_pci_virtio_driver = { + .driver.name = "virtio-pci", + .driver.owner = THIS_MODULE, + .id_table = id_table, + .probe = um_pci_virtio_probe, + .remove = um_pci_virtio_remove, +}; + +static struct resource virt_cfgspace_resource = { + .name = "PCI config space", + .start = 0xf0000000 - MAX_DEVICES * CFG_SPACE_SIZE, + .end = 0xf0000000 - 1, + .flags = IORESOURCE_MEM, +}; + +static long um_pci_map_cfgspace(unsigned long offset, size_t size, + const struct logic_iomem_ops **ops, + void **priv) +{ + if (WARN_ON(size > CFG_SPACE_SIZE || offset % CFG_SPACE_SIZE)) + return -EINVAL; + + if (offset / CFG_SPACE_SIZE < MAX_DEVICES) { + *ops = &um_pci_device_cfgspace_ops; + *priv = &um_pci_devices[offset / CFG_SPACE_SIZE]; + return 0; + } + + WARN(1, "cannot map offset 0x%lx/0x%zx\n", offset, size); + return -ENOENT; +} + +static const struct logic_iomem_region_ops um_pci_cfgspace_ops = { + .map = um_pci_map_cfgspace, +}; + +static struct resource virt_iomem_resource = { + .name = "PCI iomem", + .start = 0xf0000000, + .end = 0xffffffff, + .flags = IORESOURCE_MEM, +}; + +struct um_pci_map_iomem_data { + unsigned long offset; + size_t size; + const struct logic_iomem_ops **ops; + void **priv; + long ret; +}; + +static int um_pci_map_iomem_walk(struct pci_dev *pdev, void *_data) +{ + struct um_pci_map_iomem_data *data = _data; + struct um_pci_device_reg *reg = &um_pci_devices[pdev->devfn / 8]; + struct um_pci_device *dev; + int i; + + if (!reg->dev) + return 0; + + for (i = 0; i < ARRAY_SIZE(dev->resptr); i++) { + struct resource *r = &pdev->resource[i]; + + if ((r->flags & IORESOURCE_TYPE_BITS) != IORESOURCE_MEM) + continue; + + /* + * must be the whole or part of the resource, + * not allowed to only overlap + */ + if (data->offset < r->start || data->offset > r->end) + continue; + if (data->offset + data->size - 1 > r->end) + continue; + + dev = reg->dev; + *data->ops = &um_pci_device_bar_ops; + dev->resptr[i] = i; + *data->priv = &dev->resptr[i]; + data->ret = data->offset - r->start; + + /* no need to continue */ + return 1; + } + + return 0; +} + +static long um_pci_map_iomem(unsigned long offset, size_t size, + const struct logic_iomem_ops **ops, + void **priv) +{ + struct um_pci_map_iomem_data data = { + /* we want the full address here */ + .offset = offset + virt_iomem_resource.start, + .size = size, + .ops = ops, + .priv = priv, + .ret = -ENOENT, + }; + + pci_walk_bus(bridge->bus, um_pci_map_iomem_walk, &data); + return data.ret; +} + +static const struct logic_iomem_region_ops um_pci_iomem_ops = { + .map = um_pci_map_iomem, +}; + +static void um_pci_compose_msi_msg(struct irq_data *data, struct msi_msg *msg) +{ + /* + * This is a very low address and not actually valid 'physical' memory + * in UML, so we can simply map MSI(-X) vectors to there, it cannot be + * legitimately written to by the device in any other way. + * We use the (virtual) IRQ number here as the message to simplify the + * code that receives the message, where for now we simply trust the + * device to send the correct message. + */ + msg->address_hi = 0; + msg->address_lo = 0xa0000; + msg->data = data->irq; +} + +static struct irq_chip um_pci_msi_bottom_irq_chip = { + .name = "UM virtio MSI", + .irq_compose_msi_msg = um_pci_compose_msi_msg, +}; + +static int um_pci_inner_domain_alloc(struct irq_domain *domain, + unsigned int virq, unsigned int nr_irqs, + void *args) +{ + unsigned long bit; + + WARN_ON(nr_irqs != 1); + + mutex_lock(&um_pci_mtx); + bit = find_first_zero_bit(um_pci_msi_used, MAX_MSI_VECTORS); + if (bit >= MAX_MSI_VECTORS) { + mutex_unlock(&um_pci_mtx); + return -ENOSPC; + } + + set_bit(bit, um_pci_msi_used); + mutex_unlock(&um_pci_mtx); + + irq_domain_set_info(domain, virq, bit, &um_pci_msi_bottom_irq_chip, + domain->host_data, handle_simple_irq, + NULL, NULL); + + return 0; +} + +static void um_pci_inner_domain_free(struct irq_domain *domain, + unsigned int virq, unsigned int nr_irqs) +{ + struct irq_data *d = irq_domain_get_irq_data(domain, virq); + + mutex_lock(&um_pci_mtx); + + if (!test_bit(d->hwirq, um_pci_msi_used)) + pr_err("trying to free unused MSI#%lu\n", d->hwirq); + else + __clear_bit(d->hwirq, um_pci_msi_used); + + mutex_unlock(&um_pci_mtx); +} + +static const struct irq_domain_ops um_pci_inner_domain_ops = { + .alloc = um_pci_inner_domain_alloc, + .free = um_pci_inner_domain_free, +}; + +static struct irq_chip um_pci_msi_irq_chip = { + .name = "UM virtio PCIe MSI", + .irq_mask = pci_msi_mask_irq, + .irq_unmask = pci_msi_unmask_irq, +}; + +static struct msi_domain_info um_pci_msi_domain_info = { + .flags = MSI_FLAG_USE_DEF_DOM_OPS | + MSI_FLAG_USE_DEF_CHIP_OPS | + MSI_FLAG_PCI_MSIX, + .chip = &um_pci_msi_irq_chip, +}; + +static struct resource busn_resource = { + .name = "PCI busn", + .start = 0, + .end = 0, + .flags = IORESOURCE_BUS, +}; + +static int um_pci_map_irq(const struct pci_dev *pdev, u8 slot, u8 pin) +{ + struct um_pci_device_reg *reg = &um_pci_devices[pdev->devfn / 8]; + + if (WARN_ON(!reg->dev)) + return -EINVAL; + + /* Yes, we map all pins to the same IRQ ... doesn't matter for now. */ + return reg->dev->irq; +} + +void *pci_root_bus_fwnode(struct pci_bus *bus) +{ + return um_pci_fwnode; +} + +static long um_pci_map_platform(unsigned long offset, size_t size, + const struct logic_iomem_ops **ops, + void **priv) +{ + if (!um_pci_platform_device) + return -ENOENT; + + *ops = &um_pci_device_bar_ops; + *priv = &um_pci_platform_device->resptr[0]; + + return offset; +} + +static const struct logic_iomem_region_ops um_pci_platform_ops = { + .map = um_pci_map_platform, +}; + +static struct resource virt_platform_resource = { + .name = "platform", + .start = 0x10000000, + .end = 0x1fffffff, + .flags = IORESOURCE_MEM, +}; + +static int __init um_pci_init(void) +{ + int err, i; + + WARN_ON(logic_iomem_add_region(&virt_cfgspace_resource, + &um_pci_cfgspace_ops)); + WARN_ON(logic_iomem_add_region(&virt_iomem_resource, + &um_pci_iomem_ops)); + WARN_ON(logic_iomem_add_region(&virt_platform_resource, + &um_pci_platform_ops)); + + if (WARN(CONFIG_UML_PCI_OVER_VIRTIO_DEVICE_ID < 0, + "No virtio device ID configured for PCI - no PCI support\n")) + return 0; + + um_pci_msg_bufs = alloc_percpu(struct um_pci_message_buffer); + if (!um_pci_msg_bufs) + return -ENOMEM; + + bridge = pci_alloc_host_bridge(0); + if (!bridge) { + err = -ENOMEM; + goto free; + } + + um_pci_fwnode = irq_domain_alloc_named_fwnode("um-pci"); + if (!um_pci_fwnode) { + err = -ENOMEM; + goto free; + } + + um_pci_inner_domain = __irq_domain_add(um_pci_fwnode, MAX_MSI_VECTORS, + MAX_MSI_VECTORS, 0, + &um_pci_inner_domain_ops, NULL); + if (!um_pci_inner_domain) { + err = -ENOMEM; + goto free; + } + + um_pci_msi_domain = pci_msi_create_irq_domain(um_pci_fwnode, + &um_pci_msi_domain_info, + um_pci_inner_domain); + if (!um_pci_msi_domain) { + err = -ENOMEM; + goto free; + } + + pci_add_resource(&bridge->windows, &virt_iomem_resource); + pci_add_resource(&bridge->windows, &busn_resource); + bridge->ops = &um_pci_ops; + bridge->map_irq = um_pci_map_irq; + + for (i = 0; i < MAX_DEVICES; i++) { + resource_size_t start; + + start = virt_cfgspace_resource.start + i * CFG_SPACE_SIZE; + um_pci_devices[i].iomem = ioremap(start, CFG_SPACE_SIZE); + if (WARN(!um_pci_devices[i].iomem, "failed to map %d\n", i)) { + err = -ENOMEM; + goto free; + } + } + + err = pci_host_probe(bridge); + if (err) + goto free; + + err = register_virtio_driver(&um_pci_virtio_driver); + if (err) + goto free; + return 0; +free: + if (um_pci_inner_domain) + irq_domain_remove(um_pci_inner_domain); + if (um_pci_fwnode) + irq_domain_free_fwnode(um_pci_fwnode); + if (bridge) { + pci_free_resource_list(&bridge->windows); + pci_free_host_bridge(bridge); + } + free_percpu(um_pci_msg_bufs); + return err; +} +module_init(um_pci_init); + +static void __exit um_pci_exit(void) +{ + unregister_virtio_driver(&um_pci_virtio_driver); + irq_domain_remove(um_pci_msi_domain); + irq_domain_remove(um_pci_inner_domain); + pci_free_resource_list(&bridge->windows); + pci_free_host_bridge(bridge); + free_percpu(um_pci_msg_bufs); +} +module_exit(um_pci_exit); diff --git a/arch/um/drivers/virtio_uml.c b/arch/um/drivers/virtio_uml.c new file mode 100644 index 0000000000..8adca2000e --- /dev/null +++ b/arch/um/drivers/virtio_uml.c @@ -0,0 +1,1473 @@ +// SPDX-License-Identifier: GPL-2.0-or-later +/* + * Virtio vhost-user driver + * + * Copyright(c) 2019 Intel Corporation + * + * This driver allows virtio devices to be used over a vhost-user socket. + * + * Guest devices can be instantiated by kernel module or command line + * parameters. One device will be created for each parameter. Syntax: + * + * virtio_uml.device=<socket>:<virtio_id>[:<platform_id>] + * where: + * <socket> := vhost-user socket path to connect + * <virtio_id> := virtio device id (as in virtio_ids.h) + * <platform_id> := (optional) platform device id + * + * example: + * virtio_uml.device=/var/uml.socket:1 + * + * Based on Virtio MMIO driver by Pawel Moll, copyright 2011-2014, ARM Ltd. + */ +#include <linux/module.h> +#include <linux/of.h> +#include <linux/platform_device.h> +#include <linux/slab.h> +#include <linux/virtio.h> +#include <linux/virtio_config.h> +#include <linux/virtio_ring.h> +#include <linux/time-internal.h> +#include <linux/virtio-uml.h> +#include <shared/as-layout.h> +#include <irq_kern.h> +#include <init.h> +#include <os.h> +#include "vhost_user.h" + +#define MAX_SUPPORTED_QUEUE_SIZE 256 + +#define to_virtio_uml_device(_vdev) \ + container_of(_vdev, struct virtio_uml_device, vdev) + +struct virtio_uml_platform_data { + u32 virtio_device_id; + const char *socket_path; + struct work_struct conn_broken_wk; + struct platform_device *pdev; +}; + +struct virtio_uml_device { + struct virtio_device vdev; + struct platform_device *pdev; + struct virtio_uml_platform_data *pdata; + + spinlock_t sock_lock; + int sock, req_fd, irq; + u64 features; + u64 protocol_features; + u8 status; + u8 registered:1; + u8 suspended:1; + u8 no_vq_suspend:1; + + u8 config_changed_irq:1; + uint64_t vq_irq_vq_map; + int recv_rc; +}; + +struct virtio_uml_vq_info { + int kick_fd, call_fd; + char name[32]; + bool suspended; +}; + +extern unsigned long long physmem_size, highmem; + +#define vu_err(vu_dev, ...) dev_err(&(vu_dev)->pdev->dev, ##__VA_ARGS__) + +/* Vhost-user protocol */ + +static int full_sendmsg_fds(int fd, const void *buf, unsigned int len, + const int *fds, unsigned int fds_num) +{ + int rc; + + do { + rc = os_sendmsg_fds(fd, buf, len, fds, fds_num); + if (rc > 0) { + buf += rc; + len -= rc; + fds = NULL; + fds_num = 0; + } + } while (len && (rc >= 0 || rc == -EINTR)); + + if (rc < 0) + return rc; + return 0; +} + +static int full_read(int fd, void *buf, int len, bool abortable) +{ + int rc; + + if (!len) + return 0; + + do { + rc = os_read_file(fd, buf, len); + if (rc > 0) { + buf += rc; + len -= rc; + } + } while (len && (rc > 0 || rc == -EINTR || (!abortable && rc == -EAGAIN))); + + if (rc < 0) + return rc; + if (rc == 0) + return -ECONNRESET; + return 0; +} + +static int vhost_user_recv_header(int fd, struct vhost_user_msg *msg) +{ + return full_read(fd, msg, sizeof(msg->header), true); +} + +static int vhost_user_recv(struct virtio_uml_device *vu_dev, + int fd, struct vhost_user_msg *msg, + size_t max_payload_size, bool wait) +{ + size_t size; + int rc; + + /* + * In virtio time-travel mode, we're handling all the vhost-user + * FDs by polling them whenever appropriate. However, we may get + * into a situation where we're sending out an interrupt message + * to a device (e.g. a net device) and need to handle a simulation + * time message while doing so, e.g. one that tells us to update + * our idea of how long we can run without scheduling. + * + * Thus, we need to not just read() from the given fd, but need + * to also handle messages for the simulation time - this function + * does that for us while waiting for the given fd to be readable. + */ + if (wait) + time_travel_wait_readable(fd); + + rc = vhost_user_recv_header(fd, msg); + + if (rc) + return rc; + size = msg->header.size; + if (size > max_payload_size) + return -EPROTO; + return full_read(fd, &msg->payload, size, false); +} + +static void vhost_user_check_reset(struct virtio_uml_device *vu_dev, + int rc) +{ + struct virtio_uml_platform_data *pdata = vu_dev->pdata; + + if (rc != -ECONNRESET) + return; + + if (!vu_dev->registered) + return; + + vu_dev->registered = 0; + + schedule_work(&pdata->conn_broken_wk); +} + +static int vhost_user_recv_resp(struct virtio_uml_device *vu_dev, + struct vhost_user_msg *msg, + size_t max_payload_size) +{ + int rc = vhost_user_recv(vu_dev, vu_dev->sock, msg, + max_payload_size, true); + + if (rc) { + vhost_user_check_reset(vu_dev, rc); + return rc; + } + + if (msg->header.flags != (VHOST_USER_FLAG_REPLY | VHOST_USER_VERSION)) + return -EPROTO; + + return 0; +} + +static int vhost_user_recv_u64(struct virtio_uml_device *vu_dev, + u64 *value) +{ + struct vhost_user_msg msg; + int rc = vhost_user_recv_resp(vu_dev, &msg, + sizeof(msg.payload.integer)); + + if (rc) + return rc; + if (msg.header.size != sizeof(msg.payload.integer)) + return -EPROTO; + *value = msg.payload.integer; + return 0; +} + +static int vhost_user_recv_req(struct virtio_uml_device *vu_dev, + struct vhost_user_msg *msg, + size_t max_payload_size) +{ + int rc = vhost_user_recv(vu_dev, vu_dev->req_fd, msg, + max_payload_size, false); + + if (rc) + return rc; + + if ((msg->header.flags & ~VHOST_USER_FLAG_NEED_REPLY) != + VHOST_USER_VERSION) + return -EPROTO; + + return 0; +} + +static int vhost_user_send(struct virtio_uml_device *vu_dev, + bool need_response, struct vhost_user_msg *msg, + int *fds, size_t num_fds) +{ + size_t size = sizeof(msg->header) + msg->header.size; + unsigned long flags; + bool request_ack; + int rc; + + msg->header.flags |= VHOST_USER_VERSION; + + /* + * The need_response flag indicates that we already need a response, + * e.g. to read the features. In these cases, don't request an ACK as + * it is meaningless. Also request an ACK only if supported. + */ + request_ack = !need_response; + if (!(vu_dev->protocol_features & + BIT_ULL(VHOST_USER_PROTOCOL_F_REPLY_ACK))) + request_ack = false; + + if (request_ack) + msg->header.flags |= VHOST_USER_FLAG_NEED_REPLY; + + spin_lock_irqsave(&vu_dev->sock_lock, flags); + rc = full_sendmsg_fds(vu_dev->sock, msg, size, fds, num_fds); + if (rc < 0) + goto out; + + if (request_ack) { + uint64_t status; + + rc = vhost_user_recv_u64(vu_dev, &status); + if (rc) + goto out; + + if (status) { + vu_err(vu_dev, "slave reports error: %llu\n", status); + rc = -EIO; + goto out; + } + } + +out: + spin_unlock_irqrestore(&vu_dev->sock_lock, flags); + return rc; +} + +static int vhost_user_send_no_payload(struct virtio_uml_device *vu_dev, + bool need_response, u32 request) +{ + struct vhost_user_msg msg = { + .header.request = request, + }; + + return vhost_user_send(vu_dev, need_response, &msg, NULL, 0); +} + +static int vhost_user_send_no_payload_fd(struct virtio_uml_device *vu_dev, + u32 request, int fd) +{ + struct vhost_user_msg msg = { + .header.request = request, + }; + + return vhost_user_send(vu_dev, false, &msg, &fd, 1); +} + +static int vhost_user_send_u64(struct virtio_uml_device *vu_dev, + u32 request, u64 value) +{ + struct vhost_user_msg msg = { + .header.request = request, + .header.size = sizeof(msg.payload.integer), + .payload.integer = value, + }; + + return vhost_user_send(vu_dev, false, &msg, NULL, 0); +} + +static int vhost_user_set_owner(struct virtio_uml_device *vu_dev) +{ + return vhost_user_send_no_payload(vu_dev, false, VHOST_USER_SET_OWNER); +} + +static int vhost_user_get_features(struct virtio_uml_device *vu_dev, + u64 *features) +{ + int rc = vhost_user_send_no_payload(vu_dev, true, + VHOST_USER_GET_FEATURES); + + if (rc) + return rc; + return vhost_user_recv_u64(vu_dev, features); +} + +static int vhost_user_set_features(struct virtio_uml_device *vu_dev, + u64 features) +{ + return vhost_user_send_u64(vu_dev, VHOST_USER_SET_FEATURES, features); +} + +static int vhost_user_get_protocol_features(struct virtio_uml_device *vu_dev, + u64 *protocol_features) +{ + int rc = vhost_user_send_no_payload(vu_dev, true, + VHOST_USER_GET_PROTOCOL_FEATURES); + + if (rc) + return rc; + return vhost_user_recv_u64(vu_dev, protocol_features); +} + +static int vhost_user_set_protocol_features(struct virtio_uml_device *vu_dev, + u64 protocol_features) +{ + return vhost_user_send_u64(vu_dev, VHOST_USER_SET_PROTOCOL_FEATURES, + protocol_features); +} + +static void vhost_user_reply(struct virtio_uml_device *vu_dev, + struct vhost_user_msg *msg, int response) +{ + struct vhost_user_msg reply = { + .payload.integer = response, + }; + size_t size = sizeof(reply.header) + sizeof(reply.payload.integer); + int rc; + + reply.header = msg->header; + reply.header.flags &= ~VHOST_USER_FLAG_NEED_REPLY; + reply.header.flags |= VHOST_USER_FLAG_REPLY; + reply.header.size = sizeof(reply.payload.integer); + + rc = full_sendmsg_fds(vu_dev->req_fd, &reply, size, NULL, 0); + + if (rc) + vu_err(vu_dev, + "sending reply to slave request failed: %d (size %zu)\n", + rc, size); +} + +static irqreturn_t vu_req_read_message(struct virtio_uml_device *vu_dev, + struct time_travel_event *ev) +{ + struct virtqueue *vq; + int response = 1; + struct { + struct vhost_user_msg msg; + u8 extra_payload[512]; + } msg; + int rc; + irqreturn_t irq_rc = IRQ_NONE; + + while (1) { + rc = vhost_user_recv_req(vu_dev, &msg.msg, + sizeof(msg.msg.payload) + + sizeof(msg.extra_payload)); + if (rc) + break; + + switch (msg.msg.header.request) { + case VHOST_USER_SLAVE_CONFIG_CHANGE_MSG: + vu_dev->config_changed_irq = true; + response = 0; + break; + case VHOST_USER_SLAVE_VRING_CALL: + virtio_device_for_each_vq((&vu_dev->vdev), vq) { + if (vq->index == msg.msg.payload.vring_state.index) { + response = 0; + vu_dev->vq_irq_vq_map |= BIT_ULL(vq->index); + break; + } + } + break; + case VHOST_USER_SLAVE_IOTLB_MSG: + /* not supported - VIRTIO_F_ACCESS_PLATFORM */ + case VHOST_USER_SLAVE_VRING_HOST_NOTIFIER_MSG: + /* not supported - VHOST_USER_PROTOCOL_F_HOST_NOTIFIER */ + default: + vu_err(vu_dev, "unexpected slave request %d\n", + msg.msg.header.request); + } + + if (ev && !vu_dev->suspended) + time_travel_add_irq_event(ev); + + if (msg.msg.header.flags & VHOST_USER_FLAG_NEED_REPLY) + vhost_user_reply(vu_dev, &msg.msg, response); + irq_rc = IRQ_HANDLED; + } + /* mask EAGAIN as we try non-blocking read until socket is empty */ + vu_dev->recv_rc = (rc == -EAGAIN) ? 0 : rc; + return irq_rc; +} + +static irqreturn_t vu_req_interrupt(int irq, void *data) +{ + struct virtio_uml_device *vu_dev = data; + irqreturn_t ret = IRQ_HANDLED; + + if (!um_irq_timetravel_handler_used()) + ret = vu_req_read_message(vu_dev, NULL); + + if (vu_dev->recv_rc) { + vhost_user_check_reset(vu_dev, vu_dev->recv_rc); + } else if (vu_dev->vq_irq_vq_map) { + struct virtqueue *vq; + + virtio_device_for_each_vq((&vu_dev->vdev), vq) { + if (vu_dev->vq_irq_vq_map & BIT_ULL(vq->index)) + vring_interrupt(0 /* ignored */, vq); + } + vu_dev->vq_irq_vq_map = 0; + } else if (vu_dev->config_changed_irq) { + virtio_config_changed(&vu_dev->vdev); + vu_dev->config_changed_irq = false; + } + + return ret; +} + +static void vu_req_interrupt_comm_handler(int irq, int fd, void *data, + struct time_travel_event *ev) +{ + vu_req_read_message(data, ev); +} + +static int vhost_user_init_slave_req(struct virtio_uml_device *vu_dev) +{ + int rc, req_fds[2]; + + /* Use a pipe for slave req fd, SIGIO is not supported for eventfd */ + rc = os_pipe(req_fds, true, true); + if (rc < 0) + return rc; + vu_dev->req_fd = req_fds[0]; + + rc = um_request_irq_tt(UM_IRQ_ALLOC, vu_dev->req_fd, IRQ_READ, + vu_req_interrupt, IRQF_SHARED, + vu_dev->pdev->name, vu_dev, + vu_req_interrupt_comm_handler); + if (rc < 0) + goto err_close; + + vu_dev->irq = rc; + + rc = vhost_user_send_no_payload_fd(vu_dev, VHOST_USER_SET_SLAVE_REQ_FD, + req_fds[1]); + if (rc) + goto err_free_irq; + + goto out; + +err_free_irq: + um_free_irq(vu_dev->irq, vu_dev); +err_close: + os_close_file(req_fds[0]); +out: + /* Close unused write end of request fds */ + os_close_file(req_fds[1]); + return rc; +} + +static int vhost_user_init(struct virtio_uml_device *vu_dev) +{ + int rc = vhost_user_set_owner(vu_dev); + + if (rc) + return rc; + rc = vhost_user_get_features(vu_dev, &vu_dev->features); + if (rc) + return rc; + + if (vu_dev->features & BIT_ULL(VHOST_USER_F_PROTOCOL_FEATURES)) { + rc = vhost_user_get_protocol_features(vu_dev, + &vu_dev->protocol_features); + if (rc) + return rc; + vu_dev->protocol_features &= VHOST_USER_SUPPORTED_PROTOCOL_F; + rc = vhost_user_set_protocol_features(vu_dev, + vu_dev->protocol_features); + if (rc) + return rc; + } + + if (vu_dev->protocol_features & + BIT_ULL(VHOST_USER_PROTOCOL_F_SLAVE_REQ)) { + rc = vhost_user_init_slave_req(vu_dev); + if (rc) + return rc; + } + + return 0; +} + +static void vhost_user_get_config(struct virtio_uml_device *vu_dev, + u32 offset, void *buf, u32 len) +{ + u32 cfg_size = offset + len; + struct vhost_user_msg *msg; + size_t payload_size = sizeof(msg->payload.config) + cfg_size; + size_t msg_size = sizeof(msg->header) + payload_size; + int rc; + + if (!(vu_dev->protocol_features & + BIT_ULL(VHOST_USER_PROTOCOL_F_CONFIG))) + return; + + msg = kzalloc(msg_size, GFP_KERNEL); + if (!msg) + return; + msg->header.request = VHOST_USER_GET_CONFIG; + msg->header.size = payload_size; + msg->payload.config.offset = 0; + msg->payload.config.size = cfg_size; + + rc = vhost_user_send(vu_dev, true, msg, NULL, 0); + if (rc) { + vu_err(vu_dev, "sending VHOST_USER_GET_CONFIG failed: %d\n", + rc); + goto free; + } + + rc = vhost_user_recv_resp(vu_dev, msg, msg_size); + if (rc) { + vu_err(vu_dev, + "receiving VHOST_USER_GET_CONFIG response failed: %d\n", + rc); + goto free; + } + + if (msg->header.size != payload_size || + msg->payload.config.size != cfg_size) { + rc = -EPROTO; + vu_err(vu_dev, + "Invalid VHOST_USER_GET_CONFIG sizes (payload %d expected %zu, config %u expected %u)\n", + msg->header.size, payload_size, + msg->payload.config.size, cfg_size); + goto free; + } + memcpy(buf, msg->payload.config.payload + offset, len); + +free: + kfree(msg); +} + +static void vhost_user_set_config(struct virtio_uml_device *vu_dev, + u32 offset, const void *buf, u32 len) +{ + struct vhost_user_msg *msg; + size_t payload_size = sizeof(msg->payload.config) + len; + size_t msg_size = sizeof(msg->header) + payload_size; + int rc; + + if (!(vu_dev->protocol_features & + BIT_ULL(VHOST_USER_PROTOCOL_F_CONFIG))) + return; + + msg = kzalloc(msg_size, GFP_KERNEL); + if (!msg) + return; + msg->header.request = VHOST_USER_SET_CONFIG; + msg->header.size = payload_size; + msg->payload.config.offset = offset; + msg->payload.config.size = len; + memcpy(msg->payload.config.payload, buf, len); + + rc = vhost_user_send(vu_dev, false, msg, NULL, 0); + if (rc) + vu_err(vu_dev, "sending VHOST_USER_SET_CONFIG failed: %d\n", + rc); + + kfree(msg); +} + +static int vhost_user_init_mem_region(u64 addr, u64 size, int *fd_out, + struct vhost_user_mem_region *region_out) +{ + unsigned long long mem_offset; + int rc = phys_mapping(addr, &mem_offset); + + if (WARN(rc < 0, "phys_mapping of 0x%llx returned %d\n", addr, rc)) + return -EFAULT; + *fd_out = rc; + region_out->guest_addr = addr; + region_out->user_addr = addr; + region_out->size = size; + region_out->mmap_offset = mem_offset; + + /* Ensure mapping is valid for the entire region */ + rc = phys_mapping(addr + size - 1, &mem_offset); + if (WARN(rc != *fd_out, "phys_mapping of 0x%llx failed: %d != %d\n", + addr + size - 1, rc, *fd_out)) + return -EFAULT; + return 0; +} + +static int vhost_user_set_mem_table(struct virtio_uml_device *vu_dev) +{ + struct vhost_user_msg msg = { + .header.request = VHOST_USER_SET_MEM_TABLE, + .header.size = sizeof(msg.payload.mem_regions), + .payload.mem_regions.num = 1, + }; + unsigned long reserved = uml_reserved - uml_physmem; + int fds[2]; + int rc; + + /* + * This is a bit tricky, see also the comment with setup_physmem(). + * + * Essentially, setup_physmem() uses a file to mmap() our physmem, + * but the code and data we *already* have is omitted. To us, this + * is no difference, since they both become part of our address + * space and memory consumption. To somebody looking in from the + * outside, however, it is different because the part of our memory + * consumption that's already part of the binary (code/data) is not + * mapped from the file, so it's not visible to another mmap from + * the file descriptor. + * + * Thus, don't advertise this space to the vhost-user slave. This + * means that the slave will likely abort or similar when we give + * it an address from the hidden range, since it's not marked as + * a valid address, but at least that way we detect the issue and + * don't just have the slave read an all-zeroes buffer from the + * shared memory file, or write something there that we can never + * see (depending on the direction of the virtqueue traffic.) + * + * Since we usually don't want to use .text for virtio buffers, + * this effectively means that you cannot use + * 1) global variables, which are in the .bss and not in the shm + * file-backed memory + * 2) the stack in some processes, depending on where they have + * their stack (or maybe only no interrupt stack?) + * + * The stack is already not typically valid for DMA, so this isn't + * much of a restriction, but global variables might be encountered. + * + * It might be possible to fix it by copying around the data that's + * between bss_start and where we map the file now, but it's not + * something that you typically encounter with virtio drivers, so + * it didn't seem worthwhile. + */ + rc = vhost_user_init_mem_region(reserved, physmem_size - reserved, + &fds[0], + &msg.payload.mem_regions.regions[0]); + + if (rc < 0) + return rc; + if (highmem) { + msg.payload.mem_regions.num++; + rc = vhost_user_init_mem_region(__pa(end_iomem), highmem, + &fds[1], &msg.payload.mem_regions.regions[1]); + if (rc < 0) + return rc; + } + + return vhost_user_send(vu_dev, false, &msg, fds, + msg.payload.mem_regions.num); +} + +static int vhost_user_set_vring_state(struct virtio_uml_device *vu_dev, + u32 request, u32 index, u32 num) +{ + struct vhost_user_msg msg = { + .header.request = request, + .header.size = sizeof(msg.payload.vring_state), + .payload.vring_state.index = index, + .payload.vring_state.num = num, + }; + + return vhost_user_send(vu_dev, false, &msg, NULL, 0); +} + +static int vhost_user_set_vring_num(struct virtio_uml_device *vu_dev, + u32 index, u32 num) +{ + return vhost_user_set_vring_state(vu_dev, VHOST_USER_SET_VRING_NUM, + index, num); +} + +static int vhost_user_set_vring_base(struct virtio_uml_device *vu_dev, + u32 index, u32 offset) +{ + return vhost_user_set_vring_state(vu_dev, VHOST_USER_SET_VRING_BASE, + index, offset); +} + +static int vhost_user_set_vring_addr(struct virtio_uml_device *vu_dev, + u32 index, u64 desc, u64 used, u64 avail, + u64 log) +{ + struct vhost_user_msg msg = { + .header.request = VHOST_USER_SET_VRING_ADDR, + .header.size = sizeof(msg.payload.vring_addr), + .payload.vring_addr.index = index, + .payload.vring_addr.desc = desc, + .payload.vring_addr.used = used, + .payload.vring_addr.avail = avail, + .payload.vring_addr.log = log, + }; + + return vhost_user_send(vu_dev, false, &msg, NULL, 0); +} + +static int vhost_user_set_vring_fd(struct virtio_uml_device *vu_dev, + u32 request, int index, int fd) +{ + struct vhost_user_msg msg = { + .header.request = request, + .header.size = sizeof(msg.payload.integer), + .payload.integer = index, + }; + + if (index & ~VHOST_USER_VRING_INDEX_MASK) + return -EINVAL; + if (fd < 0) { + msg.payload.integer |= VHOST_USER_VRING_POLL_MASK; + return vhost_user_send(vu_dev, false, &msg, NULL, 0); + } + return vhost_user_send(vu_dev, false, &msg, &fd, 1); +} + +static int vhost_user_set_vring_call(struct virtio_uml_device *vu_dev, + int index, int fd) +{ + return vhost_user_set_vring_fd(vu_dev, VHOST_USER_SET_VRING_CALL, + index, fd); +} + +static int vhost_user_set_vring_kick(struct virtio_uml_device *vu_dev, + int index, int fd) +{ + return vhost_user_set_vring_fd(vu_dev, VHOST_USER_SET_VRING_KICK, + index, fd); +} + +static int vhost_user_set_vring_enable(struct virtio_uml_device *vu_dev, + u32 index, bool enable) +{ + if (!(vu_dev->features & BIT_ULL(VHOST_USER_F_PROTOCOL_FEATURES))) + return 0; + + return vhost_user_set_vring_state(vu_dev, VHOST_USER_SET_VRING_ENABLE, + index, enable); +} + + +/* Virtio interface */ + +static bool vu_notify(struct virtqueue *vq) +{ + struct virtio_uml_vq_info *info = vq->priv; + const uint64_t n = 1; + int rc; + + if (info->suspended) + return true; + + time_travel_propagate_time(); + + if (info->kick_fd < 0) { + struct virtio_uml_device *vu_dev; + + vu_dev = to_virtio_uml_device(vq->vdev); + + return vhost_user_set_vring_state(vu_dev, VHOST_USER_VRING_KICK, + vq->index, 0) == 0; + } + + do { + rc = os_write_file(info->kick_fd, &n, sizeof(n)); + } while (rc == -EINTR); + return !WARN(rc != sizeof(n), "write returned %d\n", rc); +} + +static irqreturn_t vu_interrupt(int irq, void *opaque) +{ + struct virtqueue *vq = opaque; + struct virtio_uml_vq_info *info = vq->priv; + uint64_t n; + int rc; + irqreturn_t ret = IRQ_NONE; + + do { + rc = os_read_file(info->call_fd, &n, sizeof(n)); + if (rc == sizeof(n)) + ret |= vring_interrupt(irq, vq); + } while (rc == sizeof(n) || rc == -EINTR); + WARN(rc != -EAGAIN, "read returned %d\n", rc); + return ret; +} + + +static void vu_get(struct virtio_device *vdev, unsigned offset, + void *buf, unsigned len) +{ + struct virtio_uml_device *vu_dev = to_virtio_uml_device(vdev); + + vhost_user_get_config(vu_dev, offset, buf, len); +} + +static void vu_set(struct virtio_device *vdev, unsigned offset, + const void *buf, unsigned len) +{ + struct virtio_uml_device *vu_dev = to_virtio_uml_device(vdev); + + vhost_user_set_config(vu_dev, offset, buf, len); +} + +static u8 vu_get_status(struct virtio_device *vdev) +{ + struct virtio_uml_device *vu_dev = to_virtio_uml_device(vdev); + + return vu_dev->status; +} + +static void vu_set_status(struct virtio_device *vdev, u8 status) +{ + struct virtio_uml_device *vu_dev = to_virtio_uml_device(vdev); + + vu_dev->status = status; +} + +static void vu_reset(struct virtio_device *vdev) +{ + struct virtio_uml_device *vu_dev = to_virtio_uml_device(vdev); + + vu_dev->status = 0; +} + +static void vu_del_vq(struct virtqueue *vq) +{ + struct virtio_uml_vq_info *info = vq->priv; + + if (info->call_fd >= 0) { + struct virtio_uml_device *vu_dev; + + vu_dev = to_virtio_uml_device(vq->vdev); + + um_free_irq(vu_dev->irq, vq); + os_close_file(info->call_fd); + } + + if (info->kick_fd >= 0) + os_close_file(info->kick_fd); + + vring_del_virtqueue(vq); + kfree(info); +} + +static void vu_del_vqs(struct virtio_device *vdev) +{ + struct virtio_uml_device *vu_dev = to_virtio_uml_device(vdev); + struct virtqueue *vq, *n; + u64 features; + + /* Note: reverse order as a workaround to a decoding bug in snabb */ + list_for_each_entry_reverse(vq, &vdev->vqs, list) + WARN_ON(vhost_user_set_vring_enable(vu_dev, vq->index, false)); + + /* Ensure previous messages have been processed */ + WARN_ON(vhost_user_get_features(vu_dev, &features)); + + list_for_each_entry_safe(vq, n, &vdev->vqs, list) + vu_del_vq(vq); +} + +static int vu_setup_vq_call_fd(struct virtio_uml_device *vu_dev, + struct virtqueue *vq) +{ + struct virtio_uml_vq_info *info = vq->priv; + int call_fds[2]; + int rc; + + /* no call FD needed/desired in this case */ + if (vu_dev->protocol_features & + BIT_ULL(VHOST_USER_PROTOCOL_F_INBAND_NOTIFICATIONS) && + vu_dev->protocol_features & + BIT_ULL(VHOST_USER_PROTOCOL_F_SLAVE_REQ)) { + info->call_fd = -1; + return 0; + } + + /* Use a pipe for call fd, since SIGIO is not supported for eventfd */ + rc = os_pipe(call_fds, true, true); + if (rc < 0) + return rc; + + info->call_fd = call_fds[0]; + rc = um_request_irq(vu_dev->irq, info->call_fd, IRQ_READ, + vu_interrupt, IRQF_SHARED, info->name, vq); + if (rc < 0) + goto close_both; + + rc = vhost_user_set_vring_call(vu_dev, vq->index, call_fds[1]); + if (rc) + goto release_irq; + + goto out; + +release_irq: + um_free_irq(vu_dev->irq, vq); +close_both: + os_close_file(call_fds[0]); +out: + /* Close (unused) write end of call fds */ + os_close_file(call_fds[1]); + + return rc; +} + +static struct virtqueue *vu_setup_vq(struct virtio_device *vdev, + unsigned index, vq_callback_t *callback, + const char *name, bool ctx) +{ + struct virtio_uml_device *vu_dev = to_virtio_uml_device(vdev); + struct platform_device *pdev = vu_dev->pdev; + struct virtio_uml_vq_info *info; + struct virtqueue *vq; + int num = MAX_SUPPORTED_QUEUE_SIZE; + int rc; + + info = kzalloc(sizeof(*info), GFP_KERNEL); + if (!info) { + rc = -ENOMEM; + goto error_kzalloc; + } + snprintf(info->name, sizeof(info->name), "%s.%d-%s", pdev->name, + pdev->id, name); + + vq = vring_create_virtqueue(index, num, PAGE_SIZE, vdev, true, true, + ctx, vu_notify, callback, info->name); + if (!vq) { + rc = -ENOMEM; + goto error_create; + } + vq->priv = info; + vq->num_max = num; + num = virtqueue_get_vring_size(vq); + + if (vu_dev->protocol_features & + BIT_ULL(VHOST_USER_PROTOCOL_F_INBAND_NOTIFICATIONS)) { + info->kick_fd = -1; + } else { + rc = os_eventfd(0, 0); + if (rc < 0) + goto error_kick; + info->kick_fd = rc; + } + + rc = vu_setup_vq_call_fd(vu_dev, vq); + if (rc) + goto error_call; + + rc = vhost_user_set_vring_num(vu_dev, index, num); + if (rc) + goto error_setup; + + rc = vhost_user_set_vring_base(vu_dev, index, 0); + if (rc) + goto error_setup; + + rc = vhost_user_set_vring_addr(vu_dev, index, + virtqueue_get_desc_addr(vq), + virtqueue_get_used_addr(vq), + virtqueue_get_avail_addr(vq), + (u64) -1); + if (rc) + goto error_setup; + + return vq; + +error_setup: + if (info->call_fd >= 0) { + um_free_irq(vu_dev->irq, vq); + os_close_file(info->call_fd); + } +error_call: + if (info->kick_fd >= 0) + os_close_file(info->kick_fd); +error_kick: + vring_del_virtqueue(vq); +error_create: + kfree(info); +error_kzalloc: + return ERR_PTR(rc); +} + +static int vu_find_vqs(struct virtio_device *vdev, unsigned nvqs, + struct virtqueue *vqs[], vq_callback_t *callbacks[], + const char * const names[], const bool *ctx, + struct irq_affinity *desc) +{ + struct virtio_uml_device *vu_dev = to_virtio_uml_device(vdev); + int i, queue_idx = 0, rc; + struct virtqueue *vq; + + /* not supported for now */ + if (WARN_ON(nvqs > 64)) + return -EINVAL; + + rc = vhost_user_set_mem_table(vu_dev); + if (rc) + return rc; + + for (i = 0; i < nvqs; ++i) { + if (!names[i]) { + vqs[i] = NULL; + continue; + } + + vqs[i] = vu_setup_vq(vdev, queue_idx++, callbacks[i], names[i], + ctx ? ctx[i] : false); + if (IS_ERR(vqs[i])) { + rc = PTR_ERR(vqs[i]); + goto error_setup; + } + } + + list_for_each_entry(vq, &vdev->vqs, list) { + struct virtio_uml_vq_info *info = vq->priv; + + if (info->kick_fd >= 0) { + rc = vhost_user_set_vring_kick(vu_dev, vq->index, + info->kick_fd); + if (rc) + goto error_setup; + } + + rc = vhost_user_set_vring_enable(vu_dev, vq->index, true); + if (rc) + goto error_setup; + } + + return 0; + +error_setup: + vu_del_vqs(vdev); + return rc; +} + +static u64 vu_get_features(struct virtio_device *vdev) +{ + struct virtio_uml_device *vu_dev = to_virtio_uml_device(vdev); + + return vu_dev->features; +} + +static int vu_finalize_features(struct virtio_device *vdev) +{ + struct virtio_uml_device *vu_dev = to_virtio_uml_device(vdev); + u64 supported = vdev->features & VHOST_USER_SUPPORTED_F; + + vring_transport_features(vdev); + vu_dev->features = vdev->features | supported; + + return vhost_user_set_features(vu_dev, vu_dev->features); +} + +static const char *vu_bus_name(struct virtio_device *vdev) +{ + struct virtio_uml_device *vu_dev = to_virtio_uml_device(vdev); + + return vu_dev->pdev->name; +} + +static const struct virtio_config_ops virtio_uml_config_ops = { + .get = vu_get, + .set = vu_set, + .get_status = vu_get_status, + .set_status = vu_set_status, + .reset = vu_reset, + .find_vqs = vu_find_vqs, + .del_vqs = vu_del_vqs, + .get_features = vu_get_features, + .finalize_features = vu_finalize_features, + .bus_name = vu_bus_name, +}; + +static void virtio_uml_release_dev(struct device *d) +{ + struct virtio_device *vdev = + container_of(d, struct virtio_device, dev); + struct virtio_uml_device *vu_dev = to_virtio_uml_device(vdev); + + time_travel_propagate_time(); + + /* might not have been opened due to not negotiating the feature */ + if (vu_dev->req_fd >= 0) { + um_free_irq(vu_dev->irq, vu_dev); + os_close_file(vu_dev->req_fd); + } + + os_close_file(vu_dev->sock); + kfree(vu_dev); +} + +void virtio_uml_set_no_vq_suspend(struct virtio_device *vdev, + bool no_vq_suspend) +{ + struct virtio_uml_device *vu_dev = to_virtio_uml_device(vdev); + + if (WARN_ON(vdev->config != &virtio_uml_config_ops)) + return; + + vu_dev->no_vq_suspend = no_vq_suspend; + dev_info(&vdev->dev, "%sabled VQ suspend\n", + no_vq_suspend ? "dis" : "en"); +} + +static void vu_of_conn_broken(struct work_struct *wk) +{ + struct virtio_uml_platform_data *pdata; + struct virtio_uml_device *vu_dev; + + pdata = container_of(wk, struct virtio_uml_platform_data, conn_broken_wk); + + vu_dev = platform_get_drvdata(pdata->pdev); + + virtio_break_device(&vu_dev->vdev); + + /* + * We can't remove the device from the devicetree so the only thing we + * can do is warn. + */ + WARN_ON(1); +} + +/* Platform device */ + +static struct virtio_uml_platform_data * +virtio_uml_create_pdata(struct platform_device *pdev) +{ + struct device_node *np = pdev->dev.of_node; + struct virtio_uml_platform_data *pdata; + int ret; + + if (!np) + return ERR_PTR(-EINVAL); + + pdata = devm_kzalloc(&pdev->dev, sizeof(*pdata), GFP_KERNEL); + if (!pdata) + return ERR_PTR(-ENOMEM); + + INIT_WORK(&pdata->conn_broken_wk, vu_of_conn_broken); + pdata->pdev = pdev; + + ret = of_property_read_string(np, "socket-path", &pdata->socket_path); + if (ret) + return ERR_PTR(ret); + + ret = of_property_read_u32(np, "virtio-device-id", + &pdata->virtio_device_id); + if (ret) + return ERR_PTR(ret); + + return pdata; +} + +static int virtio_uml_probe(struct platform_device *pdev) +{ + struct virtio_uml_platform_data *pdata = pdev->dev.platform_data; + struct virtio_uml_device *vu_dev; + int rc; + + if (!pdata) { + pdata = virtio_uml_create_pdata(pdev); + if (IS_ERR(pdata)) + return PTR_ERR(pdata); + } + + vu_dev = kzalloc(sizeof(*vu_dev), GFP_KERNEL); + if (!vu_dev) + return -ENOMEM; + + vu_dev->pdata = pdata; + vu_dev->vdev.dev.parent = &pdev->dev; + vu_dev->vdev.dev.release = virtio_uml_release_dev; + vu_dev->vdev.config = &virtio_uml_config_ops; + vu_dev->vdev.id.device = pdata->virtio_device_id; + vu_dev->vdev.id.vendor = VIRTIO_DEV_ANY_ID; + vu_dev->pdev = pdev; + vu_dev->req_fd = -1; + + time_travel_propagate_time(); + + do { + rc = os_connect_socket(pdata->socket_path); + } while (rc == -EINTR); + if (rc < 0) + goto error_free; + vu_dev->sock = rc; + + spin_lock_init(&vu_dev->sock_lock); + + rc = vhost_user_init(vu_dev); + if (rc) + goto error_init; + + platform_set_drvdata(pdev, vu_dev); + + device_set_wakeup_capable(&vu_dev->vdev.dev, true); + + rc = register_virtio_device(&vu_dev->vdev); + if (rc) + put_device(&vu_dev->vdev.dev); + vu_dev->registered = 1; + return rc; + +error_init: + os_close_file(vu_dev->sock); +error_free: + kfree(vu_dev); + return rc; +} + +static int virtio_uml_remove(struct platform_device *pdev) +{ + struct virtio_uml_device *vu_dev = platform_get_drvdata(pdev); + + unregister_virtio_device(&vu_dev->vdev); + return 0; +} + +/* Command line device list */ + +static void vu_cmdline_release_dev(struct device *d) +{ +} + +static struct device vu_cmdline_parent = { + .init_name = "virtio-uml-cmdline", + .release = vu_cmdline_release_dev, +}; + +static bool vu_cmdline_parent_registered; +static int vu_cmdline_id; + +static int vu_unregister_cmdline_device(struct device *dev, void *data) +{ + struct platform_device *pdev = to_platform_device(dev); + struct virtio_uml_platform_data *pdata = pdev->dev.platform_data; + + kfree(pdata->socket_path); + platform_device_unregister(pdev); + return 0; +} + +static void vu_conn_broken(struct work_struct *wk) +{ + struct virtio_uml_platform_data *pdata; + struct virtio_uml_device *vu_dev; + + pdata = container_of(wk, struct virtio_uml_platform_data, conn_broken_wk); + + vu_dev = platform_get_drvdata(pdata->pdev); + + virtio_break_device(&vu_dev->vdev); + + vu_unregister_cmdline_device(&pdata->pdev->dev, NULL); +} + +static int vu_cmdline_set(const char *device, const struct kernel_param *kp) +{ + const char *ids = strchr(device, ':'); + unsigned int virtio_device_id; + int processed, consumed, err; + char *socket_path; + struct virtio_uml_platform_data pdata, *ppdata; + struct platform_device *pdev; + + if (!ids || ids == device) + return -EINVAL; + + processed = sscanf(ids, ":%u%n:%d%n", + &virtio_device_id, &consumed, + &vu_cmdline_id, &consumed); + + if (processed < 1 || ids[consumed]) + return -EINVAL; + + if (!vu_cmdline_parent_registered) { + err = device_register(&vu_cmdline_parent); + if (err) { + pr_err("Failed to register parent device!\n"); + put_device(&vu_cmdline_parent); + return err; + } + vu_cmdline_parent_registered = true; + } + + socket_path = kmemdup_nul(device, ids - device, GFP_KERNEL); + if (!socket_path) + return -ENOMEM; + + pdata.virtio_device_id = (u32) virtio_device_id; + pdata.socket_path = socket_path; + + pr_info("Registering device virtio-uml.%d id=%d at %s\n", + vu_cmdline_id, virtio_device_id, socket_path); + + pdev = platform_device_register_data(&vu_cmdline_parent, "virtio-uml", + vu_cmdline_id++, &pdata, + sizeof(pdata)); + err = PTR_ERR_OR_ZERO(pdev); + if (err) + goto free; + + ppdata = pdev->dev.platform_data; + ppdata->pdev = pdev; + INIT_WORK(&ppdata->conn_broken_wk, vu_conn_broken); + + return 0; + +free: + kfree(socket_path); + return err; +} + +static int vu_cmdline_get_device(struct device *dev, void *data) +{ + struct platform_device *pdev = to_platform_device(dev); + struct virtio_uml_platform_data *pdata = pdev->dev.platform_data; + char *buffer = data; + unsigned int len = strlen(buffer); + + snprintf(buffer + len, PAGE_SIZE - len, "%s:%d:%d\n", + pdata->socket_path, pdata->virtio_device_id, pdev->id); + return 0; +} + +static int vu_cmdline_get(char *buffer, const struct kernel_param *kp) +{ + buffer[0] = '\0'; + if (vu_cmdline_parent_registered) + device_for_each_child(&vu_cmdline_parent, buffer, + vu_cmdline_get_device); + return strlen(buffer) + 1; +} + +static const struct kernel_param_ops vu_cmdline_param_ops = { + .set = vu_cmdline_set, + .get = vu_cmdline_get, +}; + +device_param_cb(device, &vu_cmdline_param_ops, NULL, S_IRUSR); +__uml_help(vu_cmdline_param_ops, +"virtio_uml.device=<socket>:<virtio_id>[:<platform_id>]\n" +" Configure a virtio device over a vhost-user socket.\n" +" See virtio_ids.h for a list of possible virtio device id values.\n" +" Optionally use a specific platform_device id.\n\n" +); + + +static void vu_unregister_cmdline_devices(void) +{ + if (vu_cmdline_parent_registered) { + device_for_each_child(&vu_cmdline_parent, NULL, + vu_unregister_cmdline_device); + device_unregister(&vu_cmdline_parent); + vu_cmdline_parent_registered = false; + } +} + +/* Platform driver */ + +static const struct of_device_id virtio_uml_match[] = { + { .compatible = "virtio,uml", }, + { } +}; +MODULE_DEVICE_TABLE(of, virtio_uml_match); + +static int virtio_uml_suspend(struct platform_device *pdev, pm_message_t state) +{ + struct virtio_uml_device *vu_dev = platform_get_drvdata(pdev); + + if (!vu_dev->no_vq_suspend) { + struct virtqueue *vq; + + virtio_device_for_each_vq((&vu_dev->vdev), vq) { + struct virtio_uml_vq_info *info = vq->priv; + + info->suspended = true; + vhost_user_set_vring_enable(vu_dev, vq->index, false); + } + } + + if (!device_may_wakeup(&vu_dev->vdev.dev)) { + vu_dev->suspended = true; + return 0; + } + + return irq_set_irq_wake(vu_dev->irq, 1); +} + +static int virtio_uml_resume(struct platform_device *pdev) +{ + struct virtio_uml_device *vu_dev = platform_get_drvdata(pdev); + + if (!vu_dev->no_vq_suspend) { + struct virtqueue *vq; + + virtio_device_for_each_vq((&vu_dev->vdev), vq) { + struct virtio_uml_vq_info *info = vq->priv; + + info->suspended = false; + vhost_user_set_vring_enable(vu_dev, vq->index, true); + } + } + + vu_dev->suspended = false; + + if (!device_may_wakeup(&vu_dev->vdev.dev)) + return 0; + + return irq_set_irq_wake(vu_dev->irq, 0); +} + +static struct platform_driver virtio_uml_driver = { + .probe = virtio_uml_probe, + .remove = virtio_uml_remove, + .driver = { + .name = "virtio-uml", + .of_match_table = virtio_uml_match, + }, + .suspend = virtio_uml_suspend, + .resume = virtio_uml_resume, +}; + +static int __init virtio_uml_init(void) +{ + return platform_driver_register(&virtio_uml_driver); +} + +static void __exit virtio_uml_exit(void) +{ + platform_driver_unregister(&virtio_uml_driver); + vu_unregister_cmdline_devices(); +} + +module_init(virtio_uml_init); +module_exit(virtio_uml_exit); +__uml_exitcall(virtio_uml_exit); + +MODULE_DESCRIPTION("UML driver for vhost-user virtio devices"); +MODULE_LICENSE("GPL"); diff --git a/arch/um/drivers/xterm.c b/arch/um/drivers/xterm.c new file mode 100644 index 0000000000..6918de5e29 --- /dev/null +++ b/arch/um/drivers/xterm.c @@ -0,0 +1,229 @@ +// SPDX-License-Identifier: GPL-2.0 +/* + * Copyright (C) 2001 - 2007 Jeff Dike (jdike@{addtoit,linux.intel}.com) + */ + +#include <stddef.h> +#include <stdio.h> +#include <stdlib.h> +#include <unistd.h> +#include <errno.h> +#include <string.h> +#include <termios.h> +#include "chan_user.h" +#include <os.h> +#include <um_malloc.h> +#include "xterm.h" + +struct xterm_chan { + int pid; + int helper_pid; + int chan_fd; + char *title; + int device; + int raw; + struct termios tt; +}; + +static void *xterm_init(char *str, int device, const struct chan_opts *opts) +{ + struct xterm_chan *data; + + data = uml_kmalloc(sizeof(*data), UM_GFP_KERNEL); + if (data == NULL) + return NULL; + *data = ((struct xterm_chan) { .pid = -1, + .helper_pid = -1, + .chan_fd = -1, + .device = device, + .title = opts->xterm_title, + .raw = opts->raw } ); + return data; +} + +/* Only changed by xterm_setup, which is a setup */ +static char *terminal_emulator = CONFIG_XTERM_CHAN_DEFAULT_EMULATOR; +static char *title_switch = "-T"; +static char *exec_switch = "-e"; + +static int __init xterm_setup(char *line, int *add) +{ + *add = 0; + terminal_emulator = line; + + line = strchr(line, ','); + if (line == NULL) + return 0; + + *line++ = '\0'; + if (*line) + title_switch = line; + + line = strchr(line, ','); + if (line == NULL) + return 0; + + *line++ = '\0'; + if (*line) + exec_switch = line; + + return 0; +} + +__uml_setup("xterm=", xterm_setup, +"xterm=<terminal emulator>,<title switch>,<exec switch>\n" +" Specifies an alternate terminal emulator to use for the debugger,\n" +" consoles, and serial lines when they are attached to the xterm channel.\n" +" The values are the terminal emulator binary, the switch it uses to set\n" +" its title, and the switch it uses to execute a subprocess,\n" +" respectively. The title switch must have the form '<switch> title',\n" +" not '<switch>=title'. Similarly, the exec switch must have the form\n" +" '<switch> command arg1 arg2 ...'.\n" +" The default values are 'xterm=" CONFIG_XTERM_CHAN_DEFAULT_EMULATOR + ",-T,-e'.\n" +" Values for gnome-terminal are 'xterm=gnome-terminal,-t,-x'.\n\n" +); + +static int xterm_open(int input, int output, int primary, void *d, + char **dev_out) +{ + struct xterm_chan *data = d; + int pid, fd, new, err; + char title[256], file[] = "/tmp/xterm-pipeXXXXXX"; + char *argv[] = { terminal_emulator, title_switch, title, exec_switch, + OS_LIB_PATH "/uml/port-helper", "-uml-socket", + file, NULL }; + + if (access(argv[4], X_OK) < 0) + argv[4] = "port-helper"; + + /* + * Check that DISPLAY is set, this doesn't guarantee the xterm + * will work but w/o it we can be pretty sure it won't. + */ + if (getenv("DISPLAY") == NULL) { + printk(UM_KERN_ERR "xterm_open: $DISPLAY not set.\n"); + return -ENODEV; + } + + /* + * This business of getting a descriptor to a temp file, + * deleting the file and closing the descriptor is just to get + * a known-unused name for the Unix socket that we really + * want. + */ + fd = mkstemp(file); + if (fd < 0) { + err = -errno; + printk(UM_KERN_ERR "xterm_open : mkstemp failed, errno = %d\n", + errno); + return err; + } + + if (unlink(file)) { + err = -errno; + printk(UM_KERN_ERR "xterm_open : unlink failed, errno = %d\n", + errno); + close(fd); + return err; + } + close(fd); + + fd = os_create_unix_socket(file, sizeof(file), 1); + if (fd < 0) { + printk(UM_KERN_ERR "xterm_open : create_unix_socket failed, " + "errno = %d\n", -fd); + return fd; + } + + sprintf(title, data->title, data->device); + pid = run_helper(NULL, NULL, argv); + if (pid < 0) { + err = pid; + printk(UM_KERN_ERR "xterm_open : run_helper failed, " + "errno = %d\n", -err); + goto out_close1; + } + + err = os_set_fd_block(fd, 0); + if (err < 0) { + printk(UM_KERN_ERR "xterm_open : failed to set descriptor " + "non-blocking, err = %d\n", -err); + goto out_kill; + } + + data->chan_fd = fd; + new = xterm_fd(fd, &data->helper_pid); + if (new < 0) { + err = new; + printk(UM_KERN_ERR "xterm_open : os_rcv_fd failed, err = %d\n", + -err); + goto out_kill; + } + + err = os_set_fd_block(new, 0); + if (err) { + printk(UM_KERN_ERR "xterm_open : failed to set xterm " + "descriptor non-blocking, err = %d\n", -err); + goto out_close2; + } + + CATCH_EINTR(err = tcgetattr(new, &data->tt)); + if (err) { + new = err; + goto out_close2; + } + + if (data->raw) { + err = raw(new); + if (err) { + new = err; + goto out_close2; + } + } + + unlink(file); + data->pid = pid; + *dev_out = NULL; + + return new; + + out_close2: + close(new); + out_kill: + os_kill_process(pid, 1); + out_close1: + close(fd); + + return err; +} + +static void xterm_close(int fd, void *d) +{ + struct xterm_chan *data = d; + + if (data->pid != -1) + os_kill_process(data->pid, 1); + data->pid = -1; + + if (data->helper_pid != -1) + os_kill_process(data->helper_pid, 0); + data->helper_pid = -1; + + if (data->chan_fd != -1) + os_close_file(data->chan_fd); + os_close_file(fd); +} + +const struct chan_ops xterm_ops = { + .type = "xterm", + .init = xterm_init, + .open = xterm_open, + .close = xterm_close, + .read = generic_read, + .write = generic_write, + .console_write = generic_console_write, + .window_size = generic_window_size, + .free = generic_free, + .winch = 1, +}; diff --git a/arch/um/drivers/xterm.h b/arch/um/drivers/xterm.h new file mode 100644 index 0000000000..5968da3a6a --- /dev/null +++ b/arch/um/drivers/xterm.h @@ -0,0 +1,12 @@ +/* SPDX-License-Identifier: GPL-2.0 */ +/* + * Copyright (C) 2002 Jeff Dike (jdike@karaya.com) + */ + +#ifndef __XTERM_H__ +#define __XTERM_H__ + +extern int xterm_fd(int socket, int *pid_out); + +#endif + diff --git a/arch/um/drivers/xterm_kern.c b/arch/um/drivers/xterm_kern.c new file mode 100644 index 0000000000..8011e51993 --- /dev/null +++ b/arch/um/drivers/xterm_kern.c @@ -0,0 +1,76 @@ +// SPDX-License-Identifier: GPL-2.0 +/* + * Copyright (C) 2001 - 2007 Jeff Dike (jdike@{addtoit,linux.intel}.com) + */ + +#include <linux/slab.h> +#include <linux/completion.h> +#include <linux/irqreturn.h> +#include <asm/irq.h> +#include <irq_kern.h> +#include <os.h> +#include "xterm.h" + +struct xterm_wait { + struct completion ready; + int fd; + int pid; + int new_fd; +}; + +static irqreturn_t xterm_interrupt(int irq, void *data) +{ + struct xterm_wait *xterm = data; + int fd; + + fd = os_rcv_fd(xterm->fd, &xterm->pid); + if (fd == -EAGAIN) + return IRQ_NONE; + + xterm->new_fd = fd; + complete(&xterm->ready); + + return IRQ_HANDLED; +} + +int xterm_fd(int socket, int *pid_out) +{ + struct xterm_wait *data; + int err, ret; + + data = kmalloc(sizeof(*data), GFP_KERNEL); + if (data == NULL) { + printk(KERN_ERR "xterm_fd : failed to allocate xterm_wait\n"); + return -ENOMEM; + } + + /* This is a locked semaphore... */ + *data = ((struct xterm_wait) { .fd = socket, + .pid = -1, + .new_fd = -1 }); + init_completion(&data->ready); + + err = um_request_irq(XTERM_IRQ, socket, IRQ_READ, xterm_interrupt, + IRQF_SHARED, "xterm", data); + if (err < 0) { + printk(KERN_ERR "xterm_fd : failed to get IRQ for xterm, " + "err = %d\n", err); + ret = err; + goto out; + } + + /* ... so here we wait for an xterm interrupt. + * + * XXX Note, if the xterm doesn't work for some reason (eg. DISPLAY + * isn't set) this will hang... */ + wait_for_completion(&data->ready); + + um_free_irq(XTERM_IRQ, data); + + ret = data->new_fd; + *pid_out = data->pid; + out: + kfree(data); + + return ret; +} diff --git a/arch/um/include/asm/Kbuild b/arch/um/include/asm/Kbuild new file mode 100644 index 0000000000..b2d834a29f --- /dev/null +++ b/arch/um/include/asm/Kbuild @@ -0,0 +1,30 @@ +# SPDX-License-Identifier: GPL-2.0 +generic-y += bpf_perf_event.h +generic-y += bug.h +generic-y += compat.h +generic-y += current.h +generic-y += device.h +generic-y += dma-mapping.h +generic-y += emergency-restart.h +generic-y += exec.h +generic-y += extable.h +generic-y += fb.h +generic-y += ftrace.h +generic-y += hw_irq.h +generic-y += irq_regs.h +generic-y += irq_work.h +generic-y += kdebug.h +generic-y += mcs_spinlock.h +generic-y += mmiowb.h +generic-y += module.lds.h +generic-y += param.h +generic-y += parport.h +generic-y += percpu.h +generic-y += preempt.h +generic-y += softirq_stack.h +generic-y += switch_to.h +generic-y += topology.h +generic-y += trace_clock.h +generic-y += kprobes.h +generic-y += mm_hooks.h +generic-y += vga.h diff --git a/arch/um/include/asm/archrandom.h b/arch/um/include/asm/archrandom.h new file mode 100644 index 0000000000..24e16c979c --- /dev/null +++ b/arch/um/include/asm/archrandom.h @@ -0,0 +1,25 @@ +/* SPDX-License-Identifier: GPL-2.0 */ +#ifndef __ASM_UM_ARCHRANDOM_H__ +#define __ASM_UM_ARCHRANDOM_H__ + +#include <linux/types.h> + +/* This is from <os.h>, but better not to #include that in a global header here. */ +ssize_t os_getrandom(void *buf, size_t len, unsigned int flags); + +static inline size_t __must_check arch_get_random_longs(unsigned long *v, size_t max_longs) +{ + ssize_t ret; + + ret = os_getrandom(v, max_longs * sizeof(*v), 0); + if (ret < 0) + return 0; + return ret / sizeof(*v); +} + +static inline size_t __must_check arch_get_random_seed_longs(unsigned long *v, size_t max_longs) +{ + return 0; +} + +#endif diff --git a/arch/um/include/asm/asm-prototypes.h b/arch/um/include/asm/asm-prototypes.h new file mode 100644 index 0000000000..5898a26daa --- /dev/null +++ b/arch/um/include/asm/asm-prototypes.h @@ -0,0 +1 @@ +#include <asm-generic/asm-prototypes.h> diff --git a/arch/um/include/asm/cache.h b/arch/um/include/asm/cache.h new file mode 100644 index 0000000000..5c15627348 --- /dev/null +++ b/arch/um/include/asm/cache.h @@ -0,0 +1,18 @@ +/* SPDX-License-Identifier: GPL-2.0 */ +#ifndef __UM_CACHE_H +#define __UM_CACHE_H + + +#if defined(CONFIG_UML_X86) && !defined(CONFIG_64BIT) +# define L1_CACHE_SHIFT (CONFIG_X86_L1_CACHE_SHIFT) +#elif defined(CONFIG_UML_X86) /* 64-bit */ +# define L1_CACHE_SHIFT 6 /* Should be 7 on Intel */ +#else +/* XXX: this was taken from x86, now it's completely random. Luckily only + * affects SMP padding. */ +# define L1_CACHE_SHIFT 5 +#endif + +#define L1_CACHE_BYTES (1 << L1_CACHE_SHIFT) + +#endif diff --git a/arch/um/include/asm/cacheflush.h b/arch/um/include/asm/cacheflush.h new file mode 100644 index 0000000000..4c9858cd36 --- /dev/null +++ b/arch/um/include/asm/cacheflush.h @@ -0,0 +1,9 @@ +#ifndef __UM_ASM_CACHEFLUSH_H +#define __UM_ASM_CACHEFLUSH_H + +#include <asm/tlbflush.h> +#define flush_cache_vmap flush_tlb_kernel_range +#define flush_cache_vunmap flush_tlb_kernel_range + +#include <asm-generic/cacheflush.h> +#endif /* __UM_ASM_CACHEFLUSH_H */ diff --git a/arch/um/include/asm/common.lds.S b/arch/um/include/asm/common.lds.S new file mode 100644 index 0000000000..fd481ac371 --- /dev/null +++ b/arch/um/include/asm/common.lds.S @@ -0,0 +1,101 @@ +/* SPDX-License-Identifier: GPL-2.0 */ +#include <asm-generic/vmlinux.lds.h> + + .fini : { *(.fini) } =0x9090 + _etext = .; + PROVIDE (etext = .); + + . = ALIGN(4096); + _sdata = .; + PROVIDE (sdata = .); + + RO_DATA(4096) + + .unprotected : { *(.unprotected) } + . = ALIGN(4096); + PROVIDE (_unprotected_end = .); + + . = ALIGN(4096); + EXCEPTION_TABLE(0) + + BUG_TABLE + + .uml.setup.init : { + __uml_setup_start = .; + *(.uml.setup.init) + __uml_setup_end = .; + } + + .uml.help.init : { + __uml_help_start = .; + *(.uml.help.init) + __uml_help_end = .; + } + + .uml.postsetup.init : { + __uml_postsetup_start = .; + *(.uml.postsetup.init) + __uml_postsetup_end = .; + } + + .init.setup : { + INIT_SETUP(0) + } + + PERCPU_SECTION(32) + + .initcall.init : { + INIT_CALLS + } + + .con_initcall.init : { + CON_INITCALL + } + + .exitcall : { + __exitcall_begin = .; + *(.exitcall.exit) + __exitcall_end = .; + } + + .uml.exitcall : { + __uml_exitcall_begin = .; + *(.uml.exitcall.exit) + __uml_exitcall_end = .; + } + + . = ALIGN(4); + .altinstructions : { + __alt_instructions = .; + *(.altinstructions) + __alt_instructions_end = .; + } + .altinstr_replacement : { *(.altinstr_replacement) } + /* .exit.text is discard at runtime, not link time, to deal with references + from .altinstructions and .eh_frame */ + .exit.text : { EXIT_TEXT } + .exit.data : { *(.exit.data) } + + .preinit_array : { + __preinit_array_start = .; + *(.preinit_array) + __preinit_array_end = .; + } + .init_array : { + __init_array_start = .; + *(.kasan_init) + *(.init_array.*) + *(.init_array) + __init_array_end = .; + } + .fini_array : { + __fini_array_start = .; + *(.fini_array) + __fini_array_end = .; + } + + . = ALIGN(4096); + .init.ramfs : { + INIT_RAM_FS + } + diff --git a/arch/um/include/asm/cpufeature.h b/arch/um/include/asm/cpufeature.h new file mode 100644 index 0000000000..4b6d1b526b --- /dev/null +++ b/arch/um/include/asm/cpufeature.h @@ -0,0 +1,142 @@ +/* SPDX-License-Identifier: GPL-2.0 */ +#ifndef _ASM_UM_CPUFEATURE_H +#define _ASM_UM_CPUFEATURE_H + +#include <asm/processor.h> + +#if defined(__KERNEL__) && !defined(__ASSEMBLY__) + +#include <asm/asm.h> +#include <linux/bitops.h> + +extern const char * const x86_cap_flags[NCAPINTS*32]; +extern const char * const x86_power_flags[32]; +#define X86_CAP_FMT "%s" +#define x86_cap_flag(flag) x86_cap_flags[flag] + +/* + * In order to save room, we index into this array by doing + * X86_BUG_<name> - NCAPINTS*32. + */ +extern const char * const x86_bug_flags[NBUGINTS*32]; + +#define test_cpu_cap(c, bit) \ + test_bit(bit, (unsigned long *)((c)->x86_capability)) + +/* + * There are 32 bits/features in each mask word. The high bits + * (selected with (bit>>5) give us the word number and the low 5 + * bits give us the bit/feature number inside the word. + * (1UL<<((bit)&31) gives us a mask for the feature_bit so we can + * see if it is set in the mask word. + */ +#define CHECK_BIT_IN_MASK_WORD(maskname, word, bit) \ + (((bit)>>5)==(word) && (1UL<<((bit)&31) & maskname##word )) + +#define cpu_has(c, bit) \ + test_cpu_cap(c, bit) + +#define this_cpu_has(bit) \ + (__builtin_constant_p(bit) && REQUIRED_MASK_BIT_SET(bit) ? 1 : \ + x86_this_cpu_test_bit(bit, \ + (unsigned long __percpu *)&cpu_info.x86_capability)) + +/* + * This macro is for detection of features which need kernel + * infrastructure to be used. It may *not* directly test the CPU + * itself. Use the cpu_has() family if you want true runtime + * testing of CPU features, like in hypervisor code where you are + * supporting a possible guest feature where host support for it + * is not relevant. + */ +#define cpu_feature_enabled(bit) \ + (__builtin_constant_p(bit) && DISABLED_MASK_BIT_SET(bit) ? 0 : static_cpu_has(bit)) + +#define boot_cpu_has(bit) cpu_has(&boot_cpu_data, bit) + +#define set_cpu_cap(c, bit) set_bit(bit, (unsigned long *)((c)->x86_capability)) + +extern void setup_clear_cpu_cap(unsigned int bit); + +#define setup_force_cpu_cap(bit) do { \ + set_cpu_cap(&boot_cpu_data, bit); \ + set_bit(bit, (unsigned long *)cpu_caps_set); \ +} while (0) + +#define setup_force_cpu_bug(bit) setup_force_cpu_cap(bit) + +/* + * Static testing of CPU features. Used the same as boot_cpu_has(). It + * statically patches the target code for additional performance. Use + * static_cpu_has() only in fast paths, where every cycle counts. Which + * means that the boot_cpu_has() variant is already fast enough for the + * majority of cases and you should stick to using it as it is generally + * only two instructions: a RIP-relative MOV and a TEST. + */ +static __always_inline bool _static_cpu_has(u16 bit) +{ + asm_volatile_goto("1: jmp 6f\n" + "2:\n" + ".skip -(((5f-4f) - (2b-1b)) > 0) * " + "((5f-4f) - (2b-1b)),0x90\n" + "3:\n" + ".section .altinstructions,\"a\"\n" + " .long 1b - .\n" /* src offset */ + " .long 4f - .\n" /* repl offset */ + " .word %P[always]\n" /* always replace */ + " .byte 3b - 1b\n" /* src len */ + " .byte 5f - 4f\n" /* repl len */ + " .byte 3b - 2b\n" /* pad len */ + ".previous\n" + ".section .altinstr_replacement,\"ax\"\n" + "4: jmp %l[t_no]\n" + "5:\n" + ".previous\n" + ".section .altinstructions,\"a\"\n" + " .long 1b - .\n" /* src offset */ + " .long 0\n" /* no replacement */ + " .word %P[feature]\n" /* feature bit */ + " .byte 3b - 1b\n" /* src len */ + " .byte 0\n" /* repl len */ + " .byte 0\n" /* pad len */ + ".previous\n" + ".section .altinstr_aux,\"ax\"\n" + "6:\n" + " testb %[bitnum],%[cap_byte]\n" + " jnz %l[t_yes]\n" + " jmp %l[t_no]\n" + ".previous\n" + : : [feature] "i" (bit), + [always] "i" (X86_FEATURE_ALWAYS), + [bitnum] "i" (1 << (bit & 7)), + [cap_byte] "m" (((const char *)boot_cpu_data.x86_capability)[bit >> 3]) + : : t_yes, t_no); +t_yes: + return true; +t_no: + return false; +} + +#define static_cpu_has(bit) \ +( \ + __builtin_constant_p(boot_cpu_has(bit)) ? \ + boot_cpu_has(bit) : \ + _static_cpu_has(bit) \ +) + +#define cpu_has_bug(c, bit) cpu_has(c, (bit)) +#define set_cpu_bug(c, bit) set_cpu_cap(c, (bit)) + +#define static_cpu_has_bug(bit) static_cpu_has((bit)) +#define boot_cpu_has_bug(bit) cpu_has_bug(&boot_cpu_data, (bit)) +#define boot_cpu_set_bug(bit) set_cpu_cap(&boot_cpu_data, (bit)) + +#define MAX_CPU_FEATURES (NCAPINTS * 32) +#define cpu_have_feature boot_cpu_has + +#define CPU_FEATURE_TYPEFMT "x86,ven%04Xfam%04Xmod%04X" +#define CPU_FEATURE_TYPEVAL boot_cpu_data.x86_vendor, boot_cpu_data.x86, \ + boot_cpu_data.x86_model + +#endif /* defined(__KERNEL__) && !defined(__ASSEMBLY__) */ +#endif /* _ASM_UM_CPUFEATURE_H */ diff --git a/arch/um/include/asm/delay.h b/arch/um/include/asm/delay.h new file mode 100644 index 0000000000..e79b2ab6f4 --- /dev/null +++ b/arch/um/include/asm/delay.h @@ -0,0 +1,30 @@ +/* SPDX-License-Identifier: GPL-2.0 */ +#ifndef __UM_DELAY_H +#define __UM_DELAY_H +#include <asm-generic/delay.h> +#include <linux/time-internal.h> + +static inline void um_ndelay(unsigned long nsecs) +{ + if (time_travel_mode == TT_MODE_INFCPU || + time_travel_mode == TT_MODE_EXTERNAL) { + time_travel_ndelay(nsecs); + return; + } + ndelay(nsecs); +} +#undef ndelay +#define ndelay(n) um_ndelay(n) + +static inline void um_udelay(unsigned long usecs) +{ + if (time_travel_mode == TT_MODE_INFCPU || + time_travel_mode == TT_MODE_EXTERNAL) { + time_travel_ndelay(1000 * usecs); + return; + } + udelay(usecs); +} +#undef udelay +#define udelay(n) um_udelay(n) +#endif /* __UM_DELAY_H */ diff --git a/arch/um/include/asm/dma.h b/arch/um/include/asm/dma.h new file mode 100644 index 0000000000..fdc53642c7 --- /dev/null +++ b/arch/um/include/asm/dma.h @@ -0,0 +1,11 @@ +/* SPDX-License-Identifier: GPL-2.0 */ +#ifndef __UM_DMA_H +#define __UM_DMA_H + +#include <asm/io.h> + +extern unsigned long uml_physmem; + +#define MAX_DMA_ADDRESS (uml_physmem) + +#endif diff --git a/arch/um/include/asm/fixmap.h b/arch/um/include/asm/fixmap.h new file mode 100644 index 0000000000..2efac58271 --- /dev/null +++ b/arch/um/include/asm/fixmap.h @@ -0,0 +1,56 @@ +/* SPDX-License-Identifier: GPL-2.0 */ +#ifndef __UM_FIXMAP_H +#define __UM_FIXMAP_H + +#include <asm/processor.h> +#include <asm/archparam.h> +#include <asm/page.h> +#include <linux/threads.h> + +/* + * Here we define all the compile-time 'special' virtual + * addresses. The point is to have a constant address at + * compile time, but to set the physical address only + * in the boot process. We allocate these special addresses + * from the end of virtual memory (0xfffff000) backwards. + * Also this lets us do fail-safe vmalloc(), we + * can guarantee that these special addresses and + * vmalloc()-ed addresses never overlap. + * + * these 'compile-time allocated' memory buffers are + * fixed-size 4k pages. (or larger if used with an increment + * highger than 1) use fixmap_set(idx,phys) to associate + * physical memory with fixmap indices. + * + * TLB entries of such buffers will not be flushed across + * task switches. + */ + +/* + * on UP currently we will have no trace of the fixmap mechanizm, + * no page table allocations, etc. This might change in the + * future, say framebuffers for the console driver(s) could be + * fix-mapped? + */ +enum fixed_addresses { + __end_of_fixed_addresses +}; + +extern void __set_fixmap (enum fixed_addresses idx, + unsigned long phys, pgprot_t flags); + +/* + * used by vmalloc.c. + * + * Leave one empty page between vmalloc'ed areas and + * the start of the fixmap, and leave one page empty + * at the top of mem.. + */ + +#define FIXADDR_TOP (TASK_SIZE - 2 * PAGE_SIZE) +#define FIXADDR_SIZE (__end_of_fixed_addresses << PAGE_SHIFT) +#define FIXADDR_START (FIXADDR_TOP - FIXADDR_SIZE) + +#include <asm-generic/fixmap.h> + +#endif diff --git a/arch/um/include/asm/fpu/api.h b/arch/um/include/asm/fpu/api.h new file mode 100644 index 0000000000..71bfd9ef39 --- /dev/null +++ b/arch/um/include/asm/fpu/api.h @@ -0,0 +1,20 @@ +/* SPDX-License-Identifier: GPL-2.0-or-later */ +#ifndef _ASM_UM_FPU_API_H +#define _ASM_UM_FPU_API_H + +/* Copyright (c) 2020 Cambridge Greys Ltd + * Copyright (c) 2020 Red Hat Inc. + * A set of "dummy" defines to allow the direct inclusion + * of x86 optimized copy, xor, etc routines into the + * UML code tree. */ + +#define kernel_fpu_begin() (void)0 +#define kernel_fpu_end() (void)0 + +static inline bool irq_fpu_usable(void) +{ + return true; +} + + +#endif diff --git a/arch/um/include/asm/futex.h b/arch/um/include/asm/futex.h new file mode 100644 index 0000000000..780aa6bfc0 --- /dev/null +++ b/arch/um/include/asm/futex.h @@ -0,0 +1,14 @@ +/* SPDX-License-Identifier: GPL-2.0 */ +#ifndef _ASM_UM_FUTEX_H +#define _ASM_UM_FUTEX_H + +#include <linux/futex.h> +#include <linux/uaccess.h> +#include <asm/errno.h> + + +int arch_futex_atomic_op_inuser(int op, u32 oparg, int *oval, u32 __user *uaddr); +int futex_atomic_cmpxchg_inatomic(u32 *uval, u32 __user *uaddr, + u32 oldval, u32 newval); + +#endif diff --git a/arch/um/include/asm/hardirq.h b/arch/um/include/asm/hardirq.h new file mode 100644 index 0000000000..52e2c36267 --- /dev/null +++ b/arch/um/include/asm/hardirq.h @@ -0,0 +1,9 @@ +/* SPDX-License-Identifier: GPL-2.0 */ +#ifndef __ASM_UM_HARDIRQ_H +#define __ASM_UM_HARDIRQ_H + +#include <asm-generic/hardirq.h> + +#define __ARCH_IRQ_EXIT_IRQS_DISABLED 1 + +#endif /* __ASM_UM_HARDIRQ_H */ diff --git a/arch/um/include/asm/io.h b/arch/um/include/asm/io.h new file mode 100644 index 0000000000..9ea42cc746 --- /dev/null +++ b/arch/um/include/asm/io.h @@ -0,0 +1,26 @@ +/* SPDX-License-Identifier: GPL-2.0 */ +#ifndef _ASM_UM_IO_H +#define _ASM_UM_IO_H +#include <linux/types.h> + +/* get emulated iomem (if desired) */ +#include <asm-generic/logic_io.h> + +#ifndef ioremap +#define ioremap ioremap +static inline void __iomem *ioremap(phys_addr_t offset, size_t size) +{ + return NULL; +} +#endif /* ioremap */ + +#ifndef iounmap +#define iounmap iounmap +static inline void iounmap(void __iomem *addr) +{ +} +#endif /* iounmap */ + +#include <asm-generic/io.h> + +#endif diff --git a/arch/um/include/asm/irq.h b/arch/um/include/asm/irq.h new file mode 100644 index 0000000000..749dfe8512 --- /dev/null +++ b/arch/um/include/asm/irq.h @@ -0,0 +1,39 @@ +/* SPDX-License-Identifier: GPL-2.0 */ +#ifndef __UM_IRQ_H +#define __UM_IRQ_H + +#define TIMER_IRQ 0 +#define UMN_IRQ 1 +#define UBD_IRQ 2 +#define UM_ETH_IRQ 3 +#define ACCEPT_IRQ 4 +#define MCONSOLE_IRQ 5 +#define WINCH_IRQ 6 +#define SIGIO_WRITE_IRQ 7 +#define TELNETD_IRQ 8 +#define XTERM_IRQ 9 +#define RANDOM_IRQ 10 + +#ifdef CONFIG_UML_NET_VECTOR + +#define VECTOR_BASE_IRQ (RANDOM_IRQ + 1) +#define VECTOR_IRQ_SPACE 8 + +#define UM_FIRST_DYN_IRQ (VECTOR_IRQ_SPACE + VECTOR_BASE_IRQ) + +#else + +#define UM_FIRST_DYN_IRQ (RANDOM_IRQ + 1) + +#endif + +#define UM_LAST_SIGNAL_IRQ 64 +/* If we have (simulated) PCI MSI, allow 64 more interrupt numbers for it */ +#ifdef CONFIG_PCI_MSI +#define NR_IRQS (UM_LAST_SIGNAL_IRQ + 64) +#else +#define NR_IRQS UM_LAST_SIGNAL_IRQ +#endif /* CONFIG_PCI_MSI */ + +#include <asm-generic/irq.h> +#endif diff --git a/arch/um/include/asm/irqflags.h b/arch/um/include/asm/irqflags.h new file mode 100644 index 0000000000..1e69ef5bc3 --- /dev/null +++ b/arch/um/include/asm/irqflags.h @@ -0,0 +1,38 @@ +/* SPDX-License-Identifier: GPL-2.0 */ +#ifndef __UM_IRQFLAGS_H +#define __UM_IRQFLAGS_H + +extern int signals_enabled; +int um_set_signals(int enable); +void block_signals(void); +void unblock_signals(void); + +#define arch_local_save_flags arch_local_save_flags +static inline unsigned long arch_local_save_flags(void) +{ + return signals_enabled; +} + +#define arch_local_irq_restore arch_local_irq_restore +static inline void arch_local_irq_restore(unsigned long flags) +{ + um_set_signals(flags); +} + +#define arch_local_irq_enable arch_local_irq_enable +static inline void arch_local_irq_enable(void) +{ + unblock_signals(); +} + +#define arch_local_irq_disable arch_local_irq_disable +static inline void arch_local_irq_disable(void) +{ + block_signals(); +} + +#define ARCH_IRQ_DISABLED 0 + +#include <asm-generic/irqflags.h> + +#endif diff --git a/arch/um/include/asm/kasan.h b/arch/um/include/asm/kasan.h new file mode 100644 index 0000000000..0d6547f4ec --- /dev/null +++ b/arch/um/include/asm/kasan.h @@ -0,0 +1,37 @@ +/* SPDX-License-Identifier: GPL-2.0 */ +#ifndef __ASM_UM_KASAN_H +#define __ASM_UM_KASAN_H + +#include <linux/init.h> +#include <linux/const.h> + +#define KASAN_SHADOW_OFFSET _AC(CONFIG_KASAN_SHADOW_OFFSET, UL) + +/* used in kasan_mem_to_shadow to divide by 8 */ +#define KASAN_SHADOW_SCALE_SHIFT 3 + +#ifdef CONFIG_X86_64 +#define KASAN_HOST_USER_SPACE_END_ADDR 0x00007fffffffffffUL +/* KASAN_SHADOW_SIZE is the size of total address space divided by 8 */ +#define KASAN_SHADOW_SIZE ((KASAN_HOST_USER_SPACE_END_ADDR + 1) >> \ + KASAN_SHADOW_SCALE_SHIFT) +#else +#error "KASAN_SHADOW_SIZE is not defined for this sub-architecture" +#endif /* CONFIG_X86_64 */ + +#define KASAN_SHADOW_START (KASAN_SHADOW_OFFSET) +#define KASAN_SHADOW_END (KASAN_SHADOW_START + KASAN_SHADOW_SIZE) + +#ifdef CONFIG_KASAN +void kasan_init(void); +void kasan_map_memory(void *start, unsigned long len); +extern int kasan_um_is_ready; + +#ifdef CONFIG_STATIC_LINK +#define kasan_arch_is_ready() (kasan_um_is_ready) +#endif +#else +static inline void kasan_init(void) { } +#endif /* CONFIG_KASAN */ + +#endif /* __ASM_UM_KASAN_H */ diff --git a/arch/um/include/asm/kvm_para.h b/arch/um/include/asm/kvm_para.h new file mode 100644 index 0000000000..14fab8f0b9 --- /dev/null +++ b/arch/um/include/asm/kvm_para.h @@ -0,0 +1 @@ +#include <asm-generic/kvm_para.h> diff --git a/arch/um/include/asm/mmu.h b/arch/um/include/asm/mmu.h new file mode 100644 index 0000000000..5b072aba5b --- /dev/null +++ b/arch/um/include/asm/mmu.h @@ -0,0 +1,24 @@ +/* SPDX-License-Identifier: GPL-2.0 */ +/* + * Copyright (C) 2002 - 2007 Jeff Dike (jdike@{addtoit,linux.intel}.com) + */ + +#ifndef __ARCH_UM_MMU_H +#define __ARCH_UM_MMU_H + +#include <mm_id.h> +#include <asm/mm_context.h> + +typedef struct mm_context { + struct mm_id id; + struct uml_arch_mm_context arch; + struct page *stub_pages[2]; +} mm_context_t; + +extern void __switch_mm(struct mm_id * mm_idp); + +/* Avoid tangled inclusion with asm/ldt.h */ +extern long init_new_ldt(struct mm_context *to_mm, struct mm_context *from_mm); +extern void free_ldt(struct mm_context *mm); + +#endif diff --git a/arch/um/include/asm/mmu_context.h b/arch/um/include/asm/mmu_context.h new file mode 100644 index 0000000000..68e2eb9cfb --- /dev/null +++ b/arch/um/include/asm/mmu_context.h @@ -0,0 +1,49 @@ +/* SPDX-License-Identifier: GPL-2.0 */ +/* + * Copyright (C) 2002 - 2007 Jeff Dike (jdike@{addtoit,linux.intel}.com) + */ + +#ifndef __UM_MMU_CONTEXT_H +#define __UM_MMU_CONTEXT_H + +#include <linux/sched.h> +#include <linux/mm_types.h> +#include <linux/mmap_lock.h> + +#include <asm/mm_hooks.h> +#include <asm/mmu.h> + +extern void force_flush_all(void); + +#define activate_mm activate_mm +static inline void activate_mm(struct mm_struct *old, struct mm_struct *new) +{ + /* + * This is called by fs/exec.c and sys_unshare() + * when the new ->mm is used for the first time. + */ + __switch_mm(&new->context.id); +} + +static inline void switch_mm(struct mm_struct *prev, struct mm_struct *next, + struct task_struct *tsk) +{ + unsigned cpu = smp_processor_id(); + + if(prev != next){ + cpumask_clear_cpu(cpu, mm_cpumask(prev)); + cpumask_set_cpu(cpu, mm_cpumask(next)); + if(next != &init_mm) + __switch_mm(&next->context.id); + } +} + +#define init_new_context init_new_context +extern int init_new_context(struct task_struct *task, struct mm_struct *mm); + +#define destroy_context destroy_context +extern void destroy_context(struct mm_struct *mm); + +#include <asm-generic/mmu_context.h> + +#endif diff --git a/arch/um/include/asm/msi.h b/arch/um/include/asm/msi.h new file mode 100644 index 0000000000..c8c6c381f3 --- /dev/null +++ b/arch/um/include/asm/msi.h @@ -0,0 +1 @@ +#include <asm-generic/msi.h> diff --git a/arch/um/include/asm/page.h b/arch/um/include/asm/page.h new file mode 100644 index 0000000000..84866127d0 --- /dev/null +++ b/arch/um/include/asm/page.h @@ -0,0 +1,122 @@ +/* SPDX-License-Identifier: GPL-2.0 */ +/* + * Copyright (C) 2000 - 2003 Jeff Dike (jdike@addtoit.com) + * Copyright 2003 PathScale, Inc. + */ + +#ifndef __UM_PAGE_H +#define __UM_PAGE_H + +#include <linux/const.h> + +/* PAGE_SHIFT determines the page size */ +#define PAGE_SHIFT 12 +#define PAGE_SIZE (_AC(1, UL) << PAGE_SHIFT) +#define PAGE_MASK (~(PAGE_SIZE-1)) + +#ifndef __ASSEMBLY__ + +struct page; + +#include <linux/pfn.h> +#include <linux/types.h> +#include <asm/vm-flags.h> + +/* + * These are used to make use of C type-checking.. + */ + +#define clear_page(page) memset((void *)(page), 0, PAGE_SIZE) +#define copy_page(to,from) memcpy((void *)(to), (void *)(from), PAGE_SIZE) + +#define clear_user_page(page, vaddr, pg) clear_page(page) +#define copy_user_page(to, from, vaddr, pg) copy_page(to, from) + +#if defined(CONFIG_3_LEVEL_PGTABLES) && !defined(CONFIG_64BIT) + +typedef struct { unsigned long pte; } pte_t; +typedef struct { unsigned long pmd; } pmd_t; +typedef struct { unsigned long pgd; } pgd_t; +#define pte_val(p) ((p).pte) + +#define pte_get_bits(p, bits) ((p).pte & (bits)) +#define pte_set_bits(p, bits) ((p).pte |= (bits)) +#define pte_clear_bits(p, bits) ((p).pte &= ~(bits)) +#define pte_copy(to, from) ({ (to).pte = (from).pte; }) +#define pte_is_zero(p) (!((p).pte & ~_PAGE_NEWPAGE)) +#define pte_set_val(p, phys, prot) \ + ({ (p).pte = (phys) | pgprot_val(prot); }) + +#define pmd_val(x) ((x).pmd) +#define __pmd(x) ((pmd_t) { (x) } ) + +typedef unsigned long long phys_t; + +#else + +typedef struct { unsigned long pte; } pte_t; +typedef struct { unsigned long pgd; } pgd_t; + +#ifdef CONFIG_3_LEVEL_PGTABLES +typedef struct { unsigned long pmd; } pmd_t; +#define pmd_val(x) ((x).pmd) +#define __pmd(x) ((pmd_t) { (x) } ) +#endif + +#define pte_val(x) ((x).pte) + + +#define pte_get_bits(p, bits) ((p).pte & (bits)) +#define pte_set_bits(p, bits) ((p).pte |= (bits)) +#define pte_clear_bits(p, bits) ((p).pte &= ~(bits)) +#define pte_copy(to, from) ((to).pte = (from).pte) +#define pte_is_zero(p) (!((p).pte & ~_PAGE_NEWPAGE)) +#define pte_set_val(p, phys, prot) (p).pte = (phys | pgprot_val(prot)) + +typedef unsigned long phys_t; + +#endif + +typedef struct { unsigned long pgprot; } pgprot_t; + +typedef struct page *pgtable_t; + +#define pgd_val(x) ((x).pgd) +#define pgprot_val(x) ((x).pgprot) + +#define __pte(x) ((pte_t) { (x) } ) +#define __pgd(x) ((pgd_t) { (x) } ) +#define __pgprot(x) ((pgprot_t) { (x) } ) + +extern unsigned long uml_physmem; + +#define PAGE_OFFSET (uml_physmem) +#define KERNELBASE PAGE_OFFSET + +#define __va_space (8*1024*1024) + +#include <mem.h> + +/* Cast to unsigned long before casting to void * to avoid a warning from + * mmap_kmem about cutting a long long down to a void *. Not sure that + * casting is the right thing, but 32-bit UML can't have 64-bit virtual + * addresses + */ +#define __pa(virt) uml_to_phys((void *) (unsigned long) (virt)) +#define __va(phys) uml_to_virt((unsigned long) (phys)) + +#define phys_to_pfn(p) ((p) >> PAGE_SHIFT) +#define pfn_to_phys(pfn) PFN_PHYS(pfn) + +#define virt_addr_valid(v) pfn_valid(phys_to_pfn(__pa(v))) + +#include <asm-generic/memory_model.h> +#include <asm-generic/getorder.h> + +#endif /* __ASSEMBLY__ */ + +#ifdef CONFIG_X86_32 +#define __HAVE_ARCH_GATE_AREA 1 +#endif + +#endif /* __UM_PAGE_H */ diff --git a/arch/um/include/asm/pci.h b/arch/um/include/asm/pci.h new file mode 100644 index 0000000000..238d2e7faf --- /dev/null +++ b/arch/um/include/asm/pci.h @@ -0,0 +1,19 @@ +/* SPDX-License-Identifier: GPL-2.0-only */ +#ifndef __ASM_UM_PCI_H +#define __ASM_UM_PCI_H +#include <linux/types.h> +#include <asm/io.h> + +/* Generic PCI */ +#include <asm-generic/pci.h> + +#ifdef CONFIG_PCI_MSI +/* + * This is a bit of an annoying hack, and it assumes we only have + * the virt-pci (if anything). Which is true, but still. + */ +void *pci_root_bus_fwnode(struct pci_bus *bus); +#define pci_root_bus_fwnode pci_root_bus_fwnode +#endif + +#endif /* __ASM_UM_PCI_H */ diff --git a/arch/um/include/asm/pgalloc.h b/arch/um/include/asm/pgalloc.h new file mode 100644 index 0000000000..de5e31c647 --- /dev/null +++ b/arch/um/include/asm/pgalloc.h @@ -0,0 +1,45 @@ +/* SPDX-License-Identifier: GPL-2.0 */ +/* + * Copyright (C) 2000, 2001, 2002 Jeff Dike (jdike@karaya.com) + * Copyright 2003 PathScale, Inc. + * Derived from include/asm-i386/pgalloc.h and include/asm-i386/pgtable.h + */ + +#ifndef __UM_PGALLOC_H +#define __UM_PGALLOC_H + +#include <linux/mm.h> + +#include <asm-generic/pgalloc.h> + +#define pmd_populate_kernel(mm, pmd, pte) \ + set_pmd(pmd, __pmd(_PAGE_TABLE + (unsigned long) __pa(pte))) + +#define pmd_populate(mm, pmd, pte) \ + set_pmd(pmd, __pmd(_PAGE_TABLE + \ + ((unsigned long long)page_to_pfn(pte) << \ + (unsigned long long) PAGE_SHIFT))) + +/* + * Allocate and free page tables. + */ +extern pgd_t *pgd_alloc(struct mm_struct *); + +#define __pte_free_tlb(tlb, pte, address) \ +do { \ + pagetable_pte_dtor(page_ptdesc(pte)); \ + tlb_remove_page_ptdesc((tlb), (page_ptdesc(pte))); \ +} while (0) + +#ifdef CONFIG_3_LEVEL_PGTABLES + +#define __pmd_free_tlb(tlb, pmd, address) \ +do { \ + pagetable_pmd_dtor(virt_to_ptdesc(pmd)); \ + tlb_remove_page_ptdesc((tlb), virt_to_ptdesc(pmd)); \ +} while (0) + +#endif + +#endif + diff --git a/arch/um/include/asm/pgtable-2level.h b/arch/um/include/asm/pgtable-2level.h new file mode 100644 index 0000000000..8256ecc5b9 --- /dev/null +++ b/arch/um/include/asm/pgtable-2level.h @@ -0,0 +1,43 @@ +/* SPDX-License-Identifier: GPL-2.0 */ +/* + * Copyright (C) 2000, 2001, 2002 Jeff Dike (jdike@karaya.com) + * Copyright 2003 PathScale, Inc. + * Derived from include/asm-i386/pgtable.h + */ + +#ifndef __UM_PGTABLE_2LEVEL_H +#define __UM_PGTABLE_2LEVEL_H + +#include <asm-generic/pgtable-nopmd.h> + +/* PGDIR_SHIFT determines what a third-level page table entry can map */ + +#define PGDIR_SHIFT 22 +#define PGDIR_SIZE (1UL << PGDIR_SHIFT) +#define PGDIR_MASK (~(PGDIR_SIZE-1)) + +/* + * entries per page directory level: the i386 is two-level, so + * we don't really have any PMD directory physically. + */ +#define PTRS_PER_PTE 1024 +#define USER_PTRS_PER_PGD ((TASK_SIZE + (PGDIR_SIZE - 1)) / PGDIR_SIZE) +#define PTRS_PER_PGD 1024 + +#define pte_ERROR(e) \ + printk("%s:%d: bad pte %p(%08lx).\n", __FILE__, __LINE__, &(e), \ + pte_val(e)) +#define pgd_ERROR(e) \ + printk("%s:%d: bad pgd %p(%08lx).\n", __FILE__, __LINE__, &(e), \ + pgd_val(e)) + +static inline int pgd_newpage(pgd_t pgd) { return 0; } +static inline void pgd_mkuptodate(pgd_t pgd) { } + +#define set_pmd(pmdptr, pmdval) (*(pmdptr) = (pmdval)) + +#define pte_pfn(x) phys_to_pfn(pte_val(x)) +#define pfn_pte(pfn, prot) __pte(pfn_to_phys(pfn) | pgprot_val(prot)) +#define pfn_pmd(pfn, prot) __pmd(pfn_to_phys(pfn) | pgprot_val(prot)) + +#endif diff --git a/arch/um/include/asm/pgtable-3level.h b/arch/um/include/asm/pgtable-3level.h new file mode 100644 index 0000000000..8a5032ec23 --- /dev/null +++ b/arch/um/include/asm/pgtable-3level.h @@ -0,0 +1,100 @@ +/* SPDX-License-Identifier: GPL-2.0 */ +/* + * Copyright 2003 PathScale Inc + * Derived from include/asm-i386/pgtable.h + */ + +#ifndef __UM_PGTABLE_3LEVEL_H +#define __UM_PGTABLE_3LEVEL_H + +#include <asm-generic/pgtable-nopud.h> + +/* PGDIR_SHIFT determines what a third-level page table entry can map */ + +#ifdef CONFIG_64BIT +#define PGDIR_SHIFT 30 +#else +#define PGDIR_SHIFT 31 +#endif +#define PGDIR_SIZE (1UL << PGDIR_SHIFT) +#define PGDIR_MASK (~(PGDIR_SIZE-1)) + +/* PMD_SHIFT determines the size of the area a second-level page table can + * map + */ + +#define PMD_SHIFT 21 +#define PMD_SIZE (1UL << PMD_SHIFT) +#define PMD_MASK (~(PMD_SIZE-1)) + +/* + * entries per page directory level + */ + +#define PTRS_PER_PTE 512 +#ifdef CONFIG_64BIT +#define PTRS_PER_PMD 512 +#define PTRS_PER_PGD 512 +#else +#define PTRS_PER_PMD 1024 +#define PTRS_PER_PGD 1024 +#endif + +#define USER_PTRS_PER_PGD ((TASK_SIZE + (PGDIR_SIZE - 1)) / PGDIR_SIZE) + +#define pte_ERROR(e) \ + printk("%s:%d: bad pte %p(%016lx).\n", __FILE__, __LINE__, &(e), \ + pte_val(e)) +#define pmd_ERROR(e) \ + printk("%s:%d: bad pmd %p(%016lx).\n", __FILE__, __LINE__, &(e), \ + pmd_val(e)) +#define pgd_ERROR(e) \ + printk("%s:%d: bad pgd %p(%016lx).\n", __FILE__, __LINE__, &(e), \ + pgd_val(e)) + +#define pud_none(x) (!(pud_val(x) & ~_PAGE_NEWPAGE)) +#define pud_bad(x) ((pud_val(x) & (~PAGE_MASK & ~_PAGE_USER)) != _KERNPG_TABLE) +#define pud_present(x) (pud_val(x) & _PAGE_PRESENT) +#define pud_populate(mm, pud, pmd) \ + set_pud(pud, __pud(_PAGE_TABLE + __pa(pmd))) + +#define set_pud(pudptr, pudval) (*(pudptr) = (pudval)) + +static inline int pgd_newpage(pgd_t pgd) +{ + return(pgd_val(pgd) & _PAGE_NEWPAGE); +} + +static inline void pgd_mkuptodate(pgd_t pgd) { pgd_val(pgd) &= ~_PAGE_NEWPAGE; } + +#define set_pmd(pmdptr, pmdval) (*(pmdptr) = (pmdval)) + +static inline void pud_clear (pud_t *pud) +{ + set_pud(pud, __pud(_PAGE_NEWPAGE)); +} + +#define pud_page(pud) phys_to_page(pud_val(pud) & PAGE_MASK) +#define pud_pgtable(pud) ((pmd_t *) __va(pud_val(pud) & PAGE_MASK)) + +static inline unsigned long pte_pfn(pte_t pte) +{ + return phys_to_pfn(pte_val(pte)); +} + +static inline pte_t pfn_pte(unsigned long page_nr, pgprot_t pgprot) +{ + pte_t pte; + phys_t phys = pfn_to_phys(page_nr); + + pte_set_val(pte, phys, pgprot); + return pte; +} + +static inline pmd_t pfn_pmd(unsigned long page_nr, pgprot_t pgprot) +{ + return __pmd((page_nr << PAGE_SHIFT) | pgprot_val(pgprot)); +} + +#endif + diff --git a/arch/um/include/asm/pgtable.h b/arch/um/include/asm/pgtable.h new file mode 100644 index 0000000000..e1ece21dbe --- /dev/null +++ b/arch/um/include/asm/pgtable.h @@ -0,0 +1,337 @@ +/* SPDX-License-Identifier: GPL-2.0 */ +/* + * Copyright (C) 2000 - 2007 Jeff Dike (jdike@{addtoit,linux.intel}.com) + * Copyright 2003 PathScale, Inc. + * Derived from include/asm-i386/pgtable.h + */ + +#ifndef __UM_PGTABLE_H +#define __UM_PGTABLE_H + +#include <asm/fixmap.h> + +#define _PAGE_PRESENT 0x001 +#define _PAGE_NEWPAGE 0x002 +#define _PAGE_NEWPROT 0x004 +#define _PAGE_RW 0x020 +#define _PAGE_USER 0x040 +#define _PAGE_ACCESSED 0x080 +#define _PAGE_DIRTY 0x100 +/* If _PAGE_PRESENT is clear, we use these: */ +#define _PAGE_PROTNONE 0x010 /* if the user mapped it with PROT_NONE; + pte_present gives true */ + +/* We borrow bit 10 to store the exclusive marker in swap PTEs. */ +#define _PAGE_SWP_EXCLUSIVE 0x400 + +#ifdef CONFIG_3_LEVEL_PGTABLES +#include <asm/pgtable-3level.h> +#else +#include <asm/pgtable-2level.h> +#endif + +extern pgd_t swapper_pg_dir[PTRS_PER_PGD]; + +/* zero page used for uninitialized stuff */ +extern unsigned long *empty_zero_page; + +/* Just any arbitrary offset to the start of the vmalloc VM area: the + * current 8MB value just means that there will be a 8MB "hole" after the + * physical memory until the kernel virtual memory starts. That means that + * any out-of-bounds memory accesses will hopefully be caught. + * The vmalloc() routines leaves a hole of 4kB between each vmalloced + * area for the same reason. ;) + */ + +extern unsigned long end_iomem; + +#define VMALLOC_OFFSET (__va_space) +#define VMALLOC_START ((end_iomem + VMALLOC_OFFSET) & ~(VMALLOC_OFFSET-1)) +#define PKMAP_BASE ((FIXADDR_START - LAST_PKMAP * PAGE_SIZE) & PMD_MASK) +#define VMALLOC_END (FIXADDR_START-2*PAGE_SIZE) +#define MODULES_VADDR VMALLOC_START +#define MODULES_END VMALLOC_END +#define MODULES_LEN (MODULES_VADDR - MODULES_END) + +#define _PAGE_TABLE (_PAGE_PRESENT | _PAGE_RW | _PAGE_USER | _PAGE_ACCESSED | _PAGE_DIRTY) +#define _KERNPG_TABLE (_PAGE_PRESENT | _PAGE_RW | _PAGE_ACCESSED | _PAGE_DIRTY) +#define _PAGE_CHG_MASK (PAGE_MASK | _PAGE_ACCESSED | _PAGE_DIRTY) +#define __PAGE_KERNEL_EXEC \ + (_PAGE_PRESENT | _PAGE_RW | _PAGE_DIRTY | _PAGE_ACCESSED) +#define PAGE_NONE __pgprot(_PAGE_PROTNONE | _PAGE_ACCESSED) +#define PAGE_SHARED __pgprot(_PAGE_PRESENT | _PAGE_RW | _PAGE_USER | _PAGE_ACCESSED) +#define PAGE_COPY __pgprot(_PAGE_PRESENT | _PAGE_USER | _PAGE_ACCESSED) +#define PAGE_READONLY __pgprot(_PAGE_PRESENT | _PAGE_USER | _PAGE_ACCESSED) +#define PAGE_KERNEL __pgprot(_PAGE_PRESENT | _PAGE_RW | _PAGE_DIRTY | _PAGE_ACCESSED) +#define PAGE_KERNEL_EXEC __pgprot(__PAGE_KERNEL_EXEC) + +/* + * The i386 can't do page protection for execute, and considers that the same + * are read. + * Also, write permissions imply read permissions. This is the closest we can + * get.. + */ + +/* + * ZERO_PAGE is a global shared page that is always zero: used + * for zero-mapped memory areas etc.. + */ +#define ZERO_PAGE(vaddr) virt_to_page(empty_zero_page) + +#define pte_clear(mm,addr,xp) pte_set_val(*(xp), (phys_t) 0, __pgprot(_PAGE_NEWPAGE)) + +#define pmd_none(x) (!((unsigned long)pmd_val(x) & ~_PAGE_NEWPAGE)) +#define pmd_bad(x) ((pmd_val(x) & (~PAGE_MASK & ~_PAGE_USER)) != _KERNPG_TABLE) + +#define pmd_present(x) (pmd_val(x) & _PAGE_PRESENT) +#define pmd_clear(xp) do { pmd_val(*(xp)) = _PAGE_NEWPAGE; } while (0) + +#define pmd_newpage(x) (pmd_val(x) & _PAGE_NEWPAGE) +#define pmd_mkuptodate(x) (pmd_val(x) &= ~_PAGE_NEWPAGE) + +#define pud_newpage(x) (pud_val(x) & _PAGE_NEWPAGE) +#define pud_mkuptodate(x) (pud_val(x) &= ~_PAGE_NEWPAGE) + +#define p4d_newpage(x) (p4d_val(x) & _PAGE_NEWPAGE) +#define p4d_mkuptodate(x) (p4d_val(x) &= ~_PAGE_NEWPAGE) + +#define pmd_pfn(pmd) (pmd_val(pmd) >> PAGE_SHIFT) +#define pmd_page(pmd) phys_to_page(pmd_val(pmd) & PAGE_MASK) + +#define pte_page(x) pfn_to_page(pte_pfn(x)) + +#define pte_present(x) pte_get_bits(x, (_PAGE_PRESENT | _PAGE_PROTNONE)) + +/* + * ================================= + * Flags checking section. + * ================================= + */ + +static inline int pte_none(pte_t pte) +{ + return pte_is_zero(pte); +} + +/* + * The following only work if pte_present() is true. + * Undefined behaviour if not.. + */ +static inline int pte_read(pte_t pte) +{ + return((pte_get_bits(pte, _PAGE_USER)) && + !(pte_get_bits(pte, _PAGE_PROTNONE))); +} + +static inline int pte_exec(pte_t pte){ + return((pte_get_bits(pte, _PAGE_USER)) && + !(pte_get_bits(pte, _PAGE_PROTNONE))); +} + +static inline int pte_write(pte_t pte) +{ + return((pte_get_bits(pte, _PAGE_RW)) && + !(pte_get_bits(pte, _PAGE_PROTNONE))); +} + +static inline int pte_dirty(pte_t pte) +{ + return pte_get_bits(pte, _PAGE_DIRTY); +} + +static inline int pte_young(pte_t pte) +{ + return pte_get_bits(pte, _PAGE_ACCESSED); +} + +static inline int pte_newpage(pte_t pte) +{ + return pte_get_bits(pte, _PAGE_NEWPAGE); +} + +static inline int pte_newprot(pte_t pte) +{ + return(pte_present(pte) && (pte_get_bits(pte, _PAGE_NEWPROT))); +} + +/* + * ================================= + * Flags setting section. + * ================================= + */ + +static inline pte_t pte_mknewprot(pte_t pte) +{ + pte_set_bits(pte, _PAGE_NEWPROT); + return(pte); +} + +static inline pte_t pte_mkclean(pte_t pte) +{ + pte_clear_bits(pte, _PAGE_DIRTY); + return(pte); +} + +static inline pte_t pte_mkold(pte_t pte) +{ + pte_clear_bits(pte, _PAGE_ACCESSED); + return(pte); +} + +static inline pte_t pte_wrprotect(pte_t pte) +{ + if (likely(pte_get_bits(pte, _PAGE_RW))) + pte_clear_bits(pte, _PAGE_RW); + else + return pte; + return(pte_mknewprot(pte)); +} + +static inline pte_t pte_mkread(pte_t pte) +{ + if (unlikely(pte_get_bits(pte, _PAGE_USER))) + return pte; + pte_set_bits(pte, _PAGE_USER); + return(pte_mknewprot(pte)); +} + +static inline pte_t pte_mkdirty(pte_t pte) +{ + pte_set_bits(pte, _PAGE_DIRTY); + return(pte); +} + +static inline pte_t pte_mkyoung(pte_t pte) +{ + pte_set_bits(pte, _PAGE_ACCESSED); + return(pte); +} + +static inline pte_t pte_mkwrite_novma(pte_t pte) +{ + if (unlikely(pte_get_bits(pte, _PAGE_RW))) + return pte; + pte_set_bits(pte, _PAGE_RW); + return(pte_mknewprot(pte)); +} + +static inline pte_t pte_mkuptodate(pte_t pte) +{ + pte_clear_bits(pte, _PAGE_NEWPAGE); + if(pte_present(pte)) + pte_clear_bits(pte, _PAGE_NEWPROT); + return(pte); +} + +static inline pte_t pte_mknewpage(pte_t pte) +{ + pte_set_bits(pte, _PAGE_NEWPAGE); + return(pte); +} + +static inline void set_pte(pte_t *pteptr, pte_t pteval) +{ + pte_copy(*pteptr, pteval); + + /* If it's a swap entry, it needs to be marked _PAGE_NEWPAGE so + * fix_range knows to unmap it. _PAGE_NEWPROT is specific to + * mapped pages. + */ + + *pteptr = pte_mknewpage(*pteptr); + if(pte_present(*pteptr)) *pteptr = pte_mknewprot(*pteptr); +} + +#define PFN_PTE_SHIFT PAGE_SHIFT + +#define __HAVE_ARCH_PTE_SAME +static inline int pte_same(pte_t pte_a, pte_t pte_b) +{ + return !((pte_val(pte_a) ^ pte_val(pte_b)) & ~_PAGE_NEWPAGE); +} + +/* + * Conversion functions: convert a page and protection to a page entry, + * and a page entry and page directory to the page they refer to. + */ + +#define phys_to_page(phys) pfn_to_page(phys_to_pfn(phys)) +#define __virt_to_page(virt) phys_to_page(__pa(virt)) +#define page_to_phys(page) pfn_to_phys(page_to_pfn(page)) +#define virt_to_page(addr) __virt_to_page((const unsigned long) addr) + +#define mk_pte(page, pgprot) \ + ({ pte_t pte; \ + \ + pte_set_val(pte, page_to_phys(page), (pgprot)); \ + if (pte_present(pte)) \ + pte_mknewprot(pte_mknewpage(pte)); \ + pte;}) + +static inline pte_t pte_modify(pte_t pte, pgprot_t newprot) +{ + pte_set_val(pte, (pte_val(pte) & _PAGE_CHG_MASK), newprot); + return pte; +} + +/* + * the pmd page can be thought of an array like this: pmd_t[PTRS_PER_PMD] + * + * this macro returns the index of the entry in the pmd page which would + * control the given virtual address + */ +#define pmd_page_vaddr(pmd) ((unsigned long) __va(pmd_val(pmd) & PAGE_MASK)) + +struct mm_struct; +extern pte_t *virt_to_pte(struct mm_struct *mm, unsigned long addr); + +#define update_mmu_cache(vma,address,ptep) do {} while (0) +#define update_mmu_cache_range(vmf, vma, address, ptep, nr) do {} while (0) + +/* + * Encode/decode swap entries and swap PTEs. Swap PTEs are all PTEs that + * are !pte_none() && !pte_present(). + * + * Format of swap PTEs: + * + * 3 3 2 2 2 2 2 2 2 2 2 2 1 1 1 1 1 1 1 1 1 1 + * 1 0 9 8 7 6 5 4 3 2 1 0 9 8 7 6 5 4 3 2 1 0 9 8 7 6 5 4 3 2 1 0 + * <--------------- offset ----------------> E < type -> 0 0 0 1 0 + * + * E is the exclusive marker that is not stored in swap entries. + * _PAGE_NEWPAGE (bit 1) is always set to 1 in set_pte(). + */ +#define __swp_type(x) (((x).val >> 5) & 0x1f) +#define __swp_offset(x) ((x).val >> 11) + +#define __swp_entry(type, offset) \ + ((swp_entry_t) { (((type) & 0x1f) << 5) | ((offset) << 11) }) +#define __pte_to_swp_entry(pte) \ + ((swp_entry_t) { pte_val(pte_mkuptodate(pte)) }) +#define __swp_entry_to_pte(x) ((pte_t) { (x).val }) + +static inline int pte_swp_exclusive(pte_t pte) +{ + return pte_get_bits(pte, _PAGE_SWP_EXCLUSIVE); +} + +static inline pte_t pte_swp_mkexclusive(pte_t pte) +{ + pte_set_bits(pte, _PAGE_SWP_EXCLUSIVE); + return pte; +} + +static inline pte_t pte_swp_clear_exclusive(pte_t pte) +{ + pte_clear_bits(pte, _PAGE_SWP_EXCLUSIVE); + return pte; +} + +/* Clear a kernel PTE and flush it from the TLB */ +#define kpte_clear_flush(ptep, vaddr) \ +do { \ + pte_clear(&init_mm, (vaddr), (ptep)); \ + __flush_tlb_one((vaddr)); \ +} while (0) + +#endif diff --git a/arch/um/include/asm/processor-generic.h b/arch/um/include/asm/processor-generic.h new file mode 100644 index 0000000000..7414154b8e --- /dev/null +++ b/arch/um/include/asm/processor-generic.h @@ -0,0 +1,102 @@ +/* SPDX-License-Identifier: GPL-2.0 */ +/* + * Copyright (C) 2000 - 2007 Jeff Dike (jdike@{addtoit,linux.intel}.com) + */ + +#ifndef __UM_PROCESSOR_GENERIC_H +#define __UM_PROCESSOR_GENERIC_H + +struct pt_regs; + +struct task_struct; + +#include <asm/ptrace.h> +#include <sysdep/archsetjmp.h> + +#include <linux/prefetch.h> + +#include <asm/cpufeatures.h> + +struct mm_struct; + +struct thread_struct { + struct pt_regs regs; + struct pt_regs *segv_regs; + int singlestep_syscall; + void *fault_addr; + jmp_buf *fault_catcher; + struct task_struct *prev_sched; + struct arch_thread arch; + jmp_buf switch_buf; + struct { + int op; + union { + struct { + int pid; + } fork, exec; + struct { + int (*proc)(void *); + void *arg; + } thread; + struct { + void (*proc)(void *); + void *arg; + } cb; + } u; + } request; +}; + +#define INIT_THREAD \ +{ \ + .regs = EMPTY_REGS, \ + .fault_addr = NULL, \ + .prev_sched = NULL, \ + .arch = INIT_ARCH_THREAD, \ + .request = { 0 } \ +} + +/* + * User space process size: 3GB (default). + */ +extern unsigned long task_size; + +#define TASK_SIZE (task_size) + +#undef STACK_TOP +#undef STACK_TOP_MAX + +extern unsigned long stacksizelim; + +#define STACK_ROOM (stacksizelim) +#define STACK_TOP (TASK_SIZE - 2 * PAGE_SIZE) +#define STACK_TOP_MAX STACK_TOP + +/* This decides where the kernel will search for a free chunk of vm + * space during mmap's. + */ +#define TASK_UNMAPPED_BASE (0x40000000) + +extern void start_thread(struct pt_regs *regs, unsigned long entry, + unsigned long stack); + +struct cpuinfo_um { + unsigned long loops_per_jiffy; + int ipi_pipe[2]; + int cache_alignment; + union { + __u32 x86_capability[NCAPINTS + NBUGINTS]; + unsigned long x86_capability_alignment; + }; +}; + +extern struct cpuinfo_um boot_cpu_data; + +#define cpu_data(cpu) boot_cpu_data +#define current_cpu_data boot_cpu_data +#define cache_line_size() (boot_cpu_data.cache_alignment) + +extern unsigned long get_thread_reg(int reg, jmp_buf *buf); +#define KSTK_REG(tsk, reg) get_thread_reg(reg, &tsk->thread.switch_buf) +extern unsigned long __get_wchan(struct task_struct *p); + +#endif diff --git a/arch/um/include/asm/ptrace-generic.h b/arch/um/include/asm/ptrace-generic.h new file mode 100644 index 0000000000..adf91ef553 --- /dev/null +++ b/arch/um/include/asm/ptrace-generic.h @@ -0,0 +1,46 @@ +/* SPDX-License-Identifier: GPL-2.0 */ +/* + * Copyright (C) 2000 - 2007 Jeff Dike (jdike@{addtoit,linux.intel}.com) + */ + +#ifndef __UM_PTRACE_GENERIC_H +#define __UM_PTRACE_GENERIC_H + +#ifndef __ASSEMBLY__ + +#include <sysdep/ptrace.h> + +struct pt_regs { + struct uml_pt_regs regs; +}; + +#define arch_has_single_step() (1) + +#define EMPTY_REGS { .regs = EMPTY_UML_PT_REGS } + +#define PT_REGS_IP(r) UPT_IP(&(r)->regs) +#define PT_REGS_SP(r) UPT_SP(&(r)->regs) + +#define PT_REGS_RESTART_SYSCALL(r) UPT_RESTART_SYSCALL(&(r)->regs) + +#define PT_REGS_SYSCALL_NR(r) UPT_SYSCALL_NR(&(r)->regs) + +#define instruction_pointer(regs) PT_REGS_IP(regs) + +#define PTRACE_OLDSETOPTIONS 21 + +struct task_struct; + +extern long subarch_ptrace(struct task_struct *child, long request, + unsigned long addr, unsigned long data); +extern unsigned long getreg(struct task_struct *child, int regno); +extern int putreg(struct task_struct *child, int regno, unsigned long value); + +extern int arch_set_tls(struct task_struct *new, unsigned long tls); +extern void clear_flushed_tls(struct task_struct *task); +extern int syscall_trace_enter(struct pt_regs *regs); +extern void syscall_trace_leave(struct pt_regs *regs); + +#endif + +#endif diff --git a/arch/um/include/asm/sections.h b/arch/um/include/asm/sections.h new file mode 100644 index 0000000000..a3c1fb6ed6 --- /dev/null +++ b/arch/um/include/asm/sections.h @@ -0,0 +1,10 @@ +/* SPDX-License-Identifier: GPL-2.0 */ +#ifndef __UM_SECTIONS_H +#define __UM_SECTIONS_H + +#include <asm-generic/sections.h> + +extern char __binary_start[]; +extern char __syscall_stub_start[], __syscall_stub_end[]; + +#endif diff --git a/arch/um/include/asm/setup.h b/arch/um/include/asm/setup.h new file mode 100644 index 0000000000..80ada899f2 --- /dev/null +++ b/arch/um/include/asm/setup.h @@ -0,0 +1,11 @@ +/* SPDX-License-Identifier: GPL-2.0 */ +#ifndef SETUP_H_INCLUDED +#define SETUP_H_INCLUDED + +/* POSIX mandated with _POSIX_ARG_MAX that we can rely on 4096 chars in the + * command line, so this choice is ok. + */ + +#define COMMAND_LINE_SIZE 4096 + +#endif /* SETUP_H_INCLUDED */ diff --git a/arch/um/include/asm/smp.h b/arch/um/include/asm/smp.h new file mode 100644 index 0000000000..a8cc1d46dd --- /dev/null +++ b/arch/um/include/asm/smp.h @@ -0,0 +1,7 @@ +/* SPDX-License-Identifier: GPL-2.0 */ +#ifndef __UM_SMP_H +#define __UM_SMP_H + +#define hard_smp_processor_id() 0 + +#endif diff --git a/arch/um/include/asm/stacktrace.h b/arch/um/include/asm/stacktrace.h new file mode 100644 index 0000000000..436b55952c --- /dev/null +++ b/arch/um/include/asm/stacktrace.h @@ -0,0 +1,43 @@ +/* SPDX-License-Identifier: GPL-2.0 */ +#ifndef _ASM_UML_STACKTRACE_H +#define _ASM_UML_STACKTRACE_H + +#include <linux/uaccess.h> +#include <linux/ptrace.h> + +struct stack_frame { + struct stack_frame *next_frame; + unsigned long return_address; +}; + +struct stacktrace_ops { + void (*address)(void *data, unsigned long address, int reliable); +}; + +#ifdef CONFIG_FRAME_POINTER +static inline unsigned long +get_frame_pointer(struct task_struct *task, struct pt_regs *segv_regs) +{ + if (!task || task == current) + return segv_regs ? PT_REGS_BP(segv_regs) : current_bp(); + return KSTK_EBP(task); +} +#else +static inline unsigned long +get_frame_pointer(struct task_struct *task, struct pt_regs *segv_regs) +{ + return 0; +} +#endif + +static inline unsigned long +*get_stack_pointer(struct task_struct *task, struct pt_regs *segv_regs) +{ + if (!task || task == current) + return segv_regs ? (unsigned long *)PT_REGS_SP(segv_regs) : current_sp(); + return (unsigned long *)KSTK_ESP(task); +} + +void dump_trace(struct task_struct *tsk, const struct stacktrace_ops *ops, void *data); + +#endif /* _ASM_UML_STACKTRACE_H */ diff --git a/arch/um/include/asm/syscall-generic.h b/arch/um/include/asm/syscall-generic.h new file mode 100644 index 0000000000..172b74143c --- /dev/null +++ b/arch/um/include/asm/syscall-generic.h @@ -0,0 +1,67 @@ +/* SPDX-License-Identifier: GPL-2.0-only */ +/* + * Access to user system call parameters and results + * + * See asm-generic/syscall.h for function descriptions. + * + * Copyright (C) 2015 Mickaël Salaün <mic@digikod.net> + */ + +#ifndef __UM_SYSCALL_GENERIC_H +#define __UM_SYSCALL_GENERIC_H + +#include <asm/ptrace.h> +#include <linux/err.h> +#include <linux/sched.h> +#include <sysdep/ptrace.h> + +static inline int syscall_get_nr(struct task_struct *task, struct pt_regs *regs) +{ + + return PT_REGS_SYSCALL_NR(regs); +} + +static inline void syscall_rollback(struct task_struct *task, + struct pt_regs *regs) +{ + /* do nothing */ +} + +static inline long syscall_get_error(struct task_struct *task, + struct pt_regs *regs) +{ + const long error = regs_return_value(regs); + + return IS_ERR_VALUE(error) ? error : 0; +} + +static inline long syscall_get_return_value(struct task_struct *task, + struct pt_regs *regs) +{ + return regs_return_value(regs); +} + +static inline void syscall_set_return_value(struct task_struct *task, + struct pt_regs *regs, + int error, long val) +{ + PT_REGS_SET_SYSCALL_RETURN(regs, (long) error ?: val); +} + +static inline void syscall_get_arguments(struct task_struct *task, + struct pt_regs *regs, + unsigned long *args) +{ + const struct uml_pt_regs *r = ®s->regs; + + *args++ = UPT_SYSCALL_ARG1(r); + *args++ = UPT_SYSCALL_ARG2(r); + *args++ = UPT_SYSCALL_ARG3(r); + *args++ = UPT_SYSCALL_ARG4(r); + *args++ = UPT_SYSCALL_ARG5(r); + *args = UPT_SYSCALL_ARG6(r); +} + +/* See arch/x86/um/asm/syscall.h for syscall_get_arch() definition. */ + +#endif /* __UM_SYSCALL_GENERIC_H */ diff --git a/arch/um/include/asm/sysrq.h b/arch/um/include/asm/sysrq.h new file mode 100644 index 0000000000..8fc8c65cd3 --- /dev/null +++ b/arch/um/include/asm/sysrq.h @@ -0,0 +1,8 @@ +/* SPDX-License-Identifier: GPL-2.0 */ +#ifndef __UM_SYSRQ_H +#define __UM_SYSRQ_H + +struct task_struct; +extern void show_trace(struct task_struct* task, unsigned long *stack); + +#endif diff --git a/arch/um/include/asm/thread_info.h b/arch/um/include/asm/thread_info.h new file mode 100644 index 0000000000..c7b4b49826 --- /dev/null +++ b/arch/um/include/asm/thread_info.h @@ -0,0 +1,74 @@ +/* SPDX-License-Identifier: GPL-2.0 */ +/* + * Copyright (C) 2002 - 2007 Jeff Dike (jdike@{addtoit,linux.intel}.com) + */ + +#ifndef __UM_THREAD_INFO_H +#define __UM_THREAD_INFO_H + +#define THREAD_SIZE_ORDER CONFIG_KERNEL_STACK_ORDER +#define THREAD_SIZE ((1 << CONFIG_KERNEL_STACK_ORDER) * PAGE_SIZE) + +#ifndef __ASSEMBLY__ + +#include <asm/types.h> +#include <asm/page.h> +#include <asm/segment.h> +#include <sysdep/ptrace_user.h> + +struct thread_info { + struct task_struct *task; /* main task structure */ + unsigned long flags; /* low level flags */ + __u32 cpu; /* current CPU */ + int preempt_count; /* 0 => preemptable, + <0 => BUG */ + struct thread_info *real_thread; /* Points to non-IRQ stack */ + unsigned long aux_fp_regs[FP_SIZE]; /* auxiliary fp_regs to save/restore + them out-of-band */ +}; + +#define INIT_THREAD_INFO(tsk) \ +{ \ + .task = &tsk, \ + .flags = 0, \ + .cpu = 0, \ + .preempt_count = INIT_PREEMPT_COUNT, \ + .real_thread = NULL, \ +} + +/* how to get the thread information struct from C */ +static inline struct thread_info *current_thread_info(void) +{ + struct thread_info *ti; + unsigned long mask = THREAD_SIZE - 1; + void *p; + + asm volatile ("" : "=r" (p) : "0" (&ti)); + ti = (struct thread_info *) (((unsigned long)p) & ~mask); + return ti; +} + +#endif + +#define TIF_SYSCALL_TRACE 0 /* syscall trace active */ +#define TIF_SIGPENDING 1 /* signal pending */ +#define TIF_NEED_RESCHED 2 /* rescheduling necessary */ +#define TIF_NOTIFY_SIGNAL 3 /* signal notifications exist */ +#define TIF_RESTART_BLOCK 4 +#define TIF_MEMDIE 5 /* is terminating due to OOM killer */ +#define TIF_SYSCALL_AUDIT 6 +#define TIF_RESTORE_SIGMASK 7 +#define TIF_NOTIFY_RESUME 8 +#define TIF_SECCOMP 9 /* secure computing */ +#define TIF_SINGLESTEP 10 /* single stepping userspace */ + +#define _TIF_SYSCALL_TRACE (1 << TIF_SYSCALL_TRACE) +#define _TIF_SIGPENDING (1 << TIF_SIGPENDING) +#define _TIF_NEED_RESCHED (1 << TIF_NEED_RESCHED) +#define _TIF_NOTIFY_SIGNAL (1 << TIF_NOTIFY_SIGNAL) +#define _TIF_MEMDIE (1 << TIF_MEMDIE) +#define _TIF_SYSCALL_AUDIT (1 << TIF_SYSCALL_AUDIT) +#define _TIF_SECCOMP (1 << TIF_SECCOMP) +#define _TIF_SINGLESTEP (1 << TIF_SINGLESTEP) + +#endif diff --git a/arch/um/include/asm/timex.h b/arch/um/include/asm/timex.h new file mode 100644 index 0000000000..9f27176adb --- /dev/null +++ b/arch/um/include/asm/timex.h @@ -0,0 +1,9 @@ +/* SPDX-License-Identifier: GPL-2.0 */ +#ifndef __UM_TIMEX_H +#define __UM_TIMEX_H + +#define CLOCK_TICK_RATE (HZ) + +#include <asm-generic/timex.h> + +#endif diff --git a/arch/um/include/asm/tlb.h b/arch/um/include/asm/tlb.h new file mode 100644 index 0000000000..0422467bda --- /dev/null +++ b/arch/um/include/asm/tlb.h @@ -0,0 +1,11 @@ +/* SPDX-License-Identifier: GPL-2.0 */ +#ifndef __UM_TLB_H +#define __UM_TLB_H + +#include <linux/mm.h> + +#include <asm/tlbflush.h> +#include <asm/cacheflush.h> +#include <asm-generic/tlb.h> + +#endif diff --git a/arch/um/include/asm/tlbflush.h b/arch/um/include/asm/tlbflush.h new file mode 100644 index 0000000000..a5bda89039 --- /dev/null +++ b/arch/um/include/asm/tlbflush.h @@ -0,0 +1,31 @@ +/* SPDX-License-Identifier: GPL-2.0 */ +/* + * Copyright (C) 2002 - 2007 Jeff Dike (jdike@{addtoit,linux.intel}.com) + */ + +#ifndef __UM_TLBFLUSH_H +#define __UM_TLBFLUSH_H + +#include <linux/mm.h> + +/* + * TLB flushing: + * + * - flush_tlb() flushes the current mm struct TLBs + * - flush_tlb_all() flushes all processes TLBs + * - flush_tlb_mm(mm) flushes the specified mm context TLB's + * - flush_tlb_page(vma, vmaddr) flushes one page + * - flush_tlb_kernel_vm() flushes the kernel vm area + * - flush_tlb_range(vma, start, end) flushes a range of pages + */ + +extern void flush_tlb_all(void); +extern void flush_tlb_mm(struct mm_struct *mm); +extern void flush_tlb_range(struct vm_area_struct *vma, unsigned long start, + unsigned long end); +extern void flush_tlb_page(struct vm_area_struct *vma, unsigned long address); +extern void flush_tlb_kernel_vm(void); +extern void flush_tlb_kernel_range(unsigned long start, unsigned long end); +extern void __flush_tlb_one(unsigned long addr); + +#endif diff --git a/arch/um/include/asm/uaccess.h b/arch/um/include/asm/uaccess.h new file mode 100644 index 0000000000..7d9d60e41e --- /dev/null +++ b/arch/um/include/asm/uaccess.h @@ -0,0 +1,62 @@ +/* SPDX-License-Identifier: GPL-2.0 */ +/* + * Copyright (C) 2002 Jeff Dike (jdike@karaya.com) + * Copyright (C) 2015 Richard Weinberger (richard@nod.at) + */ + +#ifndef __UM_UACCESS_H +#define __UM_UACCESS_H + +#include <asm/elf.h> +#include <asm/unaligned.h> + +#define __under_task_size(addr, size) \ + (((unsigned long) (addr) < TASK_SIZE) && \ + (((unsigned long) (addr) + (size)) < TASK_SIZE)) + +#define __access_ok_vsyscall(addr, size) \ + (((unsigned long) (addr) >= FIXADDR_USER_START) && \ + ((unsigned long) (addr) + (size) <= FIXADDR_USER_END) && \ + ((unsigned long) (addr) + (size) >= (unsigned long)(addr))) + +#define __addr_range_nowrap(addr, size) \ + ((unsigned long) (addr) <= ((unsigned long) (addr) + (size))) + +extern unsigned long raw_copy_from_user(void *to, const void __user *from, unsigned long n); +extern unsigned long raw_copy_to_user(void __user *to, const void *from, unsigned long n); +extern unsigned long __clear_user(void __user *mem, unsigned long len); +static inline int __access_ok(const void __user *ptr, unsigned long size); + +/* Teach asm-generic/uaccess.h that we have C functions for these. */ +#define __access_ok __access_ok +#define __clear_user __clear_user + +#define INLINE_COPY_FROM_USER +#define INLINE_COPY_TO_USER + +#include <asm-generic/uaccess.h> + +static inline int __access_ok(const void __user *ptr, unsigned long size) +{ + unsigned long addr = (unsigned long)ptr; + return __addr_range_nowrap(addr, size) && + (__under_task_size(addr, size) || + __access_ok_vsyscall(addr, size)); +} + +/* no pagefaults for kernel addresses in um */ +#define __get_kernel_nofault(dst, src, type, err_label) \ +do { \ + *((type *)dst) = get_unaligned((type *)(src)); \ + if (0) /* make sure the label looks used to the compiler */ \ + goto err_label; \ +} while (0) + +#define __put_kernel_nofault(dst, src, type, err_label) \ +do { \ + put_unaligned(*((type *)src), (type *)(dst)); \ + if (0) /* make sure the label looks used to the compiler */ \ + goto err_label; \ +} while (0) + +#endif diff --git a/arch/um/include/asm/unwind.h b/arch/um/include/asm/unwind.h new file mode 100644 index 0000000000..7ffa5437b7 --- /dev/null +++ b/arch/um/include/asm/unwind.h @@ -0,0 +1,8 @@ +#ifndef _ASM_UML_UNWIND_H +#define _ASM_UML_UNWIND_H + +static inline void +unwind_module_init(struct module *mod, void *orc_ip, size_t orc_ip_size, + void *orc, size_t orc_size) {} + +#endif /* _ASM_UML_UNWIND_H */ diff --git a/arch/um/include/asm/vmalloc.h b/arch/um/include/asm/vmalloc.h new file mode 100644 index 0000000000..9a7b9ed937 --- /dev/null +++ b/arch/um/include/asm/vmalloc.h @@ -0,0 +1,4 @@ +#ifndef _ASM_UM_VMALLOC_H +#define _ASM_UM_VMALLOC_H + +#endif /* _ASM_UM_VMALLOC_H */ diff --git a/arch/um/include/asm/vmlinux.lds.h b/arch/um/include/asm/vmlinux.lds.h new file mode 100644 index 0000000000..149494ae78 --- /dev/null +++ b/arch/um/include/asm/vmlinux.lds.h @@ -0,0 +1,2 @@ +#include <asm/thread_info.h> +#include <asm-generic/vmlinux.lds.h> diff --git a/arch/um/include/asm/xor.h b/arch/um/include/asm/xor.h new file mode 100644 index 0000000000..647fae200c --- /dev/null +++ b/arch/um/include/asm/xor.h @@ -0,0 +1,24 @@ +/* SPDX-License-Identifier: GPL-2.0 */ +#ifndef _ASM_UM_XOR_H +#define _ASM_UM_XOR_H + +#ifdef CONFIG_64BIT +#undef CONFIG_X86_32 +#define TT_CPU_INF_XOR_DEFAULT (AVX_SELECT(&xor_block_sse_pf64)) +#else +#define CONFIG_X86_32 1 +#define TT_CPU_INF_XOR_DEFAULT (AVX_SELECT(&xor_block_8regs)) +#endif + +#include <asm/cpufeature.h> +#include <../../x86/include/asm/xor.h> +#include <linux/time-internal.h> + +#ifdef CONFIG_UML_TIME_TRAVEL_SUPPORT +#undef XOR_SELECT_TEMPLATE +/* pick an arbitrary one - measuring isn't possible with inf-cpu */ +#define XOR_SELECT_TEMPLATE(x) \ + (time_travel_mode == TT_MODE_INFCPU ? TT_CPU_INF_XOR_DEFAULT : x) +#endif + +#endif diff --git a/arch/um/include/linux/time-internal.h b/arch/um/include/linux/time-internal.h new file mode 100644 index 0000000000..b22226634f --- /dev/null +++ b/arch/um/include/linux/time-internal.h @@ -0,0 +1,91 @@ +/* SPDX-License-Identifier: GPL-2.0 */ +/* + * Copyright (C) 2012 - 2014 Cisco Systems + * Copyright (C) 2000 - 2007 Jeff Dike (jdike@{addtoit,linux.intel}.com) + */ + +#ifndef __TIMER_INTERNAL_H__ +#define __TIMER_INTERNAL_H__ +#include <linux/list.h> +#include <asm/bug.h> +#include <shared/timetravel.h> + +#define TIMER_MULTIPLIER 256 +#define TIMER_MIN_DELTA 500 + +#ifdef CONFIG_UML_TIME_TRAVEL_SUPPORT +struct time_travel_event { + unsigned long long time; + void (*fn)(struct time_travel_event *d); + struct list_head list; + bool pending, onstack; +}; + +void time_travel_sleep(void); + +static inline void +time_travel_set_event_fn(struct time_travel_event *e, + void (*fn)(struct time_travel_event *d)) +{ + e->fn = fn; +} + +void __time_travel_propagate_time(void); + +static inline void time_travel_propagate_time(void) +{ + if (time_travel_mode == TT_MODE_EXTERNAL) + __time_travel_propagate_time(); +} + +void __time_travel_wait_readable(int fd); + +static inline void time_travel_wait_readable(int fd) +{ + if (time_travel_mode == TT_MODE_EXTERNAL) + __time_travel_wait_readable(fd); +} + +void time_travel_add_irq_event(struct time_travel_event *e); +void time_travel_add_event_rel(struct time_travel_event *e, + unsigned long long delay_ns); +bool time_travel_del_event(struct time_travel_event *e); +#else +struct time_travel_event { +}; + +static inline void time_travel_sleep(void) +{ +} + +/* this is a macro so the event/function need not exist */ +#define time_travel_set_event_fn(e, fn) do {} while (0) + +static inline void time_travel_propagate_time(void) +{ +} + +static inline void time_travel_wait_readable(int fd) +{ +} + +static inline void time_travel_add_irq_event(struct time_travel_event *e) +{ + WARN_ON(1); +} + +/* + * not inlines so the data structure need not exist, + * cause linker failures + */ +extern void time_travel_not_configured(void); +#define time_travel_add_event_rel(...) time_travel_not_configured() +#define time_travel_del_event(...) time_travel_not_configured() +#endif /* CONFIG_UML_TIME_TRAVEL_SUPPORT */ + +/* + * Without CONFIG_UML_TIME_TRAVEL_SUPPORT this is a linker error if used, + * which is intentional since we really shouldn't link it in that case. + */ +void time_travel_ndelay(unsigned long nsec); +#endif /* __TIMER_INTERNAL_H__ */ diff --git a/arch/um/include/linux/virtio-uml.h b/arch/um/include/linux/virtio-uml.h new file mode 100644 index 0000000000..2f652fa90f --- /dev/null +++ b/arch/um/include/linux/virtio-uml.h @@ -0,0 +1,13 @@ +/* SPDX-License-Identifier: GPL-2.0 */ +/* + * Copyright (C) 2021 Intel Corporation + * Author: Johannes Berg <johannes@sipsolutions.net> + */ + +#ifndef __VIRTIO_UML_H__ +#define __VIRTIO_UML_H__ + +void virtio_uml_set_no_vq_suspend(struct virtio_device *vdev, + bool no_vq_suspend); + +#endif /* __VIRTIO_UML_H__ */ diff --git a/arch/um/include/shared/arch.h b/arch/um/include/shared/arch.h new file mode 100644 index 0000000000..880ee42a33 --- /dev/null +++ b/arch/um/include/shared/arch.h @@ -0,0 +1,15 @@ +/* SPDX-License-Identifier: GPL-2.0 */ +/* + * Copyright (C) 2007 Jeff Dike (jdike@{addtoit,linux.intel}.com) + */ + +#ifndef __ARCH_H__ +#define __ARCH_H__ + +#include <sysdep/ptrace.h> + +extern void arch_check_bugs(void); +extern int arch_fixup(unsigned long address, struct uml_pt_regs *regs); +extern void arch_examine_signal(int sig, struct uml_pt_regs *regs); + +#endif diff --git a/arch/um/include/shared/as-layout.h b/arch/um/include/shared/as-layout.h new file mode 100644 index 0000000000..9ec3015bc5 --- /dev/null +++ b/arch/um/include/shared/as-layout.h @@ -0,0 +1,60 @@ +/* SPDX-License-Identifier: GPL-2.0 */ +/* + * Copyright (C) 2007 Jeff Dike (jdike@{addtoit,linux.intel}.com) + */ + +#ifndef __START_H__ +#define __START_H__ + +#include <generated/asm-offsets.h> + +/* + * Stolen from linux/const.h, which can't be directly included since + * this is used in userspace code, which has no access to the kernel + * headers. Changed to be suitable for adding casts to the start, + * rather than "UL" to the end. + */ + +/* Some constant macros are used in both assembler and + * C code. Therefore we cannot annotate them always with + * 'UL' and other type specifiers unilaterally. We + * use the following macros to deal with this. + */ +#define STUB_START stub_start +#define STUB_CODE STUB_START +#define STUB_DATA (STUB_CODE + UM_KERN_PAGE_SIZE) +#define STUB_DATA_PAGES 1 /* must be a power of two */ +#define STUB_END (STUB_DATA + STUB_DATA_PAGES * UM_KERN_PAGE_SIZE) + +#ifndef __ASSEMBLY__ + +#include <sysdep/ptrace.h> + +struct cpu_task { + int pid; + void *task; +}; + +extern struct cpu_task cpu_tasks[]; + +extern unsigned long high_physmem; +extern unsigned long uml_physmem; +extern unsigned long uml_reserved; +extern unsigned long end_vm; +extern unsigned long start_vm; +extern unsigned long long highmem; + +extern unsigned long brk_start; + +extern unsigned long host_task_size; +extern unsigned long stub_start; + +extern int linux_main(int argc, char **argv); +extern void uml_finishsetup(void); + +struct siginfo; +extern void (*sig_info[])(int, struct siginfo *si, struct uml_pt_regs *); + +#endif + +#endif diff --git a/arch/um/include/shared/common-offsets.h b/arch/um/include/shared/common-offsets.h new file mode 100644 index 0000000000..96195483fb --- /dev/null +++ b/arch/um/include/shared/common-offsets.h @@ -0,0 +1,36 @@ +/* SPDX-License-Identifier: GPL-2.0 */ +/* for use by sys-$SUBARCH/kernel-offsets.c */ +#include <stub-data.h> + +DEFINE(KERNEL_MADV_REMOVE, MADV_REMOVE); + +DEFINE(UM_KERN_PAGE_SIZE, PAGE_SIZE); +DEFINE(UM_KERN_PAGE_MASK, PAGE_MASK); +DEFINE(UM_KERN_PAGE_SHIFT, PAGE_SHIFT); +DEFINE(UM_NSEC_PER_SEC, NSEC_PER_SEC); + +DEFINE(UM_GFP_KERNEL, GFP_KERNEL); +DEFINE(UM_GFP_ATOMIC, GFP_ATOMIC); + +DEFINE(UM_THREAD_SIZE, THREAD_SIZE); + +DEFINE(UM_NSEC_PER_SEC, NSEC_PER_SEC); +DEFINE(UM_NSEC_PER_USEC, NSEC_PER_USEC); + +#ifdef CONFIG_PRINTK +DEFINE(UML_CONFIG_PRINTK, CONFIG_PRINTK); +#endif +#ifdef CONFIG_UML_X86 +DEFINE(UML_CONFIG_UML_X86, CONFIG_UML_X86); +#endif +#ifdef CONFIG_64BIT +DEFINE(UML_CONFIG_64BIT, CONFIG_64BIT); +#endif +#ifdef CONFIG_UML_TIME_TRAVEL_SUPPORT +DEFINE(UML_CONFIG_UML_TIME_TRAVEL_SUPPORT, CONFIG_UML_TIME_TRAVEL_SUPPORT); +#endif + +/* for stub */ +DEFINE(UML_STUB_FIELD_OFFSET, offsetof(struct stub_data, offset)); +DEFINE(UML_STUB_FIELD_CHILD_ERR, offsetof(struct stub_data, child_err)); +DEFINE(UML_STUB_FIELD_FD, offsetof(struct stub_data, fd)); diff --git a/arch/um/include/shared/elf_user.h b/arch/um/include/shared/elf_user.h new file mode 100644 index 0000000000..fd461ee40c --- /dev/null +++ b/arch/um/include/shared/elf_user.h @@ -0,0 +1,19 @@ +/* SPDX-License-Identifier: GPL-2.0 */ +/* + * Copyright (C) 2004 Fujitsu Siemens Computers GmbH + * Author: Bodo Stroesser <bstroesser@fujitsu-siemens.com> + */ + +#ifndef __ELF_USER_H__ +#define __ELF_USER_H__ + +/* For compilation on a host that doesn't support AT_SYSINFO (Linux 2.4) */ + +#ifndef AT_SYSINFO +#define AT_SYSINFO 32 +#endif +#ifndef AT_SYSINFO_EHDR +#define AT_SYSINFO_EHDR 33 +#endif + +#endif diff --git a/arch/um/include/shared/frame_kern.h b/arch/um/include/shared/frame_kern.h new file mode 100644 index 0000000000..ed952ac661 --- /dev/null +++ b/arch/um/include/shared/frame_kern.h @@ -0,0 +1,15 @@ +/* SPDX-License-Identifier: GPL-2.0 */ +/* + * Copyright (C) 2002 Jeff Dike (jdike@karaya.com) + */ + +#ifndef __FRAME_KERN_H_ +#define __FRAME_KERN_H_ + +extern int setup_signal_stack_sc(unsigned long stack_top, struct ksignal *ksig, + struct pt_regs *regs, sigset_t *mask); +extern int setup_signal_stack_si(unsigned long stack_top, struct ksignal *ksig, + struct pt_regs *regs, sigset_t *mask); + +#endif + diff --git a/arch/um/include/shared/init.h b/arch/um/include/shared/init.h new file mode 100644 index 0000000000..1a659e2e8c --- /dev/null +++ b/arch/um/include/shared/init.h @@ -0,0 +1,127 @@ +/* SPDX-License-Identifier: GPL-2.0 */ +#ifndef _LINUX_UML_INIT_H +#define _LINUX_UML_INIT_H + +/* These macros are used to mark some functions or + * initialized data (doesn't apply to uninitialized data) + * as `initialization' functions. The kernel can take this + * as hint that the function is used only during the initialization + * phase and free up used memory resources after + * + * Usage: + * For functions: + * + * You should add __init immediately before the function name, like: + * + * static void __init initme(int x, int y) + * { + * extern int z; z = x * y; + * } + * + * If the function has a prototype somewhere, you can also add + * __init between closing brace of the prototype and semicolon: + * + * extern int initialize_foobar_device(int, int, int) __init; + * + * For initialized data: + * You should insert __initdata between the variable name and equal + * sign followed by value, e.g.: + * + * static int init_variable __initdata = 0; + * static const char linux_logo[] __initconst = { 0x32, 0x36, ... }; + * + * Don't forget to initialize data not at file scope, i.e. within a function, + * as gcc otherwise puts the data into the bss section and not into the init + * section. + * + * Also note, that this data cannot be "const". + */ + +#ifndef _LINUX_INIT_H +typedef int (*initcall_t)(void); +typedef void (*exitcall_t)(void); + +#include <linux/compiler_types.h> + +/* These are for everybody (although not all archs will actually + discard it in modules) */ +#define __init __section(".init.text") +#define __initdata __section(".init.data") +#define __exitdata __section(".exit.data") +#define __exit_call __used __section(".exitcall.exit") + +#ifdef MODULE +#define __exit __section(".exit.text") +#else +#define __exit __used __section(".exit.text") +#endif + +#endif + +#ifndef MODULE +struct uml_param { + const char *str; + int (*setup_func)(char *, int *); +}; + +extern initcall_t __uml_postsetup_start, __uml_postsetup_end; +extern const char *__uml_help_start, *__uml_help_end; +#endif + +#define __uml_exitcall(fn) \ + static exitcall_t __uml_exitcall_##fn __uml_exit_call = fn + +extern struct uml_param __uml_setup_start, __uml_setup_end; + +#define __uml_postsetup(fn) \ + static initcall_t __uml_postsetup_##fn __uml_postsetup_call = fn + +#define __non_empty_string(dummyname,string) \ + struct __uml_non_empty_string_struct_##dummyname \ + { \ + char _string[sizeof(string)-2]; \ + } + +#ifndef MODULE +#define __uml_setup(str, fn, help...) \ + __non_empty_string(fn ##_setup, str); \ + __uml_help(fn, help); \ + static char __uml_setup_str_##fn[] __initdata = str; \ + static struct uml_param __uml_setup_##fn __uml_init_setup = { __uml_setup_str_##fn, fn } +#else +#define __uml_setup(str, fn, help...) \ + +#endif + +#define __uml_help(fn, help...) \ + __non_empty_string(fn ##__help, help); \ + static char __uml_help_str_##fn[] __initdata = help; \ + static const char *__uml_help_##fn __uml_setup_help = __uml_help_str_##fn + +/* + * Mark functions and data as being only used at initialization + * or exit time. + */ +#define __uml_init_setup __used __section(".uml.setup.init") +#define __uml_setup_help __used __section(".uml.help.init") +#define __uml_postsetup_call __used __section(".uml.postsetup.init") +#define __uml_exit_call __used __section(".uml.exitcall.exit") + +#ifdef __UM_HOST__ + +#define __define_initcall(level,fn) \ + static initcall_t __initcall_##fn __used \ + __attribute__((__section__(".initcall" level ".init"))) = fn + +/* Userspace initcalls shouldn't depend on anything in the kernel, so we'll + * make them run first. + */ +#define __initcall(fn) __define_initcall("1", fn) + +#define __exitcall(fn) static exitcall_t __exitcall_##fn __exit_call = fn + +#define __init_call __used __section(".initcall.init") + +#endif + +#endif /* _LINUX_UML_INIT_H */ diff --git a/arch/um/include/shared/irq_kern.h b/arch/um/include/shared/irq_kern.h new file mode 100644 index 0000000000..44357fa6ee --- /dev/null +++ b/arch/um/include/shared/irq_kern.h @@ -0,0 +1,80 @@ +/* SPDX-License-Identifier: GPL-2.0 */ +/* + * Copyright (C) 2001, 2002 Jeff Dike (jdike@karaya.com) + */ + +#ifndef __IRQ_KERN_H__ +#define __IRQ_KERN_H__ + +#include <linux/interrupt.h> +#include <linux/time-internal.h> +#include <asm/ptrace.h> +#include "irq_user.h" + +#define UM_IRQ_ALLOC -1 + +int um_request_irq(int irq, int fd, enum um_irq_type type, + irq_handler_t handler, unsigned long irqflags, + const char *devname, void *dev_id); + +#ifdef CONFIG_UML_TIME_TRAVEL_SUPPORT +/** + * um_request_irq_tt - request an IRQ with timetravel handler + * + * @irq: the IRQ number, or %UM_IRQ_ALLOC + * @fd: The file descriptor to request an IRQ for + * @type: read or write + * @handler: the (generic style) IRQ handler + * @irqflags: Linux IRQ flags + * @devname: name for this to show + * @dev_id: data pointer to pass to the IRQ handler + * @timetravel_handler: the timetravel interrupt handler, invoked with the IRQ + * number, fd, dev_id and time-travel event pointer. + * + * Returns: The interrupt number assigned or a negative error. + * + * Note that the timetravel handler is invoked only if the time_travel_mode is + * %TT_MODE_EXTERNAL, and then it is invoked even while the system is suspended! + * This function must call time_travel_add_irq_event() for the event passed with + * an appropriate delay, before sending an ACK on the socket it was invoked for. + * + * If this was called while the system is suspended, then adding the event will + * cause the system to resume. + * + * Since this function will almost certainly have to handle the FD's condition, + * a read will consume the message, and after that it is up to the code using + * it to pass such a message to the @handler in whichever way it can. + * + * If time_travel_mode is not %TT_MODE_EXTERNAL the @timetravel_handler will + * not be invoked at all and the @handler must handle the FD becoming + * readable (or writable) instead. Use um_irq_timetravel_handler_used() to + * distinguish these cases. + * + * See virtio_uml.c for an example. + */ +int um_request_irq_tt(int irq, int fd, enum um_irq_type type, + irq_handler_t handler, unsigned long irqflags, + const char *devname, void *dev_id, + void (*timetravel_handler)(int, int, void *, + struct time_travel_event *)); +#else +static inline +int um_request_irq_tt(int irq, int fd, enum um_irq_type type, + irq_handler_t handler, unsigned long irqflags, + const char *devname, void *dev_id, + void (*timetravel_handler)(int, int, void *, + struct time_travel_event *)) +{ + return um_request_irq(irq, fd, type, handler, irqflags, + devname, dev_id); +} +#endif + +static inline bool um_irq_timetravel_handler_used(void) +{ + return time_travel_mode == TT_MODE_EXTERNAL; +} + +void um_free_irq(int irq, void *dev_id); +void free_irqs(void); +#endif diff --git a/arch/um/include/shared/irq_user.h b/arch/um/include/shared/irq_user.h new file mode 100644 index 0000000000..da0f6eea30 --- /dev/null +++ b/arch/um/include/shared/irq_user.h @@ -0,0 +1,24 @@ +/* SPDX-License-Identifier: GPL-2.0 */ +/* + * Copyright (C) 2001 - 2007 Jeff Dike (jdike@{addtoit,linux.intel}.com) + */ + +#ifndef __IRQ_USER_H__ +#define __IRQ_USER_H__ + +#include <sysdep/ptrace.h> + +enum um_irq_type { + IRQ_READ, + IRQ_WRITE, + NUM_IRQ_TYPES, +}; + +struct siginfo; +extern void sigio_handler(int sig, struct siginfo *unused_si, struct uml_pt_regs *regs); +void sigio_run_timetravel_handlers(void); +extern void free_irq_by_fd(int fd); +extern void deactivate_fd(int fd, int irqnum); +extern int deactivate_all_fds(void); + +#endif diff --git a/arch/um/include/shared/kern.h b/arch/um/include/shared/kern.h new file mode 100644 index 0000000000..3a9c75a841 --- /dev/null +++ b/arch/um/include/shared/kern.h @@ -0,0 +1,22 @@ +/* SPDX-License-Identifier: GPL-2.0 */ +/* + * Copyright (C) 2000 Jeff Dike (jdike@karaya.com) + */ + +#ifndef __KERN_H__ +#define __KERN_H__ + +/* These are all user-mode things which are convenient to call directly + * from kernel code and for which writing a wrapper is too much of a pain. + * The regular include files can't be included because this file is included + * only into kernel code, and user-space includes conflict with kernel + * includes. + */ + +extern int printf(const char *fmt, ...); +extern void *sbrk(int increment); +extern int pause(void); +extern void exit(int); + +#endif + diff --git a/arch/um/include/shared/kern_util.h b/arch/um/include/shared/kern_util.h new file mode 100644 index 0000000000..d8b8b4f07e --- /dev/null +++ b/arch/um/include/shared/kern_util.h @@ -0,0 +1,70 @@ +/* SPDX-License-Identifier: GPL-2.0 */ +/* + * Copyright (C) 2000 - 2007 Jeff Dike (jdike@{addtoit,linux.intel}.com) + */ + +#ifndef __KERN_UTIL_H__ +#define __KERN_UTIL_H__ + +#include <sysdep/ptrace.h> +#include <sysdep/faultinfo.h> + +struct siginfo; + +extern int uml_exitcode; + +extern int ncpus; +extern int kmalloc_ok; + +#define UML_ROUND_UP(addr) \ + ((((unsigned long) addr) + PAGE_SIZE - 1) & PAGE_MASK) + +extern unsigned long alloc_stack(int order, int atomic); +extern void free_stack(unsigned long stack, int order); + +struct pt_regs; +extern void do_signal(struct pt_regs *regs); +extern void interrupt_end(void); +extern void relay_signal(int sig, struct siginfo *si, struct uml_pt_regs *regs); + +extern unsigned long segv(struct faultinfo fi, unsigned long ip, + int is_user, struct uml_pt_regs *regs); +extern int handle_page_fault(unsigned long address, unsigned long ip, + int is_write, int is_user, int *code_out); + +extern unsigned int do_IRQ(int irq, struct uml_pt_regs *regs); +extern void initial_thread_cb(void (*proc)(void *), void *arg); +extern int is_syscall(unsigned long addr); + +extern void timer_handler(int sig, struct siginfo *unused_si, struct uml_pt_regs *regs); + +extern void uml_pm_wake(void); + +extern int start_uml(void); +extern void paging_init(void); + +extern void uml_cleanup(void); +extern void do_uml_exitcalls(void); + +/* + * Are we disallowed to sleep? Used to choose between GFP_KERNEL and + * GFP_ATOMIC. + */ +extern int __cant_sleep(void); +extern int get_current_pid(void); +extern int copy_from_user_proc(void *to, void *from, int size); +extern char *uml_strdup(const char *string); + +extern unsigned long to_irq_stack(unsigned long *mask_out); +extern unsigned long from_irq_stack(int nested); + +extern int singlestepping(void *t); + +extern void segv_handler(int sig, struct siginfo *unused_si, struct uml_pt_regs *regs); +extern void bus_handler(int sig, struct siginfo *si, struct uml_pt_regs *regs); +extern void winch(int sig, struct siginfo *unused_si, struct uml_pt_regs *regs); +extern void fatal_sigsegv(void) __attribute__ ((noreturn)); + +void um_idle_sleep(void); + +#endif diff --git a/arch/um/include/shared/longjmp.h b/arch/um/include/shared/longjmp.h new file mode 100644 index 0000000000..8863319039 --- /dev/null +++ b/arch/um/include/shared/longjmp.h @@ -0,0 +1,24 @@ +/* SPDX-License-Identifier: GPL-2.0 */ +#ifndef __UML_LONGJMP_H +#define __UML_LONGJMP_H + +#include <sysdep/archsetjmp.h> +#include <os.h> + +extern int signals_enabled; +extern int setjmp(jmp_buf); +extern void longjmp(jmp_buf, int); + +#define UML_LONGJMP(buf, val) do { \ + longjmp(*buf, val); \ +} while(0) + +#define UML_SETJMP(buf) ({ \ + int n, enable; \ + enable = *(volatile int *)&signals_enabled; \ + n = setjmp(*buf); \ + if(n != 0) \ + um_set_signals_trace(enable); \ + n; }) + +#endif diff --git a/arch/um/include/shared/mem.h b/arch/um/include/shared/mem.h new file mode 100644 index 0000000000..98aacd5441 --- /dev/null +++ b/arch/um/include/shared/mem.h @@ -0,0 +1,22 @@ +/* SPDX-License-Identifier: GPL-2.0 */ +/* + * Copyright (C) 2002 - 2007 Jeff Dike (jdike@{addtoit,linux.intel}.com) + */ + +#ifndef __MEM_H__ +#define __MEM_H__ + +extern int phys_mapping(unsigned long phys, unsigned long long *offset_out); + +extern unsigned long uml_physmem; +static inline unsigned long uml_to_phys(void *virt) +{ + return(((unsigned long) virt) - uml_physmem); +} + +static inline void *uml_to_virt(unsigned long phys) +{ + return((void *) uml_physmem + phys); +} + +#endif diff --git a/arch/um/include/shared/mem_user.h b/arch/um/include/shared/mem_user.h new file mode 100644 index 0000000000..11a723a585 --- /dev/null +++ b/arch/um/include/shared/mem_user.h @@ -0,0 +1,57 @@ +/* + * arch/um/include/mem_user.h + * + * BRIEF MODULE DESCRIPTION + * user side memory interface for support IO memory inside user mode linux + * + * Copyright (C) 2001 RidgeRun, Inc. + * Author: RidgeRun, Inc. + * Greg Lonnon glonnon@ridgerun.com or info@ridgerun.com + * + * This program is free software; you can redistribute it and/or modify it + * under the terms of the GNU General Public License as published by the + * Free Software Foundation; either version 2 of the License, or (at your + * option) any later version. + * + * THIS SOFTWARE IS PROVIDED ``AS IS'' AND ANY EXPRESS OR IMPLIED + * WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF + * MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN + * NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT, + * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT + * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF + * USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON + * ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF + * THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + * + * You should have received a copy of the GNU General Public License along + * with this program; if not, write to the Free Software Foundation, Inc., + * 675 Mass Ave, Cambridge, MA 02139, USA. + */ + +#ifndef _MEM_USER_H +#define _MEM_USER_H + +struct iomem_region { + struct iomem_region *next; + char *driver; + int fd; + int size; + unsigned long phys; + unsigned long virt; +}; + +extern struct iomem_region *iomem_regions; +extern int iomem_size; + +#define ROUND_4M(n) ((((unsigned long) (n)) + (1 << 22)) & ~((1 << 22) - 1)) + +extern unsigned long find_iomem(char *driver, unsigned long *len_out); +extern void mem_total_pages(unsigned long physmem, unsigned long iomem, + unsigned long highmem); +extern void setup_physmem(unsigned long start, unsigned long usable, + unsigned long len, unsigned long long highmem); +extern void map_memory(unsigned long virt, unsigned long phys, + unsigned long len, int r, int w, int x); + +#endif diff --git a/arch/um/include/shared/net_kern.h b/arch/um/include/shared/net_kern.h new file mode 100644 index 0000000000..67b2e9a1f2 --- /dev/null +++ b/arch/um/include/shared/net_kern.h @@ -0,0 +1,69 @@ +/* SPDX-License-Identifier: GPL-2.0 */ +/* + * Copyright (C) 2002 2007 Jeff Dike (jdike@{addtoit,linux.intel}.com) + */ + +#ifndef __UM_NET_KERN_H +#define __UM_NET_KERN_H + +#include <linux/netdevice.h> +#include <linux/platform_device.h> +#include <linux/skbuff.h> +#include <linux/socket.h> +#include <linux/list.h> +#include <linux/workqueue.h> + +struct uml_net { + struct list_head list; + struct net_device *dev; + struct platform_device pdev; + int index; +}; + +struct uml_net_private { + struct list_head list; + spinlock_t lock; + struct net_device *dev; + struct timer_list tl; + + struct work_struct work; + int fd; + unsigned char mac[ETH_ALEN]; + int max_packet; + unsigned short (*protocol)(struct sk_buff *); + int (*open)(void *); + void (*close)(int, void *); + void (*remove)(void *); + int (*read)(int, struct sk_buff *skb, struct uml_net_private *); + int (*write)(int, struct sk_buff *skb, struct uml_net_private *); + + void (*add_address)(unsigned char *, unsigned char *, void *); + void (*delete_address)(unsigned char *, unsigned char *, void *); + char user[]; +}; + +struct net_kern_info { + void (*init)(struct net_device *, void *); + unsigned short (*protocol)(struct sk_buff *); + int (*read)(int, struct sk_buff *skb, struct uml_net_private *); + int (*write)(int, struct sk_buff *skb, struct uml_net_private *); +}; + +struct transport { + struct list_head list; + const char *name; + int (* const setup)(char *, char **, void *); + const struct net_user_info *user; + const struct net_kern_info *kern; + const int private_size; + const int setup_size; +}; + +extern int tap_setup_common(char *str, char *type, char **dev_name, + char **mac_out, char **gate_addr); +extern void register_transport(struct transport *new); +extern unsigned short eth_protocol(struct sk_buff *skb); +extern void uml_net_setup_etheraddr(struct net_device *dev, char *str); + + +#endif diff --git a/arch/um/include/shared/net_user.h b/arch/um/include/shared/net_user.h new file mode 100644 index 0000000000..ba92a4d935 --- /dev/null +++ b/arch/um/include/shared/net_user.h @@ -0,0 +1,52 @@ +/* SPDX-License-Identifier: GPL-2.0 */ +/* + * Copyright (C) 2002 - 2007 Jeff Dike (jdike@{addtoit,linux.intel}.com) + */ + +#ifndef __UM_NET_USER_H__ +#define __UM_NET_USER_H__ + +#define ETH_ADDR_LEN (6) +#define ETH_HEADER_ETHERTAP (16) +#define ETH_HEADER_OTHER (26) /* 14 for ethernet + VLAN + MPLS for crazy people */ +#define ETH_MAX_PACKET (1500) + +#define UML_NET_VERSION (4) + +struct net_user_info { + int (*init)(void *, void *); + int (*open)(void *); + void (*close)(int, void *); + void (*remove)(void *); + void (*add_address)(unsigned char *, unsigned char *, void *); + void (*delete_address)(unsigned char *, unsigned char *, void *); + int max_packet; + int mtu; +}; + +extern void iter_addresses(void *d, void (*cb)(unsigned char *, + unsigned char *, void *), + void *arg); + +extern void *get_output_buffer(int *len_out); +extern void free_output_buffer(void *buffer); + +extern int tap_open_common(void *dev, char *gate_addr); +extern void tap_check_ips(char *gate_addr, unsigned char *eth_addr); + +extern void read_output(int fd, char *output_out, int len); + +extern int net_read(int fd, void *buf, int len); +extern int net_recvfrom(int fd, void *buf, int len); +extern int net_write(int fd, void *buf, int len); +extern int net_send(int fd, void *buf, int len); +extern int net_sendto(int fd, void *buf, int len, void *to, int sock_len); + +extern void open_addr(unsigned char *addr, unsigned char *netmask, void *arg); +extern void close_addr(unsigned char *addr, unsigned char *netmask, void *arg); + +extern char *split_if_spec(char *str, ...); + +extern int dev_netmask(void *d, void *m); + +#endif diff --git a/arch/um/include/shared/os.h b/arch/um/include/shared/os.h new file mode 100644 index 0000000000..0df646c665 --- /dev/null +++ b/arch/um/include/shared/os.h @@ -0,0 +1,346 @@ +/* SPDX-License-Identifier: GPL-2.0 */ +/* + * Copyright (C) 2015 Anton Ivanov (aivanov@{brocade.com,kot-begemot.co.uk}) + * Copyright (C) 2015 Thomas Meyer (thomas@m3y3r.de) + * Copyright (C) 2002 - 2007 Jeff Dike (jdike@{addtoit,linux.intel}.com) + */ + +#ifndef __OS_H__ +#define __OS_H__ + +#include <irq_user.h> +#include <longjmp.h> +#include <mm_id.h> +/* This is to get size_t */ +#ifndef __UM_HOST__ +#include <linux/types.h> +#else +#include <sys/types.h> +#endif + +#define CATCH_EINTR(expr) while ((errno = 0, ((expr) < 0)) && (errno == EINTR)) + +#define OS_TYPE_FILE 1 +#define OS_TYPE_DIR 2 +#define OS_TYPE_SYMLINK 3 +#define OS_TYPE_CHARDEV 4 +#define OS_TYPE_BLOCKDEV 5 +#define OS_TYPE_FIFO 6 +#define OS_TYPE_SOCK 7 + +/* os_access() flags */ +#define OS_ACC_F_OK 0 /* Test for existence. */ +#define OS_ACC_X_OK 1 /* Test for execute permission. */ +#define OS_ACC_W_OK 2 /* Test for write permission. */ +#define OS_ACC_R_OK 4 /* Test for read permission. */ +#define OS_ACC_RW_OK (OS_ACC_W_OK | OS_ACC_R_OK) /* Test for RW permission */ + +#ifdef CONFIG_64BIT +#define OS_LIB_PATH "/usr/lib64/" +#else +#define OS_LIB_PATH "/usr/lib/" +#endif + +#define OS_SENDMSG_MAX_FDS 8 + +/* + * types taken from stat_file() in hostfs_user.c + * (if they are wrong here, they are wrong there...). + */ +struct uml_stat { + int ust_dev; /* device */ + unsigned long long ust_ino; /* inode */ + int ust_mode; /* protection */ + int ust_nlink; /* number of hard links */ + int ust_uid; /* user ID of owner */ + int ust_gid; /* group ID of owner */ + unsigned long long ust_size; /* total size, in bytes */ + int ust_blksize; /* blocksize for filesystem I/O */ + unsigned long long ust_blocks; /* number of blocks allocated */ + unsigned long ust_atime; /* time of last access */ + unsigned long ust_mtime; /* time of last modification */ + unsigned long ust_ctime; /* time of last change */ +}; + +struct openflags { + unsigned int r : 1; + unsigned int w : 1; + unsigned int s : 1; /* O_SYNC */ + unsigned int c : 1; /* O_CREAT */ + unsigned int t : 1; /* O_TRUNC */ + unsigned int a : 1; /* O_APPEND */ + unsigned int e : 1; /* O_EXCL */ + unsigned int cl : 1; /* FD_CLOEXEC */ +}; + +#define OPENFLAGS() ((struct openflags) { .r = 0, .w = 0, .s = 0, .c = 0, \ + .t = 0, .a = 0, .e = 0, .cl = 0 }) + +static inline struct openflags of_read(struct openflags flags) +{ + flags.r = 1; + return flags; +} + +static inline struct openflags of_write(struct openflags flags) +{ + flags.w = 1; + return flags; +} + +static inline struct openflags of_rdwr(struct openflags flags) +{ + return of_read(of_write(flags)); +} + +static inline struct openflags of_set_rw(struct openflags flags, int r, int w) +{ + flags.r = r; + flags.w = w; + return flags; +} + +static inline struct openflags of_sync(struct openflags flags) +{ + flags.s = 1; + return flags; +} + +static inline struct openflags of_create(struct openflags flags) +{ + flags.c = 1; + return flags; +} + +static inline struct openflags of_trunc(struct openflags flags) +{ + flags.t = 1; + return flags; +} + +static inline struct openflags of_append(struct openflags flags) +{ + flags.a = 1; + return flags; +} + +static inline struct openflags of_excl(struct openflags flags) +{ + flags.e = 1; + return flags; +} + +static inline struct openflags of_cloexec(struct openflags flags) +{ + flags.cl = 1; + return flags; +} + +/* file.c */ +extern int os_stat_file(const char *file_name, struct uml_stat *buf); +extern int os_stat_fd(const int fd, struct uml_stat *buf); +extern int os_access(const char *file, int mode); +extern int os_set_exec_close(int fd); +extern int os_ioctl_generic(int fd, unsigned int cmd, unsigned long arg); +extern int os_get_ifname(int fd, char *namebuf); +extern int os_set_slip(int fd); +extern int os_mode_fd(int fd, int mode); +extern int os_fsync_file(int fd); + +extern int os_seek_file(int fd, unsigned long long offset); +extern int os_open_file(const char *file, struct openflags flags, int mode); +extern int os_read_file(int fd, void *buf, int len); +extern int os_write_file(int fd, const void *buf, int count); +extern int os_sync_file(int fd); +extern int os_file_size(const char *file, unsigned long long *size_out); +extern int os_pread_file(int fd, void *buf, int len, unsigned long long offset); +extern int os_pwrite_file(int fd, const void *buf, int count, unsigned long long offset); +extern int os_file_modtime(const char *file, long long *modtime); +extern int os_pipe(int *fd, int stream, int close_on_exec); +extern int os_set_fd_async(int fd); +extern int os_clear_fd_async(int fd); +extern int os_set_fd_block(int fd, int blocking); +extern int os_accept_connection(int fd); +extern int os_create_unix_socket(const char *file, int len, int close_on_exec); +extern int os_shutdown_socket(int fd, int r, int w); +extern void os_close_file(int fd); +extern int os_rcv_fd(int fd, int *helper_pid_out); +extern int os_connect_socket(const char *name); +extern int os_file_type(char *file); +extern int os_file_mode(const char *file, struct openflags *mode_out); +extern int os_lock_file(int fd, int excl); +extern void os_flush_stdout(void); +extern unsigned os_major(unsigned long long dev); +extern unsigned os_minor(unsigned long long dev); +extern unsigned long long os_makedev(unsigned major, unsigned minor); +extern int os_falloc_punch(int fd, unsigned long long offset, int count); +extern int os_falloc_zeroes(int fd, unsigned long long offset, int count); +extern int os_eventfd(unsigned int initval, int flags); +extern int os_sendmsg_fds(int fd, const void *buf, unsigned int len, + const int *fds, unsigned int fds_num); +int os_poll(unsigned int n, const int *fds); + +/* start_up.c */ +extern void os_early_checks(void); +extern void os_check_bugs(void); +extern void check_host_supports_tls(int *supports_tls, int *tls_min); +extern void get_host_cpu_features( + void (*flags_helper_func)(char *line), + void (*cache_helper_func)(char *line)); + +/* mem.c */ +extern int create_mem_file(unsigned long long len); + +/* process.c */ +extern unsigned long os_process_pc(int pid); +extern int os_process_parent(int pid); +extern void os_alarm_process(int pid); +extern void os_stop_process(int pid); +extern void os_kill_process(int pid, int reap_child); +extern void os_kill_ptraced_process(int pid, int reap_child); + +extern int os_getpid(void); +extern int os_getpgrp(void); + +extern void init_new_thread_signals(void); + +extern int os_map_memory(void *virt, int fd, unsigned long long off, + unsigned long len, int r, int w, int x); +extern int os_protect_memory(void *addr, unsigned long len, + int r, int w, int x); +extern int os_unmap_memory(void *addr, int len); +extern int os_drop_memory(void *addr, int length); +extern int can_drop_memory(void); +extern int os_mincore(void *addr, unsigned long len); + +/* execvp.c */ +extern int execvp_noalloc(char *buf, const char *file, char *const argv[]); +/* helper.c */ +extern int run_helper(void (*pre_exec)(void *), void *pre_data, char **argv); +extern int run_helper_thread(int (*proc)(void *), void *arg, + unsigned int flags, unsigned long *stack_out); +extern int helper_wait(int pid); + + +/* umid.c */ +extern int umid_file_name(char *name, char *buf, int len); +extern int set_umid(char *name); +extern char *get_umid(void); + +/* signal.c */ +extern void timer_set_signal_handler(void); +extern void set_sigstack(void *sig_stack, int size); +extern void set_handler(int sig); +extern void send_sigio_to_self(void); +extern int change_sig(int signal, int on); +extern void block_signals(void); +extern void unblock_signals(void); +extern int um_set_signals(int enable); +extern int um_set_signals_trace(int enable); +extern int os_is_signal_stack(void); +extern void deliver_alarm(void); +extern void register_pm_wake_signal(void); +extern void block_signals_hard(void); +extern void unblock_signals_hard(void); +extern void mark_sigio_pending(void); + +/* util.c */ +extern void stack_protections(unsigned long address); +extern int raw(int fd); +extern void setup_machinename(char *machine_out); +extern void setup_hostinfo(char *buf, int len); +extern ssize_t os_getrandom(void *buf, size_t len, unsigned int flags); +extern void os_dump_core(void) __attribute__ ((noreturn)); +extern void um_early_printk(const char *s, unsigned int n); +extern void os_fix_helper_signals(void); +extern void os_info(const char *fmt, ...) + __attribute__ ((format (printf, 1, 2))); +extern void os_warn(const char *fmt, ...) + __attribute__ ((format (printf, 1, 2))); + +/* time.c */ +extern void os_idle_sleep(void); +extern int os_timer_create(void); +extern int os_timer_set_interval(unsigned long long nsecs); +extern int os_timer_one_shot(unsigned long long nsecs); +extern void os_timer_disable(void); +extern long long os_persistent_clock_emulation(void); +extern long long os_nsecs(void); + +/* skas/mem.c */ +extern long run_syscall_stub(struct mm_id * mm_idp, + int syscall, unsigned long *args, long expected, + void **addr, int done); +extern long syscall_stub_data(struct mm_id * mm_idp, + unsigned long *data, int data_count, + void **addr, void **stub_addr); +extern int map(struct mm_id * mm_idp, unsigned long virt, + unsigned long len, int prot, int phys_fd, + unsigned long long offset, int done, void **data); +extern int unmap(struct mm_id * mm_idp, unsigned long addr, unsigned long len, + int done, void **data); +extern int protect(struct mm_id * mm_idp, unsigned long addr, + unsigned long len, unsigned int prot, int done, void **data); + +/* skas/process.c */ +extern int is_skas_winch(int pid, int fd, void *data); +extern int start_userspace(unsigned long stub_stack); +extern int copy_context_skas0(unsigned long stack, int pid); +extern void userspace(struct uml_pt_regs *regs, unsigned long *aux_fp_regs); +extern void new_thread(void *stack, jmp_buf *buf, void (*handler)(void)); +extern void switch_threads(jmp_buf *me, jmp_buf *you); +extern int start_idle_thread(void *stack, jmp_buf *switch_buf); +extern void initial_thread_cb_skas(void (*proc)(void *), + void *arg); +extern void halt_skas(void); +extern void reboot_skas(void); + +/* irq.c */ +extern int os_waiting_for_events_epoll(void); +extern void *os_epoll_get_data_pointer(int index); +extern int os_epoll_triggered(int index, int events); +extern int os_event_mask(enum um_irq_type irq_type); +extern int os_setup_epoll(void); +extern int os_add_epoll_fd(int events, int fd, void *data); +extern int os_mod_epoll_fd(int events, int fd, void *data); +extern int os_del_epoll_fd(int fd); +extern void os_set_ioignore(void); +extern void os_close_epoll_fd(void); +extern void um_irqs_suspend(void); +extern void um_irqs_resume(void); + +/* sigio.c */ +extern int add_sigio_fd(int fd); +extern int ignore_sigio_fd(int fd); +extern void maybe_sigio_broken(int fd); +extern void sigio_broken(int fd); +/* + * unlocked versions for IRQ controller code. + * + * This is safe because it's used at suspend/resume and nothing + * else is running. + */ +extern int __add_sigio_fd(int fd); +extern int __ignore_sigio_fd(int fd); + +/* prctl.c */ +extern int os_arch_prctl(int pid, int option, unsigned long *arg2); + +/* tty.c */ +extern int get_pty(void); + +/* sys-$ARCH/task_size.c */ +extern unsigned long os_get_top_address(void); + +long syscall(long number, ...); + +/* irqflags tracing */ +extern void block_signals_trace(void); +extern void unblock_signals_trace(void); +extern void um_trace_signals_on(void); +extern void um_trace_signals_off(void); + +/* time-travel */ +extern void deliver_time_travel_irqs(void); + +#endif diff --git a/arch/um/include/shared/ptrace_user.h b/arch/um/include/shared/ptrace_user.h new file mode 100644 index 0000000000..95455e8996 --- /dev/null +++ b/arch/um/include/shared/ptrace_user.h @@ -0,0 +1,56 @@ +/* SPDX-License-Identifier: GPL-2.0 */ +/* + * Copyright (C) 2000 - 2007 Jeff Dike (jdike@{addtoit,linux.intel}.com) + */ + +#ifndef __PTRACE_USER_H__ +#define __PTRACE_USER_H__ + +#include <sys/ptrace.h> +#include <sysdep/ptrace_user.h> + +extern int ptrace_getregs(long pid, unsigned long *regs_out); +extern int ptrace_setregs(long pid, unsigned long *regs_in); + +/* syscall emulation path in ptrace */ + +#ifndef PTRACE_SYSEMU +#define PTRACE_SYSEMU 31 +#endif +#ifndef PTRACE_SYSEMU_SINGLESTEP +#define PTRACE_SYSEMU_SINGLESTEP 32 +#endif + +/* On architectures, that started to support PTRACE_O_TRACESYSGOOD + * in linux 2.4, there are two different definitions of + * PTRACE_SETOPTIONS: linux 2.4 uses 21 while linux 2.6 uses 0x4200. + * For binary compatibility, 2.6 also supports the old "21", named + * PTRACE_OLDSETOPTION. On these architectures, UML always must use + * "21", to ensure the kernel runs on 2.4 and 2.6 host without + * recompilation. So, we use PTRACE_OLDSETOPTIONS in UML. + * We also want to be able to build the kernel on 2.4, which doesn't + * have PTRACE_OLDSETOPTIONS. So, if it is missing, we declare + * PTRACE_OLDSETOPTIONS to be the same as PTRACE_SETOPTIONS. + * + * On architectures, that start to support PTRACE_O_TRACESYSGOOD on + * linux 2.6, PTRACE_OLDSETOPTIONS never is defined, and also isn't + * supported by the host kernel. In that case, our trick lets us use + * the new 0x4200 with the name PTRACE_OLDSETOPTIONS. + */ +#ifndef PTRACE_OLDSETOPTIONS +#define PTRACE_OLDSETOPTIONS PTRACE_SETOPTIONS +#endif + +void set_using_sysemu(int value); +int get_using_sysemu(void); +extern int sysemu_supported; + +#define SELECT_PTRACE_OPERATION(sysemu_mode, singlestep_mode) \ + (((int[3][3] ) { \ + { PTRACE_SYSCALL, PTRACE_SYSCALL, PTRACE_SINGLESTEP }, \ + { PTRACE_SYSEMU, PTRACE_SYSEMU, PTRACE_SINGLESTEP }, \ + { PTRACE_SYSEMU, PTRACE_SYSEMU_SINGLESTEP, \ + PTRACE_SYSEMU_SINGLESTEP } }) \ + [sysemu_mode][singlestep_mode]) + +#endif diff --git a/arch/um/include/shared/registers.h b/arch/um/include/shared/registers.h new file mode 100644 index 0000000000..2f9c3ce5b4 --- /dev/null +++ b/arch/um/include/shared/registers.h @@ -0,0 +1,24 @@ +/* SPDX-License-Identifier: GPL-2.0 */ +/* + * Copyright (C) 2004 PathScale, Inc + */ + +#ifndef __REGISTERS_H +#define __REGISTERS_H + +#include <sysdep/ptrace.h> + +extern int save_i387_registers(int pid, unsigned long *fp_regs); +extern int restore_i387_registers(int pid, unsigned long *fp_regs); +extern int save_fp_registers(int pid, unsigned long *fp_regs); +extern int restore_fp_registers(int pid, unsigned long *fp_regs); +extern int save_fpx_registers(int pid, unsigned long *fp_regs); +extern int restore_fpx_registers(int pid, unsigned long *fp_regs); +extern int save_registers(int pid, struct uml_pt_regs *regs); +extern int restore_pid_registers(int pid, struct uml_pt_regs *regs); +extern int init_pid_registers(int pid); +extern void get_safe_registers(unsigned long *regs, unsigned long *fp_regs); +extern int get_fp_registers(int pid, unsigned long *regs); +extern int put_fp_registers(int pid, unsigned long *regs); + +#endif diff --git a/arch/um/include/shared/sigio.h b/arch/um/include/shared/sigio.h new file mode 100644 index 0000000000..e60c8b2278 --- /dev/null +++ b/arch/um/include/shared/sigio.h @@ -0,0 +1,13 @@ +/* SPDX-License-Identifier: GPL-2.0 */ +/* + * Copyright (C) 2002 Jeff Dike (jdike@karaya.com) + */ + +#ifndef __SIGIO_H__ +#define __SIGIO_H__ + +extern int write_sigio_irq(int fd); +extern void sigio_lock(void); +extern void sigio_unlock(void); + +#endif diff --git a/arch/um/include/shared/skas/mm_id.h b/arch/um/include/shared/skas/mm_id.h new file mode 100644 index 0000000000..e82e203f5f --- /dev/null +++ b/arch/um/include/shared/skas/mm_id.h @@ -0,0 +1,18 @@ +/* SPDX-License-Identifier: GPL-2.0 */ +/* + * Copyright (C) 2005 Jeff Dike (jdike@karaya.com) + */ + +#ifndef __MM_ID_H +#define __MM_ID_H + +struct mm_id { + union { + int mm_fd; + int pid; + } u; + unsigned long stack; + int kill; +}; + +#endif diff --git a/arch/um/include/shared/skas/skas.h b/arch/um/include/shared/skas/skas.h new file mode 100644 index 0000000000..c93d2cbc8f --- /dev/null +++ b/arch/um/include/shared/skas/skas.h @@ -0,0 +1,19 @@ +/* SPDX-License-Identifier: GPL-2.0 */ +/* + * Copyright (C) 2002 - 2007 Jeff Dike (jdike@{addtoit,linux.intel}.com) + */ + +#ifndef __SKAS_H +#define __SKAS_H + +#include <sysdep/ptrace.h> + +extern int userspace_pid[]; + +extern int user_thread(unsigned long stack, int flags); +extern void new_thread_handler(void); +extern void handle_syscall(struct uml_pt_regs *regs); +extern long execute_syscall_skas(void *r); +extern unsigned long current_stub_stack(void); + +#endif diff --git a/arch/um/include/shared/skas/stub-data.h b/arch/um/include/shared/skas/stub-data.h new file mode 100644 index 0000000000..5e3ade3fb3 --- /dev/null +++ b/arch/um/include/shared/skas/stub-data.h @@ -0,0 +1,17 @@ +/* SPDX-License-Identifier: GPL-2.0 */ +/* + + * Copyright (C) 2015 Thomas Meyer (thomas@m3y3r.de) + * Copyright (C) 2005 Jeff Dike (jdike@karaya.com) + */ + +#ifndef __STUB_DATA_H +#define __STUB_DATA_H + +struct stub_data { + unsigned long offset; + int fd; + long parent_err, child_err; +}; + +#endif diff --git a/arch/um/include/shared/timetravel.h b/arch/um/include/shared/timetravel.h new file mode 100644 index 0000000000..e5c3d69f1b --- /dev/null +++ b/arch/um/include/shared/timetravel.h @@ -0,0 +1,22 @@ +/* SPDX-License-Identifier: GPL-2.0 */ +/* + * Copyright (C) 2019-2021 Intel Corporation + */ +#ifndef _UM_TIME_TRAVEL_H_ +#define _UM_TIME_TRAVEL_H_ + +enum time_travel_mode { + TT_MODE_OFF, + TT_MODE_BASIC, + TT_MODE_INFCPU, + TT_MODE_EXTERNAL, +}; + +#if defined(UML_CONFIG_UML_TIME_TRAVEL_SUPPORT) || \ + defined(CONFIG_UML_TIME_TRAVEL_SUPPORT) +extern enum time_travel_mode time_travel_mode; +#else +#define time_travel_mode TT_MODE_OFF +#endif /* (UML_)CONFIG_UML_TIME_TRAVEL_SUPPORT */ + +#endif /* _UM_TIME_TRAVEL_H_ */ diff --git a/arch/um/include/shared/um_malloc.h b/arch/um/include/shared/um_malloc.h new file mode 100644 index 0000000000..13da93284c --- /dev/null +++ b/arch/um/include/shared/um_malloc.h @@ -0,0 +1,19 @@ +/* SPDX-License-Identifier: GPL-2.0 */ +/* + * Copyright (C) 2005 Paolo 'Blaisorblade' Giarrusso <blaisorblade@yahoo.it> + */ + +#ifndef __UM_MALLOC_H__ +#define __UM_MALLOC_H__ + +#include <generated/asm-offsets.h> + +extern void *uml_kmalloc(int size, int flags); +extern void kfree(const void *ptr); + +extern void *vmalloc(unsigned long size); +extern void vfree(void *ptr); + +#endif /* __UM_MALLOC_H__ */ + + diff --git a/arch/um/include/shared/user.h b/arch/um/include/shared/user.h new file mode 100644 index 0000000000..981e11d8e0 --- /dev/null +++ b/arch/um/include/shared/user.h @@ -0,0 +1,59 @@ +/* SPDX-License-Identifier: GPL-2.0 */ +/* + * Copyright (C) 2000 - 2007 Jeff Dike (jdike@{addtoit,linux.intel}.com) + */ + +#ifndef __USER_H__ +#define __USER_H__ + +#include <generated/asm-offsets.h> + +/* + * The usual definition - copied here because the kernel provides its own, + * fancier, type-safe, definition. Using that one would require + * copying too much infrastructure for my taste, so userspace files + * get less checking than kernel files. + */ +#define ARRAY_SIZE(x) (sizeof(x) / sizeof((x)[0])) + +/* This is to get size_t and NULL */ +#ifndef __UM_HOST__ +#include <linux/types.h> +#else +#include <stddef.h> +#include <sys/types.h> +#endif + +extern void panic(const char *fmt, ...) + __attribute__ ((format (printf, 1, 2))); + +/* Requires preincluding include/linux/kern_levels.h */ +#define UM_KERN_EMERG KERN_EMERG +#define UM_KERN_ALERT KERN_ALERT +#define UM_KERN_CRIT KERN_CRIT +#define UM_KERN_ERR KERN_ERR +#define UM_KERN_WARNING KERN_WARNING +#define UM_KERN_NOTICE KERN_NOTICE +#define UM_KERN_INFO KERN_INFO +#define UM_KERN_DEBUG KERN_DEBUG +#define UM_KERN_CONT KERN_CONT + +#ifdef UML_CONFIG_PRINTK +#define printk(...) _printk(__VA_ARGS__) +extern int _printk(const char *fmt, ...) + __attribute__ ((format (printf, 1, 2))); +#else +static inline int printk(const char *fmt, ...) +{ + return 0; +} +#endif + +extern int in_aton(char *str); +extern size_t strlcat(char *, const char *, size_t); +extern size_t strscpy(char *, const char *, size_t); + +/* Copied from linux/compiler-gcc.h since we can't include it directly */ +#define barrier() __asm__ __volatile__("": : :"memory") + +#endif diff --git a/arch/um/include/uapi/asm/Kbuild b/arch/um/include/uapi/asm/Kbuild new file mode 100644 index 0000000000..f66554cd5c --- /dev/null +++ b/arch/um/include/uapi/asm/Kbuild @@ -0,0 +1 @@ +# SPDX-License-Identifier: GPL-2.0 diff --git a/arch/um/kernel/Makefile b/arch/um/kernel/Makefile new file mode 100644 index 0000000000..811188be95 --- /dev/null +++ b/arch/um/kernel/Makefile @@ -0,0 +1,65 @@ +# SPDX-License-Identifier: GPL-2.0 +# +# Copyright (C) 2002 - 2007 Jeff Dike (jdike@{addtoit,linux,intel}.com) +# + +# Don't instrument UML-specific code; without this, we may crash when +# accessing the instrumentation buffer for the first time from the +# kernel. +KCOV_INSTRUMENT := n + +CPPFLAGS_vmlinux.lds := -DSTART=$(LDS_START) \ + -DELF_ARCH=$(LDS_ELF_ARCH) \ + -DELF_FORMAT=$(LDS_ELF_FORMAT) \ + $(LDS_EXTRA) +extra-y := vmlinux.lds + +obj-y = config.o exec.o exitcode.o irq.o ksyms.o mem.o \ + physmem.o process.o ptrace.o reboot.o sigio.o \ + signal.o sysrq.o time.o tlb.o trap.o \ + um_arch.o umid.o maccess.o kmsg_dump.o capflags.o skas/ +obj-y += load_file.o + +obj-$(CONFIG_BLK_DEV_INITRD) += initrd.o +obj-$(CONFIG_GPROF) += gprof_syms.o +obj-$(CONFIG_OF) += dtb.o +obj-$(CONFIG_EARLY_PRINTK) += early_printk.o +obj-$(CONFIG_STACKTRACE) += stacktrace.o +obj-$(CONFIG_GENERIC_PCI_IOMAP) += ioport.o + +USER_OBJS := config.o + +include $(srctree)/arch/um/scripts/Makefile.rules + +targets := config.c config.tmp capflags.c + +# Be careful with the below Sed code - sed is pitfall-rich! +# We use sed to lower build requirements, for "embedded" builders for instance. + +$(obj)/config.tmp: $(objtree)/.config FORCE + $(call if_changed,quote1) + +quiet_cmd_quote1 = QUOTE $@ + cmd_quote1 = sed -e 's/"/\\"/g' -e 's/^/"/' -e 's/$$/\\n",/' \ + $< > $@ + +$(obj)/config.c: $(src)/config.c.in $(obj)/config.tmp FORCE + $(call if_changed,quote2) + +quiet_cmd_mkcapflags = MKCAP $@ + cmd_mkcapflags = $(CONFIG_SHELL) $(srctree)/$(src)/../../x86/kernel/cpu/mkcapflags.sh $@ $^ + +cpufeature = $(src)/../../x86/include/asm/cpufeatures.h +vmxfeature = $(src)/../../x86/include/asm/vmxfeatures.h + +$(obj)/capflags.c: $(cpufeature) $(vmxfeature) $(src)/../../x86/kernel/cpu/mkcapflags.sh FORCE + $(call if_changed,mkcapflags) + +quiet_cmd_quote2 = QUOTE $@ + cmd_quote2 = sed -e '/CONFIG/{' \ + -e 's/"CONFIG"//' \ + -e 'r $(obj)/config.tmp' \ + -e 'a \' \ + -e '""' \ + -e '}' \ + $< > $@ diff --git a/arch/um/kernel/asm-offsets.c b/arch/um/kernel/asm-offsets.c new file mode 100644 index 0000000000..1fb12235ab --- /dev/null +++ b/arch/um/kernel/asm-offsets.c @@ -0,0 +1 @@ +#include <sysdep/kernel-offsets.h> diff --git a/arch/um/kernel/config.c.in b/arch/um/kernel/config.c.in new file mode 100644 index 0000000000..3ece3c3b31 --- /dev/null +++ b/arch/um/kernel/config.c.in @@ -0,0 +1,26 @@ +// SPDX-License-Identifier: GPL-2.0 +/* + * Copyright (C) 2002 Jeff Dike (jdike@karaya.com) + */ + +#include <stdio.h> +#include <stdlib.h> +#include <init.h> + +static __initdata const char *config[] = { +"CONFIG" +}; + +static int __init print_config(char *line, int *add) +{ + int i; + for (i = 0; i < sizeof(config)/sizeof(config[0]); i++) + printf("%s", config[i]); + exit(0); +} + +__uml_setup("--showconfig", print_config, +"--showconfig\n" +" Prints the config file that this UML binary was generated from.\n\n" +); + diff --git a/arch/um/kernel/dtb.c b/arch/um/kernel/dtb.c new file mode 100644 index 0000000000..484141b069 --- /dev/null +++ b/arch/um/kernel/dtb.c @@ -0,0 +1,41 @@ +// SPDX-License-Identifier: GPL-2.0-only + +#include <linux/init.h> +#include <linux/of_fdt.h> +#include <linux/printk.h> +#include <linux/memblock.h> +#include <init.h> + +#include "um_arch.h" + +static char *dtb __initdata; + +void uml_dtb_init(void) +{ + long long size; + void *area; + + area = uml_load_file(dtb, &size); + if (!area) + return; + + if (!early_init_dt_scan(area)) { + pr_err("invalid DTB %s\n", dtb); + memblock_free(area, size); + return; + } + + early_init_fdt_scan_reserved_mem(); + unflatten_device_tree(); +} + +static int __init uml_dtb_setup(char *line, int *add) +{ + dtb = line; + return 0; +} + +__uml_setup("dtb=", uml_dtb_setup, +"dtb=<file>\n" +" Boot the kernel with the devicetree blob from the specified file.\n" +); diff --git a/arch/um/kernel/dyn.lds.S b/arch/um/kernel/dyn.lds.S new file mode 100644 index 0000000000..3385d653eb --- /dev/null +++ b/arch/um/kernel/dyn.lds.S @@ -0,0 +1,180 @@ +#include <asm/vmlinux.lds.h> +#include <asm/page.h> + +OUTPUT_FORMAT(ELF_FORMAT) +OUTPUT_ARCH(ELF_ARCH) +ENTRY(_start) +jiffies = jiffies_64; + +VERSION { + { + local: *; + }; +} + +SECTIONS +{ + PROVIDE (__executable_start = START); + . = START + SIZEOF_HEADERS; + .interp : { *(.interp) } + __binary_start = .; + . = ALIGN(4096); /* Init code and data */ + _text = .; + INIT_TEXT_SECTION(PAGE_SIZE) + + . = ALIGN(PAGE_SIZE); + + /* Read-only sections, merged into text segment: */ + .hash : { *(.hash) } + .gnu.hash : { *(.gnu.hash) } + .dynsym : { *(.dynsym) } + .dynstr : { *(.dynstr) } + .gnu.version : { *(.gnu.version) } + .gnu.version_d : { *(.gnu.version_d) } + .gnu.version_r : { *(.gnu.version_r) } + .rel.init : { *(.rel.init) } + .rela.init : { *(.rela.init) } + .rel.text : { *(.rel.text .rel.text.* .rel.gnu.linkonce.t.*) } + .rela.text : { *(.rela.text .rela.text.* .rela.gnu.linkonce.t.*) } + .rel.fini : { *(.rel.fini) } + .rela.fini : { *(.rela.fini) } + .rel.rodata : { *(.rel.rodata .rel.rodata.* .rel.gnu.linkonce.r.*) } + .rela.rodata : { *(.rela.rodata .rela.rodata.* .rela.gnu.linkonce.r.*) } + .rel.data : { *(.rel.data .rel.data.* .rel.gnu.linkonce.d.*) } + .rela.data : { *(.rela.data .rela.data.* .rela.gnu.linkonce.d.*) } + .rel.tdata : { *(.rel.tdata .rel.tdata.* .rel.gnu.linkonce.td.*) } + .rela.tdata : { *(.rela.tdata .rela.tdata.* .rela.gnu.linkonce.td.*) } + .rel.tbss : { *(.rel.tbss .rel.tbss.* .rel.gnu.linkonce.tb.*) } + .rela.tbss : { *(.rela.tbss .rela.tbss.* .rela.gnu.linkonce.tb.*) } + .rel.ctors : { *(.rel.ctors) } + .rela.ctors : { *(.rela.ctors) } + .rel.dtors : { *(.rel.dtors) } + .rela.dtors : { *(.rela.dtors) } + .rel.got : { *(.rel.got) } + .rela.got : { *(.rela.got) } + .rel.bss : { *(.rel.bss .rel.bss.* .rel.gnu.linkonce.b.*) } + .rela.bss : { *(.rela.bss .rela.bss.* .rela.gnu.linkonce.b.*) } + .rel.plt : { + *(.rel.plt) + PROVIDE_HIDDEN(__rel_iplt_start = .); + *(.rel.iplt) + PROVIDE_HIDDEN(__rel_iplt_end = .); + } + .rela.plt : { + *(.rela.plt) + PROVIDE_HIDDEN(__rela_iplt_start = .); + *(.rela.iplt) + PROVIDE_HIDDEN(__rela_iplt_end = .); + } + .init : { + KEEP (*(.init)) + } =0x90909090 + .plt : { *(.plt) } + .text : { + _stext = .; + TEXT_TEXT + SCHED_TEXT + LOCK_TEXT + IRQENTRY_TEXT + SOFTIRQENTRY_TEXT + *(.fixup) + *(.stub .text.* .gnu.linkonce.t.*) + /* .gnu.warning sections are handled specially by elf32.em. */ + *(.gnu.warning) + + . = ALIGN(PAGE_SIZE); + } =0x90909090 + . = ALIGN(PAGE_SIZE); + .syscall_stub : { + __syscall_stub_start = .; + *(.__syscall_stub*) + __syscall_stub_end = .; + } + .fini : { + KEEP (*(.fini)) + } =0x90909090 + + .kstrtab : { *(.kstrtab) } + + #include <asm/common.lds.S> + + __init_begin = .; + init.data : { INIT_DATA } + __init_end = .; + + /* Ensure the __preinit_array_start label is properly aligned. We + could instead move the label definition inside the section, but + the linker would then create the section even if it turns out to + be empty, which isn't pretty. */ + . = ALIGN(32 / 8); + .preinit_array : { *(.preinit_array) } + .init_array : { + *(.kasan_init) + *(.init_array.*) + *(.init_array) + } + .fini_array : { *(.fini_array) } + .data : { + INIT_TASK_DATA(KERNEL_STACK_SIZE) + . = ALIGN(KERNEL_STACK_SIZE); + *(.data..init_irqstack) + DATA_DATA + *(.data.* .gnu.linkonce.d.*) + SORT(CONSTRUCTORS) + } + .data1 : { *(.data1) } + .tdata : { *(.tdata .tdata.* .gnu.linkonce.td.*) } + .tbss : { *(.tbss .tbss.* .gnu.linkonce.tb.*) *(.tcommon) } + .eh_frame : { KEEP (*(.eh_frame)) } + .gcc_except_table : { *(.gcc_except_table) } + .dynamic : { *(.dynamic) } + .ctors : { + /* gcc uses crtbegin.o to find the start of + the constructors, so we make sure it is + first. Because this is a wildcard, it + doesn't matter if the user does not + actually link against crtbegin.o; the + linker won't look for a file to match a + wildcard. The wildcard also means that it + doesn't matter which directory crtbegin.o + is in. */ + KEEP (*crtbegin.o(.ctors)) + /* We don't want to include the .ctor section from + from the crtend.o file until after the sorted ctors. + The .ctor section from the crtend file contains the + end of ctors marker and it must be last */ + KEEP (*(EXCLUDE_FILE (*crtend.o ) .ctors)) + KEEP (*(SORT(.ctors.*))) + KEEP (*(.ctors)) + } + .dtors : { + KEEP (*crtbegin.o(.dtors)) + KEEP (*(EXCLUDE_FILE (*crtend.o ) .dtors)) + KEEP (*(SORT(.dtors.*))) + KEEP (*(.dtors)) + } + .jcr : { KEEP (*(.jcr)) } + .got : { *(.got.plt) *(.got) } + _edata = .; + PROVIDE (edata = .); + .bss : { + __bss_start = .; + *(.dynbss) + *(.bss .bss.* .gnu.linkonce.b.*) + *(COMMON) + /* Align here to ensure that the .bss section occupies space up to + _end. Align after .bss to ensure correct alignment even if the + .bss section disappears because there are no input sections. */ + . = ALIGN(32 / 8); + . = ALIGN(32 / 8); + } + __bss_stop = .; + _end = .; + PROVIDE (end = .); + + STABS_DEBUG + DWARF_DEBUG + ELF_DETAILS + + DISCARDS +} diff --git a/arch/um/kernel/early_printk.c b/arch/um/kernel/early_printk.c new file mode 100644 index 0000000000..c350c2331b --- /dev/null +++ b/arch/um/kernel/early_printk.c @@ -0,0 +1,32 @@ +// SPDX-License-Identifier: GPL-2.0-only +/* + * Copyright (C) 2011 Richard Weinberger <richrd@nod.at> + */ + +#include <linux/kernel.h> +#include <linux/console.h> +#include <linux/init.h> +#include <os.h> + +static void early_console_write(struct console *con, const char *s, unsigned int n) +{ + um_early_printk(s, n); +} + +static struct console early_console_dev = { + .name = "earlycon", + .write = early_console_write, + .flags = CON_BOOT, + .index = -1, +}; + +static int __init setup_early_printk(char *buf) +{ + if (!early_console) { + early_console = &early_console_dev; + register_console(&early_console_dev); + } + return 0; +} + +early_param("earlyprintk", setup_early_printk); diff --git a/arch/um/kernel/exec.c b/arch/um/kernel/exec.c new file mode 100644 index 0000000000..827a0d3fa5 --- /dev/null +++ b/arch/um/kernel/exec.c @@ -0,0 +1,51 @@ +// SPDX-License-Identifier: GPL-2.0 +/* + * Copyright (C) 2000 - 2007 Jeff Dike (jdike@{addtoit,linux.intel}.com) + */ + +#include <linux/stddef.h> +#include <linux/module.h> +#include <linux/fs.h> +#include <linux/ptrace.h> +#include <linux/sched/mm.h> +#include <linux/sched/task.h> +#include <linux/sched/task_stack.h> +#include <linux/slab.h> +#include <asm/current.h> +#include <asm/processor.h> +#include <linux/uaccess.h> +#include <as-layout.h> +#include <mem_user.h> +#include <registers.h> +#include <skas.h> +#include <os.h> + +void flush_thread(void) +{ + void *data = NULL; + int ret; + + arch_flush_thread(¤t->thread.arch); + + ret = unmap(¤t->mm->context.id, 0, TASK_SIZE, 1, &data); + if (ret) { + printk(KERN_ERR "%s - clearing address space failed, err = %d\n", + __func__, ret); + force_sig(SIGKILL); + } + get_safe_registers(current_pt_regs()->regs.gp, + current_pt_regs()->regs.fp); + + __switch_mm(¤t->mm->context.id); +} + +void start_thread(struct pt_regs *regs, unsigned long eip, unsigned long esp) +{ + PT_REGS_IP(regs) = eip; + PT_REGS_SP(regs) = esp; + clear_thread_flag(TIF_SINGLESTEP); +#ifdef SUBARCH_EXECVE1 + SUBARCH_EXECVE1(regs->regs); +#endif +} +EXPORT_SYMBOL(start_thread); diff --git a/arch/um/kernel/exitcode.c b/arch/um/kernel/exitcode.c new file mode 100644 index 0000000000..43edc2aa57 --- /dev/null +++ b/arch/um/kernel/exitcode.c @@ -0,0 +1,79 @@ +// SPDX-License-Identifier: GPL-2.0 +/* + * Copyright (C) 2002 - 2007 Jeff Dike (jdike@{addtoit,linux.intel}.com) + */ + +#include <linux/ctype.h> +#include <linux/init.h> +#include <linux/kernel.h> +#include <linux/module.h> +#include <linux/proc_fs.h> +#include <linux/seq_file.h> +#include <linux/types.h> +#include <linux/uaccess.h> + +/* + * If read and write race, the read will still atomically read a valid + * value. + */ +int uml_exitcode = 0; + +static int exitcode_proc_show(struct seq_file *m, void *v) +{ + int val; + + /* + * Save uml_exitcode in a local so that we don't need to guarantee + * that sprintf accesses it atomically. + */ + val = uml_exitcode; + seq_printf(m, "%d\n", val); + return 0; +} + +static int exitcode_proc_open(struct inode *inode, struct file *file) +{ + return single_open(file, exitcode_proc_show, NULL); +} + +static ssize_t exitcode_proc_write(struct file *file, + const char __user *buffer, size_t count, loff_t *pos) +{ + char *end, buf[sizeof("nnnnn\0")]; + size_t size; + int tmp; + + size = min(count, sizeof(buf)); + if (copy_from_user(buf, buffer, size)) + return -EFAULT; + + tmp = simple_strtol(buf, &end, 0); + if ((*end != '\0') && !isspace(*end)) + return -EINVAL; + + uml_exitcode = tmp; + return count; +} + +static const struct proc_ops exitcode_proc_ops = { + .proc_open = exitcode_proc_open, + .proc_read = seq_read, + .proc_lseek = seq_lseek, + .proc_release = single_release, + .proc_write = exitcode_proc_write, +}; + +static int make_proc_exitcode(void) +{ + struct proc_dir_entry *ent; + + ent = proc_create("exitcode", 0600, NULL, &exitcode_proc_ops); + if (ent == NULL) { + printk(KERN_WARNING "make_proc_exitcode : Failed to register " + "/proc/exitcode\n"); + return 0; + } + return 0; +} + +__initcall(make_proc_exitcode); diff --git a/arch/um/kernel/gprof_syms.c b/arch/um/kernel/gprof_syms.c new file mode 100644 index 0000000000..84d5369087 --- /dev/null +++ b/arch/um/kernel/gprof_syms.c @@ -0,0 +1,9 @@ +// SPDX-License-Identifier: GPL-2.0 +/* + * Copyright (C) 2001 - 2007 Jeff Dike (jdike@{addtoit,linux.intel}.com) + */ + +#include <linux/module.h> + +extern void mcount(void); +EXPORT_SYMBOL(mcount); diff --git a/arch/um/kernel/initrd.c b/arch/um/kernel/initrd.c new file mode 100644 index 0000000000..47b8cb1a11 --- /dev/null +++ b/arch/um/kernel/initrd.c @@ -0,0 +1,45 @@ +// SPDX-License-Identifier: GPL-2.0 +/* + * Copyright (C) 2000 - 2007 Jeff Dike (jdike@{addtoit,linux.intel}.com) + */ + +#include <linux/init.h> +#include <linux/memblock.h> +#include <linux/initrd.h> +#include <asm/types.h> +#include <init.h> +#include <os.h> + +#include "um_arch.h" + +/* Changed by uml_initrd_setup, which is a setup */ +static char *initrd __initdata = NULL; + +int __init read_initrd(void) +{ + unsigned long long size; + void *area; + + if (!initrd) + return 0; + + area = uml_load_file(initrd, &size); + if (!area) + return 0; + + initrd_start = (unsigned long) area; + initrd_end = initrd_start + size; + return 0; +} + +static int __init uml_initrd_setup(char *line, int *add) +{ + initrd = line; + return 0; +} + +__uml_setup("initrd=", uml_initrd_setup, +"initrd=<initrd image>\n" +" This is used to boot UML from an initrd image. The argument is the\n" +" name of the file containing the image.\n\n" +); diff --git a/arch/um/kernel/ioport.c b/arch/um/kernel/ioport.c new file mode 100644 index 0000000000..7220615b3b --- /dev/null +++ b/arch/um/kernel/ioport.c @@ -0,0 +1,13 @@ +// SPDX-License-Identifier: GPL-2.0 +/* + * Copyright (C) 2021 Intel Corporation + * Author: Johannes Berg <johannes@sipsolutions.net> + */ +#include <asm/iomap.h> +#include <asm-generic/pci_iomap.h> + +void __iomem *__pci_ioport_map(struct pci_dev *dev, unsigned long port, + unsigned int nr) +{ + return NULL; +} diff --git a/arch/um/kernel/irq.c b/arch/um/kernel/irq.c new file mode 100644 index 0000000000..635d44606b --- /dev/null +++ b/arch/um/kernel/irq.c @@ -0,0 +1,766 @@ +// SPDX-License-Identifier: GPL-2.0 +/* + * Copyright (C) 2017 - Cambridge Greys Ltd + * Copyright (C) 2011 - 2014 Cisco Systems Inc + * Copyright (C) 2000 - 2007 Jeff Dike (jdike@{addtoit,linux.intel}.com) + * Derived (i.e. mostly copied) from arch/i386/kernel/irq.c: + * Copyright (C) 1992, 1998 Linus Torvalds, Ingo Molnar + */ + +#include <linux/cpumask.h> +#include <linux/hardirq.h> +#include <linux/interrupt.h> +#include <linux/kernel_stat.h> +#include <linux/module.h> +#include <linux/sched.h> +#include <linux/seq_file.h> +#include <linux/slab.h> +#include <as-layout.h> +#include <kern_util.h> +#include <os.h> +#include <irq_user.h> +#include <irq_kern.h> +#include <linux/time-internal.h> + + +/* When epoll triggers we do not know why it did so + * we can also have different IRQs for read and write. + * This is why we keep a small irq_reg array for each fd - + * one entry per IRQ type + */ +struct irq_reg { + void *id; + int irq; + /* it's cheaper to store this than to query it */ + int events; + bool active; + bool pending; + bool wakeup; +#ifdef CONFIG_UML_TIME_TRAVEL_SUPPORT + bool pending_on_resume; + void (*timetravel_handler)(int, int, void *, + struct time_travel_event *); + struct time_travel_event event; +#endif +}; + +struct irq_entry { + struct list_head list; + int fd; + struct irq_reg reg[NUM_IRQ_TYPES]; + bool suspended; + bool sigio_workaround; +}; + +static DEFINE_SPINLOCK(irq_lock); +static LIST_HEAD(active_fds); +static DECLARE_BITMAP(irqs_allocated, UM_LAST_SIGNAL_IRQ); +static bool irqs_suspended; + +static void irq_io_loop(struct irq_reg *irq, struct uml_pt_regs *regs) +{ +/* + * irq->active guards against reentry + * irq->pending accumulates pending requests + * if pending is raised the irq_handler is re-run + * until pending is cleared + */ + if (irq->active) { + irq->active = false; + + do { + irq->pending = false; + do_IRQ(irq->irq, regs); + } while (irq->pending); + + irq->active = true; + } else { + irq->pending = true; + } +} + +#ifdef CONFIG_UML_TIME_TRAVEL_SUPPORT +static void irq_event_handler(struct time_travel_event *ev) +{ + struct irq_reg *reg = container_of(ev, struct irq_reg, event); + + /* do nothing if suspended - just to cause a wakeup */ + if (irqs_suspended) + return; + + generic_handle_irq(reg->irq); +} + +static bool irq_do_timetravel_handler(struct irq_entry *entry, + enum um_irq_type t) +{ + struct irq_reg *reg = &entry->reg[t]; + + if (!reg->timetravel_handler) + return false; + + /* + * Handle all messages - we might get multiple even while + * interrupts are already suspended, due to suspend order + * etc. Note that time_travel_add_irq_event() will not add + * an event twice, if it's pending already "first wins". + */ + reg->timetravel_handler(reg->irq, entry->fd, reg->id, ®->event); + + if (!reg->event.pending) + return false; + + if (irqs_suspended) + reg->pending_on_resume = true; + return true; +} +#else +static bool irq_do_timetravel_handler(struct irq_entry *entry, + enum um_irq_type t) +{ + return false; +} +#endif + +static void sigio_reg_handler(int idx, struct irq_entry *entry, enum um_irq_type t, + struct uml_pt_regs *regs, + bool timetravel_handlers_only) +{ + struct irq_reg *reg = &entry->reg[t]; + + if (!reg->events) + return; + + if (os_epoll_triggered(idx, reg->events) <= 0) + return; + + if (irq_do_timetravel_handler(entry, t)) + return; + + /* + * If we're called to only run time-travel handlers then don't + * actually proceed but mark sigio as pending (if applicable). + * For suspend/resume, timetravel_handlers_only may be true + * despite time-travel not being configured and used. + */ + if (timetravel_handlers_only) { +#ifdef CONFIG_UML_TIME_TRAVEL_SUPPORT + mark_sigio_pending(); +#endif + return; + } + + irq_io_loop(reg, regs); +} + +static void _sigio_handler(struct uml_pt_regs *regs, + bool timetravel_handlers_only) +{ + struct irq_entry *irq_entry; + int n, i; + + if (timetravel_handlers_only && !um_irq_timetravel_handler_used()) + return; + + while (1) { + /* This is now lockless - epoll keeps back-referencesto the irqs + * which have trigger it so there is no need to walk the irq + * list and lock it every time. We avoid locking by turning off + * IO for a specific fd by executing os_del_epoll_fd(fd) before + * we do any changes to the actual data structures + */ + n = os_waiting_for_events_epoll(); + + if (n <= 0) { + if (n == -EINTR) + continue; + else + break; + } + + for (i = 0; i < n ; i++) { + enum um_irq_type t; + + irq_entry = os_epoll_get_data_pointer(i); + + for (t = 0; t < NUM_IRQ_TYPES; t++) + sigio_reg_handler(i, irq_entry, t, regs, + timetravel_handlers_only); + } + } + + if (!timetravel_handlers_only) + free_irqs(); +} + +void sigio_handler(int sig, struct siginfo *unused_si, struct uml_pt_regs *regs) +{ + _sigio_handler(regs, irqs_suspended); +} + +static struct irq_entry *get_irq_entry_by_fd(int fd) +{ + struct irq_entry *walk; + + lockdep_assert_held(&irq_lock); + + list_for_each_entry(walk, &active_fds, list) { + if (walk->fd == fd) + return walk; + } + + return NULL; +} + +static void free_irq_entry(struct irq_entry *to_free, bool remove) +{ + if (!to_free) + return; + + if (remove) + os_del_epoll_fd(to_free->fd); + list_del(&to_free->list); + kfree(to_free); +} + +static bool update_irq_entry(struct irq_entry *entry) +{ + enum um_irq_type i; + int events = 0; + + for (i = 0; i < NUM_IRQ_TYPES; i++) + events |= entry->reg[i].events; + + if (events) { + /* will modify (instead of add) if needed */ + os_add_epoll_fd(events, entry->fd, entry); + return true; + } + + os_del_epoll_fd(entry->fd); + return false; +} + +static void update_or_free_irq_entry(struct irq_entry *entry) +{ + if (!update_irq_entry(entry)) + free_irq_entry(entry, false); +} + +static int activate_fd(int irq, int fd, enum um_irq_type type, void *dev_id, + void (*timetravel_handler)(int, int, void *, + struct time_travel_event *)) +{ + struct irq_entry *irq_entry; + int err, events = os_event_mask(type); + unsigned long flags; + + err = os_set_fd_async(fd); + if (err < 0) + goto out; + + spin_lock_irqsave(&irq_lock, flags); + irq_entry = get_irq_entry_by_fd(fd); + if (irq_entry) { + /* cannot register the same FD twice with the same type */ + if (WARN_ON(irq_entry->reg[type].events)) { + err = -EALREADY; + goto out_unlock; + } + + /* temporarily disable to avoid IRQ-side locking */ + os_del_epoll_fd(fd); + } else { + irq_entry = kzalloc(sizeof(*irq_entry), GFP_ATOMIC); + if (!irq_entry) { + err = -ENOMEM; + goto out_unlock; + } + irq_entry->fd = fd; + list_add_tail(&irq_entry->list, &active_fds); + maybe_sigio_broken(fd); + } + + irq_entry->reg[type].id = dev_id; + irq_entry->reg[type].irq = irq; + irq_entry->reg[type].active = true; + irq_entry->reg[type].events = events; + +#ifdef CONFIG_UML_TIME_TRAVEL_SUPPORT + if (um_irq_timetravel_handler_used()) { + irq_entry->reg[type].timetravel_handler = timetravel_handler; + irq_entry->reg[type].event.fn = irq_event_handler; + } +#endif + + WARN_ON(!update_irq_entry(irq_entry)); + spin_unlock_irqrestore(&irq_lock, flags); + + return 0; +out_unlock: + spin_unlock_irqrestore(&irq_lock, flags); +out: + return err; +} + +/* + * Remove the entry or entries for a specific FD, if you + * don't want to remove all the possible entries then use + * um_free_irq() or deactivate_fd() instead. + */ +void free_irq_by_fd(int fd) +{ + struct irq_entry *to_free; + unsigned long flags; + + spin_lock_irqsave(&irq_lock, flags); + to_free = get_irq_entry_by_fd(fd); + free_irq_entry(to_free, true); + spin_unlock_irqrestore(&irq_lock, flags); +} +EXPORT_SYMBOL(free_irq_by_fd); + +static void free_irq_by_irq_and_dev(unsigned int irq, void *dev) +{ + struct irq_entry *entry; + unsigned long flags; + + spin_lock_irqsave(&irq_lock, flags); + list_for_each_entry(entry, &active_fds, list) { + enum um_irq_type i; + + for (i = 0; i < NUM_IRQ_TYPES; i++) { + struct irq_reg *reg = &entry->reg[i]; + + if (!reg->events) + continue; + if (reg->irq != irq) + continue; + if (reg->id != dev) + continue; + + os_del_epoll_fd(entry->fd); + reg->events = 0; + update_or_free_irq_entry(entry); + goto out; + } + } +out: + spin_unlock_irqrestore(&irq_lock, flags); +} + +void deactivate_fd(int fd, int irqnum) +{ + struct irq_entry *entry; + unsigned long flags; + enum um_irq_type i; + + os_del_epoll_fd(fd); + + spin_lock_irqsave(&irq_lock, flags); + entry = get_irq_entry_by_fd(fd); + if (!entry) + goto out; + + for (i = 0; i < NUM_IRQ_TYPES; i++) { + if (!entry->reg[i].events) + continue; + if (entry->reg[i].irq == irqnum) + entry->reg[i].events = 0; + } + + update_or_free_irq_entry(entry); +out: + spin_unlock_irqrestore(&irq_lock, flags); + + ignore_sigio_fd(fd); +} +EXPORT_SYMBOL(deactivate_fd); + +/* + * Called just before shutdown in order to provide a clean exec + * environment in case the system is rebooting. No locking because + * that would cause a pointless shutdown hang if something hadn't + * released the lock. + */ +int deactivate_all_fds(void) +{ + struct irq_entry *entry; + + /* Stop IO. The IRQ loop has no lock so this is our + * only way of making sure we are safe to dispose + * of all IRQ handlers + */ + os_set_ioignore(); + + /* we can no longer call kfree() here so just deactivate */ + list_for_each_entry(entry, &active_fds, list) + os_del_epoll_fd(entry->fd); + os_close_epoll_fd(); + return 0; +} + +/* + * do_IRQ handles all normal device IRQs (the special + * SMP cross-CPU interrupts have their own specific + * handlers). + */ +unsigned int do_IRQ(int irq, struct uml_pt_regs *regs) +{ + struct pt_regs *old_regs = set_irq_regs((struct pt_regs *)regs); + irq_enter(); + generic_handle_irq(irq); + irq_exit(); + set_irq_regs(old_regs); + return 1; +} + +void um_free_irq(int irq, void *dev) +{ + if (WARN(irq < 0 || irq > UM_LAST_SIGNAL_IRQ, + "freeing invalid irq %d", irq)) + return; + + free_irq_by_irq_and_dev(irq, dev); + free_irq(irq, dev); + clear_bit(irq, irqs_allocated); +} +EXPORT_SYMBOL(um_free_irq); + +static int +_um_request_irq(int irq, int fd, enum um_irq_type type, + irq_handler_t handler, unsigned long irqflags, + const char *devname, void *dev_id, + void (*timetravel_handler)(int, int, void *, + struct time_travel_event *)) +{ + int err; + + if (irq == UM_IRQ_ALLOC) { + int i; + + for (i = UM_FIRST_DYN_IRQ; i < NR_IRQS; i++) { + if (!test_and_set_bit(i, irqs_allocated)) { + irq = i; + break; + } + } + } + + if (irq < 0) + return -ENOSPC; + + if (fd != -1) { + err = activate_fd(irq, fd, type, dev_id, timetravel_handler); + if (err) + goto error; + } + + err = request_irq(irq, handler, irqflags, devname, dev_id); + if (err < 0) + goto error; + + return irq; +error: + clear_bit(irq, irqs_allocated); + return err; +} + +int um_request_irq(int irq, int fd, enum um_irq_type type, + irq_handler_t handler, unsigned long irqflags, + const char *devname, void *dev_id) +{ + return _um_request_irq(irq, fd, type, handler, irqflags, + devname, dev_id, NULL); +} +EXPORT_SYMBOL(um_request_irq); + +#ifdef CONFIG_UML_TIME_TRAVEL_SUPPORT +int um_request_irq_tt(int irq, int fd, enum um_irq_type type, + irq_handler_t handler, unsigned long irqflags, + const char *devname, void *dev_id, + void (*timetravel_handler)(int, int, void *, + struct time_travel_event *)) +{ + return _um_request_irq(irq, fd, type, handler, irqflags, + devname, dev_id, timetravel_handler); +} +EXPORT_SYMBOL(um_request_irq_tt); + +void sigio_run_timetravel_handlers(void) +{ + _sigio_handler(NULL, true); +} +#endif + +#ifdef CONFIG_PM_SLEEP +void um_irqs_suspend(void) +{ + struct irq_entry *entry; + unsigned long flags; + + irqs_suspended = true; + + spin_lock_irqsave(&irq_lock, flags); + list_for_each_entry(entry, &active_fds, list) { + enum um_irq_type t; + bool clear = true; + + for (t = 0; t < NUM_IRQ_TYPES; t++) { + if (!entry->reg[t].events) + continue; + + /* + * For the SIGIO_WRITE_IRQ, which is used to handle the + * SIGIO workaround thread, we need special handling: + * enable wake for it itself, but below we tell it about + * any FDs that should be suspended. + */ + if (entry->reg[t].wakeup || + entry->reg[t].irq == SIGIO_WRITE_IRQ +#ifdef CONFIG_UML_TIME_TRAVEL_SUPPORT + || entry->reg[t].timetravel_handler +#endif + ) { + clear = false; + break; + } + } + + if (clear) { + entry->suspended = true; + os_clear_fd_async(entry->fd); + entry->sigio_workaround = + !__ignore_sigio_fd(entry->fd); + } + } + spin_unlock_irqrestore(&irq_lock, flags); +} + +void um_irqs_resume(void) +{ + struct irq_entry *entry; + unsigned long flags; + + + local_irq_save(flags); +#ifdef CONFIG_UML_TIME_TRAVEL_SUPPORT + /* + * We don't need to lock anything here since we're in resume + * and nothing else is running, but have disabled IRQs so we + * don't try anything else with the interrupt list from there. + */ + list_for_each_entry(entry, &active_fds, list) { + enum um_irq_type t; + + for (t = 0; t < NUM_IRQ_TYPES; t++) { + struct irq_reg *reg = &entry->reg[t]; + + if (reg->pending_on_resume) { + irq_enter(); + generic_handle_irq(reg->irq); + irq_exit(); + reg->pending_on_resume = false; + } + } + } +#endif + + spin_lock(&irq_lock); + list_for_each_entry(entry, &active_fds, list) { + if (entry->suspended) { + int err = os_set_fd_async(entry->fd); + + WARN(err < 0, "os_set_fd_async returned %d\n", err); + entry->suspended = false; + + if (entry->sigio_workaround) { + err = __add_sigio_fd(entry->fd); + WARN(err < 0, "add_sigio_returned %d\n", err); + } + } + } + spin_unlock_irqrestore(&irq_lock, flags); + + irqs_suspended = false; + send_sigio_to_self(); +} + +static int normal_irq_set_wake(struct irq_data *d, unsigned int on) +{ + struct irq_entry *entry; + unsigned long flags; + + spin_lock_irqsave(&irq_lock, flags); + list_for_each_entry(entry, &active_fds, list) { + enum um_irq_type t; + + for (t = 0; t < NUM_IRQ_TYPES; t++) { + if (!entry->reg[t].events) + continue; + + if (entry->reg[t].irq != d->irq) + continue; + entry->reg[t].wakeup = on; + goto unlock; + } + } +unlock: + spin_unlock_irqrestore(&irq_lock, flags); + return 0; +} +#else +#define normal_irq_set_wake NULL +#endif + +/* + * irq_chip must define at least enable/disable and ack when + * the edge handler is used. + */ +static void dummy(struct irq_data *d) +{ +} + +/* This is used for everything other than the timer. */ +static struct irq_chip normal_irq_type = { + .name = "SIGIO", + .irq_disable = dummy, + .irq_enable = dummy, + .irq_ack = dummy, + .irq_mask = dummy, + .irq_unmask = dummy, + .irq_set_wake = normal_irq_set_wake, +}; + +static struct irq_chip alarm_irq_type = { + .name = "SIGALRM", + .irq_disable = dummy, + .irq_enable = dummy, + .irq_ack = dummy, + .irq_mask = dummy, + .irq_unmask = dummy, +}; + +void __init init_IRQ(void) +{ + int i; + + irq_set_chip_and_handler(TIMER_IRQ, &alarm_irq_type, handle_edge_irq); + + for (i = 1; i < UM_LAST_SIGNAL_IRQ; i++) + irq_set_chip_and_handler(i, &normal_irq_type, handle_edge_irq); + /* Initialize EPOLL Loop */ + os_setup_epoll(); +} + +/* + * IRQ stack entry and exit: + * + * Unlike i386, UML doesn't receive IRQs on the normal kernel stack + * and switch over to the IRQ stack after some preparation. We use + * sigaltstack to receive signals on a separate stack from the start. + * These two functions make sure the rest of the kernel won't be too + * upset by being on a different stack. The IRQ stack has a + * thread_info structure at the bottom so that current et al continue + * to work. + * + * to_irq_stack copies the current task's thread_info to the IRQ stack + * thread_info and sets the tasks's stack to point to the IRQ stack. + * + * from_irq_stack copies the thread_info struct back (flags may have + * been modified) and resets the task's stack pointer. + * + * Tricky bits - + * + * What happens when two signals race each other? UML doesn't block + * signals with sigprocmask, SA_DEFER, or sa_mask, so a second signal + * could arrive while a previous one is still setting up the + * thread_info. + * + * There are three cases - + * The first interrupt on the stack - sets up the thread_info and + * handles the interrupt + * A nested interrupt interrupting the copying of the thread_info - + * can't handle the interrupt, as the stack is in an unknown state + * A nested interrupt not interrupting the copying of the + * thread_info - doesn't do any setup, just handles the interrupt + * + * The first job is to figure out whether we interrupted stack setup. + * This is done by xchging the signal mask with thread_info->pending. + * If the value that comes back is zero, then there is no setup in + * progress, and the interrupt can be handled. If the value is + * non-zero, then there is stack setup in progress. In order to have + * the interrupt handled, we leave our signal in the mask, and it will + * be handled by the upper handler after it has set up the stack. + * + * Next is to figure out whether we are the outer handler or a nested + * one. As part of setting up the stack, thread_info->real_thread is + * set to non-NULL (and is reset to NULL on exit). This is the + * nesting indicator. If it is non-NULL, then the stack is already + * set up and the handler can run. + */ + +static unsigned long pending_mask; + +unsigned long to_irq_stack(unsigned long *mask_out) +{ + struct thread_info *ti; + unsigned long mask, old; + int nested; + + mask = xchg(&pending_mask, *mask_out); + if (mask != 0) { + /* + * If any interrupts come in at this point, we want to + * make sure that their bits aren't lost by our + * putting our bit in. So, this loop accumulates bits + * until xchg returns the same value that we put in. + * When that happens, there were no new interrupts, + * and pending_mask contains a bit for each interrupt + * that came in. + */ + old = *mask_out; + do { + old |= mask; + mask = xchg(&pending_mask, old); + } while (mask != old); + return 1; + } + + ti = current_thread_info(); + nested = (ti->real_thread != NULL); + if (!nested) { + struct task_struct *task; + struct thread_info *tti; + + task = cpu_tasks[ti->cpu].task; + tti = task_thread_info(task); + + *ti = *tti; + ti->real_thread = tti; + task->stack = ti; + } + + mask = xchg(&pending_mask, 0); + *mask_out |= mask | nested; + return 0; +} + +unsigned long from_irq_stack(int nested) +{ + struct thread_info *ti, *to; + unsigned long mask; + + ti = current_thread_info(); + + pending_mask = 1; + + to = ti->real_thread; + current->stack = to; + ti->real_thread = NULL; + *to = *ti; + + mask = xchg(&pending_mask, 0); + return mask & ~1; +} + diff --git a/arch/um/kernel/kmsg_dump.c b/arch/um/kernel/kmsg_dump.c new file mode 100644 index 0000000000..427dd5a61a --- /dev/null +++ b/arch/um/kernel/kmsg_dump.c @@ -0,0 +1,65 @@ +// SPDX-License-Identifier: GPL-2.0 +#include <linux/kmsg_dump.h> +#include <linux/spinlock.h> +#include <linux/console.h> +#include <linux/string.h> +#include <shared/init.h> +#include <shared/kern.h> +#include <os.h> + +static void kmsg_dumper_stdout(struct kmsg_dumper *dumper, + enum kmsg_dump_reason reason) +{ + static struct kmsg_dump_iter iter; + static DEFINE_SPINLOCK(lock); + static char line[1024]; + struct console *con; + unsigned long flags; + size_t len = 0; + int cookie; + + /* + * If no consoles are available to output crash information, dump + * the kmsg buffer to stdout. + */ + + cookie = console_srcu_read_lock(); + for_each_console_srcu(con) { + /* + * The ttynull console and disabled consoles are ignored + * since they cannot output. All other consoles are + * expected to output the crash information. + */ + if (strcmp(con->name, "ttynull") != 0 && + (console_srcu_read_flags(con) & CON_ENABLED)) { + break; + } + } + console_srcu_read_unlock(cookie); + if (con) + return; + + if (!spin_trylock_irqsave(&lock, flags)) + return; + + kmsg_dump_rewind(&iter); + + printf("kmsg_dump:\n"); + while (kmsg_dump_get_line(&iter, true, line, sizeof(line), &len)) { + line[len] = '\0'; + printf("%s", line); + } + + spin_unlock_irqrestore(&lock, flags); +} + +static struct kmsg_dumper kmsg_dumper = { + .dump = kmsg_dumper_stdout +}; + +int __init kmsg_dumper_stdout_init(void) +{ + return kmsg_dump_register(&kmsg_dumper); +} + +__uml_postsetup(kmsg_dumper_stdout_init); diff --git a/arch/um/kernel/ksyms.c b/arch/um/kernel/ksyms.c new file mode 100644 index 0000000000..3a85bde3e1 --- /dev/null +++ b/arch/um/kernel/ksyms.c @@ -0,0 +1,48 @@ +// SPDX-License-Identifier: GPL-2.0 +/* + * Copyright (C) 2001 - 2007 Jeff Dike (jdike@{addtoit,linux.intel}.com) + */ + +#include <linux/module.h> +#include <os.h> + +EXPORT_SYMBOL(um_set_signals); +EXPORT_SYMBOL(signals_enabled); + +EXPORT_SYMBOL(os_stat_fd); +EXPORT_SYMBOL(os_stat_file); +EXPORT_SYMBOL(os_access); +EXPORT_SYMBOL(os_set_exec_close); +EXPORT_SYMBOL(os_getpid); +EXPORT_SYMBOL(os_open_file); +EXPORT_SYMBOL(os_read_file); +EXPORT_SYMBOL(os_write_file); +EXPORT_SYMBOL(os_seek_file); +EXPORT_SYMBOL(os_lock_file); +EXPORT_SYMBOL(os_ioctl_generic); +EXPORT_SYMBOL(os_pipe); +EXPORT_SYMBOL(os_file_type); +EXPORT_SYMBOL(os_file_mode); +EXPORT_SYMBOL(os_file_size); +EXPORT_SYMBOL(os_flush_stdout); +EXPORT_SYMBOL(os_close_file); +EXPORT_SYMBOL(os_set_fd_async); +EXPORT_SYMBOL(os_set_fd_block); +EXPORT_SYMBOL(helper_wait); +EXPORT_SYMBOL(os_shutdown_socket); +EXPORT_SYMBOL(os_create_unix_socket); +EXPORT_SYMBOL(os_connect_socket); +EXPORT_SYMBOL(os_accept_connection); +EXPORT_SYMBOL(os_rcv_fd); +EXPORT_SYMBOL(run_helper); +EXPORT_SYMBOL(os_major); +EXPORT_SYMBOL(os_minor); +EXPORT_SYMBOL(os_makedev); +EXPORT_SYMBOL(os_eventfd); +EXPORT_SYMBOL(os_sendmsg_fds); + +EXPORT_SYMBOL(add_sigio_fd); +EXPORT_SYMBOL(ignore_sigio_fd); +EXPORT_SYMBOL(sigio_broken); + +EXPORT_SYMBOL(syscall); diff --git a/arch/um/kernel/load_file.c b/arch/um/kernel/load_file.c new file mode 100644 index 0000000000..5cecd0e291 --- /dev/null +++ b/arch/um/kernel/load_file.c @@ -0,0 +1,61 @@ +// SPDX-License-Identifier: GPL-2.0 +/* + * Copyright (C) 2000 - 2007 Jeff Dike (jdike@{addtoit,linux.intel}.com) + */ +#include <linux/memblock.h> +#include <os.h> + +#include "um_arch.h" + +static int __init __uml_load_file(const char *filename, void *buf, int size) +{ + int fd, n; + + fd = os_open_file(filename, of_read(OPENFLAGS()), 0); + if (fd < 0) { + printk(KERN_ERR "Opening '%s' failed - err = %d\n", filename, + -fd); + return -1; + } + n = os_read_file(fd, buf, size); + if (n != size) { + printk(KERN_ERR "Read of %d bytes from '%s' failed, " + "err = %d\n", size, + filename, -n); + return -1; + } + + os_close_file(fd); + return 0; +} + +void *uml_load_file(const char *filename, unsigned long long *size) +{ + void *area; + int err; + + *size = 0; + + if (!filename) + return NULL; + + err = os_file_size(filename, size); + if (err) + return NULL; + + if (*size == 0) { + printk(KERN_ERR "\"%s\" is empty\n", filename); + return NULL; + } + + area = memblock_alloc(*size, SMP_CACHE_BYTES); + if (!area) + panic("%s: Failed to allocate %llu bytes\n", __func__, *size); + + if (__uml_load_file(filename, area, *size)) { + memblock_free(area, *size); + return NULL; + } + + return area; +} diff --git a/arch/um/kernel/maccess.c b/arch/um/kernel/maccess.c new file mode 100644 index 0000000000..8ccd56813f --- /dev/null +++ b/arch/um/kernel/maccess.c @@ -0,0 +1,19 @@ +// SPDX-License-Identifier: GPL-2.0-only +/* + * Copyright (C) 2013 Richard Weinberger <richrd@nod.at> + */ + +#include <linux/uaccess.h> +#include <linux/kernel.h> +#include <os.h> + +bool copy_from_kernel_nofault_allowed(const void *src, size_t size) +{ + void *psrc = (void *)rounddown((unsigned long)src, PAGE_SIZE); + + if ((unsigned long)src < PAGE_SIZE || size <= 0) + return false; + if (os_mincore(psrc, size + src - psrc) <= 0) + return false; + return true; +} diff --git a/arch/um/kernel/mem.c b/arch/um/kernel/mem.c new file mode 100644 index 0000000000..38d5a71a57 --- /dev/null +++ b/arch/um/kernel/mem.c @@ -0,0 +1,238 @@ +// SPDX-License-Identifier: GPL-2.0 +/* + * Copyright (C) 2000 - 2007 Jeff Dike (jdike@{addtoit,linux.intel}.com) + */ + +#include <linux/stddef.h> +#include <linux/module.h> +#include <linux/memblock.h> +#include <linux/highmem.h> +#include <linux/mm.h> +#include <linux/swap.h> +#include <linux/slab.h> +#include <asm/fixmap.h> +#include <asm/page.h> +#include <as-layout.h> +#include <init.h> +#include <kern.h> +#include <kern_util.h> +#include <mem_user.h> +#include <os.h> +#include <linux/sched/task.h> + +#ifdef CONFIG_KASAN +int kasan_um_is_ready; +void kasan_init(void) +{ + /* + * kasan_map_memory will map all of the required address space and + * the host machine will allocate physical memory as necessary. + */ + kasan_map_memory((void *)KASAN_SHADOW_START, KASAN_SHADOW_SIZE); + init_task.kasan_depth = 0; + kasan_um_is_ready = true; +} + +static void (*kasan_init_ptr)(void) +__section(".kasan_init") __used += kasan_init; +#endif + +/* allocated in paging_init, zeroed in mem_init, and unchanged thereafter */ +unsigned long *empty_zero_page = NULL; +EXPORT_SYMBOL(empty_zero_page); + +/* + * Initialized during boot, and readonly for initializing page tables + * afterwards + */ +pgd_t swapper_pg_dir[PTRS_PER_PGD]; + +/* Initialized at boot time, and readonly after that */ +unsigned long long highmem; +EXPORT_SYMBOL(highmem); +int kmalloc_ok = 0; + +/* Used during early boot */ +static unsigned long brk_end; + +void __init mem_init(void) +{ + /* clear the zero-page */ + memset(empty_zero_page, 0, PAGE_SIZE); + + /* Map in the area just after the brk now that kmalloc is about + * to be turned on. + */ + brk_end = (unsigned long) UML_ROUND_UP(sbrk(0)); + map_memory(brk_end, __pa(brk_end), uml_reserved - brk_end, 1, 1, 0); + memblock_free((void *)brk_end, uml_reserved - brk_end); + uml_reserved = brk_end; + + /* this will put all low memory onto the freelists */ + memblock_free_all(); + max_low_pfn = totalram_pages(); + max_pfn = max_low_pfn; + kmalloc_ok = 1; +} + +/* + * Create a page table and place a pointer to it in a middle page + * directory entry. + */ +static void __init one_page_table_init(pmd_t *pmd) +{ + if (pmd_none(*pmd)) { + pte_t *pte = (pte_t *) memblock_alloc_low(PAGE_SIZE, + PAGE_SIZE); + if (!pte) + panic("%s: Failed to allocate %lu bytes align=%lx\n", + __func__, PAGE_SIZE, PAGE_SIZE); + + set_pmd(pmd, __pmd(_KERNPG_TABLE + + (unsigned long) __pa(pte))); + BUG_ON(pte != pte_offset_kernel(pmd, 0)); + } +} + +static void __init one_md_table_init(pud_t *pud) +{ +#ifdef CONFIG_3_LEVEL_PGTABLES + pmd_t *pmd_table = (pmd_t *) memblock_alloc_low(PAGE_SIZE, PAGE_SIZE); + if (!pmd_table) + panic("%s: Failed to allocate %lu bytes align=%lx\n", + __func__, PAGE_SIZE, PAGE_SIZE); + + set_pud(pud, __pud(_KERNPG_TABLE + (unsigned long) __pa(pmd_table))); + BUG_ON(pmd_table != pmd_offset(pud, 0)); +#endif +} + +static void __init fixrange_init(unsigned long start, unsigned long end, + pgd_t *pgd_base) +{ + pgd_t *pgd; + p4d_t *p4d; + pud_t *pud; + pmd_t *pmd; + int i, j; + unsigned long vaddr; + + vaddr = start; + i = pgd_index(vaddr); + j = pmd_index(vaddr); + pgd = pgd_base + i; + + for ( ; (i < PTRS_PER_PGD) && (vaddr < end); pgd++, i++) { + p4d = p4d_offset(pgd, vaddr); + pud = pud_offset(p4d, vaddr); + if (pud_none(*pud)) + one_md_table_init(pud); + pmd = pmd_offset(pud, vaddr); + for (; (j < PTRS_PER_PMD) && (vaddr < end); pmd++, j++) { + one_page_table_init(pmd); + vaddr += PMD_SIZE; + } + j = 0; + } +} + +static void __init fixaddr_user_init( void) +{ +#ifdef CONFIG_ARCH_REUSE_HOST_VSYSCALL_AREA + long size = FIXADDR_USER_END - FIXADDR_USER_START; + pte_t *pte; + phys_t p; + unsigned long v, vaddr = FIXADDR_USER_START; + + if (!size) + return; + + fixrange_init( FIXADDR_USER_START, FIXADDR_USER_END, swapper_pg_dir); + v = (unsigned long) memblock_alloc_low(size, PAGE_SIZE); + if (!v) + panic("%s: Failed to allocate %lu bytes align=%lx\n", + __func__, size, PAGE_SIZE); + + memcpy((void *) v , (void *) FIXADDR_USER_START, size); + p = __pa(v); + for ( ; size > 0; size -= PAGE_SIZE, vaddr += PAGE_SIZE, + p += PAGE_SIZE) { + pte = virt_to_kpte(vaddr); + pte_set_val(*pte, p, PAGE_READONLY); + } +#endif +} + +void __init paging_init(void) +{ + unsigned long max_zone_pfn[MAX_NR_ZONES] = { 0 }; + unsigned long vaddr; + + empty_zero_page = (unsigned long *) memblock_alloc_low(PAGE_SIZE, + PAGE_SIZE); + if (!empty_zero_page) + panic("%s: Failed to allocate %lu bytes align=%lx\n", + __func__, PAGE_SIZE, PAGE_SIZE); + + max_zone_pfn[ZONE_NORMAL] = end_iomem >> PAGE_SHIFT; + free_area_init(max_zone_pfn); + + /* + * Fixed mappings, only the page table structure has to be + * created - mappings will be set by set_fixmap(): + */ + vaddr = __fix_to_virt(__end_of_fixed_addresses - 1) & PMD_MASK; + fixrange_init(vaddr, FIXADDR_TOP, swapper_pg_dir); + + fixaddr_user_init(); +} + +/* + * This can't do anything because nothing in the kernel image can be freed + * since it's not in kernel physical memory. + */ + +void free_initmem(void) +{ +} + +/* Allocate and free page tables. */ + +pgd_t *pgd_alloc(struct mm_struct *mm) +{ + pgd_t *pgd = (pgd_t *)__get_free_page(GFP_KERNEL); + + if (pgd) { + memset(pgd, 0, USER_PTRS_PER_PGD * sizeof(pgd_t)); + memcpy(pgd + USER_PTRS_PER_PGD, + swapper_pg_dir + USER_PTRS_PER_PGD, + (PTRS_PER_PGD - USER_PTRS_PER_PGD) * sizeof(pgd_t)); + } + return pgd; +} + +void *uml_kmalloc(int size, int flags) +{ + return kmalloc(size, flags); +} + +static const pgprot_t protection_map[16] = { + [VM_NONE] = PAGE_NONE, + [VM_READ] = PAGE_READONLY, + [VM_WRITE] = PAGE_COPY, + [VM_WRITE | VM_READ] = PAGE_COPY, + [VM_EXEC] = PAGE_READONLY, + [VM_EXEC | VM_READ] = PAGE_READONLY, + [VM_EXEC | VM_WRITE] = PAGE_COPY, + [VM_EXEC | VM_WRITE | VM_READ] = PAGE_COPY, + [VM_SHARED] = PAGE_NONE, + [VM_SHARED | VM_READ] = PAGE_READONLY, + [VM_SHARED | VM_WRITE] = PAGE_SHARED, + [VM_SHARED | VM_WRITE | VM_READ] = PAGE_SHARED, + [VM_SHARED | VM_EXEC] = PAGE_READONLY, + [VM_SHARED | VM_EXEC | VM_READ] = PAGE_READONLY, + [VM_SHARED | VM_EXEC | VM_WRITE] = PAGE_SHARED, + [VM_SHARED | VM_EXEC | VM_WRITE | VM_READ] = PAGE_SHARED +}; +DECLARE_VM_GET_PAGE_PROT diff --git a/arch/um/kernel/physmem.c b/arch/um/kernel/physmem.c new file mode 100644 index 0000000000..91485119ae --- /dev/null +++ b/arch/um/kernel/physmem.c @@ -0,0 +1,222 @@ +// SPDX-License-Identifier: GPL-2.0 +/* + * Copyright (C) 2000 - 2007 Jeff Dike (jdike@{addtoit,linux.intel}.com) + */ + +#include <linux/module.h> +#include <linux/memblock.h> +#include <linux/mm.h> +#include <linux/pfn.h> +#include <asm/page.h> +#include <asm/sections.h> +#include <as-layout.h> +#include <init.h> +#include <kern.h> +#include <mem_user.h> +#include <os.h> + +static int physmem_fd = -1; + +/* Changed during early boot */ +unsigned long high_physmem; +EXPORT_SYMBOL(high_physmem); + +extern unsigned long long physmem_size; + +void __init mem_total_pages(unsigned long physmem, unsigned long iomem, + unsigned long highmem) +{ + unsigned long phys_pages, highmem_pages; + unsigned long iomem_pages, total_pages; + + phys_pages = physmem >> PAGE_SHIFT; + iomem_pages = iomem >> PAGE_SHIFT; + highmem_pages = highmem >> PAGE_SHIFT; + + total_pages = phys_pages + iomem_pages + highmem_pages; + + max_mapnr = total_pages; +} + +void map_memory(unsigned long virt, unsigned long phys, unsigned long len, + int r, int w, int x) +{ + __u64 offset; + int fd, err; + + fd = phys_mapping(phys, &offset); + err = os_map_memory((void *) virt, fd, offset, len, r, w, x); + if (err) { + if (err == -ENOMEM) + printk(KERN_ERR "try increasing the host's " + "/proc/sys/vm/max_map_count to <physical " + "memory size>/4096\n"); + panic("map_memory(0x%lx, %d, 0x%llx, %ld, %d, %d, %d) failed, " + "err = %d\n", virt, fd, offset, len, r, w, x, err); + } +} + +/** + * setup_physmem() - Setup physical memory for UML + * @start: Start address of the physical kernel memory, + * i.e start address of the executable image. + * @reserve_end: end address of the physical kernel memory. + * @len: Length of total physical memory that should be mapped/made + * available, in bytes. + * @highmem: Number of highmem bytes that should be mapped/made available. + * + * Creates an unlinked temporary file of size (len + highmem) and memory maps + * it on the last executable image address (uml_reserved). + * + * The offset is needed as the length of the total physical memory + * (len + highmem) includes the size of the memory used be the executable image, + * but the mapped-to address is the last address of the executable image + * (uml_reserved == end address of executable image). + * + * The memory mapped memory of the temporary file is used as backing memory + * of all user space processes/kernel tasks. + */ +void __init setup_physmem(unsigned long start, unsigned long reserve_end, + unsigned long len, unsigned long long highmem) +{ + unsigned long reserve = reserve_end - start; + long map_size = len - reserve; + int err; + + if(map_size <= 0) { + os_warn("Too few physical memory! Needed=%lu, given=%lu\n", + reserve, len); + exit(1); + } + + physmem_fd = create_mem_file(len + highmem); + + err = os_map_memory((void *) reserve_end, physmem_fd, reserve, + map_size, 1, 1, 1); + if (err < 0) { + os_warn("setup_physmem - mapping %ld bytes of memory at 0x%p " + "failed - errno = %d\n", map_size, + (void *) reserve_end, err); + exit(1); + } + + /* + * Special kludge - This page will be mapped in to userspace processes + * from physmem_fd, so it needs to be written out there. + */ + os_seek_file(physmem_fd, __pa(__syscall_stub_start)); + os_write_file(physmem_fd, __syscall_stub_start, PAGE_SIZE); + os_fsync_file(physmem_fd); + + memblock_add(__pa(start), len + highmem); + memblock_reserve(__pa(start), reserve); + + min_low_pfn = PFN_UP(__pa(reserve_end)); + max_low_pfn = min_low_pfn + (map_size >> PAGE_SHIFT); +} + +int phys_mapping(unsigned long phys, unsigned long long *offset_out) +{ + int fd = -1; + + if (phys < physmem_size) { + fd = physmem_fd; + *offset_out = phys; + } + else if (phys < __pa(end_iomem)) { + struct iomem_region *region = iomem_regions; + + while (region != NULL) { + if ((phys >= region->phys) && + (phys < region->phys + region->size)) { + fd = region->fd; + *offset_out = phys - region->phys; + break; + } + region = region->next; + } + } + else if (phys < __pa(end_iomem) + highmem) { + fd = physmem_fd; + *offset_out = phys - iomem_size; + } + + return fd; +} +EXPORT_SYMBOL(phys_mapping); + +static int __init uml_mem_setup(char *line, int *add) +{ + char *retptr; + physmem_size = memparse(line,&retptr); + return 0; +} +__uml_setup("mem=", uml_mem_setup, +"mem=<Amount of desired ram>\n" +" This controls how much \"physical\" memory the kernel allocates\n" +" for the system. The size is specified as a number followed by\n" +" one of 'k', 'K', 'm', 'M', which have the obvious meanings.\n" +" This is not related to the amount of memory in the host. It can\n" +" be more, and the excess, if it's ever used, will just be swapped out.\n" +" Example: mem=64M\n\n" +); + +extern int __init parse_iomem(char *str, int *add); + +__uml_setup("iomem=", parse_iomem, +"iomem=<name>,<file>\n" +" Configure <file> as an IO memory region named <name>.\n\n" +); + +/* + * This list is constructed in parse_iomem and addresses filled in + * setup_iomem, both of which run during early boot. Afterwards, it's + * unchanged. + */ +struct iomem_region *iomem_regions; + +/* Initialized in parse_iomem and unchanged thereafter */ +int iomem_size; + +unsigned long find_iomem(char *driver, unsigned long *len_out) +{ + struct iomem_region *region = iomem_regions; + + while (region != NULL) { + if (!strcmp(region->driver, driver)) { + *len_out = region->size; + return region->virt; + } + + region = region->next; + } + + return 0; +} +EXPORT_SYMBOL(find_iomem); + +static int setup_iomem(void) +{ + struct iomem_region *region = iomem_regions; + unsigned long iomem_start = high_physmem + PAGE_SIZE; + int err; + + while (region != NULL) { + err = os_map_memory((void *) iomem_start, region->fd, 0, + region->size, 1, 1, 0); + if (err) + printk(KERN_ERR "Mapping iomem region for driver '%s' " + "failed, errno = %d\n", region->driver, -err); + else { + region->virt = iomem_start; + region->phys = __pa(region->virt); + } + + iomem_start += region->size + PAGE_SIZE; + region = region->next; + } + + return 0; +} + +__initcall(setup_iomem); diff --git a/arch/um/kernel/process.c b/arch/um/kernel/process.c new file mode 100644 index 0000000000..106b7da2f8 --- /dev/null +++ b/arch/um/kernel/process.c @@ -0,0 +1,402 @@ +// SPDX-License-Identifier: GPL-2.0 +/* + * Copyright (C) 2015 Anton Ivanov (aivanov@{brocade.com,kot-begemot.co.uk}) + * Copyright (C) 2015 Thomas Meyer (thomas@m3y3r.de) + * Copyright (C) 2000 - 2007 Jeff Dike (jdike@{addtoit,linux.intel}.com) + * Copyright 2003 PathScale, Inc. + */ + +#include <linux/stddef.h> +#include <linux/err.h> +#include <linux/hardirq.h> +#include <linux/mm.h> +#include <linux/module.h> +#include <linux/personality.h> +#include <linux/proc_fs.h> +#include <linux/ptrace.h> +#include <linux/random.h> +#include <linux/slab.h> +#include <linux/sched.h> +#include <linux/sched/debug.h> +#include <linux/sched/task.h> +#include <linux/sched/task_stack.h> +#include <linux/seq_file.h> +#include <linux/tick.h> +#include <linux/threads.h> +#include <linux/resume_user_mode.h> +#include <asm/current.h> +#include <asm/mmu_context.h> +#include <linux/uaccess.h> +#include <as-layout.h> +#include <kern_util.h> +#include <os.h> +#include <skas.h> +#include <registers.h> +#include <linux/time-internal.h> +#include <linux/elfcore.h> + +/* + * This is a per-cpu array. A processor only modifies its entry and it only + * cares about its entry, so it's OK if another processor is modifying its + * entry. + */ +struct cpu_task cpu_tasks[NR_CPUS] = { [0 ... NR_CPUS - 1] = { -1, NULL } }; + +static inline int external_pid(void) +{ + /* FIXME: Need to look up userspace_pid by cpu */ + return userspace_pid[0]; +} + +int pid_to_processor_id(int pid) +{ + int i; + + for (i = 0; i < ncpus; i++) { + if (cpu_tasks[i].pid == pid) + return i; + } + return -1; +} + +void free_stack(unsigned long stack, int order) +{ + free_pages(stack, order); +} + +unsigned long alloc_stack(int order, int atomic) +{ + unsigned long page; + gfp_t flags = GFP_KERNEL; + + if (atomic) + flags = GFP_ATOMIC; + page = __get_free_pages(flags, order); + + return page; +} + +static inline void set_current(struct task_struct *task) +{ + cpu_tasks[task_thread_info(task)->cpu] = ((struct cpu_task) + { external_pid(), task }); +} + +extern void arch_switch_to(struct task_struct *to); + +void *__switch_to(struct task_struct *from, struct task_struct *to) +{ + to->thread.prev_sched = from; + set_current(to); + + switch_threads(&from->thread.switch_buf, &to->thread.switch_buf); + arch_switch_to(current); + + return current->thread.prev_sched; +} + +void interrupt_end(void) +{ + struct pt_regs *regs = ¤t->thread.regs; + + if (need_resched()) + schedule(); + if (test_thread_flag(TIF_SIGPENDING) || + test_thread_flag(TIF_NOTIFY_SIGNAL)) + do_signal(regs); + if (test_thread_flag(TIF_NOTIFY_RESUME)) + resume_user_mode_work(regs); +} + +int get_current_pid(void) +{ + return task_pid_nr(current); +} + +/* + * This is called magically, by its address being stuffed in a jmp_buf + * and being longjmp-d to. + */ +void new_thread_handler(void) +{ + int (*fn)(void *), n; + void *arg; + + if (current->thread.prev_sched != NULL) + schedule_tail(current->thread.prev_sched); + current->thread.prev_sched = NULL; + + fn = current->thread.request.u.thread.proc; + arg = current->thread.request.u.thread.arg; + + /* + * callback returns only if the kernel thread execs a process + */ + n = fn(arg); + userspace(¤t->thread.regs.regs, current_thread_info()->aux_fp_regs); +} + +/* Called magically, see new_thread_handler above */ +void fork_handler(void) +{ + force_flush_all(); + + schedule_tail(current->thread.prev_sched); + + /* + * XXX: if interrupt_end() calls schedule, this call to + * arch_switch_to isn't needed. We could want to apply this to + * improve performance. -bb + */ + arch_switch_to(current); + + current->thread.prev_sched = NULL; + + userspace(¤t->thread.regs.regs, current_thread_info()->aux_fp_regs); +} + +int copy_thread(struct task_struct * p, const struct kernel_clone_args *args) +{ + unsigned long clone_flags = args->flags; + unsigned long sp = args->stack; + unsigned long tls = args->tls; + void (*handler)(void); + int ret = 0; + + p->thread = (struct thread_struct) INIT_THREAD; + + if (!args->fn) { + memcpy(&p->thread.regs.regs, current_pt_regs(), + sizeof(p->thread.regs.regs)); + PT_REGS_SET_SYSCALL_RETURN(&p->thread.regs, 0); + if (sp != 0) + REGS_SP(p->thread.regs.regs.gp) = sp; + + handler = fork_handler; + + arch_copy_thread(¤t->thread.arch, &p->thread.arch); + } else { + get_safe_registers(p->thread.regs.regs.gp, p->thread.regs.regs.fp); + p->thread.request.u.thread.proc = args->fn; + p->thread.request.u.thread.arg = args->fn_arg; + handler = new_thread_handler; + } + + new_thread(task_stack_page(p), &p->thread.switch_buf, handler); + + if (!args->fn) { + clear_flushed_tls(p); + + /* + * Set a new TLS for the child thread? + */ + if (clone_flags & CLONE_SETTLS) + ret = arch_set_tls(p, tls); + } + + return ret; +} + +void initial_thread_cb(void (*proc)(void *), void *arg) +{ + int save_kmalloc_ok = kmalloc_ok; + + kmalloc_ok = 0; + initial_thread_cb_skas(proc, arg); + kmalloc_ok = save_kmalloc_ok; +} + +void um_idle_sleep(void) +{ + if (time_travel_mode != TT_MODE_OFF) + time_travel_sleep(); + else + os_idle_sleep(); +} + +void arch_cpu_idle(void) +{ + cpu_tasks[current_thread_info()->cpu].pid = os_getpid(); + um_idle_sleep(); +} + +int __cant_sleep(void) { + return in_atomic() || irqs_disabled() || in_interrupt(); + /* Is in_interrupt() really needed? */ +} + +int user_context(unsigned long sp) +{ + unsigned long stack; + + stack = sp & (PAGE_MASK << CONFIG_KERNEL_STACK_ORDER); + return stack != (unsigned long) current_thread_info(); +} + +extern exitcall_t __uml_exitcall_begin, __uml_exitcall_end; + +void do_uml_exitcalls(void) +{ + exitcall_t *call; + + call = &__uml_exitcall_end; + while (--call >= &__uml_exitcall_begin) + (*call)(); +} + +char *uml_strdup(const char *string) +{ + return kstrdup(string, GFP_KERNEL); +} +EXPORT_SYMBOL(uml_strdup); + +int copy_to_user_proc(void __user *to, void *from, int size) +{ + return copy_to_user(to, from, size); +} + +int copy_from_user_proc(void *to, void __user *from, int size) +{ + return copy_from_user(to, from, size); +} + +int clear_user_proc(void __user *buf, int size) +{ + return clear_user(buf, size); +} + +static atomic_t using_sysemu = ATOMIC_INIT(0); +int sysemu_supported; + +void set_using_sysemu(int value) +{ + if (value > sysemu_supported) + return; + atomic_set(&using_sysemu, value); +} + +int get_using_sysemu(void) +{ + return atomic_read(&using_sysemu); +} + +static int sysemu_proc_show(struct seq_file *m, void *v) +{ + seq_printf(m, "%d\n", get_using_sysemu()); + return 0; +} + +static int sysemu_proc_open(struct inode *inode, struct file *file) +{ + return single_open(file, sysemu_proc_show, NULL); +} + +static ssize_t sysemu_proc_write(struct file *file, const char __user *buf, + size_t count, loff_t *pos) +{ + char tmp[2]; + + if (copy_from_user(tmp, buf, 1)) + return -EFAULT; + + if (tmp[0] >= '0' && tmp[0] <= '2') + set_using_sysemu(tmp[0] - '0'); + /* We use the first char, but pretend to write everything */ + return count; +} + +static const struct proc_ops sysemu_proc_ops = { + .proc_open = sysemu_proc_open, + .proc_read = seq_read, + .proc_lseek = seq_lseek, + .proc_release = single_release, + .proc_write = sysemu_proc_write, +}; + +int __init make_proc_sysemu(void) +{ + struct proc_dir_entry *ent; + if (!sysemu_supported) + return 0; + + ent = proc_create("sysemu", 0600, NULL, &sysemu_proc_ops); + + if (ent == NULL) + { + printk(KERN_WARNING "Failed to register /proc/sysemu\n"); + return 0; + } + + return 0; +} + +late_initcall(make_proc_sysemu); + +int singlestepping(void * t) +{ + struct task_struct *task = t ? t : current; + + if (!test_thread_flag(TIF_SINGLESTEP)) + return 0; + + if (task->thread.singlestep_syscall) + return 1; + + return 2; +} + +/* + * Only x86 and x86_64 have an arch_align_stack(). + * All other arches have "#define arch_align_stack(x) (x)" + * in their asm/exec.h + * As this is included in UML from asm-um/system-generic.h, + * we can use it to behave as the subarch does. + */ +#ifndef arch_align_stack +unsigned long arch_align_stack(unsigned long sp) +{ + if (!(current->personality & ADDR_NO_RANDOMIZE) && randomize_va_space) + sp -= get_random_u32_below(8192); + return sp & ~0xf; +} +#endif + +unsigned long __get_wchan(struct task_struct *p) +{ + unsigned long stack_page, sp, ip; + bool seen_sched = 0; + + stack_page = (unsigned long) task_stack_page(p); + /* Bail if the process has no kernel stack for some reason */ + if (stack_page == 0) + return 0; + + sp = p->thread.switch_buf->JB_SP; + /* + * Bail if the stack pointer is below the bottom of the kernel + * stack for some reason + */ + if (sp < stack_page) + return 0; + + while (sp < stack_page + THREAD_SIZE) { + ip = *((unsigned long *) sp); + if (in_sched_functions(ip)) + /* Ignore everything until we're above the scheduler */ + seen_sched = 1; + else if (kernel_text_address(ip) && seen_sched) + return ip; + + sp += sizeof(unsigned long); + } + + return 0; +} + +int elf_core_copy_task_fpregs(struct task_struct *t, elf_fpregset_t *fpu) +{ + int cpu = current_thread_info()->cpu; + + return save_i387_registers(userspace_pid[cpu], (unsigned long *) fpu); +} + diff --git a/arch/um/kernel/ptrace.c b/arch/um/kernel/ptrace.c new file mode 100644 index 0000000000..5154b27de5 --- /dev/null +++ b/arch/um/kernel/ptrace.c @@ -0,0 +1,157 @@ +// SPDX-License-Identifier: GPL-2.0 +/* + * Copyright (C) 2000 - 2007 Jeff Dike (jdike@{addtoit,linux.intel}.com) + */ + +#include <linux/audit.h> +#include <linux/ptrace.h> +#include <linux/sched.h> +#include <linux/uaccess.h> +#include <asm/ptrace-abi.h> + +void user_enable_single_step(struct task_struct *child) +{ + set_tsk_thread_flag(child, TIF_SINGLESTEP); + child->thread.singlestep_syscall = 0; + +#ifdef SUBARCH_SET_SINGLESTEPPING + SUBARCH_SET_SINGLESTEPPING(child, 1); +#endif +} + +void user_disable_single_step(struct task_struct *child) +{ + clear_tsk_thread_flag(child, TIF_SINGLESTEP); + child->thread.singlestep_syscall = 0; + +#ifdef SUBARCH_SET_SINGLESTEPPING + SUBARCH_SET_SINGLESTEPPING(child, 0); +#endif +} + +/* + * Called by kernel/ptrace.c when detaching.. + */ +void ptrace_disable(struct task_struct *child) +{ + user_disable_single_step(child); +} + +extern int peek_user(struct task_struct * child, long addr, long data); +extern int poke_user(struct task_struct * child, long addr, long data); + +long arch_ptrace(struct task_struct *child, long request, + unsigned long addr, unsigned long data) +{ + int i, ret; + unsigned long __user *p = (void __user *)data; + void __user *vp = p; + + switch (request) { + /* read the word at location addr in the USER area. */ + case PTRACE_PEEKUSR: + ret = peek_user(child, addr, data); + break; + + /* write the word at location addr in the USER area */ + case PTRACE_POKEUSR: + ret = poke_user(child, addr, data); + break; + + case PTRACE_SYSEMU: + case PTRACE_SYSEMU_SINGLESTEP: + ret = -EIO; + break; + +#ifdef PTRACE_GETREGS + case PTRACE_GETREGS: { /* Get all gp regs from the child. */ + if (!access_ok(p, MAX_REG_OFFSET)) { + ret = -EIO; + break; + } + for ( i = 0; i < MAX_REG_OFFSET; i += sizeof(long) ) { + __put_user(getreg(child, i), p); + p++; + } + ret = 0; + break; + } +#endif +#ifdef PTRACE_SETREGS + case PTRACE_SETREGS: { /* Set all gp regs in the child. */ + unsigned long tmp = 0; + if (!access_ok(p, MAX_REG_OFFSET)) { + ret = -EIO; + break; + } + for ( i = 0; i < MAX_REG_OFFSET; i += sizeof(long) ) { + __get_user(tmp, p); + putreg(child, i, tmp); + p++; + } + ret = 0; + break; + } +#endif + case PTRACE_GET_THREAD_AREA: + ret = ptrace_get_thread_area(child, addr, vp); + break; + + case PTRACE_SET_THREAD_AREA: + ret = ptrace_set_thread_area(child, addr, vp); + break; + + default: + ret = ptrace_request(child, request, addr, data); + if (ret == -EIO) + ret = subarch_ptrace(child, request, addr, data); + break; + } + + return ret; +} + +static void send_sigtrap(struct uml_pt_regs *regs, int error_code) +{ + /* Send us the fake SIGTRAP */ + force_sig_fault(SIGTRAP, TRAP_BRKPT, + /* User-mode eip? */ + UPT_IS_USER(regs) ? (void __user *) UPT_IP(regs) : NULL); +} + +/* + * XXX Check TIF_SINGLESTEP for singlestepping check and + * PT_PTRACED vs TIF_SYSCALL_TRACE for syscall tracing check + */ +int syscall_trace_enter(struct pt_regs *regs) +{ + audit_syscall_entry(UPT_SYSCALL_NR(®s->regs), + UPT_SYSCALL_ARG1(®s->regs), + UPT_SYSCALL_ARG2(®s->regs), + UPT_SYSCALL_ARG3(®s->regs), + UPT_SYSCALL_ARG4(®s->regs)); + + if (!test_thread_flag(TIF_SYSCALL_TRACE)) + return 0; + + return ptrace_report_syscall_entry(regs); +} + +void syscall_trace_leave(struct pt_regs *regs) +{ + int ptraced = current->ptrace; + + audit_syscall_exit(regs); + + /* Fake a debug trap */ + if (test_thread_flag(TIF_SINGLESTEP)) + send_sigtrap(®s->regs, 0); + + if (!test_thread_flag(TIF_SYSCALL_TRACE)) + return; + + ptrace_report_syscall_exit(regs, 0); + /* force do_signal() --> is_syscall() */ + if (ptraced & PT_PTRACED) + set_thread_flag(TIF_SIGPENDING); +} diff --git a/arch/um/kernel/reboot.c b/arch/um/kernel/reboot.c new file mode 100644 index 0000000000..48c0610d50 --- /dev/null +++ b/arch/um/kernel/reboot.c @@ -0,0 +1,60 @@ +// SPDX-License-Identifier: GPL-2.0 +/* + * Copyright (C) 2000 - 2007 Jeff Dike (jdike@{addtoit,linux.intel}.com) + */ + +#include <linux/sched/signal.h> +#include <linux/sched/task.h> +#include <linux/sched/mm.h> +#include <linux/spinlock.h> +#include <linux/slab.h> +#include <linux/oom.h> +#include <kern_util.h> +#include <os.h> +#include <skas.h> + +void (*pm_power_off)(void); +EXPORT_SYMBOL(pm_power_off); + +static void kill_off_processes(void) +{ + struct task_struct *p; + int pid; + + read_lock(&tasklist_lock); + for_each_process(p) { + struct task_struct *t; + + t = find_lock_task_mm(p); + if (!t) + continue; + pid = t->mm->context.id.u.pid; + task_unlock(t); + os_kill_ptraced_process(pid, 1); + } + read_unlock(&tasklist_lock); +} + +void uml_cleanup(void) +{ + kmalloc_ok = 0; + do_uml_exitcalls(); + kill_off_processes(); +} + +void machine_restart(char * __unused) +{ + uml_cleanup(); + reboot_skas(); +} + +void machine_power_off(void) +{ + uml_cleanup(); + halt_skas(); +} + +void machine_halt(void) +{ + machine_power_off(); +} diff --git a/arch/um/kernel/sigio.c b/arch/um/kernel/sigio.c new file mode 100644 index 0000000000..5085a50c3b --- /dev/null +++ b/arch/um/kernel/sigio.c @@ -0,0 +1,48 @@ +// SPDX-License-Identifier: GPL-2.0 +/* + * Copyright (C) 2002 - 2007 Jeff Dike (jdike@{linux.intel,addtoit}.com) + */ + +#include <linux/interrupt.h> +#include <irq_kern.h> +#include <os.h> +#include <sigio.h> + +/* Protected by sigio_lock() called from write_sigio_workaround */ +static int sigio_irq_fd = -1; + +static irqreturn_t sigio_interrupt(int irq, void *data) +{ + char c; + + os_read_file(sigio_irq_fd, &c, sizeof(c)); + return IRQ_HANDLED; +} + +int write_sigio_irq(int fd) +{ + int err; + + err = um_request_irq(SIGIO_WRITE_IRQ, fd, IRQ_READ, sigio_interrupt, + 0, "write sigio", NULL); + if (err < 0) { + printk(KERN_ERR "write_sigio_irq : um_request_irq failed, " + "err = %d\n", err); + return -1; + } + sigio_irq_fd = fd; + return 0; +} + +/* These are called from os-Linux/sigio.c to protect its pollfds arrays. */ +static DEFINE_MUTEX(sigio_mutex); + +void sigio_lock(void) +{ + mutex_lock(&sigio_mutex); +} + +void sigio_unlock(void) +{ + mutex_unlock(&sigio_mutex); +} diff --git a/arch/um/kernel/signal.c b/arch/um/kernel/signal.c new file mode 100644 index 0000000000..ae4658f576 --- /dev/null +++ b/arch/um/kernel/signal.c @@ -0,0 +1,141 @@ +// SPDX-License-Identifier: GPL-2.0 +/* + * Copyright (C) 2000 - 2007 Jeff Dike (jdike@{addtoit,linux.intel}.com) + */ + +#include <linux/module.h> +#include <linux/ptrace.h> +#include <linux/sched.h> +#include <linux/ftrace.h> +#include <asm/siginfo.h> +#include <asm/signal.h> +#include <asm/unistd.h> +#include <frame_kern.h> +#include <kern_util.h> +#include <os.h> + +EXPORT_SYMBOL(block_signals); +EXPORT_SYMBOL(unblock_signals); + +void block_signals_trace(void) +{ + block_signals(); + if (current_thread_info()) + trace_hardirqs_off(); +} + +void unblock_signals_trace(void) +{ + if (current_thread_info()) + trace_hardirqs_on(); + unblock_signals(); +} + +void um_trace_signals_on(void) +{ + if (current_thread_info()) + trace_hardirqs_on(); +} + +void um_trace_signals_off(void) +{ + if (current_thread_info()) + trace_hardirqs_off(); +} + +/* + * OK, we're invoking a handler + */ +static void handle_signal(struct ksignal *ksig, struct pt_regs *regs) +{ + sigset_t *oldset = sigmask_to_save(); + int singlestep = 0; + unsigned long sp; + int err; + + if (test_thread_flag(TIF_SINGLESTEP) && (current->ptrace & PT_PTRACED)) + singlestep = 1; + + /* Did we come from a system call? */ + if (PT_REGS_SYSCALL_NR(regs) >= 0) { + /* If so, check system call restarting.. */ + switch (PT_REGS_SYSCALL_RET(regs)) { + case -ERESTART_RESTARTBLOCK: + case -ERESTARTNOHAND: + PT_REGS_SYSCALL_RET(regs) = -EINTR; + break; + + case -ERESTARTSYS: + if (!(ksig->ka.sa.sa_flags & SA_RESTART)) { + PT_REGS_SYSCALL_RET(regs) = -EINTR; + break; + } + fallthrough; + case -ERESTARTNOINTR: + PT_REGS_RESTART_SYSCALL(regs); + PT_REGS_ORIG_SYSCALL(regs) = PT_REGS_SYSCALL_NR(regs); + break; + } + } + + sp = PT_REGS_SP(regs); + if ((ksig->ka.sa.sa_flags & SA_ONSTACK) && (sas_ss_flags(sp) == 0)) + sp = current->sas_ss_sp + current->sas_ss_size; + +#ifdef CONFIG_ARCH_HAS_SC_SIGNALS + if (!(ksig->ka.sa.sa_flags & SA_SIGINFO)) + err = setup_signal_stack_sc(sp, ksig, regs, oldset); + else +#endif + err = setup_signal_stack_si(sp, ksig, regs, oldset); + + signal_setup_done(err, ksig, singlestep); +} + +void do_signal(struct pt_regs *regs) +{ + struct ksignal ksig; + int handled_sig = 0; + + while (get_signal(&ksig)) { + handled_sig = 1; + /* Whee! Actually deliver the signal. */ + handle_signal(&ksig, regs); + } + + /* Did we come from a system call? */ + if (!handled_sig && (PT_REGS_SYSCALL_NR(regs) >= 0)) { + /* Restart the system call - no handlers present */ + switch (PT_REGS_SYSCALL_RET(regs)) { + case -ERESTARTNOHAND: + case -ERESTARTSYS: + case -ERESTARTNOINTR: + PT_REGS_ORIG_SYSCALL(regs) = PT_REGS_SYSCALL_NR(regs); + PT_REGS_RESTART_SYSCALL(regs); + break; + case -ERESTART_RESTARTBLOCK: + PT_REGS_ORIG_SYSCALL(regs) = __NR_restart_syscall; + PT_REGS_RESTART_SYSCALL(regs); + break; + } + } + + /* + * This closes a way to execute a system call on the host. If + * you set a breakpoint on a system call instruction and singlestep + * from it, the tracing thread used to PTRACE_SINGLESTEP the process + * rather than PTRACE_SYSCALL it, allowing the system call to execute + * on the host. The tracing thread will check this flag and + * PTRACE_SYSCALL if necessary. + */ + if (test_thread_flag(TIF_SINGLESTEP)) + current->thread.singlestep_syscall = + is_syscall(PT_REGS_IP(¤t->thread.regs)); + + /* + * if there's no signal to deliver, we just put the saved sigmask + * back + */ + if (!handled_sig) + restore_saved_sigmask(); +} diff --git a/arch/um/kernel/skas/Makefile b/arch/um/kernel/skas/Makefile new file mode 100644 index 0000000000..f93972a257 --- /dev/null +++ b/arch/um/kernel/skas/Makefile @@ -0,0 +1,17 @@ +# SPDX-License-Identifier: GPL-2.0 +# +# Copyright (C) 2002 - 2007 Jeff Dike (jdike@{addtoit,linux.intel}.com) +# + +obj-y := clone.o mmu.o process.o syscall.o uaccess.o + +# clone.o is in the stub, so it can't be built with profiling +# GCC hardened also auto-enables -fpic, but we need %ebx so it can't work -> +# disable it + +CFLAGS_clone.o := $(CFLAGS_NO_HARDENING) +UNPROFILE_OBJS := clone.o + +KCOV_INSTRUMENT := n + +include $(srctree)/arch/um/scripts/Makefile.rules diff --git a/arch/um/kernel/skas/clone.c b/arch/um/kernel/skas/clone.c new file mode 100644 index 0000000000..62435187dd --- /dev/null +++ b/arch/um/kernel/skas/clone.c @@ -0,0 +1,48 @@ +// SPDX-License-Identifier: GPL-2.0 +/* + * Copyright (C) 2015 Thomas Meyer (thomas@m3y3r.de) + * Copyright (C) 2007 Jeff Dike (jdike@{addtoit,linux.intel}.com) + */ + +#include <signal.h> +#include <sched.h> +#include <asm/unistd.h> +#include <sys/time.h> +#include <as-layout.h> +#include <ptrace_user.h> +#include <stub-data.h> +#include <sysdep/stub.h> + +/* + * This is in a separate file because it needs to be compiled with any + * extraneous gcc flags (-pg, -fprofile-arcs, -ftest-coverage) disabled + * + * Use UM_KERN_PAGE_SIZE instead of PAGE_SIZE because that calls getpagesize + * on some systems. + */ + +void __attribute__ ((__section__ (".__syscall_stub"))) +stub_clone_handler(void) +{ + struct stub_data *data = get_stub_data(); + long err; + + err = stub_syscall2(__NR_clone, CLONE_PARENT | CLONE_FILES | SIGCHLD, + (unsigned long)data + + STUB_DATA_PAGES * UM_KERN_PAGE_SIZE / 2); + if (err) { + data->parent_err = err; + goto done; + } + + err = stub_syscall4(__NR_ptrace, PTRACE_TRACEME, 0, 0, 0); + if (err) { + data->child_err = err; + goto done; + } + + remap_stack_and_trap(); + + done: + trap_myself(); +} diff --git a/arch/um/kernel/skas/mmu.c b/arch/um/kernel/skas/mmu.c new file mode 100644 index 0000000000..656fe16c9b --- /dev/null +++ b/arch/um/kernel/skas/mmu.c @@ -0,0 +1,79 @@ +// SPDX-License-Identifier: GPL-2.0 +/* + * Copyright (C) 2015 Thomas Meyer (thomas@m3y3r.de) + * Copyright (C) 2002 - 2007 Jeff Dike (jdike@{addtoit,linux.intel}.com) + */ + +#include <linux/mm.h> +#include <linux/sched/signal.h> +#include <linux/slab.h> + +#include <asm/pgalloc.h> +#include <asm/sections.h> +#include <as-layout.h> +#include <os.h> +#include <skas.h> + +int init_new_context(struct task_struct *task, struct mm_struct *mm) +{ + struct mm_context *from_mm = NULL; + struct mm_context *to_mm = &mm->context; + unsigned long stack = 0; + int ret = -ENOMEM; + + stack = __get_free_pages(GFP_KERNEL | __GFP_ZERO, ilog2(STUB_DATA_PAGES)); + if (stack == 0) + goto out; + + to_mm->id.stack = stack; + if (current->mm != NULL && current->mm != &init_mm) + from_mm = ¤t->mm->context; + + block_signals_trace(); + if (from_mm) + to_mm->id.u.pid = copy_context_skas0(stack, + from_mm->id.u.pid); + else to_mm->id.u.pid = start_userspace(stack); + unblock_signals_trace(); + + if (to_mm->id.u.pid < 0) { + ret = to_mm->id.u.pid; + goto out_free; + } + + ret = init_new_ldt(to_mm, from_mm); + if (ret < 0) { + printk(KERN_ERR "init_new_context_skas - init_ldt" + " failed, errno = %d\n", ret); + goto out_free; + } + + return 0; + + out_free: + if (to_mm->id.stack != 0) + free_pages(to_mm->id.stack, ilog2(STUB_DATA_PAGES)); + out: + return ret; +} + +void destroy_context(struct mm_struct *mm) +{ + struct mm_context *mmu = &mm->context; + + /* + * If init_new_context wasn't called, this will be + * zero, resulting in a kill(0), which will result in the + * whole UML suddenly dying. Also, cover negative and + * 1 cases, since they shouldn't happen either. + */ + if (mmu->id.u.pid < 2) { + printk(KERN_ERR "corrupt mm_context - pid = %d\n", + mmu->id.u.pid); + return; + } + os_kill_ptraced_process(mmu->id.u.pid, 1); + + free_pages(mmu->id.stack, ilog2(STUB_DATA_PAGES)); + free_ldt(mmu); +} diff --git a/arch/um/kernel/skas/process.c b/arch/um/kernel/skas/process.c new file mode 100644 index 0000000000..f2ac134c97 --- /dev/null +++ b/arch/um/kernel/skas/process.c @@ -0,0 +1,55 @@ +// SPDX-License-Identifier: GPL-2.0 +/* + * Copyright (C) 2002 - 2007 Jeff Dike (jdike@{addtoit,linux.intel}.com) + */ + +#include <linux/init.h> +#include <linux/sched/mm.h> +#include <linux/sched/task_stack.h> +#include <linux/sched/task.h> + +#include <as-layout.h> +#include <kern.h> +#include <os.h> +#include <skas.h> + +extern void start_kernel(void); + +static int __init start_kernel_proc(void *unused) +{ + int pid; + + block_signals_trace(); + pid = os_getpid(); + + cpu_tasks[0].pid = pid; + cpu_tasks[0].task = current; + + start_kernel(); + return 0; +} + +extern int userspace_pid[]; + +extern char cpu0_irqstack[]; + +int __init start_uml(void) +{ + stack_protections((unsigned long) &cpu0_irqstack); + set_sigstack(cpu0_irqstack, THREAD_SIZE); + + init_new_thread_signals(); + + init_task.thread.request.u.thread.proc = start_kernel_proc; + init_task.thread.request.u.thread.arg = NULL; + return start_idle_thread(task_stack_page(&init_task), + &init_task.thread.switch_buf); +} + +unsigned long current_stub_stack(void) +{ + if (current->mm == NULL) + return 0; + + return current->mm->context.id.stack; +} diff --git a/arch/um/kernel/skas/syscall.c b/arch/um/kernel/skas/syscall.c new file mode 100644 index 0000000000..9ee19e566d --- /dev/null +++ b/arch/um/kernel/skas/syscall.c @@ -0,0 +1,50 @@ +// SPDX-License-Identifier: GPL-2.0 +/* + * Copyright (C) 2002 - 2007 Jeff Dike (jdike@{addtoit,linux.intel}.com) + */ + +#include <linux/kernel.h> +#include <linux/ptrace.h> +#include <linux/seccomp.h> +#include <kern_util.h> +#include <sysdep/ptrace.h> +#include <sysdep/ptrace_user.h> +#include <sysdep/syscalls.h> +#include <linux/time-internal.h> +#include <asm/unistd.h> + +void handle_syscall(struct uml_pt_regs *r) +{ + struct pt_regs *regs = container_of(r, struct pt_regs, regs); + int syscall; + + /* + * If we have infinite CPU resources, then make every syscall also a + * preemption point, since we don't have any other preemption in this + * case, and kernel threads would basically never run until userspace + * went to sleep, even if said userspace interacts with the kernel in + * various ways. + */ + if (time_travel_mode == TT_MODE_INFCPU || + time_travel_mode == TT_MODE_EXTERNAL) + schedule(); + + /* Initialize the syscall number and default return value. */ + UPT_SYSCALL_NR(r) = PT_SYSCALL_NR(r->gp); + PT_REGS_SET_SYSCALL_RETURN(regs, -ENOSYS); + + if (syscall_trace_enter(regs)) + goto out; + + /* Do the seccomp check after ptrace; failures should be fast. */ + if (secure_computing() == -1) + goto out; + + syscall = UPT_SYSCALL_NR(r); + if (syscall >= 0 && syscall < __NR_syscalls) + PT_REGS_SET_SYSCALL_RETURN(regs, + EXECUTE_SYSCALL(syscall, regs)); + +out: + syscall_trace_leave(regs); +} diff --git a/arch/um/kernel/skas/uaccess.c b/arch/um/kernel/skas/uaccess.c new file mode 100644 index 0000000000..aaee96f071 --- /dev/null +++ b/arch/um/kernel/skas/uaccess.c @@ -0,0 +1,366 @@ +// SPDX-License-Identifier: GPL-2.0 +/* + * Copyright (C) 2002 - 2007 Jeff Dike (jdike@{addtoit,linux.intel}.com) + */ + +#include <linux/err.h> +#include <linux/highmem.h> +#include <linux/mm.h> +#include <linux/module.h> +#include <linux/sched.h> +#include <asm/current.h> +#include <asm/page.h> +#include <kern_util.h> +#include <asm/futex.h> +#include <os.h> + +pte_t *virt_to_pte(struct mm_struct *mm, unsigned long addr) +{ + pgd_t *pgd; + p4d_t *p4d; + pud_t *pud; + pmd_t *pmd; + + if (mm == NULL) + return NULL; + + pgd = pgd_offset(mm, addr); + if (!pgd_present(*pgd)) + return NULL; + + p4d = p4d_offset(pgd, addr); + if (!p4d_present(*p4d)) + return NULL; + + pud = pud_offset(p4d, addr); + if (!pud_present(*pud)) + return NULL; + + pmd = pmd_offset(pud, addr); + if (!pmd_present(*pmd)) + return NULL; + + return pte_offset_kernel(pmd, addr); +} + +static pte_t *maybe_map(unsigned long virt, int is_write) +{ + pte_t *pte = virt_to_pte(current->mm, virt); + int err, dummy_code; + + if ((pte == NULL) || !pte_present(*pte) || + (is_write && !pte_write(*pte))) { + err = handle_page_fault(virt, 0, is_write, 1, &dummy_code); + if (err) + return NULL; + pte = virt_to_pte(current->mm, virt); + } + if (!pte_present(*pte)) + pte = NULL; + + return pte; +} + +static int do_op_one_page(unsigned long addr, int len, int is_write, + int (*op)(unsigned long addr, int len, void *arg), void *arg) +{ + struct page *page; + pte_t *pte; + int n; + + pte = maybe_map(addr, is_write); + if (pte == NULL) + return -1; + + page = pte_page(*pte); +#ifdef CONFIG_64BIT + pagefault_disable(); + addr = (unsigned long) page_address(page) + + (addr & ~PAGE_MASK); +#else + addr = (unsigned long) kmap_atomic(page) + + (addr & ~PAGE_MASK); +#endif + n = (*op)(addr, len, arg); + +#ifdef CONFIG_64BIT + pagefault_enable(); +#else + kunmap_atomic((void *)addr); +#endif + + return n; +} + +static long buffer_op(unsigned long addr, int len, int is_write, + int (*op)(unsigned long, int, void *), void *arg) +{ + long size, remain, n; + + size = min(PAGE_ALIGN(addr) - addr, (unsigned long) len); + remain = len; + + n = do_op_one_page(addr, size, is_write, op, arg); + if (n != 0) { + remain = (n < 0 ? remain : 0); + goto out; + } + + addr += size; + remain -= size; + if (remain == 0) + goto out; + + while (addr < ((addr + remain) & PAGE_MASK)) { + n = do_op_one_page(addr, PAGE_SIZE, is_write, op, arg); + if (n != 0) { + remain = (n < 0 ? remain : 0); + goto out; + } + + addr += PAGE_SIZE; + remain -= PAGE_SIZE; + } + if (remain == 0) + goto out; + + n = do_op_one_page(addr, remain, is_write, op, arg); + if (n != 0) { + remain = (n < 0 ? remain : 0); + goto out; + } + + return 0; + out: + return remain; +} + +static int copy_chunk_from_user(unsigned long from, int len, void *arg) +{ + unsigned long *to_ptr = arg, to = *to_ptr; + + memcpy((void *) to, (void *) from, len); + *to_ptr += len; + return 0; +} + +unsigned long raw_copy_from_user(void *to, const void __user *from, unsigned long n) +{ + return buffer_op((unsigned long) from, n, 0, copy_chunk_from_user, &to); +} +EXPORT_SYMBOL(raw_copy_from_user); + +static int copy_chunk_to_user(unsigned long to, int len, void *arg) +{ + unsigned long *from_ptr = arg, from = *from_ptr; + + memcpy((void *) to, (void *) from, len); + *from_ptr += len; + return 0; +} + +unsigned long raw_copy_to_user(void __user *to, const void *from, unsigned long n) +{ + return buffer_op((unsigned long) to, n, 1, copy_chunk_to_user, &from); +} +EXPORT_SYMBOL(raw_copy_to_user); + +static int strncpy_chunk_from_user(unsigned long from, int len, void *arg) +{ + char **to_ptr = arg, *to = *to_ptr; + int n; + + strncpy(to, (void *) from, len); + n = strnlen(to, len); + *to_ptr += n; + + if (n < len) + return 1; + return 0; +} + +long strncpy_from_user(char *dst, const char __user *src, long count) +{ + long n; + char *ptr = dst; + + if (!access_ok(src, 1)) + return -EFAULT; + n = buffer_op((unsigned long) src, count, 0, strncpy_chunk_from_user, + &ptr); + if (n != 0) + return -EFAULT; + return strnlen(dst, count); +} +EXPORT_SYMBOL(strncpy_from_user); + +static int clear_chunk(unsigned long addr, int len, void *unused) +{ + memset((void *) addr, 0, len); + return 0; +} + +unsigned long __clear_user(void __user *mem, unsigned long len) +{ + return buffer_op((unsigned long) mem, len, 1, clear_chunk, NULL); +} +EXPORT_SYMBOL(__clear_user); + +static int strnlen_chunk(unsigned long str, int len, void *arg) +{ + int *len_ptr = arg, n; + + n = strnlen((void *) str, len); + *len_ptr += n; + + if (n < len) + return 1; + return 0; +} + +long strnlen_user(const char __user *str, long len) +{ + int count = 0, n; + + if (!access_ok(str, 1)) + return -EFAULT; + n = buffer_op((unsigned long) str, len, 0, strnlen_chunk, &count); + if (n == 0) + return count + 1; + return 0; +} +EXPORT_SYMBOL(strnlen_user); + +/** + * arch_futex_atomic_op_inuser() - Atomic arithmetic operation with constant + * argument and comparison of the previous + * futex value with another constant. + * + * @encoded_op: encoded operation to execute + * @uaddr: pointer to user space address + * + * Return: + * 0 - On success + * -EFAULT - User access resulted in a page fault + * -EAGAIN - Atomic operation was unable to complete due to contention + * -ENOSYS - Operation not supported + */ + +int arch_futex_atomic_op_inuser(int op, u32 oparg, int *oval, u32 __user *uaddr) +{ + int oldval, ret; + struct page *page; + unsigned long addr = (unsigned long) uaddr; + pte_t *pte; + + ret = -EFAULT; + if (!access_ok(uaddr, sizeof(*uaddr))) + return -EFAULT; + preempt_disable(); + pte = maybe_map(addr, 1); + if (pte == NULL) + goto out_inuser; + + page = pte_page(*pte); +#ifdef CONFIG_64BIT + pagefault_disable(); + addr = (unsigned long) page_address(page) + + (((unsigned long) addr) & ~PAGE_MASK); +#else + addr = (unsigned long) kmap_atomic(page) + + ((unsigned long) addr & ~PAGE_MASK); +#endif + uaddr = (u32 *) addr; + oldval = *uaddr; + + ret = 0; + + switch (op) { + case FUTEX_OP_SET: + *uaddr = oparg; + break; + case FUTEX_OP_ADD: + *uaddr += oparg; + break; + case FUTEX_OP_OR: + *uaddr |= oparg; + break; + case FUTEX_OP_ANDN: + *uaddr &= ~oparg; + break; + case FUTEX_OP_XOR: + *uaddr ^= oparg; + break; + default: + ret = -ENOSYS; + } +#ifdef CONFIG_64BIT + pagefault_enable(); +#else + kunmap_atomic((void *)addr); +#endif + +out_inuser: + preempt_enable(); + + if (ret == 0) + *oval = oldval; + + return ret; +} +EXPORT_SYMBOL(arch_futex_atomic_op_inuser); + +/** + * futex_atomic_cmpxchg_inatomic() - Compare and exchange the content of the + * uaddr with newval if the current value is + * oldval. + * @uval: pointer to store content of @uaddr + * @uaddr: pointer to user space address + * @oldval: old value + * @newval: new value to store to @uaddr + * + * Return: + * 0 - On success + * -EFAULT - User access resulted in a page fault + * -EAGAIN - Atomic operation was unable to complete due to contention + */ + +int futex_atomic_cmpxchg_inatomic(u32 *uval, u32 __user *uaddr, + u32 oldval, u32 newval) +{ + struct page *page; + pte_t *pte; + int ret = -EFAULT; + + if (!access_ok(uaddr, sizeof(*uaddr))) + return -EFAULT; + + preempt_disable(); + pte = maybe_map((unsigned long) uaddr, 1); + if (pte == NULL) + goto out_inatomic; + + page = pte_page(*pte); +#ifdef CONFIG_64BIT + pagefault_disable(); + uaddr = page_address(page) + (((unsigned long) uaddr) & ~PAGE_MASK); +#else + uaddr = kmap_atomic(page) + ((unsigned long) uaddr & ~PAGE_MASK); +#endif + + *uval = *uaddr; + + ret = cmpxchg(uaddr, oldval, newval); + +#ifdef CONFIG_64BIT + pagefault_enable(); +#else + kunmap_atomic(uaddr); +#endif + ret = 0; + +out_inatomic: + preempt_enable(); + return ret; +} +EXPORT_SYMBOL(futex_atomic_cmpxchg_inatomic); diff --git a/arch/um/kernel/stacktrace.c b/arch/um/kernel/stacktrace.c new file mode 100644 index 0000000000..fd3b61b3d4 --- /dev/null +++ b/arch/um/kernel/stacktrace.c @@ -0,0 +1,75 @@ +// SPDX-License-Identifier: GPL-2.0-only +/* + * Copyright (C) 2001 - 2007 Jeff Dike (jdike@{addtoit,linux.intel}.com) + * Copyright (C) 2013 Richard Weinberger <richard@nod.at> + * Copyright (C) 2014 Google Inc., Author: Daniel Walter <dwalter@google.com> + */ + +#include <linux/kallsyms.h> +#include <linux/kernel.h> +#include <linux/sched.h> +#include <linux/stacktrace.h> +#include <linux/module.h> +#include <linux/uaccess.h> +#include <asm/stacktrace.h> + +void dump_trace(struct task_struct *tsk, + const struct stacktrace_ops *ops, + void *data) +{ + int reliable = 0; + unsigned long *sp, bp, addr; + struct pt_regs *segv_regs = tsk->thread.segv_regs; + struct stack_frame *frame; + + bp = get_frame_pointer(tsk, segv_regs); + sp = get_stack_pointer(tsk, segv_regs); + + frame = (struct stack_frame *)bp; + while (((long) sp & (THREAD_SIZE-1)) != 0) { + addr = READ_ONCE_NOCHECK(*sp); + if (__kernel_text_address(addr)) { + reliable = 0; + if ((unsigned long) sp == bp + sizeof(long)) { + frame = frame ? frame->next_frame : NULL; + bp = (unsigned long)frame; + reliable = 1; + } + ops->address(data, addr, reliable); + } + sp++; + } +} + +static void save_addr(void *data, unsigned long address, int reliable) +{ + struct stack_trace *trace = data; + + if (!reliable) + return; + if (trace->nr_entries >= trace->max_entries) + return; + + trace->entries[trace->nr_entries++] = address; +} + +static const struct stacktrace_ops dump_ops = { + .address = save_addr +}; + +static void __save_stack_trace(struct task_struct *tsk, struct stack_trace *trace) +{ + dump_trace(tsk, &dump_ops, trace); +} + +void save_stack_trace(struct stack_trace *trace) +{ + __save_stack_trace(current, trace); +} +EXPORT_SYMBOL_GPL(save_stack_trace); + +void save_stack_trace_tsk(struct task_struct *tsk, struct stack_trace *trace) +{ + __save_stack_trace(tsk, trace); +} +EXPORT_SYMBOL_GPL(save_stack_trace_tsk); diff --git a/arch/um/kernel/sysrq.c b/arch/um/kernel/sysrq.c new file mode 100644 index 0000000000..746715379f --- /dev/null +++ b/arch/um/kernel/sysrq.c @@ -0,0 +1,57 @@ +// SPDX-License-Identifier: GPL-2.0-only +/* + * Copyright (C) 2001 - 2007 Jeff Dike (jdike@{addtoit,linux.intel}.com) + * Copyright (C) 2013 Richard Weinberger <richrd@nod.at> + */ + +#include <linux/kallsyms.h> +#include <linux/kernel.h> +#include <linux/module.h> +#include <linux/sched.h> +#include <linux/sched/debug.h> +#include <linux/sched/task_stack.h> + +#include <asm/sysrq.h> +#include <asm/stacktrace.h> +#include <os.h> + +static void _print_addr(void *data, unsigned long address, int reliable) +{ + const char *loglvl = data; + + printk("%s [<%08lx>] %s%pS\n", loglvl, address, reliable ? "" : "? ", + (void *)address); +} + +static const struct stacktrace_ops stackops = { + .address = _print_addr +}; + +void show_stack(struct task_struct *task, unsigned long *stack, + const char *loglvl) +{ + struct pt_regs *segv_regs = current->thread.segv_regs; + int i; + + if (!segv_regs && os_is_signal_stack()) { + pr_err("Received SIGSEGV in SIGSEGV handler," + " aborting stack trace!\n"); + return; + } + + if (!stack) + stack = get_stack_pointer(task, segv_regs); + + printk("%sStack:\n", loglvl); + for (i = 0; i < 3 * STACKSLOTS_PER_LINE; i++) { + if (kstack_end(stack)) + break; + if (i && ((i % STACKSLOTS_PER_LINE) == 0)) + pr_cont("\n"); + pr_cont(" %08lx", READ_ONCE_NOCHECK(*stack)); + stack++; + } + + printk("%sCall Trace:\n", loglvl); + dump_trace(current, &stackops, (void *)loglvl); +} diff --git a/arch/um/kernel/time.c b/arch/um/kernel/time.c new file mode 100644 index 0000000000..fddd1dec27 --- /dev/null +++ b/arch/um/kernel/time.c @@ -0,0 +1,860 @@ +// SPDX-License-Identifier: GPL-2.0 +/* + * Copyright (C) 2015 Anton Ivanov (aivanov@{brocade.com,kot-begemot.co.uk}) + * Copyright (C) 2015 Thomas Meyer (thomas@m3y3r.de) + * Copyright (C) 2012-2014 Cisco Systems + * Copyright (C) 2000 - 2007 Jeff Dike (jdike@{addtoit,linux.intel}.com) + * Copyright (C) 2019 Intel Corporation + */ + +#include <linux/clockchips.h> +#include <linux/init.h> +#include <linux/interrupt.h> +#include <linux/jiffies.h> +#include <linux/mm.h> +#include <linux/sched.h> +#include <linux/spinlock.h> +#include <linux/threads.h> +#include <asm/irq.h> +#include <asm/param.h> +#include <kern_util.h> +#include <os.h> +#include <linux/time-internal.h> +#include <linux/um_timetravel.h> +#include <shared/init.h> + +#ifdef CONFIG_UML_TIME_TRAVEL_SUPPORT +enum time_travel_mode time_travel_mode; +EXPORT_SYMBOL_GPL(time_travel_mode); + +static bool time_travel_start_set; +static unsigned long long time_travel_start; +static unsigned long long time_travel_time; +static LIST_HEAD(time_travel_events); +static LIST_HEAD(time_travel_irqs); +static unsigned long long time_travel_timer_interval; +static unsigned long long time_travel_next_event; +static struct time_travel_event time_travel_timer_event; +static int time_travel_ext_fd = -1; +static unsigned int time_travel_ext_waiting; +static bool time_travel_ext_prev_request_valid; +static unsigned long long time_travel_ext_prev_request; +static bool time_travel_ext_free_until_valid; +static unsigned long long time_travel_ext_free_until; + +static void time_travel_set_time(unsigned long long ns) +{ + if (unlikely(ns < time_travel_time)) + panic("time-travel: time goes backwards %lld -> %lld\n", + time_travel_time, ns); + else if (unlikely(ns >= S64_MAX)) + panic("The system was going to sleep forever, aborting"); + + time_travel_time = ns; +} + +enum time_travel_message_handling { + TTMH_IDLE, + TTMH_POLL, + TTMH_READ, +}; + +static void time_travel_handle_message(struct um_timetravel_msg *msg, + enum time_travel_message_handling mode) +{ + struct um_timetravel_msg resp = { + .op = UM_TIMETRAVEL_ACK, + }; + int ret; + + /* + * We can't unlock here, but interrupt signals with a timetravel_handler + * (see um_request_irq_tt) get to the timetravel_handler anyway. + */ + if (mode != TTMH_READ) { + BUG_ON(mode == TTMH_IDLE && !irqs_disabled()); + + while (os_poll(1, &time_travel_ext_fd) != 0) { + /* nothing */ + } + } + + ret = os_read_file(time_travel_ext_fd, msg, sizeof(*msg)); + + if (ret == 0) + panic("time-travel external link is broken\n"); + if (ret != sizeof(*msg)) + panic("invalid time-travel message - %d bytes\n", ret); + + switch (msg->op) { + default: + WARN_ONCE(1, "time-travel: unexpected message %lld\n", + (unsigned long long)msg->op); + break; + case UM_TIMETRAVEL_ACK: + return; + case UM_TIMETRAVEL_RUN: + time_travel_set_time(msg->time); + break; + case UM_TIMETRAVEL_FREE_UNTIL: + time_travel_ext_free_until_valid = true; + time_travel_ext_free_until = msg->time; + break; + } + + resp.seq = msg->seq; + os_write_file(time_travel_ext_fd, &resp, sizeof(resp)); +} + +static u64 time_travel_ext_req(u32 op, u64 time) +{ + static int seq; + int mseq = ++seq; + struct um_timetravel_msg msg = { + .op = op, + .time = time, + .seq = mseq, + }; + + /* + * We need to block even the timetravel handlers of SIGIO here and + * only restore their use when we got the ACK - otherwise we may + * (will) get interrupted by that, try to queue the IRQ for future + * processing and thus send another request while we're still waiting + * for an ACK, but the peer doesn't know we got interrupted and will + * send the ACKs in the same order as the message, but we'd need to + * see them in the opposite order ... + * + * This wouldn't matter *too* much, but some ACKs carry the + * current time (for UM_TIMETRAVEL_GET) and getting another + * ACK without a time would confuse us a lot! + * + * The sequence number assignment that happens here lets us + * debug such message handling issues more easily. + */ + block_signals_hard(); + os_write_file(time_travel_ext_fd, &msg, sizeof(msg)); + + while (msg.op != UM_TIMETRAVEL_ACK) + time_travel_handle_message(&msg, TTMH_READ); + + if (msg.seq != mseq) + panic("time-travel: ACK message has different seqno! op=%d, seq=%d != %d time=%lld\n", + msg.op, msg.seq, mseq, msg.time); + + if (op == UM_TIMETRAVEL_GET) + time_travel_set_time(msg.time); + unblock_signals_hard(); + + return msg.time; +} + +void __time_travel_wait_readable(int fd) +{ + int fds[2] = { fd, time_travel_ext_fd }; + int ret; + + if (time_travel_mode != TT_MODE_EXTERNAL) + return; + + while ((ret = os_poll(2, fds))) { + struct um_timetravel_msg msg; + + if (ret == 1) + time_travel_handle_message(&msg, TTMH_READ); + } +} +EXPORT_SYMBOL_GPL(__time_travel_wait_readable); + +static void time_travel_ext_update_request(unsigned long long time) +{ + if (time_travel_mode != TT_MODE_EXTERNAL) + return; + + /* asked for exactly this time previously */ + if (time_travel_ext_prev_request_valid && + time == time_travel_ext_prev_request) + return; + + /* + * if we're running and are allowed to run past the request + * then we don't need to update it either + */ + if (!time_travel_ext_waiting && time_travel_ext_free_until_valid && + time < time_travel_ext_free_until) + return; + + time_travel_ext_prev_request = time; + time_travel_ext_prev_request_valid = true; + time_travel_ext_req(UM_TIMETRAVEL_REQUEST, time); +} + +void __time_travel_propagate_time(void) +{ + static unsigned long long last_propagated; + + if (last_propagated == time_travel_time) + return; + + time_travel_ext_req(UM_TIMETRAVEL_UPDATE, time_travel_time); + last_propagated = time_travel_time; +} +EXPORT_SYMBOL_GPL(__time_travel_propagate_time); + +/* returns true if we must do a wait to the simtime device */ +static bool time_travel_ext_request(unsigned long long time) +{ + /* + * If we received an external sync point ("free until") then we + * don't have to request/wait for anything until then, unless + * we're already waiting. + */ + if (!time_travel_ext_waiting && time_travel_ext_free_until_valid && + time < time_travel_ext_free_until) + return false; + + time_travel_ext_update_request(time); + return true; +} + +static void time_travel_ext_wait(bool idle) +{ + struct um_timetravel_msg msg = { + .op = UM_TIMETRAVEL_ACK, + }; + + time_travel_ext_prev_request_valid = false; + time_travel_ext_free_until_valid = false; + time_travel_ext_waiting++; + + time_travel_ext_req(UM_TIMETRAVEL_WAIT, -1); + + /* + * Here we are deep in the idle loop, so we have to break out of the + * kernel abstraction in a sense and implement this in terms of the + * UML system waiting on the VQ interrupt while sleeping, when we get + * the signal it'll call time_travel_ext_vq_notify_done() completing the + * call. + */ + while (msg.op != UM_TIMETRAVEL_RUN) + time_travel_handle_message(&msg, idle ? TTMH_IDLE : TTMH_POLL); + + time_travel_ext_waiting--; + + /* we might request more stuff while polling - reset when we run */ + time_travel_ext_prev_request_valid = false; +} + +static void time_travel_ext_get_time(void) +{ + time_travel_ext_req(UM_TIMETRAVEL_GET, -1); +} + +static void __time_travel_update_time(unsigned long long ns, bool idle) +{ + if (time_travel_mode == TT_MODE_EXTERNAL && time_travel_ext_request(ns)) + time_travel_ext_wait(idle); + else + time_travel_set_time(ns); +} + +static struct time_travel_event *time_travel_first_event(void) +{ + return list_first_entry_or_null(&time_travel_events, + struct time_travel_event, + list); +} + +static void __time_travel_add_event(struct time_travel_event *e, + unsigned long long time) +{ + struct time_travel_event *tmp; + bool inserted = false; + unsigned long flags; + + if (e->pending) + return; + + e->pending = true; + e->time = time; + + local_irq_save(flags); + list_for_each_entry(tmp, &time_travel_events, list) { + /* + * Add the new entry before one with higher time, + * or if they're equal and both on stack, because + * in that case we need to unwind the stack in the + * right order, and the later event (timer sleep + * or such) must be dequeued first. + */ + if ((tmp->time > e->time) || + (tmp->time == e->time && tmp->onstack && e->onstack)) { + list_add_tail(&e->list, &tmp->list); + inserted = true; + break; + } + } + + if (!inserted) + list_add_tail(&e->list, &time_travel_events); + + tmp = time_travel_first_event(); + time_travel_ext_update_request(tmp->time); + time_travel_next_event = tmp->time; + local_irq_restore(flags); +} + +static void time_travel_add_event(struct time_travel_event *e, + unsigned long long time) +{ + if (WARN_ON(!e->fn)) + return; + + __time_travel_add_event(e, time); +} + +void time_travel_add_event_rel(struct time_travel_event *e, + unsigned long long delay_ns) +{ + time_travel_add_event(e, time_travel_time + delay_ns); +} + +void time_travel_periodic_timer(struct time_travel_event *e) +{ + time_travel_add_event(&time_travel_timer_event, + time_travel_time + time_travel_timer_interval); + deliver_alarm(); +} + +void deliver_time_travel_irqs(void) +{ + struct time_travel_event *e; + unsigned long flags; + + /* + * Don't do anything for most cases. Note that because here we have + * to disable IRQs (and re-enable later) we'll actually recurse at + * the end of the function, so this is strictly necessary. + */ + if (likely(list_empty(&time_travel_irqs))) + return; + + local_irq_save(flags); + irq_enter(); + while ((e = list_first_entry_or_null(&time_travel_irqs, + struct time_travel_event, + list))) { + list_del(&e->list); + e->pending = false; + e->fn(e); + } + irq_exit(); + local_irq_restore(flags); +} + +static void time_travel_deliver_event(struct time_travel_event *e) +{ + if (e == &time_travel_timer_event) { + /* + * deliver_alarm() does the irq_enter/irq_exit + * by itself, so must handle it specially here + */ + e->fn(e); + } else if (irqs_disabled()) { + list_add_tail(&e->list, &time_travel_irqs); + /* + * set pending again, it was set to false when the + * event was deleted from the original list, but + * now it's still pending until we deliver the IRQ. + */ + e->pending = true; + } else { + unsigned long flags; + + local_irq_save(flags); + irq_enter(); + e->fn(e); + irq_exit(); + local_irq_restore(flags); + } +} + +bool time_travel_del_event(struct time_travel_event *e) +{ + unsigned long flags; + + if (!e->pending) + return false; + local_irq_save(flags); + list_del(&e->list); + e->pending = false; + local_irq_restore(flags); + return true; +} + +static void time_travel_update_time(unsigned long long next, bool idle) +{ + struct time_travel_event ne = { + .onstack = true, + }; + struct time_travel_event *e; + bool finished = idle; + + /* add it without a handler - we deal with that specifically below */ + __time_travel_add_event(&ne, next); + + do { + e = time_travel_first_event(); + + BUG_ON(!e); + __time_travel_update_time(e->time, idle); + + /* new events may have been inserted while we were waiting */ + if (e == time_travel_first_event()) { + BUG_ON(!time_travel_del_event(e)); + BUG_ON(time_travel_time != e->time); + + if (e == &ne) { + finished = true; + } else { + if (e->onstack) + panic("On-stack event dequeued outside of the stack! time=%lld, event time=%lld, event=%pS\n", + time_travel_time, e->time, e); + time_travel_deliver_event(e); + } + } + + e = time_travel_first_event(); + if (e) + time_travel_ext_update_request(e->time); + } while (ne.pending && !finished); + + time_travel_del_event(&ne); +} + +void time_travel_ndelay(unsigned long nsec) +{ + time_travel_update_time(time_travel_time + nsec, false); +} +EXPORT_SYMBOL(time_travel_ndelay); + +void time_travel_add_irq_event(struct time_travel_event *e) +{ + BUG_ON(time_travel_mode != TT_MODE_EXTERNAL); + + time_travel_ext_get_time(); + /* + * We could model interrupt latency here, for now just + * don't have any latency at all and request the exact + * same time (again) to run the interrupt... + */ + time_travel_add_event(e, time_travel_time); +} +EXPORT_SYMBOL_GPL(time_travel_add_irq_event); + +static void time_travel_oneshot_timer(struct time_travel_event *e) +{ + deliver_alarm(); +} + +void time_travel_sleep(void) +{ + /* + * Wait "forever" (using S64_MAX because there are some potential + * wrapping issues, especially with the current TT_MODE_EXTERNAL + * controller application. + */ + unsigned long long next = S64_MAX; + + if (time_travel_mode == TT_MODE_BASIC) + os_timer_disable(); + + time_travel_update_time(next, true); + + if (time_travel_mode == TT_MODE_BASIC && + time_travel_timer_event.pending) { + if (time_travel_timer_event.fn == time_travel_periodic_timer) { + /* + * This is somewhat wrong - we should get the first + * one sooner like the os_timer_one_shot() below... + */ + os_timer_set_interval(time_travel_timer_interval); + } else { + os_timer_one_shot(time_travel_timer_event.time - next); + } + } +} + +static void time_travel_handle_real_alarm(void) +{ + time_travel_set_time(time_travel_next_event); + + time_travel_del_event(&time_travel_timer_event); + + if (time_travel_timer_event.fn == time_travel_periodic_timer) + time_travel_add_event(&time_travel_timer_event, + time_travel_time + + time_travel_timer_interval); +} + +static void time_travel_set_interval(unsigned long long interval) +{ + time_travel_timer_interval = interval; +} + +static int time_travel_connect_external(const char *socket) +{ + const char *sep; + unsigned long long id = (unsigned long long)-1; + int rc; + + if ((sep = strchr(socket, ':'))) { + char buf[25] = {}; + if (sep - socket > sizeof(buf) - 1) + goto invalid_number; + + memcpy(buf, socket, sep - socket); + if (kstrtoull(buf, 0, &id)) { +invalid_number: + panic("time-travel: invalid external ID in string '%s'\n", + socket); + return -EINVAL; + } + + socket = sep + 1; + } + + rc = os_connect_socket(socket); + if (rc < 0) { + panic("time-travel: failed to connect to external socket %s\n", + socket); + return rc; + } + + time_travel_ext_fd = rc; + + time_travel_ext_req(UM_TIMETRAVEL_START, id); + + return 1; +} + +static void time_travel_set_start(void) +{ + if (time_travel_start_set) + return; + + switch (time_travel_mode) { + case TT_MODE_EXTERNAL: + time_travel_start = time_travel_ext_req(UM_TIMETRAVEL_GET_TOD, -1); + /* controller gave us the *current* time, so adjust by that */ + time_travel_ext_get_time(); + time_travel_start -= time_travel_time; + break; + case TT_MODE_INFCPU: + case TT_MODE_BASIC: + if (!time_travel_start_set) + time_travel_start = os_persistent_clock_emulation(); + break; + case TT_MODE_OFF: + /* we just read the host clock with os_persistent_clock_emulation() */ + break; + } + + time_travel_start_set = true; +} +#else /* CONFIG_UML_TIME_TRAVEL_SUPPORT */ +#define time_travel_start_set 0 +#define time_travel_start 0 +#define time_travel_time 0 +#define time_travel_ext_waiting 0 + +static inline void time_travel_update_time(unsigned long long ns, bool retearly) +{ +} + +static inline void time_travel_handle_real_alarm(void) +{ +} + +static void time_travel_set_interval(unsigned long long interval) +{ +} + +static inline void time_travel_set_start(void) +{ +} + +/* fail link if this actually gets used */ +extern u64 time_travel_ext_req(u32 op, u64 time); + +/* these are empty macros so the struct/fn need not exist */ +#define time_travel_add_event(e, time) do { } while (0) +/* externally not usable - redefine here so we can */ +#undef time_travel_del_event +#define time_travel_del_event(e) do { } while (0) +#endif + +void timer_handler(int sig, struct siginfo *unused_si, struct uml_pt_regs *regs) +{ + unsigned long flags; + + /* + * In basic time-travel mode we still get real interrupts + * (signals) but since we don't read time from the OS, we + * must update the simulated time here to the expiry when + * we get a signal. + * This is not the case in inf-cpu mode, since there we + * never get any real signals from the OS. + */ + if (time_travel_mode == TT_MODE_BASIC) + time_travel_handle_real_alarm(); + + local_irq_save(flags); + do_IRQ(TIMER_IRQ, regs); + local_irq_restore(flags); +} + +static int itimer_shutdown(struct clock_event_device *evt) +{ + if (time_travel_mode != TT_MODE_OFF) + time_travel_del_event(&time_travel_timer_event); + + if (time_travel_mode != TT_MODE_INFCPU && + time_travel_mode != TT_MODE_EXTERNAL) + os_timer_disable(); + + return 0; +} + +static int itimer_set_periodic(struct clock_event_device *evt) +{ + unsigned long long interval = NSEC_PER_SEC / HZ; + + if (time_travel_mode != TT_MODE_OFF) { + time_travel_del_event(&time_travel_timer_event); + time_travel_set_event_fn(&time_travel_timer_event, + time_travel_periodic_timer); + time_travel_set_interval(interval); + time_travel_add_event(&time_travel_timer_event, + time_travel_time + interval); + } + + if (time_travel_mode != TT_MODE_INFCPU && + time_travel_mode != TT_MODE_EXTERNAL) + os_timer_set_interval(interval); + + return 0; +} + +static int itimer_next_event(unsigned long delta, + struct clock_event_device *evt) +{ + delta += 1; + + if (time_travel_mode != TT_MODE_OFF) { + time_travel_del_event(&time_travel_timer_event); + time_travel_set_event_fn(&time_travel_timer_event, + time_travel_oneshot_timer); + time_travel_add_event(&time_travel_timer_event, + time_travel_time + delta); + } + + if (time_travel_mode != TT_MODE_INFCPU && + time_travel_mode != TT_MODE_EXTERNAL) + return os_timer_one_shot(delta); + + return 0; +} + +static int itimer_one_shot(struct clock_event_device *evt) +{ + return itimer_next_event(0, evt); +} + +static struct clock_event_device timer_clockevent = { + .name = "posix-timer", + .rating = 250, + .cpumask = cpu_possible_mask, + .features = CLOCK_EVT_FEAT_PERIODIC | + CLOCK_EVT_FEAT_ONESHOT, + .set_state_shutdown = itimer_shutdown, + .set_state_periodic = itimer_set_periodic, + .set_state_oneshot = itimer_one_shot, + .set_next_event = itimer_next_event, + .shift = 0, + .max_delta_ns = 0xffffffff, + .max_delta_ticks = 0xffffffff, + .min_delta_ns = TIMER_MIN_DELTA, + .min_delta_ticks = TIMER_MIN_DELTA, // microsecond resolution should be enough for anyone, same as 640K RAM + .irq = 0, + .mult = 1, +}; + +static irqreturn_t um_timer(int irq, void *dev) +{ + if (get_current()->mm != NULL) + { + /* userspace - relay signal, results in correct userspace timers */ + os_alarm_process(get_current()->mm->context.id.u.pid); + } + + (*timer_clockevent.event_handler)(&timer_clockevent); + + return IRQ_HANDLED; +} + +static u64 timer_read(struct clocksource *cs) +{ + if (time_travel_mode != TT_MODE_OFF) { + /* + * We make reading the timer cost a bit so that we don't get + * stuck in loops that expect time to move more than the + * exact requested sleep amount, e.g. python's socket server, + * see https://bugs.python.org/issue37026. + * + * However, don't do that when we're in interrupt or such as + * then we might recurse into our own processing, and get to + * even more waiting, and that's not good - it messes up the + * "what do I do next" and onstack event we use to know when + * to return from time_travel_update_time(). + */ + if (!irqs_disabled() && !in_interrupt() && !in_softirq() && + !time_travel_ext_waiting) + time_travel_update_time(time_travel_time + + TIMER_MULTIPLIER, + false); + return time_travel_time / TIMER_MULTIPLIER; + } + + return os_nsecs() / TIMER_MULTIPLIER; +} + +static struct clocksource timer_clocksource = { + .name = "timer", + .rating = 300, + .read = timer_read, + .mask = CLOCKSOURCE_MASK(64), + .flags = CLOCK_SOURCE_IS_CONTINUOUS, +}; + +static void __init um_timer_setup(void) +{ + int err; + + err = request_irq(TIMER_IRQ, um_timer, IRQF_TIMER, "hr timer", NULL); + if (err != 0) + printk(KERN_ERR "register_timer : request_irq failed - " + "errno = %d\n", -err); + + err = os_timer_create(); + if (err != 0) { + printk(KERN_ERR "creation of timer failed - errno = %d\n", -err); + return; + } + + err = clocksource_register_hz(&timer_clocksource, NSEC_PER_SEC/TIMER_MULTIPLIER); + if (err) { + printk(KERN_ERR "clocksource_register_hz returned %d\n", err); + return; + } + clockevents_register_device(&timer_clockevent); +} + +void read_persistent_clock64(struct timespec64 *ts) +{ + long long nsecs; + + time_travel_set_start(); + + if (time_travel_mode != TT_MODE_OFF) + nsecs = time_travel_start + time_travel_time; + else + nsecs = os_persistent_clock_emulation(); + + set_normalized_timespec64(ts, nsecs / NSEC_PER_SEC, + nsecs % NSEC_PER_SEC); +} + +void __init time_init(void) +{ + timer_set_signal_handler(); + late_time_init = um_timer_setup; +} + +#ifdef CONFIG_UML_TIME_TRAVEL_SUPPORT +unsigned long calibrate_delay_is_known(void) +{ + if (time_travel_mode == TT_MODE_INFCPU || + time_travel_mode == TT_MODE_EXTERNAL) + return 1; + return 0; +} + +int setup_time_travel(char *str) +{ + if (strcmp(str, "=inf-cpu") == 0) { + time_travel_mode = TT_MODE_INFCPU; + timer_clockevent.name = "time-travel-timer-infcpu"; + timer_clocksource.name = "time-travel-clock"; + return 1; + } + + if (strncmp(str, "=ext:", 5) == 0) { + time_travel_mode = TT_MODE_EXTERNAL; + timer_clockevent.name = "time-travel-timer-external"; + timer_clocksource.name = "time-travel-clock-external"; + return time_travel_connect_external(str + 5); + } + + if (!*str) { + time_travel_mode = TT_MODE_BASIC; + timer_clockevent.name = "time-travel-timer"; + timer_clocksource.name = "time-travel-clock"; + return 1; + } + + return -EINVAL; +} + +__setup("time-travel", setup_time_travel); +__uml_help(setup_time_travel, +"time-travel\n" +"This option just enables basic time travel mode, in which the clock/timers\n" +"inside the UML instance skip forward when there's nothing to do, rather than\n" +"waiting for real time to elapse. However, instance CPU speed is limited by\n" +"the real CPU speed, so e.g. a 10ms timer will always fire after ~10ms wall\n" +"clock (but quicker when there's nothing to do).\n" +"\n" +"time-travel=inf-cpu\n" +"This enables time travel mode with infinite processing power, in which there\n" +"are no wall clock timers, and any CPU processing happens - as seen from the\n" +"guest - instantly. This can be useful for accurate simulation regardless of\n" +"debug overhead, physical CPU speed, etc. but is somewhat dangerous as it can\n" +"easily lead to getting stuck (e.g. if anything in the system busy loops).\n" +"\n" +"time-travel=ext:[ID:]/path/to/socket\n" +"This enables time travel mode similar to =inf-cpu, except the system will\n" +"use the given socket to coordinate with a central scheduler, in order to\n" +"have more than one system simultaneously be on simulated time. The virtio\n" +"driver code in UML knows about this so you can also simulate networks and\n" +"devices using it, assuming the device has the right capabilities.\n" +"The optional ID is a 64-bit integer that's sent to the central scheduler.\n"); + +int setup_time_travel_start(char *str) +{ + int err; + + err = kstrtoull(str, 0, &time_travel_start); + if (err) + return err; + + time_travel_start_set = 1; + return 1; +} + +__setup("time-travel-start", setup_time_travel_start); +__uml_help(setup_time_travel_start, +"time-travel-start=<seconds>\n" +"Configure the UML instance's wall clock to start at this value rather than\n" +"the host's wall clock at the time of UML boot.\n"); +#endif diff --git a/arch/um/kernel/tlb.c b/arch/um/kernel/tlb.c new file mode 100644 index 0000000000..7d050ab0f7 --- /dev/null +++ b/arch/um/kernel/tlb.c @@ -0,0 +1,604 @@ +// SPDX-License-Identifier: GPL-2.0 +/* + * Copyright (C) 2000 - 2007 Jeff Dike (jdike@{addtoit,linux.intel}.com) + */ + +#include <linux/mm.h> +#include <linux/module.h> +#include <linux/sched/signal.h> + +#include <asm/tlbflush.h> +#include <as-layout.h> +#include <mem_user.h> +#include <os.h> +#include <skas.h> +#include <kern_util.h> + +struct host_vm_change { + struct host_vm_op { + enum { NONE, MMAP, MUNMAP, MPROTECT } type; + union { + struct { + unsigned long addr; + unsigned long len; + unsigned int prot; + int fd; + __u64 offset; + } mmap; + struct { + unsigned long addr; + unsigned long len; + } munmap; + struct { + unsigned long addr; + unsigned long len; + unsigned int prot; + } mprotect; + } u; + } ops[1]; + int userspace; + int index; + struct mm_struct *mm; + void *data; + int force; +}; + +#define INIT_HVC(mm, force, userspace) \ + ((struct host_vm_change) \ + { .ops = { { .type = NONE } }, \ + .mm = mm, \ + .data = NULL, \ + .userspace = userspace, \ + .index = 0, \ + .force = force }) + +static void report_enomem(void) +{ + printk(KERN_ERR "UML ran out of memory on the host side! " + "This can happen due to a memory limitation or " + "vm.max_map_count has been reached.\n"); +} + +static int do_ops(struct host_vm_change *hvc, int end, + int finished) +{ + struct host_vm_op *op; + int i, ret = 0; + + for (i = 0; i < end && !ret; i++) { + op = &hvc->ops[i]; + switch (op->type) { + case MMAP: + if (hvc->userspace) + ret = map(&hvc->mm->context.id, op->u.mmap.addr, + op->u.mmap.len, op->u.mmap.prot, + op->u.mmap.fd, + op->u.mmap.offset, finished, + &hvc->data); + else + map_memory(op->u.mmap.addr, op->u.mmap.offset, + op->u.mmap.len, 1, 1, 1); + break; + case MUNMAP: + if (hvc->userspace) + ret = unmap(&hvc->mm->context.id, + op->u.munmap.addr, + op->u.munmap.len, finished, + &hvc->data); + else + ret = os_unmap_memory( + (void *) op->u.munmap.addr, + op->u.munmap.len); + + break; + case MPROTECT: + if (hvc->userspace) + ret = protect(&hvc->mm->context.id, + op->u.mprotect.addr, + op->u.mprotect.len, + op->u.mprotect.prot, + finished, &hvc->data); + else + ret = os_protect_memory( + (void *) op->u.mprotect.addr, + op->u.mprotect.len, + 1, 1, 1); + break; + default: + printk(KERN_ERR "Unknown op type %d in do_ops\n", + op->type); + BUG(); + break; + } + } + + if (ret == -ENOMEM) + report_enomem(); + + return ret; +} + +static int add_mmap(unsigned long virt, unsigned long phys, unsigned long len, + unsigned int prot, struct host_vm_change *hvc) +{ + __u64 offset; + struct host_vm_op *last; + int fd = -1, ret = 0; + + if (hvc->userspace) + fd = phys_mapping(phys, &offset); + else + offset = phys; + if (hvc->index != 0) { + last = &hvc->ops[hvc->index - 1]; + if ((last->type == MMAP) && + (last->u.mmap.addr + last->u.mmap.len == virt) && + (last->u.mmap.prot == prot) && (last->u.mmap.fd == fd) && + (last->u.mmap.offset + last->u.mmap.len == offset)) { + last->u.mmap.len += len; + return 0; + } + } + + if (hvc->index == ARRAY_SIZE(hvc->ops)) { + ret = do_ops(hvc, ARRAY_SIZE(hvc->ops), 0); + hvc->index = 0; + } + + hvc->ops[hvc->index++] = ((struct host_vm_op) + { .type = MMAP, + .u = { .mmap = { .addr = virt, + .len = len, + .prot = prot, + .fd = fd, + .offset = offset } + } }); + return ret; +} + +static int add_munmap(unsigned long addr, unsigned long len, + struct host_vm_change *hvc) +{ + struct host_vm_op *last; + int ret = 0; + + if (hvc->index != 0) { + last = &hvc->ops[hvc->index - 1]; + if ((last->type == MUNMAP) && + (last->u.munmap.addr + last->u.mmap.len == addr)) { + last->u.munmap.len += len; + return 0; + } + } + + if (hvc->index == ARRAY_SIZE(hvc->ops)) { + ret = do_ops(hvc, ARRAY_SIZE(hvc->ops), 0); + hvc->index = 0; + } + + hvc->ops[hvc->index++] = ((struct host_vm_op) + { .type = MUNMAP, + .u = { .munmap = { .addr = addr, + .len = len } } }); + return ret; +} + +static int add_mprotect(unsigned long addr, unsigned long len, + unsigned int prot, struct host_vm_change *hvc) +{ + struct host_vm_op *last; + int ret = 0; + + if (hvc->index != 0) { + last = &hvc->ops[hvc->index - 1]; + if ((last->type == MPROTECT) && + (last->u.mprotect.addr + last->u.mprotect.len == addr) && + (last->u.mprotect.prot == prot)) { + last->u.mprotect.len += len; + return 0; + } + } + + if (hvc->index == ARRAY_SIZE(hvc->ops)) { + ret = do_ops(hvc, ARRAY_SIZE(hvc->ops), 0); + hvc->index = 0; + } + + hvc->ops[hvc->index++] = ((struct host_vm_op) + { .type = MPROTECT, + .u = { .mprotect = { .addr = addr, + .len = len, + .prot = prot } } }); + return ret; +} + +#define ADD_ROUND(n, inc) (((n) + (inc)) & ~((inc) - 1)) + +static inline int update_pte_range(pmd_t *pmd, unsigned long addr, + unsigned long end, + struct host_vm_change *hvc) +{ + pte_t *pte; + int r, w, x, prot, ret = 0; + + pte = pte_offset_kernel(pmd, addr); + do { + r = pte_read(*pte); + w = pte_write(*pte); + x = pte_exec(*pte); + if (!pte_young(*pte)) { + r = 0; + w = 0; + } else if (!pte_dirty(*pte)) + w = 0; + + prot = ((r ? UM_PROT_READ : 0) | (w ? UM_PROT_WRITE : 0) | + (x ? UM_PROT_EXEC : 0)); + if (hvc->force || pte_newpage(*pte)) { + if (pte_present(*pte)) { + if (pte_newpage(*pte)) + ret = add_mmap(addr, pte_val(*pte) & PAGE_MASK, + PAGE_SIZE, prot, hvc); + } else + ret = add_munmap(addr, PAGE_SIZE, hvc); + } else if (pte_newprot(*pte)) + ret = add_mprotect(addr, PAGE_SIZE, prot, hvc); + *pte = pte_mkuptodate(*pte); + } while (pte++, addr += PAGE_SIZE, ((addr < end) && !ret)); + return ret; +} + +static inline int update_pmd_range(pud_t *pud, unsigned long addr, + unsigned long end, + struct host_vm_change *hvc) +{ + pmd_t *pmd; + unsigned long next; + int ret = 0; + + pmd = pmd_offset(pud, addr); + do { + next = pmd_addr_end(addr, end); + if (!pmd_present(*pmd)) { + if (hvc->force || pmd_newpage(*pmd)) { + ret = add_munmap(addr, next - addr, hvc); + pmd_mkuptodate(*pmd); + } + } + else ret = update_pte_range(pmd, addr, next, hvc); + } while (pmd++, addr = next, ((addr < end) && !ret)); + return ret; +} + +static inline int update_pud_range(p4d_t *p4d, unsigned long addr, + unsigned long end, + struct host_vm_change *hvc) +{ + pud_t *pud; + unsigned long next; + int ret = 0; + + pud = pud_offset(p4d, addr); + do { + next = pud_addr_end(addr, end); + if (!pud_present(*pud)) { + if (hvc->force || pud_newpage(*pud)) { + ret = add_munmap(addr, next - addr, hvc); + pud_mkuptodate(*pud); + } + } + else ret = update_pmd_range(pud, addr, next, hvc); + } while (pud++, addr = next, ((addr < end) && !ret)); + return ret; +} + +static inline int update_p4d_range(pgd_t *pgd, unsigned long addr, + unsigned long end, + struct host_vm_change *hvc) +{ + p4d_t *p4d; + unsigned long next; + int ret = 0; + + p4d = p4d_offset(pgd, addr); + do { + next = p4d_addr_end(addr, end); + if (!p4d_present(*p4d)) { + if (hvc->force || p4d_newpage(*p4d)) { + ret = add_munmap(addr, next - addr, hvc); + p4d_mkuptodate(*p4d); + } + } else + ret = update_pud_range(p4d, addr, next, hvc); + } while (p4d++, addr = next, ((addr < end) && !ret)); + return ret; +} + +static void fix_range_common(struct mm_struct *mm, unsigned long start_addr, + unsigned long end_addr, int force) +{ + pgd_t *pgd; + struct host_vm_change hvc; + unsigned long addr = start_addr, next; + int ret = 0, userspace = 1; + + hvc = INIT_HVC(mm, force, userspace); + pgd = pgd_offset(mm, addr); + do { + next = pgd_addr_end(addr, end_addr); + if (!pgd_present(*pgd)) { + if (force || pgd_newpage(*pgd)) { + ret = add_munmap(addr, next - addr, &hvc); + pgd_mkuptodate(*pgd); + } + } else + ret = update_p4d_range(pgd, addr, next, &hvc); + } while (pgd++, addr = next, ((addr < end_addr) && !ret)); + + if (!ret) + ret = do_ops(&hvc, hvc.index, 1); + + /* This is not an else because ret is modified above */ + if (ret) { + struct mm_id *mm_idp = ¤t->mm->context.id; + + printk(KERN_ERR "fix_range_common: failed, killing current " + "process: %d\n", task_tgid_vnr(current)); + mm_idp->kill = 1; + } +} + +static int flush_tlb_kernel_range_common(unsigned long start, unsigned long end) +{ + struct mm_struct *mm; + pgd_t *pgd; + p4d_t *p4d; + pud_t *pud; + pmd_t *pmd; + pte_t *pte; + unsigned long addr, last; + int updated = 0, err = 0, force = 0, userspace = 0; + struct host_vm_change hvc; + + mm = &init_mm; + hvc = INIT_HVC(mm, force, userspace); + for (addr = start; addr < end;) { + pgd = pgd_offset(mm, addr); + if (!pgd_present(*pgd)) { + last = ADD_ROUND(addr, PGDIR_SIZE); + if (last > end) + last = end; + if (pgd_newpage(*pgd)) { + updated = 1; + err = add_munmap(addr, last - addr, &hvc); + if (err < 0) + panic("munmap failed, errno = %d\n", + -err); + } + addr = last; + continue; + } + + p4d = p4d_offset(pgd, addr); + if (!p4d_present(*p4d)) { + last = ADD_ROUND(addr, P4D_SIZE); + if (last > end) + last = end; + if (p4d_newpage(*p4d)) { + updated = 1; + err = add_munmap(addr, last - addr, &hvc); + if (err < 0) + panic("munmap failed, errno = %d\n", + -err); + } + addr = last; + continue; + } + + pud = pud_offset(p4d, addr); + if (!pud_present(*pud)) { + last = ADD_ROUND(addr, PUD_SIZE); + if (last > end) + last = end; + if (pud_newpage(*pud)) { + updated = 1; + err = add_munmap(addr, last - addr, &hvc); + if (err < 0) + panic("munmap failed, errno = %d\n", + -err); + } + addr = last; + continue; + } + + pmd = pmd_offset(pud, addr); + if (!pmd_present(*pmd)) { + last = ADD_ROUND(addr, PMD_SIZE); + if (last > end) + last = end; + if (pmd_newpage(*pmd)) { + updated = 1; + err = add_munmap(addr, last - addr, &hvc); + if (err < 0) + panic("munmap failed, errno = %d\n", + -err); + } + addr = last; + continue; + } + + pte = pte_offset_kernel(pmd, addr); + if (!pte_present(*pte) || pte_newpage(*pte)) { + updated = 1; + err = add_munmap(addr, PAGE_SIZE, &hvc); + if (err < 0) + panic("munmap failed, errno = %d\n", + -err); + if (pte_present(*pte)) + err = add_mmap(addr, pte_val(*pte) & PAGE_MASK, + PAGE_SIZE, 0, &hvc); + } + else if (pte_newprot(*pte)) { + updated = 1; + err = add_mprotect(addr, PAGE_SIZE, 0, &hvc); + } + addr += PAGE_SIZE; + } + if (!err) + err = do_ops(&hvc, hvc.index, 1); + + if (err < 0) + panic("flush_tlb_kernel failed, errno = %d\n", err); + return updated; +} + +void flush_tlb_page(struct vm_area_struct *vma, unsigned long address) +{ + pgd_t *pgd; + p4d_t *p4d; + pud_t *pud; + pmd_t *pmd; + pte_t *pte; + struct mm_struct *mm = vma->vm_mm; + void *flush = NULL; + int r, w, x, prot, err = 0; + struct mm_id *mm_id; + + address &= PAGE_MASK; + + pgd = pgd_offset(mm, address); + if (!pgd_present(*pgd)) + goto kill; + + p4d = p4d_offset(pgd, address); + if (!p4d_present(*p4d)) + goto kill; + + pud = pud_offset(p4d, address); + if (!pud_present(*pud)) + goto kill; + + pmd = pmd_offset(pud, address); + if (!pmd_present(*pmd)) + goto kill; + + pte = pte_offset_kernel(pmd, address); + + r = pte_read(*pte); + w = pte_write(*pte); + x = pte_exec(*pte); + if (!pte_young(*pte)) { + r = 0; + w = 0; + } else if (!pte_dirty(*pte)) { + w = 0; + } + + mm_id = &mm->context.id; + prot = ((r ? UM_PROT_READ : 0) | (w ? UM_PROT_WRITE : 0) | + (x ? UM_PROT_EXEC : 0)); + if (pte_newpage(*pte)) { + if (pte_present(*pte)) { + unsigned long long offset; + int fd; + + fd = phys_mapping(pte_val(*pte) & PAGE_MASK, &offset); + err = map(mm_id, address, PAGE_SIZE, prot, fd, offset, + 1, &flush); + } + else err = unmap(mm_id, address, PAGE_SIZE, 1, &flush); + } + else if (pte_newprot(*pte)) + err = protect(mm_id, address, PAGE_SIZE, prot, 1, &flush); + + if (err) { + if (err == -ENOMEM) + report_enomem(); + + goto kill; + } + + *pte = pte_mkuptodate(*pte); + + return; + +kill: + printk(KERN_ERR "Failed to flush page for address 0x%lx\n", address); + force_sig(SIGKILL); +} + +void flush_tlb_all(void) +{ + /* + * Don't bother flushing if this address space is about to be + * destroyed. + */ + if (atomic_read(¤t->mm->mm_users) == 0) + return; + + flush_tlb_mm(current->mm); +} + +void flush_tlb_kernel_range(unsigned long start, unsigned long end) +{ + flush_tlb_kernel_range_common(start, end); +} + +void flush_tlb_kernel_vm(void) +{ + flush_tlb_kernel_range_common(start_vm, end_vm); +} + +void __flush_tlb_one(unsigned long addr) +{ + flush_tlb_kernel_range_common(addr, addr + PAGE_SIZE); +} + +static void fix_range(struct mm_struct *mm, unsigned long start_addr, + unsigned long end_addr, int force) +{ + /* + * Don't bother flushing if this address space is about to be + * destroyed. + */ + if (atomic_read(&mm->mm_users) == 0) + return; + + fix_range_common(mm, start_addr, end_addr, force); +} + +void flush_tlb_range(struct vm_area_struct *vma, unsigned long start, + unsigned long end) +{ + if (vma->vm_mm == NULL) + flush_tlb_kernel_range_common(start, end); + else fix_range(vma->vm_mm, start, end, 0); +} +EXPORT_SYMBOL(flush_tlb_range); + +void flush_tlb_mm_range(struct mm_struct *mm, unsigned long start, + unsigned long end) +{ + fix_range(mm, start, end, 0); +} + +void flush_tlb_mm(struct mm_struct *mm) +{ + struct vm_area_struct *vma; + VMA_ITERATOR(vmi, mm, 0); + + for_each_vma(vmi, vma) + fix_range(mm, vma->vm_start, vma->vm_end, 0); +} + +void force_flush_all(void) +{ + struct mm_struct *mm = current->mm; + struct vm_area_struct *vma; + VMA_ITERATOR(vmi, mm, 0); + + mmap_read_lock(mm); + for_each_vma(vmi, vma) + fix_range(mm, vma->vm_start, vma->vm_end, 1); + mmap_read_unlock(mm); +} diff --git a/arch/um/kernel/trap.c b/arch/um/kernel/trap.c new file mode 100644 index 0000000000..6d8ae86ae9 --- /dev/null +++ b/arch/um/kernel/trap.c @@ -0,0 +1,315 @@ +// SPDX-License-Identifier: GPL-2.0 +/* + * Copyright (C) 2000 - 2007 Jeff Dike (jdike@{addtoit,linux.intel}.com) + */ + +#include <linux/mm.h> +#include <linux/sched/signal.h> +#include <linux/hardirq.h> +#include <linux/module.h> +#include <linux/uaccess.h> +#include <linux/sched/debug.h> +#include <asm/current.h> +#include <asm/tlbflush.h> +#include <arch.h> +#include <as-layout.h> +#include <kern_util.h> +#include <os.h> +#include <skas.h> + +/* + * Note this is constrained to return 0, -EFAULT, -EACCES, -ENOMEM by + * segv(). + */ +int handle_page_fault(unsigned long address, unsigned long ip, + int is_write, int is_user, int *code_out) +{ + struct mm_struct *mm = current->mm; + struct vm_area_struct *vma; + pmd_t *pmd; + pte_t *pte; + int err = -EFAULT; + unsigned int flags = FAULT_FLAG_DEFAULT; + + *code_out = SEGV_MAPERR; + + /* + * If the fault was with pagefaults disabled, don't take the fault, just + * fail. + */ + if (faulthandler_disabled()) + goto out_nosemaphore; + + if (is_user) + flags |= FAULT_FLAG_USER; +retry: + mmap_read_lock(mm); + vma = find_vma(mm, address); + if (!vma) + goto out; + if (vma->vm_start <= address) + goto good_area; + if (!(vma->vm_flags & VM_GROWSDOWN)) + goto out; + if (is_user && !ARCH_IS_STACKGROW(address)) + goto out; + vma = expand_stack(mm, address); + if (!vma) + goto out_nosemaphore; + +good_area: + *code_out = SEGV_ACCERR; + if (is_write) { + if (!(vma->vm_flags & VM_WRITE)) + goto out; + flags |= FAULT_FLAG_WRITE; + } else { + /* Don't require VM_READ|VM_EXEC for write faults! */ + if (!(vma->vm_flags & (VM_READ | VM_EXEC))) + goto out; + } + + do { + vm_fault_t fault; + + fault = handle_mm_fault(vma, address, flags, NULL); + + if ((fault & VM_FAULT_RETRY) && fatal_signal_pending(current)) + goto out_nosemaphore; + + /* The fault is fully completed (including releasing mmap lock) */ + if (fault & VM_FAULT_COMPLETED) + return 0; + + if (unlikely(fault & VM_FAULT_ERROR)) { + if (fault & VM_FAULT_OOM) { + goto out_of_memory; + } else if (fault & VM_FAULT_SIGSEGV) { + goto out; + } else if (fault & VM_FAULT_SIGBUS) { + err = -EACCES; + goto out; + } + BUG(); + } + if (fault & VM_FAULT_RETRY) { + flags |= FAULT_FLAG_TRIED; + + goto retry; + } + + pmd = pmd_off(mm, address); + pte = pte_offset_kernel(pmd, address); + } while (!pte_present(*pte)); + err = 0; + /* + * The below warning was added in place of + * pte_mkyoung(); if (is_write) pte_mkdirty(); + * If it's triggered, we'd see normally a hang here (a clean pte is + * marked read-only to emulate the dirty bit). + * However, the generic code can mark a PTE writable but clean on a + * concurrent read fault, triggering this harmlessly. So comment it out. + */ +#if 0 + WARN_ON(!pte_young(*pte) || (is_write && !pte_dirty(*pte))); +#endif + flush_tlb_page(vma, address); +out: + mmap_read_unlock(mm); +out_nosemaphore: + return err; + +out_of_memory: + /* + * We ran out of memory, call the OOM killer, and return the userspace + * (which will retry the fault, or kill us if we got oom-killed). + */ + mmap_read_unlock(mm); + if (!is_user) + goto out_nosemaphore; + pagefault_out_of_memory(); + return 0; +} + +static void show_segv_info(struct uml_pt_regs *regs) +{ + struct task_struct *tsk = current; + struct faultinfo *fi = UPT_FAULTINFO(regs); + + if (!unhandled_signal(tsk, SIGSEGV)) + return; + + if (!printk_ratelimit()) + return; + + printk("%s%s[%d]: segfault at %lx ip %px sp %px error %x", + task_pid_nr(tsk) > 1 ? KERN_INFO : KERN_EMERG, + tsk->comm, task_pid_nr(tsk), FAULT_ADDRESS(*fi), + (void *)UPT_IP(regs), (void *)UPT_SP(regs), + fi->error_code); + + print_vma_addr(KERN_CONT " in ", UPT_IP(regs)); + printk(KERN_CONT "\n"); +} + +static void bad_segv(struct faultinfo fi, unsigned long ip) +{ + current->thread.arch.faultinfo = fi; + force_sig_fault(SIGSEGV, SEGV_ACCERR, (void __user *) FAULT_ADDRESS(fi)); +} + +void fatal_sigsegv(void) +{ + force_fatal_sig(SIGSEGV); + do_signal(¤t->thread.regs); + /* + * This is to tell gcc that we're not returning - do_signal + * can, in general, return, but in this case, it's not, since + * we just got a fatal SIGSEGV queued. + */ + os_dump_core(); +} + +/** + * segv_handler() - the SIGSEGV handler + * @sig: the signal number + * @unused_si: the signal info struct; unused in this handler + * @regs: the ptrace register information + * + * The handler first extracts the faultinfo from the UML ptrace regs struct. + * If the userfault did not happen in an UML userspace process, bad_segv is called. + * Otherwise the signal did happen in a cloned userspace process, handle it. + */ +void segv_handler(int sig, struct siginfo *unused_si, struct uml_pt_regs *regs) +{ + struct faultinfo * fi = UPT_FAULTINFO(regs); + + if (UPT_IS_USER(regs) && !SEGV_IS_FIXABLE(fi)) { + show_segv_info(regs); + bad_segv(*fi, UPT_IP(regs)); + return; + } + segv(*fi, UPT_IP(regs), UPT_IS_USER(regs), regs); +} + +/* + * We give a *copy* of the faultinfo in the regs to segv. + * This must be done, since nesting SEGVs could overwrite + * the info in the regs. A pointer to the info then would + * give us bad data! + */ +unsigned long segv(struct faultinfo fi, unsigned long ip, int is_user, + struct uml_pt_regs *regs) +{ + jmp_buf *catcher; + int si_code; + int err; + int is_write = FAULT_WRITE(fi); + unsigned long address = FAULT_ADDRESS(fi); + + if (!is_user && regs) + current->thread.segv_regs = container_of(regs, struct pt_regs, regs); + + if (!is_user && (address >= start_vm) && (address < end_vm)) { + flush_tlb_kernel_vm(); + goto out; + } + else if (current->mm == NULL) { + show_regs(container_of(regs, struct pt_regs, regs)); + panic("Segfault with no mm"); + } + else if (!is_user && address > PAGE_SIZE && address < TASK_SIZE) { + show_regs(container_of(regs, struct pt_regs, regs)); + panic("Kernel tried to access user memory at addr 0x%lx, ip 0x%lx", + address, ip); + } + + if (SEGV_IS_FIXABLE(&fi)) + err = handle_page_fault(address, ip, is_write, is_user, + &si_code); + else { + err = -EFAULT; + /* + * A thread accessed NULL, we get a fault, but CR2 is invalid. + * This code is used in __do_copy_from_user() of TT mode. + * XXX tt mode is gone, so maybe this isn't needed any more + */ + address = 0; + } + + catcher = current->thread.fault_catcher; + if (!err) + goto out; + else if (catcher != NULL) { + current->thread.fault_addr = (void *) address; + UML_LONGJMP(catcher, 1); + } + else if (current->thread.fault_addr != NULL) + panic("fault_addr set but no fault catcher"); + else if (!is_user && arch_fixup(ip, regs)) + goto out; + + if (!is_user) { + show_regs(container_of(regs, struct pt_regs, regs)); + panic("Kernel mode fault at addr 0x%lx, ip 0x%lx", + address, ip); + } + + show_segv_info(regs); + + if (err == -EACCES) { + current->thread.arch.faultinfo = fi; + force_sig_fault(SIGBUS, BUS_ADRERR, (void __user *)address); + } else { + BUG_ON(err != -EFAULT); + current->thread.arch.faultinfo = fi; + force_sig_fault(SIGSEGV, si_code, (void __user *) address); + } + +out: + if (regs) + current->thread.segv_regs = NULL; + + return 0; +} + +void relay_signal(int sig, struct siginfo *si, struct uml_pt_regs *regs) +{ + int code, err; + if (!UPT_IS_USER(regs)) { + if (sig == SIGBUS) + printk(KERN_ERR "Bus error - the host /dev/shm or /tmp " + "mount likely just ran out of space\n"); + panic("Kernel mode signal %d", sig); + } + + arch_examine_signal(sig, regs); + + /* Is the signal layout for the signal known? + * Signal data must be scrubbed to prevent information leaks. + */ + code = si->si_code; + err = si->si_errno; + if ((err == 0) && (siginfo_layout(sig, code) == SIL_FAULT)) { + struct faultinfo *fi = UPT_FAULTINFO(regs); + current->thread.arch.faultinfo = *fi; + force_sig_fault(sig, code, (void __user *)FAULT_ADDRESS(*fi)); + } else { + printk(KERN_ERR "Attempted to relay unknown signal %d (si_code = %d) with errno %d\n", + sig, code, err); + force_sig(sig); + } +} + +void bus_handler(int sig, struct siginfo *si, struct uml_pt_regs *regs) +{ + if (current->thread.fault_catcher != NULL) + UML_LONGJMP(current->thread.fault_catcher, 1); + else + relay_signal(sig, si, regs); +} + +void winch(int sig, struct siginfo *unused_si, struct uml_pt_regs *regs) +{ + do_IRQ(WINCH_IRQ, regs); +} diff --git a/arch/um/kernel/um_arch.c b/arch/um/kernel/um_arch.c new file mode 100644 index 0000000000..b1bfed0c85 --- /dev/null +++ b/arch/um/kernel/um_arch.c @@ -0,0 +1,541 @@ +// SPDX-License-Identifier: GPL-2.0 +/* + * Copyright (C) 2000 - 2007 Jeff Dike (jdike@{addtoit,linux.intel}.com) + */ + +#include <linux/cpu.h> +#include <linux/delay.h> +#include <linux/init.h> +#include <linux/mm.h> +#include <linux/ctype.h> +#include <linux/module.h> +#include <linux/panic_notifier.h> +#include <linux/seq_file.h> +#include <linux/string.h> +#include <linux/utsname.h> +#include <linux/sched.h> +#include <linux/sched/task.h> +#include <linux/kmsg_dump.h> +#include <linux/suspend.h> +#include <linux/random.h> + +#include <asm/processor.h> +#include <asm/cpufeature.h> +#include <asm/sections.h> +#include <asm/setup.h> +#include <as-layout.h> +#include <arch.h> +#include <init.h> +#include <kern.h> +#include <kern_util.h> +#include <mem_user.h> +#include <os.h> + +#include "um_arch.h" + +#define DEFAULT_COMMAND_LINE_ROOT "root=98:0" +#define DEFAULT_COMMAND_LINE_CONSOLE "console=tty0" + +/* Changed in add_arg and setup_arch, which run before SMP is started */ +static char __initdata command_line[COMMAND_LINE_SIZE] = { 0 }; + +static void __init add_arg(char *arg) +{ + if (strlen(command_line) + strlen(arg) + 1 > COMMAND_LINE_SIZE) { + os_warn("add_arg: Too many command line arguments!\n"); + exit(1); + } + if (strlen(command_line) > 0) + strcat(command_line, " "); + strcat(command_line, arg); +} + +/* + * These fields are initialized at boot time and not changed. + * XXX This structure is used only in the non-SMP case. Maybe this + * should be moved to smp.c. + */ +struct cpuinfo_um boot_cpu_data = { + .loops_per_jiffy = 0, + .ipi_pipe = { -1, -1 }, + .cache_alignment = L1_CACHE_BYTES, + .x86_capability = { 0 } +}; + +EXPORT_SYMBOL(boot_cpu_data); + +union thread_union cpu0_irqstack + __section(".data..init_irqstack") = + { .thread_info = INIT_THREAD_INFO(init_task) }; + +/* Changed in setup_arch, which is called in early boot */ +static char host_info[(__NEW_UTS_LEN + 1) * 5]; + +static int show_cpuinfo(struct seq_file *m, void *v) +{ + int i = 0; + + seq_printf(m, "processor\t: %d\n", i); + seq_printf(m, "vendor_id\t: User Mode Linux\n"); + seq_printf(m, "model name\t: UML\n"); + seq_printf(m, "mode\t\t: skas\n"); + seq_printf(m, "host\t\t: %s\n", host_info); + seq_printf(m, "fpu\t\t: %s\n", cpu_has(&boot_cpu_data, X86_FEATURE_FPU) ? "yes" : "no"); + seq_printf(m, "flags\t\t:"); + for (i = 0; i < 32*NCAPINTS; i++) + if (cpu_has(&boot_cpu_data, i) && (x86_cap_flags[i] != NULL)) + seq_printf(m, " %s", x86_cap_flags[i]); + seq_printf(m, "\n"); + seq_printf(m, "cache_alignment\t: %d\n", boot_cpu_data.cache_alignment); + seq_printf(m, "bogomips\t: %lu.%02lu\n", + loops_per_jiffy/(500000/HZ), + (loops_per_jiffy/(5000/HZ)) % 100); + + + return 0; +} + +static void *c_start(struct seq_file *m, loff_t *pos) +{ + return *pos < nr_cpu_ids ? &boot_cpu_data + *pos : NULL; +} + +static void *c_next(struct seq_file *m, void *v, loff_t *pos) +{ + ++*pos; + return c_start(m, pos); +} + +static void c_stop(struct seq_file *m, void *v) +{ +} + +const struct seq_operations cpuinfo_op = { + .start = c_start, + .next = c_next, + .stop = c_stop, + .show = show_cpuinfo, +}; + +/* Set in linux_main */ +unsigned long uml_physmem; +EXPORT_SYMBOL(uml_physmem); + +unsigned long uml_reserved; /* Also modified in mem_init */ +unsigned long start_vm; +unsigned long end_vm; + +/* Set in uml_ncpus_setup */ +int ncpus = 1; + +/* Set in early boot */ +static int have_root __initdata; +static int have_console __initdata; + +/* Set in uml_mem_setup and modified in linux_main */ +long long physmem_size = 64 * 1024 * 1024; +EXPORT_SYMBOL(physmem_size); + +static const char *usage_string = +"User Mode Linux v%s\n" +" available at http://user-mode-linux.sourceforge.net/\n\n"; + +static int __init uml_version_setup(char *line, int *add) +{ + /* Explicitly use printf() to show version in stdout */ + printf("%s\n", init_utsname()->release); + exit(0); + + return 0; +} + +__uml_setup("--version", uml_version_setup, +"--version\n" +" Prints the version number of the kernel.\n\n" +); + +static int __init uml_root_setup(char *line, int *add) +{ + have_root = 1; + return 0; +} + +__uml_setup("root=", uml_root_setup, +"root=<file containing the root fs>\n" +" This is actually used by the generic kernel in exactly the same\n" +" way as in any other kernel. If you configure a number of block\n" +" devices and want to boot off something other than ubd0, you \n" +" would use something like:\n" +" root=/dev/ubd5\n\n" +); + +static int __init no_skas_debug_setup(char *line, int *add) +{ + os_warn("'debug' is not necessary to gdb UML in skas mode - run\n"); + os_warn("'gdb linux'\n"); + + return 0; +} + +__uml_setup("debug", no_skas_debug_setup, +"debug\n" +" this flag is not needed to run gdb on UML in skas mode\n\n" +); + +static int __init uml_console_setup(char *line, int *add) +{ + have_console = 1; + return 0; +} + +__uml_setup("console=", uml_console_setup, +"console=<preferred console>\n" +" Specify the preferred console output driver\n\n" +); + +static int __init Usage(char *line, int *add) +{ + const char **p; + + printf(usage_string, init_utsname()->release); + p = &__uml_help_start; + /* Explicitly use printf() to show help in stdout */ + while (p < &__uml_help_end) { + printf("%s", *p); + p++; + } + exit(0); + return 0; +} + +__uml_setup("--help", Usage, +"--help\n" +" Prints this message.\n\n" +); + +static void __init uml_checksetup(char *line, int *add) +{ + struct uml_param *p; + + p = &__uml_setup_start; + while (p < &__uml_setup_end) { + size_t n; + + n = strlen(p->str); + if (!strncmp(line, p->str, n) && p->setup_func(line + n, add)) + return; + p++; + } +} + +static void __init uml_postsetup(void) +{ + initcall_t *p; + + p = &__uml_postsetup_start; + while (p < &__uml_postsetup_end) { + (*p)(); + p++; + } + return; +} + +static int panic_exit(struct notifier_block *self, unsigned long unused1, + void *unused2) +{ + kmsg_dump(KMSG_DUMP_PANIC); + bust_spinlocks(1); + bust_spinlocks(0); + uml_exitcode = 1; + os_dump_core(); + + return NOTIFY_DONE; +} + +static struct notifier_block panic_exit_notifier = { + .notifier_call = panic_exit, + .priority = INT_MAX - 1, /* run as 2nd notifier, won't return */ +}; + +void uml_finishsetup(void) +{ + atomic_notifier_chain_register(&panic_notifier_list, + &panic_exit_notifier); + + uml_postsetup(); + + new_thread_handler(); +} + +/* Set during early boot */ +unsigned long stub_start; +unsigned long task_size; +EXPORT_SYMBOL(task_size); + +unsigned long host_task_size; + +unsigned long brk_start; +unsigned long end_iomem; +EXPORT_SYMBOL(end_iomem); + +#define MIN_VMALLOC (32 * 1024 * 1024) + +static void parse_host_cpu_flags(char *line) +{ + int i; + for (i = 0; i < 32*NCAPINTS; i++) { + if ((x86_cap_flags[i] != NULL) && strstr(line, x86_cap_flags[i])) + set_cpu_cap(&boot_cpu_data, i); + } +} +static void parse_cache_line(char *line) +{ + long res; + char *to_parse = strstr(line, ":"); + if (to_parse) { + to_parse++; + while (*to_parse != 0 && isspace(*to_parse)) { + to_parse++; + } + if (kstrtoul(to_parse, 10, &res) == 0 && is_power_of_2(res)) + boot_cpu_data.cache_alignment = res; + else + boot_cpu_data.cache_alignment = L1_CACHE_BYTES; + } +} + +int __init linux_main(int argc, char **argv) +{ + unsigned long avail, diff; + unsigned long virtmem_size, max_physmem; + unsigned long stack; + unsigned int i; + int add; + + for (i = 1; i < argc; i++) { + if ((i == 1) && (argv[i][0] == ' ')) + continue; + add = 1; + uml_checksetup(argv[i], &add); + if (add) + add_arg(argv[i]); + } + if (have_root == 0) + add_arg(DEFAULT_COMMAND_LINE_ROOT); + + if (have_console == 0) + add_arg(DEFAULT_COMMAND_LINE_CONSOLE); + + host_task_size = os_get_top_address(); + /* reserve a few pages for the stubs (taking care of data alignment) */ + /* align the data portion */ + BUILD_BUG_ON(!is_power_of_2(STUB_DATA_PAGES)); + stub_start = (host_task_size - 1) & ~(STUB_DATA_PAGES * PAGE_SIZE - 1); + /* another page for the code portion */ + stub_start -= PAGE_SIZE; + host_task_size = stub_start; + + /* + * TASK_SIZE needs to be PGDIR_SIZE aligned or else exit_mmap craps + * out + */ + task_size = host_task_size & PGDIR_MASK; + + /* OS sanity checks that need to happen before the kernel runs */ + os_early_checks(); + + get_host_cpu_features(parse_host_cpu_flags, parse_cache_line); + + brk_start = (unsigned long) sbrk(0); + + /* + * Increase physical memory size for exec-shield users + * so they actually get what they asked for. This should + * add zero for non-exec shield users + */ + + diff = UML_ROUND_UP(brk_start) - UML_ROUND_UP(&_end); + if (diff > 1024 * 1024) { + os_info("Adding %ld bytes to physical memory to account for " + "exec-shield gap\n", diff); + physmem_size += UML_ROUND_UP(brk_start) - UML_ROUND_UP(&_end); + } + + uml_physmem = (unsigned long) __binary_start & PAGE_MASK; + + /* Reserve up to 4M after the current brk */ + uml_reserved = ROUND_4M(brk_start) + (1 << 22); + + setup_machinename(init_utsname()->machine); + + highmem = 0; + iomem_size = (iomem_size + PAGE_SIZE - 1) & PAGE_MASK; + max_physmem = TASK_SIZE - uml_physmem - iomem_size - MIN_VMALLOC; + + /* + * Zones have to begin on a 1 << MAX_ORDER page boundary, + * so this makes sure that's true for highmem + */ + max_physmem &= ~((1 << (PAGE_SHIFT + MAX_ORDER)) - 1); + if (physmem_size + iomem_size > max_physmem) { + highmem = physmem_size + iomem_size - max_physmem; + physmem_size -= highmem; + } + + high_physmem = uml_physmem + physmem_size; + end_iomem = high_physmem + iomem_size; + high_memory = (void *) end_iomem; + + start_vm = VMALLOC_START; + + virtmem_size = physmem_size; + stack = (unsigned long) argv; + stack &= ~(1024 * 1024 - 1); + avail = stack - start_vm; + if (physmem_size > avail) + virtmem_size = avail; + end_vm = start_vm + virtmem_size; + + if (virtmem_size < physmem_size) + os_info("Kernel virtual memory size shrunk to %lu bytes\n", + virtmem_size); + + os_flush_stdout(); + + return start_uml(); +} + +int __init __weak read_initrd(void) +{ + return 0; +} + +void __init setup_arch(char **cmdline_p) +{ + u8 rng_seed[32]; + + stack_protections((unsigned long) &init_thread_info); + setup_physmem(uml_physmem, uml_reserved, physmem_size, highmem); + mem_total_pages(physmem_size, iomem_size, highmem); + uml_dtb_init(); + read_initrd(); + + paging_init(); + strscpy(boot_command_line, command_line, COMMAND_LINE_SIZE); + *cmdline_p = command_line; + setup_hostinfo(host_info, sizeof host_info); + + if (os_getrandom(rng_seed, sizeof(rng_seed), 0) == sizeof(rng_seed)) { + add_bootloader_randomness(rng_seed, sizeof(rng_seed)); + memzero_explicit(rng_seed, sizeof(rng_seed)); + } +} + +void __init arch_cpu_finalize_init(void) +{ + arch_check_bugs(); + os_check_bugs(); +} + +void apply_seal_endbr(s32 *start, s32 *end) +{ +} + +void apply_retpolines(s32 *start, s32 *end) +{ +} + +void apply_returns(s32 *start, s32 *end) +{ +} + +void apply_fineibt(s32 *start_retpoline, s32 *end_retpoline, + s32 *start_cfi, s32 *end_cfi) +{ +} + +void apply_alternatives(struct alt_instr *start, struct alt_instr *end) +{ +} + +void *text_poke(void *addr, const void *opcode, size_t len) +{ + /* + * In UML, the only reference to this function is in + * apply_relocate_add(), which shouldn't ever actually call this + * because UML doesn't have live patching. + */ + WARN_ON(1); + + return memcpy(addr, opcode, len); +} + +void text_poke_sync(void) +{ +} + +void uml_pm_wake(void) +{ + pm_system_wakeup(); +} + +#ifdef CONFIG_PM_SLEEP +static int um_suspend_valid(suspend_state_t state) +{ + return state == PM_SUSPEND_MEM; +} + +static int um_suspend_prepare(void) +{ + um_irqs_suspend(); + return 0; +} + +static int um_suspend_enter(suspend_state_t state) +{ + if (WARN_ON(state != PM_SUSPEND_MEM)) + return -EINVAL; + + /* + * This is identical to the idle sleep, but we've just + * (during suspend) turned off all interrupt sources + * except for the ones we want, so now we can only wake + * up on something we actually want to wake up on. All + * timing has also been suspended. + */ + um_idle_sleep(); + return 0; +} + +static void um_suspend_finish(void) +{ + um_irqs_resume(); +} + +const struct platform_suspend_ops um_suspend_ops = { + .valid = um_suspend_valid, + .prepare = um_suspend_prepare, + .enter = um_suspend_enter, + .finish = um_suspend_finish, +}; + +static int init_pm_wake_signal(void) +{ + /* + * In external time-travel mode we can't use signals to wake up + * since that would mess with the scheduling. We'll have to do + * some additional work to support wakeup on virtio devices or + * similar, perhaps implementing a fake RTC controller that can + * trigger wakeup (and request the appropriate scheduling from + * the external scheduler when going to suspend.) + */ + if (time_travel_mode != TT_MODE_EXTERNAL) + register_pm_wake_signal(); + + suspend_set_ops(&um_suspend_ops); + + return 0; +} + +late_initcall(init_pm_wake_signal); +#endif diff --git a/arch/um/kernel/um_arch.h b/arch/um/kernel/um_arch.h new file mode 100644 index 0000000000..1e07fb7ee3 --- /dev/null +++ b/arch/um/kernel/um_arch.h @@ -0,0 +1,14 @@ +/* SPDX-License-Identifier: GPL-2.0-only */ + +#ifndef __UML_ARCH_H__ +#define __UML_ARCH_H__ + +extern void * __init uml_load_file(const char *filename, unsigned long long *size); + +#ifdef CONFIG_OF +extern void __init uml_dtb_init(void); +#else +static inline void uml_dtb_init(void) { } +#endif + +#endif diff --git a/arch/um/kernel/umid.c b/arch/um/kernel/umid.c new file mode 100644 index 0000000000..72bc60ade3 --- /dev/null +++ b/arch/um/kernel/umid.c @@ -0,0 +1,38 @@ +// SPDX-License-Identifier: GPL-2.0 +/* + * Copyright (C) 2001 - 2007 Jeff Dike (jdike@{addtoit,linux.intel}.com) + */ + +#include <asm/errno.h> +#include <init.h> +#include <kern.h> +#include <os.h> + +/* Changed by set_umid_arg */ +static int umid_inited; + +static int __init set_umid_arg(char *name, int *add) +{ + int err; + + if (umid_inited) { + os_warn("umid already set\n"); + return 0; + } + + *add = 0; + err = set_umid(name); + if (err == -EEXIST) + os_warn("umid '%s' already in use\n", name); + else if (!err) + umid_inited = 1; + + return 0; +} + +__uml_setup("umid=", set_umid_arg, +"umid=<name>\n" +" This is used to assign a unique identity to this UML machine and\n" +" is used for naming the pid file and management console socket.\n\n" +); + diff --git a/arch/um/kernel/uml.lds.S b/arch/um/kernel/uml.lds.S new file mode 100644 index 0000000000..5c92d58a78 --- /dev/null +++ b/arch/um/kernel/uml.lds.S @@ -0,0 +1,121 @@ +/* SPDX-License-Identifier: GPL-2.0 */ +#include <asm/vmlinux.lds.h> +#include <asm/page.h> + +OUTPUT_FORMAT(ELF_FORMAT) +OUTPUT_ARCH(ELF_ARCH) +ENTRY(_start) +jiffies = jiffies_64; + +VERSION { + { + local: *; + }; +} + +SECTIONS +{ + /* This must contain the right address - not quite the default ELF one.*/ + PROVIDE (__executable_start = START); + /* Static binaries stick stuff here, like the sigreturn trampoline, + * invisibly to objdump. So, just make __binary_start equal to the very + * beginning of the executable, and if there are unmapped pages after this, + * they are forever unusable. + */ + __binary_start = START; + + . = START + SIZEOF_HEADERS; + . = ALIGN(PAGE_SIZE); + + _text = .; + INIT_TEXT_SECTION(0) + + .text : + { + _stext = .; + TEXT_TEXT + SCHED_TEXT + LOCK_TEXT + IRQENTRY_TEXT + SOFTIRQENTRY_TEXT + *(.fixup) + /* .gnu.warning sections are handled specially by elf32.em. */ + *(.gnu.warning) + *(.gnu.linkonce.t*) + } + + . = ALIGN(PAGE_SIZE); + .syscall_stub : { + __syscall_stub_start = .; + *(.__syscall_stub*) + __syscall_stub_end = .; + } + + /* + * These are needed even in a static link, even if they wind up being empty. + * Newer glibc needs these __rel{,a}_iplt_{start,end} symbols. + */ + .rel.plt : { + *(.rel.plt) + PROVIDE_HIDDEN(__rel_iplt_start = .); + *(.rel.iplt) + PROVIDE_HIDDEN(__rel_iplt_end = .); + } + .rela.plt : { + *(.rela.plt) + PROVIDE_HIDDEN(__rela_iplt_start = .); + *(.rela.iplt) + PROVIDE_HIDDEN(__rela_iplt_end = .); + } + + #include <asm/common.lds.S> + + __init_begin = .; + init.data : { INIT_DATA } + __init_end = .; + + .data : + { + INIT_TASK_DATA(KERNEL_STACK_SIZE) + . = ALIGN(KERNEL_STACK_SIZE); + *(.data..init_irqstack) + DATA_DATA + *(.gnu.linkonce.d*) + CONSTRUCTORS + } + .data1 : { *(.data1) } + .ctors : + { + *(.ctors) + } + .dtors : + { + *(.dtors) + } + + .got : { *(.got.plt) *(.got) } + .eh_frame : { KEEP (*(.eh_frame)) } + .dynamic : { *(.dynamic) } + .tdata : { *(.tdata .tdata.* .gnu.linkonce.td.*) } + .tbss : { *(.tbss .tbss.* .gnu.linkonce.tb.*) *(.tcommon) } + /* We want the small data sections together, so single-instruction offsets + can access them all, and initialized data all before uninitialized, so + we can shorten the on-disk segment size. */ + .sdata : { *(.sdata) } + _edata = .; + PROVIDE (edata = .); + . = ALIGN(PAGE_SIZE); + __bss_start = .; + PROVIDE(_bss_start = .); + SBSS(0) + BSS(0) + __bss_stop = .; + _end = .; + PROVIDE (end = .); + + STABS_DEBUG + DWARF_DEBUG + ELF_DETAILS + + DISCARDS +} diff --git a/arch/um/kernel/vmlinux.lds.S b/arch/um/kernel/vmlinux.lds.S new file mode 100644 index 0000000000..53d719c04b --- /dev/null +++ b/arch/um/kernel/vmlinux.lds.S @@ -0,0 +1,8 @@ +#define RUNTIME_DISCARD_EXIT +KERNEL_STACK_SIZE = 4096 * (1 << CONFIG_KERNEL_STACK_ORDER); + +#ifdef CONFIG_LD_SCRIPT_STATIC +#include "uml.lds.S" +#else +#include "dyn.lds.S" +#endif diff --git a/arch/um/os-Linux/Makefile b/arch/um/os-Linux/Makefile new file mode 100644 index 0000000000..544e0b344c --- /dev/null +++ b/arch/um/os-Linux/Makefile @@ -0,0 +1,21 @@ +# SPDX-License-Identifier: GPL-2.0 +# +# Copyright (C) 2000 - 2007 Jeff Dike (jdike@{addtoit,linux.intel}.com) +# + +# Don't instrument UML-specific code +KCOV_INSTRUMENT := n + +obj-y = execvp.o file.o helper.o irq.o main.o mem.o process.o \ + registers.o sigio.o signal.o start_up.o time.o tty.o \ + umid.o user_syms.o util.o drivers/ skas/ + +CFLAGS_signal.o += -Wframe-larger-than=4096 + +obj-$(CONFIG_ARCH_REUSE_HOST_VSYSCALL_AREA) += elf_aux.o + +USER_OBJS := $(user-objs-y) elf_aux.o execvp.o file.o helper.o irq.o \ + main.o mem.o process.o registers.o sigio.o signal.o start_up.o time.o \ + tty.o umid.o util.o + +include $(srctree)/arch/um/scripts/Makefile.rules diff --git a/arch/um/os-Linux/drivers/Makefile b/arch/um/os-Linux/drivers/Makefile new file mode 100644 index 0000000000..cf2d75bb18 --- /dev/null +++ b/arch/um/os-Linux/drivers/Makefile @@ -0,0 +1,13 @@ +# SPDX-License-Identifier: GPL-2.0 +# +# Copyright (C) 2000, 2002 Jeff Dike (jdike@karaya.com) +# + +ethertap-objs := ethertap_kern.o ethertap_user.o +tuntap-objs := tuntap_kern.o tuntap_user.o + +obj-y = +obj-$(CONFIG_UML_NET_ETHERTAP) += ethertap.o +obj-$(CONFIG_UML_NET_TUNTAP) += tuntap.o + +include $(srctree)/arch/um/scripts/Makefile.rules diff --git a/arch/um/os-Linux/drivers/etap.h b/arch/um/os-Linux/drivers/etap.h new file mode 100644 index 0000000000..a475259f90 --- /dev/null +++ b/arch/um/os-Linux/drivers/etap.h @@ -0,0 +1,21 @@ +/* SPDX-License-Identifier: GPL-2.0 */ +/* + * Copyright (C) 2001 - 2007 Jeff Dike (jdike@{addtoit,linux.intel}.com) + */ + +#ifndef __DRIVERS_ETAP_H +#define __DRIVERS_ETAP_H + +#include <net_user.h> + +struct ethertap_data { + char *dev_name; + char *gate_addr; + int data_fd; + int control_fd; + void *dev; +}; + +extern const struct net_user_info ethertap_user_info; + +#endif diff --git a/arch/um/os-Linux/drivers/ethertap_kern.c b/arch/um/os-Linux/drivers/ethertap_kern.c new file mode 100644 index 0000000000..3182e759d8 --- /dev/null +++ b/arch/um/os-Linux/drivers/ethertap_kern.c @@ -0,0 +1,100 @@ +// SPDX-License-Identifier: GPL-2.0 +/* + * Copyright (C) 2001 Lennert Buytenhek (buytenh@gnu.org) and + * James Leu (jleu@mindspring.net). + * Copyright (C) 2001 - 2007 Jeff Dike (jdike@{addtoit,linux.intel}.com) + * Copyright (C) 2001 by various other people who didn't put their name here. + */ + +#include <linux/init.h> +#include <linux/netdevice.h> +#include "etap.h" +#include <net_kern.h> + +struct ethertap_init { + char *dev_name; + char *gate_addr; +}; + +static void etap_init(struct net_device *dev, void *data) +{ + struct uml_net_private *pri; + struct ethertap_data *epri; + struct ethertap_init *init = data; + + pri = netdev_priv(dev); + epri = (struct ethertap_data *) pri->user; + epri->dev_name = init->dev_name; + epri->gate_addr = init->gate_addr; + epri->data_fd = -1; + epri->control_fd = -1; + epri->dev = dev; + + printk(KERN_INFO "ethertap backend - %s", epri->dev_name); + if (epri->gate_addr != NULL) + printk(KERN_CONT ", IP = %s", epri->gate_addr); + printk(KERN_CONT "\n"); +} + +static int etap_read(int fd, struct sk_buff *skb, struct uml_net_private *lp) +{ + int len; + + len = net_recvfrom(fd, skb_mac_header(skb), + skb->dev->mtu + 2 + ETH_HEADER_ETHERTAP); + if (len <= 0) + return(len); + + skb_pull(skb, 2); + len -= 2; + return len; +} + +static int etap_write(int fd, struct sk_buff *skb, struct uml_net_private *lp) +{ + skb_push(skb, 2); + return net_send(fd, skb->data, skb->len); +} + +const struct net_kern_info ethertap_kern_info = { + .init = etap_init, + .protocol = eth_protocol, + .read = etap_read, + .write = etap_write, +}; + +int ethertap_setup(char *str, char **mac_out, void *data) +{ + struct ethertap_init *init = data; + + *init = ((struct ethertap_init) + { .dev_name = NULL, + .gate_addr = NULL }); + if (tap_setup_common(str, "ethertap", &init->dev_name, mac_out, + &init->gate_addr)) + return 0; + if (init->dev_name == NULL) { + printk(KERN_ERR "ethertap_setup : Missing tap device name\n"); + return 0; + } + + return 1; +} + +static struct transport ethertap_transport = { + .list = LIST_HEAD_INIT(ethertap_transport.list), + .name = "ethertap", + .setup = ethertap_setup, + .user = ðertap_user_info, + .kern = ðertap_kern_info, + .private_size = sizeof(struct ethertap_data), + .setup_size = sizeof(struct ethertap_init), +}; + +static int register_ethertap(void) +{ + register_transport(ðertap_transport); + return 0; +} + +late_initcall(register_ethertap); diff --git a/arch/um/os-Linux/drivers/ethertap_user.c b/arch/um/os-Linux/drivers/ethertap_user.c new file mode 100644 index 0000000000..9483021d86 --- /dev/null +++ b/arch/um/os-Linux/drivers/ethertap_user.c @@ -0,0 +1,248 @@ +// SPDX-License-Identifier: GPL-2.0 +/* + * Copyright (C) 2001 - 2007 Jeff Dike (jdike@{addtoit,linux.intel}.com) + * Copyright (C) 2001 Lennert Buytenhek (buytenh@gnu.org) and + * James Leu (jleu@mindspring.net). + * Copyright (C) 2001 by various other people who didn't put their name here. + */ + +#include <stdio.h> +#include <unistd.h> +#include <errno.h> +#include <string.h> +#include <sys/socket.h> +#include <sys/wait.h> +#include "etap.h" +#include <os.h> +#include <net_user.h> +#include <um_malloc.h> + +#define MAX_PACKET ETH_MAX_PACKET + +static int etap_user_init(void *data, void *dev) +{ + struct ethertap_data *pri = data; + + pri->dev = dev; + return 0; +} + +struct addr_change { + enum { ADD_ADDR, DEL_ADDR } what; + unsigned char addr[4]; + unsigned char netmask[4]; +}; + +static void etap_change(int op, unsigned char *addr, unsigned char *netmask, + int fd) +{ + struct addr_change change; + char *output; + int n; + + change.what = op; + memcpy(change.addr, addr, sizeof(change.addr)); + memcpy(change.netmask, netmask, sizeof(change.netmask)); + CATCH_EINTR(n = write(fd, &change, sizeof(change))); + if (n != sizeof(change)) { + printk(UM_KERN_ERR "etap_change - request failed, err = %d\n", + errno); + return; + } + + output = uml_kmalloc(UM_KERN_PAGE_SIZE, UM_GFP_KERNEL); + if (output == NULL) + printk(UM_KERN_ERR "etap_change : Failed to allocate output " + "buffer\n"); + read_output(fd, output, UM_KERN_PAGE_SIZE); + if (output != NULL) { + printk("%s", output); + kfree(output); + } +} + +static void etap_open_addr(unsigned char *addr, unsigned char *netmask, + void *arg) +{ + etap_change(ADD_ADDR, addr, netmask, *((int *) arg)); +} + +static void etap_close_addr(unsigned char *addr, unsigned char *netmask, + void *arg) +{ + etap_change(DEL_ADDR, addr, netmask, *((int *) arg)); +} + +struct etap_pre_exec_data { + int control_remote; + int control_me; + int data_me; +}; + +static void etap_pre_exec(void *arg) +{ + struct etap_pre_exec_data *data = arg; + + dup2(data->control_remote, 1); + close(data->data_me); + close(data->control_me); +} + +static int etap_tramp(char *dev, char *gate, int control_me, + int control_remote, int data_me, int data_remote) +{ + struct etap_pre_exec_data pe_data; + int pid, err, n; + char version_buf[sizeof("nnnnn\0")]; + char data_fd_buf[sizeof("nnnnnn\0")]; + char gate_buf[sizeof("nnn.nnn.nnn.nnn\0")]; + char *setup_args[] = { "uml_net", version_buf, "ethertap", dev, + data_fd_buf, gate_buf, NULL }; + char *nosetup_args[] = { "uml_net", version_buf, "ethertap", + dev, data_fd_buf, NULL }; + char **args, c; + + sprintf(data_fd_buf, "%d", data_remote); + sprintf(version_buf, "%d", UML_NET_VERSION); + if (gate != NULL) { + strncpy(gate_buf, gate, 15); + args = setup_args; + } + else args = nosetup_args; + + err = 0; + pe_data.control_remote = control_remote; + pe_data.control_me = control_me; + pe_data.data_me = data_me; + pid = run_helper(etap_pre_exec, &pe_data, args); + + if (pid < 0) + err = pid; + close(data_remote); + close(control_remote); + CATCH_EINTR(n = read(control_me, &c, sizeof(c))); + if (n != sizeof(c)) { + err = -errno; + printk(UM_KERN_ERR "etap_tramp : read of status failed, " + "err = %d\n", -err); + return err; + } + if (c != 1) { + printk(UM_KERN_ERR "etap_tramp : uml_net failed\n"); + err = helper_wait(pid); + } + return err; +} + +static int etap_open(void *data) +{ + struct ethertap_data *pri = data; + char *output; + int data_fds[2], control_fds[2], err, output_len; + + err = tap_open_common(pri->dev, pri->gate_addr); + if (err) + return err; + + err = socketpair(AF_UNIX, SOCK_DGRAM, 0, data_fds); + if (err) { + err = -errno; + printk(UM_KERN_ERR "etap_open - data socketpair failed - " + "err = %d\n", errno); + return err; + } + + err = socketpair(AF_UNIX, SOCK_STREAM, 0, control_fds); + if (err) { + err = -errno; + printk(UM_KERN_ERR "etap_open - control socketpair failed - " + "err = %d\n", errno); + goto out_close_data; + } + + err = etap_tramp(pri->dev_name, pri->gate_addr, control_fds[0], + control_fds[1], data_fds[0], data_fds[1]); + output_len = UM_KERN_PAGE_SIZE; + output = uml_kmalloc(output_len, UM_GFP_KERNEL); + read_output(control_fds[0], output, output_len); + + if (output == NULL) + printk(UM_KERN_ERR "etap_open : failed to allocate output " + "buffer\n"); + else { + printk("%s", output); + kfree(output); + } + + if (err < 0) { + printk(UM_KERN_ERR "etap_tramp failed - err = %d\n", -err); + goto out_close_control; + } + + pri->data_fd = data_fds[0]; + pri->control_fd = control_fds[0]; + iter_addresses(pri->dev, etap_open_addr, &pri->control_fd); + return data_fds[0]; + +out_close_control: + close(control_fds[0]); + close(control_fds[1]); +out_close_data: + close(data_fds[0]); + close(data_fds[1]); + return err; +} + +static void etap_close(int fd, void *data) +{ + struct ethertap_data *pri = data; + + iter_addresses(pri->dev, etap_close_addr, &pri->control_fd); + close(fd); + + if (shutdown(pri->data_fd, SHUT_RDWR) < 0) + printk(UM_KERN_ERR "etap_close - shutdown data socket failed, " + "errno = %d\n", errno); + + if (shutdown(pri->control_fd, SHUT_RDWR) < 0) + printk(UM_KERN_ERR "etap_close - shutdown control socket " + "failed, errno = %d\n", errno); + + close(pri->data_fd); + pri->data_fd = -1; + close(pri->control_fd); + pri->control_fd = -1; +} + +static void etap_add_addr(unsigned char *addr, unsigned char *netmask, + void *data) +{ + struct ethertap_data *pri = data; + + tap_check_ips(pri->gate_addr, addr); + if (pri->control_fd == -1) + return; + etap_open_addr(addr, netmask, &pri->control_fd); +} + +static void etap_del_addr(unsigned char *addr, unsigned char *netmask, + void *data) +{ + struct ethertap_data *pri = data; + + if (pri->control_fd == -1) + return; + + etap_close_addr(addr, netmask, &pri->control_fd); +} + +const struct net_user_info ethertap_user_info = { + .init = etap_user_init, + .open = etap_open, + .close = etap_close, + .remove = NULL, + .add_address = etap_add_addr, + .delete_address = etap_del_addr, + .mtu = ETH_MAX_PACKET, + .max_packet = ETH_MAX_PACKET + ETH_HEADER_ETHERTAP, +}; diff --git a/arch/um/os-Linux/drivers/tuntap.h b/arch/um/os-Linux/drivers/tuntap.h new file mode 100644 index 0000000000..e364e42abf --- /dev/null +++ b/arch/um/os-Linux/drivers/tuntap.h @@ -0,0 +1,21 @@ +/* SPDX-License-Identifier: GPL-2.0 */ +/* + * Copyright (C) 2001 - 2007 Jeff Dike (jdike@{addtoit,linux.intel}.com) + */ + +#ifndef __UM_TUNTAP_H +#define __UM_TUNTAP_H + +#include <net_user.h> + +struct tuntap_data { + char *dev_name; + int fixed_config; + char *gate_addr; + int fd; + void *dev; +}; + +extern const struct net_user_info tuntap_user_info; + +#endif diff --git a/arch/um/os-Linux/drivers/tuntap_kern.c b/arch/um/os-Linux/drivers/tuntap_kern.c new file mode 100644 index 0000000000..adcb6717be --- /dev/null +++ b/arch/um/os-Linux/drivers/tuntap_kern.c @@ -0,0 +1,86 @@ +// SPDX-License-Identifier: GPL-2.0 +/* + * Copyright (C) 2001 - 2007 Jeff Dike (jdike@{addtoit,linux.intel}.com) + */ + +#include <linux/netdevice.h> +#include <linux/init.h> +#include <linux/skbuff.h> +#include <asm/errno.h> +#include <net_kern.h> +#include "tuntap.h" + +struct tuntap_init { + char *dev_name; + char *gate_addr; +}; + +static void tuntap_init(struct net_device *dev, void *data) +{ + struct uml_net_private *pri; + struct tuntap_data *tpri; + struct tuntap_init *init = data; + + pri = netdev_priv(dev); + tpri = (struct tuntap_data *) pri->user; + tpri->dev_name = init->dev_name; + tpri->fixed_config = (init->dev_name != NULL); + tpri->gate_addr = init->gate_addr; + tpri->fd = -1; + tpri->dev = dev; + + printk(KERN_INFO "TUN/TAP backend - "); + if (tpri->gate_addr != NULL) + printk(KERN_CONT "IP = %s", tpri->gate_addr); + printk(KERN_CONT "\n"); +} + +static int tuntap_read(int fd, struct sk_buff *skb, struct uml_net_private *lp) +{ + return net_read(fd, skb_mac_header(skb), + skb->dev->mtu + ETH_HEADER_OTHER); +} + +static int tuntap_write(int fd, struct sk_buff *skb, struct uml_net_private *lp) +{ + return net_write(fd, skb->data, skb->len); +} + +const struct net_kern_info tuntap_kern_info = { + .init = tuntap_init, + .protocol = eth_protocol, + .read = tuntap_read, + .write = tuntap_write, +}; + +int tuntap_setup(char *str, char **mac_out, void *data) +{ + struct tuntap_init *init = data; + + *init = ((struct tuntap_init) + { .dev_name = NULL, + .gate_addr = NULL }); + if (tap_setup_common(str, "tuntap", &init->dev_name, mac_out, + &init->gate_addr)) + return 0; + + return 1; +} + +static struct transport tuntap_transport = { + .list = LIST_HEAD_INIT(tuntap_transport.list), + .name = "tuntap", + .setup = tuntap_setup, + .user = &tuntap_user_info, + .kern = &tuntap_kern_info, + .private_size = sizeof(struct tuntap_data), + .setup_size = sizeof(struct tuntap_init), +}; + +static int register_tuntap(void) +{ + register_transport(&tuntap_transport); + return 0; +} + +late_initcall(register_tuntap); diff --git a/arch/um/os-Linux/drivers/tuntap_user.c b/arch/um/os-Linux/drivers/tuntap_user.c new file mode 100644 index 0000000000..2284e9c1cb --- /dev/null +++ b/arch/um/os-Linux/drivers/tuntap_user.c @@ -0,0 +1,215 @@ +// SPDX-License-Identifier: GPL-2.0 +/* + * Copyright (C) 2001 - 2007 Jeff Dike (jdike@{addtoit,linux.intel}.com) + */ + +#include <stdio.h> +#include <unistd.h> +#include <errno.h> +#include <string.h> +#include <linux/if_tun.h> +#include <net/if.h> +#include <sys/ioctl.h> +#include <sys/socket.h> +#include <sys/wait.h> +#include <sys/uio.h> +#include <kern_util.h> +#include <os.h> +#include "tuntap.h" + +static int tuntap_user_init(void *data, void *dev) +{ + struct tuntap_data *pri = data; + + pri->dev = dev; + return 0; +} + +static void tuntap_add_addr(unsigned char *addr, unsigned char *netmask, + void *data) +{ + struct tuntap_data *pri = data; + + tap_check_ips(pri->gate_addr, addr); + if ((pri->fd == -1) || pri->fixed_config) + return; + open_addr(addr, netmask, pri->dev_name); +} + +static void tuntap_del_addr(unsigned char *addr, unsigned char *netmask, + void *data) +{ + struct tuntap_data *pri = data; + + if ((pri->fd == -1) || pri->fixed_config) + return; + close_addr(addr, netmask, pri->dev_name); +} + +struct tuntap_pre_exec_data { + int stdout_fd; + int close_me; +}; + +static void tuntap_pre_exec(void *arg) +{ + struct tuntap_pre_exec_data *data = arg; + + dup2(data->stdout_fd, 1); + close(data->close_me); +} + +static int tuntap_open_tramp(char *gate, int *fd_out, int me, int remote, + char *buffer, int buffer_len, int *used_out) +{ + struct tuntap_pre_exec_data data; + char version_buf[sizeof("nnnnn\0")]; + char *argv[] = { "uml_net", version_buf, "tuntap", "up", gate, + NULL }; + char buf[CMSG_SPACE(sizeof(*fd_out))]; + struct msghdr msg; + struct cmsghdr *cmsg; + struct iovec iov; + int pid, n, err; + + sprintf(version_buf, "%d", UML_NET_VERSION); + + data.stdout_fd = remote; + data.close_me = me; + + pid = run_helper(tuntap_pre_exec, &data, argv); + + if (pid < 0) + return pid; + + close(remote); + + msg.msg_name = NULL; + msg.msg_namelen = 0; + if (buffer != NULL) { + iov = ((struct iovec) { buffer, buffer_len }); + msg.msg_iov = &iov; + msg.msg_iovlen = 1; + } + else { + msg.msg_iov = NULL; + msg.msg_iovlen = 0; + } + msg.msg_control = buf; + msg.msg_controllen = sizeof(buf); + msg.msg_flags = 0; + n = recvmsg(me, &msg, 0); + *used_out = n; + if (n < 0) { + err = -errno; + printk(UM_KERN_ERR "tuntap_open_tramp : recvmsg failed - " + "errno = %d\n", errno); + return err; + } + helper_wait(pid); + + cmsg = CMSG_FIRSTHDR(&msg); + if (cmsg == NULL) { + printk(UM_KERN_ERR "tuntap_open_tramp : didn't receive a " + "message\n"); + return -EINVAL; + } + if ((cmsg->cmsg_level != SOL_SOCKET) || + (cmsg->cmsg_type != SCM_RIGHTS)) { + printk(UM_KERN_ERR "tuntap_open_tramp : didn't receive a " + "descriptor\n"); + return -EINVAL; + } + *fd_out = ((int *) CMSG_DATA(cmsg))[0]; + os_set_exec_close(*fd_out); + return 0; +} + +static int tuntap_open(void *data) +{ + struct ifreq ifr; + struct tuntap_data *pri = data; + char *output, *buffer; + int err, fds[2], len, used; + + err = tap_open_common(pri->dev, pri->gate_addr); + if (err < 0) + return err; + + if (pri->fixed_config) { + pri->fd = os_open_file("/dev/net/tun", + of_cloexec(of_rdwr(OPENFLAGS())), 0); + if (pri->fd < 0) { + printk(UM_KERN_ERR "Failed to open /dev/net/tun, " + "err = %d\n", -pri->fd); + return pri->fd; + } + memset(&ifr, 0, sizeof(ifr)); + ifr.ifr_flags = IFF_TAP | IFF_NO_PI; + strscpy(ifr.ifr_name, pri->dev_name, sizeof(ifr.ifr_name)); + if (ioctl(pri->fd, TUNSETIFF, &ifr) < 0) { + err = -errno; + printk(UM_KERN_ERR "TUNSETIFF failed, errno = %d\n", + errno); + close(pri->fd); + return err; + } + } + else { + err = socketpair(AF_UNIX, SOCK_DGRAM, 0, fds); + if (err) { + err = -errno; + printk(UM_KERN_ERR "tuntap_open : socketpair failed - " + "errno = %d\n", errno); + return err; + } + + buffer = get_output_buffer(&len); + if (buffer != NULL) + len--; + used = 0; + + err = tuntap_open_tramp(pri->gate_addr, &pri->fd, fds[0], + fds[1], buffer, len, &used); + + output = buffer; + if (err < 0) { + printk("%s", output); + free_output_buffer(buffer); + printk(UM_KERN_ERR "tuntap_open_tramp failed - " + "err = %d\n", -err); + return err; + } + + pri->dev_name = uml_strdup(buffer); + output += IFNAMSIZ; + printk("%s", output); + free_output_buffer(buffer); + + close(fds[0]); + iter_addresses(pri->dev, open_addr, pri->dev_name); + } + + return pri->fd; +} + +static void tuntap_close(int fd, void *data) +{ + struct tuntap_data *pri = data; + + if (!pri->fixed_config) + iter_addresses(pri->dev, close_addr, pri->dev_name); + close(fd); + pri->fd = -1; +} + +const struct net_user_info tuntap_user_info = { + .init = tuntap_user_init, + .open = tuntap_open, + .close = tuntap_close, + .remove = NULL, + .add_address = tuntap_add_addr, + .delete_address = tuntap_del_addr, + .mtu = ETH_MAX_PACKET, + .max_packet = ETH_MAX_PACKET + ETH_HEADER_OTHER, +}; diff --git a/arch/um/os-Linux/elf_aux.c b/arch/um/os-Linux/elf_aux.c new file mode 100644 index 0000000000..344ac403fb --- /dev/null +++ b/arch/um/os-Linux/elf_aux.c @@ -0,0 +1,74 @@ +// SPDX-License-Identifier: GPL-2.0 +/* + * arch/um/kernel/elf_aux.c + * + * Scan the ELF auxiliary vector provided by the host to extract + * information about vsyscall-page, etc. + * + * Copyright (C) 2004 Fujitsu Siemens Computers GmbH + * Author: Bodo Stroesser (bodo.stroesser@fujitsu-siemens.com) + */ +#include <elf.h> +#include <stddef.h> +#include <init.h> +#include <elf_user.h> +#include <mem_user.h> + +typedef Elf32_auxv_t elf_auxv_t; + +/* These are initialized very early in boot and never changed */ +char * elf_aux_platform; +extern long elf_aux_hwcap; +unsigned long vsyscall_ehdr; +unsigned long vsyscall_end; +unsigned long __kernel_vsyscall; + +__init void scan_elf_aux( char **envp) +{ + long page_size = 0; + elf_auxv_t * auxv; + + while ( *envp++ != NULL) ; + + for ( auxv = (elf_auxv_t *)envp; auxv->a_type != AT_NULL; auxv++) { + switch ( auxv->a_type ) { + case AT_SYSINFO: + __kernel_vsyscall = auxv->a_un.a_val; + /* See if the page is under TASK_SIZE */ + if (__kernel_vsyscall < (unsigned long) envp) + __kernel_vsyscall = 0; + break; + case AT_SYSINFO_EHDR: + vsyscall_ehdr = auxv->a_un.a_val; + /* See if the page is under TASK_SIZE */ + if (vsyscall_ehdr < (unsigned long) envp) + vsyscall_ehdr = 0; + break; + case AT_HWCAP: + elf_aux_hwcap = auxv->a_un.a_val; + break; + case AT_PLATFORM: + /* elf.h removed the pointer elements from + * a_un, so we have to use a_val, which is + * all that's left. + */ + elf_aux_platform = + (char *) (long) auxv->a_un.a_val; + break; + case AT_PAGESZ: + page_size = auxv->a_un.a_val; + break; + } + } + if ( ! __kernel_vsyscall || ! vsyscall_ehdr || + ! elf_aux_hwcap || ! elf_aux_platform || + ! page_size || (vsyscall_ehdr % page_size) ) { + __kernel_vsyscall = 0; + vsyscall_ehdr = 0; + elf_aux_hwcap = 0; + elf_aux_platform = "i586"; + } + else { + vsyscall_end = vsyscall_ehdr + page_size; + } +} diff --git a/arch/um/os-Linux/execvp.c b/arch/um/os-Linux/execvp.c new file mode 100644 index 0000000000..c09a5fd5e2 --- /dev/null +++ b/arch/um/os-Linux/execvp.c @@ -0,0 +1,150 @@ +/* Copyright (C) 2006 by Paolo Giarrusso - modified from glibc' execvp.c. + Original copyright notice follows: + + Copyright (C) 1991,92,1995-99,2002,2004 Free Software Foundation, Inc. + This file is part of the GNU C Library. + + The GNU C Library is free software; you can redistribute it and/or + modify it under the terms of the GNU Lesser General Public + License as published by the Free Software Foundation; either + version 2.1 of the License, or (at your option) any later version. + + The GNU C Library is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + Lesser General Public License for more details. + + You should have received a copy of the GNU Lesser General Public + License along with the GNU C Library; if not, write to the Free + Software Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA + 02111-1307 USA. */ +#include <unistd.h> + +#include <stdbool.h> +#include <stdlib.h> +#include <string.h> +#include <errno.h> +#include <limits.h> + +#ifndef TEST +#include <um_malloc.h> +#else +#include <stdio.h> +#define um_kmalloc malloc +#endif +#include <os.h> + +/* Execute FILE, searching in the `PATH' environment variable if it contains + no slashes, with arguments ARGV and environment from `environ'. */ +int execvp_noalloc(char *buf, const char *file, char *const argv[]) +{ + if (*file == '\0') { + return -ENOENT; + } + + if (strchr (file, '/') != NULL) { + /* Don't search when it contains a slash. */ + execv(file, argv); + } else { + int got_eacces; + size_t len, pathlen; + char *name, *p; + char *path = getenv("PATH"); + if (path == NULL) + path = ":/bin:/usr/bin"; + + len = strlen(file) + 1; + pathlen = strlen(path); + /* Copy the file name at the top. */ + name = memcpy(buf + pathlen + 1, file, len); + /* And add the slash. */ + *--name = '/'; + + got_eacces = 0; + p = path; + do { + char *startp; + + path = p; + //Let's avoid this GNU extension. + //p = strchrnul (path, ':'); + p = strchr(path, ':'); + if (!p) + p = strchr(path, '\0'); + + if (p == path) + /* Two adjacent colons, or a colon at the beginning or the end + of `PATH' means to search the current directory. */ + startp = name + 1; + else + startp = memcpy(name - (p - path), path, p - path); + + /* Try to execute this name. If it works, execv will not return. */ + execv(startp, argv); + + /* + if (errno == ENOEXEC) { + } + */ + + switch (errno) { + case EACCES: + /* Record the we got a `Permission denied' error. If we end + up finding no executable we can use, we want to diagnose + that we did find one but were denied access. */ + got_eacces = 1; + break; + case ENOENT: + case ESTALE: + case ENOTDIR: + /* Those errors indicate the file is missing or not executable + by us, in which case we want to just try the next path + directory. */ + case ENODEV: + case ETIMEDOUT: + /* Some strange filesystems like AFS return even + stranger error numbers. They cannot reasonably mean + anything else so ignore those, too. */ + case ENOEXEC: + /* We won't go searching for the shell + * if it is not executable - the Linux + * kernel already handles this enough, + * for us. */ + break; + + default: + /* Some other error means we found an executable file, but + something went wrong executing it; return the error to our + caller. */ + return -errno; + } + } while (*p++ != '\0'); + + /* We tried every element and none of them worked. */ + if (got_eacces) + /* At least one failure was due to permissions, so report that + error. */ + return -EACCES; + } + + /* Return the error from the last attempt (probably ENOENT). */ + return -errno; +} +#ifdef TEST +int main(int argc, char**argv) +{ + char buf[PATH_MAX]; + int ret; + argc--; + if (!argc) { + os_warn("Not enough arguments\n"); + return 1; + } + argv++; + if (ret = execvp_noalloc(buf, argv[0], argv)) { + errno = -ret; + perror("execvp_noalloc"); + } + return 0; +} +#endif diff --git a/arch/um/os-Linux/file.c b/arch/um/os-Linux/file.c new file mode 100644 index 0000000000..fc4450db59 --- /dev/null +++ b/arch/um/os-Linux/file.c @@ -0,0 +1,707 @@ +// SPDX-License-Identifier: GPL-2.0 +/* + * Copyright (C) 2002 - 2007 Jeff Dike (jdike@{addtoit,linux.intel}.com) + */ + +#include <stdio.h> +#include <unistd.h> +#include <stdlib.h> +#include <string.h> +#include <errno.h> +#include <fcntl.h> +#include <signal.h> +#include <linux/falloc.h> +#include <sys/ioctl.h> +#include <sys/mount.h> +#include <sys/socket.h> +#include <sys/stat.h> +#include <sys/sysmacros.h> +#include <sys/un.h> +#include <sys/types.h> +#include <sys/eventfd.h> +#include <poll.h> +#include <os.h> + +static void copy_stat(struct uml_stat *dst, const struct stat64 *src) +{ + *dst = ((struct uml_stat) { + .ust_dev = src->st_dev, /* device */ + .ust_ino = src->st_ino, /* inode */ + .ust_mode = src->st_mode, /* protection */ + .ust_nlink = src->st_nlink, /* number of hard links */ + .ust_uid = src->st_uid, /* user ID of owner */ + .ust_gid = src->st_gid, /* group ID of owner */ + .ust_size = src->st_size, /* total size, in bytes */ + .ust_blksize = src->st_blksize, /* blocksize for filesys I/O */ + .ust_blocks = src->st_blocks, /* number of blocks allocated */ + .ust_atime = src->st_atime, /* time of last access */ + .ust_mtime = src->st_mtime, /* time of last modification */ + .ust_ctime = src->st_ctime, /* time of last change */ + }); +} + +int os_stat_fd(const int fd, struct uml_stat *ubuf) +{ + struct stat64 sbuf; + int err; + + CATCH_EINTR(err = fstat64(fd, &sbuf)); + if (err < 0) + return -errno; + + if (ubuf != NULL) + copy_stat(ubuf, &sbuf); + return err; +} + +int os_stat_file(const char *file_name, struct uml_stat *ubuf) +{ + struct stat64 sbuf; + int err; + + CATCH_EINTR(err = stat64(file_name, &sbuf)); + if (err < 0) + return -errno; + + if (ubuf != NULL) + copy_stat(ubuf, &sbuf); + return err; +} + +int os_access(const char *file, int mode) +{ + int amode, err; + + amode = (mode & OS_ACC_R_OK ? R_OK : 0) | + (mode & OS_ACC_W_OK ? W_OK : 0) | + (mode & OS_ACC_X_OK ? X_OK : 0) | + (mode & OS_ACC_F_OK ? F_OK : 0); + + err = access(file, amode); + if (err < 0) + return -errno; + + return 0; +} + +/* FIXME? required only by hostaudio (because it passes ioctls verbatim) */ +int os_ioctl_generic(int fd, unsigned int cmd, unsigned long arg) +{ + int err; + + err = ioctl(fd, cmd, arg); + if (err < 0) + return -errno; + + return err; +} + +/* FIXME: ensure namebuf in os_get_if_name is big enough */ +int os_get_ifname(int fd, char* namebuf) +{ + if (ioctl(fd, SIOCGIFNAME, namebuf) < 0) + return -errno; + + return 0; +} + +int os_set_slip(int fd) +{ + int disc, sencap; + + disc = N_SLIP; + if (ioctl(fd, TIOCSETD, &disc) < 0) + return -errno; + + sencap = 0; + if (ioctl(fd, SIOCSIFENCAP, &sencap) < 0) + return -errno; + + return 0; +} + +int os_mode_fd(int fd, int mode) +{ + int err; + + CATCH_EINTR(err = fchmod(fd, mode)); + if (err < 0) + return -errno; + + return 0; +} + +int os_file_type(char *file) +{ + struct uml_stat buf; + int err; + + err = os_stat_file(file, &buf); + if (err < 0) + return err; + + if (S_ISDIR(buf.ust_mode)) + return OS_TYPE_DIR; + else if (S_ISLNK(buf.ust_mode)) + return OS_TYPE_SYMLINK; + else if (S_ISCHR(buf.ust_mode)) + return OS_TYPE_CHARDEV; + else if (S_ISBLK(buf.ust_mode)) + return OS_TYPE_BLOCKDEV; + else if (S_ISFIFO(buf.ust_mode)) + return OS_TYPE_FIFO; + else if (S_ISSOCK(buf.ust_mode)) + return OS_TYPE_SOCK; + else return OS_TYPE_FILE; +} + +int os_file_mode(const char *file, struct openflags *mode_out) +{ + int err; + + *mode_out = OPENFLAGS(); + + err = access(file, W_OK); + if (err && (errno != EACCES)) + return -errno; + else if (!err) + *mode_out = of_write(*mode_out); + + err = access(file, R_OK); + if (err && (errno != EACCES)) + return -errno; + else if (!err) + *mode_out = of_read(*mode_out); + + return err; +} + +int os_open_file(const char *file, struct openflags flags, int mode) +{ + int fd, err, f = 0; + + if (flags.r && flags.w) + f = O_RDWR; + else if (flags.r) + f = O_RDONLY; + else if (flags.w) + f = O_WRONLY; + else f = 0; + + if (flags.s) + f |= O_SYNC; + if (flags.c) + f |= O_CREAT; + if (flags.t) + f |= O_TRUNC; + if (flags.e) + f |= O_EXCL; + if (flags.a) + f |= O_APPEND; + + fd = open64(file, f, mode); + if (fd < 0) + return -errno; + + if (flags.cl && fcntl(fd, F_SETFD, 1)) { + err = -errno; + close(fd); + return err; + } + + return fd; +} + +int os_connect_socket(const char *name) +{ + struct sockaddr_un sock; + int fd, err; + + sock.sun_family = AF_UNIX; + snprintf(sock.sun_path, sizeof(sock.sun_path), "%s", name); + + fd = socket(AF_UNIX, SOCK_STREAM, 0); + if (fd < 0) { + err = -errno; + goto out; + } + + err = connect(fd, (struct sockaddr *) &sock, sizeof(sock)); + if (err) { + err = -errno; + goto out_close; + } + + return fd; + +out_close: + close(fd); +out: + return err; +} + +void os_close_file(int fd) +{ + close(fd); +} +int os_fsync_file(int fd) +{ + if (fsync(fd) < 0) + return -errno; + return 0; +} + +int os_seek_file(int fd, unsigned long long offset) +{ + unsigned long long actual; + + actual = lseek64(fd, offset, SEEK_SET); + if (actual != offset) + return -errno; + return 0; +} + +int os_read_file(int fd, void *buf, int len) +{ + int n = read(fd, buf, len); + + if (n < 0) + return -errno; + return n; +} + +int os_pread_file(int fd, void *buf, int len, unsigned long long offset) +{ + int n = pread(fd, buf, len, offset); + + if (n < 0) + return -errno; + return n; +} + +int os_write_file(int fd, const void *buf, int len) +{ + int n = write(fd, (void *) buf, len); + + if (n < 0) + return -errno; + return n; +} + +int os_sync_file(int fd) +{ + int n = fdatasync(fd); + + if (n < 0) + return -errno; + return n; +} + +int os_pwrite_file(int fd, const void *buf, int len, unsigned long long offset) +{ + int n = pwrite(fd, (void *) buf, len, offset); + + if (n < 0) + return -errno; + return n; +} + + +int os_file_size(const char *file, unsigned long long *size_out) +{ + struct uml_stat buf; + int err; + + err = os_stat_file(file, &buf); + if (err < 0) { + printk(UM_KERN_ERR "Couldn't stat \"%s\" : err = %d\n", file, + -err); + return err; + } + + if (S_ISBLK(buf.ust_mode)) { + int fd; + long blocks; + + fd = open(file, O_RDONLY, 0); + if (fd < 0) { + err = -errno; + printk(UM_KERN_ERR "Couldn't open \"%s\", " + "errno = %d\n", file, errno); + return err; + } + if (ioctl(fd, BLKGETSIZE, &blocks) < 0) { + err = -errno; + printk(UM_KERN_ERR "Couldn't get the block size of " + "\"%s\", errno = %d\n", file, errno); + close(fd); + return err; + } + *size_out = ((long long) blocks) * 512; + close(fd); + } + else *size_out = buf.ust_size; + + return 0; +} + +int os_file_modtime(const char *file, long long *modtime) +{ + struct uml_stat buf; + int err; + + err = os_stat_file(file, &buf); + if (err < 0) { + printk(UM_KERN_ERR "Couldn't stat \"%s\" : err = %d\n", file, + -err); + return err; + } + + *modtime = buf.ust_mtime; + return 0; +} + +int os_set_exec_close(int fd) +{ + int err; + + CATCH_EINTR(err = fcntl(fd, F_SETFD, FD_CLOEXEC)); + + if (err < 0) + return -errno; + return err; +} + +int os_pipe(int *fds, int stream, int close_on_exec) +{ + int err, type = stream ? SOCK_STREAM : SOCK_DGRAM; + + err = socketpair(AF_UNIX, type, 0, fds); + if (err < 0) + return -errno; + + if (!close_on_exec) + return 0; + + err = os_set_exec_close(fds[0]); + if (err < 0) + goto error; + + err = os_set_exec_close(fds[1]); + if (err < 0) + goto error; + + return 0; + + error: + printk(UM_KERN_ERR "os_pipe : Setting FD_CLOEXEC failed, err = %d\n", + -err); + close(fds[1]); + close(fds[0]); + return err; +} + +int os_set_fd_async(int fd) +{ + int err, flags; + + flags = fcntl(fd, F_GETFL); + if (flags < 0) + return -errno; + + flags |= O_ASYNC | O_NONBLOCK; + if (fcntl(fd, F_SETFL, flags) < 0) { + err = -errno; + printk(UM_KERN_ERR "os_set_fd_async : failed to set O_ASYNC " + "and O_NONBLOCK on fd # %d, errno = %d\n", fd, errno); + return err; + } + + if ((fcntl(fd, F_SETSIG, SIGIO) < 0) || + (fcntl(fd, F_SETOWN, os_getpid()) < 0)) { + err = -errno; + printk(UM_KERN_ERR "os_set_fd_async : Failed to fcntl F_SETOWN " + "(or F_SETSIG) fd %d, errno = %d\n", fd, errno); + return err; + } + + return 0; +} + +int os_clear_fd_async(int fd) +{ + int flags; + + flags = fcntl(fd, F_GETFL); + if (flags < 0) + return -errno; + + flags &= ~(O_ASYNC | O_NONBLOCK); + if (fcntl(fd, F_SETFL, flags) < 0) + return -errno; + return 0; +} + +int os_set_fd_block(int fd, int blocking) +{ + int flags; + + flags = fcntl(fd, F_GETFL); + if (flags < 0) + return -errno; + + if (blocking) + flags &= ~O_NONBLOCK; + else + flags |= O_NONBLOCK; + + if (fcntl(fd, F_SETFL, flags) < 0) + return -errno; + + return 0; +} + +int os_accept_connection(int fd) +{ + int new; + + new = accept(fd, NULL, 0); + if (new < 0) + return -errno; + return new; +} + +#ifndef SHUT_RD +#define SHUT_RD 0 +#endif + +#ifndef SHUT_WR +#define SHUT_WR 1 +#endif + +#ifndef SHUT_RDWR +#define SHUT_RDWR 2 +#endif + +int os_shutdown_socket(int fd, int r, int w) +{ + int what, err; + + if (r && w) + what = SHUT_RDWR; + else if (r) + what = SHUT_RD; + else if (w) + what = SHUT_WR; + else + return -EINVAL; + + err = shutdown(fd, what); + if (err < 0) + return -errno; + return 0; +} + +int os_rcv_fd(int fd, int *helper_pid_out) +{ + int new, n; + char buf[CMSG_SPACE(sizeof(new))]; + struct msghdr msg; + struct cmsghdr *cmsg; + struct iovec iov; + + msg.msg_name = NULL; + msg.msg_namelen = 0; + iov = ((struct iovec) { .iov_base = helper_pid_out, + .iov_len = sizeof(*helper_pid_out) }); + msg.msg_iov = &iov; + msg.msg_iovlen = 1; + msg.msg_control = buf; + msg.msg_controllen = sizeof(buf); + msg.msg_flags = 0; + + n = recvmsg(fd, &msg, 0); + if (n < 0) + return -errno; + else if (n != iov.iov_len) + *helper_pid_out = -1; + + cmsg = CMSG_FIRSTHDR(&msg); + if (cmsg == NULL) { + printk(UM_KERN_ERR "rcv_fd didn't receive anything, " + "error = %d\n", errno); + return -1; + } + if ((cmsg->cmsg_level != SOL_SOCKET) || + (cmsg->cmsg_type != SCM_RIGHTS)) { + printk(UM_KERN_ERR "rcv_fd didn't receive a descriptor\n"); + return -1; + } + + new = ((int *) CMSG_DATA(cmsg))[0]; + return new; +} + +int os_create_unix_socket(const char *file, int len, int close_on_exec) +{ + struct sockaddr_un addr; + int sock, err; + + sock = socket(PF_UNIX, SOCK_DGRAM, 0); + if (sock < 0) + return -errno; + + if (close_on_exec) { + err = os_set_exec_close(sock); + if (err < 0) + printk(UM_KERN_ERR "create_unix_socket : " + "close_on_exec failed, err = %d", -err); + } + + addr.sun_family = AF_UNIX; + + snprintf(addr.sun_path, len, "%s", file); + + err = bind(sock, (struct sockaddr *) &addr, sizeof(addr)); + if (err < 0) + return -errno; + + return sock; +} + +void os_flush_stdout(void) +{ + fflush(stdout); +} + +int os_lock_file(int fd, int excl) +{ + int type = excl ? F_WRLCK : F_RDLCK; + struct flock lock = ((struct flock) { .l_type = type, + .l_whence = SEEK_SET, + .l_start = 0, + .l_len = 0 } ); + int err, save; + + err = fcntl(fd, F_SETLK, &lock); + if (!err) + goto out; + + save = -errno; + err = fcntl(fd, F_GETLK, &lock); + if (err) { + err = -errno; + goto out; + } + + printk(UM_KERN_ERR "F_SETLK failed, file already locked by pid %d\n", + lock.l_pid); + err = save; + out: + return err; +} + +unsigned os_major(unsigned long long dev) +{ + return major(dev); +} + +unsigned os_minor(unsigned long long dev) +{ + return minor(dev); +} + +unsigned long long os_makedev(unsigned major, unsigned minor) +{ + return makedev(major, minor); +} + +int os_falloc_punch(int fd, unsigned long long offset, int len) +{ + int n = fallocate(fd, FALLOC_FL_PUNCH_HOLE|FALLOC_FL_KEEP_SIZE, offset, len); + + if (n < 0) + return -errno; + return n; +} + +int os_falloc_zeroes(int fd, unsigned long long offset, int len) +{ + int n = fallocate(fd, FALLOC_FL_ZERO_RANGE|FALLOC_FL_KEEP_SIZE, offset, len); + + if (n < 0) + return -errno; + return n; +} + +int os_eventfd(unsigned int initval, int flags) +{ + int fd = eventfd(initval, flags); + + if (fd < 0) + return -errno; + return fd; +} + +int os_sendmsg_fds(int fd, const void *buf, unsigned int len, const int *fds, + unsigned int fds_num) +{ + struct iovec iov = { + .iov_base = (void *) buf, + .iov_len = len, + }; + union { + char control[CMSG_SPACE(sizeof(*fds) * OS_SENDMSG_MAX_FDS)]; + struct cmsghdr align; + } u; + unsigned int fds_size = sizeof(*fds) * fds_num; + struct msghdr msg = { + .msg_iov = &iov, + .msg_iovlen = 1, + .msg_control = u.control, + .msg_controllen = CMSG_SPACE(fds_size), + }; + struct cmsghdr *cmsg = CMSG_FIRSTHDR(&msg); + int err; + + if (fds_num > OS_SENDMSG_MAX_FDS) + return -EINVAL; + memset(u.control, 0, sizeof(u.control)); + cmsg->cmsg_level = SOL_SOCKET; + cmsg->cmsg_type = SCM_RIGHTS; + cmsg->cmsg_len = CMSG_LEN(fds_size); + memcpy(CMSG_DATA(cmsg), fds, fds_size); + err = sendmsg(fd, &msg, 0); + + if (err < 0) + return -errno; + return err; +} + +int os_poll(unsigned int n, const int *fds) +{ + /* currently need 2 FDs at most so avoid dynamic allocation */ + struct pollfd pollfds[2] = {}; + unsigned int i; + int ret; + + if (n > ARRAY_SIZE(pollfds)) + return -EINVAL; + + for (i = 0; i < n; i++) { + pollfds[i].fd = fds[i]; + pollfds[i].events = POLLIN; + } + + ret = poll(pollfds, n, -1); + if (ret < 0) + return -errno; + + /* Return the index of the available FD */ + for (i = 0; i < n; i++) { + if (pollfds[i].revents) + return i; + } + + return -EIO; +} diff --git a/arch/um/os-Linux/helper.c b/arch/um/os-Linux/helper.c new file mode 100644 index 0000000000..b459745f52 --- /dev/null +++ b/arch/um/os-Linux/helper.c @@ -0,0 +1,169 @@ +// SPDX-License-Identifier: GPL-2.0 +/* + * Copyright (C) 2002 - 2007 Jeff Dike (jdike@{addtoit,linux.intel}.com) + */ + +#include <stdlib.h> +#include <string.h> +#include <unistd.h> +#include <errno.h> +#include <sched.h> +#include <linux/limits.h> +#include <sys/socket.h> +#include <sys/wait.h> +#include <kern_util.h> +#include <os.h> +#include <um_malloc.h> + +struct helper_data { + void (*pre_exec)(void*); + void *pre_data; + char **argv; + int fd; + char *buf; +}; + +static int helper_child(void *arg) +{ + struct helper_data *data = arg; + char **argv = data->argv; + int err, ret; + + if (data->pre_exec != NULL) + (*data->pre_exec)(data->pre_data); + err = execvp_noalloc(data->buf, argv[0], argv); + + /* If the exec succeeds, we don't get here */ + CATCH_EINTR(ret = write(data->fd, &err, sizeof(err))); + + return 0; +} + +/* Returns either the pid of the child process we run or -E* on failure. */ +int run_helper(void (*pre_exec)(void *), void *pre_data, char **argv) +{ + struct helper_data data; + unsigned long stack, sp; + int pid, fds[2], ret, n; + + stack = alloc_stack(0, __cant_sleep()); + if (stack == 0) + return -ENOMEM; + + ret = socketpair(AF_UNIX, SOCK_STREAM, 0, fds); + if (ret < 0) { + ret = -errno; + printk(UM_KERN_ERR "run_helper : pipe failed, errno = %d\n", + errno); + goto out_free; + } + + ret = os_set_exec_close(fds[1]); + if (ret < 0) { + printk(UM_KERN_ERR "run_helper : setting FD_CLOEXEC failed, " + "ret = %d\n", -ret); + goto out_close; + } + + sp = stack + UM_KERN_PAGE_SIZE; + data.pre_exec = pre_exec; + data.pre_data = pre_data; + data.argv = argv; + data.fd = fds[1]; + data.buf = __cant_sleep() ? uml_kmalloc(PATH_MAX, UM_GFP_ATOMIC) : + uml_kmalloc(PATH_MAX, UM_GFP_KERNEL); + pid = clone(helper_child, (void *) sp, CLONE_VM, &data); + if (pid < 0) { + ret = -errno; + printk(UM_KERN_ERR "run_helper : clone failed, errno = %d\n", + errno); + goto out_free2; + } + + close(fds[1]); + fds[1] = -1; + + /* + * Read the errno value from the child, if the exec failed, or get 0 if + * the exec succeeded because the pipe fd was set as close-on-exec. + */ + n = read(fds[0], &ret, sizeof(ret)); + if (n == 0) { + ret = pid; + } else { + if (n < 0) { + n = -errno; + printk(UM_KERN_ERR "run_helper : read on pipe failed, " + "ret = %d\n", -n); + ret = n; + } + CATCH_EINTR(waitpid(pid, NULL, __WALL)); + } + + if (ret < 0) + printk(UM_KERN_ERR "run_helper : failed to exec %s on host: %s\n", + argv[0], strerror(-ret)); + +out_free2: + kfree(data.buf); +out_close: + if (fds[1] != -1) + close(fds[1]); + close(fds[0]); +out_free: + free_stack(stack, 0); + return ret; +} + +int run_helper_thread(int (*proc)(void *), void *arg, unsigned int flags, + unsigned long *stack_out) +{ + unsigned long stack, sp; + int pid, status, err; + + stack = alloc_stack(0, __cant_sleep()); + if (stack == 0) + return -ENOMEM; + + sp = stack + UM_KERN_PAGE_SIZE; + pid = clone(proc, (void *) sp, flags, arg); + if (pid < 0) { + err = -errno; + printk(UM_KERN_ERR "run_helper_thread : clone failed, " + "errno = %d\n", errno); + return err; + } + if (stack_out == NULL) { + CATCH_EINTR(pid = waitpid(pid, &status, __WALL)); + if (pid < 0) { + err = -errno; + printk(UM_KERN_ERR "run_helper_thread - wait failed, " + "errno = %d\n", errno); + pid = err; + } + if (!WIFEXITED(status) || (WEXITSTATUS(status) != 0)) + printk(UM_KERN_ERR "run_helper_thread - thread " + "returned status 0x%x\n", status); + free_stack(stack, 0); + } else + *stack_out = stack; + return pid; +} + +int helper_wait(int pid) +{ + int ret, status; + int wflags = __WALL; + + CATCH_EINTR(ret = waitpid(pid, &status, wflags)); + if (ret < 0) { + printk(UM_KERN_ERR "helper_wait : waitpid process %d failed, " + "errno = %d\n", pid, errno); + return -errno; + } else if (!WIFEXITED(status) || WEXITSTATUS(status) != 0) { + printk(UM_KERN_ERR "helper_wait : process %d exited with " + "status 0x%x\n", pid, status); + return -ECHILD; + } else + return 0; +} diff --git a/arch/um/os-Linux/irq.c b/arch/um/os-Linux/irq.c new file mode 100644 index 0000000000..cf7e49c08b --- /dev/null +++ b/arch/um/os-Linux/irq.c @@ -0,0 +1,145 @@ +// SPDX-License-Identifier: GPL-2.0 +/* + * Copyright (C) 2017 - Cambridge Greys Ltd + * Copyright (C) 2011 - 2014 Cisco Systems Inc + * Copyright (C) 2000 - 2007 Jeff Dike (jdike@{addtoit,linux.intel}.com) + */ + +#include <stdlib.h> +#include <errno.h> +#include <sys/epoll.h> +#include <signal.h> +#include <string.h> +#include <irq_user.h> +#include <os.h> +#include <um_malloc.h> + +/* Epoll support */ + +static int epollfd = -1; + +#define MAX_EPOLL_EVENTS 64 + +static struct epoll_event epoll_events[MAX_EPOLL_EVENTS]; + +/* Helper to return an Epoll data pointer from an epoll event structure. + * We need to keep this one on the userspace side to keep includes separate + */ + +void *os_epoll_get_data_pointer(int index) +{ + return epoll_events[index].data.ptr; +} + +/* Helper to compare events versus the events in the epoll structure. + * Same as above - needs to be on the userspace side + */ + + +int os_epoll_triggered(int index, int events) +{ + return epoll_events[index].events & events; +} +/* Helper to set the event mask. + * The event mask is opaque to the kernel side, because it does not have + * access to the right includes/defines for EPOLL constants. + */ + +int os_event_mask(enum um_irq_type irq_type) +{ + if (irq_type == IRQ_READ) + return EPOLLIN | EPOLLPRI | EPOLLERR | EPOLLHUP | EPOLLRDHUP; + if (irq_type == IRQ_WRITE) + return EPOLLOUT; + return 0; +} + +/* + * Initial Epoll Setup + */ +int os_setup_epoll(void) +{ + epollfd = epoll_create(MAX_EPOLL_EVENTS); + return epollfd; +} + +/* + * Helper to run the actual epoll_wait + */ +int os_waiting_for_events_epoll(void) +{ + int n, err; + + n = epoll_wait(epollfd, + (struct epoll_event *) &epoll_events, MAX_EPOLL_EVENTS, 0); + if (n < 0) { + err = -errno; + if (errno != EINTR) + printk( + UM_KERN_ERR "os_waiting_for_events:" + " epoll returned %d, error = %s\n", n, + strerror(errno) + ); + return err; + } + return n; +} + + +/* + * Helper to add a fd to epoll + */ +int os_add_epoll_fd(int events, int fd, void *data) +{ + struct epoll_event event; + int result; + + event.data.ptr = data; + event.events = events | EPOLLET; + result = epoll_ctl(epollfd, EPOLL_CTL_ADD, fd, &event); + if ((result) && (errno == EEXIST)) + result = os_mod_epoll_fd(events, fd, data); + if (result) + printk("epollctl add err fd %d, %s\n", fd, strerror(errno)); + return result; +} + +/* + * Helper to mod the fd event mask and/or data backreference + */ +int os_mod_epoll_fd(int events, int fd, void *data) +{ + struct epoll_event event; + int result; + + event.data.ptr = data; + event.events = events; + result = epoll_ctl(epollfd, EPOLL_CTL_MOD, fd, &event); + if (result) + printk(UM_KERN_ERR + "epollctl mod err fd %d, %s\n", fd, strerror(errno)); + return result; +} + +/* + * Helper to delete the epoll fd + */ +int os_del_epoll_fd(int fd) +{ + struct epoll_event event; + /* This is quiet as we use this as IO ON/OFF - so it is often + * invoked on a non-existent fd + */ + return epoll_ctl(epollfd, EPOLL_CTL_DEL, fd, &event); +} + +void os_set_ioignore(void) +{ + signal(SIGIO, SIG_IGN); +} + +void os_close_epoll_fd(void) +{ + /* Needed so we do not leak an fd when rebooting */ + os_close_file(epollfd); +} diff --git a/arch/um/os-Linux/main.c b/arch/um/os-Linux/main.c new file mode 100644 index 0000000000..c8a42ecbd7 --- /dev/null +++ b/arch/um/os-Linux/main.c @@ -0,0 +1,254 @@ +// SPDX-License-Identifier: GPL-2.0 +/* + * Copyright (C) 2015 Thomas Meyer (thomas@m3y3r.de) + * Copyright (C) 2000 - 2007 Jeff Dike (jdike@{addtoit,linux.intel}.com) + */ + +#include <stdio.h> +#include <stdlib.h> +#include <unistd.h> +#include <errno.h> +#include <signal.h> +#include <string.h> +#include <sys/resource.h> +#include <as-layout.h> +#include <init.h> +#include <kern_util.h> +#include <os.h> +#include <um_malloc.h> + +#define PGD_BOUND (4 * 1024 * 1024) +#define STACKSIZE (8 * 1024 * 1024) +#define THREAD_NAME_LEN (256) + +long elf_aux_hwcap; + +static void set_stklim(void) +{ + struct rlimit lim; + + if (getrlimit(RLIMIT_STACK, &lim) < 0) { + perror("getrlimit"); + exit(1); + } + if ((lim.rlim_cur == RLIM_INFINITY) || (lim.rlim_cur > STACKSIZE)) { + lim.rlim_cur = STACKSIZE; + if (setrlimit(RLIMIT_STACK, &lim) < 0) { + perror("setrlimit"); + exit(1); + } + } +} + +static void last_ditch_exit(int sig) +{ + uml_cleanup(); + exit(1); +} + +static void install_fatal_handler(int sig) +{ + struct sigaction action; + + /* All signals are enabled in this handler ... */ + sigemptyset(&action.sa_mask); + + /* + * ... including the signal being handled, plus we want the + * handler reset to the default behavior, so that if an exit + * handler is hanging for some reason, the UML will just die + * after this signal is sent a second time. + */ + action.sa_flags = SA_RESETHAND | SA_NODEFER; + action.sa_restorer = NULL; + action.sa_handler = last_ditch_exit; + if (sigaction(sig, &action, NULL) < 0) { + os_warn("failed to install handler for signal %d " + "- errno = %d\n", sig, errno); + exit(1); + } +} + +#define UML_LIB_PATH ":" OS_LIB_PATH "/uml" + +static void setup_env_path(void) +{ + char *new_path = NULL; + char *old_path = NULL; + int path_len = 0; + + old_path = getenv("PATH"); + /* + * if no PATH variable is set or it has an empty value + * just use the default + /usr/lib/uml + */ + if (!old_path || (path_len = strlen(old_path)) == 0) { + if (putenv("PATH=:/bin:/usr/bin/" UML_LIB_PATH)) + perror("couldn't putenv"); + return; + } + + /* append /usr/lib/uml to the existing path */ + path_len += strlen("PATH=" UML_LIB_PATH) + 1; + new_path = malloc(path_len); + if (!new_path) { + perror("couldn't malloc to set a new PATH"); + return; + } + snprintf(new_path, path_len, "PATH=%s" UML_LIB_PATH, old_path); + if (putenv(new_path)) { + perror("couldn't putenv to set a new PATH"); + free(new_path); + } +} + +extern void scan_elf_aux( char **envp); + +int __init main(int argc, char **argv, char **envp) +{ + char **new_argv; + int ret, i, err; + + set_stklim(); + + setup_env_path(); + + setsid(); + + new_argv = malloc((argc + 1) * sizeof(char *)); + if (new_argv == NULL) { + perror("Mallocing argv"); + exit(1); + } + for (i = 0; i < argc; i++) { + new_argv[i] = strdup(argv[i]); + if (new_argv[i] == NULL) { + perror("Mallocing an arg"); + exit(1); + } + } + new_argv[argc] = NULL; + + /* + * Allow these signals to bring down a UML if all other + * methods of control fail. + */ + install_fatal_handler(SIGINT); + install_fatal_handler(SIGTERM); + +#ifdef CONFIG_ARCH_REUSE_HOST_VSYSCALL_AREA + scan_elf_aux(envp); +#endif + + change_sig(SIGPIPE, 0); + ret = linux_main(argc, argv); + + /* + * Disable SIGPROF - I have no idea why libc doesn't do this or turn + * off the profiling time, but UML dies with a SIGPROF just before + * exiting when profiling is active. + */ + change_sig(SIGPROF, 0); + + /* + * This signal stuff used to be in the reboot case. However, + * sometimes a timer signal can come in when we're halting (reproducably + * when writing out gcov information, presumably because that takes + * some time) and cause a segfault. + */ + + /* stop timers and set timer signal to be ignored */ + os_timer_disable(); + + /* disable SIGIO for the fds and set SIGIO to be ignored */ + err = deactivate_all_fds(); + if (err) + os_warn("deactivate_all_fds failed, errno = %d\n", -err); + + /* + * Let any pending signals fire now. This ensures + * that they won't be delivered after the exec, when + * they are definitely not expected. + */ + unblock_signals(); + + os_info("\n"); + /* Reboot */ + if (ret) { + execvp(new_argv[0], new_argv); + perror("Failed to exec kernel"); + ret = 1; + } + return uml_exitcode; +} + +extern void *__real_malloc(int); + +void *__wrap_malloc(int size) +{ + void *ret; + + if (!kmalloc_ok) + return __real_malloc(size); + else if (size <= UM_KERN_PAGE_SIZE) + /* finding contiguous pages can be hard*/ + ret = uml_kmalloc(size, UM_GFP_KERNEL); + else ret = vmalloc(size); + + /* + * glibc people insist that if malloc fails, errno should be + * set by malloc as well. So we do. + */ + if (ret == NULL) + errno = ENOMEM; + + return ret; +} + +void *__wrap_calloc(int n, int size) +{ + void *ptr = __wrap_malloc(n * size); + + if (ptr == NULL) + return NULL; + memset(ptr, 0, n * size); + return ptr; +} + +extern void __real_free(void *); + +extern unsigned long high_physmem; + +void __wrap_free(void *ptr) +{ + unsigned long addr = (unsigned long) ptr; + + /* + * We need to know how the allocation happened, so it can be correctly + * freed. This is done by seeing what region of memory the pointer is + * in - + * physical memory - kmalloc/kfree + * kernel virtual memory - vmalloc/vfree + * anywhere else - malloc/free + * If kmalloc is not yet possible, then either high_physmem and/or + * end_vm are still 0 (as at startup), in which case we call free, or + * we have set them, but anyway addr has not been allocated from those + * areas. So, in both cases __real_free is called. + * + * CAN_KMALLOC is checked because it would be bad to free a buffer + * with kmalloc/vmalloc after they have been turned off during + * shutdown. + * XXX: However, we sometimes shutdown CAN_KMALLOC temporarily, so + * there is a possibility for memory leaks. + */ + + if ((addr >= uml_physmem) && (addr < high_physmem)) { + if (kmalloc_ok) + kfree(ptr); + } + else if ((addr >= start_vm) && (addr < end_vm)) { + if (kmalloc_ok) + vfree(ptr); + } + else __real_free(ptr); +} diff --git a/arch/um/os-Linux/mem.c b/arch/um/os-Linux/mem.c new file mode 100644 index 0000000000..8530b2e086 --- /dev/null +++ b/arch/um/os-Linux/mem.c @@ -0,0 +1,232 @@ +// SPDX-License-Identifier: GPL-2.0 +/* + * Copyright (C) 2007 Jeff Dike (jdike@{addtoit,linux.intel}.com) + */ + +#include <stdio.h> +#include <stddef.h> +#include <stdlib.h> +#include <unistd.h> +#include <errno.h> +#include <fcntl.h> +#include <string.h> +#include <sys/stat.h> +#include <sys/mman.h> +#include <sys/vfs.h> +#include <linux/magic.h> +#include <init.h> +#include <os.h> + +/* + * kasan_map_memory - maps memory from @start with a size of @len. + * The allocated memory is filled with zeroes upon success. + * @start: the start address of the memory to be mapped + * @len: the length of the memory to be mapped + * + * This function is used to map shadow memory for KASAN in uml + */ +void kasan_map_memory(void *start, size_t len) +{ + if (mmap(start, + len, + PROT_READ|PROT_WRITE, + MAP_FIXED|MAP_ANONYMOUS|MAP_PRIVATE|MAP_NORESERVE, + -1, + 0) == MAP_FAILED) { + os_info("Couldn't allocate shadow memory: %s\n.", + strerror(errno)); + exit(1); + } +} + +/* Set by make_tempfile() during early boot. */ +static char *tempdir = NULL; + +/* Check if dir is on tmpfs. Return 0 if yes, -1 if no or error. */ +static int __init check_tmpfs(const char *dir) +{ + struct statfs st; + + os_info("Checking if %s is on tmpfs...", dir); + if (statfs(dir, &st) < 0) { + os_info("%s\n", strerror(errno)); + } else if (st.f_type != TMPFS_MAGIC) { + os_info("no\n"); + } else { + os_info("OK\n"); + return 0; + } + return -1; +} + +/* + * Choose the tempdir to use. We want something on tmpfs so that our memory is + * not subject to the host's vm.dirty_ratio. If a tempdir is specified in the + * environment, we use that even if it's not on tmpfs, but we warn the user. + * Otherwise, we try common tmpfs locations, and if no tmpfs directory is found + * then we fall back to /tmp. + */ +static char * __init choose_tempdir(void) +{ + static const char * const vars[] = { + "TMPDIR", + "TMP", + "TEMP", + NULL + }; + static const char fallback_dir[] = "/tmp"; + static const char * const tmpfs_dirs[] = { + "/dev/shm", + fallback_dir, + NULL + }; + int i; + const char *dir; + + os_info("Checking environment variables for a tempdir..."); + for (i = 0; vars[i]; i++) { + dir = getenv(vars[i]); + if ((dir != NULL) && (*dir != '\0')) { + os_info("%s\n", dir); + if (check_tmpfs(dir) >= 0) + goto done; + else + goto warn; + } + } + os_info("none found\n"); + + for (i = 0; tmpfs_dirs[i]; i++) { + dir = tmpfs_dirs[i]; + if (check_tmpfs(dir) >= 0) + goto done; + } + + dir = fallback_dir; +warn: + os_warn("Warning: tempdir %s is not on tmpfs\n", dir); +done: + /* Make a copy since getenv results may not remain valid forever. */ + return strdup(dir); +} + +/* + * Create an unlinked tempfile in a suitable tempdir. template must be the + * basename part of the template with a leading '/'. + */ +static int __init make_tempfile(const char *template) +{ + char *tempname; + int fd; + + if (tempdir == NULL) { + tempdir = choose_tempdir(); + if (tempdir == NULL) { + os_warn("Failed to choose tempdir: %s\n", + strerror(errno)); + return -1; + } + } + +#ifdef O_TMPFILE + fd = open(tempdir, O_CLOEXEC | O_RDWR | O_EXCL | O_TMPFILE, 0700); + /* + * If the running system does not support O_TMPFILE flag then retry + * without it. + */ + if (fd != -1 || (errno != EINVAL && errno != EISDIR && + errno != EOPNOTSUPP)) + return fd; +#endif + + tempname = malloc(strlen(tempdir) + strlen(template) + 1); + if (tempname == NULL) + return -1; + + strcpy(tempname, tempdir); + strcat(tempname, template); + fd = mkstemp(tempname); + if (fd < 0) { + os_warn("open - cannot create %s: %s\n", tempname, + strerror(errno)); + goto out; + } + if (unlink(tempname) < 0) { + perror("unlink"); + goto close; + } + free(tempname); + return fd; +close: + close(fd); +out: + free(tempname); + return -1; +} + +#define TEMPNAME_TEMPLATE "/vm_file-XXXXXX" + +static int __init create_tmp_file(unsigned long long len) +{ + int fd, err; + char zero; + + fd = make_tempfile(TEMPNAME_TEMPLATE); + if (fd < 0) + exit(1); + + /* + * Seek to len - 1 because writing a character there will + * increase the file size by one byte, to the desired length. + */ + if (lseek64(fd, len - 1, SEEK_SET) < 0) { + perror("lseek64"); + exit(1); + } + + zero = 0; + + err = write(fd, &zero, 1); + if (err != 1) { + perror("write"); + exit(1); + } + + return fd; +} + +int __init create_mem_file(unsigned long long len) +{ + int err, fd; + + fd = create_tmp_file(len); + + err = os_set_exec_close(fd); + if (err < 0) { + errno = -err; + perror("exec_close"); + } + return fd; +} + +void __init check_tmpexec(void) +{ + void *addr; + int err, fd = create_tmp_file(UM_KERN_PAGE_SIZE); + + addr = mmap(NULL, UM_KERN_PAGE_SIZE, + PROT_READ | PROT_WRITE | PROT_EXEC, MAP_PRIVATE, fd, 0); + os_info("Checking PROT_EXEC mmap in %s...", tempdir); + if (addr == MAP_FAILED) { + err = errno; + os_warn("%s\n", strerror(err)); + close(fd); + if (err == EPERM) + os_warn("%s must be not mounted noexec\n", tempdir); + exit(1); + } + os_info("OK\n"); + munmap(addr, UM_KERN_PAGE_SIZE); + + close(fd); +} diff --git a/arch/um/os-Linux/process.c b/arch/um/os-Linux/process.c new file mode 100644 index 0000000000..e52dd37dda --- /dev/null +++ b/arch/um/os-Linux/process.c @@ -0,0 +1,287 @@ +// SPDX-License-Identifier: GPL-2.0 +/* + * Copyright (C) 2015 Thomas Meyer (thomas@m3y3r.de) + * Copyright (C) 2002 - 2007 Jeff Dike (jdike@{addtoit,linux.intel}.com) + */ + +#include <stdio.h> +#include <stdlib.h> +#include <unistd.h> +#include <errno.h> +#include <signal.h> +#include <fcntl.h> +#include <sys/mman.h> +#include <sys/ptrace.h> +#include <sys/wait.h> +#include <asm/unistd.h> +#include <init.h> +#include <longjmp.h> +#include <os.h> + +#define ARBITRARY_ADDR -1 +#define FAILURE_PID -1 + +#define STAT_PATH_LEN sizeof("/proc/#######/stat\0") +#define COMM_SCANF "%*[^)])" + +unsigned long os_process_pc(int pid) +{ + char proc_stat[STAT_PATH_LEN], buf[256]; + unsigned long pc = ARBITRARY_ADDR; + int fd, err; + + sprintf(proc_stat, "/proc/%d/stat", pid); + fd = open(proc_stat, O_RDONLY, 0); + if (fd < 0) { + printk(UM_KERN_ERR "os_process_pc - couldn't open '%s', " + "errno = %d\n", proc_stat, errno); + goto out; + } + CATCH_EINTR(err = read(fd, buf, sizeof(buf))); + if (err < 0) { + printk(UM_KERN_ERR "os_process_pc - couldn't read '%s', " + "err = %d\n", proc_stat, errno); + goto out_close; + } + os_close_file(fd); + pc = ARBITRARY_ADDR; + if (sscanf(buf, "%*d " COMM_SCANF " %*c %*d %*d %*d %*d %*d %*d %*d " + "%*d %*d %*d %*d %*d %*d %*d %*d %*d %*d %*d %*d %*d %*d " + "%*d %*d %*d %*d %*d %lu", &pc) != 1) + printk(UM_KERN_ERR "os_process_pc - couldn't find pc in '%s'\n", + buf); + out_close: + close(fd); + out: + return pc; +} + +int os_process_parent(int pid) +{ + char stat[STAT_PATH_LEN]; + char data[256]; + int parent = FAILURE_PID, n, fd; + + if (pid == -1) + return parent; + + snprintf(stat, sizeof(stat), "/proc/%d/stat", pid); + fd = open(stat, O_RDONLY, 0); + if (fd < 0) { + printk(UM_KERN_ERR "Couldn't open '%s', errno = %d\n", stat, + errno); + return parent; + } + + CATCH_EINTR(n = read(fd, data, sizeof(data))); + close(fd); + + if (n < 0) { + printk(UM_KERN_ERR "Couldn't read '%s', errno = %d\n", stat, + errno); + return parent; + } + + parent = FAILURE_PID; + n = sscanf(data, "%*d " COMM_SCANF " %*c %d", &parent); + if (n != 1) + printk(UM_KERN_ERR "Failed to scan '%s'\n", data); + + return parent; +} + +void os_alarm_process(int pid) +{ + kill(pid, SIGALRM); +} + +void os_stop_process(int pid) +{ + kill(pid, SIGSTOP); +} + +void os_kill_process(int pid, int reap_child) +{ + kill(pid, SIGKILL); + if (reap_child) + CATCH_EINTR(waitpid(pid, NULL, __WALL)); +} + +/* Kill off a ptraced child by all means available. kill it normally first, + * then PTRACE_KILL it, then PTRACE_CONT it in case it's in a run state from + * which it can't exit directly. + */ + +void os_kill_ptraced_process(int pid, int reap_child) +{ + kill(pid, SIGKILL); + ptrace(PTRACE_KILL, pid); + ptrace(PTRACE_CONT, pid); + if (reap_child) + CATCH_EINTR(waitpid(pid, NULL, __WALL)); +} + +/* Don't use the glibc version, which caches the result in TLS. It misses some + * syscalls, and also breaks with clone(), which does not unshare the TLS. + */ + +int os_getpid(void) +{ + return syscall(__NR_getpid); +} + +int os_getpgrp(void) +{ + return getpgrp(); +} + +int os_map_memory(void *virt, int fd, unsigned long long off, unsigned long len, + int r, int w, int x) +{ + void *loc; + int prot; + + prot = (r ? PROT_READ : 0) | (w ? PROT_WRITE : 0) | + (x ? PROT_EXEC : 0); + + loc = mmap64((void *) virt, len, prot, MAP_SHARED | MAP_FIXED, + fd, off); + if (loc == MAP_FAILED) + return -errno; + return 0; +} + +int os_protect_memory(void *addr, unsigned long len, int r, int w, int x) +{ + int prot = ((r ? PROT_READ : 0) | (w ? PROT_WRITE : 0) | + (x ? PROT_EXEC : 0)); + + if (mprotect(addr, len, prot) < 0) + return -errno; + + return 0; +} + +int os_unmap_memory(void *addr, int len) +{ + int err; + + err = munmap(addr, len); + if (err < 0) + return -errno; + return 0; +} + +#ifndef MADV_REMOVE +#define MADV_REMOVE KERNEL_MADV_REMOVE +#endif + +int os_drop_memory(void *addr, int length) +{ + int err; + + err = madvise(addr, length, MADV_REMOVE); + if (err < 0) + err = -errno; + return err; +} + +int __init can_drop_memory(void) +{ + void *addr; + int fd, ok = 0; + + printk(UM_KERN_INFO "Checking host MADV_REMOVE support..."); + fd = create_mem_file(UM_KERN_PAGE_SIZE); + if (fd < 0) { + printk(UM_KERN_ERR "Creating test memory file failed, " + "err = %d\n", -fd); + goto out; + } + + addr = mmap64(NULL, UM_KERN_PAGE_SIZE, PROT_READ | PROT_WRITE, + MAP_SHARED, fd, 0); + if (addr == MAP_FAILED) { + printk(UM_KERN_ERR "Mapping test memory file failed, " + "err = %d\n", -errno); + goto out_close; + } + + if (madvise(addr, UM_KERN_PAGE_SIZE, MADV_REMOVE) != 0) { + printk(UM_KERN_ERR "MADV_REMOVE failed, err = %d\n", -errno); + goto out_unmap; + } + + printk(UM_KERN_CONT "OK\n"); + ok = 1; + +out_unmap: + munmap(addr, UM_KERN_PAGE_SIZE); +out_close: + close(fd); +out: + return ok; +} + +static int os_page_mincore(void *addr) +{ + char vec[2]; + int ret; + + ret = mincore(addr, UM_KERN_PAGE_SIZE, vec); + if (ret < 0) { + if (errno == ENOMEM || errno == EINVAL) + return 0; + else + return -errno; + } + + return vec[0] & 1; +} + +int os_mincore(void *addr, unsigned long len) +{ + char *vec; + int ret, i; + + if (len <= UM_KERN_PAGE_SIZE) + return os_page_mincore(addr); + + vec = calloc(1, (len + UM_KERN_PAGE_SIZE - 1) / UM_KERN_PAGE_SIZE); + if (!vec) + return -ENOMEM; + + ret = mincore(addr, UM_KERN_PAGE_SIZE, vec); + if (ret < 0) { + if (errno == ENOMEM || errno == EINVAL) + ret = 0; + else + ret = -errno; + + goto out; + } + + for (i = 0; i < ((len + UM_KERN_PAGE_SIZE - 1) / UM_KERN_PAGE_SIZE); i++) { + if (!(vec[i] & 1)) { + ret = 0; + goto out; + } + } + + ret = 1; +out: + free(vec); + return ret; +} + +void init_new_thread_signals(void) +{ + set_handler(SIGSEGV); + set_handler(SIGTRAP); + set_handler(SIGFPE); + set_handler(SIGILL); + set_handler(SIGBUS); + signal(SIGHUP, SIG_IGN); + set_handler(SIGIO); + signal(SIGWINCH, SIG_IGN); +} diff --git a/arch/um/os-Linux/registers.c b/arch/um/os-Linux/registers.c new file mode 100644 index 0000000000..b123955be7 --- /dev/null +++ b/arch/um/os-Linux/registers.c @@ -0,0 +1,58 @@ +// SPDX-License-Identifier: GPL-2.0 +/* + * Copyright (C) 2004 PathScale, Inc + * Copyright (C) 2004 - 2007 Jeff Dike (jdike@{addtoit,linux.intel}.com) + */ + +#include <errno.h> +#include <string.h> +#include <sys/ptrace.h> +#include <sysdep/ptrace.h> +#include <sysdep/ptrace_user.h> +#include <registers.h> + +int save_registers(int pid, struct uml_pt_regs *regs) +{ + int err; + + err = ptrace(PTRACE_GETREGS, pid, 0, regs->gp); + if (err < 0) + return -errno; + return 0; +} + +int restore_pid_registers(int pid, struct uml_pt_regs *regs) +{ + int err; + + err = ptrace(PTRACE_SETREGS, pid, 0, regs->gp); + if (err < 0) + return -errno; + return 0; +} + +/* This is set once at boot time and not changed thereafter */ + +static unsigned long exec_regs[MAX_REG_NR]; +static unsigned long exec_fp_regs[FP_SIZE]; + +int init_pid_registers(int pid) +{ + int err; + + err = ptrace(PTRACE_GETREGS, pid, 0, exec_regs); + if (err < 0) + return -errno; + + arch_init_registers(pid); + get_fp_registers(pid, exec_fp_regs); + return 0; +} + +void get_safe_registers(unsigned long *regs, unsigned long *fp_regs) +{ + memcpy(regs, exec_regs, sizeof(exec_regs)); + + if (fp_regs) + memcpy(fp_regs, exec_fp_regs, sizeof(exec_fp_regs)); +} diff --git a/arch/um/os-Linux/sigio.c b/arch/um/os-Linux/sigio.c new file mode 100644 index 0000000000..9e71794839 --- /dev/null +++ b/arch/um/os-Linux/sigio.c @@ -0,0 +1,550 @@ +// SPDX-License-Identifier: GPL-2.0 +/* + * Copyright (C) 2002 - 2008 Jeff Dike (jdike@{addtoit,linux.intel}.com) + */ + +#include <unistd.h> +#include <errno.h> +#include <fcntl.h> +#include <poll.h> +#include <pty.h> +#include <sched.h> +#include <signal.h> +#include <string.h> +#include <kern_util.h> +#include <init.h> +#include <os.h> +#include <sigio.h> +#include <um_malloc.h> + +/* + * Protected by sigio_lock(), also used by sigio_cleanup, which is an + * exitcall. + */ +static int write_sigio_pid = -1; +static unsigned long write_sigio_stack; + +/* + * These arrays are initialized before the sigio thread is started, and + * the descriptors closed after it is killed. So, it can't see them change. + * On the UML side, they are changed under the sigio_lock. + */ +#define SIGIO_FDS_INIT {-1, -1} + +static int write_sigio_fds[2] = SIGIO_FDS_INIT; +static int sigio_private[2] = SIGIO_FDS_INIT; + +struct pollfds { + struct pollfd *poll; + int size; + int used; +}; + +/* + * Protected by sigio_lock(). Used by the sigio thread, but the UML thread + * synchronizes with it. + */ +static struct pollfds current_poll; +static struct pollfds next_poll; +static struct pollfds all_sigio_fds; + +static int write_sigio_thread(void *unused) +{ + struct pollfds *fds, tmp; + struct pollfd *p; + int i, n, respond_fd; + char c; + + os_fix_helper_signals(); + fds = ¤t_poll; + while (1) { + n = poll(fds->poll, fds->used, -1); + if (n < 0) { + if (errno == EINTR) + continue; + printk(UM_KERN_ERR "write_sigio_thread : poll returned " + "%d, errno = %d\n", n, errno); + } + for (i = 0; i < fds->used; i++) { + p = &fds->poll[i]; + if (p->revents == 0) + continue; + if (p->fd == sigio_private[1]) { + CATCH_EINTR(n = read(sigio_private[1], &c, + sizeof(c))); + if (n != sizeof(c)) + printk(UM_KERN_ERR + "write_sigio_thread : " + "read on socket failed, " + "err = %d\n", errno); + tmp = current_poll; + current_poll = next_poll; + next_poll = tmp; + respond_fd = sigio_private[1]; + } + else { + respond_fd = write_sigio_fds[1]; + fds->used--; + memmove(&fds->poll[i], &fds->poll[i + 1], + (fds->used - i) * sizeof(*fds->poll)); + } + + CATCH_EINTR(n = write(respond_fd, &c, sizeof(c))); + if (n != sizeof(c)) + printk(UM_KERN_ERR "write_sigio_thread : " + "write on socket failed, err = %d\n", + errno); + } + } + + return 0; +} + +static int need_poll(struct pollfds *polls, int n) +{ + struct pollfd *new; + + if (n <= polls->size) + return 0; + + new = uml_kmalloc(n * sizeof(struct pollfd), UM_GFP_ATOMIC); + if (new == NULL) { + printk(UM_KERN_ERR "need_poll : failed to allocate new " + "pollfds\n"); + return -ENOMEM; + } + + memcpy(new, polls->poll, polls->used * sizeof(struct pollfd)); + kfree(polls->poll); + + polls->poll = new; + polls->size = n; + return 0; +} + +/* + * Must be called with sigio_lock held, because it's needed by the marked + * critical section. + */ +static void update_thread(void) +{ + unsigned long flags; + int n; + char c; + + flags = um_set_signals_trace(0); + CATCH_EINTR(n = write(sigio_private[0], &c, sizeof(c))); + if (n != sizeof(c)) { + printk(UM_KERN_ERR "update_thread : write failed, err = %d\n", + errno); + goto fail; + } + + CATCH_EINTR(n = read(sigio_private[0], &c, sizeof(c))); + if (n != sizeof(c)) { + printk(UM_KERN_ERR "update_thread : read failed, err = %d\n", + errno); + goto fail; + } + + um_set_signals_trace(flags); + return; + fail: + /* Critical section start */ + if (write_sigio_pid != -1) { + os_kill_process(write_sigio_pid, 1); + free_stack(write_sigio_stack, 0); + } + write_sigio_pid = -1; + close(sigio_private[0]); + close(sigio_private[1]); + close(write_sigio_fds[0]); + close(write_sigio_fds[1]); + /* Critical section end */ + um_set_signals_trace(flags); +} + +int __add_sigio_fd(int fd) +{ + struct pollfd *p; + int err, i, n; + + for (i = 0; i < all_sigio_fds.used; i++) { + if (all_sigio_fds.poll[i].fd == fd) + break; + } + if (i == all_sigio_fds.used) + return -ENOSPC; + + p = &all_sigio_fds.poll[i]; + + for (i = 0; i < current_poll.used; i++) { + if (current_poll.poll[i].fd == fd) + return 0; + } + + n = current_poll.used; + err = need_poll(&next_poll, n + 1); + if (err) + return err; + + memcpy(next_poll.poll, current_poll.poll, + current_poll.used * sizeof(struct pollfd)); + next_poll.poll[n] = *p; + next_poll.used = n + 1; + update_thread(); + + return 0; +} + + +int add_sigio_fd(int fd) +{ + int err; + + sigio_lock(); + err = __add_sigio_fd(fd); + sigio_unlock(); + + return err; +} + +int __ignore_sigio_fd(int fd) +{ + struct pollfd *p; + int err, i, n = 0; + + /* + * This is called from exitcalls elsewhere in UML - if + * sigio_cleanup has already run, then update_thread will hang + * or fail because the thread is no longer running. + */ + if (write_sigio_pid == -1) + return -EIO; + + for (i = 0; i < current_poll.used; i++) { + if (current_poll.poll[i].fd == fd) + break; + } + if (i == current_poll.used) + return -ENOENT; + + err = need_poll(&next_poll, current_poll.used - 1); + if (err) + return err; + + for (i = 0; i < current_poll.used; i++) { + p = ¤t_poll.poll[i]; + if (p->fd != fd) + next_poll.poll[n++] = *p; + } + next_poll.used = current_poll.used - 1; + + update_thread(); + + return 0; +} + +int ignore_sigio_fd(int fd) +{ + int err; + + sigio_lock(); + err = __ignore_sigio_fd(fd); + sigio_unlock(); + + return err; +} + +static struct pollfd *setup_initial_poll(int fd) +{ + struct pollfd *p; + + p = uml_kmalloc(sizeof(struct pollfd), UM_GFP_KERNEL); + if (p == NULL) { + printk(UM_KERN_ERR "setup_initial_poll : failed to allocate " + "poll\n"); + return NULL; + } + *p = ((struct pollfd) { .fd = fd, + .events = POLLIN, + .revents = 0 }); + return p; +} + +static void write_sigio_workaround(void) +{ + struct pollfd *p; + int err; + int l_write_sigio_fds[2]; + int l_sigio_private[2]; + int l_write_sigio_pid; + + /* We call this *tons* of times - and most ones we must just fail. */ + sigio_lock(); + l_write_sigio_pid = write_sigio_pid; + sigio_unlock(); + + if (l_write_sigio_pid != -1) + return; + + err = os_pipe(l_write_sigio_fds, 1, 1); + if (err < 0) { + printk(UM_KERN_ERR "write_sigio_workaround - os_pipe 1 failed, " + "err = %d\n", -err); + return; + } + err = os_pipe(l_sigio_private, 1, 1); + if (err < 0) { + printk(UM_KERN_ERR "write_sigio_workaround - os_pipe 2 failed, " + "err = %d\n", -err); + goto out_close1; + } + + p = setup_initial_poll(l_sigio_private[1]); + if (!p) + goto out_close2; + + sigio_lock(); + + /* + * Did we race? Don't try to optimize this, please, it's not so likely + * to happen, and no more than once at the boot. + */ + if (write_sigio_pid != -1) + goto out_free; + + current_poll = ((struct pollfds) { .poll = p, + .used = 1, + .size = 1 }); + + if (write_sigio_irq(l_write_sigio_fds[0])) + goto out_clear_poll; + + memcpy(write_sigio_fds, l_write_sigio_fds, sizeof(l_write_sigio_fds)); + memcpy(sigio_private, l_sigio_private, sizeof(l_sigio_private)); + + write_sigio_pid = run_helper_thread(write_sigio_thread, NULL, + CLONE_FILES | CLONE_VM, + &write_sigio_stack); + + if (write_sigio_pid < 0) + goto out_clear; + + sigio_unlock(); + return; + +out_clear: + write_sigio_pid = -1; + write_sigio_fds[0] = -1; + write_sigio_fds[1] = -1; + sigio_private[0] = -1; + sigio_private[1] = -1; +out_clear_poll: + current_poll = ((struct pollfds) { .poll = NULL, + .size = 0, + .used = 0 }); +out_free: + sigio_unlock(); + kfree(p); +out_close2: + close(l_sigio_private[0]); + close(l_sigio_private[1]); +out_close1: + close(l_write_sigio_fds[0]); + close(l_write_sigio_fds[1]); +} + +void sigio_broken(int fd) +{ + int err; + + write_sigio_workaround(); + + sigio_lock(); + err = need_poll(&all_sigio_fds, all_sigio_fds.used + 1); + if (err) { + printk(UM_KERN_ERR "maybe_sigio_broken - failed to add pollfd " + "for descriptor %d\n", fd); + goto out; + } + + all_sigio_fds.poll[all_sigio_fds.used++] = + ((struct pollfd) { .fd = fd, + .events = POLLIN, + .revents = 0 }); +out: + sigio_unlock(); +} + +/* Changed during early boot */ +static int pty_output_sigio; + +void maybe_sigio_broken(int fd) +{ + if (!isatty(fd)) + return; + + if (pty_output_sigio) + return; + + sigio_broken(fd); +} + +static void sigio_cleanup(void) +{ + if (write_sigio_pid == -1) + return; + + os_kill_process(write_sigio_pid, 1); + free_stack(write_sigio_stack, 0); + write_sigio_pid = -1; +} + +__uml_exitcall(sigio_cleanup); + +/* Used as a flag during SIGIO testing early in boot */ +static int got_sigio; + +static void __init handler(int sig) +{ + got_sigio = 1; +} + +struct openpty_arg { + int master; + int slave; + int err; +}; + +static void openpty_cb(void *arg) +{ + struct openpty_arg *info = arg; + + info->err = 0; + if (openpty(&info->master, &info->slave, NULL, NULL, NULL)) + info->err = -errno; +} + +static int async_pty(int master, int slave) +{ + int flags; + + flags = fcntl(master, F_GETFL); + if (flags < 0) + return -errno; + + if ((fcntl(master, F_SETFL, flags | O_NONBLOCK | O_ASYNC) < 0) || + (fcntl(master, F_SETOWN, os_getpid()) < 0)) + return -errno; + + if ((fcntl(slave, F_SETFL, flags | O_NONBLOCK) < 0)) + return -errno; + + return 0; +} + +static void __init check_one_sigio(void (*proc)(int, int)) +{ + struct sigaction old, new; + struct openpty_arg pty = { .master = -1, .slave = -1 }; + int master, slave, err; + + initial_thread_cb(openpty_cb, &pty); + if (pty.err) { + printk(UM_KERN_ERR "check_one_sigio failed, errno = %d\n", + -pty.err); + return; + } + + master = pty.master; + slave = pty.slave; + + if ((master == -1) || (slave == -1)) { + printk(UM_KERN_ERR "check_one_sigio failed to allocate a " + "pty\n"); + return; + } + + /* Not now, but complain so we now where we failed. */ + err = raw(master); + if (err < 0) { + printk(UM_KERN_ERR "check_one_sigio : raw failed, errno = %d\n", + -err); + return; + } + + err = async_pty(master, slave); + if (err < 0) { + printk(UM_KERN_ERR "check_one_sigio : sigio_async failed, " + "err = %d\n", -err); + return; + } + + if (sigaction(SIGIO, NULL, &old) < 0) { + printk(UM_KERN_ERR "check_one_sigio : sigaction 1 failed, " + "errno = %d\n", errno); + return; + } + + new = old; + new.sa_handler = handler; + if (sigaction(SIGIO, &new, NULL) < 0) { + printk(UM_KERN_ERR "check_one_sigio : sigaction 2 failed, " + "errno = %d\n", errno); + return; + } + + got_sigio = 0; + (*proc)(master, slave); + + close(master); + close(slave); + + if (sigaction(SIGIO, &old, NULL) < 0) + printk(UM_KERN_ERR "check_one_sigio : sigaction 3 failed, " + "errno = %d\n", errno); +} + +static void tty_output(int master, int slave) +{ + int n; + char buf[512]; + + printk(UM_KERN_INFO "Checking that host ptys support output SIGIO..."); + + memset(buf, 0, sizeof(buf)); + + while (write(master, buf, sizeof(buf)) > 0) ; + if (errno != EAGAIN) + printk(UM_KERN_ERR "tty_output : write failed, errno = %d\n", + errno); + while (((n = read(slave, buf, sizeof(buf))) > 0) && + !({ barrier(); got_sigio; })) + ; + + if (got_sigio) { + printk(UM_KERN_CONT "Yes\n"); + pty_output_sigio = 1; + } else if (n == -EAGAIN) + printk(UM_KERN_CONT "No, enabling workaround\n"); + else + printk(UM_KERN_CONT "tty_output : read failed, err = %d\n", n); +} + +static void __init check_sigio(void) +{ + if ((access("/dev/ptmx", R_OK) < 0) && + (access("/dev/ptyp0", R_OK) < 0)) { + printk(UM_KERN_WARNING "No pseudo-terminals available - " + "skipping pty SIGIO check\n"); + return; + } + check_one_sigio(tty_output); +} + +/* Here because it only does the SIGIO testing for now */ +void __init os_check_bugs(void) +{ + check_sigio(); +} diff --git a/arch/um/os-Linux/signal.c b/arch/um/os-Linux/signal.c new file mode 100644 index 0000000000..24a403a70a --- /dev/null +++ b/arch/um/os-Linux/signal.c @@ -0,0 +1,419 @@ +// SPDX-License-Identifier: GPL-2.0 +/* + * Copyright (C) 2015 Anton Ivanov (aivanov@{brocade.com,kot-begemot.co.uk}) + * Copyright (C) 2015 Thomas Meyer (thomas@m3y3r.de) + * Copyright (C) 2004 PathScale, Inc + * Copyright (C) 2004 - 2007 Jeff Dike (jdike@{addtoit,linux.intel}.com) + */ + +#include <stdlib.h> +#include <stdarg.h> +#include <errno.h> +#include <signal.h> +#include <string.h> +#include <strings.h> +#include <as-layout.h> +#include <kern_util.h> +#include <os.h> +#include <sysdep/mcontext.h> +#include <um_malloc.h> +#include <sys/ucontext.h> +#include <timetravel.h> + +void (*sig_info[NSIG])(int, struct siginfo *, struct uml_pt_regs *) = { + [SIGTRAP] = relay_signal, + [SIGFPE] = relay_signal, + [SIGILL] = relay_signal, + [SIGWINCH] = winch, + [SIGBUS] = bus_handler, + [SIGSEGV] = segv_handler, + [SIGIO] = sigio_handler, +}; + +static void sig_handler_common(int sig, struct siginfo *si, mcontext_t *mc) +{ + struct uml_pt_regs r; + int save_errno = errno; + + r.is_user = 0; + if (sig == SIGSEGV) { + /* For segfaults, we want the data from the sigcontext. */ + get_regs_from_mc(&r, mc); + GET_FAULTINFO_FROM_MC(r.faultinfo, mc); + } + + /* enable signals if sig isn't IRQ signal */ + if ((sig != SIGIO) && (sig != SIGWINCH)) + unblock_signals_trace(); + + (*sig_info[sig])(sig, si, &r); + + errno = save_errno; +} + +/* + * These are the asynchronous signals. SIGPROF is excluded because we want to + * be able to profile all of UML, not just the non-critical sections. If + * profiling is not thread-safe, then that is not my problem. We can disable + * profiling when SMP is enabled in that case. + */ +#define SIGIO_BIT 0 +#define SIGIO_MASK (1 << SIGIO_BIT) + +#define SIGALRM_BIT 1 +#define SIGALRM_MASK (1 << SIGALRM_BIT) + +int signals_enabled; +#ifdef UML_CONFIG_UML_TIME_TRAVEL_SUPPORT +static int signals_blocked; +#else +#define signals_blocked 0 +#endif +static unsigned int signals_pending; +static unsigned int signals_active = 0; + +void sig_handler(int sig, struct siginfo *si, mcontext_t *mc) +{ + int enabled = signals_enabled; + + if ((signals_blocked || !enabled) && (sig == SIGIO)) { + /* + * In TT_MODE_EXTERNAL, need to still call time-travel + * handlers unless signals are also blocked for the + * external time message processing. This will mark + * signals_pending by itself (only if necessary.) + */ + if (!signals_blocked && time_travel_mode == TT_MODE_EXTERNAL) + sigio_run_timetravel_handlers(); + else + signals_pending |= SIGIO_MASK; + return; + } + + block_signals_trace(); + + sig_handler_common(sig, si, mc); + + um_set_signals_trace(enabled); +} + +static void timer_real_alarm_handler(mcontext_t *mc) +{ + struct uml_pt_regs regs; + + if (mc != NULL) + get_regs_from_mc(®s, mc); + else + memset(®s, 0, sizeof(regs)); + timer_handler(SIGALRM, NULL, ®s); +} + +void timer_alarm_handler(int sig, struct siginfo *unused_si, mcontext_t *mc) +{ + int enabled; + + enabled = signals_enabled; + if (!signals_enabled) { + signals_pending |= SIGALRM_MASK; + return; + } + + block_signals_trace(); + + signals_active |= SIGALRM_MASK; + + timer_real_alarm_handler(mc); + + signals_active &= ~SIGALRM_MASK; + + um_set_signals_trace(enabled); +} + +void deliver_alarm(void) { + timer_alarm_handler(SIGALRM, NULL, NULL); +} + +void timer_set_signal_handler(void) +{ + set_handler(SIGALRM); +} + +void set_sigstack(void *sig_stack, int size) +{ + stack_t stack = { + .ss_flags = 0, + .ss_sp = sig_stack, + .ss_size = size + }; + + if (sigaltstack(&stack, NULL) != 0) + panic("enabling signal stack failed, errno = %d\n", errno); +} + +static void sigusr1_handler(int sig, struct siginfo *unused_si, mcontext_t *mc) +{ + uml_pm_wake(); +} + +void register_pm_wake_signal(void) +{ + set_handler(SIGUSR1); +} + +static void (*handlers[_NSIG])(int sig, struct siginfo *si, mcontext_t *mc) = { + [SIGSEGV] = sig_handler, + [SIGBUS] = sig_handler, + [SIGILL] = sig_handler, + [SIGFPE] = sig_handler, + [SIGTRAP] = sig_handler, + + [SIGIO] = sig_handler, + [SIGWINCH] = sig_handler, + [SIGALRM] = timer_alarm_handler, + + [SIGUSR1] = sigusr1_handler, +}; + +static void hard_handler(int sig, siginfo_t *si, void *p) +{ + ucontext_t *uc = p; + mcontext_t *mc = &uc->uc_mcontext; + unsigned long pending = 1UL << sig; + + do { + int nested, bail; + + /* + * pending comes back with one bit set for each + * interrupt that arrived while setting up the stack, + * plus a bit for this interrupt, plus the zero bit is + * set if this is a nested interrupt. + * If bail is true, then we interrupted another + * handler setting up the stack. In this case, we + * have to return, and the upper handler will deal + * with this interrupt. + */ + bail = to_irq_stack(&pending); + if (bail) + return; + + nested = pending & 1; + pending &= ~1; + + while ((sig = ffs(pending)) != 0){ + sig--; + pending &= ~(1 << sig); + (*handlers[sig])(sig, (struct siginfo *)si, mc); + } + + /* + * Again, pending comes back with a mask of signals + * that arrived while tearing down the stack. If this + * is non-zero, we just go back, set up the stack + * again, and handle the new interrupts. + */ + if (!nested) + pending = from_irq_stack(nested); + } while (pending); +} + +void set_handler(int sig) +{ + struct sigaction action; + int flags = SA_SIGINFO | SA_ONSTACK; + sigset_t sig_mask; + + action.sa_sigaction = hard_handler; + + /* block irq ones */ + sigemptyset(&action.sa_mask); + sigaddset(&action.sa_mask, SIGIO); + sigaddset(&action.sa_mask, SIGWINCH); + sigaddset(&action.sa_mask, SIGALRM); + + if (sig == SIGSEGV) + flags |= SA_NODEFER; + + if (sigismember(&action.sa_mask, sig)) + flags |= SA_RESTART; /* if it's an irq signal */ + + action.sa_flags = flags; + action.sa_restorer = NULL; + if (sigaction(sig, &action, NULL) < 0) + panic("sigaction failed - errno = %d\n", errno); + + sigemptyset(&sig_mask); + sigaddset(&sig_mask, sig); + if (sigprocmask(SIG_UNBLOCK, &sig_mask, NULL) < 0) + panic("sigprocmask failed - errno = %d\n", errno); +} + +void send_sigio_to_self(void) +{ + kill(os_getpid(), SIGIO); +} + +int change_sig(int signal, int on) +{ + sigset_t sigset; + + sigemptyset(&sigset); + sigaddset(&sigset, signal); + if (sigprocmask(on ? SIG_UNBLOCK : SIG_BLOCK, &sigset, NULL) < 0) + return -errno; + + return 0; +} + +void block_signals(void) +{ + signals_enabled = 0; + /* + * This must return with signals disabled, so this barrier + * ensures that writes are flushed out before the return. + * This might matter if gcc figures out how to inline this and + * decides to shuffle this code into the caller. + */ + barrier(); +} + +void unblock_signals(void) +{ + int save_pending; + + if (signals_enabled == 1) + return; + + signals_enabled = 1; +#ifdef UML_CONFIG_UML_TIME_TRAVEL_SUPPORT + deliver_time_travel_irqs(); +#endif + + /* + * We loop because the IRQ handler returns with interrupts off. So, + * interrupts may have arrived and we need to re-enable them and + * recheck signals_pending. + */ + while (1) { + /* + * Save and reset save_pending after enabling signals. This + * way, signals_pending won't be changed while we're reading it. + * + * Setting signals_enabled and reading signals_pending must + * happen in this order, so have the barrier here. + */ + barrier(); + + save_pending = signals_pending; + if (save_pending == 0) + return; + + signals_pending = 0; + + /* + * We have pending interrupts, so disable signals, as the + * handlers expect them off when they are called. They will + * be enabled again above. We need to trace this, as we're + * expected to be enabling interrupts already, but any more + * tracing that happens inside the handlers we call for the + * pending signals will mess up the tracing state. + */ + signals_enabled = 0; + um_trace_signals_off(); + + /* + * Deal with SIGIO first because the alarm handler might + * schedule, leaving the pending SIGIO stranded until we come + * back here. + * + * SIGIO's handler doesn't use siginfo or mcontext, + * so they can be NULL. + */ + if (save_pending & SIGIO_MASK) + sig_handler_common(SIGIO, NULL, NULL); + + /* Do not reenter the handler */ + + if ((save_pending & SIGALRM_MASK) && (!(signals_active & SIGALRM_MASK))) + timer_real_alarm_handler(NULL); + + /* Rerun the loop only if there is still pending SIGIO and not in TIMER handler */ + + if (!(signals_pending & SIGIO_MASK) && (signals_active & SIGALRM_MASK)) + return; + + /* Re-enable signals and trace that we're doing so. */ + um_trace_signals_on(); + signals_enabled = 1; + } +} + +int um_set_signals(int enable) +{ + int ret; + if (signals_enabled == enable) + return enable; + + ret = signals_enabled; + if (enable) + unblock_signals(); + else block_signals(); + + return ret; +} + +int um_set_signals_trace(int enable) +{ + int ret; + if (signals_enabled == enable) + return enable; + + ret = signals_enabled; + if (enable) + unblock_signals_trace(); + else + block_signals_trace(); + + return ret; +} + +#ifdef UML_CONFIG_UML_TIME_TRAVEL_SUPPORT +void mark_sigio_pending(void) +{ + signals_pending |= SIGIO_MASK; +} + +void block_signals_hard(void) +{ + if (signals_blocked) + return; + signals_blocked = 1; + barrier(); +} + +void unblock_signals_hard(void) +{ + if (!signals_blocked) + return; + /* Must be set to 0 before we check the pending bits etc. */ + signals_blocked = 0; + barrier(); + + if (signals_pending && signals_enabled) { + /* this is a bit inefficient, but that's not really important */ + block_signals(); + unblock_signals(); + } else if (signals_pending & SIGIO_MASK) { + /* we need to run time-travel handlers even if not enabled */ + sigio_run_timetravel_handlers(); + } +} +#endif + +int os_is_signal_stack(void) +{ + stack_t ss; + sigaltstack(NULL, &ss); + + return ss.ss_flags & SS_ONSTACK; +} diff --git a/arch/um/os-Linux/skas/Makefile b/arch/um/os-Linux/skas/Makefile new file mode 100644 index 0000000000..75f11989d2 --- /dev/null +++ b/arch/um/os-Linux/skas/Makefile @@ -0,0 +1,10 @@ +# SPDX-License-Identifier: GPL-2.0 +# +# Copyright (C) 2002 - 2007 Jeff Dike (jdike@{linux.intel,addtoit}.com) +# + +obj-y := mem.o process.o + +USER_OBJS := $(obj-y) + +include $(srctree)/arch/um/scripts/Makefile.rules diff --git a/arch/um/os-Linux/skas/mem.c b/arch/um/os-Linux/skas/mem.c new file mode 100644 index 0000000000..953fb10f3f --- /dev/null +++ b/arch/um/os-Linux/skas/mem.c @@ -0,0 +1,206 @@ +// SPDX-License-Identifier: GPL-2.0 +/* + * Copyright (C) 2002 - 2007 Jeff Dike (jdike@{addtoit,linux.intel}.com) + */ + +#include <stddef.h> +#include <unistd.h> +#include <errno.h> +#include <string.h> +#include <sys/mman.h> +#include <init.h> +#include <as-layout.h> +#include <mm_id.h> +#include <os.h> +#include <ptrace_user.h> +#include <registers.h> +#include <skas.h> +#include <sysdep/ptrace.h> +#include <sysdep/stub.h> + +extern char batch_syscall_stub[], __syscall_stub_start[]; + +extern void wait_stub_done(int pid); + +static inline unsigned long *check_init_stack(struct mm_id * mm_idp, + unsigned long *stack) +{ + if (stack == NULL) { + stack = (unsigned long *) mm_idp->stack + 2; + *stack = 0; + } + return stack; +} + +static unsigned long syscall_regs[MAX_REG_NR]; + +static int __init init_syscall_regs(void) +{ + get_safe_registers(syscall_regs, NULL); + syscall_regs[REGS_IP_INDEX] = STUB_CODE + + ((unsigned long) batch_syscall_stub - + (unsigned long) __syscall_stub_start); + syscall_regs[REGS_SP_INDEX] = STUB_DATA; + + return 0; +} + +__initcall(init_syscall_regs); + +static inline long do_syscall_stub(struct mm_id * mm_idp, void **addr) +{ + int n, i; + long ret, offset; + unsigned long * data; + unsigned long * syscall; + int err, pid = mm_idp->u.pid; + + n = ptrace_setregs(pid, syscall_regs); + if (n < 0) { + printk(UM_KERN_ERR "Registers - \n"); + for (i = 0; i < MAX_REG_NR; i++) + printk(UM_KERN_ERR "\t%d\t0x%lx\n", i, syscall_regs[i]); + panic("%s : PTRACE_SETREGS failed, errno = %d\n", + __func__, -n); + } + + err = ptrace(PTRACE_CONT, pid, 0, 0); + if (err) + panic("Failed to continue stub, pid = %d, errno = %d\n", pid, + errno); + + wait_stub_done(pid); + + /* + * When the stub stops, we find the following values on the + * beginning of the stack: + * (long )return_value + * (long )offset to failed sycall-data (0, if no error) + */ + ret = *((unsigned long *) mm_idp->stack); + offset = *((unsigned long *) mm_idp->stack + 1); + if (offset) { + data = (unsigned long *)(mm_idp->stack + offset - STUB_DATA); + printk(UM_KERN_ERR "%s : ret = %ld, offset = %ld, data = %p\n", + __func__, ret, offset, data); + syscall = (unsigned long *)((unsigned long)data + data[0]); + printk(UM_KERN_ERR "%s: syscall %ld failed, return value = 0x%lx, expected return value = 0x%lx\n", + __func__, syscall[0], ret, syscall[7]); + printk(UM_KERN_ERR " syscall parameters: 0x%lx 0x%lx 0x%lx 0x%lx 0x%lx 0x%lx\n", + syscall[1], syscall[2], syscall[3], + syscall[4], syscall[5], syscall[6]); + for (n = 1; n < data[0]/sizeof(long); n++) { + if (n == 1) + printk(UM_KERN_ERR " additional syscall data:"); + if (n % 4 == 1) + printk("\n" UM_KERN_ERR " "); + printk(" 0x%lx", data[n]); + } + if (n > 1) + printk("\n"); + } + else ret = 0; + + *addr = check_init_stack(mm_idp, NULL); + + return ret; +} + +long run_syscall_stub(struct mm_id * mm_idp, int syscall, + unsigned long *args, long expected, void **addr, + int done) +{ + unsigned long *stack = check_init_stack(mm_idp, *addr); + + *stack += sizeof(long); + stack += *stack / sizeof(long); + + *stack++ = syscall; + *stack++ = args[0]; + *stack++ = args[1]; + *stack++ = args[2]; + *stack++ = args[3]; + *stack++ = args[4]; + *stack++ = args[5]; + *stack++ = expected; + *stack = 0; + + if (!done && ((((unsigned long) stack) & ~UM_KERN_PAGE_MASK) < + UM_KERN_PAGE_SIZE - 10 * sizeof(long))) { + *addr = stack; + return 0; + } + + return do_syscall_stub(mm_idp, addr); +} + +long syscall_stub_data(struct mm_id * mm_idp, + unsigned long *data, int data_count, + void **addr, void **stub_addr) +{ + unsigned long *stack; + int ret = 0; + + /* + * If *addr still is uninitialized, it *must* contain NULL. + * Thus in this case do_syscall_stub correctly won't be called. + */ + if ((((unsigned long) *addr) & ~UM_KERN_PAGE_MASK) >= + UM_KERN_PAGE_SIZE - (10 + data_count) * sizeof(long)) { + ret = do_syscall_stub(mm_idp, addr); + /* in case of error, don't overwrite data on stack */ + if (ret) + return ret; + } + + stack = check_init_stack(mm_idp, *addr); + *addr = stack; + + *stack = data_count * sizeof(long); + + memcpy(stack + 1, data, data_count * sizeof(long)); + + *stub_addr = (void *)(((unsigned long)(stack + 1) & + ~UM_KERN_PAGE_MASK) + STUB_DATA); + + return 0; +} + +int map(struct mm_id * mm_idp, unsigned long virt, unsigned long len, int prot, + int phys_fd, unsigned long long offset, int done, void **data) +{ + int ret; + unsigned long args[] = { virt, len, prot, + MAP_SHARED | MAP_FIXED, phys_fd, + MMAP_OFFSET(offset) }; + + ret = run_syscall_stub(mm_idp, STUB_MMAP_NR, args, virt, + data, done); + + return ret; +} + +int unmap(struct mm_id * mm_idp, unsigned long addr, unsigned long len, + int done, void **data) +{ + int ret; + unsigned long args[] = { (unsigned long) addr, len, 0, 0, 0, + 0 }; + + ret = run_syscall_stub(mm_idp, __NR_munmap, args, 0, + data, done); + + return ret; +} + +int protect(struct mm_id * mm_idp, unsigned long addr, unsigned long len, + unsigned int prot, int done, void **data) +{ + int ret; + unsigned long args[] = { addr, len, prot, 0, 0, 0 }; + + ret = run_syscall_stub(mm_idp, __NR_mprotect, args, 0, + data, done); + + return ret; +} diff --git a/arch/um/os-Linux/skas/process.c b/arch/um/os-Linux/skas/process.c new file mode 100644 index 0000000000..9464833e74 --- /dev/null +++ b/arch/um/os-Linux/skas/process.c @@ -0,0 +1,732 @@ +// SPDX-License-Identifier: GPL-2.0 +/* + * Copyright (C) 2015 Thomas Meyer (thomas@m3y3r.de) + * Copyright (C) 2002- 2007 Jeff Dike (jdike@{addtoit,linux.intel}.com) + */ + +#include <stdlib.h> +#include <stdbool.h> +#include <unistd.h> +#include <sched.h> +#include <errno.h> +#include <string.h> +#include <sys/mman.h> +#include <sys/wait.h> +#include <asm/unistd.h> +#include <as-layout.h> +#include <init.h> +#include <kern_util.h> +#include <mem.h> +#include <os.h> +#include <ptrace_user.h> +#include <registers.h> +#include <skas.h> +#include <sysdep/stub.h> +#include <linux/threads.h> + +int is_skas_winch(int pid, int fd, void *data) +{ + return pid == getpgrp(); +} + +static const char *ptrace_reg_name(int idx) +{ +#define R(n) case HOST_##n: return #n + + switch (idx) { +#ifdef __x86_64__ + R(BX); + R(CX); + R(DI); + R(SI); + R(DX); + R(BP); + R(AX); + R(R8); + R(R9); + R(R10); + R(R11); + R(R12); + R(R13); + R(R14); + R(R15); + R(ORIG_AX); + R(CS); + R(SS); + R(EFLAGS); +#elif defined(__i386__) + R(IP); + R(SP); + R(EFLAGS); + R(AX); + R(BX); + R(CX); + R(DX); + R(SI); + R(DI); + R(BP); + R(CS); + R(SS); + R(DS); + R(FS); + R(ES); + R(GS); + R(ORIG_AX); +#endif + } + return ""; +} + +static int ptrace_dump_regs(int pid) +{ + unsigned long regs[MAX_REG_NR]; + int i; + + if (ptrace(PTRACE_GETREGS, pid, 0, regs) < 0) + return -errno; + + printk(UM_KERN_ERR "Stub registers -\n"); + for (i = 0; i < ARRAY_SIZE(regs); i++) { + const char *regname = ptrace_reg_name(i); + + printk(UM_KERN_ERR "\t%s\t(%2d): %lx\n", regname, i, regs[i]); + } + + return 0; +} + +/* + * Signals that are OK to receive in the stub - we'll just continue it. + * SIGWINCH will happen when UML is inside a detached screen. + */ +#define STUB_SIG_MASK ((1 << SIGALRM) | (1 << SIGWINCH)) + +/* Signals that the stub will finish with - anything else is an error */ +#define STUB_DONE_MASK (1 << SIGTRAP) + +void wait_stub_done(int pid) +{ + int n, status, err; + + while (1) { + CATCH_EINTR(n = waitpid(pid, &status, WUNTRACED | __WALL)); + if ((n < 0) || !WIFSTOPPED(status)) + goto bad_wait; + + if (((1 << WSTOPSIG(status)) & STUB_SIG_MASK) == 0) + break; + + err = ptrace(PTRACE_CONT, pid, 0, 0); + if (err) { + printk(UM_KERN_ERR "%s : continue failed, errno = %d\n", + __func__, errno); + fatal_sigsegv(); + } + } + + if (((1 << WSTOPSIG(status)) & STUB_DONE_MASK) != 0) + return; + +bad_wait: + err = ptrace_dump_regs(pid); + if (err) + printk(UM_KERN_ERR "Failed to get registers from stub, errno = %d\n", + -err); + printk(UM_KERN_ERR "%s : failed to wait for SIGTRAP, pid = %d, n = %d, errno = %d, status = 0x%x\n", + __func__, pid, n, errno, status); + fatal_sigsegv(); +} + +extern unsigned long current_stub_stack(void); + +static void get_skas_faultinfo(int pid, struct faultinfo *fi, unsigned long *aux_fp_regs) +{ + int err; + + err = get_fp_registers(pid, aux_fp_regs); + if (err < 0) { + printk(UM_KERN_ERR "save_fp_registers returned %d\n", + err); + fatal_sigsegv(); + } + err = ptrace(PTRACE_CONT, pid, 0, SIGSEGV); + if (err) { + printk(UM_KERN_ERR "Failed to continue stub, pid = %d, " + "errno = %d\n", pid, errno); + fatal_sigsegv(); + } + wait_stub_done(pid); + + /* + * faultinfo is prepared by the stub_segv_handler at start of + * the stub stack page. We just have to copy it. + */ + memcpy(fi, (void *)current_stub_stack(), sizeof(*fi)); + + err = put_fp_registers(pid, aux_fp_regs); + if (err < 0) { + printk(UM_KERN_ERR "put_fp_registers returned %d\n", + err); + fatal_sigsegv(); + } +} + +static void handle_segv(int pid, struct uml_pt_regs *regs, unsigned long *aux_fp_regs) +{ + get_skas_faultinfo(pid, ®s->faultinfo, aux_fp_regs); + segv(regs->faultinfo, 0, 1, NULL); +} + +/* + * To use the same value of using_sysemu as the caller, ask it that value + * (in local_using_sysemu + */ +static void handle_trap(int pid, struct uml_pt_regs *regs, + int local_using_sysemu) +{ + int err, status; + + if ((UPT_IP(regs) >= STUB_START) && (UPT_IP(regs) < STUB_END)) + fatal_sigsegv(); + + if (!local_using_sysemu) + { + err = ptrace(PTRACE_POKEUSER, pid, PT_SYSCALL_NR_OFFSET, + __NR_getpid); + if (err < 0) { + printk(UM_KERN_ERR "%s - nullifying syscall failed, errno = %d\n", + __func__, errno); + fatal_sigsegv(); + } + + err = ptrace(PTRACE_SYSCALL, pid, 0, 0); + if (err < 0) { + printk(UM_KERN_ERR "%s - continuing to end of syscall failed, errno = %d\n", + __func__, errno); + fatal_sigsegv(); + } + + CATCH_EINTR(err = waitpid(pid, &status, WUNTRACED | __WALL)); + if ((err < 0) || !WIFSTOPPED(status) || + (WSTOPSIG(status) != SIGTRAP + 0x80)) { + err = ptrace_dump_regs(pid); + if (err) + printk(UM_KERN_ERR "Failed to get registers from process, errno = %d\n", + -err); + printk(UM_KERN_ERR "%s - failed to wait at end of syscall, errno = %d, status = %d\n", + __func__, errno, status); + fatal_sigsegv(); + } + } + + handle_syscall(regs); +} + +extern char __syscall_stub_start[]; + +/** + * userspace_tramp() - userspace trampoline + * @stack: pointer to the new userspace stack page, can be NULL, if? FIXME: + * + * The userspace trampoline is used to setup a new userspace process in start_userspace() after it was clone()'ed. + * This function will run on a temporary stack page. + * It ptrace()'es itself, then + * Two pages are mapped into the userspace address space: + * - STUB_CODE (with EXEC), which contains the skas stub code + * - STUB_DATA (with R/W), which contains a data page that is used to transfer certain data between the UML userspace process and the UML kernel. + * Also for the userspace process a SIGSEGV handler is installed to catch pagefaults in the userspace process. + * And last the process stops itself to give control to the UML kernel for this userspace process. + * + * Return: Always zero, otherwise the current userspace process is ended with non null exit() call + */ +static int userspace_tramp(void *stack) +{ + void *addr; + int fd; + unsigned long long offset; + + ptrace(PTRACE_TRACEME, 0, 0, 0); + + signal(SIGTERM, SIG_DFL); + signal(SIGWINCH, SIG_IGN); + + fd = phys_mapping(uml_to_phys(__syscall_stub_start), &offset); + addr = mmap64((void *) STUB_CODE, UM_KERN_PAGE_SIZE, + PROT_EXEC, MAP_FIXED | MAP_PRIVATE, fd, offset); + if (addr == MAP_FAILED) { + printk(UM_KERN_ERR "mapping mmap stub at 0x%lx failed, errno = %d\n", + STUB_CODE, errno); + exit(1); + } + + if (stack != NULL) { + fd = phys_mapping(uml_to_phys(stack), &offset); + addr = mmap((void *) STUB_DATA, + STUB_DATA_PAGES * UM_KERN_PAGE_SIZE, PROT_READ | PROT_WRITE, + MAP_FIXED | MAP_SHARED, fd, offset); + if (addr == MAP_FAILED) { + printk(UM_KERN_ERR "mapping segfault stack at 0x%lx failed, errno = %d\n", + STUB_DATA, errno); + exit(1); + } + } + if (stack != NULL) { + struct sigaction sa; + + unsigned long v = STUB_CODE + + (unsigned long) stub_segv_handler - + (unsigned long) __syscall_stub_start; + + set_sigstack((void *) STUB_DATA, STUB_DATA_PAGES * UM_KERN_PAGE_SIZE); + sigemptyset(&sa.sa_mask); + sa.sa_flags = SA_ONSTACK | SA_NODEFER | SA_SIGINFO; + sa.sa_sigaction = (void *) v; + sa.sa_restorer = NULL; + if (sigaction(SIGSEGV, &sa, NULL) < 0) { + printk(UM_KERN_ERR "%s - setting SIGSEGV handler failed - errno = %d\n", + __func__, errno); + exit(1); + } + } + + kill(os_getpid(), SIGSTOP); + return 0; +} + +int userspace_pid[NR_CPUS]; +int kill_userspace_mm[NR_CPUS]; + +/** + * start_userspace() - prepare a new userspace process + * @stub_stack: pointer to the stub stack. Can be NULL, if? FIXME: + * + * Setups a new temporary stack page that is used while userspace_tramp() runs + * Clones the kernel process into a new userspace process, with FDs only. + * + * Return: When positive: the process id of the new userspace process, + * when negative: an error number. + * FIXME: can PIDs become negative?! + */ +int start_userspace(unsigned long stub_stack) +{ + void *stack; + unsigned long sp; + int pid, status, n, flags, err; + + /* setup a temporary stack page */ + stack = mmap(NULL, UM_KERN_PAGE_SIZE, + PROT_READ | PROT_WRITE | PROT_EXEC, + MAP_PRIVATE | MAP_ANONYMOUS, -1, 0); + if (stack == MAP_FAILED) { + err = -errno; + printk(UM_KERN_ERR "%s : mmap failed, errno = %d\n", + __func__, errno); + return err; + } + + /* set stack pointer to the end of the stack page, so it can grow downwards */ + sp = (unsigned long)stack + UM_KERN_PAGE_SIZE; + + flags = CLONE_FILES | SIGCHLD; + + /* clone into new userspace process */ + pid = clone(userspace_tramp, (void *) sp, flags, (void *) stub_stack); + if (pid < 0) { + err = -errno; + printk(UM_KERN_ERR "%s : clone failed, errno = %d\n", + __func__, errno); + return err; + } + + do { + CATCH_EINTR(n = waitpid(pid, &status, WUNTRACED | __WALL)); + if (n < 0) { + err = -errno; + printk(UM_KERN_ERR "%s : wait failed, errno = %d\n", + __func__, errno); + goto out_kill; + } + } while (WIFSTOPPED(status) && (WSTOPSIG(status) == SIGALRM)); + + if (!WIFSTOPPED(status) || (WSTOPSIG(status) != SIGSTOP)) { + err = -EINVAL; + printk(UM_KERN_ERR "%s : expected SIGSTOP, got status = %d\n", + __func__, status); + goto out_kill; + } + + if (ptrace(PTRACE_OLDSETOPTIONS, pid, NULL, + (void *) PTRACE_O_TRACESYSGOOD) < 0) { + err = -errno; + printk(UM_KERN_ERR "%s : PTRACE_OLDSETOPTIONS failed, errno = %d\n", + __func__, errno); + goto out_kill; + } + + if (munmap(stack, UM_KERN_PAGE_SIZE) < 0) { + err = -errno; + printk(UM_KERN_ERR "%s : munmap failed, errno = %d\n", + __func__, errno); + goto out_kill; + } + + return pid; + + out_kill: + os_kill_ptraced_process(pid, 1); + return err; +} + +void userspace(struct uml_pt_regs *regs, unsigned long *aux_fp_regs) +{ + int err, status, op, pid = userspace_pid[0]; + /* To prevent races if using_sysemu changes under us.*/ + int local_using_sysemu; + siginfo_t si; + + /* Handle any immediate reschedules or signals */ + interrupt_end(); + + while (1) { + if (kill_userspace_mm[0]) + fatal_sigsegv(); + + /* + * This can legitimately fail if the process loads a + * bogus value into a segment register. It will + * segfault and PTRACE_GETREGS will read that value + * out of the process. However, PTRACE_SETREGS will + * fail. In this case, there is nothing to do but + * just kill the process. + */ + if (ptrace(PTRACE_SETREGS, pid, 0, regs->gp)) { + printk(UM_KERN_ERR "%s - ptrace set regs failed, errno = %d\n", + __func__, errno); + fatal_sigsegv(); + } + + if (put_fp_registers(pid, regs->fp)) { + printk(UM_KERN_ERR "%s - ptrace set fp regs failed, errno = %d\n", + __func__, errno); + fatal_sigsegv(); + } + + /* Now we set local_using_sysemu to be used for one loop */ + local_using_sysemu = get_using_sysemu(); + + op = SELECT_PTRACE_OPERATION(local_using_sysemu, + singlestepping(NULL)); + + if (ptrace(op, pid, 0, 0)) { + printk(UM_KERN_ERR "%s - ptrace continue failed, op = %d, errno = %d\n", + __func__, op, errno); + fatal_sigsegv(); + } + + CATCH_EINTR(err = waitpid(pid, &status, WUNTRACED | __WALL)); + if (err < 0) { + printk(UM_KERN_ERR "%s - wait failed, errno = %d\n", + __func__, errno); + fatal_sigsegv(); + } + + regs->is_user = 1; + if (ptrace(PTRACE_GETREGS, pid, 0, regs->gp)) { + printk(UM_KERN_ERR "%s - PTRACE_GETREGS failed, errno = %d\n", + __func__, errno); + fatal_sigsegv(); + } + + if (get_fp_registers(pid, regs->fp)) { + printk(UM_KERN_ERR "%s - get_fp_registers failed, errno = %d\n", + __func__, errno); + fatal_sigsegv(); + } + + UPT_SYSCALL_NR(regs) = -1; /* Assume: It's not a syscall */ + + if (WIFSTOPPED(status)) { + int sig = WSTOPSIG(status); + + /* These signal handlers need the si argument. + * The SIGIO and SIGALARM handlers which constitute the + * majority of invocations, do not use it. + */ + switch (sig) { + case SIGSEGV: + case SIGTRAP: + case SIGILL: + case SIGBUS: + case SIGFPE: + case SIGWINCH: + ptrace(PTRACE_GETSIGINFO, pid, 0, (struct siginfo *)&si); + break; + } + + switch (sig) { + case SIGSEGV: + if (PTRACE_FULL_FAULTINFO) { + get_skas_faultinfo(pid, + ®s->faultinfo, aux_fp_regs); + (*sig_info[SIGSEGV])(SIGSEGV, (struct siginfo *)&si, + regs); + } + else handle_segv(pid, regs, aux_fp_regs); + break; + case SIGTRAP + 0x80: + handle_trap(pid, regs, local_using_sysemu); + break; + case SIGTRAP: + relay_signal(SIGTRAP, (struct siginfo *)&si, regs); + break; + case SIGALRM: + break; + case SIGIO: + case SIGILL: + case SIGBUS: + case SIGFPE: + case SIGWINCH: + block_signals_trace(); + (*sig_info[sig])(sig, (struct siginfo *)&si, regs); + unblock_signals_trace(); + break; + default: + printk(UM_KERN_ERR "%s - child stopped with signal %d\n", + __func__, sig); + fatal_sigsegv(); + } + pid = userspace_pid[0]; + interrupt_end(); + + /* Avoid -ERESTARTSYS handling in host */ + if (PT_SYSCALL_NR_OFFSET != PT_SYSCALL_RET_OFFSET) + PT_SYSCALL_NR(regs->gp) = -1; + } + } +} + +static unsigned long thread_regs[MAX_REG_NR]; +static unsigned long thread_fp_regs[FP_SIZE]; + +static int __init init_thread_regs(void) +{ + get_safe_registers(thread_regs, thread_fp_regs); + /* Set parent's instruction pointer to start of clone-stub */ + thread_regs[REGS_IP_INDEX] = STUB_CODE + + (unsigned long) stub_clone_handler - + (unsigned long) __syscall_stub_start; + thread_regs[REGS_SP_INDEX] = STUB_DATA + STUB_DATA_PAGES * UM_KERN_PAGE_SIZE - + sizeof(void *); +#ifdef __SIGNAL_FRAMESIZE + thread_regs[REGS_SP_INDEX] -= __SIGNAL_FRAMESIZE; +#endif + return 0; +} + +__initcall(init_thread_regs); + +int copy_context_skas0(unsigned long new_stack, int pid) +{ + int err; + unsigned long current_stack = current_stub_stack(); + struct stub_data *data = (struct stub_data *) current_stack; + struct stub_data *child_data = (struct stub_data *) new_stack; + unsigned long long new_offset; + int new_fd = phys_mapping(uml_to_phys((void *)new_stack), &new_offset); + + /* + * prepare offset and fd of child's stack as argument for parent's + * and child's mmap2 calls + */ + *data = ((struct stub_data) { + .offset = MMAP_OFFSET(new_offset), + .fd = new_fd, + .parent_err = -ESRCH, + .child_err = 0, + }); + + *child_data = ((struct stub_data) { + .child_err = -ESRCH, + }); + + err = ptrace_setregs(pid, thread_regs); + if (err < 0) { + err = -errno; + printk(UM_KERN_ERR "%s : PTRACE_SETREGS failed, pid = %d, errno = %d\n", + __func__, pid, -err); + return err; + } + + err = put_fp_registers(pid, thread_fp_regs); + if (err < 0) { + printk(UM_KERN_ERR "%s : put_fp_registers failed, pid = %d, err = %d\n", + __func__, pid, err); + return err; + } + + /* + * Wait, until parent has finished its work: read child's pid from + * parent's stack, and check, if bad result. + */ + err = ptrace(PTRACE_CONT, pid, 0, 0); + if (err) { + err = -errno; + printk(UM_KERN_ERR "Failed to continue new process, pid = %d, errno = %d\n", + pid, errno); + return err; + } + + wait_stub_done(pid); + + pid = data->parent_err; + if (pid < 0) { + printk(UM_KERN_ERR "%s - stub-parent reports error %d\n", + __func__, -pid); + return pid; + } + + /* + * Wait, until child has finished too: read child's result from + * child's stack and check it. + */ + wait_stub_done(pid); + if (child_data->child_err != STUB_DATA) { + printk(UM_KERN_ERR "%s - stub-child %d reports error %ld\n", + __func__, pid, data->child_err); + err = data->child_err; + goto out_kill; + } + + if (ptrace(PTRACE_OLDSETOPTIONS, pid, NULL, + (void *)PTRACE_O_TRACESYSGOOD) < 0) { + err = -errno; + printk(UM_KERN_ERR "%s : PTRACE_OLDSETOPTIONS failed, errno = %d\n", + __func__, errno); + goto out_kill; + } + + return pid; + + out_kill: + os_kill_ptraced_process(pid, 1); + return err; +} + +void new_thread(void *stack, jmp_buf *buf, void (*handler)(void)) +{ + (*buf)[0].JB_IP = (unsigned long) handler; + (*buf)[0].JB_SP = (unsigned long) stack + UM_THREAD_SIZE - + sizeof(void *); +} + +#define INIT_JMP_NEW_THREAD 0 +#define INIT_JMP_CALLBACK 1 +#define INIT_JMP_HALT 2 +#define INIT_JMP_REBOOT 3 + +void switch_threads(jmp_buf *me, jmp_buf *you) +{ + if (UML_SETJMP(me) == 0) + UML_LONGJMP(you, 1); +} + +static jmp_buf initial_jmpbuf; + +/* XXX Make these percpu */ +static void (*cb_proc)(void *arg); +static void *cb_arg; +static jmp_buf *cb_back; + +int start_idle_thread(void *stack, jmp_buf *switch_buf) +{ + int n; + + set_handler(SIGWINCH); + + /* + * Can't use UML_SETJMP or UML_LONGJMP here because they save + * and restore signals, with the possible side-effect of + * trying to handle any signals which came when they were + * blocked, which can't be done on this stack. + * Signals must be blocked when jumping back here and restored + * after returning to the jumper. + */ + n = setjmp(initial_jmpbuf); + switch (n) { + case INIT_JMP_NEW_THREAD: + (*switch_buf)[0].JB_IP = (unsigned long) uml_finishsetup; + (*switch_buf)[0].JB_SP = (unsigned long) stack + + UM_THREAD_SIZE - sizeof(void *); + break; + case INIT_JMP_CALLBACK: + (*cb_proc)(cb_arg); + longjmp(*cb_back, 1); + break; + case INIT_JMP_HALT: + kmalloc_ok = 0; + return 0; + case INIT_JMP_REBOOT: + kmalloc_ok = 0; + return 1; + default: + printk(UM_KERN_ERR "Bad sigsetjmp return in %s - %d\n", + __func__, n); + fatal_sigsegv(); + } + longjmp(*switch_buf, 1); + + /* unreachable */ + printk(UM_KERN_ERR "impossible long jump!"); + fatal_sigsegv(); + return 0; +} + +void initial_thread_cb_skas(void (*proc)(void *), void *arg) +{ + jmp_buf here; + + cb_proc = proc; + cb_arg = arg; + cb_back = &here; + + block_signals_trace(); + if (UML_SETJMP(&here) == 0) + UML_LONGJMP(&initial_jmpbuf, INIT_JMP_CALLBACK); + unblock_signals_trace(); + + cb_proc = NULL; + cb_arg = NULL; + cb_back = NULL; +} + +void halt_skas(void) +{ + block_signals_trace(); + UML_LONGJMP(&initial_jmpbuf, INIT_JMP_HALT); +} + +static bool noreboot; + +static int __init noreboot_cmd_param(char *str, int *add) +{ + noreboot = true; + return 0; +} + +__uml_setup("noreboot", noreboot_cmd_param, +"noreboot\n" +" Rather than rebooting, exit always, akin to QEMU's -no-reboot option.\n" +" This is useful if you're using CONFIG_PANIC_TIMEOUT in order to catch\n" +" crashes in CI\n"); + +void reboot_skas(void) +{ + block_signals_trace(); + UML_LONGJMP(&initial_jmpbuf, noreboot ? INIT_JMP_HALT : INIT_JMP_REBOOT); +} + +void __switch_mm(struct mm_id *mm_idp) +{ + userspace_pid[0] = mm_idp->u.pid; + kill_userspace_mm[0] = mm_idp->kill; +} diff --git a/arch/um/os-Linux/start_up.c b/arch/um/os-Linux/start_up.c new file mode 100644 index 0000000000..e3ee4db58b --- /dev/null +++ b/arch/um/os-Linux/start_up.c @@ -0,0 +1,424 @@ +// SPDX-License-Identifier: GPL-2.0 +/* + * Copyright (C) 2000 - 2007 Jeff Dike (jdike@{addtoit,linux.intel}.com) + */ + +#include <stdio.h> +#include <stdlib.h> +#include <stdarg.h> +#include <unistd.h> +#include <errno.h> +#include <fcntl.h> +#include <sched.h> +#include <signal.h> +#include <string.h> +#include <sys/mman.h> +#include <sys/stat.h> +#include <sys/wait.h> +#include <sys/time.h> +#include <sys/resource.h> +#include <asm/unistd.h> +#include <init.h> +#include <os.h> +#include <mem_user.h> +#include <ptrace_user.h> +#include <registers.h> +#include <skas.h> + +static void ptrace_child(void) +{ + int ret; + /* Calling os_getpid because some libcs cached getpid incorrectly */ + int pid = os_getpid(), ppid = getppid(); + int sc_result; + + if (change_sig(SIGWINCH, 0) < 0 || + ptrace(PTRACE_TRACEME, 0, 0, 0) < 0) { + perror("ptrace"); + kill(pid, SIGKILL); + } + kill(pid, SIGSTOP); + + /* + * This syscall will be intercepted by the parent. Don't call more than + * once, please. + */ + sc_result = os_getpid(); + + if (sc_result == pid) + /* Nothing modified by the parent, we are running normally. */ + ret = 1; + else if (sc_result == ppid) + /* + * Expected in check_ptrace and check_sysemu when they succeed + * in modifying the stack frame + */ + ret = 0; + else + /* Serious trouble! This could be caused by a bug in host 2.6 + * SKAS3/2.6 patch before release -V6, together with a bug in + * the UML code itself. + */ + ret = 2; + + exit(ret); +} + +static void fatal_perror(const char *str) +{ + perror(str); + exit(1); +} + +static void fatal(char *fmt, ...) +{ + va_list list; + + va_start(list, fmt); + vfprintf(stderr, fmt, list); + va_end(list); + + exit(1); +} + +static void non_fatal(char *fmt, ...) +{ + va_list list; + + va_start(list, fmt); + vfprintf(stderr, fmt, list); + va_end(list); +} + +static int start_ptraced_child(void) +{ + int pid, n, status; + + fflush(stdout); + + pid = fork(); + if (pid == 0) + ptrace_child(); + else if (pid < 0) + fatal_perror("start_ptraced_child : fork failed"); + + CATCH_EINTR(n = waitpid(pid, &status, WUNTRACED)); + if (n < 0) + fatal_perror("check_ptrace : waitpid failed"); + if (!WIFSTOPPED(status) || (WSTOPSIG(status) != SIGSTOP)) + fatal("check_ptrace : expected SIGSTOP, got status = %d", + status); + + return pid; +} + +/* When testing for SYSEMU support, if it is one of the broken versions, we + * must just avoid using sysemu, not panic, but only if SYSEMU features are + * broken. + * So only for SYSEMU features we test mustpanic, while normal host features + * must work anyway! + */ +static int stop_ptraced_child(int pid, int exitcode, int mustexit) +{ + int status, n, ret = 0; + + if (ptrace(PTRACE_CONT, pid, 0, 0) < 0) { + perror("stop_ptraced_child : ptrace failed"); + return -1; + } + CATCH_EINTR(n = waitpid(pid, &status, 0)); + if (!WIFEXITED(status) || (WEXITSTATUS(status) != exitcode)) { + int exit_with = WEXITSTATUS(status); + if (exit_with == 2) + non_fatal("check_ptrace : child exited with status 2. " + "\nDisabling SYSEMU support.\n"); + non_fatal("check_ptrace : child exited with exitcode %d, while " + "expecting %d; status 0x%x\n", exit_with, + exitcode, status); + if (mustexit) + exit(1); + ret = -1; + } + + return ret; +} + +/* Changed only during early boot */ +static int force_sysemu_disabled = 0; + +static int __init nosysemu_cmd_param(char *str, int* add) +{ + force_sysemu_disabled = 1; + return 0; +} + +__uml_setup("nosysemu", nosysemu_cmd_param, +"nosysemu\n" +" Turns off syscall emulation patch for ptrace (SYSEMU).\n" +" SYSEMU is a performance-patch introduced by Laurent Vivier. It changes\n" +" behaviour of ptrace() and helps reduce host context switch rates.\n" +" To make it work, you need a kernel patch for your host, too.\n" +" See http://perso.wanadoo.fr/laurent.vivier/UML/ for further \n" +" information.\n\n"); + +static void __init check_sysemu(void) +{ + unsigned long regs[MAX_REG_NR]; + int pid, n, status, count=0; + + os_info("Checking syscall emulation patch for ptrace..."); + sysemu_supported = 0; + pid = start_ptraced_child(); + + if (ptrace(PTRACE_SYSEMU, pid, 0, 0) < 0) + goto fail; + + CATCH_EINTR(n = waitpid(pid, &status, WUNTRACED)); + if (n < 0) + fatal_perror("check_sysemu : wait failed"); + if (!WIFSTOPPED(status) || (WSTOPSIG(status) != SIGTRAP)) + fatal("check_sysemu : expected SIGTRAP, got status = %d\n", + status); + + if (ptrace(PTRACE_GETREGS, pid, 0, regs) < 0) + fatal_perror("check_sysemu : PTRACE_GETREGS failed"); + if (PT_SYSCALL_NR(regs) != __NR_getpid) { + non_fatal("check_sysemu got system call number %d, " + "expected %d...", PT_SYSCALL_NR(regs), __NR_getpid); + goto fail; + } + + n = ptrace(PTRACE_POKEUSER, pid, PT_SYSCALL_RET_OFFSET, os_getpid()); + if (n < 0) { + non_fatal("check_sysemu : failed to modify system call " + "return"); + goto fail; + } + + if (stop_ptraced_child(pid, 0, 0) < 0) + goto fail_stopped; + + sysemu_supported = 1; + os_info("OK\n"); + set_using_sysemu(!force_sysemu_disabled); + + os_info("Checking advanced syscall emulation patch for ptrace..."); + pid = start_ptraced_child(); + + if ((ptrace(PTRACE_OLDSETOPTIONS, pid, 0, + (void *) PTRACE_O_TRACESYSGOOD) < 0)) + fatal_perror("check_sysemu: PTRACE_OLDSETOPTIONS failed"); + + while (1) { + count++; + if (ptrace(PTRACE_SYSEMU_SINGLESTEP, pid, 0, 0) < 0) + goto fail; + CATCH_EINTR(n = waitpid(pid, &status, WUNTRACED)); + if (n < 0) + fatal_perror("check_sysemu: wait failed"); + + if (WIFSTOPPED(status) && + (WSTOPSIG(status) == (SIGTRAP|0x80))) { + if (!count) { + non_fatal("check_sysemu: SYSEMU_SINGLESTEP " + "doesn't singlestep"); + goto fail; + } + n = ptrace(PTRACE_POKEUSER, pid, PT_SYSCALL_RET_OFFSET, + os_getpid()); + if (n < 0) + fatal_perror("check_sysemu : failed to modify " + "system call return"); + break; + } + else if (WIFSTOPPED(status) && (WSTOPSIG(status) == SIGTRAP)) + count++; + else { + non_fatal("check_sysemu: expected SIGTRAP or " + "(SIGTRAP | 0x80), got status = %d\n", + status); + goto fail; + } + } + if (stop_ptraced_child(pid, 0, 0) < 0) + goto fail_stopped; + + sysemu_supported = 2; + os_info("OK\n"); + + if (!force_sysemu_disabled) + set_using_sysemu(sysemu_supported); + return; + +fail: + stop_ptraced_child(pid, 1, 0); +fail_stopped: + non_fatal("missing\n"); +} + +static void __init check_ptrace(void) +{ + int pid, syscall, n, status; + + os_info("Checking that ptrace can change system call numbers..."); + pid = start_ptraced_child(); + + if ((ptrace(PTRACE_OLDSETOPTIONS, pid, 0, + (void *) PTRACE_O_TRACESYSGOOD) < 0)) + fatal_perror("check_ptrace: PTRACE_OLDSETOPTIONS failed"); + + while (1) { + if (ptrace(PTRACE_SYSCALL, pid, 0, 0) < 0) + fatal_perror("check_ptrace : ptrace failed"); + + CATCH_EINTR(n = waitpid(pid, &status, WUNTRACED)); + if (n < 0) + fatal_perror("check_ptrace : wait failed"); + + if (!WIFSTOPPED(status) || + (WSTOPSIG(status) != (SIGTRAP | 0x80))) + fatal("check_ptrace : expected (SIGTRAP|0x80), " + "got status = %d", status); + + syscall = ptrace(PTRACE_PEEKUSER, pid, PT_SYSCALL_NR_OFFSET, + 0); + if (syscall == __NR_getpid) { + n = ptrace(PTRACE_POKEUSER, pid, PT_SYSCALL_NR_OFFSET, + __NR_getppid); + if (n < 0) + fatal_perror("check_ptrace : failed to modify " + "system call"); + break; + } + } + stop_ptraced_child(pid, 0, 1); + os_info("OK\n"); + check_sysemu(); +} + +extern void check_tmpexec(void); + +static void __init check_coredump_limit(void) +{ + struct rlimit lim; + int err = getrlimit(RLIMIT_CORE, &lim); + + if (err) { + perror("Getting core dump limit"); + return; + } + + os_info("Core dump limits :\n\tsoft - "); + if (lim.rlim_cur == RLIM_INFINITY) + os_info("NONE\n"); + else + os_info("%llu\n", (unsigned long long)lim.rlim_cur); + + os_info("\thard - "); + if (lim.rlim_max == RLIM_INFINITY) + os_info("NONE\n"); + else + os_info("%llu\n", (unsigned long long)lim.rlim_max); +} + +void __init get_host_cpu_features( + void (*flags_helper_func)(char *line), + void (*cache_helper_func)(char *line)) +{ + FILE *cpuinfo; + char *line = NULL; + size_t len = 0; + int done_parsing = 0; + + cpuinfo = fopen("/proc/cpuinfo", "r"); + if (cpuinfo == NULL) { + os_info("Failed to get host CPU features\n"); + } else { + while ((getline(&line, &len, cpuinfo)) != -1) { + if (strstr(line, "flags")) { + flags_helper_func(line); + done_parsing++; + } + if (strstr(line, "cache_alignment")) { + cache_helper_func(line); + done_parsing++; + } + free(line); + line = NULL; + if (done_parsing > 1) + break; + } + fclose(cpuinfo); + } +} + + +void __init os_early_checks(void) +{ + int pid; + + /* Print out the core dump limits early */ + check_coredump_limit(); + + check_ptrace(); + + /* Need to check this early because mmapping happens before the + * kernel is running. + */ + check_tmpexec(); + + pid = start_ptraced_child(); + if (init_pid_registers(pid)) + fatal("Failed to initialize default registers"); + stop_ptraced_child(pid, 1, 1); +} + +int __init parse_iomem(char *str, int *add) +{ + struct iomem_region *new; + struct stat64 buf; + char *file, *driver; + int fd, size; + + driver = str; + file = strchr(str,','); + if (file == NULL) { + os_warn("parse_iomem : failed to parse iomem\n"); + goto out; + } + *file = '\0'; + file++; + fd = open(file, O_RDWR, 0); + if (fd < 0) { + perror("parse_iomem - Couldn't open io file"); + goto out; + } + + if (fstat64(fd, &buf) < 0) { + perror("parse_iomem - cannot stat_fd file"); + goto out_close; + } + + new = malloc(sizeof(*new)); + if (new == NULL) { + perror("Couldn't allocate iomem_region struct"); + goto out_close; + } + + size = (buf.st_size + UM_KERN_PAGE_SIZE) & ~(UM_KERN_PAGE_SIZE - 1); + + *new = ((struct iomem_region) { .next = iomem_regions, + .driver = driver, + .fd = fd, + .size = size, + .phys = 0, + .virt = 0 }); + iomem_regions = new; + iomem_size += new->size + UM_KERN_PAGE_SIZE; + + return 0; + out_close: + close(fd); + out: + return 1; +} diff --git a/arch/um/os-Linux/time.c b/arch/um/os-Linux/time.c new file mode 100644 index 0000000000..4d5591d96d --- /dev/null +++ b/arch/um/os-Linux/time.c @@ -0,0 +1,115 @@ +// SPDX-License-Identifier: GPL-2.0 +/* + * Copyright (C) 2015 Anton Ivanov (aivanov@{brocade.com,kot-begemot.co.uk}) + * Copyright (C) 2015 Thomas Meyer (thomas@m3y3r.de) + * Copyright (C) 2012-2014 Cisco Systems + * Copyright (C) 2000 - 2007 Jeff Dike (jdike{addtoit,linux.intel}.com) + */ + +#include <stddef.h> +#include <unistd.h> +#include <errno.h> +#include <signal.h> +#include <time.h> +#include <sys/time.h> +#include <kern_util.h> +#include <os.h> +#include <string.h> + +static timer_t event_high_res_timer = 0; + +static inline long long timespec_to_ns(const struct timespec *ts) +{ + return ((long long) ts->tv_sec * UM_NSEC_PER_SEC) + ts->tv_nsec; +} + +long long os_persistent_clock_emulation(void) +{ + struct timespec realtime_tp; + + clock_gettime(CLOCK_REALTIME, &realtime_tp); + return timespec_to_ns(&realtime_tp); +} + +/** + * os_timer_create() - create an new posix (interval) timer + */ +int os_timer_create(void) +{ + timer_t *t = &event_high_res_timer; + + if (timer_create(CLOCK_MONOTONIC, NULL, t) == -1) + return -1; + + return 0; +} + +int os_timer_set_interval(unsigned long long nsecs) +{ + struct itimerspec its; + + its.it_value.tv_sec = nsecs / UM_NSEC_PER_SEC; + its.it_value.tv_nsec = nsecs % UM_NSEC_PER_SEC; + + its.it_interval.tv_sec = nsecs / UM_NSEC_PER_SEC; + its.it_interval.tv_nsec = nsecs % UM_NSEC_PER_SEC; + + if (timer_settime(event_high_res_timer, 0, &its, NULL) == -1) + return -errno; + + return 0; +} + +int os_timer_one_shot(unsigned long long nsecs) +{ + struct itimerspec its = { + .it_value.tv_sec = nsecs / UM_NSEC_PER_SEC, + .it_value.tv_nsec = nsecs % UM_NSEC_PER_SEC, + + .it_interval.tv_sec = 0, + .it_interval.tv_nsec = 0, // we cheat here + }; + + timer_settime(event_high_res_timer, 0, &its, NULL); + return 0; +} + +/** + * os_timer_disable() - disable the posix (interval) timer + */ +void os_timer_disable(void) +{ + struct itimerspec its; + + memset(&its, 0, sizeof(struct itimerspec)); + timer_settime(event_high_res_timer, 0, &its, NULL); +} + +long long os_nsecs(void) +{ + struct timespec ts; + + clock_gettime(CLOCK_MONOTONIC,&ts); + return timespec_to_ns(&ts); +} + +/** + * os_idle_sleep() - sleep until interrupted + */ +void os_idle_sleep(void) +{ + struct itimerspec its; + sigset_t set, old; + + /* block SIGALRM while we analyze the timer state */ + sigemptyset(&set); + sigaddset(&set, SIGALRM); + sigprocmask(SIG_BLOCK, &set, &old); + + /* check the timer, and if it'll fire then wait for it */ + timer_gettime(event_high_res_timer, &its); + if (its.it_value.tv_sec || its.it_value.tv_nsec) + sigsuspend(&old); + /* either way, restore the signal mask */ + sigprocmask(SIG_UNBLOCK, &set, NULL); +} diff --git a/arch/um/os-Linux/tty.c b/arch/um/os-Linux/tty.c new file mode 100644 index 0000000000..f784db83e0 --- /dev/null +++ b/arch/um/os-Linux/tty.c @@ -0,0 +1,60 @@ +// SPDX-License-Identifier: GPL-2.0 +/* + * Copyright (C) 2002 - 2007 Jeff Dike (jdike@{addtoit,linux.intel}.com) + */ + +#include <stdlib.h> +#include <unistd.h> +#include <errno.h> +#include <fcntl.h> +#include <kern_util.h> +#include <os.h> + +struct grantpt_info { + int fd; + int res; + int err; +}; + +static void grantpt_cb(void *arg) +{ + struct grantpt_info *info = arg; + + info->res = grantpt(info->fd); + info->err = errno; +} + +int get_pty(void) +{ + struct grantpt_info info; + int fd, err; + + fd = open("/dev/ptmx", O_RDWR); + if (fd < 0) { + err = -errno; + printk(UM_KERN_ERR "get_pty : Couldn't open /dev/ptmx - " + "err = %d\n", errno); + return err; + } + + info.fd = fd; + initial_thread_cb(grantpt_cb, &info); + + if (info.res < 0) { + err = -info.err; + printk(UM_KERN_ERR "get_pty : Couldn't grant pty - " + "errno = %d\n", -info.err); + goto out; + } + + if (unlockpt(fd) < 0) { + err = -errno; + printk(UM_KERN_ERR "get_pty : Couldn't unlock pty - " + "errno = %d\n", errno); + goto out; + } + return fd; +out: + close(fd); + return err; +} diff --git a/arch/um/os-Linux/umid.c b/arch/um/os-Linux/umid.c new file mode 100644 index 0000000000..288c422bfa --- /dev/null +++ b/arch/um/os-Linux/umid.c @@ -0,0 +1,408 @@ +// SPDX-License-Identifier: GPL-2.0 +/* + * Copyright (C) 2002 - 2007 Jeff Dike (jdike@{addtoit,linux.intel}.com) + */ + +#include <stdio.h> +#include <stdlib.h> +#include <dirent.h> +#include <errno.h> +#include <fcntl.h> +#include <signal.h> +#include <string.h> +#include <unistd.h> +#include <sys/stat.h> +#include <init.h> +#include <os.h> + +#define UML_DIR "~/.uml/" + +#define UMID_LEN 64 + +/* Changed by set_umid, which is run early in boot */ +static char umid[UMID_LEN] = { 0 }; + +/* Changed by set_uml_dir and make_uml_dir, which are run early in boot */ +static char *uml_dir = UML_DIR; + +static int __init make_uml_dir(void) +{ + char dir[512] = { '\0' }; + int len, err; + + if (*uml_dir == '~') { + char *home = getenv("HOME"); + + err = -ENOENT; + if (home == NULL) { + printk(UM_KERN_ERR + "%s: no value in environment for $HOME\n", + __func__); + goto err; + } + strscpy(dir, home, sizeof(dir)); + uml_dir++; + } + strlcat(dir, uml_dir, sizeof(dir)); + len = strlen(dir); + if (len > 0 && dir[len - 1] != '/') + strlcat(dir, "/", sizeof(dir)); + + err = -ENOMEM; + uml_dir = malloc(strlen(dir) + 1); + if (uml_dir == NULL) { + printk(UM_KERN_ERR "%s : malloc failed, errno = %d\n", + __func__, errno); + goto err; + } + strcpy(uml_dir, dir); + + if ((mkdir(uml_dir, 0777) < 0) && (errno != EEXIST)) { + printk(UM_KERN_ERR "Failed to mkdir '%s': %s\n", + uml_dir, strerror(errno)); + err = -errno; + goto err_free; + } + return 0; + +err_free: + free(uml_dir); +err: + uml_dir = NULL; + return err; +} + +/* + * Unlinks the files contained in @dir and then removes @dir. + * Doesn't handle directory trees, so it's not like rm -rf, but almost such. We + * ignore ENOENT errors for anything (they happen, strangely enough - possibly + * due to races between multiple dying UML threads). + */ +static int remove_files_and_dir(char *dir) +{ + DIR *directory; + struct dirent *ent; + int len; + char file[256]; + int ret; + + directory = opendir(dir); + if (directory == NULL) { + if (errno != ENOENT) + return -errno; + else + return 0; + } + + while ((ent = readdir(directory)) != NULL) { + if (!strcmp(ent->d_name, ".") || !strcmp(ent->d_name, "..")) + continue; + len = strlen(dir) + strlen("/") + strlen(ent->d_name) + 1; + if (len > sizeof(file)) { + ret = -E2BIG; + goto out; + } + + sprintf(file, "%s/%s", dir, ent->d_name); + if (unlink(file) < 0 && errno != ENOENT) { + ret = -errno; + goto out; + } + } + + if (rmdir(dir) < 0 && errno != ENOENT) { + ret = -errno; + goto out; + } + + ret = 0; +out: + closedir(directory); + return ret; +} + +/* + * This says that there isn't already a user of the specified directory even if + * there are errors during the checking. This is because if these errors + * happen, the directory is unusable by the pre-existing UML, so we might as + * well take it over. This could happen either by + * the existing UML somehow corrupting its umid directory + * something other than UML sticking stuff in the directory + * this boot racing with a shutdown of the other UML + * In any of these cases, the directory isn't useful for anything else. + * + * Boolean return: 1 if in use, 0 otherwise. + */ +static inline int is_umdir_used(char *dir) +{ + char pid[sizeof("nnnnnnnnn")], *end, *file; + int fd, p, n, err; + size_t filelen = strlen(dir) + sizeof("/pid") + 1; + + file = malloc(filelen); + if (!file) + return -ENOMEM; + + snprintf(file, filelen, "%s/pid", dir); + + fd = open(file, O_RDONLY); + if (fd < 0) { + fd = -errno; + if (fd != -ENOENT) { + printk(UM_KERN_ERR "is_umdir_used : couldn't open pid " + "file '%s', err = %d\n", file, -fd); + } + goto out; + } + + err = 0; + n = read(fd, pid, sizeof(pid)); + if (n < 0) { + printk(UM_KERN_ERR "is_umdir_used : couldn't read pid file " + "'%s', err = %d\n", file, errno); + goto out_close; + } else if (n == 0) { + printk(UM_KERN_ERR "is_umdir_used : couldn't read pid file " + "'%s', 0-byte read\n", file); + goto out_close; + } + + p = strtoul(pid, &end, 0); + if (end == pid) { + printk(UM_KERN_ERR "is_umdir_used : couldn't parse pid file " + "'%s', errno = %d\n", file, errno); + goto out_close; + } + + if ((kill(p, 0) == 0) || (errno != ESRCH)) { + printk(UM_KERN_ERR "umid \"%s\" is already in use by pid %d\n", + umid, p); + return 1; + } + +out_close: + close(fd); +out: + free(file); + return 0; +} + +/* + * Try to remove the directory @dir unless it's in use. + * Precondition: @dir exists. + * Returns 0 for success, < 0 for failure in removal or if the directory is in + * use. + */ +static int umdir_take_if_dead(char *dir) +{ + int ret; + if (is_umdir_used(dir)) + return -EEXIST; + + ret = remove_files_and_dir(dir); + if (ret) { + printk(UM_KERN_ERR "is_umdir_used - remove_files_and_dir " + "failed with err = %d\n", ret); + } + return ret; +} + +static void __init create_pid_file(void) +{ + char pid[sizeof("nnnnnnnnn")], *file; + int fd, n; + + n = strlen(uml_dir) + UMID_LEN + sizeof("/pid"); + file = malloc(n); + if (!file) + return; + + if (umid_file_name("pid", file, n)) + goto out; + + fd = open(file, O_RDWR | O_CREAT | O_EXCL, 0644); + if (fd < 0) { + printk(UM_KERN_ERR "Open of machine pid file \"%s\" failed: " + "%s\n", file, strerror(errno)); + goto out; + } + + snprintf(pid, sizeof(pid), "%d\n", getpid()); + n = write(fd, pid, strlen(pid)); + if (n != strlen(pid)) + printk(UM_KERN_ERR "Write of pid file failed - err = %d\n", + errno); + + close(fd); +out: + free(file); +} + +int __init set_umid(char *name) +{ + if (strlen(name) > UMID_LEN - 1) + return -E2BIG; + + strscpy(umid, name, sizeof(umid)); + + return 0; +} + +/* Changed in make_umid, which is called during early boot */ +static int umid_setup = 0; + +static int __init make_umid(void) +{ + int fd, err; + char tmp[256]; + + if (umid_setup) + return 0; + + make_uml_dir(); + + if (*umid == '\0') { + strscpy(tmp, uml_dir, sizeof(tmp)); + strlcat(tmp, "XXXXXX", sizeof(tmp)); + fd = mkstemp(tmp); + if (fd < 0) { + printk(UM_KERN_ERR "make_umid - mkstemp(%s) failed: " + "%s\n", tmp, strerror(errno)); + err = -errno; + goto err; + } + + close(fd); + + set_umid(&tmp[strlen(uml_dir)]); + + /* + * There's a nice tiny little race between this unlink and + * the mkdir below. It'd be nice if there were a mkstemp + * for directories. + */ + if (unlink(tmp)) { + err = -errno; + goto err; + } + } + + snprintf(tmp, sizeof(tmp), "%s%s", uml_dir, umid); + err = mkdir(tmp, 0777); + if (err < 0) { + err = -errno; + if (err != -EEXIST) + goto err; + + if (umdir_take_if_dead(tmp) < 0) + goto err; + + err = mkdir(tmp, 0777); + } + if (err) { + err = -errno; + printk(UM_KERN_ERR "Failed to create '%s' - err = %d\n", umid, + errno); + goto err; + } + + umid_setup = 1; + + create_pid_file(); + + err = 0; + err: + return err; +} + +static int __init make_umid_init(void) +{ + if (!make_umid()) + return 0; + + /* + * If initializing with the given umid failed, then try again with + * a random one. + */ + printk(UM_KERN_ERR "Failed to initialize umid \"%s\", trying with a " + "random umid\n", umid); + *umid = '\0'; + make_umid(); + + return 0; +} + +__initcall(make_umid_init); + +int __init umid_file_name(char *name, char *buf, int len) +{ + int n, err; + + err = make_umid(); + if (err) + return err; + + n = snprintf(buf, len, "%s%s/%s", uml_dir, umid, name); + if (n >= len) { + printk(UM_KERN_ERR "umid_file_name : buffer too short\n"); + return -E2BIG; + } + + return 0; +} + +char *get_umid(void) +{ + return umid; +} + +static int __init set_uml_dir(char *name, int *add) +{ + if (*name == '\0') { + os_warn("uml_dir can't be an empty string\n"); + return 0; + } + + if (name[strlen(name) - 1] == '/') { + uml_dir = name; + return 0; + } + + uml_dir = malloc(strlen(name) + 2); + if (uml_dir == NULL) { + os_warn("Failed to malloc uml_dir - error = %d\n", errno); + + /* + * Return 0 here because do_initcalls doesn't look at + * the return value. + */ + return 0; + } + sprintf(uml_dir, "%s/", name); + + return 0; +} + +__uml_setup("uml_dir=", set_uml_dir, +"uml_dir=<directory>\n" +" The location to place the pid and umid files.\n\n" +); + +static void remove_umid_dir(void) +{ + char *dir, err; + + dir = malloc(strlen(uml_dir) + UMID_LEN + 1); + if (!dir) + return; + + sprintf(dir, "%s%s", uml_dir, umid); + err = remove_files_and_dir(dir); + if (err) + os_warn("%s - remove_files_and_dir failed with err = %d\n", + __func__, err); + + free(dir); +} + +__uml_exitcall(remove_umid_dir); diff --git a/arch/um/os-Linux/user_syms.c b/arch/um/os-Linux/user_syms.c new file mode 100644 index 0000000000..a310ae27b4 --- /dev/null +++ b/arch/um/os-Linux/user_syms.c @@ -0,0 +1,43 @@ +// SPDX-License-Identifier: GPL-2.0 +#define __NO_FORTIFY +#include <linux/types.h> +#include <linux/module.h> + +/* + * This file exports some critical string functions and compiler + * built-in functions (where calls are emitted by the compiler + * itself that we cannot avoid even in kernel code) to modules. + * + * "_user.c" code that previously used exports here such as hostfs + * really should be considered part of the 'hypervisor' and define + * its own API boundary like hostfs does now; don't add exports to + * this file for such cases. + */ + +/* If it's not defined, the export is included in lib/string.c.*/ +#ifdef __HAVE_ARCH_STRSTR +#undef strstr +EXPORT_SYMBOL(strstr); +#endif + +#ifndef __x86_64__ +#undef memcpy +extern void *memcpy(void *, const void *, size_t); +EXPORT_SYMBOL(memcpy); +extern void *memmove(void *, const void *, size_t); +EXPORT_SYMBOL(memmove); +#undef memset +extern void *memset(void *, int, size_t); +EXPORT_SYMBOL(memset); +#endif + +#ifdef CONFIG_ARCH_REUSE_HOST_VSYSCALL_AREA +/* needed for __access_ok() */ +EXPORT_SYMBOL(vsyscall_ehdr); +EXPORT_SYMBOL(vsyscall_end); +#endif + +#ifdef _FORTIFY_SOURCE +extern int __sprintf_chk(char *str, int flag, size_t len, const char *format); +EXPORT_SYMBOL(__sprintf_chk); +#endif diff --git a/arch/um/os-Linux/util.c b/arch/um/os-Linux/util.c new file mode 100644 index 0000000000..fc0f2a9dee --- /dev/null +++ b/arch/um/os-Linux/util.c @@ -0,0 +1,195 @@ +// SPDX-License-Identifier: GPL-2.0 +/* + * Copyright (C) 2000 - 2007 Jeff Dike (jdike@{addtoit,linux.intel}.com) + */ + +#include <stdarg.h> +#include <stdio.h> +#include <stdlib.h> +#include <unistd.h> +#include <errno.h> +#include <signal.h> +#include <string.h> +#include <termios.h> +#include <sys/wait.h> +#include <sys/mman.h> +#include <sys/utsname.h> +#include <sys/random.h> +#include <init.h> +#include <os.h> + +void stack_protections(unsigned long address) +{ + if (mprotect((void *) address, UM_THREAD_SIZE, + PROT_READ | PROT_WRITE | PROT_EXEC) < 0) + panic("protecting stack failed, errno = %d", errno); +} + +int raw(int fd) +{ + struct termios tt; + int err; + + CATCH_EINTR(err = tcgetattr(fd, &tt)); + if (err < 0) + return -errno; + + cfmakeraw(&tt); + + CATCH_EINTR(err = tcsetattr(fd, TCSADRAIN, &tt)); + if (err < 0) + return -errno; + + /* + * XXX tcsetattr could have applied only some changes + * (and cfmakeraw() is a set of changes) + */ + return 0; +} + +void setup_machinename(char *machine_out) +{ + struct utsname host; + + uname(&host); +#ifdef UML_CONFIG_UML_X86 +# ifndef UML_CONFIG_64BIT + if (!strcmp(host.machine, "x86_64")) { + strcpy(machine_out, "i686"); + return; + } +# else + if (!strcmp(host.machine, "i686")) { + strcpy(machine_out, "x86_64"); + return; + } +# endif +#endif + strcpy(machine_out, host.machine); +} + +void setup_hostinfo(char *buf, int len) +{ + struct utsname host; + + uname(&host); + snprintf(buf, len, "%s %s %s %s %s", host.sysname, host.nodename, + host.release, host.version, host.machine); +} + +/* + * We cannot use glibc's abort(). It makes use of tgkill() which + * has no effect within UML's kernel threads. + * After that glibc would execute an invalid instruction to kill + * the calling process and UML crashes with SIGSEGV. + */ +static inline void __attribute__ ((noreturn)) uml_abort(void) +{ + sigset_t sig; + + fflush(NULL); + + if (!sigemptyset(&sig) && !sigaddset(&sig, SIGABRT)) + sigprocmask(SIG_UNBLOCK, &sig, 0); + + for (;;) + if (kill(getpid(), SIGABRT) < 0) + exit(127); +} + +ssize_t os_getrandom(void *buf, size_t len, unsigned int flags) +{ + return getrandom(buf, len, flags); +} + +/* + * UML helper threads must not handle SIGWINCH/INT/TERM + */ +void os_fix_helper_signals(void) +{ + signal(SIGWINCH, SIG_IGN); + signal(SIGINT, SIG_DFL); + signal(SIGTERM, SIG_DFL); +} + +void os_dump_core(void) +{ + int pid; + + signal(SIGSEGV, SIG_DFL); + + /* + * We are about to SIGTERM this entire process group to ensure that + * nothing is around to run after the kernel exits. The + * kernel wants to abort, not die through SIGTERM, so we + * ignore it here. + */ + + signal(SIGTERM, SIG_IGN); + kill(0, SIGTERM); + /* + * Most of the other processes associated with this UML are + * likely sTopped, so give them a SIGCONT so they see the + * SIGTERM. + */ + kill(0, SIGCONT); + + /* + * Now, having sent signals to everyone but us, make sure they + * die by ptrace. Processes can survive what's been done to + * them so far - the mechanism I understand is receiving a + * SIGSEGV and segfaulting immediately upon return. There is + * always a SIGSEGV pending, and (I'm guessing) signals are + * processed in numeric order so the SIGTERM (signal 15 vs + * SIGSEGV being signal 11) is never handled. + * + * Run a waitpid loop until we get some kind of error. + * Hopefully, it's ECHILD, but there's not a lot we can do if + * it's something else. Tell os_kill_ptraced_process not to + * wait for the child to report its death because there's + * nothing reasonable to do if that fails. + */ + + while ((pid = waitpid(-1, NULL, WNOHANG | __WALL)) > 0) + os_kill_ptraced_process(pid, 0); + + uml_abort(); +} + +void um_early_printk(const char *s, unsigned int n) +{ + printf("%.*s", n, s); +} + +static int quiet_info; + +static int __init quiet_cmd_param(char *str, int *add) +{ + quiet_info = 1; + return 0; +} + +__uml_setup("quiet", quiet_cmd_param, +"quiet\n" +" Turns off information messages during boot.\n\n"); + +void os_info(const char *fmt, ...) +{ + va_list list; + + if (quiet_info) + return; + + va_start(list, fmt); + vfprintf(stderr, fmt, list); + va_end(list); +} + +void os_warn(const char *fmt, ...) +{ + va_list list; + + va_start(list, fmt); + vfprintf(stderr, fmt, list); + va_end(list); +} diff --git a/arch/um/scripts/Makefile.rules b/arch/um/scripts/Makefile.rules new file mode 100644 index 0000000000..a8b7d9dab0 --- /dev/null +++ b/arch/um/scripts/Makefile.rules @@ -0,0 +1,27 @@ +# SPDX-License-Identifier: GPL-2.0 +# =========================================================================== +# arch/um: Generic definitions +# =========================================================================== + +USER_SINGLE_OBJS := \ + $(foreach f,$(patsubst %.o,%,$(obj-y)),$($(f)-objs)) +USER_OBJS += $(filter %_user.o,$(obj-y) $(USER_SINGLE_OBJS)) +USER_OBJS := $(foreach file,$(USER_OBJS),$(obj)/$(file)) + +$(USER_OBJS:.o=.%): \ + c_flags = -Wp,-MD,$(depfile) $(USER_CFLAGS) -include $(srctree)/include/linux/kern_levels.h -include user.h $(CFLAGS_$(basetarget).o) + +# These are like USER_OBJS but filter USER_CFLAGS through unprofile instead of +# using it directly. +UNPROFILE_OBJS := $(foreach file,$(UNPROFILE_OBJS),$(obj)/$(file)) + +$(UNPROFILE_OBJS:.o=.%): \ + c_flags = -Wp,-MD,$(depfile) $(call unprofile,$(USER_CFLAGS)) $(CFLAGS_$(basetarget).o) + +$(USER_OBJS) $(UNPROFILE_OBJS): \ + CHECKFLAGS := $(patsubst $(NOSTDINC_FLAGS),,$(CHECKFLAGS)) + +# The stubs can't try to call mcount or update basic block data +define unprofile + $(patsubst -pg,,$(patsubst -fprofile-arcs -ftest-coverage,,$(1))) +endef |