summaryrefslogtreecommitdiffstats
path: root/init
diff options
context:
space:
mode:
Diffstat (limited to 'init')
-rw-r--r--init/.gitignore2
-rw-r--r--init/Kconfig2010
-rw-r--r--init/Makefile61
-rwxr-xr-xinit/build-version10
-rw-r--r--init/calibrate.c316
-rw-r--r--init/do_mounts.c674
-rw-r--r--init/do_mounts.h43
-rw-r--r--init/do_mounts_initrd.c154
-rw-r--r--init/do_mounts_rd.c334
-rw-r--r--init/init_task.c222
-rw-r--r--init/initramfs.c762
-rw-r--r--init/main.c1650
-rw-r--r--init/noinitramfs.c42
-rw-r--r--init/version-timestamp.c31
-rw-r--r--init/version.c55
15 files changed, 6366 insertions, 0 deletions
diff --git a/init/.gitignore b/init/.gitignore
new file mode 100644
index 000000000..cbbe270ce
--- /dev/null
+++ b/init/.gitignore
@@ -0,0 +1,2 @@
+# SPDX-License-Identifier: GPL-2.0-only
+/utsversion-tmp.h
diff --git a/init/Kconfig b/init/Kconfig
new file mode 100644
index 000000000..148704640
--- /dev/null
+++ b/init/Kconfig
@@ -0,0 +1,2010 @@
+# SPDX-License-Identifier: GPL-2.0-only
+config CC_VERSION_TEXT
+ string
+ default "$(CC_VERSION_TEXT)"
+ help
+ This is used in unclear ways:
+
+ - Re-run Kconfig when the compiler is updated
+ The 'default' property references the environment variable,
+ CC_VERSION_TEXT so it is recorded in include/config/auto.conf.cmd.
+ When the compiler is updated, Kconfig will be invoked.
+
+ - Ensure full rebuild when the compiler is updated
+ include/linux/compiler-version.h contains this option in the comment
+ line so fixdep adds include/config/CC_VERSION_TEXT into the
+ auto-generated dependency. When the compiler is updated, syncconfig
+ will touch it and then every file will be rebuilt.
+
+config CC_IS_GCC
+ def_bool $(success,test "$(cc-name)" = GCC)
+
+config GCC_VERSION
+ int
+ default $(cc-version) if CC_IS_GCC
+ default 0
+
+config CC_IS_CLANG
+ def_bool $(success,test "$(cc-name)" = Clang)
+
+config CLANG_VERSION
+ int
+ default $(cc-version) if CC_IS_CLANG
+ default 0
+
+config AS_IS_GNU
+ def_bool $(success,test "$(as-name)" = GNU)
+
+config AS_IS_LLVM
+ def_bool $(success,test "$(as-name)" = LLVM)
+
+config AS_VERSION
+ int
+ # Use clang version if this is the integrated assembler
+ default CLANG_VERSION if AS_IS_LLVM
+ default $(as-version)
+
+config LD_IS_BFD
+ def_bool $(success,test "$(ld-name)" = BFD)
+
+config LD_VERSION
+ int
+ default $(ld-version) if LD_IS_BFD
+ default 0
+
+config LD_IS_LLD
+ def_bool $(success,test "$(ld-name)" = LLD)
+
+config LLD_VERSION
+ int
+ default $(ld-version) if LD_IS_LLD
+ default 0
+
+config RUST_IS_AVAILABLE
+ def_bool $(success,$(srctree)/scripts/rust_is_available.sh)
+ help
+ This shows whether a suitable Rust toolchain is available (found).
+
+ Please see Documentation/rust/quick-start.rst for instructions on how
+ to satisfy the build requirements of Rust support.
+
+ In particular, the Makefile target 'rustavailable' is useful to check
+ why the Rust toolchain is not being detected.
+
+config CC_CAN_LINK
+ bool
+ default $(success,$(srctree)/scripts/cc-can-link.sh $(CC) $(CLANG_FLAGS) $(USERCFLAGS) $(USERLDFLAGS) $(m64-flag)) if 64BIT
+ default $(success,$(srctree)/scripts/cc-can-link.sh $(CC) $(CLANG_FLAGS) $(USERCFLAGS) $(USERLDFLAGS) $(m32-flag))
+
+config CC_CAN_LINK_STATIC
+ bool
+ default $(success,$(srctree)/scripts/cc-can-link.sh $(CC) $(CLANG_FLAGS) $(USERCFLAGS) $(USERLDFLAGS) $(m64-flag) -static) if 64BIT
+ default $(success,$(srctree)/scripts/cc-can-link.sh $(CC) $(CLANG_FLAGS) $(USERCFLAGS) $(USERLDFLAGS) $(m32-flag) -static)
+
+config CC_HAS_ASM_GOTO_OUTPUT
+ def_bool $(success,echo 'int foo(int x) { asm goto ("": "=r"(x) ::: bar); return x; bar: return 0; }' | $(CC) -x c - -c -o /dev/null)
+
+config CC_HAS_ASM_GOTO_TIED_OUTPUT
+ depends on CC_HAS_ASM_GOTO_OUTPUT
+ # Detect buggy gcc and clang, fixed in gcc-11 clang-14.
+ def_bool $(success,echo 'int foo(int *x) { asm goto (".long (%l[bar]) - .": "+m"(*x) ::: bar); return *x; bar: return 0; }' | $CC -x c - -c -o /dev/null)
+
+config TOOLS_SUPPORT_RELR
+ def_bool $(success,env "CC=$(CC)" "LD=$(LD)" "NM=$(NM)" "OBJCOPY=$(OBJCOPY)" $(srctree)/scripts/tools-support-relr.sh)
+
+config CC_HAS_ASM_INLINE
+ def_bool $(success,echo 'void foo(void) { asm inline (""); }' | $(CC) -x c - -c -o /dev/null)
+
+config CC_HAS_NO_PROFILE_FN_ATTR
+ def_bool $(success,echo '__attribute__((no_profile_instrument_function)) int x();' | $(CC) -x c - -c -o /dev/null -Werror)
+
+config PAHOLE_VERSION
+ int
+ default $(shell,$(srctree)/scripts/pahole-version.sh $(PAHOLE))
+
+config CONSTRUCTORS
+ bool
+
+config IRQ_WORK
+ bool
+
+config BUILDTIME_TABLE_SORT
+ bool
+
+config THREAD_INFO_IN_TASK
+ bool
+ help
+ Select this to move thread_info off the stack into task_struct. To
+ make this work, an arch will need to remove all thread_info fields
+ except flags and fix any runtime bugs.
+
+ One subtle change that will be needed is to use try_get_task_stack()
+ and put_task_stack() in save_thread_stack_tsk() and get_wchan().
+
+menu "General setup"
+
+config BROKEN
+ bool
+
+config BROKEN_ON_SMP
+ bool
+ depends on BROKEN || !SMP
+ default y
+
+config INIT_ENV_ARG_LIMIT
+ int
+ default 32 if !UML
+ default 128 if UML
+ help
+ Maximum of each of the number of arguments and environment
+ variables passed to init from the kernel command line.
+
+config COMPILE_TEST
+ bool "Compile also drivers which will not load"
+ depends on HAS_IOMEM
+ help
+ Some drivers can be compiled on a different platform than they are
+ intended to be run on. Despite they cannot be loaded there (or even
+ when they load they cannot be used due to missing HW support),
+ developers still, opposing to distributors, might want to build such
+ drivers to compile-test them.
+
+ If you are a developer and want to build everything available, say Y
+ here. If you are a user/distributor, say N here to exclude useless
+ drivers to be distributed.
+
+config WERROR
+ bool "Compile the kernel with warnings as errors"
+ default COMPILE_TEST
+ help
+ A kernel build should not cause any compiler warnings, and this
+ enables the '-Werror' (for C) and '-Dwarnings' (for Rust) flags
+ to enforce that rule by default.
+
+ However, if you have a new (or very old) compiler with odd and
+ unusual warnings, or you have some architecture with problems,
+ you may need to disable this config option in order to
+ successfully build the kernel.
+
+ If in doubt, say Y.
+
+config UAPI_HEADER_TEST
+ bool "Compile test UAPI headers"
+ depends on HEADERS_INSTALL && CC_CAN_LINK
+ help
+ Compile test headers exported to user-space to ensure they are
+ self-contained, i.e. compilable as standalone units.
+
+ If you are a developer or tester and want to ensure the exported
+ headers are self-contained, say Y here. Otherwise, choose N.
+
+config LOCALVERSION
+ string "Local version - append to kernel release"
+ help
+ Append an extra string to the end of your kernel version.
+ This will show up when you type uname, for example.
+ The string you set here will be appended after the contents of
+ any files with a filename matching localversion* in your
+ object and source tree, in that order. Your total string can
+ be a maximum of 64 characters.
+
+config LOCALVERSION_AUTO
+ bool "Automatically append version information to the version string"
+ default y
+ depends on !COMPILE_TEST
+ help
+ This will try to automatically determine if the current tree is a
+ release tree by looking for git tags that belong to the current
+ top of tree revision.
+
+ A string of the format -gxxxxxxxx will be added to the localversion
+ if a git-based tree is found. The string generated by this will be
+ appended after any matching localversion* files, and after the value
+ set in CONFIG_LOCALVERSION.
+
+ (The actual string used here is the first eight characters produced
+ by running the command:
+
+ $ git rev-parse --verify HEAD
+
+ which is done within the script "scripts/setlocalversion".)
+
+config BUILD_SALT
+ string "Build ID Salt"
+ default ""
+ help
+ The build ID is used to link binaries and their debug info. Setting
+ this option will use the value in the calculation of the build id.
+ This is mostly useful for distributions which want to ensure the
+ build is unique between builds. It's safe to leave the default.
+
+config HAVE_KERNEL_GZIP
+ bool
+
+config HAVE_KERNEL_BZIP2
+ bool
+
+config HAVE_KERNEL_LZMA
+ bool
+
+config HAVE_KERNEL_XZ
+ bool
+
+config HAVE_KERNEL_LZO
+ bool
+
+config HAVE_KERNEL_LZ4
+ bool
+
+config HAVE_KERNEL_ZSTD
+ bool
+
+config HAVE_KERNEL_UNCOMPRESSED
+ bool
+
+choice
+ prompt "Kernel compression mode"
+ default KERNEL_GZIP
+ depends on HAVE_KERNEL_GZIP || HAVE_KERNEL_BZIP2 || HAVE_KERNEL_LZMA || HAVE_KERNEL_XZ || HAVE_KERNEL_LZO || HAVE_KERNEL_LZ4 || HAVE_KERNEL_ZSTD || HAVE_KERNEL_UNCOMPRESSED
+ help
+ The linux kernel is a kind of self-extracting executable.
+ Several compression algorithms are available, which differ
+ in efficiency, compression and decompression speed.
+ Compression speed is only relevant when building a kernel.
+ Decompression speed is relevant at each boot.
+
+ If you have any problems with bzip2 or lzma compressed
+ kernels, mail me (Alain Knaff) <alain@knaff.lu>. (An older
+ version of this functionality (bzip2 only), for 2.4, was
+ supplied by Christian Ludwig)
+
+ High compression options are mostly useful for users, who
+ are low on disk space (embedded systems), but for whom ram
+ size matters less.
+
+ If in doubt, select 'gzip'
+
+config KERNEL_GZIP
+ bool "Gzip"
+ depends on HAVE_KERNEL_GZIP
+ help
+ The old and tried gzip compression. It provides a good balance
+ between compression ratio and decompression speed.
+
+config KERNEL_BZIP2
+ bool "Bzip2"
+ depends on HAVE_KERNEL_BZIP2
+ help
+ Its compression ratio and speed is intermediate.
+ Decompression speed is slowest among the choices. The kernel
+ size is about 10% smaller with bzip2, in comparison to gzip.
+ Bzip2 uses a large amount of memory. For modern kernels you
+ will need at least 8MB RAM or more for booting.
+
+config KERNEL_LZMA
+ bool "LZMA"
+ depends on HAVE_KERNEL_LZMA
+ help
+ This compression algorithm's ratio is best. Decompression speed
+ is between gzip and bzip2. Compression is slowest.
+ The kernel size is about 33% smaller with LZMA in comparison to gzip.
+
+config KERNEL_XZ
+ bool "XZ"
+ depends on HAVE_KERNEL_XZ
+ help
+ XZ uses the LZMA2 algorithm and instruction set specific
+ BCJ filters which can improve compression ratio of executable
+ code. The size of the kernel is about 30% smaller with XZ in
+ comparison to gzip. On architectures for which there is a BCJ
+ filter (i386, x86_64, ARM, IA-64, PowerPC, and SPARC), XZ
+ will create a few percent smaller kernel than plain LZMA.
+
+ The speed is about the same as with LZMA: The decompression
+ speed of XZ is better than that of bzip2 but worse than gzip
+ and LZO. Compression is slow.
+
+config KERNEL_LZO
+ bool "LZO"
+ depends on HAVE_KERNEL_LZO
+ help
+ Its compression ratio is the poorest among the choices. The kernel
+ size is about 10% bigger than gzip; however its speed
+ (both compression and decompression) is the fastest.
+
+config KERNEL_LZ4
+ bool "LZ4"
+ depends on HAVE_KERNEL_LZ4
+ help
+ LZ4 is an LZ77-type compressor with a fixed, byte-oriented encoding.
+ A preliminary version of LZ4 de/compression tool is available at
+ <https://code.google.com/p/lz4/>.
+
+ Its compression ratio is worse than LZO. The size of the kernel
+ is about 8% bigger than LZO. But the decompression speed is
+ faster than LZO.
+
+config KERNEL_ZSTD
+ bool "ZSTD"
+ depends on HAVE_KERNEL_ZSTD
+ help
+ ZSTD is a compression algorithm targeting intermediate compression
+ with fast decompression speed. It will compress better than GZIP and
+ decompress around the same speed as LZO, but slower than LZ4. You
+ will need at least 192 KB RAM or more for booting. The zstd command
+ line tool is required for compression.
+
+config KERNEL_UNCOMPRESSED
+ bool "None"
+ depends on HAVE_KERNEL_UNCOMPRESSED
+ help
+ Produce uncompressed kernel image. This option is usually not what
+ you want. It is useful for debugging the kernel in slow simulation
+ environments, where decompressing and moving the kernel is awfully
+ slow. This option allows early boot code to skip the decompressor
+ and jump right at uncompressed kernel image.
+
+endchoice
+
+config DEFAULT_INIT
+ string "Default init path"
+ default ""
+ help
+ This option determines the default init for the system if no init=
+ option is passed on the kernel command line. If the requested path is
+ not present, we will still then move on to attempting further
+ locations (e.g. /sbin/init, etc). If this is empty, we will just use
+ the fallback list when init= is not passed.
+
+config DEFAULT_HOSTNAME
+ string "Default hostname"
+ default "(none)"
+ help
+ This option determines the default system hostname before userspace
+ calls sethostname(2). The kernel traditionally uses "(none)" here,
+ but you may wish to use a different default here to make a minimal
+ system more usable with less configuration.
+
+config SYSVIPC
+ bool "System V IPC"
+ help
+ Inter Process Communication is a suite of library functions and
+ system calls which let processes (running programs) synchronize and
+ exchange information. It is generally considered to be a good thing,
+ and some programs won't run unless you say Y here. In particular, if
+ you want to run the DOS emulator dosemu under Linux (read the
+ DOSEMU-HOWTO, available from <http://www.tldp.org/docs.html#howto>),
+ you'll need to say Y here.
+
+ You can find documentation about IPC with "info ipc" and also in
+ section 6.4 of the Linux Programmer's Guide, available from
+ <http://www.tldp.org/guides.html>.
+
+config SYSVIPC_SYSCTL
+ bool
+ depends on SYSVIPC
+ depends on SYSCTL
+ default y
+
+config SYSVIPC_COMPAT
+ def_bool y
+ depends on COMPAT && SYSVIPC
+
+config POSIX_MQUEUE
+ bool "POSIX Message Queues"
+ depends on NET
+ help
+ POSIX variant of message queues is a part of IPC. In POSIX message
+ queues every message has a priority which decides about succession
+ of receiving it by a process. If you want to compile and run
+ programs written e.g. for Solaris with use of its POSIX message
+ queues (functions mq_*) say Y here.
+
+ POSIX message queues are visible as a filesystem called 'mqueue'
+ and can be mounted somewhere if you want to do filesystem
+ operations on message queues.
+
+ If unsure, say Y.
+
+config POSIX_MQUEUE_SYSCTL
+ bool
+ depends on POSIX_MQUEUE
+ depends on SYSCTL
+ default y
+
+config WATCH_QUEUE
+ bool "General notification queue"
+ default n
+ help
+
+ This is a general notification queue for the kernel to pass events to
+ userspace by splicing them into pipes. It can be used in conjunction
+ with watches for key/keyring change notifications and device
+ notifications.
+
+ See Documentation/core-api/watch_queue.rst
+
+config CROSS_MEMORY_ATTACH
+ bool "Enable process_vm_readv/writev syscalls"
+ depends on MMU
+ default y
+ help
+ Enabling this option adds the system calls process_vm_readv and
+ process_vm_writev which allow a process with the correct privileges
+ to directly read from or write to another process' address space.
+ See the man page for more details.
+
+config USELIB
+ bool "uselib syscall (for libc5 and earlier)"
+ default ALPHA || M68K || SPARC
+ help
+ This option enables the uselib syscall, a system call used in the
+ dynamic linker from libc5 and earlier. glibc does not use this
+ system call. If you intend to run programs built on libc5 or
+ earlier, you may need to enable this syscall. Current systems
+ running glibc can safely disable this.
+
+config AUDIT
+ bool "Auditing support"
+ depends on NET
+ help
+ Enable auditing infrastructure that can be used with another
+ kernel subsystem, such as SELinux (which requires this for
+ logging of avc messages output). System call auditing is included
+ on architectures which support it.
+
+config HAVE_ARCH_AUDITSYSCALL
+ bool
+
+config AUDITSYSCALL
+ def_bool y
+ depends on AUDIT && HAVE_ARCH_AUDITSYSCALL
+ select FSNOTIFY
+
+source "kernel/irq/Kconfig"
+source "kernel/time/Kconfig"
+source "kernel/bpf/Kconfig"
+source "kernel/Kconfig.preempt"
+
+menu "CPU/Task time and stats accounting"
+
+config VIRT_CPU_ACCOUNTING
+ bool
+
+choice
+ prompt "Cputime accounting"
+ default TICK_CPU_ACCOUNTING
+
+# Kind of a stub config for the pure tick based cputime accounting
+config TICK_CPU_ACCOUNTING
+ bool "Simple tick based cputime accounting"
+ depends on !S390 && !NO_HZ_FULL
+ help
+ This is the basic tick based cputime accounting that maintains
+ statistics about user, system and idle time spent on per jiffies
+ granularity.
+
+ If unsure, say Y.
+
+config VIRT_CPU_ACCOUNTING_NATIVE
+ bool "Deterministic task and CPU time accounting"
+ depends on HAVE_VIRT_CPU_ACCOUNTING && !NO_HZ_FULL
+ select VIRT_CPU_ACCOUNTING
+ help
+ Select this option to enable more accurate task and CPU time
+ accounting. This is done by reading a CPU counter on each
+ kernel entry and exit and on transitions within the kernel
+ between system, softirq and hardirq state, so there is a
+ small performance impact. In the case of s390 or IBM POWER > 5,
+ this also enables accounting of stolen time on logically-partitioned
+ systems.
+
+config VIRT_CPU_ACCOUNTING_GEN
+ bool "Full dynticks CPU time accounting"
+ depends on HAVE_CONTEXT_TRACKING_USER
+ depends on HAVE_VIRT_CPU_ACCOUNTING_GEN
+ depends on GENERIC_CLOCKEVENTS
+ select VIRT_CPU_ACCOUNTING
+ select CONTEXT_TRACKING_USER
+ help
+ Select this option to enable task and CPU time accounting on full
+ dynticks systems. This accounting is implemented by watching every
+ kernel-user boundaries using the context tracking subsystem.
+ The accounting is thus performed at the expense of some significant
+ overhead.
+
+ For now this is only useful if you are working on the full
+ dynticks subsystem development.
+
+ If unsure, say N.
+
+endchoice
+
+config IRQ_TIME_ACCOUNTING
+ bool "Fine granularity task level IRQ time accounting"
+ depends on HAVE_IRQ_TIME_ACCOUNTING && !VIRT_CPU_ACCOUNTING_NATIVE
+ help
+ Select this option to enable fine granularity task irq time
+ accounting. This is done by reading a timestamp on each
+ transitions between softirq and hardirq state, so there can be a
+ small performance impact.
+
+ If in doubt, say N here.
+
+config HAVE_SCHED_AVG_IRQ
+ def_bool y
+ depends on IRQ_TIME_ACCOUNTING || PARAVIRT_TIME_ACCOUNTING
+ depends on SMP
+
+config SCHED_THERMAL_PRESSURE
+ bool
+ default y if ARM && ARM_CPU_TOPOLOGY
+ default y if ARM64
+ depends on SMP
+ depends on CPU_FREQ_THERMAL
+ help
+ Select this option to enable thermal pressure accounting in the
+ scheduler. Thermal pressure is the value conveyed to the scheduler
+ that reflects the reduction in CPU compute capacity resulted from
+ thermal throttling. Thermal throttling occurs when the performance of
+ a CPU is capped due to high operating temperatures.
+
+ If selected, the scheduler will be able to balance tasks accordingly,
+ i.e. put less load on throttled CPUs than on non/less throttled ones.
+
+ This requires the architecture to implement
+ arch_update_thermal_pressure() and arch_scale_thermal_pressure().
+
+config BSD_PROCESS_ACCT
+ bool "BSD Process Accounting"
+ depends on MULTIUSER
+ help
+ If you say Y here, a user level program will be able to instruct the
+ kernel (via a special system call) to write process accounting
+ information to a file: whenever a process exits, information about
+ that process will be appended to the file by the kernel. The
+ information includes things such as creation time, owning user,
+ command name, memory usage, controlling terminal etc. (the complete
+ list is in the struct acct in <file:include/linux/acct.h>). It is
+ up to the user level program to do useful things with this
+ information. This is generally a good idea, so say Y.
+
+config BSD_PROCESS_ACCT_V3
+ bool "BSD Process Accounting version 3 file format"
+ depends on BSD_PROCESS_ACCT
+ default n
+ help
+ If you say Y here, the process accounting information is written
+ in a new file format that also logs the process IDs of each
+ process and its parent. Note that this file format is incompatible
+ with previous v0/v1/v2 file formats, so you will need updated tools
+ for processing it. A preliminary version of these tools is available
+ at <http://www.gnu.org/software/acct/>.
+
+config TASKSTATS
+ bool "Export task/process statistics through netlink"
+ depends on NET
+ depends on MULTIUSER
+ default n
+ help
+ Export selected statistics for tasks/processes through the
+ generic netlink interface. Unlike BSD process accounting, the
+ statistics are available during the lifetime of tasks/processes as
+ responses to commands. Like BSD accounting, they are sent to user
+ space on task exit.
+
+ Say N if unsure.
+
+config TASK_DELAY_ACCT
+ bool "Enable per-task delay accounting"
+ depends on TASKSTATS
+ select SCHED_INFO
+ help
+ Collect information on time spent by a task waiting for system
+ resources like cpu, synchronous block I/O completion and swapping
+ in pages. Such statistics can help in setting a task's priorities
+ relative to other tasks for cpu, io, rss limits etc.
+
+ Say N if unsure.
+
+config TASK_XACCT
+ bool "Enable extended accounting over taskstats"
+ depends on TASKSTATS
+ help
+ Collect extended task accounting data and send the data
+ to userland for processing over the taskstats interface.
+
+ Say N if unsure.
+
+config TASK_IO_ACCOUNTING
+ bool "Enable per-task storage I/O accounting"
+ depends on TASK_XACCT
+ help
+ Collect information on the number of bytes of storage I/O which this
+ task has caused.
+
+ Say N if unsure.
+
+config PSI
+ bool "Pressure stall information tracking"
+ select KERNFS
+ help
+ Collect metrics that indicate how overcommitted the CPU, memory,
+ and IO capacity are in the system.
+
+ If you say Y here, the kernel will create /proc/pressure/ with the
+ pressure statistics files cpu, memory, and io. These will indicate
+ the share of walltime in which some or all tasks in the system are
+ delayed due to contention of the respective resource.
+
+ In kernels with cgroup support, cgroups (cgroup2 only) will
+ have cpu.pressure, memory.pressure, and io.pressure files,
+ which aggregate pressure stalls for the grouped tasks only.
+
+ For more details see Documentation/accounting/psi.rst.
+
+ Say N if unsure.
+
+config PSI_DEFAULT_DISABLED
+ bool "Require boot parameter to enable pressure stall information tracking"
+ default n
+ depends on PSI
+ help
+ If set, pressure stall information tracking will be disabled
+ per default but can be enabled through passing psi=1 on the
+ kernel commandline during boot.
+
+ This feature adds some code to the task wakeup and sleep
+ paths of the scheduler. The overhead is too low to affect
+ common scheduling-intense workloads in practice (such as
+ webservers, memcache), but it does show up in artificial
+ scheduler stress tests, such as hackbench.
+
+ If you are paranoid and not sure what the kernel will be
+ used for, say Y.
+
+ Say N if unsure.
+
+endmenu # "CPU/Task time and stats accounting"
+
+config CPU_ISOLATION
+ bool "CPU isolation"
+ depends on SMP || COMPILE_TEST
+ default y
+ help
+ Make sure that CPUs running critical tasks are not disturbed by
+ any source of "noise" such as unbound workqueues, timers, kthreads...
+ Unbound jobs get offloaded to housekeeping CPUs. This is driven by
+ the "isolcpus=" boot parameter.
+
+ Say Y if unsure.
+
+source "kernel/rcu/Kconfig"
+
+config BUILD_BIN2C
+ bool
+ default n
+
+config IKCONFIG
+ tristate "Kernel .config support"
+ help
+ This option enables the complete Linux kernel ".config" file
+ contents to be saved in the kernel. It provides documentation
+ of which kernel options are used in a running kernel or in an
+ on-disk kernel. This information can be extracted from the kernel
+ image file with the script scripts/extract-ikconfig and used as
+ input to rebuild the current kernel or to build another kernel.
+ It can also be extracted from a running kernel by reading
+ /proc/config.gz if enabled (below).
+
+config IKCONFIG_PROC
+ bool "Enable access to .config through /proc/config.gz"
+ depends on IKCONFIG && PROC_FS
+ help
+ This option enables access to the kernel configuration file
+ through /proc/config.gz.
+
+config IKHEADERS
+ tristate "Enable kernel headers through /sys/kernel/kheaders.tar.xz"
+ depends on SYSFS
+ help
+ This option enables access to the in-kernel headers that are generated during
+ the build process. These can be used to build eBPF tracing programs,
+ or similar programs. If you build the headers as a module, a module called
+ kheaders.ko is built which can be loaded on-demand to get access to headers.
+
+config LOG_BUF_SHIFT
+ int "Kernel log buffer size (16 => 64KB, 17 => 128KB)"
+ range 12 25
+ default 17
+ depends on PRINTK
+ help
+ Select the minimal kernel log buffer size as a power of 2.
+ The final size is affected by LOG_CPU_MAX_BUF_SHIFT config
+ parameter, see below. Any higher size also might be forced
+ by "log_buf_len" boot parameter.
+
+ Examples:
+ 17 => 128 KB
+ 16 => 64 KB
+ 15 => 32 KB
+ 14 => 16 KB
+ 13 => 8 KB
+ 12 => 4 KB
+
+config LOG_CPU_MAX_BUF_SHIFT
+ int "CPU kernel log buffer size contribution (13 => 8 KB, 17 => 128KB)"
+ depends on SMP
+ range 0 21
+ default 12 if !BASE_SMALL
+ default 0 if BASE_SMALL
+ depends on PRINTK
+ help
+ This option allows to increase the default ring buffer size
+ according to the number of CPUs. The value defines the contribution
+ of each CPU as a power of 2. The used space is typically only few
+ lines however it might be much more when problems are reported,
+ e.g. backtraces.
+
+ The increased size means that a new buffer has to be allocated and
+ the original static one is unused. It makes sense only on systems
+ with more CPUs. Therefore this value is used only when the sum of
+ contributions is greater than the half of the default kernel ring
+ buffer as defined by LOG_BUF_SHIFT. The default values are set
+ so that more than 16 CPUs are needed to trigger the allocation.
+
+ Also this option is ignored when "log_buf_len" kernel parameter is
+ used as it forces an exact (power of two) size of the ring buffer.
+
+ The number of possible CPUs is used for this computation ignoring
+ hotplugging making the computation optimal for the worst case
+ scenario while allowing a simple algorithm to be used from bootup.
+
+ Examples shift values and their meaning:
+ 17 => 128 KB for each CPU
+ 16 => 64 KB for each CPU
+ 15 => 32 KB for each CPU
+ 14 => 16 KB for each CPU
+ 13 => 8 KB for each CPU
+ 12 => 4 KB for each CPU
+
+config PRINTK_SAFE_LOG_BUF_SHIFT
+ int "Temporary per-CPU printk log buffer size (12 => 4KB, 13 => 8KB)"
+ range 10 21
+ default 13
+ depends on PRINTK
+ help
+ Select the size of an alternate printk per-CPU buffer where messages
+ printed from usafe contexts are temporary stored. One example would
+ be NMI messages, another one - printk recursion. The messages are
+ copied to the main log buffer in a safe context to avoid a deadlock.
+ The value defines the size as a power of 2.
+
+ Those messages are rare and limited. The largest one is when
+ a backtrace is printed. It usually fits into 4KB. Select
+ 8KB if you want to be on the safe side.
+
+ Examples:
+ 17 => 128 KB for each CPU
+ 16 => 64 KB for each CPU
+ 15 => 32 KB for each CPU
+ 14 => 16 KB for each CPU
+ 13 => 8 KB for each CPU
+ 12 => 4 KB for each CPU
+
+config PRINTK_INDEX
+ bool "Printk indexing debugfs interface"
+ depends on PRINTK && DEBUG_FS
+ help
+ Add support for indexing of all printk formats known at compile time
+ at <debugfs>/printk/index/<module>.
+
+ This can be used as part of maintaining daemons which monitor
+ /dev/kmsg, as it permits auditing the printk formats present in a
+ kernel, allowing detection of cases where monitored printks are
+ changed or no longer present.
+
+ There is no additional runtime cost to printk with this enabled.
+
+#
+# Architectures with an unreliable sched_clock() should select this:
+#
+config HAVE_UNSTABLE_SCHED_CLOCK
+ bool
+
+config GENERIC_SCHED_CLOCK
+ bool
+
+menu "Scheduler features"
+
+config UCLAMP_TASK
+ bool "Enable utilization clamping for RT/FAIR tasks"
+ depends on CPU_FREQ_GOV_SCHEDUTIL
+ help
+ This feature enables the scheduler to track the clamped utilization
+ of each CPU based on RUNNABLE tasks scheduled on that CPU.
+
+ With this option, the user can specify the min and max CPU
+ utilization allowed for RUNNABLE tasks. The max utilization defines
+ the maximum frequency a task should use while the min utilization
+ defines the minimum frequency it should use.
+
+ Both min and max utilization clamp values are hints to the scheduler,
+ aiming at improving its frequency selection policy, but they do not
+ enforce or grant any specific bandwidth for tasks.
+
+ If in doubt, say N.
+
+config UCLAMP_BUCKETS_COUNT
+ int "Number of supported utilization clamp buckets"
+ range 5 20
+ default 5
+ depends on UCLAMP_TASK
+ help
+ Defines the number of clamp buckets to use. The range of each bucket
+ will be SCHED_CAPACITY_SCALE/UCLAMP_BUCKETS_COUNT. The higher the
+ number of clamp buckets the finer their granularity and the higher
+ the precision of clamping aggregation and tracking at run-time.
+
+ For example, with the minimum configuration value we will have 5
+ clamp buckets tracking 20% utilization each. A 25% boosted tasks will
+ be refcounted in the [20..39]% bucket and will set the bucket clamp
+ effective value to 25%.
+ If a second 30% boosted task should be co-scheduled on the same CPU,
+ that task will be refcounted in the same bucket of the first task and
+ it will boost the bucket clamp effective value to 30%.
+ The clamp effective value of a bucket is reset to its nominal value
+ (20% in the example above) when there are no more tasks refcounted in
+ that bucket.
+
+ An additional boost/capping margin can be added to some tasks. In the
+ example above the 25% task will be boosted to 30% until it exits the
+ CPU. If that should be considered not acceptable on certain systems,
+ it's always possible to reduce the margin by increasing the number of
+ clamp buckets to trade off used memory for run-time tracking
+ precision.
+
+ If in doubt, use the default value.
+
+endmenu
+
+#
+# For architectures that want to enable the support for NUMA-affine scheduler
+# balancing logic:
+#
+config ARCH_SUPPORTS_NUMA_BALANCING
+ bool
+
+#
+# For architectures that prefer to flush all TLBs after a number of pages
+# are unmapped instead of sending one IPI per page to flush. The architecture
+# must provide guarantees on what happens if a clean TLB cache entry is
+# written after the unmap. Details are in mm/rmap.c near the check for
+# should_defer_flush. The architecture should also consider if the full flush
+# and the refill costs are offset by the savings of sending fewer IPIs.
+config ARCH_WANT_BATCHED_UNMAP_TLB_FLUSH
+ bool
+
+config CC_HAS_INT128
+ def_bool !$(cc-option,$(m64-flag) -D__SIZEOF_INT128__=0) && 64BIT
+
+config CC_IMPLICIT_FALLTHROUGH
+ string
+ default "-Wimplicit-fallthrough=5" if CC_IS_GCC && $(cc-option,-Wimplicit-fallthrough=5)
+ default "-Wimplicit-fallthrough" if CC_IS_CLANG && $(cc-option,-Wunreachable-code-fallthrough)
+
+# Currently, disable gcc-11+ array-bounds globally.
+# It's still broken in gcc-13, so no upper bound yet.
+config GCC11_NO_ARRAY_BOUNDS
+ def_bool y
+
+config CC_NO_ARRAY_BOUNDS
+ bool
+ default y if CC_IS_GCC && GCC_VERSION >= 110000 && GCC11_NO_ARRAY_BOUNDS
+
+#
+# For architectures that know their GCC __int128 support is sound
+#
+config ARCH_SUPPORTS_INT128
+ bool
+
+# For architectures that (ab)use NUMA to represent different memory regions
+# all cpu-local but of different latencies, such as SuperH.
+#
+config ARCH_WANT_NUMA_VARIABLE_LOCALITY
+ bool
+
+config NUMA_BALANCING
+ bool "Memory placement aware NUMA scheduler"
+ depends on ARCH_SUPPORTS_NUMA_BALANCING
+ depends on !ARCH_WANT_NUMA_VARIABLE_LOCALITY
+ depends on SMP && NUMA && MIGRATION && !PREEMPT_RT
+ help
+ This option adds support for automatic NUMA aware memory/task placement.
+ The mechanism is quite primitive and is based on migrating memory when
+ it has references to the node the task is running on.
+
+ This system will be inactive on UMA systems.
+
+config NUMA_BALANCING_DEFAULT_ENABLED
+ bool "Automatically enable NUMA aware memory/task placement"
+ default y
+ depends on NUMA_BALANCING
+ help
+ If set, automatic NUMA balancing will be enabled if running on a NUMA
+ machine.
+
+menuconfig CGROUPS
+ bool "Control Group support"
+ select KERNFS
+ help
+ This option adds support for grouping sets of processes together, for
+ use with process control subsystems such as Cpusets, CFS, memory
+ controls or device isolation.
+ See
+ - Documentation/scheduler/sched-design-CFS.rst (CFS)
+ - Documentation/admin-guide/cgroup-v1/ (features for grouping, isolation
+ and resource control)
+
+ Say N if unsure.
+
+if CGROUPS
+
+config PAGE_COUNTER
+ bool
+
+config CGROUP_FAVOR_DYNMODS
+ bool "Favor dynamic modification latency reduction by default"
+ help
+ This option enables the "favordynmods" mount option by default
+ which reduces the latencies of dynamic cgroup modifications such
+ as task migrations and controller on/offs at the cost of making
+ hot path operations such as forks and exits more expensive.
+
+ Say N if unsure.
+
+config MEMCG
+ bool "Memory controller"
+ select PAGE_COUNTER
+ select EVENTFD
+ help
+ Provides control over the memory footprint of tasks in a cgroup.
+
+config MEMCG_KMEM
+ bool
+ depends on MEMCG && !SLOB
+ default y
+
+config BLK_CGROUP
+ bool "IO controller"
+ depends on BLOCK
+ default n
+ help
+ Generic block IO controller cgroup interface. This is the common
+ cgroup interface which should be used by various IO controlling
+ policies.
+
+ Currently, CFQ IO scheduler uses it to recognize task groups and
+ control disk bandwidth allocation (proportional time slice allocation)
+ to such task groups. It is also used by bio throttling logic in
+ block layer to implement upper limit in IO rates on a device.
+
+ This option only enables generic Block IO controller infrastructure.
+ One needs to also enable actual IO controlling logic/policy. For
+ enabling proportional weight division of disk bandwidth in CFQ, set
+ CONFIG_BFQ_GROUP_IOSCHED=y; for enabling throttling policy, set
+ CONFIG_BLK_DEV_THROTTLING=y.
+
+ See Documentation/admin-guide/cgroup-v1/blkio-controller.rst for more information.
+
+config CGROUP_WRITEBACK
+ bool
+ depends on MEMCG && BLK_CGROUP
+ default y
+
+menuconfig CGROUP_SCHED
+ bool "CPU controller"
+ default n
+ help
+ This feature lets CPU scheduler recognize task groups and control CPU
+ bandwidth allocation to such task groups. It uses cgroups to group
+ tasks.
+
+if CGROUP_SCHED
+config FAIR_GROUP_SCHED
+ bool "Group scheduling for SCHED_OTHER"
+ depends on CGROUP_SCHED
+ default CGROUP_SCHED
+
+config CFS_BANDWIDTH
+ bool "CPU bandwidth provisioning for FAIR_GROUP_SCHED"
+ depends on FAIR_GROUP_SCHED
+ default n
+ help
+ This option allows users to define CPU bandwidth rates (limits) for
+ tasks running within the fair group scheduler. Groups with no limit
+ set are considered to be unconstrained and will run with no
+ restriction.
+ See Documentation/scheduler/sched-bwc.rst for more information.
+
+config RT_GROUP_SCHED
+ bool "Group scheduling for SCHED_RR/FIFO"
+ depends on CGROUP_SCHED
+ default n
+ help
+ This feature lets you explicitly allocate real CPU bandwidth
+ to task groups. If enabled, it will also make it impossible to
+ schedule realtime tasks for non-root users until you allocate
+ realtime bandwidth for them.
+ See Documentation/scheduler/sched-rt-group.rst for more information.
+
+endif #CGROUP_SCHED
+
+config UCLAMP_TASK_GROUP
+ bool "Utilization clamping per group of tasks"
+ depends on CGROUP_SCHED
+ depends on UCLAMP_TASK
+ default n
+ help
+ This feature enables the scheduler to track the clamped utilization
+ of each CPU based on RUNNABLE tasks currently scheduled on that CPU.
+
+ When this option is enabled, the user can specify a min and max
+ CPU bandwidth which is allowed for each single task in a group.
+ The max bandwidth allows to clamp the maximum frequency a task
+ can use, while the min bandwidth allows to define a minimum
+ frequency a task will always use.
+
+ When task group based utilization clamping is enabled, an eventually
+ specified task-specific clamp value is constrained by the cgroup
+ specified clamp value. Both minimum and maximum task clamping cannot
+ be bigger than the corresponding clamping defined at task group level.
+
+ If in doubt, say N.
+
+config CGROUP_PIDS
+ bool "PIDs controller"
+ help
+ Provides enforcement of process number limits in the scope of a
+ cgroup. Any attempt to fork more processes than is allowed in the
+ cgroup will fail. PIDs are fundamentally a global resource because it
+ is fairly trivial to reach PID exhaustion before you reach even a
+ conservative kmemcg limit. As a result, it is possible to grind a
+ system to halt without being limited by other cgroup policies. The
+ PIDs controller is designed to stop this from happening.
+
+ It should be noted that organisational operations (such as attaching
+ to a cgroup hierarchy) will *not* be blocked by the PIDs controller,
+ since the PIDs limit only affects a process's ability to fork, not to
+ attach to a cgroup.
+
+config CGROUP_RDMA
+ bool "RDMA controller"
+ help
+ Provides enforcement of RDMA resources defined by IB stack.
+ It is fairly easy for consumers to exhaust RDMA resources, which
+ can result into resource unavailability to other consumers.
+ RDMA controller is designed to stop this from happening.
+ Attaching processes with active RDMA resources to the cgroup
+ hierarchy is allowed even if can cross the hierarchy's limit.
+
+config CGROUP_FREEZER
+ bool "Freezer controller"
+ help
+ Provides a way to freeze and unfreeze all tasks in a
+ cgroup.
+
+ This option affects the ORIGINAL cgroup interface. The cgroup2 memory
+ controller includes important in-kernel memory consumers per default.
+
+ If you're using cgroup2, say N.
+
+config CGROUP_HUGETLB
+ bool "HugeTLB controller"
+ depends on HUGETLB_PAGE
+ select PAGE_COUNTER
+ default n
+ help
+ Provides a cgroup controller for HugeTLB pages.
+ When you enable this, you can put a per cgroup limit on HugeTLB usage.
+ The limit is enforced during page fault. Since HugeTLB doesn't
+ support page reclaim, enforcing the limit at page fault time implies
+ that, the application will get SIGBUS signal if it tries to access
+ HugeTLB pages beyond its limit. This requires the application to know
+ beforehand how much HugeTLB pages it would require for its use. The
+ control group is tracked in the third page lru pointer. This means
+ that we cannot use the controller with huge page less than 3 pages.
+
+config CPUSETS
+ bool "Cpuset controller"
+ depends on SMP
+ help
+ This option will let you create and manage CPUSETs which
+ allow dynamically partitioning a system into sets of CPUs and
+ Memory Nodes and assigning tasks to run only within those sets.
+ This is primarily useful on large SMP or NUMA systems.
+
+ Say N if unsure.
+
+config PROC_PID_CPUSET
+ bool "Include legacy /proc/<pid>/cpuset file"
+ depends on CPUSETS
+ default y
+
+config CGROUP_DEVICE
+ bool "Device controller"
+ help
+ Provides a cgroup controller implementing whitelists for
+ devices which a process in the cgroup can mknod or open.
+
+config CGROUP_CPUACCT
+ bool "Simple CPU accounting controller"
+ help
+ Provides a simple controller for monitoring the
+ total CPU consumed by the tasks in a cgroup.
+
+config CGROUP_PERF
+ bool "Perf controller"
+ depends on PERF_EVENTS
+ help
+ This option extends the perf per-cpu mode to restrict monitoring
+ to threads which belong to the cgroup specified and run on the
+ designated cpu. Or this can be used to have cgroup ID in samples
+ so that it can monitor performance events among cgroups.
+
+ Say N if unsure.
+
+config CGROUP_BPF
+ bool "Support for eBPF programs attached to cgroups"
+ depends on BPF_SYSCALL
+ select SOCK_CGROUP_DATA
+ help
+ Allow attaching eBPF programs to a cgroup using the bpf(2)
+ syscall command BPF_PROG_ATTACH.
+
+ In which context these programs are accessed depends on the type
+ of attachment. For instance, programs that are attached using
+ BPF_CGROUP_INET_INGRESS will be executed on the ingress path of
+ inet sockets.
+
+config CGROUP_MISC
+ bool "Misc resource controller"
+ default n
+ help
+ Provides a controller for miscellaneous resources on a host.
+
+ Miscellaneous scalar resources are the resources on the host system
+ which cannot be abstracted like the other cgroups. This controller
+ tracks and limits the miscellaneous resources used by a process
+ attached to a cgroup hierarchy.
+
+ For more information, please check misc cgroup section in
+ /Documentation/admin-guide/cgroup-v2.rst.
+
+config CGROUP_DEBUG
+ bool "Debug controller"
+ default n
+ depends on DEBUG_KERNEL
+ help
+ This option enables a simple controller that exports
+ debugging information about the cgroups framework. This
+ controller is for control cgroup debugging only. Its
+ interfaces are not stable.
+
+ Say N.
+
+config SOCK_CGROUP_DATA
+ bool
+ default n
+
+endif # CGROUPS
+
+menuconfig NAMESPACES
+ bool "Namespaces support" if EXPERT
+ depends on MULTIUSER
+ default !EXPERT
+ help
+ Provides the way to make tasks work with different objects using
+ the same id. For example same IPC id may refer to different objects
+ or same user id or pid may refer to different tasks when used in
+ different namespaces.
+
+if NAMESPACES
+
+config UTS_NS
+ bool "UTS namespace"
+ default y
+ help
+ In this namespace tasks see different info provided with the
+ uname() system call
+
+config TIME_NS
+ bool "TIME namespace"
+ depends on GENERIC_VDSO_TIME_NS
+ default y
+ help
+ In this namespace boottime and monotonic clocks can be set.
+ The time will keep going with the same pace.
+
+config IPC_NS
+ bool "IPC namespace"
+ depends on (SYSVIPC || POSIX_MQUEUE)
+ default y
+ help
+ In this namespace tasks work with IPC ids which correspond to
+ different IPC objects in different namespaces.
+
+config USER_NS
+ bool "User namespace"
+ default n
+ help
+ This allows containers, i.e. vservers, to use user namespaces
+ to provide different user info for different servers.
+
+ When user namespaces are enabled in the kernel it is
+ recommended that the MEMCG option also be enabled and that
+ user-space use the memory control groups to limit the amount
+ of memory a memory unprivileged users can use.
+
+ If unsure, say N.
+
+config PID_NS
+ bool "PID Namespaces"
+ default y
+ help
+ Support process id namespaces. This allows having multiple
+ processes with the same pid as long as they are in different
+ pid namespaces. This is a building block of containers.
+
+config NET_NS
+ bool "Network namespace"
+ depends on NET
+ default y
+ help
+ Allow user space to create what appear to be multiple instances
+ of the network stack.
+
+endif # NAMESPACES
+
+config CHECKPOINT_RESTORE
+ bool "Checkpoint/restore support"
+ depends on PROC_FS
+ select PROC_CHILDREN
+ select KCMP
+ default n
+ help
+ Enables additional kernel features in a sake of checkpoint/restore.
+ In particular it adds auxiliary prctl codes to setup process text,
+ data and heap segment sizes, and a few additional /proc filesystem
+ entries.
+
+ If unsure, say N here.
+
+config SCHED_AUTOGROUP
+ bool "Automatic process group scheduling"
+ select CGROUPS
+ select CGROUP_SCHED
+ select FAIR_GROUP_SCHED
+ help
+ This option optimizes the scheduler for common desktop workloads by
+ automatically creating and populating task groups. This separation
+ of workloads isolates aggressive CPU burners (like build jobs) from
+ desktop applications. Task group autogeneration is currently based
+ upon task session.
+
+config SYSFS_DEPRECATED
+ bool "Enable deprecated sysfs features to support old userspace tools"
+ depends on SYSFS
+ default n
+ help
+ This option adds code that switches the layout of the "block" class
+ devices, to not show up in /sys/class/block/, but only in
+ /sys/block/.
+
+ This switch is only active when the sysfs.deprecated=1 boot option is
+ passed or the SYSFS_DEPRECATED_V2 option is set.
+
+ This option allows new kernels to run on old distributions and tools,
+ which might get confused by /sys/class/block/. Since 2007/2008 all
+ major distributions and tools handle this just fine.
+
+ Recent distributions and userspace tools after 2009/2010 depend on
+ the existence of /sys/class/block/, and will not work with this
+ option enabled.
+
+ Only if you are using a new kernel on an old distribution, you might
+ need to say Y here.
+
+config SYSFS_DEPRECATED_V2
+ bool "Enable deprecated sysfs features by default"
+ default n
+ depends on SYSFS
+ depends on SYSFS_DEPRECATED
+ help
+ Enable deprecated sysfs by default.
+
+ See the CONFIG_SYSFS_DEPRECATED option for more details about this
+ option.
+
+ Only if you are using a new kernel on an old distribution, you might
+ need to say Y here. Even then, odds are you would not need it
+ enabled, you can always pass the boot option if absolutely necessary.
+
+config RELAY
+ bool "Kernel->user space relay support (formerly relayfs)"
+ select IRQ_WORK
+ help
+ This option enables support for relay interface support in
+ certain file systems (such as debugfs).
+ It is designed to provide an efficient mechanism for tools and
+ facilities to relay large amounts of data from kernel space to
+ user space.
+
+ If unsure, say N.
+
+config BLK_DEV_INITRD
+ bool "Initial RAM filesystem and RAM disk (initramfs/initrd) support"
+ help
+ The initial RAM filesystem is a ramfs which is loaded by the
+ boot loader (loadlin or lilo) and that is mounted as root
+ before the normal boot procedure. It is typically used to
+ load modules needed to mount the "real" root file system,
+ etc. See <file:Documentation/admin-guide/initrd.rst> for details.
+
+ If RAM disk support (BLK_DEV_RAM) is also included, this
+ also enables initial RAM disk (initrd) support and adds
+ 15 Kbytes (more on some other architectures) to the kernel size.
+
+ If unsure say Y.
+
+if BLK_DEV_INITRD
+
+source "usr/Kconfig"
+
+endif
+
+config BOOT_CONFIG
+ bool "Boot config support"
+ select BLK_DEV_INITRD if !BOOT_CONFIG_EMBED
+ help
+ Extra boot config allows system admin to pass a config file as
+ complemental extension of kernel cmdline when booting.
+ The boot config file must be attached at the end of initramfs
+ with checksum, size and magic word.
+ See <file:Documentation/admin-guide/bootconfig.rst> for details.
+
+ If unsure, say Y.
+
+config BOOT_CONFIG_EMBED
+ bool "Embed bootconfig file in the kernel"
+ depends on BOOT_CONFIG
+ help
+ Embed a bootconfig file given by BOOT_CONFIG_EMBED_FILE in the
+ kernel. Usually, the bootconfig file is loaded with the initrd
+ image. But if the system doesn't support initrd, this option will
+ help you by embedding a bootconfig file while building the kernel.
+
+ If unsure, say N.
+
+config BOOT_CONFIG_EMBED_FILE
+ string "Embedded bootconfig file path"
+ depends on BOOT_CONFIG_EMBED
+ help
+ Specify a bootconfig file which will be embedded to the kernel.
+ This bootconfig will be used if there is no initrd or no other
+ bootconfig in the initrd.
+
+config INITRAMFS_PRESERVE_MTIME
+ bool "Preserve cpio archive mtimes in initramfs"
+ default y
+ help
+ Each entry in an initramfs cpio archive carries an mtime value. When
+ enabled, extracted cpio items take this mtime, with directory mtime
+ setting deferred until after creation of any child entries.
+
+ If unsure, say Y.
+
+choice
+ prompt "Compiler optimization level"
+ default CC_OPTIMIZE_FOR_PERFORMANCE
+
+config CC_OPTIMIZE_FOR_PERFORMANCE
+ bool "Optimize for performance (-O2)"
+ help
+ This is the default optimization level for the kernel, building
+ with the "-O2" compiler flag for best performance and most
+ helpful compile-time warnings.
+
+config CC_OPTIMIZE_FOR_SIZE
+ bool "Optimize for size (-Os)"
+ help
+ Choosing this option will pass "-Os" to your compiler resulting
+ in a smaller kernel.
+
+endchoice
+
+config HAVE_LD_DEAD_CODE_DATA_ELIMINATION
+ bool
+ help
+ This requires that the arch annotates or otherwise protects
+ its external entry points from being discarded. Linker scripts
+ must also merge .text.*, .data.*, and .bss.* correctly into
+ output sections. Care must be taken not to pull in unrelated
+ sections (e.g., '.text.init'). Typically '.' in section names
+ is used to distinguish them from label names / C identifiers.
+
+config LD_DEAD_CODE_DATA_ELIMINATION
+ bool "Dead code and data elimination (EXPERIMENTAL)"
+ depends on HAVE_LD_DEAD_CODE_DATA_ELIMINATION
+ depends on EXPERT
+ depends on $(cc-option,-ffunction-sections -fdata-sections)
+ depends on $(ld-option,--gc-sections)
+ help
+ Enable this if you want to do dead code and data elimination with
+ the linker by compiling with -ffunction-sections -fdata-sections,
+ and linking with --gc-sections.
+
+ This can reduce on disk and in-memory size of the kernel
+ code and static data, particularly for small configs and
+ on small systems. This has the possibility of introducing
+ silently broken kernel if the required annotations are not
+ present. This option is not well tested yet, so use at your
+ own risk.
+
+config LD_ORPHAN_WARN
+ def_bool y
+ depends on ARCH_WANT_LD_ORPHAN_WARN
+ depends on $(ld-option,--orphan-handling=warn)
+
+config SYSCTL
+ bool
+
+config HAVE_UID16
+ bool
+
+config SYSCTL_EXCEPTION_TRACE
+ bool
+ help
+ Enable support for /proc/sys/debug/exception-trace.
+
+config SYSCTL_ARCH_UNALIGN_NO_WARN
+ bool
+ help
+ Enable support for /proc/sys/kernel/ignore-unaligned-usertrap
+ Allows arch to define/use @no_unaligned_warning to possibly warn
+ about unaligned access emulation going on under the hood.
+
+config SYSCTL_ARCH_UNALIGN_ALLOW
+ bool
+ help
+ Enable support for /proc/sys/kernel/unaligned-trap
+ Allows arches to define/use @unaligned_enabled to runtime toggle
+ the unaligned access emulation.
+ see arch/parisc/kernel/unaligned.c for reference
+
+config HAVE_PCSPKR_PLATFORM
+ bool
+
+# interpreter that classic socket filters depend on
+config BPF
+ bool
+ select CRYPTO_LIB_SHA1
+
+menuconfig EXPERT
+ bool "Configure standard kernel features (expert users)"
+ # Unhide debug options, to make the on-by-default options visible
+ select DEBUG_KERNEL
+ help
+ This option allows certain base kernel options and settings
+ to be disabled or tweaked. This is for specialized
+ environments which can tolerate a "non-standard" kernel.
+ Only use this if you really know what you are doing.
+
+config UID16
+ bool "Enable 16-bit UID system calls" if EXPERT
+ depends on HAVE_UID16 && MULTIUSER
+ default y
+ help
+ This enables the legacy 16-bit UID syscall wrappers.
+
+config MULTIUSER
+ bool "Multiple users, groups and capabilities support" if EXPERT
+ default y
+ help
+ This option enables support for non-root users, groups and
+ capabilities.
+
+ If you say N here, all processes will run with UID 0, GID 0, and all
+ possible capabilities. Saying N here also compiles out support for
+ system calls related to UIDs, GIDs, and capabilities, such as setuid,
+ setgid, and capset.
+
+ If unsure, say Y here.
+
+config SGETMASK_SYSCALL
+ bool "sgetmask/ssetmask syscalls support" if EXPERT
+ def_bool PARISC || M68K || PPC || MIPS || X86 || SPARC || MICROBLAZE || SUPERH
+ help
+ sys_sgetmask and sys_ssetmask are obsolete system calls
+ no longer supported in libc but still enabled by default in some
+ architectures.
+
+ If unsure, leave the default option here.
+
+config SYSFS_SYSCALL
+ bool "Sysfs syscall support" if EXPERT
+ default y
+ help
+ sys_sysfs is an obsolete system call no longer supported in libc.
+ Note that disabling this option is more secure but might break
+ compatibility with some systems.
+
+ If unsure say Y here.
+
+config FHANDLE
+ bool "open by fhandle syscalls" if EXPERT
+ select EXPORTFS
+ default y
+ help
+ If you say Y here, a user level program will be able to map
+ file names to handle and then later use the handle for
+ different file system operations. This is useful in implementing
+ userspace file servers, which now track files using handles instead
+ of names. The handle would remain the same even if file names
+ get renamed. Enables open_by_handle_at(2) and name_to_handle_at(2)
+ syscalls.
+
+config POSIX_TIMERS
+ bool "Posix Clocks & timers" if EXPERT
+ default y
+ help
+ This includes native support for POSIX timers to the kernel.
+ Some embedded systems have no use for them and therefore they
+ can be configured out to reduce the size of the kernel image.
+
+ When this option is disabled, the following syscalls won't be
+ available: timer_create, timer_gettime: timer_getoverrun,
+ timer_settime, timer_delete, clock_adjtime, getitimer,
+ setitimer, alarm. Furthermore, the clock_settime, clock_gettime,
+ clock_getres and clock_nanosleep syscalls will be limited to
+ CLOCK_REALTIME, CLOCK_MONOTONIC and CLOCK_BOOTTIME only.
+
+ If unsure say y.
+
+config PRINTK
+ default y
+ bool "Enable support for printk" if EXPERT
+ select IRQ_WORK
+ help
+ This option enables normal printk support. Removing it
+ eliminates most of the message strings from the kernel image
+ and makes the kernel more or less silent. As this makes it
+ very difficult to diagnose system problems, saying N here is
+ strongly discouraged.
+
+config BUG
+ bool "BUG() support" if EXPERT
+ default y
+ help
+ Disabling this option eliminates support for BUG and WARN, reducing
+ the size of your kernel image and potentially quietly ignoring
+ numerous fatal conditions. You should only consider disabling this
+ option for embedded systems with no facilities for reporting errors.
+ Just say Y.
+
+config ELF_CORE
+ depends on COREDUMP
+ default y
+ bool "Enable ELF core dumps" if EXPERT
+ help
+ Enable support for generating core dumps. Disabling saves about 4k.
+
+
+config PCSPKR_PLATFORM
+ bool "Enable PC-Speaker support" if EXPERT
+ depends on HAVE_PCSPKR_PLATFORM
+ select I8253_LOCK
+ default y
+ help
+ This option allows to disable the internal PC-Speaker
+ support, saving some memory.
+
+config BASE_FULL
+ default y
+ bool "Enable full-sized data structures for core" if EXPERT
+ help
+ Disabling this option reduces the size of miscellaneous core
+ kernel data structures. This saves memory on small machines,
+ but may reduce performance.
+
+config FUTEX
+ bool "Enable futex support" if EXPERT
+ depends on !(SPARC32 && SMP)
+ default y
+ imply RT_MUTEXES
+ help
+ Disabling this option will cause the kernel to be built without
+ support for "fast userspace mutexes". The resulting kernel may not
+ run glibc-based applications correctly.
+
+config FUTEX_PI
+ bool
+ depends on FUTEX && RT_MUTEXES
+ default y
+
+config EPOLL
+ bool "Enable eventpoll support" if EXPERT
+ default y
+ help
+ Disabling this option will cause the kernel to be built without
+ support for epoll family of system calls.
+
+config SIGNALFD
+ bool "Enable signalfd() system call" if EXPERT
+ default y
+ help
+ Enable the signalfd() system call that allows to receive signals
+ on a file descriptor.
+
+ If unsure, say Y.
+
+config TIMERFD
+ bool "Enable timerfd() system call" if EXPERT
+ default y
+ help
+ Enable the timerfd() system call that allows to receive timer
+ events on a file descriptor.
+
+ If unsure, say Y.
+
+config EVENTFD
+ bool "Enable eventfd() system call" if EXPERT
+ default y
+ help
+ Enable the eventfd() system call that allows to receive both
+ kernel notification (ie. KAIO) or userspace notifications.
+
+ If unsure, say Y.
+
+config SHMEM
+ bool "Use full shmem filesystem" if EXPERT
+ default y
+ depends on MMU
+ help
+ The shmem is an internal filesystem used to manage shared memory.
+ It is backed by swap and manages resource limits. It is also exported
+ to userspace as tmpfs if TMPFS is enabled. Disabling this
+ option replaces shmem and tmpfs with the much simpler ramfs code,
+ which may be appropriate on small systems without swap.
+
+config AIO
+ bool "Enable AIO support" if EXPERT
+ default y
+ help
+ This option enables POSIX asynchronous I/O which may by used
+ by some high performance threaded applications. Disabling
+ this option saves about 7k.
+
+config IO_URING
+ bool "Enable IO uring support" if EXPERT
+ select IO_WQ
+ default y
+ help
+ This option enables support for the io_uring interface, enabling
+ applications to submit and complete IO through submission and
+ completion rings that are shared between the kernel and application.
+
+config ADVISE_SYSCALLS
+ bool "Enable madvise/fadvise syscalls" if EXPERT
+ default y
+ help
+ This option enables the madvise and fadvise syscalls, used by
+ applications to advise the kernel about their future memory or file
+ usage, improving performance. If building an embedded system where no
+ applications use these syscalls, you can disable this option to save
+ space.
+
+config MEMBARRIER
+ bool "Enable membarrier() system call" if EXPERT
+ default y
+ help
+ Enable the membarrier() system call that allows issuing memory
+ barriers across all running threads, which can be used to distribute
+ the cost of user-space memory barriers asymmetrically by transforming
+ pairs of memory barriers into pairs consisting of membarrier() and a
+ compiler barrier.
+
+ If unsure, say Y.
+
+config KALLSYMS
+ bool "Load all symbols for debugging/ksymoops" if EXPERT
+ default y
+ help
+ Say Y here to let the kernel print out symbolic crash information and
+ symbolic stack backtraces. This increases the size of the kernel
+ somewhat, as all symbols have to be loaded into the kernel image.
+
+config KALLSYMS_ALL
+ bool "Include all symbols in kallsyms"
+ depends on DEBUG_KERNEL && KALLSYMS
+ help
+ Normally kallsyms only contains the symbols of functions for nicer
+ OOPS messages and backtraces (i.e., symbols from the text and inittext
+ sections). This is sufficient for most cases. And only if you want to
+ enable kernel live patching, or other less common use cases (e.g.,
+ when a debugger is used) all symbols are required (i.e., names of
+ variables from the data sections, etc).
+
+ This option makes sure that all symbols are loaded into the kernel
+ image (i.e., symbols from all sections) in cost of increased kernel
+ size (depending on the kernel configuration, it may be 300KiB or
+ something like this).
+
+ Say N unless you really need all symbols, or kernel live patching.
+
+config KALLSYMS_ABSOLUTE_PERCPU
+ bool
+ depends on KALLSYMS
+ default X86_64 && SMP
+
+config KALLSYMS_BASE_RELATIVE
+ bool
+ depends on KALLSYMS
+ default !IA64
+ help
+ Instead of emitting them as absolute values in the native word size,
+ emit the symbol references in the kallsyms table as 32-bit entries,
+ each containing a relative value in the range [base, base + U32_MAX]
+ or, when KALLSYMS_ABSOLUTE_PERCPU is in effect, each containing either
+ an absolute value in the range [0, S32_MAX] or a relative value in the
+ range [base, base + S32_MAX], where base is the lowest relative symbol
+ address encountered in the image.
+
+ On 64-bit builds, this reduces the size of the address table by 50%,
+ but more importantly, it results in entries whose values are build
+ time constants, and no relocation pass is required at runtime to fix
+ up the entries based on the runtime load address of the kernel.
+
+# end of the "standard kernel features (expert users)" menu
+
+# syscall, maps, verifier
+
+config ARCH_HAS_MEMBARRIER_CALLBACKS
+ bool
+
+config ARCH_HAS_MEMBARRIER_SYNC_CORE
+ bool
+
+config KCMP
+ bool "Enable kcmp() system call" if EXPERT
+ help
+ Enable the kernel resource comparison system call. It provides
+ user-space with the ability to compare two processes to see if they
+ share a common resource, such as a file descriptor or even virtual
+ memory space.
+
+ If unsure, say N.
+
+config RSEQ
+ bool "Enable rseq() system call" if EXPERT
+ default y
+ depends on HAVE_RSEQ
+ select MEMBARRIER
+ help
+ Enable the restartable sequences system call. It provides a
+ user-space cache for the current CPU number value, which
+ speeds up getting the current CPU number from user-space,
+ as well as an ABI to speed up user-space operations on
+ per-CPU data.
+
+ If unsure, say Y.
+
+config DEBUG_RSEQ
+ default n
+ bool "Enabled debugging of rseq() system call" if EXPERT
+ depends on RSEQ && DEBUG_KERNEL
+ help
+ Enable extra debugging checks for the rseq system call.
+
+ If unsure, say N.
+
+config EMBEDDED
+ bool "Embedded system"
+ select EXPERT
+ help
+ This option should be enabled if compiling the kernel for
+ an embedded system so certain expert options are available
+ for configuration.
+
+config HAVE_PERF_EVENTS
+ bool
+ help
+ See tools/perf/design.txt for details.
+
+config GUEST_PERF_EVENTS
+ bool
+ depends on HAVE_PERF_EVENTS
+
+config PERF_USE_VMALLOC
+ bool
+ help
+ See tools/perf/design.txt for details
+
+config PC104
+ bool "PC/104 support" if EXPERT
+ help
+ Expose PC/104 form factor device drivers and options available for
+ selection and configuration. Enable this option if your target
+ machine has a PC/104 bus.
+
+menu "Kernel Performance Events And Counters"
+
+config PERF_EVENTS
+ bool "Kernel performance events and counters"
+ default y if PROFILING
+ depends on HAVE_PERF_EVENTS
+ select IRQ_WORK
+ select SRCU
+ help
+ Enable kernel support for various performance events provided
+ by software and hardware.
+
+ Software events are supported either built-in or via the
+ use of generic tracepoints.
+
+ Most modern CPUs support performance events via performance
+ counter registers. These registers count the number of certain
+ types of hw events: such as instructions executed, cachemisses
+ suffered, or branches mis-predicted - without slowing down the
+ kernel or applications. These registers can also trigger interrupts
+ when a threshold number of events have passed - and can thus be
+ used to profile the code that runs on that CPU.
+
+ The Linux Performance Event subsystem provides an abstraction of
+ these software and hardware event capabilities, available via a
+ system call and used by the "perf" utility in tools/perf/. It
+ provides per task and per CPU counters, and it provides event
+ capabilities on top of those.
+
+ Say Y if unsure.
+
+config DEBUG_PERF_USE_VMALLOC
+ default n
+ bool "Debug: use vmalloc to back perf mmap() buffers"
+ depends on PERF_EVENTS && DEBUG_KERNEL && !PPC
+ select PERF_USE_VMALLOC
+ help
+ Use vmalloc memory to back perf mmap() buffers.
+
+ Mostly useful for debugging the vmalloc code on platforms
+ that don't require it.
+
+ Say N if unsure.
+
+endmenu
+
+config SYSTEM_DATA_VERIFICATION
+ def_bool n
+ select SYSTEM_TRUSTED_KEYRING
+ select KEYS
+ select CRYPTO
+ select CRYPTO_RSA
+ select ASYMMETRIC_KEY_TYPE
+ select ASYMMETRIC_PUBLIC_KEY_SUBTYPE
+ select ASN1
+ select OID_REGISTRY
+ select X509_CERTIFICATE_PARSER
+ select PKCS7_MESSAGE_PARSER
+ help
+ Provide PKCS#7 message verification using the contents of the system
+ trusted keyring to provide public keys. This then can be used for
+ module verification, kexec image verification and firmware blob
+ verification.
+
+config PROFILING
+ bool "Profiling support"
+ help
+ Say Y here to enable the extended profiling support mechanisms used
+ by profilers.
+
+config RUST
+ bool "Rust support"
+ depends on HAVE_RUST
+ depends on RUST_IS_AVAILABLE
+ depends on !MODVERSIONS
+ depends on !GCC_PLUGINS
+ depends on !RANDSTRUCT
+ depends on !DEBUG_INFO_BTF || PAHOLE_HAS_LANG_EXCLUDE
+ select CONSTRUCTORS
+ help
+ Enables Rust support in the kernel.
+
+ This allows other Rust-related options, like drivers written in Rust,
+ to be selected.
+
+ It is also required to be able to load external kernel modules
+ written in Rust.
+
+ See Documentation/rust/ for more information.
+
+ If unsure, say N.
+
+config RUSTC_VERSION_TEXT
+ string
+ depends on RUST
+ default $(shell,command -v $(RUSTC) >/dev/null 2>&1 && $(RUSTC) --version || echo n)
+
+config BINDGEN_VERSION_TEXT
+ string
+ depends on RUST
+ default $(shell,command -v $(BINDGEN) >/dev/null 2>&1 && $(BINDGEN) --version || echo n)
+
+#
+# Place an empty function call at each tracepoint site. Can be
+# dynamically changed for a probe function.
+#
+config TRACEPOINTS
+ bool
+
+endmenu # General setup
+
+source "arch/Kconfig"
+
+config RT_MUTEXES
+ bool
+ default y if PREEMPT_RT
+
+config BASE_SMALL
+ int
+ default 0 if BASE_FULL
+ default 1 if !BASE_FULL
+
+config MODULE_SIG_FORMAT
+ def_bool n
+ select SYSTEM_DATA_VERIFICATION
+
+source "kernel/module/Kconfig"
+
+config INIT_ALL_POSSIBLE
+ bool
+ help
+ Back when each arch used to define their own cpu_online_mask and
+ cpu_possible_mask, some of them chose to initialize cpu_possible_mask
+ with all 1s, and others with all 0s. When they were centralised,
+ it was better to provide this option than to break all the archs
+ and have several arch maintainers pursuing me down dark alleys.
+
+source "block/Kconfig"
+
+config PREEMPT_NOTIFIERS
+ bool
+
+config PADATA
+ depends on SMP
+ bool
+
+config ASN1
+ tristate
+ help
+ Build a simple ASN.1 grammar compiler that produces a bytecode output
+ that can be interpreted by the ASN.1 stream decoder and used to
+ inform it as to what tags are to be expected in a stream and what
+ functions to call on what tags.
+
+source "kernel/Kconfig.locks"
+
+config ARCH_HAS_NON_OVERLAPPING_ADDRESS_SPACE
+ bool
+
+config ARCH_HAS_SYNC_CORE_BEFORE_USERMODE
+ bool
+
+# It may be useful for an architecture to override the definitions of the
+# SYSCALL_DEFINE() and __SYSCALL_DEFINEx() macros in <linux/syscalls.h>
+# and the COMPAT_ variants in <linux/compat.h>, in particular to use a
+# different calling convention for syscalls. They can also override the
+# macros for not-implemented syscalls in kernel/sys_ni.c and
+# kernel/time/posix-stubs.c. All these overrides need to be available in
+# <asm/syscall_wrapper.h>.
+config ARCH_HAS_SYSCALL_WRAPPER
+ def_bool n
diff --git a/init/Makefile b/init/Makefile
new file mode 100644
index 000000000..8316c23be
--- /dev/null
+++ b/init/Makefile
@@ -0,0 +1,61 @@
+# SPDX-License-Identifier: GPL-2.0
+#
+# Makefile for the linux kernel.
+#
+
+ccflags-y := -fno-function-sections -fno-data-sections
+
+obj-y := main.o version.o mounts.o
+ifneq ($(CONFIG_BLK_DEV_INITRD),y)
+obj-y += noinitramfs.o
+else
+obj-$(CONFIG_BLK_DEV_INITRD) += initramfs.o
+endif
+obj-$(CONFIG_GENERIC_CALIBRATE_DELAY) += calibrate.o
+
+obj-y += init_task.o
+
+mounts-y := do_mounts.o
+mounts-$(CONFIG_BLK_DEV_RAM) += do_mounts_rd.o
+mounts-$(CONFIG_BLK_DEV_INITRD) += do_mounts_initrd.o
+
+#
+# UTS_VERSION
+#
+
+smp-flag-$(CONFIG_SMP) := SMP
+preempt-flag-$(CONFIG_PREEMPT_BUILD) := PREEMPT
+preempt-flag-$(CONFIG_PREEMPT_DYNAMIC) := PREEMPT_DYNAMIC
+preempt-flag-$(CONFIG_PREEMPT_RT) := PREEMPT_RT
+
+build-version = $(or $(KBUILD_BUILD_VERSION), $(build-version-auto))
+build-timestamp = $(or $(KBUILD_BUILD_TIMESTAMP), $(build-timestamp-auto))
+
+# Maximum length of UTS_VERSION is 64 chars
+filechk_uts_version = \
+ utsver=$$(echo '$(pound)'"$(build-version)" $(smp-flag-y) $(preempt-flag-y) "$(build-timestamp)" | cut -b -64); \
+ echo '$(pound)'define UTS_VERSION \""$${utsver}"\"
+
+#
+# Build version.c with temporary UTS_VERSION
+#
+
+$(obj)/utsversion-tmp.h: FORCE
+ $(call filechk,uts_version)
+
+clean-files += utsversion-tmp.h
+
+$(obj)/version.o: $(obj)/utsversion-tmp.h
+CFLAGS_version.o := -include $(obj)/utsversion-tmp.h
+
+#
+# Build version-timestamp.c with final UTS_VERSION
+#
+
+include/generated/utsversion.h: build-version-auto = $(shell $(srctree)/$(src)/build-version)
+include/generated/utsversion.h: build-timestamp-auto = $(shell LC_ALL=C date)
+include/generated/utsversion.h: FORCE
+ $(call filechk,uts_version)
+
+$(obj)/version-timestamp.o: include/generated/utsversion.h
+CFLAGS_version-timestamp.o := -include include/generated/utsversion.h
diff --git a/init/build-version b/init/build-version
new file mode 100755
index 000000000..537d45815
--- /dev/null
+++ b/init/build-version
@@ -0,0 +1,10 @@
+#!/bin/sh
+# SPDX-License-Identifier: GPL-2.0-only
+
+prev_ver=$(cat .version 2>/dev/null) &&
+ver=$(expr ${prev_ver} + 1 2>/dev/null) ||
+ver=1
+
+echo ${ver} > .version
+
+echo ${ver}
diff --git a/init/calibrate.c b/init/calibrate.c
new file mode 100644
index 000000000..f3831272f
--- /dev/null
+++ b/init/calibrate.c
@@ -0,0 +1,316 @@
+// SPDX-License-Identifier: GPL-2.0
+/* calibrate.c: default delay calibration
+ *
+ * Excised from init/main.c
+ * Copyright (C) 1991, 1992 Linus Torvalds
+ */
+
+#include <linux/jiffies.h>
+#include <linux/delay.h>
+#include <linux/init.h>
+#include <linux/timex.h>
+#include <linux/smp.h>
+#include <linux/percpu.h>
+
+unsigned long lpj_fine;
+unsigned long preset_lpj;
+static int __init lpj_setup(char *str)
+{
+ preset_lpj = simple_strtoul(str,NULL,0);
+ return 1;
+}
+
+__setup("lpj=", lpj_setup);
+
+#ifdef ARCH_HAS_READ_CURRENT_TIMER
+
+/* This routine uses the read_current_timer() routine and gets the
+ * loops per jiffy directly, instead of guessing it using delay().
+ * Also, this code tries to handle non-maskable asynchronous events
+ * (like SMIs)
+ */
+#define DELAY_CALIBRATION_TICKS ((HZ < 100) ? 1 : (HZ/100))
+#define MAX_DIRECT_CALIBRATION_RETRIES 5
+
+static unsigned long calibrate_delay_direct(void)
+{
+ unsigned long pre_start, start, post_start;
+ unsigned long pre_end, end, post_end;
+ unsigned long start_jiffies;
+ unsigned long timer_rate_min, timer_rate_max;
+ unsigned long good_timer_sum = 0;
+ unsigned long good_timer_count = 0;
+ unsigned long measured_times[MAX_DIRECT_CALIBRATION_RETRIES];
+ int max = -1; /* index of measured_times with max/min values or not set */
+ int min = -1;
+ int i;
+
+ if (read_current_timer(&pre_start) < 0 )
+ return 0;
+
+ /*
+ * A simple loop like
+ * while ( jiffies < start_jiffies+1)
+ * start = read_current_timer();
+ * will not do. As we don't really know whether jiffy switch
+ * happened first or timer_value was read first. And some asynchronous
+ * event can happen between these two events introducing errors in lpj.
+ *
+ * So, we do
+ * 1. pre_start <- When we are sure that jiffy switch hasn't happened
+ * 2. check jiffy switch
+ * 3. start <- timer value before or after jiffy switch
+ * 4. post_start <- When we are sure that jiffy switch has happened
+ *
+ * Note, we don't know anything about order of 2 and 3.
+ * Now, by looking at post_start and pre_start difference, we can
+ * check whether any asynchronous event happened or not
+ */
+
+ for (i = 0; i < MAX_DIRECT_CALIBRATION_RETRIES; i++) {
+ pre_start = 0;
+ read_current_timer(&start);
+ start_jiffies = jiffies;
+ while (time_before_eq(jiffies, start_jiffies + 1)) {
+ pre_start = start;
+ read_current_timer(&start);
+ }
+ read_current_timer(&post_start);
+
+ pre_end = 0;
+ end = post_start;
+ while (time_before_eq(jiffies, start_jiffies + 1 +
+ DELAY_CALIBRATION_TICKS)) {
+ pre_end = end;
+ read_current_timer(&end);
+ }
+ read_current_timer(&post_end);
+
+ timer_rate_max = (post_end - pre_start) /
+ DELAY_CALIBRATION_TICKS;
+ timer_rate_min = (pre_end - post_start) /
+ DELAY_CALIBRATION_TICKS;
+
+ /*
+ * If the upper limit and lower limit of the timer_rate is
+ * >= 12.5% apart, redo calibration.
+ */
+ if (start >= post_end)
+ printk(KERN_NOTICE "calibrate_delay_direct() ignoring "
+ "timer_rate as we had a TSC wrap around"
+ " start=%lu >=post_end=%lu\n",
+ start, post_end);
+ if (start < post_end && pre_start != 0 && pre_end != 0 &&
+ (timer_rate_max - timer_rate_min) < (timer_rate_max >> 3)) {
+ good_timer_count++;
+ good_timer_sum += timer_rate_max;
+ measured_times[i] = timer_rate_max;
+ if (max < 0 || timer_rate_max > measured_times[max])
+ max = i;
+ if (min < 0 || timer_rate_max < measured_times[min])
+ min = i;
+ } else
+ measured_times[i] = 0;
+
+ }
+
+ /*
+ * Find the maximum & minimum - if they differ too much throw out the
+ * one with the largest difference from the mean and try again...
+ */
+ while (good_timer_count > 1) {
+ unsigned long estimate;
+ unsigned long maxdiff;
+
+ /* compute the estimate */
+ estimate = (good_timer_sum/good_timer_count);
+ maxdiff = estimate >> 3;
+
+ /* if range is within 12% let's take it */
+ if ((measured_times[max] - measured_times[min]) < maxdiff)
+ return estimate;
+
+ /* ok - drop the worse value and try again... */
+ good_timer_sum = 0;
+ good_timer_count = 0;
+ if ((measured_times[max] - estimate) <
+ (estimate - measured_times[min])) {
+ printk(KERN_NOTICE "calibrate_delay_direct() dropping "
+ "min bogoMips estimate %d = %lu\n",
+ min, measured_times[min]);
+ measured_times[min] = 0;
+ min = max;
+ } else {
+ printk(KERN_NOTICE "calibrate_delay_direct() dropping "
+ "max bogoMips estimate %d = %lu\n",
+ max, measured_times[max]);
+ measured_times[max] = 0;
+ max = min;
+ }
+
+ for (i = 0; i < MAX_DIRECT_CALIBRATION_RETRIES; i++) {
+ if (measured_times[i] == 0)
+ continue;
+ good_timer_count++;
+ good_timer_sum += measured_times[i];
+ if (measured_times[i] < measured_times[min])
+ min = i;
+ if (measured_times[i] > measured_times[max])
+ max = i;
+ }
+
+ }
+
+ printk(KERN_NOTICE "calibrate_delay_direct() failed to get a good "
+ "estimate for loops_per_jiffy.\nProbably due to long platform "
+ "interrupts. Consider using \"lpj=\" boot option.\n");
+ return 0;
+}
+#else
+static unsigned long calibrate_delay_direct(void)
+{
+ return 0;
+}
+#endif
+
+/*
+ * This is the number of bits of precision for the loops_per_jiffy. Each
+ * time we refine our estimate after the first takes 1.5/HZ seconds, so try
+ * to start with a good estimate.
+ * For the boot cpu we can skip the delay calibration and assign it a value
+ * calculated based on the timer frequency.
+ * For the rest of the CPUs we cannot assume that the timer frequency is same as
+ * the cpu frequency, hence do the calibration for those.
+ */
+#define LPS_PREC 8
+
+static unsigned long calibrate_delay_converge(void)
+{
+ /* First stage - slowly accelerate to find initial bounds */
+ unsigned long lpj, lpj_base, ticks, loopadd, loopadd_base, chop_limit;
+ int trials = 0, band = 0, trial_in_band = 0;
+
+ lpj = (1<<12);
+
+ /* wait for "start of" clock tick */
+ ticks = jiffies;
+ while (ticks == jiffies)
+ ; /* nothing */
+ /* Go .. */
+ ticks = jiffies;
+ do {
+ if (++trial_in_band == (1<<band)) {
+ ++band;
+ trial_in_band = 0;
+ }
+ __delay(lpj * band);
+ trials += band;
+ } while (ticks == jiffies);
+ /*
+ * We overshot, so retreat to a clear underestimate. Then estimate
+ * the largest likely undershoot. This defines our chop bounds.
+ */
+ trials -= band;
+ loopadd_base = lpj * band;
+ lpj_base = lpj * trials;
+
+recalibrate:
+ lpj = lpj_base;
+ loopadd = loopadd_base;
+
+ /*
+ * Do a binary approximation to get lpj set to
+ * equal one clock (up to LPS_PREC bits)
+ */
+ chop_limit = lpj >> LPS_PREC;
+ while (loopadd > chop_limit) {
+ lpj += loopadd;
+ ticks = jiffies;
+ while (ticks == jiffies)
+ ; /* nothing */
+ ticks = jiffies;
+ __delay(lpj);
+ if (jiffies != ticks) /* longer than 1 tick */
+ lpj -= loopadd;
+ loopadd >>= 1;
+ }
+ /*
+ * If we incremented every single time possible, presume we've
+ * massively underestimated initially, and retry with a higher
+ * start, and larger range. (Only seen on x86_64, due to SMIs)
+ */
+ if (lpj + loopadd * 2 == lpj_base + loopadd_base * 2) {
+ lpj_base = lpj;
+ loopadd_base <<= 2;
+ goto recalibrate;
+ }
+
+ return lpj;
+}
+
+static DEFINE_PER_CPU(unsigned long, cpu_loops_per_jiffy) = { 0 };
+
+/*
+ * Check if cpu calibration delay is already known. For example,
+ * some processors with multi-core sockets may have all cores
+ * with the same calibration delay.
+ *
+ * Architectures should override this function if a faster calibration
+ * method is available.
+ */
+unsigned long __attribute__((weak)) calibrate_delay_is_known(void)
+{
+ return 0;
+}
+
+/*
+ * Indicate the cpu delay calibration is done. This can be used by
+ * architectures to stop accepting delay timer registrations after this point.
+ */
+
+void __attribute__((weak)) calibration_delay_done(void)
+{
+}
+
+void calibrate_delay(void)
+{
+ unsigned long lpj;
+ static bool printed;
+ int this_cpu = smp_processor_id();
+
+ if (per_cpu(cpu_loops_per_jiffy, this_cpu)) {
+ lpj = per_cpu(cpu_loops_per_jiffy, this_cpu);
+ if (!printed)
+ pr_info("Calibrating delay loop (skipped) "
+ "already calibrated this CPU");
+ } else if (preset_lpj) {
+ lpj = preset_lpj;
+ if (!printed)
+ pr_info("Calibrating delay loop (skipped) "
+ "preset value.. ");
+ } else if ((!printed) && lpj_fine) {
+ lpj = lpj_fine;
+ pr_info("Calibrating delay loop (skipped), "
+ "value calculated using timer frequency.. ");
+ } else if ((lpj = calibrate_delay_is_known())) {
+ ;
+ } else if ((lpj = calibrate_delay_direct()) != 0) {
+ if (!printed)
+ pr_info("Calibrating delay using timer "
+ "specific routine.. ");
+ } else {
+ if (!printed)
+ pr_info("Calibrating delay loop... ");
+ lpj = calibrate_delay_converge();
+ }
+ per_cpu(cpu_loops_per_jiffy, this_cpu) = lpj;
+ if (!printed)
+ pr_cont("%lu.%02lu BogoMIPS (lpj=%lu)\n",
+ lpj/(500000/HZ),
+ (lpj/(5000/HZ)) % 100, lpj);
+
+ loops_per_jiffy = lpj;
+ printed = true;
+
+ calibration_delay_done();
+}
diff --git a/init/do_mounts.c b/init/do_mounts.c
new file mode 100644
index 000000000..2d705b9c8
--- /dev/null
+++ b/init/do_mounts.c
@@ -0,0 +1,674 @@
+// SPDX-License-Identifier: GPL-2.0-only
+#include <linux/module.h>
+#include <linux/sched.h>
+#include <linux/ctype.h>
+#include <linux/fd.h>
+#include <linux/tty.h>
+#include <linux/suspend.h>
+#include <linux/root_dev.h>
+#include <linux/security.h>
+#include <linux/delay.h>
+#include <linux/mount.h>
+#include <linux/device.h>
+#include <linux/init.h>
+#include <linux/fs.h>
+#include <linux/initrd.h>
+#include <linux/async.h>
+#include <linux/fs_struct.h>
+#include <linux/slab.h>
+#include <linux/ramfs.h>
+#include <linux/shmem_fs.h>
+
+#include <linux/nfs_fs.h>
+#include <linux/nfs_fs_sb.h>
+#include <linux/nfs_mount.h>
+#include <linux/raid/detect.h>
+#include <uapi/linux/mount.h>
+
+#include "do_mounts.h"
+
+int root_mountflags = MS_RDONLY | MS_SILENT;
+static char * __initdata root_device_name;
+static char __initdata saved_root_name[64];
+static int root_wait;
+
+dev_t ROOT_DEV;
+
+static int __init load_ramdisk(char *str)
+{
+ pr_warn("ignoring the deprecated load_ramdisk= option\n");
+ return 1;
+}
+__setup("load_ramdisk=", load_ramdisk);
+
+static int __init readonly(char *str)
+{
+ if (*str)
+ return 0;
+ root_mountflags |= MS_RDONLY;
+ return 1;
+}
+
+static int __init readwrite(char *str)
+{
+ if (*str)
+ return 0;
+ root_mountflags &= ~MS_RDONLY;
+ return 1;
+}
+
+__setup("ro", readonly);
+__setup("rw", readwrite);
+
+#ifdef CONFIG_BLOCK
+struct uuidcmp {
+ const char *uuid;
+ int len;
+};
+
+/**
+ * match_dev_by_uuid - callback for finding a partition using its uuid
+ * @dev: device passed in by the caller
+ * @data: opaque pointer to the desired struct uuidcmp to match
+ *
+ * Returns 1 if the device matches, and 0 otherwise.
+ */
+static int match_dev_by_uuid(struct device *dev, const void *data)
+{
+ struct block_device *bdev = dev_to_bdev(dev);
+ const struct uuidcmp *cmp = data;
+
+ if (!bdev->bd_meta_info ||
+ strncasecmp(cmp->uuid, bdev->bd_meta_info->uuid, cmp->len))
+ return 0;
+ return 1;
+}
+
+/**
+ * devt_from_partuuid - looks up the dev_t of a partition by its UUID
+ * @uuid_str: char array containing ascii UUID
+ *
+ * The function will return the first partition which contains a matching
+ * UUID value in its partition_meta_info struct. This does not search
+ * by filesystem UUIDs.
+ *
+ * If @uuid_str is followed by a "/PARTNROFF=%d", then the number will be
+ * extracted and used as an offset from the partition identified by the UUID.
+ *
+ * Returns the matching dev_t on success or 0 on failure.
+ */
+static dev_t devt_from_partuuid(const char *uuid_str)
+{
+ struct uuidcmp cmp;
+ struct device *dev = NULL;
+ dev_t devt = 0;
+ int offset = 0;
+ char *slash;
+
+ cmp.uuid = uuid_str;
+
+ slash = strchr(uuid_str, '/');
+ /* Check for optional partition number offset attributes. */
+ if (slash) {
+ char c = 0;
+
+ /* Explicitly fail on poor PARTUUID syntax. */
+ if (sscanf(slash + 1, "PARTNROFF=%d%c", &offset, &c) != 1)
+ goto clear_root_wait;
+ cmp.len = slash - uuid_str;
+ } else {
+ cmp.len = strlen(uuid_str);
+ }
+
+ if (!cmp.len)
+ goto clear_root_wait;
+
+ dev = class_find_device(&block_class, NULL, &cmp, &match_dev_by_uuid);
+ if (!dev)
+ return 0;
+
+ if (offset) {
+ /*
+ * Attempt to find the requested partition by adding an offset
+ * to the partition number found by UUID.
+ */
+ devt = part_devt(dev_to_disk(dev),
+ dev_to_bdev(dev)->bd_partno + offset);
+ } else {
+ devt = dev->devt;
+ }
+
+ put_device(dev);
+ return devt;
+
+clear_root_wait:
+ pr_err("VFS: PARTUUID= is invalid.\n"
+ "Expected PARTUUID=<valid-uuid-id>[/PARTNROFF=%%d]\n");
+ if (root_wait)
+ pr_err("Disabling rootwait; root= is invalid.\n");
+ root_wait = 0;
+ return 0;
+}
+
+/**
+ * match_dev_by_label - callback for finding a partition using its label
+ * @dev: device passed in by the caller
+ * @data: opaque pointer to the label to match
+ *
+ * Returns 1 if the device matches, and 0 otherwise.
+ */
+static int match_dev_by_label(struct device *dev, const void *data)
+{
+ struct block_device *bdev = dev_to_bdev(dev);
+ const char *label = data;
+
+ if (!bdev->bd_meta_info || strcmp(label, bdev->bd_meta_info->volname))
+ return 0;
+ return 1;
+}
+
+static dev_t devt_from_partlabel(const char *label)
+{
+ struct device *dev;
+ dev_t devt = 0;
+
+ dev = class_find_device(&block_class, NULL, label, &match_dev_by_label);
+ if (dev) {
+ devt = dev->devt;
+ put_device(dev);
+ }
+
+ return devt;
+}
+
+static dev_t devt_from_devname(const char *name)
+{
+ dev_t devt = 0;
+ int part;
+ char s[32];
+ char *p;
+
+ if (strlen(name) > 31)
+ return 0;
+ strcpy(s, name);
+ for (p = s; *p; p++) {
+ if (*p == '/')
+ *p = '!';
+ }
+
+ devt = blk_lookup_devt(s, 0);
+ if (devt)
+ return devt;
+
+ /*
+ * Try non-existent, but valid partition, which may only exist after
+ * opening the device, like partitioned md devices.
+ */
+ while (p > s && isdigit(p[-1]))
+ p--;
+ if (p == s || !*p || *p == '0')
+ return 0;
+
+ /* try disk name without <part number> */
+ part = simple_strtoul(p, NULL, 10);
+ *p = '\0';
+ devt = blk_lookup_devt(s, part);
+ if (devt)
+ return devt;
+
+ /* try disk name without p<part number> */
+ if (p < s + 2 || !isdigit(p[-2]) || p[-1] != 'p')
+ return 0;
+ p[-1] = '\0';
+ return blk_lookup_devt(s, part);
+}
+#endif /* CONFIG_BLOCK */
+
+static dev_t devt_from_devnum(const char *name)
+{
+ unsigned maj, min, offset;
+ dev_t devt = 0;
+ char *p, dummy;
+
+ if (sscanf(name, "%u:%u%c", &maj, &min, &dummy) == 2 ||
+ sscanf(name, "%u:%u:%u:%c", &maj, &min, &offset, &dummy) == 3) {
+ devt = MKDEV(maj, min);
+ if (maj != MAJOR(devt) || min != MINOR(devt))
+ return 0;
+ } else {
+ devt = new_decode_dev(simple_strtoul(name, &p, 16));
+ if (*p)
+ return 0;
+ }
+
+ return devt;
+}
+
+/*
+ * Convert a name into device number. We accept the following variants:
+ *
+ * 1) <hex_major><hex_minor> device number in hexadecimal represents itself
+ * no leading 0x, for example b302.
+ * 2) /dev/nfs represents Root_NFS (0xff)
+ * 3) /dev/<disk_name> represents the device number of disk
+ * 4) /dev/<disk_name><decimal> represents the device number
+ * of partition - device number of disk plus the partition number
+ * 5) /dev/<disk_name>p<decimal> - same as the above, that form is
+ * used when disk name of partitioned disk ends on a digit.
+ * 6) PARTUUID=00112233-4455-6677-8899-AABBCCDDEEFF representing the
+ * unique id of a partition if the partition table provides it.
+ * The UUID may be either an EFI/GPT UUID, or refer to an MSDOS
+ * partition using the format SSSSSSSS-PP, where SSSSSSSS is a zero-
+ * filled hex representation of the 32-bit "NT disk signature", and PP
+ * is a zero-filled hex representation of the 1-based partition number.
+ * 7) PARTUUID=<UUID>/PARTNROFF=<int> to select a partition in relation to
+ * a partition with a known unique id.
+ * 8) <major>:<minor> major and minor number of the device separated by
+ * a colon.
+ * 9) PARTLABEL=<name> with name being the GPT partition label.
+ * MSDOS partitions do not support labels!
+ * 10) /dev/cifs represents Root_CIFS (0xfe)
+ *
+ * If name doesn't have fall into the categories above, we return (0,0).
+ * block_class is used to check if something is a disk name. If the disk
+ * name contains slashes, the device name has them replaced with
+ * bangs.
+ */
+dev_t name_to_dev_t(const char *name)
+{
+ if (strcmp(name, "/dev/nfs") == 0)
+ return Root_NFS;
+ if (strcmp(name, "/dev/cifs") == 0)
+ return Root_CIFS;
+ if (strcmp(name, "/dev/ram") == 0)
+ return Root_RAM0;
+#ifdef CONFIG_BLOCK
+ if (strncmp(name, "PARTUUID=", 9) == 0)
+ return devt_from_partuuid(name + 9);
+ if (strncmp(name, "PARTLABEL=", 10) == 0)
+ return devt_from_partlabel(name + 10);
+ if (strncmp(name, "/dev/", 5) == 0)
+ return devt_from_devname(name + 5);
+#endif
+ return devt_from_devnum(name);
+}
+EXPORT_SYMBOL_GPL(name_to_dev_t);
+
+static int __init root_dev_setup(char *line)
+{
+ strscpy(saved_root_name, line, sizeof(saved_root_name));
+ return 1;
+}
+
+__setup("root=", root_dev_setup);
+
+static int __init rootwait_setup(char *str)
+{
+ if (*str)
+ return 0;
+ root_wait = 1;
+ return 1;
+}
+
+__setup("rootwait", rootwait_setup);
+
+static char * __initdata root_mount_data;
+static int __init root_data_setup(char *str)
+{
+ root_mount_data = str;
+ return 1;
+}
+
+static char * __initdata root_fs_names;
+static int __init fs_names_setup(char *str)
+{
+ root_fs_names = str;
+ return 1;
+}
+
+static unsigned int __initdata root_delay;
+static int __init root_delay_setup(char *str)
+{
+ root_delay = simple_strtoul(str, NULL, 0);
+ return 1;
+}
+
+__setup("rootflags=", root_data_setup);
+__setup("rootfstype=", fs_names_setup);
+__setup("rootdelay=", root_delay_setup);
+
+/* This can return zero length strings. Caller should check */
+static int __init split_fs_names(char *page, size_t size, char *names)
+{
+ int count = 1;
+ char *p = page;
+
+ strscpy(p, root_fs_names, size);
+ while (*p++) {
+ if (p[-1] == ',') {
+ p[-1] = '\0';
+ count++;
+ }
+ }
+
+ return count;
+}
+
+static int __init do_mount_root(const char *name, const char *fs,
+ const int flags, const void *data)
+{
+ struct super_block *s;
+ struct page *p = NULL;
+ char *data_page = NULL;
+ int ret;
+
+ if (data) {
+ /* init_mount() requires a full page as fifth argument */
+ p = alloc_page(GFP_KERNEL);
+ if (!p)
+ return -ENOMEM;
+ data_page = page_address(p);
+ /* zero-pad. init_mount() will make sure it's terminated */
+ strncpy(data_page, data, PAGE_SIZE);
+ }
+
+ ret = init_mount(name, "/root", fs, flags, data_page);
+ if (ret)
+ goto out;
+
+ init_chdir("/root");
+ s = current->fs->pwd.dentry->d_sb;
+ ROOT_DEV = s->s_dev;
+ printk(KERN_INFO
+ "VFS: Mounted root (%s filesystem)%s on device %u:%u.\n",
+ s->s_type->name,
+ sb_rdonly(s) ? " readonly" : "",
+ MAJOR(ROOT_DEV), MINOR(ROOT_DEV));
+
+out:
+ if (p)
+ put_page(p);
+ return ret;
+}
+
+void __init mount_block_root(char *name, int flags)
+{
+ struct page *page = alloc_page(GFP_KERNEL);
+ char *fs_names = page_address(page);
+ char *p;
+ char b[BDEVNAME_SIZE];
+ int num_fs, i;
+
+ scnprintf(b, BDEVNAME_SIZE, "unknown-block(%u,%u)",
+ MAJOR(ROOT_DEV), MINOR(ROOT_DEV));
+ if (root_fs_names)
+ num_fs = split_fs_names(fs_names, PAGE_SIZE, root_fs_names);
+ else
+ num_fs = list_bdev_fs_names(fs_names, PAGE_SIZE);
+retry:
+ for (i = 0, p = fs_names; i < num_fs; i++, p += strlen(p)+1) {
+ int err;
+
+ if (!*p)
+ continue;
+ err = do_mount_root(name, p, flags, root_mount_data);
+ switch (err) {
+ case 0:
+ goto out;
+ case -EACCES:
+ case -EINVAL:
+ continue;
+ }
+ /*
+ * Allow the user to distinguish between failed sys_open
+ * and bad superblock on root device.
+ * and give them a list of the available devices
+ */
+ printk("VFS: Cannot open root device \"%s\" or %s: error %d\n",
+ root_device_name, b, err);
+ printk("Please append a correct \"root=\" boot option; here are the available partitions:\n");
+
+ printk_all_partitions();
+ panic("VFS: Unable to mount root fs on %s", b);
+ }
+ if (!(flags & SB_RDONLY)) {
+ flags |= SB_RDONLY;
+ goto retry;
+ }
+
+ printk("List of all partitions:\n");
+ printk_all_partitions();
+ printk("No filesystem could mount root, tried: ");
+ for (i = 0, p = fs_names; i < num_fs; i++, p += strlen(p)+1)
+ printk(" %s", p);
+ printk("\n");
+ panic("VFS: Unable to mount root fs on %s", b);
+out:
+ put_page(page);
+}
+
+#ifdef CONFIG_ROOT_NFS
+
+#define NFSROOT_TIMEOUT_MIN 5
+#define NFSROOT_TIMEOUT_MAX 30
+#define NFSROOT_RETRY_MAX 5
+
+static int __init mount_nfs_root(void)
+{
+ char *root_dev, *root_data;
+ unsigned int timeout;
+ int try, err;
+
+ err = nfs_root_data(&root_dev, &root_data);
+ if (err != 0)
+ return 0;
+
+ /*
+ * The server or network may not be ready, so try several
+ * times. Stop after a few tries in case the client wants
+ * to fall back to other boot methods.
+ */
+ timeout = NFSROOT_TIMEOUT_MIN;
+ for (try = 1; ; try++) {
+ err = do_mount_root(root_dev, "nfs",
+ root_mountflags, root_data);
+ if (err == 0)
+ return 1;
+ if (try > NFSROOT_RETRY_MAX)
+ break;
+
+ /* Wait, in case the server refused us immediately */
+ ssleep(timeout);
+ timeout <<= 1;
+ if (timeout > NFSROOT_TIMEOUT_MAX)
+ timeout = NFSROOT_TIMEOUT_MAX;
+ }
+ return 0;
+}
+#endif
+
+#ifdef CONFIG_CIFS_ROOT
+
+extern int cifs_root_data(char **dev, char **opts);
+
+#define CIFSROOT_TIMEOUT_MIN 5
+#define CIFSROOT_TIMEOUT_MAX 30
+#define CIFSROOT_RETRY_MAX 5
+
+static int __init mount_cifs_root(void)
+{
+ char *root_dev, *root_data;
+ unsigned int timeout;
+ int try, err;
+
+ err = cifs_root_data(&root_dev, &root_data);
+ if (err != 0)
+ return 0;
+
+ timeout = CIFSROOT_TIMEOUT_MIN;
+ for (try = 1; ; try++) {
+ err = do_mount_root(root_dev, "cifs", root_mountflags,
+ root_data);
+ if (err == 0)
+ return 1;
+ if (try > CIFSROOT_RETRY_MAX)
+ break;
+
+ ssleep(timeout);
+ timeout <<= 1;
+ if (timeout > CIFSROOT_TIMEOUT_MAX)
+ timeout = CIFSROOT_TIMEOUT_MAX;
+ }
+ return 0;
+}
+#endif
+
+static bool __init fs_is_nodev(char *fstype)
+{
+ struct file_system_type *fs = get_fs_type(fstype);
+ bool ret = false;
+
+ if (fs) {
+ ret = !(fs->fs_flags & FS_REQUIRES_DEV);
+ put_filesystem(fs);
+ }
+
+ return ret;
+}
+
+static int __init mount_nodev_root(void)
+{
+ char *fs_names, *fstype;
+ int err = -EINVAL;
+ int num_fs, i;
+
+ fs_names = (void *)__get_free_page(GFP_KERNEL);
+ if (!fs_names)
+ return -EINVAL;
+ num_fs = split_fs_names(fs_names, PAGE_SIZE, root_fs_names);
+
+ for (i = 0, fstype = fs_names; i < num_fs;
+ i++, fstype += strlen(fstype) + 1) {
+ if (!*fstype)
+ continue;
+ if (!fs_is_nodev(fstype))
+ continue;
+ err = do_mount_root(root_device_name, fstype, root_mountflags,
+ root_mount_data);
+ if (!err)
+ break;
+ }
+
+ free_page((unsigned long)fs_names);
+ return err;
+}
+
+void __init mount_root(void)
+{
+#ifdef CONFIG_ROOT_NFS
+ if (ROOT_DEV == Root_NFS) {
+ if (!mount_nfs_root())
+ printk(KERN_ERR "VFS: Unable to mount root fs via NFS.\n");
+ return;
+ }
+#endif
+#ifdef CONFIG_CIFS_ROOT
+ if (ROOT_DEV == Root_CIFS) {
+ if (!mount_cifs_root())
+ printk(KERN_ERR "VFS: Unable to mount root fs via SMB.\n");
+ return;
+ }
+#endif
+ if (ROOT_DEV == 0 && root_device_name && root_fs_names) {
+ if (mount_nodev_root() == 0)
+ return;
+ }
+#ifdef CONFIG_BLOCK
+ {
+ int err = create_dev("/dev/root", ROOT_DEV);
+
+ if (err < 0)
+ pr_emerg("Failed to create /dev/root: %d\n", err);
+ mount_block_root("/dev/root", root_mountflags);
+ }
+#endif
+}
+
+/*
+ * Prepare the namespace - decide what/where to mount, load ramdisks, etc.
+ */
+void __init prepare_namespace(void)
+{
+ if (root_delay) {
+ printk(KERN_INFO "Waiting %d sec before mounting root device...\n",
+ root_delay);
+ ssleep(root_delay);
+ }
+
+ /*
+ * wait for the known devices to complete their probing
+ *
+ * Note: this is a potential source of long boot delays.
+ * For example, it is not atypical to wait 5 seconds here
+ * for the touchpad of a laptop to initialize.
+ */
+ wait_for_device_probe();
+
+ md_run_setup();
+
+ if (saved_root_name[0]) {
+ root_device_name = saved_root_name;
+ if (!strncmp(root_device_name, "mtd", 3) ||
+ !strncmp(root_device_name, "ubi", 3)) {
+ mount_block_root(root_device_name, root_mountflags);
+ goto out;
+ }
+ ROOT_DEV = name_to_dev_t(root_device_name);
+ if (strncmp(root_device_name, "/dev/", 5) == 0)
+ root_device_name += 5;
+ }
+
+ if (initrd_load())
+ goto out;
+
+ /* wait for any asynchronous scanning to complete */
+ if ((ROOT_DEV == 0) && root_wait) {
+ printk(KERN_INFO "Waiting for root device %s...\n",
+ saved_root_name);
+ while (driver_probe_done() != 0 ||
+ (ROOT_DEV = name_to_dev_t(saved_root_name)) == 0)
+ msleep(5);
+ async_synchronize_full();
+ }
+
+ mount_root();
+out:
+ devtmpfs_mount();
+ init_mount(".", "/", NULL, MS_MOVE, NULL);
+ init_chroot(".");
+}
+
+static bool is_tmpfs;
+static int rootfs_init_fs_context(struct fs_context *fc)
+{
+ if (IS_ENABLED(CONFIG_TMPFS) && is_tmpfs)
+ return shmem_init_fs_context(fc);
+
+ return ramfs_init_fs_context(fc);
+}
+
+struct file_system_type rootfs_fs_type = {
+ .name = "rootfs",
+ .init_fs_context = rootfs_init_fs_context,
+ .kill_sb = kill_litter_super,
+};
+
+void __init init_rootfs(void)
+{
+ if (IS_ENABLED(CONFIG_TMPFS)) {
+ if (!saved_root_name[0] && !root_fs_names)
+ is_tmpfs = true;
+ else if (root_fs_names && !!strstr(root_fs_names, "tmpfs"))
+ is_tmpfs = true;
+ }
+}
diff --git a/init/do_mounts.h b/init/do_mounts.h
new file mode 100644
index 000000000..7a29ac3e4
--- /dev/null
+++ b/init/do_mounts.h
@@ -0,0 +1,43 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+#include <linux/kernel.h>
+#include <linux/blkdev.h>
+#include <linux/init.h>
+#include <linux/syscalls.h>
+#include <linux/unistd.h>
+#include <linux/slab.h>
+#include <linux/mount.h>
+#include <linux/major.h>
+#include <linux/root_dev.h>
+#include <linux/init_syscalls.h>
+
+void mount_block_root(char *name, int flags);
+void mount_root(void);
+extern int root_mountflags;
+
+static inline __init int create_dev(char *name, dev_t dev)
+{
+ init_unlink(name);
+ return init_mknod(name, S_IFBLK | 0600, new_encode_dev(dev));
+}
+
+#ifdef CONFIG_BLK_DEV_RAM
+
+int __init rd_load_disk(int n);
+int __init rd_load_image(char *from);
+
+#else
+
+static inline int rd_load_disk(int n) { return 0; }
+static inline int rd_load_image(char *from) { return 0; }
+
+#endif
+
+#ifdef CONFIG_BLK_DEV_INITRD
+
+bool __init initrd_load(void);
+
+#else
+
+static inline bool initrd_load(void) { return false; }
+
+#endif
diff --git a/init/do_mounts_initrd.c b/init/do_mounts_initrd.c
new file mode 100644
index 000000000..347312413
--- /dev/null
+++ b/init/do_mounts_initrd.c
@@ -0,0 +1,154 @@
+// SPDX-License-Identifier: GPL-2.0
+#include <linux/unistd.h>
+#include <linux/kernel.h>
+#include <linux/fs.h>
+#include <linux/minix_fs.h>
+#include <linux/romfs_fs.h>
+#include <linux/initrd.h>
+#include <linux/sched.h>
+#include <linux/freezer.h>
+#include <linux/kmod.h>
+#include <uapi/linux/mount.h>
+
+#include "do_mounts.h"
+
+unsigned long initrd_start, initrd_end;
+int initrd_below_start_ok;
+static unsigned int real_root_dev; /* do_proc_dointvec cannot handle kdev_t */
+static int __initdata mount_initrd = 1;
+
+phys_addr_t phys_initrd_start __initdata;
+unsigned long phys_initrd_size __initdata;
+
+#ifdef CONFIG_SYSCTL
+static struct ctl_table kern_do_mounts_initrd_table[] = {
+ {
+ .procname = "real-root-dev",
+ .data = &real_root_dev,
+ .maxlen = sizeof(int),
+ .mode = 0644,
+ .proc_handler = proc_dointvec,
+ },
+ { }
+};
+
+static __init int kernel_do_mounts_initrd_sysctls_init(void)
+{
+ register_sysctl_init("kernel", kern_do_mounts_initrd_table);
+ return 0;
+}
+late_initcall(kernel_do_mounts_initrd_sysctls_init);
+#endif /* CONFIG_SYSCTL */
+
+static int __init no_initrd(char *str)
+{
+ mount_initrd = 0;
+ return 1;
+}
+
+__setup("noinitrd", no_initrd);
+
+static int __init early_initrdmem(char *p)
+{
+ phys_addr_t start;
+ unsigned long size;
+ char *endp;
+
+ start = memparse(p, &endp);
+ if (*endp == ',') {
+ size = memparse(endp + 1, NULL);
+
+ phys_initrd_start = start;
+ phys_initrd_size = size;
+ }
+ return 0;
+}
+early_param("initrdmem", early_initrdmem);
+
+static int __init early_initrd(char *p)
+{
+ return early_initrdmem(p);
+}
+early_param("initrd", early_initrd);
+
+static int __init init_linuxrc(struct subprocess_info *info, struct cred *new)
+{
+ ksys_unshare(CLONE_FS | CLONE_FILES);
+ console_on_rootfs();
+ /* move initrd over / and chdir/chroot in initrd root */
+ init_chdir("/root");
+ init_mount(".", "/", NULL, MS_MOVE, NULL);
+ init_chroot(".");
+ ksys_setsid();
+ return 0;
+}
+
+static void __init handle_initrd(void)
+{
+ struct subprocess_info *info;
+ static char *argv[] = { "linuxrc", NULL, };
+ extern char *envp_init[];
+ int error;
+
+ pr_warn("using deprecated initrd support, will be removed in 2021.\n");
+
+ real_root_dev = new_encode_dev(ROOT_DEV);
+ create_dev("/dev/root.old", Root_RAM0);
+ /* mount initrd on rootfs' /root */
+ mount_block_root("/dev/root.old", root_mountflags & ~MS_RDONLY);
+ init_mkdir("/old", 0700);
+ init_chdir("/old");
+
+ info = call_usermodehelper_setup("/linuxrc", argv, envp_init,
+ GFP_KERNEL, init_linuxrc, NULL, NULL);
+ if (!info)
+ return;
+ call_usermodehelper_exec(info, UMH_WAIT_PROC|UMH_FREEZABLE);
+
+ /* move initrd to rootfs' /old */
+ init_mount("..", ".", NULL, MS_MOVE, NULL);
+ /* switch root and cwd back to / of rootfs */
+ init_chroot("..");
+
+ if (new_decode_dev(real_root_dev) == Root_RAM0) {
+ init_chdir("/old");
+ return;
+ }
+
+ init_chdir("/");
+ ROOT_DEV = new_decode_dev(real_root_dev);
+ mount_root();
+
+ printk(KERN_NOTICE "Trying to move old root to /initrd ... ");
+ error = init_mount("/old", "/root/initrd", NULL, MS_MOVE, NULL);
+ if (!error)
+ printk("okay\n");
+ else {
+ if (error == -ENOENT)
+ printk("/initrd does not exist. Ignored.\n");
+ else
+ printk("failed\n");
+ printk(KERN_NOTICE "Unmounting old root\n");
+ init_umount("/old", MNT_DETACH);
+ }
+}
+
+bool __init initrd_load(void)
+{
+ if (mount_initrd) {
+ create_dev("/dev/ram", Root_RAM0);
+ /*
+ * Load the initrd data into /dev/ram0. Execute it as initrd
+ * unless /dev/ram0 is supposed to be our actual root device,
+ * in that case the ram disk is just set up here, and gets
+ * mounted in the normal path.
+ */
+ if (rd_load_image("/initrd.image") && ROOT_DEV != Root_RAM0) {
+ init_unlink("/initrd.image");
+ handle_initrd();
+ return true;
+ }
+ }
+ init_unlink("/initrd.image");
+ return false;
+}
diff --git a/init/do_mounts_rd.c b/init/do_mounts_rd.c
new file mode 100644
index 000000000..ac021ae6e
--- /dev/null
+++ b/init/do_mounts_rd.c
@@ -0,0 +1,334 @@
+// SPDX-License-Identifier: GPL-2.0
+#include <linux/kernel.h>
+#include <linux/fs.h>
+#include <linux/minix_fs.h>
+#include <linux/ext2_fs.h>
+#include <linux/romfs_fs.h>
+#include <uapi/linux/cramfs_fs.h>
+#include <linux/initrd.h>
+#include <linux/string.h>
+#include <linux/slab.h>
+
+#include "do_mounts.h"
+#include "../fs/squashfs/squashfs_fs.h"
+
+#include <linux/decompress/generic.h>
+
+static struct file *in_file, *out_file;
+static loff_t in_pos, out_pos;
+
+static int __init prompt_ramdisk(char *str)
+{
+ pr_warn("ignoring the deprecated prompt_ramdisk= option\n");
+ return 1;
+}
+__setup("prompt_ramdisk=", prompt_ramdisk);
+
+int __initdata rd_image_start; /* starting block # of image */
+
+static int __init ramdisk_start_setup(char *str)
+{
+ rd_image_start = simple_strtol(str,NULL,0);
+ return 1;
+}
+__setup("ramdisk_start=", ramdisk_start_setup);
+
+static int __init crd_load(decompress_fn deco);
+
+/*
+ * This routine tries to find a RAM disk image to load, and returns the
+ * number of blocks to read for a non-compressed image, 0 if the image
+ * is a compressed image, and -1 if an image with the right magic
+ * numbers could not be found.
+ *
+ * We currently check for the following magic numbers:
+ * minix
+ * ext2
+ * romfs
+ * cramfs
+ * squashfs
+ * gzip
+ * bzip2
+ * lzma
+ * xz
+ * lzo
+ * lz4
+ */
+static int __init
+identify_ramdisk_image(struct file *file, loff_t pos,
+ decompress_fn *decompressor)
+{
+ const int size = 512;
+ struct minix_super_block *minixsb;
+ struct romfs_super_block *romfsb;
+ struct cramfs_super *cramfsb;
+ struct squashfs_super_block *squashfsb;
+ int nblocks = -1;
+ unsigned char *buf;
+ const char *compress_name;
+ unsigned long n;
+ int start_block = rd_image_start;
+
+ buf = kmalloc(size, GFP_KERNEL);
+ if (!buf)
+ return -ENOMEM;
+
+ minixsb = (struct minix_super_block *) buf;
+ romfsb = (struct romfs_super_block *) buf;
+ cramfsb = (struct cramfs_super *) buf;
+ squashfsb = (struct squashfs_super_block *) buf;
+ memset(buf, 0xe5, size);
+
+ /*
+ * Read block 0 to test for compressed kernel
+ */
+ pos = start_block * BLOCK_SIZE;
+ kernel_read(file, buf, size, &pos);
+
+ *decompressor = decompress_method(buf, size, &compress_name);
+ if (compress_name) {
+ printk(KERN_NOTICE "RAMDISK: %s image found at block %d\n",
+ compress_name, start_block);
+ if (!*decompressor)
+ printk(KERN_EMERG
+ "RAMDISK: %s decompressor not configured!\n",
+ compress_name);
+ nblocks = 0;
+ goto done;
+ }
+
+ /* romfs is at block zero too */
+ if (romfsb->word0 == ROMSB_WORD0 &&
+ romfsb->word1 == ROMSB_WORD1) {
+ printk(KERN_NOTICE
+ "RAMDISK: romfs filesystem found at block %d\n",
+ start_block);
+ nblocks = (ntohl(romfsb->size)+BLOCK_SIZE-1)>>BLOCK_SIZE_BITS;
+ goto done;
+ }
+
+ if (cramfsb->magic == CRAMFS_MAGIC) {
+ printk(KERN_NOTICE
+ "RAMDISK: cramfs filesystem found at block %d\n",
+ start_block);
+ nblocks = (cramfsb->size + BLOCK_SIZE - 1) >> BLOCK_SIZE_BITS;
+ goto done;
+ }
+
+ /* squashfs is at block zero too */
+ if (le32_to_cpu(squashfsb->s_magic) == SQUASHFS_MAGIC) {
+ printk(KERN_NOTICE
+ "RAMDISK: squashfs filesystem found at block %d\n",
+ start_block);
+ nblocks = (le64_to_cpu(squashfsb->bytes_used) + BLOCK_SIZE - 1)
+ >> BLOCK_SIZE_BITS;
+ goto done;
+ }
+
+ /*
+ * Read 512 bytes further to check if cramfs is padded
+ */
+ pos = start_block * BLOCK_SIZE + 0x200;
+ kernel_read(file, buf, size, &pos);
+
+ if (cramfsb->magic == CRAMFS_MAGIC) {
+ printk(KERN_NOTICE
+ "RAMDISK: cramfs filesystem found at block %d\n",
+ start_block);
+ nblocks = (cramfsb->size + BLOCK_SIZE - 1) >> BLOCK_SIZE_BITS;
+ goto done;
+ }
+
+ /*
+ * Read block 1 to test for minix and ext2 superblock
+ */
+ pos = (start_block + 1) * BLOCK_SIZE;
+ kernel_read(file, buf, size, &pos);
+
+ /* Try minix */
+ if (minixsb->s_magic == MINIX_SUPER_MAGIC ||
+ minixsb->s_magic == MINIX_SUPER_MAGIC2) {
+ printk(KERN_NOTICE
+ "RAMDISK: Minix filesystem found at block %d\n",
+ start_block);
+ nblocks = minixsb->s_nzones << minixsb->s_log_zone_size;
+ goto done;
+ }
+
+ /* Try ext2 */
+ n = ext2_image_size(buf);
+ if (n) {
+ printk(KERN_NOTICE
+ "RAMDISK: ext2 filesystem found at block %d\n",
+ start_block);
+ nblocks = n;
+ goto done;
+ }
+
+ printk(KERN_NOTICE
+ "RAMDISK: Couldn't find valid RAM disk image starting at %d.\n",
+ start_block);
+
+done:
+ kfree(buf);
+ return nblocks;
+}
+
+static unsigned long nr_blocks(struct file *file)
+{
+ struct inode *inode = file->f_mapping->host;
+
+ if (!S_ISBLK(inode->i_mode))
+ return 0;
+ return i_size_read(inode) >> 10;
+}
+
+int __init rd_load_image(char *from)
+{
+ int res = 0;
+ unsigned long rd_blocks, devblocks;
+ int nblocks, i;
+ char *buf = NULL;
+ unsigned short rotate = 0;
+ decompress_fn decompressor = NULL;
+#if !defined(CONFIG_S390)
+ char rotator[4] = { '|' , '/' , '-' , '\\' };
+#endif
+
+ out_file = filp_open("/dev/ram", O_RDWR, 0);
+ if (IS_ERR(out_file))
+ goto out;
+
+ in_file = filp_open(from, O_RDONLY, 0);
+ if (IS_ERR(in_file))
+ goto noclose_input;
+
+ in_pos = rd_image_start * BLOCK_SIZE;
+ nblocks = identify_ramdisk_image(in_file, in_pos, &decompressor);
+ if (nblocks < 0)
+ goto done;
+
+ if (nblocks == 0) {
+ if (crd_load(decompressor) == 0)
+ goto successful_load;
+ goto done;
+ }
+
+ /*
+ * NOTE NOTE: nblocks is not actually blocks but
+ * the number of kibibytes of data to load into a ramdisk.
+ */
+ rd_blocks = nr_blocks(out_file);
+ if (nblocks > rd_blocks) {
+ printk("RAMDISK: image too big! (%dKiB/%ldKiB)\n",
+ nblocks, rd_blocks);
+ goto done;
+ }
+
+ /*
+ * OK, time to copy in the data
+ */
+ if (strcmp(from, "/initrd.image") == 0)
+ devblocks = nblocks;
+ else
+ devblocks = nr_blocks(in_file);
+
+ if (devblocks == 0) {
+ printk(KERN_ERR "RAMDISK: could not determine device size\n");
+ goto done;
+ }
+
+ buf = kmalloc(BLOCK_SIZE, GFP_KERNEL);
+ if (!buf) {
+ printk(KERN_ERR "RAMDISK: could not allocate buffer\n");
+ goto done;
+ }
+
+ printk(KERN_NOTICE "RAMDISK: Loading %dKiB [%ld disk%s] into ram disk... ",
+ nblocks, ((nblocks-1)/devblocks)+1, nblocks>devblocks ? "s" : "");
+ for (i = 0; i < nblocks; i++) {
+ if (i && (i % devblocks == 0)) {
+ pr_cont("done disk #1.\n");
+ rotate = 0;
+ fput(in_file);
+ break;
+ }
+ kernel_read(in_file, buf, BLOCK_SIZE, &in_pos);
+ kernel_write(out_file, buf, BLOCK_SIZE, &out_pos);
+#if !defined(CONFIG_S390)
+ if (!(i % 16)) {
+ pr_cont("%c\b", rotator[rotate & 0x3]);
+ rotate++;
+ }
+#endif
+ }
+ pr_cont("done.\n");
+
+successful_load:
+ res = 1;
+done:
+ fput(in_file);
+noclose_input:
+ fput(out_file);
+out:
+ kfree(buf);
+ init_unlink("/dev/ram");
+ return res;
+}
+
+int __init rd_load_disk(int n)
+{
+ create_dev("/dev/root", ROOT_DEV);
+ create_dev("/dev/ram", MKDEV(RAMDISK_MAJOR, n));
+ return rd_load_image("/dev/root");
+}
+
+static int exit_code;
+static int decompress_error;
+
+static long __init compr_fill(void *buf, unsigned long len)
+{
+ long r = kernel_read(in_file, buf, len, &in_pos);
+ if (r < 0)
+ printk(KERN_ERR "RAMDISK: error while reading compressed data");
+ else if (r == 0)
+ printk(KERN_ERR "RAMDISK: EOF while reading compressed data");
+ return r;
+}
+
+static long __init compr_flush(void *window, unsigned long outcnt)
+{
+ long written = kernel_write(out_file, window, outcnt, &out_pos);
+ if (written != outcnt) {
+ if (decompress_error == 0)
+ printk(KERN_ERR
+ "RAMDISK: incomplete write (%ld != %ld)\n",
+ written, outcnt);
+ decompress_error = 1;
+ return -1;
+ }
+ return outcnt;
+}
+
+static void __init error(char *x)
+{
+ printk(KERN_ERR "%s\n", x);
+ exit_code = 1;
+ decompress_error = 1;
+}
+
+static int __init crd_load(decompress_fn deco)
+{
+ int result;
+
+ if (!deco) {
+ pr_emerg("Invalid ramdisk decompression routine. "
+ "Select appropriate config option.\n");
+ panic("Could not decompress initial ramdisk image.");
+ }
+
+ result = deco(NULL, 0, compr_fill, compr_flush, NULL, NULL, error);
+ if (decompress_error)
+ result = 1;
+ return result;
+}
diff --git a/init/init_task.c b/init/init_task.c
new file mode 100644
index 000000000..ff6c4b9bf
--- /dev/null
+++ b/init/init_task.c
@@ -0,0 +1,222 @@
+// SPDX-License-Identifier: GPL-2.0
+#include <linux/init_task.h>
+#include <linux/export.h>
+#include <linux/mqueue.h>
+#include <linux/sched.h>
+#include <linux/sched/sysctl.h>
+#include <linux/sched/rt.h>
+#include <linux/sched/task.h>
+#include <linux/init.h>
+#include <linux/fs.h>
+#include <linux/mm.h>
+#include <linux/audit.h>
+#include <linux/numa.h>
+#include <linux/scs.h>
+
+#include <linux/uaccess.h>
+
+static struct signal_struct init_signals = {
+ .nr_threads = 1,
+ .thread_head = LIST_HEAD_INIT(init_task.thread_node),
+ .wait_chldexit = __WAIT_QUEUE_HEAD_INITIALIZER(init_signals.wait_chldexit),
+ .shared_pending = {
+ .list = LIST_HEAD_INIT(init_signals.shared_pending.list),
+ .signal = {{0}}
+ },
+ .multiprocess = HLIST_HEAD_INIT,
+ .rlim = INIT_RLIMITS,
+ .cred_guard_mutex = __MUTEX_INITIALIZER(init_signals.cred_guard_mutex),
+ .exec_update_lock = __RWSEM_INITIALIZER(init_signals.exec_update_lock),
+#ifdef CONFIG_POSIX_TIMERS
+ .posix_timers = LIST_HEAD_INIT(init_signals.posix_timers),
+ .cputimer = {
+ .cputime_atomic = INIT_CPUTIME_ATOMIC,
+ },
+#endif
+ INIT_CPU_TIMERS(init_signals)
+ .pids = {
+ [PIDTYPE_PID] = &init_struct_pid,
+ [PIDTYPE_TGID] = &init_struct_pid,
+ [PIDTYPE_PGID] = &init_struct_pid,
+ [PIDTYPE_SID] = &init_struct_pid,
+ },
+ INIT_PREV_CPUTIME(init_signals)
+};
+
+static struct sighand_struct init_sighand = {
+ .count = REFCOUNT_INIT(1),
+ .action = { { { .sa_handler = SIG_DFL, } }, },
+ .siglock = __SPIN_LOCK_UNLOCKED(init_sighand.siglock),
+ .signalfd_wqh = __WAIT_QUEUE_HEAD_INITIALIZER(init_sighand.signalfd_wqh),
+};
+
+#ifdef CONFIG_SHADOW_CALL_STACK
+unsigned long init_shadow_call_stack[SCS_SIZE / sizeof(long)]
+ __init_task_data = {
+ [(SCS_SIZE / sizeof(long)) - 1] = SCS_END_MAGIC
+};
+#endif
+
+/*
+ * Set up the first task table, touch at your own risk!. Base=0,
+ * limit=0x1fffff (=2MB)
+ */
+struct task_struct init_task
+#ifdef CONFIG_ARCH_TASK_STRUCT_ON_STACK
+ __init_task_data
+#endif
+ __aligned(L1_CACHE_BYTES)
+= {
+#ifdef CONFIG_THREAD_INFO_IN_TASK
+ .thread_info = INIT_THREAD_INFO(init_task),
+ .stack_refcount = REFCOUNT_INIT(1),
+#endif
+ .__state = 0,
+ .stack = init_stack,
+ .usage = REFCOUNT_INIT(2),
+ .flags = PF_KTHREAD,
+ .prio = MAX_PRIO - 20,
+ .static_prio = MAX_PRIO - 20,
+ .normal_prio = MAX_PRIO - 20,
+ .policy = SCHED_NORMAL,
+ .cpus_ptr = &init_task.cpus_mask,
+ .user_cpus_ptr = NULL,
+ .cpus_mask = CPU_MASK_ALL,
+ .nr_cpus_allowed= NR_CPUS,
+ .mm = NULL,
+ .active_mm = &init_mm,
+ .restart_block = {
+ .fn = do_no_restart_syscall,
+ },
+ .se = {
+ .group_node = LIST_HEAD_INIT(init_task.se.group_node),
+ },
+ .rt = {
+ .run_list = LIST_HEAD_INIT(init_task.rt.run_list),
+ .time_slice = RR_TIMESLICE,
+ },
+ .tasks = LIST_HEAD_INIT(init_task.tasks),
+#ifdef CONFIG_SMP
+ .pushable_tasks = PLIST_NODE_INIT(init_task.pushable_tasks, MAX_PRIO),
+#endif
+#ifdef CONFIG_CGROUP_SCHED
+ .sched_task_group = &root_task_group,
+#endif
+ .ptraced = LIST_HEAD_INIT(init_task.ptraced),
+ .ptrace_entry = LIST_HEAD_INIT(init_task.ptrace_entry),
+ .real_parent = &init_task,
+ .parent = &init_task,
+ .children = LIST_HEAD_INIT(init_task.children),
+ .sibling = LIST_HEAD_INIT(init_task.sibling),
+ .group_leader = &init_task,
+ RCU_POINTER_INITIALIZER(real_cred, &init_cred),
+ RCU_POINTER_INITIALIZER(cred, &init_cred),
+ .comm = INIT_TASK_COMM,
+ .thread = INIT_THREAD,
+ .fs = &init_fs,
+ .files = &init_files,
+#ifdef CONFIG_IO_URING
+ .io_uring = NULL,
+#endif
+ .signal = &init_signals,
+ .sighand = &init_sighand,
+ .nsproxy = &init_nsproxy,
+ .pending = {
+ .list = LIST_HEAD_INIT(init_task.pending.list),
+ .signal = {{0}}
+ },
+ .blocked = {{0}},
+ .alloc_lock = __SPIN_LOCK_UNLOCKED(init_task.alloc_lock),
+ .journal_info = NULL,
+ INIT_CPU_TIMERS(init_task)
+ .pi_lock = __RAW_SPIN_LOCK_UNLOCKED(init_task.pi_lock),
+ .timer_slack_ns = 50000, /* 50 usec default slack */
+ .thread_pid = &init_struct_pid,
+ .thread_group = LIST_HEAD_INIT(init_task.thread_group),
+ .thread_node = LIST_HEAD_INIT(init_signals.thread_head),
+#ifdef CONFIG_AUDIT
+ .loginuid = INVALID_UID,
+ .sessionid = AUDIT_SID_UNSET,
+#endif
+#ifdef CONFIG_PERF_EVENTS
+ .perf_event_mutex = __MUTEX_INITIALIZER(init_task.perf_event_mutex),
+ .perf_event_list = LIST_HEAD_INIT(init_task.perf_event_list),
+#endif
+#ifdef CONFIG_PREEMPT_RCU
+ .rcu_read_lock_nesting = 0,
+ .rcu_read_unlock_special.s = 0,
+ .rcu_node_entry = LIST_HEAD_INIT(init_task.rcu_node_entry),
+ .rcu_blocked_node = NULL,
+#endif
+#ifdef CONFIG_TASKS_RCU
+ .rcu_tasks_holdout = false,
+ .rcu_tasks_holdout_list = LIST_HEAD_INIT(init_task.rcu_tasks_holdout_list),
+ .rcu_tasks_idle_cpu = -1,
+#endif
+#ifdef CONFIG_TASKS_TRACE_RCU
+ .trc_reader_nesting = 0,
+ .trc_reader_special.s = 0,
+ .trc_holdout_list = LIST_HEAD_INIT(init_task.trc_holdout_list),
+ .trc_blkd_node = LIST_HEAD_INIT(init_task.trc_blkd_node),
+#endif
+#ifdef CONFIG_CPUSETS
+ .mems_allowed_seq = SEQCNT_SPINLOCK_ZERO(init_task.mems_allowed_seq,
+ &init_task.alloc_lock),
+#endif
+#ifdef CONFIG_RT_MUTEXES
+ .pi_waiters = RB_ROOT_CACHED,
+ .pi_top_task = NULL,
+#endif
+ INIT_PREV_CPUTIME(init_task)
+#ifdef CONFIG_VIRT_CPU_ACCOUNTING_GEN
+ .vtime.seqcount = SEQCNT_ZERO(init_task.vtime_seqcount),
+ .vtime.starttime = 0,
+ .vtime.state = VTIME_SYS,
+#endif
+#ifdef CONFIG_NUMA_BALANCING
+ .numa_preferred_nid = NUMA_NO_NODE,
+ .numa_group = NULL,
+ .numa_faults = NULL,
+#endif
+#if defined(CONFIG_KASAN_GENERIC) || defined(CONFIG_KASAN_SW_TAGS)
+ .kasan_depth = 1,
+#endif
+#ifdef CONFIG_KCSAN
+ .kcsan_ctx = {
+ .scoped_accesses = {LIST_POISON1, NULL},
+ },
+#endif
+#ifdef CONFIG_TRACE_IRQFLAGS
+ .softirqs_enabled = 1,
+#endif
+#ifdef CONFIG_LOCKDEP
+ .lockdep_depth = 0, /* no locks held yet */
+ .curr_chain_key = INITIAL_CHAIN_KEY,
+ .lockdep_recursion = 0,
+#endif
+#ifdef CONFIG_FUNCTION_GRAPH_TRACER
+ .ret_stack = NULL,
+ .tracing_graph_pause = ATOMIC_INIT(0),
+#endif
+#if defined(CONFIG_TRACING) && defined(CONFIG_PREEMPTION)
+ .trace_recursion = 0,
+#endif
+#ifdef CONFIG_LIVEPATCH
+ .patch_state = KLP_UNDEFINED,
+#endif
+#ifdef CONFIG_SECURITY
+ .security = NULL,
+#endif
+#ifdef CONFIG_SECCOMP_FILTER
+ .seccomp = { .filter_count = ATOMIC_INIT(0) },
+#endif
+};
+EXPORT_SYMBOL(init_task);
+
+/*
+ * Initial thread structure. Alignment of this is handled by a special
+ * linker map entry.
+ */
+#ifndef CONFIG_THREAD_INFO_IN_TASK
+struct thread_info init_thread_info __init_thread_info = INIT_THREAD_INFO(init_task);
+#endif
diff --git a/init/initramfs.c b/init/initramfs.c
new file mode 100644
index 000000000..2f5bfb7d7
--- /dev/null
+++ b/init/initramfs.c
@@ -0,0 +1,762 @@
+// SPDX-License-Identifier: GPL-2.0
+#include <linux/init.h>
+#include <linux/async.h>
+#include <linux/fs.h>
+#include <linux/slab.h>
+#include <linux/types.h>
+#include <linux/fcntl.h>
+#include <linux/delay.h>
+#include <linux/string.h>
+#include <linux/dirent.h>
+#include <linux/syscalls.h>
+#include <linux/utime.h>
+#include <linux/file.h>
+#include <linux/memblock.h>
+#include <linux/mm.h>
+#include <linux/namei.h>
+#include <linux/init_syscalls.h>
+#include <linux/task_work.h>
+#include <linux/umh.h>
+
+static __initdata bool csum_present;
+static __initdata u32 io_csum;
+
+static ssize_t __init xwrite(struct file *file, const unsigned char *p,
+ size_t count, loff_t *pos)
+{
+ ssize_t out = 0;
+
+ /* sys_write only can write MAX_RW_COUNT aka 2G-4K bytes at most */
+ while (count) {
+ ssize_t rv = kernel_write(file, p, count, pos);
+
+ if (rv < 0) {
+ if (rv == -EINTR || rv == -EAGAIN)
+ continue;
+ return out ? out : rv;
+ } else if (rv == 0)
+ break;
+
+ if (csum_present) {
+ ssize_t i;
+
+ for (i = 0; i < rv; i++)
+ io_csum += p[i];
+ }
+
+ p += rv;
+ out += rv;
+ count -= rv;
+ }
+
+ return out;
+}
+
+static __initdata char *message;
+static void __init error(char *x)
+{
+ if (!message)
+ message = x;
+}
+
+static void panic_show_mem(const char *fmt, ...)
+{
+ va_list args;
+
+ show_mem(0, NULL);
+ va_start(args, fmt);
+ panic(fmt, args);
+ va_end(args);
+}
+
+/* link hash */
+
+#define N_ALIGN(len) ((((len) + 1) & ~3) + 2)
+
+static __initdata struct hash {
+ int ino, minor, major;
+ umode_t mode;
+ struct hash *next;
+ char name[N_ALIGN(PATH_MAX)];
+} *head[32];
+
+static inline int hash(int major, int minor, int ino)
+{
+ unsigned long tmp = ino + minor + (major << 3);
+ tmp += tmp >> 5;
+ return tmp & 31;
+}
+
+static char __init *find_link(int major, int minor, int ino,
+ umode_t mode, char *name)
+{
+ struct hash **p, *q;
+ for (p = head + hash(major, minor, ino); *p; p = &(*p)->next) {
+ if ((*p)->ino != ino)
+ continue;
+ if ((*p)->minor != minor)
+ continue;
+ if ((*p)->major != major)
+ continue;
+ if (((*p)->mode ^ mode) & S_IFMT)
+ continue;
+ return (*p)->name;
+ }
+ q = kmalloc(sizeof(struct hash), GFP_KERNEL);
+ if (!q)
+ panic_show_mem("can't allocate link hash entry");
+ q->major = major;
+ q->minor = minor;
+ q->ino = ino;
+ q->mode = mode;
+ strcpy(q->name, name);
+ q->next = NULL;
+ *p = q;
+ return NULL;
+}
+
+static void __init free_hash(void)
+{
+ struct hash **p, *q;
+ for (p = head; p < head + 32; p++) {
+ while (*p) {
+ q = *p;
+ *p = q->next;
+ kfree(q);
+ }
+ }
+}
+
+#ifdef CONFIG_INITRAMFS_PRESERVE_MTIME
+static void __init do_utime(char *filename, time64_t mtime)
+{
+ struct timespec64 t[2] = { { .tv_sec = mtime }, { .tv_sec = mtime } };
+ init_utimes(filename, t);
+}
+
+static void __init do_utime_path(const struct path *path, time64_t mtime)
+{
+ struct timespec64 t[2] = { { .tv_sec = mtime }, { .tv_sec = mtime } };
+ vfs_utimes(path, t);
+}
+
+static __initdata LIST_HEAD(dir_list);
+struct dir_entry {
+ struct list_head list;
+ time64_t mtime;
+ char name[];
+};
+
+static void __init dir_add(const char *name, time64_t mtime)
+{
+ size_t nlen = strlen(name) + 1;
+ struct dir_entry *de;
+
+ de = kmalloc(sizeof(struct dir_entry) + nlen, GFP_KERNEL);
+ if (!de)
+ panic_show_mem("can't allocate dir_entry buffer");
+ INIT_LIST_HEAD(&de->list);
+ strscpy(de->name, name, nlen);
+ de->mtime = mtime;
+ list_add(&de->list, &dir_list);
+}
+
+static void __init dir_utime(void)
+{
+ struct dir_entry *de, *tmp;
+ list_for_each_entry_safe(de, tmp, &dir_list, list) {
+ list_del(&de->list);
+ do_utime(de->name, de->mtime);
+ kfree(de);
+ }
+}
+#else
+static void __init do_utime(char *filename, time64_t mtime) {}
+static void __init do_utime_path(const struct path *path, time64_t mtime) {}
+static void __init dir_add(const char *name, time64_t mtime) {}
+static void __init dir_utime(void) {}
+#endif
+
+static __initdata time64_t mtime;
+
+/* cpio header parsing */
+
+static __initdata unsigned long ino, major, minor, nlink;
+static __initdata umode_t mode;
+static __initdata unsigned long body_len, name_len;
+static __initdata uid_t uid;
+static __initdata gid_t gid;
+static __initdata unsigned rdev;
+static __initdata u32 hdr_csum;
+
+static void __init parse_header(char *s)
+{
+ unsigned long parsed[13];
+ char buf[9];
+ int i;
+
+ buf[8] = '\0';
+ for (i = 0, s += 6; i < 13; i++, s += 8) {
+ memcpy(buf, s, 8);
+ parsed[i] = simple_strtoul(buf, NULL, 16);
+ }
+ ino = parsed[0];
+ mode = parsed[1];
+ uid = parsed[2];
+ gid = parsed[3];
+ nlink = parsed[4];
+ mtime = parsed[5]; /* breaks in y2106 */
+ body_len = parsed[6];
+ major = parsed[7];
+ minor = parsed[8];
+ rdev = new_encode_dev(MKDEV(parsed[9], parsed[10]));
+ name_len = parsed[11];
+ hdr_csum = parsed[12];
+}
+
+/* FSM */
+
+static __initdata enum state {
+ Start,
+ Collect,
+ GotHeader,
+ SkipIt,
+ GotName,
+ CopyFile,
+ GotSymlink,
+ Reset
+} state, next_state;
+
+static __initdata char *victim;
+static unsigned long byte_count __initdata;
+static __initdata loff_t this_header, next_header;
+
+static inline void __init eat(unsigned n)
+{
+ victim += n;
+ this_header += n;
+ byte_count -= n;
+}
+
+static __initdata char *collected;
+static long remains __initdata;
+static __initdata char *collect;
+
+static void __init read_into(char *buf, unsigned size, enum state next)
+{
+ if (byte_count >= size) {
+ collected = victim;
+ eat(size);
+ state = next;
+ } else {
+ collect = collected = buf;
+ remains = size;
+ next_state = next;
+ state = Collect;
+ }
+}
+
+static __initdata char *header_buf, *symlink_buf, *name_buf;
+
+static int __init do_start(void)
+{
+ read_into(header_buf, 110, GotHeader);
+ return 0;
+}
+
+static int __init do_collect(void)
+{
+ unsigned long n = remains;
+ if (byte_count < n)
+ n = byte_count;
+ memcpy(collect, victim, n);
+ eat(n);
+ collect += n;
+ if ((remains -= n) != 0)
+ return 1;
+ state = next_state;
+ return 0;
+}
+
+static int __init do_header(void)
+{
+ if (!memcmp(collected, "070701", 6)) {
+ csum_present = false;
+ } else if (!memcmp(collected, "070702", 6)) {
+ csum_present = true;
+ } else {
+ if (memcmp(collected, "070707", 6) == 0)
+ error("incorrect cpio method used: use -H newc option");
+ else
+ error("no cpio magic");
+ return 1;
+ }
+ parse_header(collected);
+ next_header = this_header + N_ALIGN(name_len) + body_len;
+ next_header = (next_header + 3) & ~3;
+ state = SkipIt;
+ if (name_len <= 0 || name_len > PATH_MAX)
+ return 0;
+ if (S_ISLNK(mode)) {
+ if (body_len > PATH_MAX)
+ return 0;
+ collect = collected = symlink_buf;
+ remains = N_ALIGN(name_len) + body_len;
+ next_state = GotSymlink;
+ state = Collect;
+ return 0;
+ }
+ if (S_ISREG(mode) || !body_len)
+ read_into(name_buf, N_ALIGN(name_len), GotName);
+ return 0;
+}
+
+static int __init do_skip(void)
+{
+ if (this_header + byte_count < next_header) {
+ eat(byte_count);
+ return 1;
+ } else {
+ eat(next_header - this_header);
+ state = next_state;
+ return 0;
+ }
+}
+
+static int __init do_reset(void)
+{
+ while (byte_count && *victim == '\0')
+ eat(1);
+ if (byte_count && (this_header & 3))
+ error("broken padding");
+ return 1;
+}
+
+static void __init clean_path(char *path, umode_t fmode)
+{
+ struct kstat st;
+
+ if (!init_stat(path, &st, AT_SYMLINK_NOFOLLOW) &&
+ (st.mode ^ fmode) & S_IFMT) {
+ if (S_ISDIR(st.mode))
+ init_rmdir(path);
+ else
+ init_unlink(path);
+ }
+}
+
+static int __init maybe_link(void)
+{
+ if (nlink >= 2) {
+ char *old = find_link(major, minor, ino, mode, collected);
+ if (old) {
+ clean_path(collected, 0);
+ return (init_link(old, collected) < 0) ? -1 : 1;
+ }
+ }
+ return 0;
+}
+
+static __initdata struct file *wfile;
+static __initdata loff_t wfile_pos;
+
+static int __init do_name(void)
+{
+ state = SkipIt;
+ next_state = Reset;
+ if (strcmp(collected, "TRAILER!!!") == 0) {
+ free_hash();
+ return 0;
+ }
+ clean_path(collected, mode);
+ if (S_ISREG(mode)) {
+ int ml = maybe_link();
+ if (ml >= 0) {
+ int openflags = O_WRONLY|O_CREAT;
+ if (ml != 1)
+ openflags |= O_TRUNC;
+ wfile = filp_open(collected, openflags, mode);
+ if (IS_ERR(wfile))
+ return 0;
+ wfile_pos = 0;
+ io_csum = 0;
+
+ vfs_fchown(wfile, uid, gid);
+ vfs_fchmod(wfile, mode);
+ if (body_len)
+ vfs_truncate(&wfile->f_path, body_len);
+ state = CopyFile;
+ }
+ } else if (S_ISDIR(mode)) {
+ init_mkdir(collected, mode);
+ init_chown(collected, uid, gid, 0);
+ init_chmod(collected, mode);
+ dir_add(collected, mtime);
+ } else if (S_ISBLK(mode) || S_ISCHR(mode) ||
+ S_ISFIFO(mode) || S_ISSOCK(mode)) {
+ if (maybe_link() == 0) {
+ init_mknod(collected, mode, rdev);
+ init_chown(collected, uid, gid, 0);
+ init_chmod(collected, mode);
+ do_utime(collected, mtime);
+ }
+ }
+ return 0;
+}
+
+static int __init do_copy(void)
+{
+ if (byte_count >= body_len) {
+ if (xwrite(wfile, victim, body_len, &wfile_pos) != body_len)
+ error("write error");
+
+ do_utime_path(&wfile->f_path, mtime);
+ fput(wfile);
+ if (csum_present && io_csum != hdr_csum)
+ error("bad data checksum");
+ eat(body_len);
+ state = SkipIt;
+ return 0;
+ } else {
+ if (xwrite(wfile, victim, byte_count, &wfile_pos) != byte_count)
+ error("write error");
+ body_len -= byte_count;
+ eat(byte_count);
+ return 1;
+ }
+}
+
+static int __init do_symlink(void)
+{
+ collected[N_ALIGN(name_len) + body_len] = '\0';
+ clean_path(collected, 0);
+ init_symlink(collected + N_ALIGN(name_len), collected);
+ init_chown(collected, uid, gid, AT_SYMLINK_NOFOLLOW);
+ do_utime(collected, mtime);
+ state = SkipIt;
+ next_state = Reset;
+ return 0;
+}
+
+static __initdata int (*actions[])(void) = {
+ [Start] = do_start,
+ [Collect] = do_collect,
+ [GotHeader] = do_header,
+ [SkipIt] = do_skip,
+ [GotName] = do_name,
+ [CopyFile] = do_copy,
+ [GotSymlink] = do_symlink,
+ [Reset] = do_reset,
+};
+
+static long __init write_buffer(char *buf, unsigned long len)
+{
+ byte_count = len;
+ victim = buf;
+
+ while (!actions[state]())
+ ;
+ return len - byte_count;
+}
+
+static long __init flush_buffer(void *bufv, unsigned long len)
+{
+ char *buf = (char *) bufv;
+ long written;
+ long origLen = len;
+ if (message)
+ return -1;
+ while ((written = write_buffer(buf, len)) < len && !message) {
+ char c = buf[written];
+ if (c == '0') {
+ buf += written;
+ len -= written;
+ state = Start;
+ } else if (c == 0) {
+ buf += written;
+ len -= written;
+ state = Reset;
+ } else
+ error("junk within compressed archive");
+ }
+ return origLen;
+}
+
+static unsigned long my_inptr __initdata; /* index of next byte to be processed in inbuf */
+
+#include <linux/decompress/generic.h>
+
+static char * __init unpack_to_rootfs(char *buf, unsigned long len)
+{
+ long written;
+ decompress_fn decompress;
+ const char *compress_name;
+ static __initdata char msg_buf[64];
+
+ header_buf = kmalloc(110, GFP_KERNEL);
+ symlink_buf = kmalloc(PATH_MAX + N_ALIGN(PATH_MAX) + 1, GFP_KERNEL);
+ name_buf = kmalloc(N_ALIGN(PATH_MAX), GFP_KERNEL);
+
+ if (!header_buf || !symlink_buf || !name_buf)
+ panic_show_mem("can't allocate buffers");
+
+ state = Start;
+ this_header = 0;
+ message = NULL;
+ while (!message && len) {
+ loff_t saved_offset = this_header;
+ if (*buf == '0' && !(this_header & 3)) {
+ state = Start;
+ written = write_buffer(buf, len);
+ buf += written;
+ len -= written;
+ continue;
+ }
+ if (!*buf) {
+ buf++;
+ len--;
+ this_header++;
+ continue;
+ }
+ this_header = 0;
+ decompress = decompress_method(buf, len, &compress_name);
+ pr_debug("Detected %s compressed data\n", compress_name);
+ if (decompress) {
+ int res = decompress(buf, len, NULL, flush_buffer, NULL,
+ &my_inptr, error);
+ if (res)
+ error("decompressor failed");
+ } else if (compress_name) {
+ if (!message) {
+ snprintf(msg_buf, sizeof msg_buf,
+ "compression method %s not configured",
+ compress_name);
+ message = msg_buf;
+ }
+ } else
+ error("invalid magic at start of compressed archive");
+ if (state != Reset)
+ error("junk at the end of compressed archive");
+ this_header = saved_offset + my_inptr;
+ buf += my_inptr;
+ len -= my_inptr;
+ }
+ dir_utime();
+ kfree(name_buf);
+ kfree(symlink_buf);
+ kfree(header_buf);
+ return message;
+}
+
+static int __initdata do_retain_initrd;
+
+static int __init retain_initrd_param(char *str)
+{
+ if (*str)
+ return 0;
+ do_retain_initrd = 1;
+ return 1;
+}
+__setup("retain_initrd", retain_initrd_param);
+
+#ifdef CONFIG_ARCH_HAS_KEEPINITRD
+static int __init keepinitrd_setup(char *__unused)
+{
+ do_retain_initrd = 1;
+ return 1;
+}
+__setup("keepinitrd", keepinitrd_setup);
+#endif
+
+static bool __initdata initramfs_async = true;
+static int __init initramfs_async_setup(char *str)
+{
+ strtobool(str, &initramfs_async);
+ return 1;
+}
+__setup("initramfs_async=", initramfs_async_setup);
+
+extern char __initramfs_start[];
+extern unsigned long __initramfs_size;
+#include <linux/initrd.h>
+#include <linux/kexec.h>
+
+void __init reserve_initrd_mem(void)
+{
+ phys_addr_t start;
+ unsigned long size;
+
+ /* Ignore the virtul address computed during device tree parsing */
+ initrd_start = initrd_end = 0;
+
+ if (!phys_initrd_size)
+ return;
+ /*
+ * Round the memory region to page boundaries as per free_initrd_mem()
+ * This allows us to detect whether the pages overlapping the initrd
+ * are in use, but more importantly, reserves the entire set of pages
+ * as we don't want these pages allocated for other purposes.
+ */
+ start = round_down(phys_initrd_start, PAGE_SIZE);
+ size = phys_initrd_size + (phys_initrd_start - start);
+ size = round_up(size, PAGE_SIZE);
+
+ if (!memblock_is_region_memory(start, size)) {
+ pr_err("INITRD: 0x%08llx+0x%08lx is not a memory region",
+ (u64)start, size);
+ goto disable;
+ }
+
+ if (memblock_is_region_reserved(start, size)) {
+ pr_err("INITRD: 0x%08llx+0x%08lx overlaps in-use memory region\n",
+ (u64)start, size);
+ goto disable;
+ }
+
+ memblock_reserve(start, size);
+ /* Now convert initrd to virtual addresses */
+ initrd_start = (unsigned long)__va(phys_initrd_start);
+ initrd_end = initrd_start + phys_initrd_size;
+ initrd_below_start_ok = 1;
+
+ return;
+disable:
+ pr_cont(" - disabling initrd\n");
+ initrd_start = 0;
+ initrd_end = 0;
+}
+
+void __weak __init free_initrd_mem(unsigned long start, unsigned long end)
+{
+#ifdef CONFIG_ARCH_KEEP_MEMBLOCK
+ unsigned long aligned_start = ALIGN_DOWN(start, PAGE_SIZE);
+ unsigned long aligned_end = ALIGN(end, PAGE_SIZE);
+
+ memblock_free((void *)aligned_start, aligned_end - aligned_start);
+#endif
+
+ free_reserved_area((void *)start, (void *)end, POISON_FREE_INITMEM,
+ "initrd");
+}
+
+#ifdef CONFIG_KEXEC_CORE
+static bool __init kexec_free_initrd(void)
+{
+ unsigned long crashk_start = (unsigned long)__va(crashk_res.start);
+ unsigned long crashk_end = (unsigned long)__va(crashk_res.end);
+
+ /*
+ * If the initrd region is overlapped with crashkernel reserved region,
+ * free only memory that is not part of crashkernel region.
+ */
+ if (initrd_start >= crashk_end || initrd_end <= crashk_start)
+ return false;
+
+ /*
+ * Initialize initrd memory region since the kexec boot does not do.
+ */
+ memset((void *)initrd_start, 0, initrd_end - initrd_start);
+ if (initrd_start < crashk_start)
+ free_initrd_mem(initrd_start, crashk_start);
+ if (initrd_end > crashk_end)
+ free_initrd_mem(crashk_end, initrd_end);
+ return true;
+}
+#else
+static inline bool kexec_free_initrd(void)
+{
+ return false;
+}
+#endif /* CONFIG_KEXEC_CORE */
+
+#ifdef CONFIG_BLK_DEV_RAM
+static void __init populate_initrd_image(char *err)
+{
+ ssize_t written;
+ struct file *file;
+ loff_t pos = 0;
+
+ unpack_to_rootfs(__initramfs_start, __initramfs_size);
+
+ printk(KERN_INFO "rootfs image is not initramfs (%s); looks like an initrd\n",
+ err);
+ file = filp_open("/initrd.image", O_WRONLY | O_CREAT, 0700);
+ if (IS_ERR(file))
+ return;
+
+ written = xwrite(file, (char *)initrd_start, initrd_end - initrd_start,
+ &pos);
+ if (written != initrd_end - initrd_start)
+ pr_err("/initrd.image: incomplete write (%zd != %ld)\n",
+ written, initrd_end - initrd_start);
+ fput(file);
+}
+#endif /* CONFIG_BLK_DEV_RAM */
+
+static void __init do_populate_rootfs(void *unused, async_cookie_t cookie)
+{
+ /* Load the built in initramfs */
+ char *err = unpack_to_rootfs(__initramfs_start, __initramfs_size);
+ if (err)
+ panic_show_mem("%s", err); /* Failed to decompress INTERNAL initramfs */
+
+ if (!initrd_start || IS_ENABLED(CONFIG_INITRAMFS_FORCE))
+ goto done;
+
+ if (IS_ENABLED(CONFIG_BLK_DEV_RAM))
+ printk(KERN_INFO "Trying to unpack rootfs image as initramfs...\n");
+ else
+ printk(KERN_INFO "Unpacking initramfs...\n");
+
+ err = unpack_to_rootfs((char *)initrd_start, initrd_end - initrd_start);
+ if (err) {
+#ifdef CONFIG_BLK_DEV_RAM
+ populate_initrd_image(err);
+#else
+ printk(KERN_EMERG "Initramfs unpacking failed: %s\n", err);
+#endif
+ }
+
+done:
+ /*
+ * If the initrd region is overlapped with crashkernel reserved region,
+ * free only memory that is not part of crashkernel region.
+ */
+ if (!do_retain_initrd && initrd_start && !kexec_free_initrd())
+ free_initrd_mem(initrd_start, initrd_end);
+ initrd_start = 0;
+ initrd_end = 0;
+
+ flush_delayed_fput();
+ task_work_run();
+}
+
+static ASYNC_DOMAIN_EXCLUSIVE(initramfs_domain);
+static async_cookie_t initramfs_cookie;
+
+void wait_for_initramfs(void)
+{
+ if (!initramfs_cookie) {
+ /*
+ * Something before rootfs_initcall wants to access
+ * the filesystem/initramfs. Probably a bug. Make a
+ * note, avoid deadlocking the machine, and let the
+ * caller's access fail as it used to.
+ */
+ pr_warn_once("wait_for_initramfs() called before rootfs_initcalls\n");
+ return;
+ }
+ async_synchronize_cookie_domain(initramfs_cookie + 1, &initramfs_domain);
+}
+EXPORT_SYMBOL_GPL(wait_for_initramfs);
+
+static int __init populate_rootfs(void)
+{
+ initramfs_cookie = async_schedule_domain(do_populate_rootfs, NULL,
+ &initramfs_domain);
+ usermodehelper_enable();
+ if (!initramfs_async)
+ wait_for_initramfs();
+ return 0;
+}
+rootfs_initcall(populate_rootfs);
diff --git a/init/main.c b/init/main.c
new file mode 100644
index 000000000..87a52bdb4
--- /dev/null
+++ b/init/main.c
@@ -0,0 +1,1650 @@
+// SPDX-License-Identifier: GPL-2.0-only
+/*
+ * linux/init/main.c
+ *
+ * Copyright (C) 1991, 1992 Linus Torvalds
+ *
+ * GK 2/5/95 - Changed to support mounting root fs via NFS
+ * Added initrd & change_root: Werner Almesberger & Hans Lermen, Feb '96
+ * Moan early if gcc is old, avoiding bogus kernels - Paul Gortmaker, May '96
+ * Simplified starting of init: Michael A. Griffith <grif@acm.org>
+ */
+
+#define DEBUG /* Enable initcall_debug */
+
+#include <linux/types.h>
+#include <linux/extable.h>
+#include <linux/module.h>
+#include <linux/proc_fs.h>
+#include <linux/binfmts.h>
+#include <linux/kernel.h>
+#include <linux/syscalls.h>
+#include <linux/stackprotector.h>
+#include <linux/string.h>
+#include <linux/ctype.h>
+#include <linux/delay.h>
+#include <linux/ioport.h>
+#include <linux/init.h>
+#include <linux/initrd.h>
+#include <linux/memblock.h>
+#include <linux/acpi.h>
+#include <linux/bootconfig.h>
+#include <linux/console.h>
+#include <linux/nmi.h>
+#include <linux/percpu.h>
+#include <linux/kmod.h>
+#include <linux/kprobes.h>
+#include <linux/kmsan.h>
+#include <linux/vmalloc.h>
+#include <linux/kernel_stat.h>
+#include <linux/start_kernel.h>
+#include <linux/security.h>
+#include <linux/smp.h>
+#include <linux/profile.h>
+#include <linux/kfence.h>
+#include <linux/rcupdate.h>
+#include <linux/srcu.h>
+#include <linux/moduleparam.h>
+#include <linux/kallsyms.h>
+#include <linux/buildid.h>
+#include <linux/writeback.h>
+#include <linux/cpu.h>
+#include <linux/cpuset.h>
+#include <linux/cgroup.h>
+#include <linux/efi.h>
+#include <linux/tick.h>
+#include <linux/sched/isolation.h>
+#include <linux/interrupt.h>
+#include <linux/taskstats_kern.h>
+#include <linux/delayacct.h>
+#include <linux/unistd.h>
+#include <linux/utsname.h>
+#include <linux/rmap.h>
+#include <linux/mempolicy.h>
+#include <linux/key.h>
+#include <linux/page_ext.h>
+#include <linux/debug_locks.h>
+#include <linux/debugobjects.h>
+#include <linux/lockdep.h>
+#include <linux/kmemleak.h>
+#include <linux/padata.h>
+#include <linux/pid_namespace.h>
+#include <linux/device/driver.h>
+#include <linux/kthread.h>
+#include <linux/sched.h>
+#include <linux/sched/init.h>
+#include <linux/signal.h>
+#include <linux/idr.h>
+#include <linux/kgdb.h>
+#include <linux/ftrace.h>
+#include <linux/async.h>
+#include <linux/shmem_fs.h>
+#include <linux/slab.h>
+#include <linux/perf_event.h>
+#include <linux/ptrace.h>
+#include <linux/pti.h>
+#include <linux/blkdev.h>
+#include <linux/sched/clock.h>
+#include <linux/sched/task.h>
+#include <linux/sched/task_stack.h>
+#include <linux/context_tracking.h>
+#include <linux/random.h>
+#include <linux/list.h>
+#include <linux/integrity.h>
+#include <linux/proc_ns.h>
+#include <linux/io.h>
+#include <linux/cache.h>
+#include <linux/rodata_test.h>
+#include <linux/jump_label.h>
+#include <linux/kcsan.h>
+#include <linux/init_syscalls.h>
+#include <linux/stackdepot.h>
+#include <linux/randomize_kstack.h>
+#include <net/net_namespace.h>
+
+#include <asm/io.h>
+#include <asm/setup.h>
+#include <asm/sections.h>
+#include <asm/cacheflush.h>
+
+#define CREATE_TRACE_POINTS
+#include <trace/events/initcall.h>
+
+#include <kunit/test.h>
+
+static int kernel_init(void *);
+
+extern void init_IRQ(void);
+extern void radix_tree_init(void);
+extern void maple_tree_init(void);
+
+/*
+ * Debug helper: via this flag we know that we are in 'early bootup code'
+ * where only the boot processor is running with IRQ disabled. This means
+ * two things - IRQ must not be enabled before the flag is cleared and some
+ * operations which are not allowed with IRQ disabled are allowed while the
+ * flag is set.
+ */
+bool early_boot_irqs_disabled __read_mostly;
+
+enum system_states system_state __read_mostly;
+EXPORT_SYMBOL(system_state);
+
+/*
+ * Boot command-line arguments
+ */
+#define MAX_INIT_ARGS CONFIG_INIT_ENV_ARG_LIMIT
+#define MAX_INIT_ENVS CONFIG_INIT_ENV_ARG_LIMIT
+
+extern void time_init(void);
+/* Default late time init is NULL. archs can override this later. */
+void (*__initdata late_time_init)(void);
+
+/* Untouched command line saved by arch-specific code. */
+char __initdata boot_command_line[COMMAND_LINE_SIZE];
+/* Untouched saved command line (eg. for /proc) */
+char *saved_command_line;
+/* Command line for parameter parsing */
+static char *static_command_line;
+/* Untouched extra command line */
+static char *extra_command_line;
+/* Extra init arguments */
+static char *extra_init_args;
+
+#ifdef CONFIG_BOOT_CONFIG
+/* Is bootconfig on command line? */
+static bool bootconfig_found;
+static size_t initargs_offs;
+#else
+# define bootconfig_found false
+# define initargs_offs 0
+#endif
+
+static char *execute_command;
+static char *ramdisk_execute_command = "/init";
+
+/*
+ * Used to generate warnings if static_key manipulation functions are used
+ * before jump_label_init is called.
+ */
+bool static_key_initialized __read_mostly;
+EXPORT_SYMBOL_GPL(static_key_initialized);
+
+/*
+ * If set, this is an indication to the drivers that reset the underlying
+ * device before going ahead with the initialization otherwise driver might
+ * rely on the BIOS and skip the reset operation.
+ *
+ * This is useful if kernel is booting in an unreliable environment.
+ * For ex. kdump situation where previous kernel has crashed, BIOS has been
+ * skipped and devices will be in unknown state.
+ */
+unsigned int reset_devices;
+EXPORT_SYMBOL(reset_devices);
+
+static int __init set_reset_devices(char *str)
+{
+ reset_devices = 1;
+ return 1;
+}
+
+__setup("reset_devices", set_reset_devices);
+
+static const char *argv_init[MAX_INIT_ARGS+2] = { "init", NULL, };
+const char *envp_init[MAX_INIT_ENVS+2] = { "HOME=/", "TERM=linux", NULL, };
+static const char *panic_later, *panic_param;
+
+extern const struct obs_kernel_param __setup_start[], __setup_end[];
+
+static bool __init obsolete_checksetup(char *line)
+{
+ const struct obs_kernel_param *p;
+ bool had_early_param = false;
+
+ p = __setup_start;
+ do {
+ int n = strlen(p->str);
+ if (parameqn(line, p->str, n)) {
+ if (p->early) {
+ /* Already done in parse_early_param?
+ * (Needs exact match on param part).
+ * Keep iterating, as we can have early
+ * params and __setups of same names 8( */
+ if (line[n] == '\0' || line[n] == '=')
+ had_early_param = true;
+ } else if (!p->setup_func) {
+ pr_warn("Parameter %s is obsolete, ignored\n",
+ p->str);
+ return true;
+ } else if (p->setup_func(line + n))
+ return true;
+ }
+ p++;
+ } while (p < __setup_end);
+
+ return had_early_param;
+}
+
+/*
+ * This should be approx 2 Bo*oMips to start (note initial shift), and will
+ * still work even if initially too large, it will just take slightly longer
+ */
+unsigned long loops_per_jiffy = (1<<12);
+EXPORT_SYMBOL(loops_per_jiffy);
+
+static int __init debug_kernel(char *str)
+{
+ console_loglevel = CONSOLE_LOGLEVEL_DEBUG;
+ return 0;
+}
+
+static int __init quiet_kernel(char *str)
+{
+ console_loglevel = CONSOLE_LOGLEVEL_QUIET;
+ return 0;
+}
+
+early_param("debug", debug_kernel);
+early_param("quiet", quiet_kernel);
+
+static int __init loglevel(char *str)
+{
+ int newlevel;
+
+ /*
+ * Only update loglevel value when a correct setting was passed,
+ * to prevent blind crashes (when loglevel being set to 0) that
+ * are quite hard to debug
+ */
+ if (get_option(&str, &newlevel)) {
+ console_loglevel = newlevel;
+ return 0;
+ }
+
+ return -EINVAL;
+}
+
+early_param("loglevel", loglevel);
+
+#ifdef CONFIG_BLK_DEV_INITRD
+static void * __init get_boot_config_from_initrd(size_t *_size)
+{
+ u32 size, csum;
+ char *data;
+ u32 *hdr;
+ int i;
+
+ if (!initrd_end)
+ return NULL;
+
+ data = (char *)initrd_end - BOOTCONFIG_MAGIC_LEN;
+ /*
+ * Since Grub may align the size of initrd to 4, we must
+ * check the preceding 3 bytes as well.
+ */
+ for (i = 0; i < 4; i++) {
+ if (!memcmp(data, BOOTCONFIG_MAGIC, BOOTCONFIG_MAGIC_LEN))
+ goto found;
+ data--;
+ }
+ return NULL;
+
+found:
+ hdr = (u32 *)(data - 8);
+ size = le32_to_cpu(hdr[0]);
+ csum = le32_to_cpu(hdr[1]);
+
+ data = ((void *)hdr) - size;
+ if ((unsigned long)data < initrd_start) {
+ pr_err("bootconfig size %d is greater than initrd size %ld\n",
+ size, initrd_end - initrd_start);
+ return NULL;
+ }
+
+ if (xbc_calc_checksum(data, size) != csum) {
+ pr_err("bootconfig checksum failed\n");
+ return NULL;
+ }
+
+ /* Remove bootconfig from initramfs/initrd */
+ initrd_end = (unsigned long)data;
+ if (_size)
+ *_size = size;
+
+ return data;
+}
+#else
+static void * __init get_boot_config_from_initrd(size_t *_size)
+{
+ return NULL;
+}
+#endif
+
+#ifdef CONFIG_BOOT_CONFIG
+
+static char xbc_namebuf[XBC_KEYLEN_MAX] __initdata;
+
+#define rest(dst, end) ((end) > (dst) ? (end) - (dst) : 0)
+
+static int __init xbc_snprint_cmdline(char *buf, size_t size,
+ struct xbc_node *root)
+{
+ struct xbc_node *knode, *vnode;
+ char *end = buf + size;
+ const char *val;
+ int ret;
+
+ xbc_node_for_each_key_value(root, knode, val) {
+ ret = xbc_node_compose_key_after(root, knode,
+ xbc_namebuf, XBC_KEYLEN_MAX);
+ if (ret < 0)
+ return ret;
+
+ vnode = xbc_node_get_child(knode);
+ if (!vnode) {
+ ret = snprintf(buf, rest(buf, end), "%s ", xbc_namebuf);
+ if (ret < 0)
+ return ret;
+ buf += ret;
+ continue;
+ }
+ xbc_array_for_each_value(vnode, val) {
+ ret = snprintf(buf, rest(buf, end), "%s=\"%s\" ",
+ xbc_namebuf, val);
+ if (ret < 0)
+ return ret;
+ buf += ret;
+ }
+ }
+
+ return buf - (end - size);
+}
+#undef rest
+
+/* Make an extra command line under given key word */
+static char * __init xbc_make_cmdline(const char *key)
+{
+ struct xbc_node *root;
+ char *new_cmdline;
+ int ret, len = 0;
+
+ root = xbc_find_node(key);
+ if (!root)
+ return NULL;
+
+ /* Count required buffer size */
+ len = xbc_snprint_cmdline(NULL, 0, root);
+ if (len <= 0)
+ return NULL;
+
+ new_cmdline = memblock_alloc(len + 1, SMP_CACHE_BYTES);
+ if (!new_cmdline) {
+ pr_err("Failed to allocate memory for extra kernel cmdline.\n");
+ return NULL;
+ }
+
+ ret = xbc_snprint_cmdline(new_cmdline, len + 1, root);
+ if (ret < 0 || ret > len) {
+ pr_err("Failed to print extra kernel cmdline.\n");
+ memblock_free(new_cmdline, len + 1);
+ return NULL;
+ }
+
+ return new_cmdline;
+}
+
+static int __init bootconfig_params(char *param, char *val,
+ const char *unused, void *arg)
+{
+ if (strcmp(param, "bootconfig") == 0) {
+ bootconfig_found = true;
+ }
+ return 0;
+}
+
+static int __init warn_bootconfig(char *str)
+{
+ /* The 'bootconfig' has been handled by bootconfig_params(). */
+ return 0;
+}
+
+static void __init setup_boot_config(void)
+{
+ static char tmp_cmdline[COMMAND_LINE_SIZE] __initdata;
+ const char *msg, *data;
+ int pos, ret;
+ size_t size;
+ char *err;
+
+ /* Cut out the bootconfig data even if we have no bootconfig option */
+ data = get_boot_config_from_initrd(&size);
+ /* If there is no bootconfig in initrd, try embedded one. */
+ if (!data)
+ data = xbc_get_embedded_bootconfig(&size);
+
+ strscpy(tmp_cmdline, boot_command_line, COMMAND_LINE_SIZE);
+ err = parse_args("bootconfig", tmp_cmdline, NULL, 0, 0, 0, NULL,
+ bootconfig_params);
+
+ if (IS_ERR(err) || !bootconfig_found)
+ return;
+
+ /* parse_args() stops at the next param of '--' and returns an address */
+ if (err)
+ initargs_offs = err - tmp_cmdline;
+
+ if (!data) {
+ pr_err("'bootconfig' found on command line, but no bootconfig found\n");
+ return;
+ }
+
+ if (size >= XBC_DATA_MAX) {
+ pr_err("bootconfig size %ld greater than max size %d\n",
+ (long)size, XBC_DATA_MAX);
+ return;
+ }
+
+ ret = xbc_init(data, size, &msg, &pos);
+ if (ret < 0) {
+ if (pos < 0)
+ pr_err("Failed to init bootconfig: %s.\n", msg);
+ else
+ pr_err("Failed to parse bootconfig: %s at %d.\n",
+ msg, pos);
+ } else {
+ xbc_get_info(&ret, NULL);
+ pr_info("Load bootconfig: %ld bytes %d nodes\n", (long)size, ret);
+ /* keys starting with "kernel." are passed via cmdline */
+ extra_command_line = xbc_make_cmdline("kernel");
+ /* Also, "init." keys are init arguments */
+ extra_init_args = xbc_make_cmdline("init");
+ }
+ return;
+}
+
+static void __init exit_boot_config(void)
+{
+ xbc_exit();
+}
+
+#else /* !CONFIG_BOOT_CONFIG */
+
+static void __init setup_boot_config(void)
+{
+ /* Remove bootconfig data from initrd */
+ get_boot_config_from_initrd(NULL);
+}
+
+static int __init warn_bootconfig(char *str)
+{
+ pr_warn("WARNING: 'bootconfig' found on the kernel command line but CONFIG_BOOT_CONFIG is not set.\n");
+ return 0;
+}
+
+#define exit_boot_config() do {} while (0)
+
+#endif /* CONFIG_BOOT_CONFIG */
+
+early_param("bootconfig", warn_bootconfig);
+
+/* Change NUL term back to "=", to make "param" the whole string. */
+static void __init repair_env_string(char *param, char *val)
+{
+ if (val) {
+ /* param=val or param="val"? */
+ if (val == param+strlen(param)+1)
+ val[-1] = '=';
+ else if (val == param+strlen(param)+2) {
+ val[-2] = '=';
+ memmove(val-1, val, strlen(val)+1);
+ } else
+ BUG();
+ }
+}
+
+/* Anything after -- gets handed straight to init. */
+static int __init set_init_arg(char *param, char *val,
+ const char *unused, void *arg)
+{
+ unsigned int i;
+
+ if (panic_later)
+ return 0;
+
+ repair_env_string(param, val);
+
+ for (i = 0; argv_init[i]; i++) {
+ if (i == MAX_INIT_ARGS) {
+ panic_later = "init";
+ panic_param = param;
+ return 0;
+ }
+ }
+ argv_init[i] = param;
+ return 0;
+}
+
+/*
+ * Unknown boot options get handed to init, unless they look like
+ * unused parameters (modprobe will find them in /proc/cmdline).
+ */
+static int __init unknown_bootoption(char *param, char *val,
+ const char *unused, void *arg)
+{
+ size_t len = strlen(param);
+
+ /* Handle params aliased to sysctls */
+ if (sysctl_is_alias(param))
+ return 0;
+
+ repair_env_string(param, val);
+
+ /* Handle obsolete-style parameters */
+ if (obsolete_checksetup(param))
+ return 0;
+
+ /* Unused module parameter. */
+ if (strnchr(param, len, '.'))
+ return 0;
+
+ if (panic_later)
+ return 0;
+
+ if (val) {
+ /* Environment option */
+ unsigned int i;
+ for (i = 0; envp_init[i]; i++) {
+ if (i == MAX_INIT_ENVS) {
+ panic_later = "env";
+ panic_param = param;
+ }
+ if (!strncmp(param, envp_init[i], len+1))
+ break;
+ }
+ envp_init[i] = param;
+ } else {
+ /* Command line option */
+ unsigned int i;
+ for (i = 0; argv_init[i]; i++) {
+ if (i == MAX_INIT_ARGS) {
+ panic_later = "init";
+ panic_param = param;
+ }
+ }
+ argv_init[i] = param;
+ }
+ return 0;
+}
+
+static int __init init_setup(char *str)
+{
+ unsigned int i;
+
+ execute_command = str;
+ /*
+ * In case LILO is going to boot us with default command line,
+ * it prepends "auto" before the whole cmdline which makes
+ * the shell think it should execute a script with such name.
+ * So we ignore all arguments entered _before_ init=... [MJ]
+ */
+ for (i = 1; i < MAX_INIT_ARGS; i++)
+ argv_init[i] = NULL;
+ return 1;
+}
+__setup("init=", init_setup);
+
+static int __init rdinit_setup(char *str)
+{
+ unsigned int i;
+
+ ramdisk_execute_command = str;
+ /* See "auto" comment in init_setup */
+ for (i = 1; i < MAX_INIT_ARGS; i++)
+ argv_init[i] = NULL;
+ return 1;
+}
+__setup("rdinit=", rdinit_setup);
+
+#ifndef CONFIG_SMP
+static const unsigned int setup_max_cpus = NR_CPUS;
+static inline void setup_nr_cpu_ids(void) { }
+static inline void smp_prepare_cpus(unsigned int maxcpus) { }
+#endif
+
+/*
+ * We need to store the untouched command line for future reference.
+ * We also need to store the touched command line since the parameter
+ * parsing is performed in place, and we should allow a component to
+ * store reference of name/value for future reference.
+ */
+static void __init setup_command_line(char *command_line)
+{
+ size_t len, xlen = 0, ilen = 0;
+
+ if (extra_command_line)
+ xlen = strlen(extra_command_line);
+ if (extra_init_args)
+ ilen = strlen(extra_init_args) + 4; /* for " -- " */
+
+ len = xlen + strlen(boot_command_line) + 1;
+
+ saved_command_line = memblock_alloc(len + ilen, SMP_CACHE_BYTES);
+ if (!saved_command_line)
+ panic("%s: Failed to allocate %zu bytes\n", __func__, len + ilen);
+
+ static_command_line = memblock_alloc(len, SMP_CACHE_BYTES);
+ if (!static_command_line)
+ panic("%s: Failed to allocate %zu bytes\n", __func__, len);
+
+ if (xlen) {
+ /*
+ * We have to put extra_command_line before boot command
+ * lines because there could be dashes (separator of init
+ * command line) in the command lines.
+ */
+ strcpy(saved_command_line, extra_command_line);
+ strcpy(static_command_line, extra_command_line);
+ }
+ strcpy(saved_command_line + xlen, boot_command_line);
+ strcpy(static_command_line + xlen, command_line);
+
+ if (ilen) {
+ /*
+ * Append supplemental init boot args to saved_command_line
+ * so that user can check what command line options passed
+ * to init.
+ * The order should always be
+ * " -- "[bootconfig init-param][cmdline init-param]
+ */
+ if (initargs_offs) {
+ len = xlen + initargs_offs;
+ strcpy(saved_command_line + len, extra_init_args);
+ len += ilen - 4; /* strlen(extra_init_args) */
+ strcpy(saved_command_line + len,
+ boot_command_line + initargs_offs - 1);
+ } else {
+ len = strlen(saved_command_line);
+ strcpy(saved_command_line + len, " -- ");
+ len += 4;
+ strcpy(saved_command_line + len, extra_init_args);
+ }
+ }
+}
+
+/*
+ * We need to finalize in a non-__init function or else race conditions
+ * between the root thread and the init thread may cause start_kernel to
+ * be reaped by free_initmem before the root thread has proceeded to
+ * cpu_idle.
+ *
+ * gcc-3.4 accidentally inlines this function, so use noinline.
+ */
+
+static __initdata DECLARE_COMPLETION(kthreadd_done);
+
+noinline void __ref rest_init(void)
+{
+ struct task_struct *tsk;
+ int pid;
+
+ rcu_scheduler_starting();
+ /*
+ * We need to spawn init first so that it obtains pid 1, however
+ * the init task will end up wanting to create kthreads, which, if
+ * we schedule it before we create kthreadd, will OOPS.
+ */
+ pid = user_mode_thread(kernel_init, NULL, CLONE_FS);
+ /*
+ * Pin init on the boot CPU. Task migration is not properly working
+ * until sched_init_smp() has been run. It will set the allowed
+ * CPUs for init to the non isolated CPUs.
+ */
+ rcu_read_lock();
+ tsk = find_task_by_pid_ns(pid, &init_pid_ns);
+ tsk->flags |= PF_NO_SETAFFINITY;
+ set_cpus_allowed_ptr(tsk, cpumask_of(smp_processor_id()));
+ rcu_read_unlock();
+
+ numa_default_policy();
+ pid = kernel_thread(kthreadd, NULL, CLONE_FS | CLONE_FILES);
+ rcu_read_lock();
+ kthreadd_task = find_task_by_pid_ns(pid, &init_pid_ns);
+ rcu_read_unlock();
+
+ /*
+ * Enable might_sleep() and smp_processor_id() checks.
+ * They cannot be enabled earlier because with CONFIG_PREEMPTION=y
+ * kernel_thread() would trigger might_sleep() splats. With
+ * CONFIG_PREEMPT_VOLUNTARY=y the init task might have scheduled
+ * already, but it's stuck on the kthreadd_done completion.
+ */
+ system_state = SYSTEM_SCHEDULING;
+
+ complete(&kthreadd_done);
+
+ /*
+ * The boot idle thread must execute schedule()
+ * at least once to get things moving:
+ */
+ schedule_preempt_disabled();
+ /* Call into cpu_idle with preempt disabled */
+ cpu_startup_entry(CPUHP_ONLINE);
+}
+
+/* Check for early params. */
+static int __init do_early_param(char *param, char *val,
+ const char *unused, void *arg)
+{
+ const struct obs_kernel_param *p;
+
+ for (p = __setup_start; p < __setup_end; p++) {
+ if ((p->early && parameq(param, p->str)) ||
+ (strcmp(param, "console") == 0 &&
+ strcmp(p->str, "earlycon") == 0)
+ ) {
+ if (p->setup_func(val) != 0)
+ pr_warn("Malformed early option '%s'\n", param);
+ }
+ }
+ /* We accept everything at this stage. */
+ return 0;
+}
+
+void __init parse_early_options(char *cmdline)
+{
+ parse_args("early options", cmdline, NULL, 0, 0, 0, NULL,
+ do_early_param);
+}
+
+/* Arch code calls this early on, or if not, just before other parsing. */
+void __init parse_early_param(void)
+{
+ static int done __initdata;
+ static char tmp_cmdline[COMMAND_LINE_SIZE] __initdata;
+
+ if (done)
+ return;
+
+ /* All fall through to do_early_param. */
+ strscpy(tmp_cmdline, boot_command_line, COMMAND_LINE_SIZE);
+ parse_early_options(tmp_cmdline);
+ done = 1;
+}
+
+void __init __weak arch_post_acpi_subsys_init(void) { }
+
+void __init __weak smp_setup_processor_id(void)
+{
+}
+
+# if THREAD_SIZE >= PAGE_SIZE
+void __init __weak thread_stack_cache_init(void)
+{
+}
+#endif
+
+void __init __weak poking_init(void) { }
+
+void __init __weak pgtable_cache_init(void) { }
+
+void __init __weak trap_init(void) { }
+
+bool initcall_debug;
+core_param(initcall_debug, initcall_debug, bool, 0644);
+
+#ifdef TRACEPOINTS_ENABLED
+static void __init initcall_debug_enable(void);
+#else
+static inline void initcall_debug_enable(void)
+{
+}
+#endif
+
+/* Report memory auto-initialization states for this boot. */
+static void __init report_meminit(void)
+{
+ const char *stack;
+
+ if (IS_ENABLED(CONFIG_INIT_STACK_ALL_PATTERN))
+ stack = "all(pattern)";
+ else if (IS_ENABLED(CONFIG_INIT_STACK_ALL_ZERO))
+ stack = "all(zero)";
+ else if (IS_ENABLED(CONFIG_GCC_PLUGIN_STRUCTLEAK_BYREF_ALL))
+ stack = "byref_all(zero)";
+ else if (IS_ENABLED(CONFIG_GCC_PLUGIN_STRUCTLEAK_BYREF))
+ stack = "byref(zero)";
+ else if (IS_ENABLED(CONFIG_GCC_PLUGIN_STRUCTLEAK_USER))
+ stack = "__user(zero)";
+ else
+ stack = "off";
+
+ pr_info("mem auto-init: stack:%s, heap alloc:%s, heap free:%s\n",
+ stack, want_init_on_alloc(GFP_KERNEL) ? "on" : "off",
+ want_init_on_free() ? "on" : "off");
+ if (want_init_on_free())
+ pr_info("mem auto-init: clearing system memory may take some time...\n");
+}
+
+/*
+ * Set up kernel memory allocators
+ */
+static void __init mm_init(void)
+{
+ /*
+ * page_ext requires contiguous pages,
+ * bigger than MAX_ORDER unless SPARSEMEM.
+ */
+ page_ext_init_flatmem();
+ init_mem_debugging_and_hardening();
+ kfence_alloc_pool();
+ report_meminit();
+ kmsan_init_shadow();
+ stack_depot_early_init();
+ mem_init();
+ mem_init_print_info();
+ kmem_cache_init();
+ /*
+ * page_owner must be initialized after buddy is ready, and also after
+ * slab is ready so that stack_depot_init() works properly
+ */
+ page_ext_init_flatmem_late();
+ kmemleak_init();
+ pgtable_init();
+ debug_objects_mem_init();
+ vmalloc_init();
+ /* Should be run after vmap initialization */
+ if (early_page_ext_enabled())
+ page_ext_init();
+ /* Should be run before the first non-init thread is created */
+ init_espfix_bsp();
+ /* Should be run after espfix64 is set up. */
+ pti_init();
+ kmsan_init_runtime();
+ mm_cache_init();
+}
+
+#ifdef CONFIG_RANDOMIZE_KSTACK_OFFSET
+DEFINE_STATIC_KEY_MAYBE_RO(CONFIG_RANDOMIZE_KSTACK_OFFSET_DEFAULT,
+ randomize_kstack_offset);
+DEFINE_PER_CPU(u32, kstack_offset);
+
+static int __init early_randomize_kstack_offset(char *buf)
+{
+ int ret;
+ bool bool_result;
+
+ ret = kstrtobool(buf, &bool_result);
+ if (ret)
+ return ret;
+
+ if (bool_result)
+ static_branch_enable(&randomize_kstack_offset);
+ else
+ static_branch_disable(&randomize_kstack_offset);
+ return 0;
+}
+early_param("randomize_kstack_offset", early_randomize_kstack_offset);
+#endif
+
+void __init __weak arch_call_rest_init(void)
+{
+ rest_init();
+}
+
+static void __init print_unknown_bootoptions(void)
+{
+ char *unknown_options;
+ char *end;
+ const char *const *p;
+ size_t len;
+
+ if (panic_later || (!argv_init[1] && !envp_init[2]))
+ return;
+
+ /*
+ * Determine how many options we have to print out, plus a space
+ * before each
+ */
+ len = 1; /* null terminator */
+ for (p = &argv_init[1]; *p; p++) {
+ len++;
+ len += strlen(*p);
+ }
+ for (p = &envp_init[2]; *p; p++) {
+ len++;
+ len += strlen(*p);
+ }
+
+ unknown_options = memblock_alloc(len, SMP_CACHE_BYTES);
+ if (!unknown_options) {
+ pr_err("%s: Failed to allocate %zu bytes\n",
+ __func__, len);
+ return;
+ }
+ end = unknown_options;
+
+ for (p = &argv_init[1]; *p; p++)
+ end += sprintf(end, " %s", *p);
+ for (p = &envp_init[2]; *p; p++)
+ end += sprintf(end, " %s", *p);
+
+ /* Start at unknown_options[1] to skip the initial space */
+ pr_notice("Unknown kernel command line parameters \"%s\", will be passed to user space.\n",
+ &unknown_options[1]);
+ memblock_free(unknown_options, len);
+}
+
+asmlinkage __visible void __init __no_sanitize_address start_kernel(void)
+{
+ char *command_line;
+ char *after_dashes;
+
+ set_task_stack_end_magic(&init_task);
+ smp_setup_processor_id();
+ debug_objects_early_init();
+ init_vmlinux_build_id();
+
+ cgroup_init_early();
+
+ local_irq_disable();
+ early_boot_irqs_disabled = true;
+
+ /*
+ * Interrupts are still disabled. Do necessary setups, then
+ * enable them.
+ */
+ boot_cpu_init();
+ page_address_init();
+ pr_notice("%s", linux_banner);
+ early_security_init();
+ setup_arch(&command_line);
+ setup_boot_config();
+ setup_command_line(command_line);
+ setup_nr_cpu_ids();
+ setup_per_cpu_areas();
+ smp_prepare_boot_cpu(); /* arch-specific boot-cpu hooks */
+ boot_cpu_hotplug_init();
+
+ build_all_zonelists(NULL);
+ page_alloc_init();
+
+ pr_notice("Kernel command line: %s\n", saved_command_line);
+ /* parameters may set static keys */
+ jump_label_init();
+ parse_early_param();
+ after_dashes = parse_args("Booting kernel",
+ static_command_line, __start___param,
+ __stop___param - __start___param,
+ -1, -1, NULL, &unknown_bootoption);
+ print_unknown_bootoptions();
+ if (!IS_ERR_OR_NULL(after_dashes))
+ parse_args("Setting init args", after_dashes, NULL, 0, -1, -1,
+ NULL, set_init_arg);
+ if (extra_init_args)
+ parse_args("Setting extra init args", extra_init_args,
+ NULL, 0, -1, -1, NULL, set_init_arg);
+
+ /* Architectural and non-timekeeping rng init, before allocator init */
+ random_init_early(command_line);
+
+ /*
+ * These use large bootmem allocations and must precede
+ * kmem_cache_init()
+ */
+ setup_log_buf(0);
+ vfs_caches_init_early();
+ sort_main_extable();
+ trap_init();
+ mm_init();
+ poking_init();
+ ftrace_init();
+
+ /* trace_printk can be enabled here */
+ early_trace_init();
+
+ /*
+ * Set up the scheduler prior starting any interrupts (such as the
+ * timer interrupt). Full topology setup happens at smp_init()
+ * time - but meanwhile we still have a functioning scheduler.
+ */
+ sched_init();
+
+ if (WARN(!irqs_disabled(),
+ "Interrupts were enabled *very* early, fixing it\n"))
+ local_irq_disable();
+ radix_tree_init();
+ maple_tree_init();
+
+ /*
+ * Set up housekeeping before setting up workqueues to allow the unbound
+ * workqueue to take non-housekeeping into account.
+ */
+ housekeeping_init();
+
+ /*
+ * Allow workqueue creation and work item queueing/cancelling
+ * early. Work item execution depends on kthreads and starts after
+ * workqueue_init().
+ */
+ workqueue_init_early();
+
+ rcu_init();
+
+ /* Trace events are available after this */
+ trace_init();
+
+ if (initcall_debug)
+ initcall_debug_enable();
+
+ context_tracking_init();
+ /* init some links before init_ISA_irqs() */
+ early_irq_init();
+ init_IRQ();
+ tick_init();
+ rcu_init_nohz();
+ init_timers();
+ srcu_init();
+ hrtimers_init();
+ softirq_init();
+ timekeeping_init();
+ time_init();
+
+ /* This must be after timekeeping is initialized */
+ random_init();
+
+ /* These make use of the fully initialized rng */
+ kfence_init();
+ boot_init_stack_canary();
+
+ perf_event_init();
+ profile_init();
+ call_function_init();
+ WARN(!irqs_disabled(), "Interrupts were enabled early\n");
+
+ early_boot_irqs_disabled = false;
+ local_irq_enable();
+
+ kmem_cache_init_late();
+
+ /*
+ * HACK ALERT! This is early. We're enabling the console before
+ * we've done PCI setups etc, and console_init() must be aware of
+ * this. But we do want output early, in case something goes wrong.
+ */
+ console_init();
+ if (panic_later)
+ panic("Too many boot %s vars at `%s'", panic_later,
+ panic_param);
+
+ lockdep_init();
+
+ /*
+ * Need to run this when irqs are enabled, because it wants
+ * to self-test [hard/soft]-irqs on/off lock inversion bugs
+ * too:
+ */
+ locking_selftest();
+
+#ifdef CONFIG_BLK_DEV_INITRD
+ if (initrd_start && !initrd_below_start_ok &&
+ page_to_pfn(virt_to_page((void *)initrd_start)) < min_low_pfn) {
+ pr_crit("initrd overwritten (0x%08lx < 0x%08lx) - disabling it.\n",
+ page_to_pfn(virt_to_page((void *)initrd_start)),
+ min_low_pfn);
+ initrd_start = 0;
+ }
+#endif
+ setup_per_cpu_pageset();
+ numa_policy_init();
+ acpi_early_init();
+ if (late_time_init)
+ late_time_init();
+ sched_clock_init();
+ calibrate_delay();
+
+ arch_cpu_finalize_init();
+
+ pid_idr_init();
+ anon_vma_init();
+#ifdef CONFIG_X86
+ if (efi_enabled(EFI_RUNTIME_SERVICES))
+ efi_enter_virtual_mode();
+#endif
+ thread_stack_cache_init();
+ cred_init();
+ fork_init();
+ proc_caches_init();
+ uts_ns_init();
+ key_init();
+ security_init();
+ dbg_late_init();
+ net_ns_init();
+ vfs_caches_init();
+ pagecache_init();
+ signals_init();
+ seq_file_init();
+ proc_root_init();
+ nsfs_init();
+ cpuset_init();
+ cgroup_init();
+ taskstats_init_early();
+ delayacct_init();
+
+ acpi_subsystem_init();
+ arch_post_acpi_subsys_init();
+ kcsan_init();
+
+ /* Do the rest non-__init'ed, we're now alive */
+ arch_call_rest_init();
+
+ prevent_tail_call_optimization();
+}
+
+/* Call all constructor functions linked into the kernel. */
+static void __init do_ctors(void)
+{
+/*
+ * For UML, the constructors have already been called by the
+ * normal setup code as it's just a normal ELF binary, so we
+ * cannot do it again - but we do need CONFIG_CONSTRUCTORS
+ * even on UML for modules.
+ */
+#if defined(CONFIG_CONSTRUCTORS) && !defined(CONFIG_UML)
+ ctor_fn_t *fn = (ctor_fn_t *) __ctors_start;
+
+ for (; fn < (ctor_fn_t *) __ctors_end; fn++)
+ (*fn)();
+#endif
+}
+
+#ifdef CONFIG_KALLSYMS
+struct blacklist_entry {
+ struct list_head next;
+ char *buf;
+};
+
+static __initdata_or_module LIST_HEAD(blacklisted_initcalls);
+
+static int __init initcall_blacklist(char *str)
+{
+ char *str_entry;
+ struct blacklist_entry *entry;
+
+ /* str argument is a comma-separated list of functions */
+ do {
+ str_entry = strsep(&str, ",");
+ if (str_entry) {
+ pr_debug("blacklisting initcall %s\n", str_entry);
+ entry = memblock_alloc(sizeof(*entry),
+ SMP_CACHE_BYTES);
+ if (!entry)
+ panic("%s: Failed to allocate %zu bytes\n",
+ __func__, sizeof(*entry));
+ entry->buf = memblock_alloc(strlen(str_entry) + 1,
+ SMP_CACHE_BYTES);
+ if (!entry->buf)
+ panic("%s: Failed to allocate %zu bytes\n",
+ __func__, strlen(str_entry) + 1);
+ strcpy(entry->buf, str_entry);
+ list_add(&entry->next, &blacklisted_initcalls);
+ }
+ } while (str_entry);
+
+ return 1;
+}
+
+static bool __init_or_module initcall_blacklisted(initcall_t fn)
+{
+ struct blacklist_entry *entry;
+ char fn_name[KSYM_SYMBOL_LEN];
+ unsigned long addr;
+
+ if (list_empty(&blacklisted_initcalls))
+ return false;
+
+ addr = (unsigned long) dereference_function_descriptor(fn);
+ sprint_symbol_no_offset(fn_name, addr);
+
+ /*
+ * fn will be "function_name [module_name]" where [module_name] is not
+ * displayed for built-in init functions. Strip off the [module_name].
+ */
+ strreplace(fn_name, ' ', '\0');
+
+ list_for_each_entry(entry, &blacklisted_initcalls, next) {
+ if (!strcmp(fn_name, entry->buf)) {
+ pr_debug("initcall %s blacklisted\n", fn_name);
+ return true;
+ }
+ }
+
+ return false;
+}
+#else
+static int __init initcall_blacklist(char *str)
+{
+ pr_warn("initcall_blacklist requires CONFIG_KALLSYMS\n");
+ return 0;
+}
+
+static bool __init_or_module initcall_blacklisted(initcall_t fn)
+{
+ return false;
+}
+#endif
+__setup("initcall_blacklist=", initcall_blacklist);
+
+static __init_or_module void
+trace_initcall_start_cb(void *data, initcall_t fn)
+{
+ ktime_t *calltime = data;
+
+ printk(KERN_DEBUG "calling %pS @ %i\n", fn, task_pid_nr(current));
+ *calltime = ktime_get();
+}
+
+static __init_or_module void
+trace_initcall_finish_cb(void *data, initcall_t fn, int ret)
+{
+ ktime_t rettime, *calltime = data;
+
+ rettime = ktime_get();
+ printk(KERN_DEBUG "initcall %pS returned %d after %lld usecs\n",
+ fn, ret, (unsigned long long)ktime_us_delta(rettime, *calltime));
+}
+
+static ktime_t initcall_calltime;
+
+#ifdef TRACEPOINTS_ENABLED
+static void __init initcall_debug_enable(void)
+{
+ int ret;
+
+ ret = register_trace_initcall_start(trace_initcall_start_cb,
+ &initcall_calltime);
+ ret |= register_trace_initcall_finish(trace_initcall_finish_cb,
+ &initcall_calltime);
+ WARN(ret, "Failed to register initcall tracepoints\n");
+}
+# define do_trace_initcall_start trace_initcall_start
+# define do_trace_initcall_finish trace_initcall_finish
+#else
+static inline void do_trace_initcall_start(initcall_t fn)
+{
+ if (!initcall_debug)
+ return;
+ trace_initcall_start_cb(&initcall_calltime, fn);
+}
+static inline void do_trace_initcall_finish(initcall_t fn, int ret)
+{
+ if (!initcall_debug)
+ return;
+ trace_initcall_finish_cb(&initcall_calltime, fn, ret);
+}
+#endif /* !TRACEPOINTS_ENABLED */
+
+int __init_or_module do_one_initcall(initcall_t fn)
+{
+ int count = preempt_count();
+ char msgbuf[64];
+ int ret;
+
+ if (initcall_blacklisted(fn))
+ return -EPERM;
+
+ do_trace_initcall_start(fn);
+ ret = fn();
+ do_trace_initcall_finish(fn, ret);
+
+ msgbuf[0] = 0;
+
+ if (preempt_count() != count) {
+ sprintf(msgbuf, "preemption imbalance ");
+ preempt_count_set(count);
+ }
+ if (irqs_disabled()) {
+ strlcat(msgbuf, "disabled interrupts ", sizeof(msgbuf));
+ local_irq_enable();
+ }
+ WARN(msgbuf[0], "initcall %pS returned with %s\n", fn, msgbuf);
+
+ add_latent_entropy();
+ return ret;
+}
+
+
+extern initcall_entry_t __initcall_start[];
+extern initcall_entry_t __initcall0_start[];
+extern initcall_entry_t __initcall1_start[];
+extern initcall_entry_t __initcall2_start[];
+extern initcall_entry_t __initcall3_start[];
+extern initcall_entry_t __initcall4_start[];
+extern initcall_entry_t __initcall5_start[];
+extern initcall_entry_t __initcall6_start[];
+extern initcall_entry_t __initcall7_start[];
+extern initcall_entry_t __initcall_end[];
+
+static initcall_entry_t *initcall_levels[] __initdata = {
+ __initcall0_start,
+ __initcall1_start,
+ __initcall2_start,
+ __initcall3_start,
+ __initcall4_start,
+ __initcall5_start,
+ __initcall6_start,
+ __initcall7_start,
+ __initcall_end,
+};
+
+/* Keep these in sync with initcalls in include/linux/init.h */
+static const char *initcall_level_names[] __initdata = {
+ "pure",
+ "core",
+ "postcore",
+ "arch",
+ "subsys",
+ "fs",
+ "device",
+ "late",
+};
+
+static int __init ignore_unknown_bootoption(char *param, char *val,
+ const char *unused, void *arg)
+{
+ return 0;
+}
+
+static void __init do_initcall_level(int level, char *command_line)
+{
+ initcall_entry_t *fn;
+
+ parse_args(initcall_level_names[level],
+ command_line, __start___param,
+ __stop___param - __start___param,
+ level, level,
+ NULL, ignore_unknown_bootoption);
+
+ trace_initcall_level(initcall_level_names[level]);
+ for (fn = initcall_levels[level]; fn < initcall_levels[level+1]; fn++)
+ do_one_initcall(initcall_from_entry(fn));
+}
+
+static void __init do_initcalls(void)
+{
+ int level;
+ size_t len = strlen(saved_command_line) + 1;
+ char *command_line;
+
+ command_line = kzalloc(len, GFP_KERNEL);
+ if (!command_line)
+ panic("%s: Failed to allocate %zu bytes\n", __func__, len);
+
+ for (level = 0; level < ARRAY_SIZE(initcall_levels) - 1; level++) {
+ /* Parser modifies command_line, restore it each time */
+ strcpy(command_line, saved_command_line);
+ do_initcall_level(level, command_line);
+ }
+
+ kfree(command_line);
+}
+
+/*
+ * Ok, the machine is now initialized. None of the devices
+ * have been touched yet, but the CPU subsystem is up and
+ * running, and memory and process management works.
+ *
+ * Now we can finally start doing some real work..
+ */
+static void __init do_basic_setup(void)
+{
+ cpuset_init_smp();
+ driver_init();
+ init_irq_proc();
+ do_ctors();
+ do_initcalls();
+}
+
+static void __init do_pre_smp_initcalls(void)
+{
+ initcall_entry_t *fn;
+
+ trace_initcall_level("early");
+ for (fn = __initcall_start; fn < __initcall0_start; fn++)
+ do_one_initcall(initcall_from_entry(fn));
+}
+
+static int run_init_process(const char *init_filename)
+{
+ const char *const *p;
+
+ argv_init[0] = init_filename;
+ pr_info("Run %s as init process\n", init_filename);
+ pr_debug(" with arguments:\n");
+ for (p = argv_init; *p; p++)
+ pr_debug(" %s\n", *p);
+ pr_debug(" with environment:\n");
+ for (p = envp_init; *p; p++)
+ pr_debug(" %s\n", *p);
+ return kernel_execve(init_filename, argv_init, envp_init);
+}
+
+static int try_to_run_init_process(const char *init_filename)
+{
+ int ret;
+
+ ret = run_init_process(init_filename);
+
+ if (ret && ret != -ENOENT) {
+ pr_err("Starting init: %s exists but couldn't execute it (error %d)\n",
+ init_filename, ret);
+ }
+
+ return ret;
+}
+
+static noinline void __init kernel_init_freeable(void);
+
+#if defined(CONFIG_STRICT_KERNEL_RWX) || defined(CONFIG_STRICT_MODULE_RWX)
+bool rodata_enabled __ro_after_init = true;
+
+#ifndef arch_parse_debug_rodata
+static inline bool arch_parse_debug_rodata(char *str) { return false; }
+#endif
+
+static int __init set_debug_rodata(char *str)
+{
+ if (arch_parse_debug_rodata(str))
+ return 0;
+
+ if (str && !strcmp(str, "on"))
+ rodata_enabled = true;
+ else if (str && !strcmp(str, "off"))
+ rodata_enabled = false;
+ else
+ pr_warn("Invalid option string for rodata: '%s'\n", str);
+ return 0;
+}
+early_param("rodata", set_debug_rodata);
+#endif
+
+#ifdef CONFIG_STRICT_KERNEL_RWX
+static void mark_readonly(void)
+{
+ if (rodata_enabled) {
+ /*
+ * load_module() results in W+X mappings, which are cleaned
+ * up with call_rcu(). Let's make sure that queued work is
+ * flushed so that we don't hit false positives looking for
+ * insecure pages which are W+X.
+ */
+ rcu_barrier();
+ mark_rodata_ro();
+ rodata_test();
+ } else
+ pr_info("Kernel memory protection disabled.\n");
+}
+#elif defined(CONFIG_ARCH_HAS_STRICT_KERNEL_RWX)
+static inline void mark_readonly(void)
+{
+ pr_warn("Kernel memory protection not selected by kernel config.\n");
+}
+#else
+static inline void mark_readonly(void)
+{
+ pr_warn("This architecture does not have kernel memory protection.\n");
+}
+#endif
+
+void __weak free_initmem(void)
+{
+ free_initmem_default(POISON_FREE_INITMEM);
+}
+
+static int __ref kernel_init(void *unused)
+{
+ int ret;
+
+ /*
+ * Wait until kthreadd is all set-up.
+ */
+ wait_for_completion(&kthreadd_done);
+
+ kernel_init_freeable();
+ /* need to finish all async __init code before freeing the memory */
+ async_synchronize_full();
+
+ system_state = SYSTEM_FREEING_INITMEM;
+ kprobe_free_init_mem();
+ ftrace_free_init_mem();
+ kgdb_free_init_mem();
+ exit_boot_config();
+ free_initmem();
+ mark_readonly();
+
+ /*
+ * Kernel mappings are now finalized - update the userspace page-table
+ * to finalize PTI.
+ */
+ pti_finalize();
+
+ system_state = SYSTEM_RUNNING;
+ numa_default_policy();
+
+ rcu_end_inkernel_boot();
+
+ do_sysctl_args();
+
+ if (ramdisk_execute_command) {
+ ret = run_init_process(ramdisk_execute_command);
+ if (!ret)
+ return 0;
+ pr_err("Failed to execute %s (error %d)\n",
+ ramdisk_execute_command, ret);
+ }
+
+ /*
+ * We try each of these until one succeeds.
+ *
+ * The Bourne shell can be used instead of init if we are
+ * trying to recover a really broken machine.
+ */
+ if (execute_command) {
+ ret = run_init_process(execute_command);
+ if (!ret)
+ return 0;
+ panic("Requested init %s failed (error %d).",
+ execute_command, ret);
+ }
+
+ if (CONFIG_DEFAULT_INIT[0] != '\0') {
+ ret = run_init_process(CONFIG_DEFAULT_INIT);
+ if (ret)
+ pr_err("Default init %s failed (error %d)\n",
+ CONFIG_DEFAULT_INIT, ret);
+ else
+ return 0;
+ }
+
+ if (!try_to_run_init_process("/sbin/init") ||
+ !try_to_run_init_process("/etc/init") ||
+ !try_to_run_init_process("/bin/init") ||
+ !try_to_run_init_process("/bin/sh"))
+ return 0;
+
+ panic("No working init found. Try passing init= option to kernel. "
+ "See Linux Documentation/admin-guide/init.rst for guidance.");
+}
+
+/* Open /dev/console, for stdin/stdout/stderr, this should never fail */
+void __init console_on_rootfs(void)
+{
+ struct file *file = filp_open("/dev/console", O_RDWR, 0);
+
+ if (IS_ERR(file)) {
+ pr_err("Warning: unable to open an initial console.\n");
+ return;
+ }
+ init_dup(file);
+ init_dup(file);
+ init_dup(file);
+ fput(file);
+}
+
+static noinline void __init kernel_init_freeable(void)
+{
+ /* Now the scheduler is fully set up and can do blocking allocations */
+ gfp_allowed_mask = __GFP_BITS_MASK;
+
+ /*
+ * init can allocate pages on any node
+ */
+ set_mems_allowed(node_states[N_MEMORY]);
+
+ cad_pid = get_pid(task_pid(current));
+
+ smp_prepare_cpus(setup_max_cpus);
+
+ workqueue_init();
+
+ init_mm_internals();
+
+ rcu_init_tasks_generic();
+ do_pre_smp_initcalls();
+ lockup_detector_init();
+
+ smp_init();
+ sched_init_smp();
+
+ padata_init();
+ page_alloc_init_late();
+ /* Initialize page ext after all struct pages are initialized. */
+ if (!early_page_ext_enabled())
+ page_ext_init();
+
+ do_basic_setup();
+
+ kunit_run_all_tests();
+
+ wait_for_initramfs();
+ console_on_rootfs();
+
+ /*
+ * check if there is an early userspace init. If yes, let it do all
+ * the work
+ */
+ if (init_eaccess(ramdisk_execute_command) != 0) {
+ ramdisk_execute_command = NULL;
+ prepare_namespace();
+ }
+
+ /*
+ * Ok, we have completed the initial bootup, and
+ * we're essentially up and running. Get rid of the
+ * initmem segments and start the user-mode stuff..
+ *
+ * rootfs is available now, try loading the public keys
+ * and default modules
+ */
+
+ integrity_load_keys();
+}
diff --git a/init/noinitramfs.c b/init/noinitramfs.c
new file mode 100644
index 000000000..d1d26b93d
--- /dev/null
+++ b/init/noinitramfs.c
@@ -0,0 +1,42 @@
+// SPDX-License-Identifier: GPL-2.0-only
+/*
+ * init/noinitramfs.c
+ *
+ * Copyright (C) 2006, NXP Semiconductors, All Rights Reserved
+ * Author: Jean-Paul Saman <jean-paul.saman@nxp.com>
+ */
+#include <linux/init.h>
+#include <linux/stat.h>
+#include <linux/kdev_t.h>
+#include <linux/syscalls.h>
+#include <linux/init_syscalls.h>
+#include <linux/umh.h>
+
+/*
+ * Create a simple rootfs that is similar to the default initramfs
+ */
+static int __init default_rootfs(void)
+{
+ int err;
+
+ usermodehelper_enable();
+ err = init_mkdir("/dev", 0755);
+ if (err < 0)
+ goto out;
+
+ err = init_mknod("/dev/console", S_IFCHR | S_IRUSR | S_IWUSR,
+ new_encode_dev(MKDEV(5, 1)));
+ if (err < 0)
+ goto out;
+
+ err = init_mkdir("/root", 0700);
+ if (err < 0)
+ goto out;
+
+ return 0;
+
+out:
+ printk(KERN_WARNING "Failed to create a rootfs\n");
+ return err;
+}
+rootfs_initcall(default_rootfs);
diff --git a/init/version-timestamp.c b/init/version-timestamp.c
new file mode 100644
index 000000000..179e93bae
--- /dev/null
+++ b/init/version-timestamp.c
@@ -0,0 +1,31 @@
+// SPDX-License-Identifier: GPL-2.0-only
+
+#include <generated/compile.h>
+#include <generated/utsrelease.h>
+#include <linux/version.h>
+#include <linux/proc_ns.h>
+#include <linux/refcount.h>
+#include <linux/uts.h>
+#include <linux/utsname.h>
+
+struct uts_namespace init_uts_ns = {
+ .ns.count = REFCOUNT_INIT(2),
+ .name = {
+ .sysname = UTS_SYSNAME,
+ .nodename = UTS_NODENAME,
+ .release = UTS_RELEASE,
+ .version = UTS_VERSION,
+ .machine = UTS_MACHINE,
+ .domainname = UTS_DOMAINNAME,
+ },
+ .user_ns = &init_user_ns,
+ .ns.inum = PROC_UTS_INIT_INO,
+#ifdef CONFIG_UTS_NS
+ .ns.ops = &utsns_operations,
+#endif
+};
+
+/* FIXED STRINGS! Don't touch! */
+const char linux_banner[] =
+ "Linux version " UTS_RELEASE " (" LINUX_COMPILE_BY "@"
+ LINUX_COMPILE_HOST ") (" LINUX_COMPILER ") " UTS_VERSION "\n";
diff --git a/init/version.c b/init/version.c
new file mode 100644
index 000000000..01d4ab05f
--- /dev/null
+++ b/init/version.c
@@ -0,0 +1,55 @@
+// SPDX-License-Identifier: GPL-2.0-only
+/*
+ * linux/init/version.c
+ *
+ * Copyright (C) 1992 Theodore Ts'o
+ *
+ * May be freely distributed as part of Linux.
+ */
+
+#include <generated/compile.h>
+#include <linux/build-salt.h>
+#include <linux/elfnote-lto.h>
+#include <linux/export.h>
+#include <linux/init.h>
+#include <linux/printk.h>
+#include <linux/uts.h>
+#include <linux/utsname.h>
+#include <generated/utsrelease.h>
+#include <linux/proc_ns.h>
+
+static int __init early_hostname(char *arg)
+{
+ size_t bufsize = sizeof(init_uts_ns.name.nodename);
+ size_t maxlen = bufsize - 1;
+ size_t arglen;
+
+ arglen = strlcpy(init_uts_ns.name.nodename, arg, bufsize);
+ if (arglen > maxlen) {
+ pr_warn("hostname parameter exceeds %zd characters and will be truncated",
+ maxlen);
+ }
+ return 0;
+}
+early_param("hostname", early_hostname);
+
+const char linux_proc_banner[] =
+ "%s version %s"
+ " (" LINUX_COMPILE_BY "@" LINUX_COMPILE_HOST ")"
+ " (" LINUX_COMPILER ") %s\n";
+
+BUILD_SALT;
+BUILD_LTO_INFO;
+
+/*
+ * init_uts_ns and linux_banner contain the build version and timestamp,
+ * which are really fixed at the very last step of build process.
+ * They are compiled with __weak first, and without __weak later.
+ */
+
+struct uts_namespace init_uts_ns __weak;
+const char linux_banner[] __weak;
+
+#include "version-timestamp.c"
+
+EXPORT_SYMBOL_GPL(init_uts_ns);