#![allow(unsafe_code)] use core::convert::TryFrom; use core::mem::MaybeUninit; use core::num::NonZeroU64; use core::ptr; use core::ptr::NonNull; use core::sync::atomic::AtomicU8; use bitflags::bitflags; use crate::backend::c::{c_int, c_uint, c_void}; use crate::backend::process::syscalls; use crate::ffi::{CStr, CString}; use crate::io; use crate::process::{ prctl_1arg, prctl_2args, prctl_3args, prctl_get_at_arg2_optional, Pid, PointerAuthenticationKeys, }; // // PR_GET_KEEPCAPS/PR_SET_KEEPCAPS // const PR_GET_KEEPCAPS: c_int = 7; /// Get the current state of the calling thread's `keep capabilities` flag. /// /// # References /// - [`prctl(PR_GET_KEEPCAPS,...)`] /// /// [`prctl(PR_GET_KEEPCAPS,...)`]: https://man7.org/linux/man-pages/man2/prctl.2.html #[inline] pub fn get_keep_capabilities() -> io::Result { unsafe { prctl_1arg(PR_GET_KEEPCAPS) }.map(|r| r != 0) } const PR_SET_KEEPCAPS: c_int = 8; /// Set the state of the calling thread's `keep capabilities` flag. /// /// # References /// - [`prctl(PR_SET_KEEPCAPS,...)`] /// /// [`prctl(PR_SET_KEEPCAPS,...)`]: https://man7.org/linux/man-pages/man2/prctl.2.html #[inline] pub fn set_keep_capabilities(enable: bool) -> io::Result<()> { unsafe { prctl_2args(PR_SET_KEEPCAPS, enable as usize as *mut _) }.map(|_r| ()) } // // PR_GET_NAME/PR_SET_NAME // const PR_GET_NAME: c_int = 16; /// Get the name of the calling thread. /// /// # References /// - [`prctl(PR_GET_NAME,...)`] /// /// [`prctl(PR_GET_NAME,...)`]: https://man7.org/linux/man-pages/man2/prctl.2.html #[inline] pub fn name() -> io::Result { let mut buffer = [0_u8; 16]; unsafe { prctl_2args(PR_GET_NAME, buffer.as_mut_ptr().cast())? }; let len = buffer.iter().position(|&x| x == 0_u8).unwrap_or(0); CString::new(&buffer[..len]).map_err(|_r| io::Errno::ILSEQ) } const PR_SET_NAME: c_int = 15; /// Set the name of the calling thread. 
/// /// # References /// - [`prctl(PR_SET_NAME,...)`] /// /// [`prctl(PR_SET_NAME,...)`]: https://man7.org/linux/man-pages/man2/prctl.2.html #[inline] pub fn set_name(name: &CStr) -> io::Result<()> { unsafe { prctl_2args(PR_SET_NAME, name.as_ptr() as *mut _) }.map(|_r| ()) } // // PR_GET_SECCOMP/PR_SET_SECCOMP // //const PR_GET_SECCOMP: c_int = 21; const SECCOMP_MODE_DISABLED: i32 = 0; const SECCOMP_MODE_STRICT: i32 = 1; const SECCOMP_MODE_FILTER: i32 = 2; /// `SECCOMP_MODE_*`. #[derive(Copy, Clone, Debug, Eq, PartialEq)] #[repr(i32)] pub enum SecureComputingMode { /// Secure computing is not in use. Disabled = SECCOMP_MODE_DISABLED, /// Use hard-coded filter. Strict = SECCOMP_MODE_STRICT, /// Use user-supplied filter. Filter = SECCOMP_MODE_FILTER, } impl TryFrom for SecureComputingMode { type Error = io::Errno; fn try_from(value: i32) -> Result { match value { SECCOMP_MODE_DISABLED => Ok(Self::Disabled), SECCOMP_MODE_STRICT => Ok(Self::Strict), SECCOMP_MODE_FILTER => Ok(Self::Filter), _ => Err(io::Errno::RANGE), } } } /* /// Get the secure computing mode of the calling thread. /// /// If the caller is not in secure computing mode, this returns [`SecureComputingMode::Disabled`]. /// If the caller is in strict secure computing mode, then this call will cause a `SIGKILL` signal /// to be sent to the process. /// If the caller is in filter mode, and this system call is allowed by the seccomp filters, /// it returns [`SecureComputingMode::Filter`]; otherwise, the process is killed with /// a `SIGKILL` signal. /// /// Since Linux 3.8, the Seccomp field of the `/proc/[pid]/status` file provides a method /// of obtaining the same information, without the risk that the process is killed; see `proc(5)`. 
/// /// # References /// - [`prctl(PR_GET_SECCOMP,...)`] /// /// [`prctl(PR_GET_SECCOMP,...)`]: https://man7.org/linux/man-pages/man2/prctl.2.html #[inline] pub fn secure_computing_mode() -> io::Result { unsafe { prctl_1arg(PR_GET_SECCOMP) }.and_then(TryInto::try_into) } */ const PR_SET_SECCOMP: c_int = 22; /// Set the secure computing mode for the calling thread, to limit the available system calls. /// /// # References /// - [`prctl(PR_SET_SECCOMP,...)`] /// /// [`prctl(PR_SET_SECCOMP,...)`]: https://man7.org/linux/man-pages/man2/prctl.2.html #[inline] pub fn set_secure_computing_mode(mode: SecureComputingMode) -> io::Result<()> { unsafe { prctl_2args(PR_SET_SECCOMP, mode as usize as *mut _) }.map(|_r| ()) } // // PR_CAPBSET_READ/PR_CAPBSET_DROP // const PR_CAPBSET_READ: c_int = 23; const CAP_CHOWN: u32 = 0; const CAP_DAC_OVERRIDE: u32 = 1; const CAP_DAC_READ_SEARCH: u32 = 2; const CAP_FOWNER: u32 = 3; const CAP_FSETID: u32 = 4; const CAP_KILL: u32 = 5; const CAP_SETGID: u32 = 6; const CAP_SETUID: u32 = 7; const CAP_SETPCAP: u32 = 8; const CAP_LINUX_IMMUTABLE: u32 = 9; const CAP_NET_BIND_SERVICE: u32 = 10; const CAP_NET_BROADCAST: u32 = 11; const CAP_NET_ADMIN: u32 = 12; const CAP_NET_RAW: u32 = 13; const CAP_IPC_LOCK: u32 = 14; const CAP_IPC_OWNER: u32 = 15; const CAP_SYS_MODULE: u32 = 16; const CAP_SYS_RAWIO: u32 = 17; const CAP_SYS_CHROOT: u32 = 18; const CAP_SYS_PTRACE: u32 = 19; const CAP_SYS_PACCT: u32 = 20; const CAP_SYS_ADMIN: u32 = 21; const CAP_SYS_BOOT: u32 = 22; const CAP_SYS_NICE: u32 = 23; const CAP_SYS_RESOURCE: u32 = 24; const CAP_SYS_TIME: u32 = 25; const CAP_SYS_TTY_CONFIG: u32 = 26; const CAP_MKNOD: u32 = 27; const CAP_LEASE: u32 = 28; const CAP_AUDIT_WRITE: u32 = 29; const CAP_AUDIT_CONTROL: u32 = 30; const CAP_SETFCAP: u32 = 31; const CAP_MAC_OVERRIDE: u32 = 32; const CAP_MAC_ADMIN: u32 = 33; const CAP_SYSLOG: u32 = 34; const CAP_WAKE_ALARM: u32 = 35; const CAP_BLOCK_SUSPEND: u32 = 36; const CAP_AUDIT_READ: u32 = 37; const CAP_PERFMON: u32 = 
38; const CAP_BPF: u32 = 39; const CAP_CHECKPOINT_RESTORE: u32 = 40; /// Linux per-thread capability. #[derive(Copy, Clone, Debug, Eq, PartialEq)] #[repr(u32)] pub enum Capability { /// In a system with the `_POSIX_CHOWN_RESTRICTED` option defined, this overrides /// the restriction of changing file ownership and group ownership. ChangeOwnership = CAP_CHOWN, /// Override all DAC access, including ACL execute access if `_POSIX_ACL` is defined. /// Excluding DAC access covered by [`Capability::LinuxImmutable`]. DACOverride = CAP_DAC_OVERRIDE, /// Overrides all DAC restrictions regarding read and search on files and directories, /// including ACL restrictions if `_POSIX_ACL` is defined. Excluding DAC access covered /// by [`Capability::LinuxImmutable`]. DACReadSearch = CAP_DAC_READ_SEARCH, /// Overrides all restrictions about allowed operations on files, where file owner ID must be /// equal to the user ID, except where [`Capability::FileSetID`] is applicable. /// It doesn't override MAC and DAC restrictions. FileOwner = CAP_FOWNER, /// Overrides the following restrictions that the effective user ID shall match the file owner /// ID when setting the `S_ISUID` and `S_ISGID` bits on that file; that the effective group ID /// (or one of the supplementary group IDs) shall match the file owner ID when setting the /// `S_ISGID` bit on that file; that the `S_ISUID` and `S_ISGID` bits are cleared on successful /// return from `chown` (not implemented). FileSetID = CAP_FSETID, /// Overrides the restriction that the real or effective user ID of a process sending a signal /// must match the real or effective user ID of the process receiving the signal. Kill = CAP_KILL, /// Allows `setgid` manipulation. Allows `setgroups`. Allows forged gids on socket /// credentials passing. SetGroupID = CAP_SETGID, /// Allows `set*uid` manipulation (including fsuid). Allows forged pids on socket /// credentials passing. 
SetUserID = CAP_SETUID, /// Without VFS support for capabilities: /// - Transfer any capability in your permitted set to any pid. /// - remove any capability in your permitted set from any pid. /// With VFS support for capabilities (neither of above, but) /// - Add any capability from current's capability bounding set to the current process' /// inheritable set. /// - Allow taking bits out of capability bounding set. /// - Allow modification of the securebits for a process. SetPermittedCapabilities = CAP_SETPCAP, /// Allow modification of `S_IMMUTABLE` and `S_APPEND` file attributes. LinuxImmutable = CAP_LINUX_IMMUTABLE, /// Allows binding to TCP/UDP sockets below 1024. Allows binding to ATM VCIs below 32. NetBindService = CAP_NET_BIND_SERVICE, /// Allow broadcasting, listen to multicast. NetBroadcast = CAP_NET_BROADCAST, /// Allow interface configuration. Allow administration of IP firewall, masquerading and /// accounting. Allow setting debug option on sockets. Allow modification of routing tables. /// Allow setting arbitrary process / process group ownership on sockets. Allow binding to any /// address for transparent proxying (also via [`Capability::NetRaw`]). Allow setting TOS /// (type of service). Allow setting promiscuous mode. Allow clearing driver statistics. /// Allow multicasting. Allow read/write of device-specific registers. Allow activation of ATM /// control sockets. NetAdmin = CAP_NET_ADMIN, /// Allow use of `RAW` sockets. Allow use of `PACKET` sockets. Allow binding to any address for /// transparent proxying (also via [`Capability::NetAdmin`]). NetRaw = CAP_NET_RAW, /// Allow locking of shared memory segments. Allow mlock and mlockall (which doesn't really have /// anything to do with IPC). IPCLock = CAP_IPC_LOCK, /// Override IPC ownership checks. IPCOwner = CAP_IPC_OWNER, /// Insert and remove kernel modules - modify kernel without limit. SystemModule = CAP_SYS_MODULE, /// Allow ioperm/iopl access. 
Allow sending USB messages to any device via `/dev/bus/usb`. SystemRawIO = CAP_SYS_RAWIO, /// Allow use of `chroot`. SystemChangeRoot = CAP_SYS_CHROOT, /// Allow `ptrace` of any process. SystemProcessTrace = CAP_SYS_PTRACE, /// Allow configuration of process accounting. SystemProcessAccounting = CAP_SYS_PACCT, /// Allow configuration of the secure attention key. Allow administration of the random device. /// Allow examination and configuration of disk quotas. Allow setting the domainname. /// Allow setting the hostname. Allow `mount` and `umount`, setting up new smb connection. /// Allow some autofs root ioctls. Allow nfsservctl. Allow `VM86_REQUEST_IRQ`. /// Allow to read/write pci config on alpha. Allow `irix_prctl` on mips (setstacksize). /// Allow flushing all cache on m68k (`sys_cacheflush`). Allow removing semaphores. /// Used instead of [`Capability::ChangeOwnership`] to "chown" IPC message queues, semaphores /// and shared memory. Allow locking/unlocking of shared memory segment. Allow turning swap /// on/off. Allow forged pids on socket credentials passing. Allow setting readahead and /// flushing buffers on block devices. Allow setting geometry in floppy driver. Allow turning /// DMA on/off in `xd` driver. Allow administration of md devices (mostly the above, but some /// extra ioctls). Allow tuning the ide driver. Allow access to the nvram device. Allow /// administration of `apm_bios`, serial and bttv (TV) device. Allow manufacturer commands in /// isdn CAPI support driver. Allow reading non-standardized portions of pci configuration /// space. Allow DDI debug ioctl on sbpcd driver. Allow setting up serial ports. Allow sending /// raw qic-117 commands. Allow enabling/disabling tagged queuing on SCSI controllers and /// sending arbitrary SCSI commands. Allow setting encryption key on loopback filesystem. /// Allow setting zone reclaim policy. 
Allow everything under /// [`Capability::BerkeleyPacketFilters`] and [`Capability::PerformanceMonitoring`] for backward /// compatibility. SystemAdmin = CAP_SYS_ADMIN, /// Allow use of `reboot`. SystemBoot = CAP_SYS_BOOT, /// Allow raising priority and setting priority on other (different UID) processes. Allow use of /// FIFO and round-robin (realtime) scheduling on own processes and setting the scheduling /// algorithm used by another process. Allow setting cpu affinity on other processes. /// Allow setting realtime ioprio class. Allow setting ioprio class on other processes. SystemNice = CAP_SYS_NICE, /// Override resource limits. Set resource limits. Override quota limits. Override reserved /// space on ext2 filesystem. Modify data journaling mode on ext3 filesystem (uses journaling /// resources). NOTE: ext2 honors fsuid when checking for resource overrides, so you can /// override using fsuid too. Override size restrictions on IPC message queues. Allow more than /// 64hz interrupts from the real-time clock. Override max number of consoles on console /// allocation. Override max number of keymaps. Control memory reclaim behavior. SystemResource = CAP_SYS_RESOURCE, /// Allow manipulation of system clock. Allow `irix_stime` on mips. Allow setting the real-time /// clock. SystemTime = CAP_SYS_TIME, /// Allow configuration of tty devices. Allow `vhangup` of tty. SystemTTYConfig = CAP_SYS_TTY_CONFIG, /// Allow the privileged aspects of `mknod`. MakeNode = CAP_MKNOD, /// Allow taking of leases on files. Lease = CAP_LEASE, /// Allow writing the audit log via unicast netlink socket. AuditWrite = CAP_AUDIT_WRITE, /// Allow configuration of audit via unicast netlink socket. AuditControl = CAP_AUDIT_CONTROL, /// Set or remove capabilities on files. Map `uid=0` into a child user namespace. SetFileCapabilities = CAP_SETFCAP, /// Override MAC access. The base kernel enforces no MAC policy. 
An LSM may enforce a MAC /// policy, and if it does and it chooses to implement capability based overrides of that /// policy, this is the capability it should use to do so. MACOverride = CAP_MAC_OVERRIDE, /// Allow MAC configuration or state changes. The base kernel requires no MAC configuration. /// An LSM may enforce a MAC policy, and if it does and it chooses to implement capability based /// checks on modifications to that policy or the data required to maintain it, this is the /// capability it should use to do so. MACAdmin = CAP_MAC_ADMIN, /// Allow configuring the kernel's `syslog` (`printk` behaviour). SystemLog = CAP_SYSLOG, /// Allow triggering something that will wake the system. WakeAlarm = CAP_WAKE_ALARM, /// Allow preventing system suspends. BlockSuspend = CAP_BLOCK_SUSPEND, /// Allow reading the audit log via multicast netlink socket. AuditRead = CAP_AUDIT_READ, /// Allow system performance and observability privileged operations using `perf_events`, /// `i915_perf` and other kernel subsystems. PerformanceMonitoring = CAP_PERFMON, /// This capability allows the following BPF operations: /// - Creating all types of BPF maps /// - Advanced verifier features /// - Indirect variable access /// - Bounded loops /// - BPF to BPF function calls /// - Scalar precision tracking /// - Larger complexity limits /// - Dead code elimination /// - And potentially other features /// - Loading BPF Type Format (BTF) data /// - Retrieve `xlated` and JITed code of BPF programs /// - Use `bpf_spin_lock` helper /// /// [`Capability::PerformanceMonitoring`] relaxes the verifier checks further: /// - BPF progs can use of pointer-to-integer conversions /// - speculation attack hardening measures are bypassed /// - `bpf_probe_read` to read arbitrary kernel memory is allowed /// - `bpf_trace_printk` to print kernel memory is allowed /// /// [`Capability::SystemAdmin`] is required to use bpf_probe_write_user. 
/// /// [`Capability::SystemAdmin`] is required to iterate system wide loaded /// programs, maps, links, BTFs and convert their IDs to file descriptors. /// /// [`Capability::PerformanceMonitoring`] and [`Capability::BerkeleyPacketFilters`] are required /// to load tracing programs. /// [`Capability::NetAdmin`] and [`Capability::BerkeleyPacketFilters`] are required to load /// networking programs. BerkeleyPacketFilters = CAP_BPF, /// Allow checkpoint/restore related operations. Allow PID selection during `clone3`. /// Allow writing to `ns_last_pid`. CheckpointRestore = CAP_CHECKPOINT_RESTORE, } /// Check if the specified capability is in the calling thread's capability bounding set. /// /// # References /// - [`prctl(PR_CAPBSET_READ,...)`] /// /// [`prctl(PR_CAPBSET_READ,...)`]: https://man7.org/linux/man-pages/man2/prctl.2.html #[inline] pub fn is_in_capability_bounding_set(capability: Capability) -> io::Result { unsafe { prctl_2args(PR_CAPBSET_READ, capability as usize as *mut _) }.map(|r| r != 0) } const PR_CAPBSET_DROP: c_int = 24; /// If the calling thread has the [`Capability::SetPermittedCapabilities`] capability within its /// user namespace, then drop the specified capability from the thread's capability bounding set. /// /// # References /// - [`prctl(PR_CAPBSET_DROP,...)`] /// /// [`prctl(PR_CAPBSET_DROP,...)`]: https://man7.org/linux/man-pages/man2/prctl.2.html #[inline] pub fn remove_capability_from_capability_bounding_set(capability: Capability) -> io::Result<()> { unsafe { prctl_2args(PR_CAPBSET_DROP, capability as usize as *mut _) }.map(|_r| ()) } // // PR_GET_SECUREBITS/PR_SET_SECUREBITS // const PR_GET_SECUREBITS: c_int = 27; bitflags! { /// `SECBIT_*`. pub struct CapabilitiesSecureBits: u32 { /// If this bit is set, then the kernel does not grant capabilities when /// a `set-user-ID-root` program is executed, or when a process with an effective or real /// UID of 0 calls `execve`. const NO_ROOT = 1_u32 << 0; /// Set [`NO_ROOT`] irreversibly. 
const NO_ROOT_LOCKED = 1_u32 << 1; /// Setting this flag stops the kernel from adjusting the process's permitted, effective, /// and ambient capability sets when the thread's effective and filesystem UIDs are switched /// between zero and nonzero values. const NO_SETUID_FIXUP = 1_u32 << 2; /// Set [`NO_SETUID_FIXUP`] irreversibly. const NO_SETUID_FIXUP_LOCKED = 1_u32 << 3; /// Setting this flag allows a thread that has one or more 0 UIDs to retain capabilities in /// its permitted set when it switches all of its UIDs to nonzero values. const KEEP_CAPS = 1_u32 << 4; /// Set [`KEEP_CAPS`] irreversibly. const KEEP_CAPS_LOCKED = 1_u32 << 5; /// Setting this flag disallows raising ambient capabilities via the `prctl`'s /// `PR_CAP_AMBIENT_RAISE` operation. const NO_CAP_AMBIENT_RAISE = 1_u32 << 6; /// Set [`NO_CAP_AMBIENT_RAISE`] irreversibly. const NO_CAP_AMBIENT_RAISE_LOCKED = 1_u32 << 7; } } /// Get the `securebits` flags of the calling thread. /// /// # References /// - [`prctl(PR_GET_SECUREBITS,...)`] /// /// [`prctl(PR_GET_SECUREBITS,...)`]: https://man7.org/linux/man-pages/man2/prctl.2.html #[inline] pub fn capabilities_secure_bits() -> io::Result { let r = unsafe { prctl_1arg(PR_GET_SECUREBITS)? } as c_uint; CapabilitiesSecureBits::from_bits(r).ok_or(io::Errno::RANGE) } const PR_SET_SECUREBITS: c_int = 28; /// Set the `securebits` flags of the calling thread. /// /// # References /// - [`prctl(PR_SET_SECUREBITS,...)`] /// /// [`prctl(PR_SET_SECUREBITS,...)`]: https://man7.org/linux/man-pages/man2/prctl.2.html #[inline] pub fn set_capabilities_secure_bits(bits: CapabilitiesSecureBits) -> io::Result<()> { unsafe { prctl_2args(PR_SET_SECUREBITS, bits.bits() as usize as *mut _) }.map(|_r| ()) } // // PR_GET_TIMERSLACK/PR_SET_TIMERSLACK // const PR_GET_TIMERSLACK: c_int = 30; /// Get the `current` timer slack value of the calling thread. 
///
/// # References
/// - [`prctl(PR_GET_TIMERSLACK,...)`]
///
/// [`prctl(PR_GET_TIMERSLACK,...)`]: https://man7.org/linux/man-pages/man2/prctl.2.html
#[inline]
pub fn current_timer_slack() -> io::Result<u64> {
    unsafe { prctl_1arg(PR_GET_TIMERSLACK) }.map(|r| r as u64)
}

const PR_SET_TIMERSLACK: c_int = 29;

/// Sets the `current` timer slack value for the calling thread.
///
/// `None` is passed to the kernel as `0`. A value that does not fit in `usize` is rejected
/// with `Errno::RANGE` before any system call is made.
///
/// # References
/// - [`prctl(PR_SET_TIMERSLACK,...)`]
///
/// [`prctl(PR_SET_TIMERSLACK,...)`]: https://man7.org/linux/man-pages/man2/prctl.2.html
#[inline]
pub fn set_current_timer_slack(value: Option<NonZeroU64>) -> io::Result<()> {
    let value = usize::try_from(value.map_or(0, NonZeroU64::get)).map_err(|_r| io::Errno::RANGE)?;
    unsafe { prctl_2args(PR_SET_TIMERSLACK, value as *mut _) }.map(|_r| ())
}

//
// PR_GET_NO_NEW_PRIVS/PR_SET_NO_NEW_PRIVS
//

const PR_GET_NO_NEW_PRIVS: c_int = 39;

/// Get the value of the `no_new_privs` attribute for the calling thread.
///
/// Returns `true` when the value reported by the kernel is non-zero.
///
/// # References
/// - [`prctl(PR_GET_NO_NEW_PRIVS,...)`]
///
/// [`prctl(PR_GET_NO_NEW_PRIVS,...)`]: https://man7.org/linux/man-pages/man2/prctl.2.html
#[inline]
pub fn no_new_privs() -> io::Result<bool> {
    unsafe { prctl_1arg(PR_GET_NO_NEW_PRIVS) }.map(|r| r != 0)
}

const PR_SET_NO_NEW_PRIVS: c_int = 38;

/// Set the calling thread's `no_new_privs` attribute.
///
/// # References
/// - [`prctl(PR_SET_NO_NEW_PRIVS,...)`]
///
/// [`prctl(PR_SET_NO_NEW_PRIVS,...)`]: https://man7.org/linux/man-pages/man2/prctl.2.html
#[inline]
pub fn set_no_new_privs(no_new_privs: bool) -> io::Result<()> {
    unsafe { prctl_2args(PR_SET_NO_NEW_PRIVS, no_new_privs as usize as *mut _) }.map(|_r| ())
}

//
// PR_GET_TID_ADDRESS
//

const PR_GET_TID_ADDRESS: c_int = 40;

/// Get the `clear_child_tid` address set by `set_tid_address`
/// and `clone`'s `CLONE_CHILD_CLEARTID` flag.
///
/// A null address reported by the kernel is returned as `None`.
///
/// # References
/// - [`prctl(PR_GET_TID_ADDRESS,...)`]
///
/// [`prctl(PR_GET_TID_ADDRESS,...)`]: https://man7.org/linux/man-pages/man2/prctl.2.html
#[inline]
pub fn get_clear_child_tid_address() -> io::Result<Option<NonNull<c_void>>> {
    unsafe { prctl_get_at_arg2_optional::<*mut c_void>(PR_GET_TID_ADDRESS) }.map(NonNull::new)
}

//
// PR_GET_THP_DISABLE/PR_SET_THP_DISABLE
//

const PR_GET_THP_DISABLE: c_int = 42;

/// Get the current setting of the `THP disable` flag for the calling thread.
///
/// Returns `true` when the value reported by the kernel is non-zero.
///
/// # References
/// - [`prctl(PR_GET_THP_DISABLE,...)`]
///
/// [`prctl(PR_GET_THP_DISABLE,...)`]: https://man7.org/linux/man-pages/man2/prctl.2.html
#[inline]
pub fn transparent_huge_pages_are_disabled() -> io::Result<bool> {
    unsafe { prctl_1arg(PR_GET_THP_DISABLE) }.map(|r| r != 0)
}

const PR_SET_THP_DISABLE: c_int = 41;

/// Set the state of the `THP disable` flag for the calling thread.
///
/// # References
/// - [`prctl(PR_SET_THP_DISABLE,...)`]
///
/// [`prctl(PR_SET_THP_DISABLE,...)`]: https://man7.org/linux/man-pages/man2/prctl.2.html
#[inline]
pub fn disable_transparent_huge_pages(thp_disable: bool) -> io::Result<()> {
    unsafe { prctl_2args(PR_SET_THP_DISABLE, thp_disable as usize as *mut _) }.map(|_r| ())
}

//
// PR_CAP_AMBIENT
//

const PR_CAP_AMBIENT: c_int = 47;

const PR_CAP_AMBIENT_IS_SET: usize = 1;

/// Check if the specified capability is in the ambient set.
///
/// Returns `true` when the value reported by the kernel is non-zero.
///
/// # References
/// - [`prctl(PR_CAP_AMBIENT,PR_CAP_AMBIENT_IS_SET,...)`]
///
/// [`prctl(PR_CAP_AMBIENT,PR_CAP_AMBIENT_IS_SET,...)`]: https://man7.org/linux/man-pages/man2/prctl.2.html
#[inline]
pub fn capability_is_in_ambient_capability_set(capability: Capability) -> io::Result<bool> {
    let cap = capability as usize as *mut _;
    unsafe { prctl_3args(PR_CAP_AMBIENT, PR_CAP_AMBIENT_IS_SET as *mut _, cap) }.map(|r| r != 0)
}

const PR_CAP_AMBIENT_CLEAR_ALL: usize = 4;

/// Remove all capabilities from the ambient set.
/// /// # References /// - [`prctl(PR_CAP_AMBIENT,PR_CAP_AMBIENT_CLEAR_ALL,...)`] /// /// [`prctl(PR_CAP_AMBIENT,PR_CAP_AMBIENT_CLEAR_ALL,...)`]: https://man7.org/linux/man-pages/man2/prctl.2.html #[inline] pub fn clear_ambient_capability_set() -> io::Result<()> { unsafe { prctl_2args(PR_CAP_AMBIENT, PR_CAP_AMBIENT_CLEAR_ALL as *mut _) }.map(|_r| ()) } const PR_CAP_AMBIENT_RAISE: usize = 2; const PR_CAP_AMBIENT_LOWER: usize = 3; /// Add or remove the specified capability to the ambient set. /// /// # References /// - [`prctl(PR_CAP_AMBIENT,...)`] /// /// [`prctl(PR_CAP_AMBIENT,...)`]: https://man7.org/linux/man-pages/man2/prctl.2.html #[inline] pub fn configure_capability_in_ambient_capability_set( capability: Capability, enable: bool, ) -> io::Result<()> { let sub_operation = if enable { PR_CAP_AMBIENT_RAISE } else { PR_CAP_AMBIENT_LOWER }; let cap = capability as usize as *mut _; unsafe { prctl_3args(PR_CAP_AMBIENT, sub_operation as *mut _, cap) }.map(|_r| ()) } // // PR_SVE_GET_VL/PR_SVE_SET_VL // const PR_SVE_GET_VL: c_int = 51; const PR_SVE_VL_LEN_MASK: u32 = 0xffff; const PR_SVE_VL_INHERIT: u32 = 1_u32 << 17; /// Scalable Vector Extension vector length configuration. #[derive(Copy, Clone, Debug, Eq, PartialEq)] pub struct SVEVectorLengthConfig { /// Vector length in bytes. pub vector_length_in_bytes: u32, /// Vector length inherited across `execve`. pub vector_length_inherited_across_execve: bool, } /// Get the thread's current SVE vector length configuration. /// /// # References /// - [`prctl(PR_SVE_GET_VL,...)`] /// /// [`prctl(PR_SVE_GET_VL,...)`]: https://man7.org/linux/man-pages/man2/prctl.2.html #[inline] pub fn sve_vector_length_configuration() -> io::Result { let bits = unsafe { prctl_1arg(PR_SVE_GET_VL)? 
} as c_uint; Ok(SVEVectorLengthConfig { vector_length_in_bytes: bits & PR_SVE_VL_LEN_MASK, vector_length_inherited_across_execve: (bits & PR_SVE_VL_INHERIT) != 0, }) } const PR_SVE_SET_VL: c_int = 50; const PR_SVE_SET_VL_ONEXEC: u32 = 1_u32 << 18; /// Configure the thread's vector length of Scalable Vector Extension. /// /// # References /// - [`prctl(PR_SVE_SET_VL,...)`] /// /// # Safety /// /// Please ensure the conditions necessary to safely call this function, /// as detailed in the references above. /// /// [`prctl(PR_SVE_SET_VL,...)`]: https://man7.org/linux/man-pages/man2/prctl.2.html #[inline] pub unsafe fn set_sve_vector_length_configuration( vector_length_in_bytes: usize, vector_length_inherited_across_execve: bool, defer_change_to_next_execve: bool, ) -> io::Result<()> { let vector_length_in_bytes = u32::try_from(vector_length_in_bytes).map_err(|_r| io::Errno::RANGE)?; let mut bits = vector_length_in_bytes & PR_SVE_VL_LEN_MASK; if vector_length_inherited_across_execve { bits |= PR_SVE_VL_INHERIT; } if defer_change_to_next_execve { bits |= PR_SVE_SET_VL_ONEXEC; } prctl_2args(PR_SVE_SET_VL, bits as usize as *mut _).map(|_r| ()) } // // PR_PAC_RESET_KEYS // const PR_PAC_RESET_KEYS: c_int = 54; /// Securely reset the thread's pointer authentication keys to fresh random values generated /// by the kernel. /// /// # References /// - [`prctl(PR_PAC_RESET_KEYS,...)`] /// /// # Safety /// /// Please ensure the conditions necessary to safely call this function, /// as detailed in the references above. 
///
/// [`prctl(PR_PAC_RESET_KEYS,...)`]: https://man7.org/linux/man-pages/man2/prctl.2.html
#[inline]
pub unsafe fn reset_pointer_authentication_keys(
    keys: Option<PointerAuthenticationKeys>,
) -> io::Result<()> {
    // `None` is passed to the kernel as `0`.
    let keys = keys.as_ref().map_or(0_u32, PointerAuthenticationKeys::bits);
    prctl_2args(PR_PAC_RESET_KEYS, keys as usize as *mut _).map(|_r| ())
}

//
// PR_GET_TAGGED_ADDR_CTRL/PR_SET_TAGGED_ADDR_CTRL
//

const PR_GET_TAGGED_ADDR_CTRL: c_int = 56;

const PR_MTE_TAG_SHIFT: u32 = 3;
const PR_MTE_TAG_MASK: u32 = 0xffff_u32 << PR_MTE_TAG_SHIFT;

bitflags! {
    /// Zero means addresses that are passed for the purpose of being dereferenced by the kernel must be untagged.
    pub struct TaggedAddressMode: u32 {
        /// Addresses that are passed for the purpose of being dereferenced by the kernel may be tagged.
        const ENABLED = 1_u32 << 0;
        /// Synchronous tag check fault mode.
        const TCF_SYNC = 1_u32 << 1;
        /// Asynchronous tag check fault mode.
        const TCF_ASYNC = 1_u32 << 2;
    }
}

/// Get the current tagged address mode for the calling thread.
///
/// Returns the mode decoded from the low three bits of the kernel's reply, together with
/// the 16-bit MTE tag field that follows them.
///
/// # References
/// - [`prctl(PR_GET_TAGGED_ADDR_CTRL,...)`]
///
/// [`prctl(PR_GET_TAGGED_ADDR_CTRL,...)`]: https://man7.org/linux/man-pages/man2/prctl.2.html
#[inline]
pub fn current_tagged_address_mode() -> io::Result<(Option<TaggedAddressMode>, u32)> {
    let r = unsafe { prctl_1arg(PR_GET_TAGGED_ADDR_CTRL)? } as c_uint;
    let mode = r & 0b111_u32;
    let mte_tag = (r & PR_MTE_TAG_MASK) >> PR_MTE_TAG_SHIFT;
    Ok((TaggedAddressMode::from_bits(mode), mte_tag))
}

const PR_SET_TAGGED_ADDR_CTRL: c_int = 55;

/// Controls support for passing tagged user-space addresses to the kernel.
///
/// # References
/// - [`prctl(PR_SET_TAGGED_ADDR_CTRL,...)`]
///
/// # Safety
///
/// Please ensure the conditions necessary to safely call this function,
/// as detailed in the references above.
///
/// [`prctl(PR_SET_TAGGED_ADDR_CTRL,...)`]: https://man7.org/linux/man-pages/man2/prctl.2.html
#[inline]
pub unsafe fn set_current_tagged_address_mode(
    mode: Option<TaggedAddressMode>,
    mte_tag: u32,
) -> io::Result<()> {
    // `None` contributes no mode bits; `mte_tag` is shifted into its field and any bits
    // outside `PR_MTE_TAG_MASK` are discarded.
    let config = mode.as_ref().map_or(0_u32, TaggedAddressMode::bits)
        | ((mte_tag << PR_MTE_TAG_SHIFT) & PR_MTE_TAG_MASK);
    prctl_2args(PR_SET_TAGGED_ADDR_CTRL, config as usize as *mut _).map(|_r| ())
}

//
// PR_SET_SYSCALL_USER_DISPATCH
//

const PR_SET_SYSCALL_USER_DISPATCH: c_int = 59;

const PR_SYS_DISPATCH_OFF: usize = 0;

/// Disable Syscall User Dispatch mechanism.
///
/// # References
/// - [`prctl(PR_SET_SYSCALL_USER_DISPATCH,PR_SYS_DISPATCH_OFF,...)`]
///
/// # Safety
///
/// Please ensure the conditions necessary to safely call this function,
/// as detailed in the references above.
///
/// [`prctl(PR_SET_SYSCALL_USER_DISPATCH,PR_SYS_DISPATCH_OFF,...)`]: https://man7.org/linux/man-pages/man2/prctl.2.html
#[inline]
pub unsafe fn disable_syscall_user_dispatch() -> io::Result<()> {
    prctl_2args(PR_SET_SYSCALL_USER_DISPATCH, PR_SYS_DISPATCH_OFF as *mut _).map(|_r| ())
}

const PR_SYS_DISPATCH_ON: usize = 1;

/// Allow system calls to be executed.
const SYSCALL_DISPATCH_FILTER_ALLOW: u8 = 0;
/// Block system calls from executing.
const SYSCALL_DISPATCH_FILTER_BLOCK: u8 = 1;

/// Value of the fast switch flag controlling system calls user dispatch mechanism without the need
/// to issue a syscall.
#[derive(Copy, Clone, Debug, Eq, PartialEq)]
#[repr(u8)]
pub enum SysCallUserDispatchFastSwitch {
    /// System calls are allowed to execute.
    Allow = SYSCALL_DISPATCH_FILTER_ALLOW,
    /// System calls are blocked from executing.
    Block = SYSCALL_DISPATCH_FILTER_BLOCK,
}

/// Converts a raw `SYSCALL_DISPATCH_FILTER_*` byte; any other value yields `Errno::RANGE`.
impl TryFrom<u8> for SysCallUserDispatchFastSwitch {
    type Error = io::Errno;

    fn try_from(value: u8) -> Result<Self, Self::Error> {
        match value {
            SYSCALL_DISPATCH_FILTER_ALLOW => Ok(Self::Allow),
            SYSCALL_DISPATCH_FILTER_BLOCK => Ok(Self::Block),
            _ => Err(io::Errno::RANGE),
        }
    }
}

/// Enable Syscall User Dispatch mechanism.
///
/// The start address and length of `always_allowed_region` and the address of
/// `fast_switch_flag` are passed to the kernel as the third, fourth and fifth `prctl`
/// arguments.
///
/// # References
/// - [`prctl(PR_SET_SYSCALL_USER_DISPATCH,PR_SYS_DISPATCH_ON,...)`]
///
/// # Safety
///
/// Please ensure the conditions necessary to safely call this function,
/// as detailed in the references above.
///
/// [`prctl(PR_SET_SYSCALL_USER_DISPATCH,PR_SYS_DISPATCH_ON,...)`]: https://man7.org/linux/man-pages/man2/prctl.2.html
#[inline]
pub unsafe fn enable_syscall_user_dispatch(
    always_allowed_region: &[u8],
    fast_switch_flag: &AtomicU8,
) -> io::Result<()> {
    syscalls::prctl(
        PR_SET_SYSCALL_USER_DISPATCH,
        PR_SYS_DISPATCH_ON as *mut _,
        always_allowed_region.as_ptr() as *mut _,
        always_allowed_region.len() as *mut _,
        fast_switch_flag as *const AtomicU8 as *mut _,
    )
    .map(|_r| ())
}

//
// PR_SCHED_CORE
//

const PR_SCHED_CORE: c_int = 62;

const PR_SCHED_CORE_GET: usize = 0;

const PR_SCHED_CORE_SCOPE_THREAD: u32 = 0;
const PR_SCHED_CORE_SCOPE_THREAD_GROUP: u32 = 1;
const PR_SCHED_CORE_SCOPE_PROCESS_GROUP: u32 = 2;

/// `PR_SCHED_CORE_SCOPE_*`.
#[derive(Copy, Clone, Debug, Eq, PartialEq)]
#[repr(u32)]
pub enum CoreSchedulingScope {
    /// Operation will be performed for the thread.
    Thread = PR_SCHED_CORE_SCOPE_THREAD,
    /// Operation will be performed for all tasks in the task group of the process.
    ThreadGroup = PR_SCHED_CORE_SCOPE_THREAD_GROUP,
    /// Operation will be performed for all processes in the process group.
    ProcessGroup = PR_SCHED_CORE_SCOPE_PROCESS_GROUP,
}

/// Converts a raw `PR_SCHED_CORE_SCOPE_*` value; any other value yields `Errno::RANGE`.
impl TryFrom<u32> for CoreSchedulingScope {
    type Error = io::Errno;

    fn try_from(value: u32) -> Result<Self, Self::Error> {
        match value {
            PR_SCHED_CORE_SCOPE_THREAD => Ok(Self::Thread),
            PR_SCHED_CORE_SCOPE_THREAD_GROUP => Ok(Self::ThreadGroup),
            PR_SCHED_CORE_SCOPE_PROCESS_GROUP => Ok(Self::ProcessGroup),
            _ => Err(io::Errno::RANGE),
        }
    }
}

/// Get core scheduling cookie of a process.
///
/// # References
/// - [`prctl(PR_SCHED_CORE,PR_SCHED_CORE_GET,...)`]
///
/// [`prctl(PR_SCHED_CORE,PR_SCHED_CORE_GET,...)`]: https://www.kernel.org/doc/html/v5.18/admin-guide/hw-vuln/core-scheduling.html
#[inline]
pub fn core_scheduling_cookie(pid: Pid, scope: CoreSchedulingScope) -> io::Result<u64> {
    let mut value: MaybeUninit<u64> = MaybeUninit::uninit();
    unsafe {
        syscalls::prctl(
            PR_SCHED_CORE,
            PR_SCHED_CORE_GET as *mut _,
            pid.as_raw_nonzero().get() as usize as *mut _,
            scope as usize as *mut _,
            value.as_mut_ptr().cast(),
        )?;
        // Only reached when the call succeeded; the cookie is read from the out-parameter.
        Ok(value.assume_init())
    }
}

const PR_SCHED_CORE_CREATE: usize = 1;

/// Create unique core scheduling cookie.
///
/// # References
/// - [`prctl(PR_SCHED_CORE,PR_SCHED_CORE_CREATE,...)`]
///
/// [`prctl(PR_SCHED_CORE,PR_SCHED_CORE_CREATE,...)`]: https://www.kernel.org/doc/html/v5.18/admin-guide/hw-vuln/core-scheduling.html
#[inline]
pub fn create_core_scheduling_cookie(pid: Pid, scope: CoreSchedulingScope) -> io::Result<()> {
    unsafe {
        syscalls::prctl(
            PR_SCHED_CORE,
            PR_SCHED_CORE_CREATE as *mut _,
            pid.as_raw_nonzero().get() as usize as *mut _,
            scope as usize as *mut _,
            ptr::null_mut(),
        )
        .map(|_r| ())
    }
}

const PR_SCHED_CORE_SHARE_TO: usize = 2;

/// Push core scheduling cookie to a process.
///
/// # References
/// - [`prctl(PR_SCHED_CORE,PR_SCHED_CORE_SHARE_TO,...)`]
///
/// [`prctl(PR_SCHED_CORE,PR_SCHED_CORE_SHARE_TO,...)`]: https://www.kernel.org/doc/html/v5.18/admin-guide/hw-vuln/core-scheduling.html
#[inline]
pub fn push_core_scheduling_cookie(pid: Pid, scope: CoreSchedulingScope) -> io::Result<()> {
    unsafe {
        syscalls::prctl(
            PR_SCHED_CORE,
            PR_SCHED_CORE_SHARE_TO as *mut _,
            pid.as_raw_nonzero().get() as usize as *mut _,
            scope as usize as *mut _,
            ptr::null_mut(),
        )
        .map(|_r| ())
    }
}

const PR_SCHED_CORE_SHARE_FROM: usize = 3;

/// Pull core scheduling cookie from a process.
/// /// # References /// - [`prctl(PR_SCHED_CORE,PR_SCHED_CORE_SHARE_FROM,...)`] /// /// [`prctl(PR_SCHED_CORE,PR_SCHED_CORE_SHARE_FROM,...)`]: https://www.kernel.org/doc/html/v5.18/admin-guide/hw-vuln/core-scheduling.html #[inline] pub fn pull_core_scheduling_cookie(pid: Pid, scope: CoreSchedulingScope) -> io::Result<()> { unsafe { syscalls::prctl( PR_SCHED_CORE, PR_SCHED_CORE_SHARE_FROM as *mut _, pid.as_raw_nonzero().get() as usize as *mut _, scope as usize as *mut _, ptr::null_mut(), ) .map(|_r| ()) } }