diff options
Diffstat (limited to '')
219 files changed, 49459 insertions, 0 deletions
diff --git a/src/basic/MurmurHash2.c b/src/basic/MurmurHash2.c new file mode 100644 index 0000000..5859af0 --- /dev/null +++ b/src/basic/MurmurHash2.c @@ -0,0 +1,90 @@ +//----------------------------------------------------------------------------- +// MurmurHash2 was written by Austin Appleby, and is placed in the public +// domain. The author hereby disclaims copyright to this source code. + +// Note - This code makes a few assumptions about how your machine behaves - + +// 1. We can read a 4-byte value from any address without crashing +// 2. sizeof(int) == 4 + +// And it has a few limitations - + +// 1. It will not work incrementally. +// 2. It will not produce the same results on little-endian and big-endian +// machines. + +#include "MurmurHash2.h" + +#if __GNUC__ >= 7 +_Pragma("GCC diagnostic ignored \"-Wimplicit-fallthrough\"") +#endif + +//----------------------------------------------------------------------------- +// Platform-specific functions and macros + +// Microsoft Visual Studio + +#if defined(_MSC_VER) + +#define BIG_CONSTANT(x) (x) + +// Other compilers + +#else // defined(_MSC_VER) + +#define BIG_CONSTANT(x) (x##LLU) + +#endif // !defined(_MSC_VER) + +//----------------------------------------------------------------------------- + +uint32_t MurmurHash2 ( const void * key, int len, uint32_t seed ) +{ + // 'm' and 'r' are mixing constants generated offline. + // They're not really 'magic', they just happen to work well. + + const uint32_t m = 0x5bd1e995; + const int r = 24; + + // Initialize the hash to a 'random' value + + uint32_t h = seed ^ len; + + // Mix 4 bytes at a time into the hash + + const unsigned char * data = (const unsigned char *)key; + + while (len >= 4) + { + uint32_t k = *(uint32_t*)data; + + k *= m; + k ^= k >> r; + k *= m; + + h *= m; + h ^= k; + + data += 4; + len -= 4; + } + + // Handle the last few bytes of the input array + + switch(len) + { + case 3: h ^= data[2] << 16; /* fall through */ + case 2: h ^= data[1] << 8; /* fall through */ + case 1: h ^= data[0]; /* fall through */ + h *= m; + }; + + // Do a few final mixes of the hash to ensure the last few + // bytes are well-incorporated. + + h ^= h >> 13; + h *= m; + h ^= h >> 15; + + return h; +} diff --git a/src/basic/MurmurHash2.h b/src/basic/MurmurHash2.h new file mode 100644 index 0000000..1aef3af --- /dev/null +++ b/src/basic/MurmurHash2.h @@ -0,0 +1,30 @@ +//----------------------------------------------------------------------------- +// MurmurHash2 was written by Austin Appleby, and is placed in the public +// domain. The author hereby disclaims copyright to this source code. + +#pragma once + +//----------------------------------------------------------------------------- +// Platform-specific functions and macros + +// Microsoft Visual Studio + +#if defined(_MSC_VER) + +typedef unsigned char uint8_t; +typedef unsigned long uint32_t; +typedef unsigned __int64 uint64_t; + +// Other compilers + +#else // defined(_MSC_VER) + +#include <stdint.h> + +#endif // !defined(_MSC_VER) + +//----------------------------------------------------------------------------- + +uint32_t MurmurHash2 ( const void * key, int len, uint32_t seed ); + +//----------------------------------------------------------------------------- diff --git a/src/basic/af-list.c b/src/basic/af-list.c new file mode 100644 index 0000000..abad221 --- /dev/null +++ b/src/basic/af-list.c @@ -0,0 +1,40 @@ +/* SPDX-License-Identifier: LGPL-2.1+ */ + +#include <errno.h> +#include <string.h> +#include <sys/socket.h> + +#include "af-list.h" +#include "macro.h" + +static const struct af_name* lookup_af(register const char *str, register GPERF_LEN_TYPE len); + +#include "af-from-name.h" +#include "af-to-name.h" + +const char *af_to_name(int id) { + + if (id <= 0) + return NULL; + + if ((size_t) id >= ELEMENTSOF(af_names)) + return NULL; + + return af_names[id]; +} + +int af_from_name(const char *name) { + const struct af_name *sc; + + assert(name); + + sc = lookup_af(name, strlen(name)); + if (!sc) + return -EINVAL; + + return sc->id; +} + +int af_max(void) { + return ELEMENTSOF(af_names); +} diff --git a/src/basic/af-list.h b/src/basic/af-list.h new file mode 100644 index 0000000..8342323 --- /dev/null +++ b/src/basic/af-list.h @@ -0,0 +1,25 @@ +/* SPDX-License-Identifier: LGPL-2.1+ */ +#pragma once + +#include <sys/socket.h> + +#include "string-util.h" + +const char *af_to_name(int id); +int af_from_name(const char *name); + +static inline const char* af_to_name_short(int id) { + const char *f; + + if (id == AF_UNSPEC) + return "*"; + + f = af_to_name(id); + if (!f) + return "unknown"; + + assert(startswith(f, "AF_")); + return f + 3; +} + +int af_max(void); diff --git a/src/basic/af-to-name.awk b/src/basic/af-to-name.awk new file mode 100644 index 0000000..18d0a89 --- /dev/null +++ b/src/basic/af-to-name.awk @@ -0,0 +1,9 @@ +BEGIN{ + print "static const char* const af_names[] = { " +} +!/AF_FILE/ && !/AF_ROUTE/ && !/AF_LOCAL/ { + printf " [%s] = \"%s\",\n", $1, $1 +} +END{ + print "};" +} diff --git a/src/basic/alloc-util.c b/src/basic/alloc-util.c new file mode 100644 index 0000000..ab7a42c --- /dev/null +++ b/src/basic/alloc-util.c @@ -0,0 +1,81 @@ +/* SPDX-License-Identifier: LGPL-2.1+ */ + +#include <stdint.h> +#include <string.h> + +#include "alloc-util.h" +#include "macro.h" +#include "util.h" + +void* memdup(const void *p, size_t l) { + void *ret; + + assert(l == 0 || p); + + ret = malloc(l ?: 1); + if (!ret) + return NULL; + + memcpy(ret, p, l); + return ret; +} + +void* memdup_suffix0(const void *p, size_t l) { + void *ret; + + assert(l == 0 || p); + + /* The same as memdup() but place a safety NUL byte after the allocated memory */ + + ret = malloc(l + 1); + if (!ret) + return NULL; + + *((uint8_t*) mempcpy(ret, p, l)) = 0; + return ret; +} + +void* greedy_realloc(void **p, size_t *allocated, size_t need, size_t size) { + size_t a, newalloc; + void *q; + + assert(p); + assert(allocated); + + if (*allocated >= need) + return *p; + + newalloc = MAX(need * 2, 64u / size); + a = newalloc * size; + + /* check for overflows */ + if (a < size * need) + return NULL; + + q = realloc(*p, a); + if (!q) + return NULL; + + *p = q; + *allocated = newalloc; + return q; +} + +void* greedy_realloc0(void **p, size_t *allocated, size_t need, size_t size) { + size_t prev; + uint8_t *q; + + assert(p); + assert(allocated); + + prev = *allocated; + + q = greedy_realloc(p, allocated, need, size); + if (!q) + return NULL; + + if (*allocated > prev) + memzero(q + prev * size, (*allocated - prev) * size); + + return q; +} diff --git a/src/basic/alloc-util.h b/src/basic/alloc-util.h new file mode 100644 index 0000000..893a123 --- /dev/null +++ b/src/basic/alloc-util.h @@ -0,0 +1,162 @@ +/* SPDX-License-Identifier: LGPL-2.1+ */ +#pragma once + +#include <alloca.h> +#include <stddef.h> +#include <stdlib.h> +#include <string.h> + +#include "macro.h" + +typedef void (*free_func_t)(void *p); + +/* If for some reason more than 4M are allocated on the stack, let's abort immediately. It's better than + * proceeding and smashing the stack limits. Note that by default RLIMIT_STACK is 8M on Linux. */ +#define ALLOCA_MAX (4U*1024U*1024U) + +#define new(t, n) ((t*) malloc_multiply(sizeof(t), (n))) + +#define new0(t, n) ((t*) calloc((n) ?: 1, sizeof(t))) + +#define newa(t, n) \ + ({ \ + size_t _n_ = n; \ + assert(!size_multiply_overflow(sizeof(t), _n_)); \ + assert(sizeof(t)*_n_ <= ALLOCA_MAX); \ + (t*) alloca(sizeof(t)*_n_); \ + }) + +#define newa0(t, n) \ + ({ \ + size_t _n_ = n; \ + assert(!size_multiply_overflow(sizeof(t), _n_)); \ + assert(sizeof(t)*_n_ <= ALLOCA_MAX); \ + (t*) alloca0(sizeof(t)*_n_); \ + }) + +#define newdup(t, p, n) ((t*) memdup_multiply(p, sizeof(t), (n))) + +#define newdup_suffix0(t, p, n) ((t*) memdup_suffix0_multiply(p, sizeof(t), (n))) + +#define malloc0(n) (calloc(1, (n))) + +static inline void *mfree(void *memory) { + free(memory); + return NULL; +} + +#define free_and_replace(a, b) \ + ({ \ + free(a); \ + (a) = (b); \ + (b) = NULL; \ + 0; \ + }) + +void* memdup(const void *p, size_t l) _alloc_(2); +void* memdup_suffix0(const void *p, size_t l) _alloc_(2); + +#define memdupa(p, l) \ + ({ \ + void *_q_; \ + size_t _l_ = l; \ + assert(_l_ <= ALLOCA_MAX); \ + _q_ = alloca(_l_); \ + memcpy(_q_, p, _l_); \ + }) + +#define memdupa_suffix0(p, l) \ + ({ \ + void *_q_; \ + size_t _l_ = l; \ + assert(_l_ <= ALLOCA_MAX); \ + _q_ = alloca(_l_ + 1); \ + ((uint8_t*) _q_)[_l_] = 0; \ + memcpy(_q_, p, _l_); \ + }) + +static inline void freep(void *p) { + free(*(void**) p); +} + +#define _cleanup_free_ _cleanup_(freep) + +static inline bool size_multiply_overflow(size_t size, size_t need) { + return _unlikely_(need != 0 && size > (SIZE_MAX / need)); +} + +_malloc_ _alloc_(1, 2) static inline void *malloc_multiply(size_t size, size_t need) { + if (size_multiply_overflow(size, need)) + return NULL; + + return malloc(size * need ?: 1); +} + +#if !HAVE_REALLOCARRAY +_alloc_(2, 3) static inline void *reallocarray(void *p, size_t need, size_t size) { + if (size_multiply_overflow(size, need)) + return NULL; + + return realloc(p, size * need ?: 1); +} +#endif + +_alloc_(2, 3) static inline void *memdup_multiply(const void *p, size_t size, size_t need) { + if (size_multiply_overflow(size, need)) + return NULL; + + return memdup(p, size * need); +} + +_alloc_(2, 3) static inline void *memdup_suffix0_multiply(const void *p, size_t size, size_t need) { + if (size_multiply_overflow(size, need)) + return NULL; + + return memdup_suffix0(p, size * need); +} + +void* greedy_realloc(void **p, size_t *allocated, size_t need, size_t size); +void* greedy_realloc0(void **p, size_t *allocated, size_t need, size_t size); + +#define GREEDY_REALLOC(array, allocated, need) \ + greedy_realloc((void**) &(array), &(allocated), (need), sizeof((array)[0])) + +#define GREEDY_REALLOC0(array, allocated, need) \ + greedy_realloc0((void**) &(array), &(allocated), (need), sizeof((array)[0])) + +#define alloca0(n) \ + ({ \ + char *_new_; \ + size_t _len_ = n; \ + assert(_len_ <= ALLOCA_MAX); \ + _new_ = alloca(_len_); \ + (void *) memset(_new_, 0, _len_); \ + }) + +/* It's not clear what alignment glibc/gcc alloca() guarantee, hence provide a guaranteed safe version */ +#define alloca_align(size, align) \ + ({ \ + void *_ptr_; \ + size_t _mask_ = (align) - 1; \ + size_t _size_ = size; \ + assert(_size_ <= ALLOCA_MAX); \ + _ptr_ = alloca(_size_ + _mask_); \ + (void*)(((uintptr_t)_ptr_ + _mask_) & ~_mask_); \ + }) + +#define alloca0_align(size, align) \ + ({ \ + void *_new_; \ + size_t _xsize_ = (size); \ + _new_ = alloca_align(_xsize_, (align)); \ + (void*)memset(_new_, 0, _xsize_); \ + }) + +/* Takes inspiration from Rusts's Option::take() method: reads and returns a pointer, but at the same time resets it to + * NULL. See: https://doc.rust-lang.org/std/option/enum.Option.html#method.take */ +#define TAKE_PTR(ptr) \ + ({ \ + typeof(ptr) _ptr_ = (ptr); \ + (ptr) = NULL; \ + _ptr_; \ + }) diff --git a/src/basic/architecture.c b/src/basic/architecture.c new file mode 100644 index 0000000..85837b5 --- /dev/null +++ b/src/basic/architecture.c @@ -0,0 +1,178 @@ +/* SPDX-License-Identifier: LGPL-2.1+ */ + +#include <sys/utsname.h> + +#include "architecture.h" +#include "macro.h" +#include "string-table.h" +#include "string-util.h" + +int uname_architecture(void) { + + /* Return a sanitized enum identifying the architecture we are + * running on. This is based on uname(), and the user may + * hence control what this returns by using + * personality(). This puts the user in control on systems + * that can run binaries of multiple architectures. + * + * We do not translate the string returned by uname() + * 1:1. Instead we try to clean it up and break down the + * confusion on x86 and arm in particular. + * + * We do not try to distinguish CPUs not CPU features, but + * actual architectures, i.e. that have genuinely different + * code. */ + + static const struct { + const char *machine; + int arch; + } arch_map[] = { +#if defined(__x86_64__) || defined(__i386__) + { "x86_64", ARCHITECTURE_X86_64 }, + { "i686", ARCHITECTURE_X86 }, + { "i586", ARCHITECTURE_X86 }, + { "i486", ARCHITECTURE_X86 }, + { "i386", ARCHITECTURE_X86 }, +#elif defined(__powerpc__) || defined(__powerpc64__) + { "ppc64", ARCHITECTURE_PPC64 }, + { "ppc64le", ARCHITECTURE_PPC64_LE }, + { "ppc", ARCHITECTURE_PPC }, + { "ppcle", ARCHITECTURE_PPC_LE }, +#elif defined(__ia64__) + { "ia64", ARCHITECTURE_IA64 }, +#elif defined(__hppa__) || defined(__hppa64__) + { "parisc64", ARCHITECTURE_PARISC64 }, + { "parisc", ARCHITECTURE_PARISC }, +#elif defined(__s390__) || defined(__s390x__) + { "s390x", ARCHITECTURE_S390X }, + { "s390", ARCHITECTURE_S390 }, +#elif defined(__sparc__) + { "sparc64", ARCHITECTURE_SPARC64 }, + { "sparc", ARCHITECTURE_SPARC }, +#elif defined(__mips__) || defined(__mips64__) + { "mips64", ARCHITECTURE_MIPS64 }, + { "mips", ARCHITECTURE_MIPS }, +#elif defined(__alpha__) + { "alpha" , ARCHITECTURE_ALPHA }, +#elif defined(__arm__) || defined(__aarch64__) + { "aarch64", ARCHITECTURE_ARM64 }, + { "aarch64_be", ARCHITECTURE_ARM64_BE }, + { "armv4l", ARCHITECTURE_ARM }, + { "armv4b", ARCHITECTURE_ARM_BE }, + { "armv4tl", ARCHITECTURE_ARM }, + { "armv4tb", ARCHITECTURE_ARM_BE }, + { "armv5tl", ARCHITECTURE_ARM }, + { "armv5tb", ARCHITECTURE_ARM_BE }, + { "armv5tel", ARCHITECTURE_ARM }, + { "armv5teb" , ARCHITECTURE_ARM_BE }, + { "armv5tejl", ARCHITECTURE_ARM }, + { "armv5tejb", ARCHITECTURE_ARM_BE }, + { "armv6l", ARCHITECTURE_ARM }, + { "armv6b", ARCHITECTURE_ARM_BE }, + { "armv7l", ARCHITECTURE_ARM }, + { "armv7b", ARCHITECTURE_ARM_BE }, + { "armv7ml", ARCHITECTURE_ARM }, + { "armv7mb", ARCHITECTURE_ARM_BE }, + { "armv4l", ARCHITECTURE_ARM }, + { "armv4b", ARCHITECTURE_ARM_BE }, + { "armv4tl", ARCHITECTURE_ARM }, + { "armv4tb", ARCHITECTURE_ARM_BE }, + { "armv5tl", ARCHITECTURE_ARM }, + { "armv5tb", ARCHITECTURE_ARM_BE }, + { "armv5tel", ARCHITECTURE_ARM }, + { "armv5teb", ARCHITECTURE_ARM_BE }, + { "armv5tejl", ARCHITECTURE_ARM }, + { "armv5tejb", ARCHITECTURE_ARM_BE }, + { "armv6l", ARCHITECTURE_ARM }, + { "armv6b", ARCHITECTURE_ARM_BE }, + { "armv7l", ARCHITECTURE_ARM }, + { "armv7b", ARCHITECTURE_ARM_BE }, + { "armv7ml", ARCHITECTURE_ARM }, + { "armv7mb", ARCHITECTURE_ARM_BE }, + { "armv8l", ARCHITECTURE_ARM }, + { "armv8b", ARCHITECTURE_ARM_BE }, +#elif defined(__sh__) || defined(__sh64__) + { "sh5", ARCHITECTURE_SH64 }, + { "sh2", ARCHITECTURE_SH }, + { "sh2a", ARCHITECTURE_SH }, + { "sh3", ARCHITECTURE_SH }, + { "sh4", ARCHITECTURE_SH }, + { "sh4a", ARCHITECTURE_SH }, +#elif defined(__m68k__) + { "m68k", ARCHITECTURE_M68K }, +#elif defined(__tilegx__) + { "tilegx", ARCHITECTURE_TILEGX }, +#elif defined(__cris__) + { "crisv32", ARCHITECTURE_CRIS }, +#elif defined(__nios2__) + { "nios2", ARCHITECTURE_NIOS2 }, +#elif defined(__riscv__) || defined(__riscv) + /* __riscv__ is obsolete, remove in 2018 */ + { "riscv32", ARCHITECTURE_RISCV32 }, + { "riscv64", ARCHITECTURE_RISCV64 }, +# if __SIZEOF_POINTER__ == 4 + { "riscv", ARCHITECTURE_RISCV32 }, +# elif __SIZEOF_POINTER__ == 8 + { "riscv", ARCHITECTURE_RISCV64 }, +# endif +#elif defined(__arc__) + { "arc", ARCHITECTURE_ARC }, + { "arceb", ARCHITECTURE_ARC_BE }, +#else +#error "Please register your architecture here!" +#endif + }; + + static int cached = _ARCHITECTURE_INVALID; + struct utsname u; + unsigned i; + + if (cached != _ARCHITECTURE_INVALID) + return cached; + + assert_se(uname(&u) >= 0); + + for (i = 0; i < ELEMENTSOF(arch_map); i++) + if (streq(arch_map[i].machine, u.machine)) + return cached = arch_map[i].arch; + + assert_not_reached("Couldn't identify architecture. You need to patch systemd."); + return _ARCHITECTURE_INVALID; +} + +static const char *const architecture_table[_ARCHITECTURE_MAX] = { + [ARCHITECTURE_X86] = "x86", + [ARCHITECTURE_X86_64] = "x86-64", + [ARCHITECTURE_PPC] = "ppc", + [ARCHITECTURE_PPC_LE] = "ppc-le", + [ARCHITECTURE_PPC64] = "ppc64", + [ARCHITECTURE_PPC64_LE] = "ppc64-le", + [ARCHITECTURE_IA64] = "ia64", + [ARCHITECTURE_PARISC] = "parisc", + [ARCHITECTURE_PARISC64] = "parisc64", + [ARCHITECTURE_S390] = "s390", + [ARCHITECTURE_S390X] = "s390x", + [ARCHITECTURE_SPARC] = "sparc", + [ARCHITECTURE_SPARC64] = "sparc64", + [ARCHITECTURE_MIPS] = "mips", + [ARCHITECTURE_MIPS_LE] = "mips-le", + [ARCHITECTURE_MIPS64] = "mips64", + [ARCHITECTURE_MIPS64_LE] = "mips64-le", + [ARCHITECTURE_ALPHA] = "alpha", + [ARCHITECTURE_ARM] = "arm", + [ARCHITECTURE_ARM_BE] = "arm-be", + [ARCHITECTURE_ARM64] = "arm64", + [ARCHITECTURE_ARM64_BE] = "arm64-be", + [ARCHITECTURE_SH] = "sh", + [ARCHITECTURE_SH64] = "sh64", + [ARCHITECTURE_M68K] = "m68k", + [ARCHITECTURE_TILEGX] = "tilegx", + [ARCHITECTURE_CRIS] = "cris", + [ARCHITECTURE_NIOS2] = "nios2", + [ARCHITECTURE_RISCV32] = "riscv32", + [ARCHITECTURE_RISCV64] = "riscv64", + [ARCHITECTURE_ARC] = "arc", + [ARCHITECTURE_ARC_BE] = "arc-be", +}; + +DEFINE_STRING_TABLE_LOOKUP(architecture, int); diff --git a/src/basic/architecture.h b/src/basic/architecture.h new file mode 100644 index 0000000..443e890 --- /dev/null +++ b/src/basic/architecture.h @@ -0,0 +1,237 @@ +/* SPDX-License-Identifier: LGPL-2.1+ */ +#pragma once + +#include <endian.h> + +#include "macro.h" +#include "util.h" + +/* A cleaned up architecture definition. We don't want to get lost in + * processor features, models, generations or even ABIs. Hence we + * focus on general family, and distinguish word width and + * endianness. */ + +enum { + ARCHITECTURE_X86 = 0, + ARCHITECTURE_X86_64, + ARCHITECTURE_PPC, + ARCHITECTURE_PPC_LE, + ARCHITECTURE_PPC64, + ARCHITECTURE_PPC64_LE, + ARCHITECTURE_IA64, + ARCHITECTURE_PARISC, + ARCHITECTURE_PARISC64, + ARCHITECTURE_S390, + ARCHITECTURE_S390X, + ARCHITECTURE_SPARC, + ARCHITECTURE_SPARC64, + ARCHITECTURE_MIPS, + ARCHITECTURE_MIPS_LE, + ARCHITECTURE_MIPS64, + ARCHITECTURE_MIPS64_LE, + ARCHITECTURE_ALPHA, + ARCHITECTURE_ARM, + ARCHITECTURE_ARM_BE, + ARCHITECTURE_ARM64, + ARCHITECTURE_ARM64_BE, + ARCHITECTURE_SH, + ARCHITECTURE_SH64, + ARCHITECTURE_M68K, + ARCHITECTURE_TILEGX, + ARCHITECTURE_CRIS, + ARCHITECTURE_NIOS2, + ARCHITECTURE_RISCV32, + ARCHITECTURE_RISCV64, + ARCHITECTURE_ARC, + ARCHITECTURE_ARC_BE, + _ARCHITECTURE_MAX, + _ARCHITECTURE_INVALID = -1 +}; + +int uname_architecture(void); + +/* + * LIB_ARCH_TUPLE should resolve to the local library path + * architecture tuple systemd is built for, according to the Debian + * tuple list: + * + * https://wiki.debian.org/Multiarch/Tuples + * + * This is used in library search paths that should understand + * Debian's paths on all distributions. + */ + +#if defined(__x86_64__) +# define native_architecture() ARCHITECTURE_X86_64 +# if defined(__ILP32__) +# define LIB_ARCH_TUPLE "x86_64-linux-gnux32" +# else +# define LIB_ARCH_TUPLE "x86_64-linux-gnu" +# endif +# define SECONDARY_ARCHITECTURE ARCHITECTURE_X86 +#elif defined(__i386__) +# define native_architecture() ARCHITECTURE_X86 +# define LIB_ARCH_TUPLE "i386-linux-gnu" +#elif defined(__powerpc64__) +# if __BYTE_ORDER == __BIG_ENDIAN +# define native_architecture() ARCHITECTURE_PPC64 +# define LIB_ARCH_TUPLE "ppc64-linux-gnu" +# define SECONDARY_ARCHITECTURE ARCHITECTURE_PPC +# else +# define native_architecture() ARCHITECTURE_PPC64_LE +# define LIB_ARCH_TUPLE "powerpc64le-linux-gnu" +# define SECONDARY_ARCHITECTURE ARCHITECTURE_PPC_LE +# endif +#elif defined(__powerpc__) +# if __BYTE_ORDER == __BIG_ENDIAN +# define native_architecture() ARCHITECTURE_PPC +# if defined(__NO_FPRS__) +# define LIB_ARCH_TUPLE "powerpc-linux-gnuspe" +# else +# define LIB_ARCH_TUPLE "powerpc-linux-gnu" +# endif +# else +# define native_architecture() ARCHITECTURE_PPC_LE +# error "Missing LIB_ARCH_TUPLE for PPCLE" +# endif +#elif defined(__ia64__) +# define native_architecture() ARCHITECTURE_IA64 +# define LIB_ARCH_TUPLE "ia64-linux-gnu" +#elif defined(__hppa64__) +# define native_architecture() ARCHITECTURE_PARISC64 +# error "Missing LIB_ARCH_TUPLE for HPPA64" +#elif defined(__hppa__) +# define native_architecture() ARCHITECTURE_PARISC +# define LIB_ARCH_TUPLE "hppa‑linux‑gnu" +#elif defined(__s390x__) +# define native_architecture() ARCHITECTURE_S390X +# define LIB_ARCH_TUPLE "s390x-linux-gnu" +# define SECONDARY_ARCHITECTURE ARCHITECTURE_S390 +#elif defined(__s390__) +# define native_architecture() ARCHITECTURE_S390 +# define LIB_ARCH_TUPLE "s390-linux-gnu" +#elif defined(__sparc__) && defined (__arch64__) +# define native_architecture() ARCHITECTURE_SPARC64 +# define LIB_ARCH_TUPLE "sparc64-linux-gnu" +#elif defined(__sparc__) +# define native_architecture() ARCHITECTURE_SPARC +# define LIB_ARCH_TUPLE "sparc-linux-gnu" +#elif defined(__mips64) && defined(__LP64__) +# if __BYTE_ORDER == __BIG_ENDIAN +# define native_architecture() ARCHITECTURE_MIPS64 +# define LIB_ARCH_TUPLE "mips64-linux-gnuabi64" +# else +# define native_architecture() ARCHITECTURE_MIPS64_LE +# define LIB_ARCH_TUPLE "mips64el-linux-gnuabi64" +# endif +#elif defined(__mips64) +# if __BYTE_ORDER == __BIG_ENDIAN +# define native_architecture() ARCHITECTURE_MIPS64 +# define LIB_ARCH_TUPLE "mips64-linux-gnuabin32" +# else +# define native_architecture() ARCHITECTURE_MIPS64_LE +# define LIB_ARCH_TUPLE "mips64el-linux-gnuabin32" +# endif +#elif defined(__mips__) +# if __BYTE_ORDER == __BIG_ENDIAN +# define native_architecture() ARCHITECTURE_MIPS +# define LIB_ARCH_TUPLE "mips-linux-gnu" +# else +# define native_architecture() ARCHITECTURE_MIPS_LE +# define LIB_ARCH_TUPLE "mipsel-linux-gnu" +# endif +#elif defined(__alpha__) +# define native_architecture() ARCHITECTURE_ALPHA +# define LIB_ARCH_TUPLE "alpha-linux-gnu" +#elif defined(__aarch64__) +# if __BYTE_ORDER == __BIG_ENDIAN +# define native_architecture() ARCHITECTURE_ARM64_BE +# define LIB_ARCH_TUPLE "aarch64_be-linux-gnu" +# else +# define native_architecture() ARCHITECTURE_ARM64 +# define LIB_ARCH_TUPLE "aarch64-linux-gnu" +# define SECONDARY_ARCHITECTURE ARCHITECTURE_ARM +# endif +#elif defined(__arm__) +# if __BYTE_ORDER == __BIG_ENDIAN +# define native_architecture() ARCHITECTURE_ARM_BE +# if defined(__ARM_EABI__) +# if defined(__ARM_PCS_VFP) +# define LIB_ARCH_TUPLE "armeb-linux-gnueabihf" +# else +# define LIB_ARCH_TUPLE "armeb-linux-gnueabi" +# endif +# else +# define LIB_ARCH_TUPLE "armeb-linux-gnu" +# endif +# else +# define native_architecture() ARCHITECTURE_ARM +# if defined(__ARM_EABI__) +# if defined(__ARM_PCS_VFP) +# define LIB_ARCH_TUPLE "arm-linux-gnueabihf" +# else +# define LIB_ARCH_TUPLE "arm-linux-gnueabi" +# endif +# else +# define LIB_ARCH_TUPLE "arm-linux-gnu" +# endif +# endif +#elif defined(__sh64__) +# define native_architecture() ARCHITECTURE_SH64 +# error "Missing LIB_ARCH_TUPLE for SH64" +#elif defined(__sh__) +# define native_architecture() ARCHITECTURE_SH +# if defined(__SH1__) +# define LIB_ARCH_TUPLE "sh1-linux-gnu" +# elif defined(__SH2__) +# define LIB_ARCH_TUPLE "sh2-linux-gnu" +# elif defined(__SH2A__) +# define LIB_ARCH_TUPLE "sh2a-linux-gnu" +# elif defined(__SH2E__) +# define LIB_ARCH_TUPLE "sh2e-linux-gnu" +# elif defined(__SH3__) +# define LIB_ARCH_TUPLE "sh3-linux-gnu" +# elif defined(__SH3E__) +# define LIB_ARCH_TUPLE "sh3e-linux-gnu" +# elif defined(__SH4__) && !defined(__SH4A__) +# define LIB_ARCH_TUPLE "sh4-linux-gnu" +# elif defined(__SH4A__) +# define LIB_ARCH_TUPLE "sh4a-linux-gnu" +# endif +#elif defined(__m68k__) +# define native_architecture() ARCHITECTURE_M68K +# define LIB_ARCH_TUPLE "m68k-linux-gnu" +#elif defined(__tilegx__) +# define native_architecture() ARCHITECTURE_TILEGX +# define LIB_ARCH_TUPLE "tilegx-linux-gnu" +#elif defined(__cris__) +# define native_architecture() ARCHITECTURE_CRIS +# error "Missing LIB_ARCH_TUPLE for CRIS" +#elif defined(__nios2__) +# define native_architecture() ARCHITECTURE_NIOS2 +# define LIB_ARCH_TUPLE "nios2-linux-gnu" +#elif defined(__riscv__) || defined(__riscv) + /* __riscv__ is obsolete, remove in 2018 */ +# if __SIZEOF_POINTER__ == 4 +# define native_architecture() ARCHITECTURE_RISCV32 +# define LIB_ARCH_TUPLE "riscv32-linux-gnu" +# elif __SIZEOF_POINTER__ == 8 +# define native_architecture() ARCHITECTURE_RISCV64 +# define LIB_ARCH_TUPLE "riscv64-linux-gnu" +# else +# error "Unrecognized riscv architecture variant" +# endif +#elif defined(__arc__) +# if __BYTE_ORDER == __BIG_ENDIAN +# define native_architecture() ARCHITECTURE_ARC_BE +# define LIB_ARCH_TUPLE "arceb-linux" +# else +# define native_architecture() ARCHITECTURE_ARC +# define LIB_ARCH_TUPLE "arc-linux" +# endif +#else +# error "Please register your architecture here!" +#endif + +const char *architecture_to_string(int a) _const_; +int architecture_from_string(const char *s) _pure_; diff --git a/src/basic/arphrd-list.c b/src/basic/arphrd-list.c new file mode 100644 index 0000000..b6e2486 --- /dev/null +++ b/src/basic/arphrd-list.c @@ -0,0 +1,41 @@ +/* SPDX-License-Identifier: LGPL-2.1+ */ + +#include <errno.h> +#include <net/if_arp.h> +#include <string.h> + +#include "arphrd-list.h" +#include "macro.h" +#include "missing_network.h" + +static const struct arphrd_name* lookup_arphrd(register const char *str, register GPERF_LEN_TYPE len); + +#include "arphrd-from-name.h" +#include "arphrd-to-name.h" + +const char *arphrd_to_name(int id) { + + if (id <= 0) + return NULL; + + if ((size_t) id >= ELEMENTSOF(arphrd_names)) + return NULL; + + return arphrd_names[id]; +} + +int arphrd_from_name(const char *name) { + const struct arphrd_name *sc; + + assert(name); + + sc = lookup_arphrd(name, strlen(name)); + if (!sc) + return -EINVAL; + + return sc->id; +} + +int arphrd_max(void) { + return ELEMENTSOF(arphrd_names); +} diff --git a/src/basic/arphrd-list.h b/src/basic/arphrd-list.h new file mode 100644 index 0000000..5dcfe5e --- /dev/null +++ b/src/basic/arphrd-list.h @@ -0,0 +1,7 @@ +/* SPDX-License-Identifier: LGPL-2.1+ */ +#pragma once + +const char *arphrd_to_name(int id); +int arphrd_from_name(const char *name); + +int arphrd_max(void); diff --git a/src/basic/arphrd-to-name.awk b/src/basic/arphrd-to-name.awk new file mode 100644 index 0000000..5a35673 --- /dev/null +++ b/src/basic/arphrd-to-name.awk @@ -0,0 +1,9 @@ +BEGIN{ + print "static const char* const arphrd_names[] = { " +} +!/CISCO/ { + printf " [ARPHRD_%s] = \"%s\",\n", $1, $1 +} +END{ + print "};" +} diff --git a/src/basic/async.c b/src/basic/async.c new file mode 100644 index 0000000..c45ca01 --- /dev/null +++ b/src/basic/async.c @@ -0,0 +1,107 @@ +/* SPDX-License-Identifier: LGPL-2.1+ */ + +#include <errno.h> +#include <pthread.h> +#include <stddef.h> +#include <unistd.h> + +#include "async.h" +#include "fd-util.h" +#include "log.h" +#include "macro.h" +#include "process-util.h" +#include "signal-util.h" +#include "util.h" + +int asynchronous_job(void* (*func)(void *p), void *arg) { + sigset_t ss, saved_ss; + pthread_attr_t a; + pthread_t t; + int r, k; + + /* It kinda sucks that we have to resort to threads to implement an asynchronous close(), but well, such is + * life. */ + + r = pthread_attr_init(&a); + if (r > 0) + return -r; + + r = pthread_attr_setdetachstate(&a, PTHREAD_CREATE_DETACHED); + if (r > 0) { + r = -r; + goto finish; + } + + assert_se(sigfillset(&ss) >= 0); + + /* Block all signals before forking off the thread, so that the new thread is started with all signals + * blocked. This way the existence of the new thread won't affect signal handling in other threads. */ + + r = pthread_sigmask(SIG_BLOCK, &ss, &saved_ss); + if (r > 0) { + r = -r; + goto finish; + } + + r = pthread_create(&t, &a, func, arg); + + k = pthread_sigmask(SIG_SETMASK, &saved_ss, NULL); + + if (r > 0) + r = -r; + else if (k > 0) + r = -k; + else + r = 0; + +finish: + pthread_attr_destroy(&a); + return r; +} + +int asynchronous_sync(pid_t *ret_pid) { + int r; + + /* This forks off an invocation of fork() as a child process, in order to initiate synchronization to + * disk. Note that we implement this as helper process rather than thread as we don't want the sync() to hang our + * original process ever, and a thread would do that as the process can't exit with threads hanging in blocking + * syscalls. */ + + r = safe_fork("(sd-sync)", FORK_RESET_SIGNALS|FORK_CLOSE_ALL_FDS, ret_pid); + if (r < 0) + return r; + if (r == 0) { + /* Child process */ + (void) sync(); + _exit(EXIT_SUCCESS); + } + + return 0; +} + +static void *close_thread(void *p) { + (void) pthread_setname_np(pthread_self(), "close"); + + assert_se(close_nointr(PTR_TO_FD(p)) != -EBADF); + return NULL; +} + +int asynchronous_close(int fd) { + int r; + + /* This is supposed to behave similar to safe_close(), but + * actually invoke close() asynchronously, so that it will + * never block. Ideally the kernel would have an API for this, + * but it doesn't, so we work around it, and hide this as a + * far away as we can. */ + + if (fd >= 0) { + PROTECT_ERRNO; + + r = asynchronous_job(close_thread, FD_TO_PTR(fd)); + if (r < 0) + assert_se(close_nointr(fd) != -EBADF); + } + + return -1; +} diff --git a/src/basic/async.h b/src/basic/async.h new file mode 100644 index 0000000..3160613 --- /dev/null +++ b/src/basic/async.h @@ -0,0 +1,7 @@ +/* SPDX-License-Identifier: LGPL-2.1+ */ +#pragma once + +int asynchronous_job(void* (*func)(void *p), void *arg); + +int asynchronous_sync(pid_t *ret_pid); +int asynchronous_close(int fd); diff --git a/src/basic/audit-util.c b/src/basic/audit-util.c new file mode 100644 index 0000000..5cbaef3 --- /dev/null +++ b/src/basic/audit-util.c @@ -0,0 +1,89 @@ +/* SPDX-License-Identifier: LGPL-2.1+ */ + +#include <errno.h> +#include <linux/netlink.h> +#include <stdio.h> +#include <sys/socket.h> + +#include "alloc-util.h" +#include "audit-util.h" +#include "fd-util.h" +#include "fileio.h" +#include "macro.h" +#include "parse-util.h" +#include "process-util.h" +#include "user-util.h" + +int audit_session_from_pid(pid_t pid, uint32_t *id) { + _cleanup_free_ char *s = NULL; + const char *p; + uint32_t u; + int r; + + assert(id); + + /* We don't convert ENOENT to ESRCH here, since we can't + * really distuingish between "audit is not available in the + * kernel" and "the process does not exist", both which will + * result in ENOENT. */ + + p = procfs_file_alloca(pid, "sessionid"); + + r = read_one_line_file(p, &s); + if (r < 0) + return r; + + r = safe_atou32(s, &u); + if (r < 0) + return r; + + if (!audit_session_is_valid(u)) + return -ENODATA; + + *id = u; + return 0; +} + +int audit_loginuid_from_pid(pid_t pid, uid_t *uid) { + _cleanup_free_ char *s = NULL; + const char *p; + uid_t u; + int r; + + assert(uid); + + p = procfs_file_alloca(pid, "loginuid"); + + r = read_one_line_file(p, &s); + if (r < 0) + return r; + + r = parse_uid(s, &u); + if (r == -ENXIO) /* the UID was -1 */ + return -ENODATA; + if (r < 0) + return r; + + *uid = u; + return 0; +} + +bool use_audit(void) { + static int cached_use = -1; + + if (cached_use < 0) { + int fd; + + fd = socket(AF_NETLINK, SOCK_RAW|SOCK_CLOEXEC|SOCK_NONBLOCK, NETLINK_AUDIT); + if (fd < 0) { + cached_use = !IN_SET(errno, EAFNOSUPPORT, EPROTONOSUPPORT, EPERM); + if (!cached_use) + log_debug_errno(errno, "Won't talk to audit: %m"); + } else { + cached_use = true; + safe_close(fd); + } + } + + return cached_use; +} diff --git a/src/basic/audit-util.h b/src/basic/audit-util.h new file mode 100644 index 0000000..c9fc498 --- /dev/null +++ b/src/basic/audit-util.h @@ -0,0 +1,17 @@ +/* SPDX-License-Identifier: LGPL-2.1+ */ +#pragma once + +#include <stdbool.h> +#include <stdint.h> +#include <sys/types.h> + +#define AUDIT_SESSION_INVALID ((uint32_t) -1) + +int audit_session_from_pid(pid_t pid, uint32_t *id); +int audit_loginuid_from_pid(pid_t pid, uid_t *uid); + +bool use_audit(void); + +static inline bool audit_session_is_valid(uint32_t id) { + return id > 0 && id != AUDIT_SESSION_INVALID; +} diff --git a/src/basic/blockdev-util.c b/src/basic/blockdev-util.c new file mode 100644 index 0000000..3017ecd --- /dev/null +++ b/src/basic/blockdev-util.c @@ -0,0 +1,181 @@ +/* SPDX-License-Identifier: LGPL-2.1+ */ + +#include <sys/stat.h> +#include <sys/statfs.h> + +#include "alloc-util.h" +#include "blockdev-util.h" +#include "btrfs-util.h" +#include "dirent-util.h" +#include "fd-util.h" +#include "fileio.h" +#include "missing.h" +#include "parse-util.h" +#include "stat-util.h" + +int block_get_whole_disk(dev_t d, dev_t *ret) { + char p[SYS_BLOCK_PATH_MAX("/partition")]; + _cleanup_free_ char *s = NULL; + dev_t devt; + int r; + + assert(ret); + + /* If it has a queue this is good enough for us */ + xsprintf_sys_block_path(p, "/queue", d); + if (access(p, F_OK) >= 0) { + *ret = d; + return 0; + } + + /* If it is a partition find the originating device */ + xsprintf_sys_block_path(p, "/partition", d); + if (access(p, F_OK) < 0) + return -ENOENT; + + /* Get parent dev_t */ + xsprintf_sys_block_path(p, "/../dev", d); + r = read_one_line_file(p, &s); + if (r < 0) + return r; + + r = parse_dev(s, &devt); + if (r < 0) + return r; + + /* Only return this if it is really good enough for us. */ + xsprintf_sys_block_path(p, "/queue", devt); + if (access(p, F_OK) < 0) + return -ENOENT; + + *ret = devt; + return 0; +} + +int get_block_device(const char *path, dev_t *dev) { + struct stat st; + struct statfs sfs; + + assert(path); + assert(dev); + + /* Get's the block device directly backing a file system. If + * the block device is encrypted, returns the device mapper + * block device. */ + + if (lstat(path, &st)) + return -errno; + + if (major(st.st_dev) != 0) { + *dev = st.st_dev; + return 1; + } + + if (statfs(path, &sfs) < 0) + return -errno; + + if (F_TYPE_EQUAL(sfs.f_type, BTRFS_SUPER_MAGIC)) + return btrfs_get_block_device(path, dev); + + *dev = 0; + return 0; +} + +int block_get_originating(dev_t dt, dev_t *ret) { + _cleanup_closedir_ DIR *d = NULL; + _cleanup_free_ char *t = NULL; + char p[SYS_BLOCK_PATH_MAX("/slaves")]; + struct dirent *de, *found = NULL; + const char *q; + dev_t devt; + int r; + + /* For the specified block device tries to chase it through the layers, in case LUKS-style DM stacking is used, + * trying to find the next underlying layer. */ + + xsprintf_sys_block_path(p, "/slaves", dt); + d = opendir(p); + if (!d) + return -errno; + + FOREACH_DIRENT_ALL(de, d, return -errno) { + + if (dot_or_dot_dot(de->d_name)) + continue; + + if (!IN_SET(de->d_type, DT_LNK, DT_UNKNOWN)) + continue; + + if (found) { + _cleanup_free_ char *u = NULL, *v = NULL, *a = NULL, *b = NULL; + + /* We found a device backed by multiple other devices. We don't really support automatic + * discovery on such setups, with the exception of dm-verity partitions. In this case there are + * two backing devices: the data partition and the hash partition. We are fine with such + * setups, however, only if both partitions are on the same physical device. Hence, let's + * verify this. */ + + u = strjoin(p, "/", de->d_name, "/../dev"); + if (!u) + return -ENOMEM; + + v = strjoin(p, "/", found->d_name, "/../dev"); + if (!v) + return -ENOMEM; + + r = read_one_line_file(u, &a); + if (r < 0) + return log_debug_errno(r, "Failed to read %s: %m", u); + + r = read_one_line_file(v, &b); + if (r < 0) + return log_debug_errno(r, "Failed to read %s: %m", v); + + /* Check if the parent device is the same. If not, then the two backing devices are on + * different physical devices, and we don't support that. */ + if (!streq(a, b)) + return -ENOTUNIQ; + } + + found = de; + } + + if (!found) + return -ENOENT; + + q = strjoina(p, "/", found->d_name, "/dev"); + + r = read_one_line_file(q, &t); + if (r < 0) + return r; + + r = parse_dev(t, &devt); + if (r < 0) + return -EINVAL; + + if (major(devt) == 0) + return -ENOENT; + + *ret = devt; + return 1; +} + +int get_block_device_harder(const char *path, dev_t *ret) { + int r; + + assert(path); + assert(ret); + + /* Gets the backing block device for a file system, and handles LUKS encrypted file systems, looking for its + * immediate parent, if there is one. */ + + r = get_block_device(path, ret); + if (r <= 0) + return r; + + r = block_get_originating(*ret, ret); + if (r < 0) + log_debug_errno(r, "Failed to chase block device '%s', ignoring: %m", path); + + return 1; +} diff --git a/src/basic/blockdev-util.h b/src/basic/blockdev-util.h new file mode 100644 index 0000000..6d8a796 --- /dev/null +++ b/src/basic/blockdev-util.h @@ -0,0 +1,20 @@ +/* SPDX-License-Identifier: LGPL-2.1+ */ +#pragma once + +#include <sys/types.h> + +#include "macro.h" +#include "stdio-util.h" +#include "string-util.h" + +#define SYS_BLOCK_PATH_MAX(suffix) \ + (STRLEN("/sys/dev/block/") + DECIMAL_STR_MAX(dev_t) + 1 + DECIMAL_STR_MAX(dev_t) + strlen_ptr(suffix)) +#define xsprintf_sys_block_path(buf, suffix, devno) \ + xsprintf(buf, "/sys/dev/block/%u:%u%s", major(devno), minor(devno), strempty(suffix)) + +int block_get_whole_disk(dev_t d, dev_t *ret); +int block_get_originating(dev_t d, dev_t *ret); + +int get_block_device(const char *path, dev_t *dev); + +int get_block_device_harder(const char *path, dev_t *dev); diff --git a/src/basic/btrfs-util.c b/src/basic/btrfs-util.c new file mode 100644 index 0000000..da4dd2a --- /dev/null +++ b/src/basic/btrfs-util.c @@ -0,0 +1,1993 @@ +/* SPDX-License-Identifier: LGPL-2.1+ */ + +#include <errno.h> +#include <fcntl.h> +#include <inttypes.h> +#include <linux/fs.h> +#include <linux/loop.h> +#include <stddef.h> +#include <stdio.h> +#include <stdlib.h> +#include <string.h> +#include <sys/ioctl.h> +#include <sys/stat.h> +#include <sys/statfs.h> +#include <sys/sysmacros.h> +#include <unistd.h> + +#include "alloc-util.h" +#include "blockdev-util.h" +#include "btrfs-util.h" +#include "chattr-util.h" +#include "copy.h" +#include "device-nodes.h" +#include "fd-util.h" +#include "fileio.h" +#include "fs-util.h" +#include "io-util.h" +#include "macro.h" +#include "missing.h" +#include "path-util.h" +#include "rm-rf.h" +#include "smack-util.h" +#include "sparse-endian.h" +#include "stat-util.h" +#include "string-util.h" +#include "time-util.h" +#include "util.h" + +/* WARNING: Be careful with file system ioctls! When we get an fd, we + * need to make sure it either refers to only a regular file or + * directory, or that it is located on btrfs, before invoking any + * btrfs ioctls. The ioctl numbers are reused by some device drivers + * (such as DRM), and hence might have bad effects when invoked on + * device nodes (that reference drivers) rather than fds to normal + * files or directories. */ + +static int validate_subvolume_name(const char *name) { + + if (!filename_is_valid(name)) + return -EINVAL; + + if (strlen(name) > BTRFS_SUBVOL_NAME_MAX) + return -E2BIG; + + return 0; +} + +static int extract_subvolume_name(const char *path, const char **subvolume) { + const char *fn; + int r; + + assert(path); + assert(subvolume); + + fn = basename(path); + + r = validate_subvolume_name(fn); + if (r < 0) + return r; + + *subvolume = fn; + return 0; +} + +int btrfs_is_filesystem(int fd) { + struct statfs sfs; + + assert(fd >= 0); + + if (fstatfs(fd, &sfs) < 0) + return -errno; + + return F_TYPE_EQUAL(sfs.f_type, BTRFS_SUPER_MAGIC); +} + +int btrfs_is_subvol_fd(int fd) { + struct stat st; + + assert(fd >= 0); + + /* On btrfs subvolumes always have the inode 256 */ + + if (fstat(fd, &st) < 0) + return -errno; + + if (!S_ISDIR(st.st_mode) || st.st_ino != 256) + return 0; + + return btrfs_is_filesystem(fd); +} + +int btrfs_is_subvol(const char *path) { + _cleanup_close_ int fd = -1; + + assert(path); + + fd = open(path, O_RDONLY|O_NOCTTY|O_CLOEXEC|O_DIRECTORY); + if (fd < 0) + return -errno; + + return btrfs_is_subvol_fd(fd); +} + +int btrfs_subvol_make_fd(int fd, const char *subvolume) { + struct btrfs_ioctl_vol_args args = {}; + _cleanup_close_ int real_fd = -1; + int r; + + assert(subvolume); + + r = validate_subvolume_name(subvolume); + if (r < 0) + return r; + + r = fcntl(fd, F_GETFL); + if (r < 0) + return -errno; + if (FLAGS_SET(r, O_PATH)) { + /* An O_PATH fd was specified, let's convert here to a proper one, as btrfs ioctl's can't deal with + * O_PATH. */ + + real_fd = fd_reopen(fd, O_RDONLY|O_CLOEXEC|O_DIRECTORY); + if (real_fd < 0) + return real_fd; + + fd = real_fd; + } + + strncpy(args.name, subvolume, sizeof(args.name)-1); + + if (ioctl(fd, BTRFS_IOC_SUBVOL_CREATE, &args) < 0) + return -errno; + + return 0; +} + +int btrfs_subvol_make(const char *path) { + _cleanup_close_ int fd = -1; + const char *subvolume; + int r; + + assert(path); + + r = extract_subvolume_name(path, &subvolume); + if (r < 0) + return r; + + fd = open_parent(path, O_CLOEXEC, 0); + if (fd < 0) + return fd; + + return btrfs_subvol_make_fd(fd, subvolume); +} + +int btrfs_subvol_set_read_only_fd(int fd, bool b) { + uint64_t flags, nflags; + struct stat st; + + assert(fd >= 0); + + if (fstat(fd, &st) < 0) + return -errno; + + if (!S_ISDIR(st.st_mode) || st.st_ino != 256) + return -EINVAL; + + if (ioctl(fd, BTRFS_IOC_SUBVOL_GETFLAGS, &flags) < 0) + return -errno; + + if (b) + nflags = flags | BTRFS_SUBVOL_RDONLY; + else + nflags = flags & ~BTRFS_SUBVOL_RDONLY; + + if (flags == nflags) + return 0; + + if (ioctl(fd, BTRFS_IOC_SUBVOL_SETFLAGS, &nflags) < 0) + return -errno; + + return 0; +} + +int btrfs_subvol_set_read_only(const char *path, bool b) { + _cleanup_close_ int fd = -1; + + fd = open(path, O_RDONLY|O_NOCTTY|O_CLOEXEC|O_DIRECTORY); + if (fd < 0) + return -errno; + + return btrfs_subvol_set_read_only_fd(fd, b); +} + +int btrfs_subvol_get_read_only_fd(int fd) { + uint64_t flags; + struct stat st; + + assert(fd >= 0); + + if (fstat(fd, &st) < 0) + return -errno; + + if (!S_ISDIR(st.st_mode) || st.st_ino != 256) + return -EINVAL; + + if (ioctl(fd, BTRFS_IOC_SUBVOL_GETFLAGS, &flags) < 0) + return -errno; + + return !!(flags & BTRFS_SUBVOL_RDONLY); +} + +int btrfs_reflink(int infd, int outfd) { + int r; + + assert(infd >= 0); + assert(outfd >= 0); + + /* Make sure we invoke the ioctl on a regular file, so that no device driver accidentally gets it. */ + + r = fd_verify_regular(outfd); + if (r < 0) + return r; + + if (ioctl(outfd, BTRFS_IOC_CLONE, infd) < 0) + return -errno; + + return 0; +} + +int btrfs_clone_range(int infd, uint64_t in_offset, int outfd, uint64_t out_offset, uint64_t sz) { + struct btrfs_ioctl_clone_range_args args = { + .src_fd = infd, + .src_offset = in_offset, + .src_length = sz, + .dest_offset = out_offset, + }; + int r; + + assert(infd >= 0); + assert(outfd >= 0); + assert(sz > 0); + + r = fd_verify_regular(outfd); + if (r < 0) + return r; + + if (ioctl(outfd, BTRFS_IOC_CLONE_RANGE, &args) < 0) + return -errno; + + return 0; +} + +int btrfs_get_block_device_fd(int fd, dev_t *dev) { + struct btrfs_ioctl_fs_info_args fsi = {}; + uint64_t id; + int r; + + assert(fd >= 0); + assert(dev); + + r = btrfs_is_filesystem(fd); + if (r < 0) + return r; + if (!r) + return -ENOTTY; + + if (ioctl(fd, BTRFS_IOC_FS_INFO, &fsi) < 0) + return -errno; + + /* We won't do this for btrfs RAID */ + if (fsi.num_devices != 1) { + *dev = 0; + return 0; + } + + for (id = 1; id <= fsi.max_id; id++) { + struct btrfs_ioctl_dev_info_args di = { + .devid = id, + }; + struct stat st; + + if (ioctl(fd, BTRFS_IOC_DEV_INFO, &di) < 0) { + if (errno == ENODEV) + continue; + + return -errno; + } + + if (stat((char*) di.path, &st) < 0) + return -errno; + + if (!S_ISBLK(st.st_mode)) + return -ENODEV; + + if (major(st.st_rdev) == 0) + return -ENODEV; + + *dev = st.st_rdev; + return 1; + } + + return -ENODEV; +} + +int btrfs_get_block_device(const char *path, dev_t *dev) { + _cleanup_close_ int fd = -1; + + assert(path); + assert(dev); + + fd = open(path, O_RDONLY|O_NOCTTY|O_CLOEXEC); + if (fd < 0) + return -errno; + + return btrfs_get_block_device_fd(fd, dev); +} + +int btrfs_subvol_get_id_fd(int fd, uint64_t *ret) { + struct btrfs_ioctl_ino_lookup_args args = { + .objectid = BTRFS_FIRST_FREE_OBJECTID + }; + int r; + + assert(fd >= 0); + assert(ret); + + r = btrfs_is_filesystem(fd); + if (r < 0) + return r; + if (!r) + return -ENOTTY; + + if (ioctl(fd, BTRFS_IOC_INO_LOOKUP, &args) < 0) + return -errno; + + *ret = args.treeid; + return 0; +} + +int btrfs_subvol_get_id(int fd, const char *subvol, uint64_t *ret) { + _cleanup_close_ int subvol_fd = -1; + + assert(fd >= 0); + assert(ret); + + subvol_fd = openat(fd, subvol, O_RDONLY|O_CLOEXEC|O_NOCTTY|O_NOFOLLOW); + if (subvol_fd < 0) + return -errno; + + return btrfs_subvol_get_id_fd(subvol_fd, ret); +} + +static bool btrfs_ioctl_search_args_inc(struct btrfs_ioctl_search_args *args) { + assert(args); + + /* the objectid, type, offset together make up the btrfs key, + * which is considered a single 136byte integer when + * comparing. This call increases the counter by one, dealing + * with the overflow between the overflows */ + + if (args->key.min_offset < (uint64_t) -1) { + args->key.min_offset++; + return true; + } + + if (args->key.min_type < (uint8_t) -1) { + args->key.min_type++; + args->key.min_offset = 0; + return true; + } + + if (args->key.min_objectid < (uint64_t) -1) { + args->key.min_objectid++; + args->key.min_offset = 0; + args->key.min_type = 0; + return true; + } + + return 0; +} + +static void btrfs_ioctl_search_args_set(struct btrfs_ioctl_search_args *args, const struct btrfs_ioctl_search_header *h) { + assert(args); + assert(h); + + args->key.min_objectid = h->objectid; + args->key.min_type = h->type; + args->key.min_offset = h->offset; +} + +static int btrfs_ioctl_search_args_compare(const struct btrfs_ioctl_search_args *args) { + int r; + + assert(args); + + /* Compare min and max */ + + r = CMP(args->key.min_objectid, args->key.max_objectid); + if (r != 0) + return r; + + r = CMP(args->key.min_type, args->key.max_type); + if (r != 0) + return r; + + return CMP(args->key.min_offset, args->key.max_offset); +} + +#define FOREACH_BTRFS_IOCTL_SEARCH_HEADER(i, sh, args) \ + for ((i) = 0, \ + (sh) = (const struct btrfs_ioctl_search_header*) (args).buf; \ + (i) < (args).key.nr_items; \ + (i)++, \ + (sh) = (const struct btrfs_ioctl_search_header*) ((uint8_t*) (sh) + sizeof(struct btrfs_ioctl_search_header) + (sh)->len)) + +#define BTRFS_IOCTL_SEARCH_HEADER_BODY(sh) \ + ((void*) ((uint8_t*) sh + sizeof(struct btrfs_ioctl_search_header))) + +int btrfs_subvol_get_info_fd(int fd, uint64_t subvol_id, BtrfsSubvolInfo *ret) { + struct btrfs_ioctl_search_args args = { + /* Tree of tree roots */ + .key.tree_id = BTRFS_ROOT_TREE_OBJECTID, + + /* Look precisely for the subvolume items */ + .key.min_type = BTRFS_ROOT_ITEM_KEY, + .key.max_type = BTRFS_ROOT_ITEM_KEY, + + .key.min_offset = 0, + .key.max_offset = (uint64_t) -1, + + /* No restrictions on the other components */ + .key.min_transid = 0, + .key.max_transid = (uint64_t) -1, + }; + + bool found = false; + int r; + + assert(fd >= 0); + assert(ret); + + if (subvol_id == 0) { + r = btrfs_subvol_get_id_fd(fd, &subvol_id); + if (r < 0) + return r; + } else { + r = btrfs_is_filesystem(fd); + if (r < 0) + return r; + if (!r) + return -ENOTTY; + } + + args.key.min_objectid = args.key.max_objectid = subvol_id; + + while (btrfs_ioctl_search_args_compare(&args) <= 0) { + const struct btrfs_ioctl_search_header *sh; + unsigned i; + + args.key.nr_items = 256; + if (ioctl(fd, BTRFS_IOC_TREE_SEARCH, &args) < 0) + return -errno; + + if (args.key.nr_items <= 0) + break; + + FOREACH_BTRFS_IOCTL_SEARCH_HEADER(i, sh, args) { + + const struct btrfs_root_item *ri; + + /* Make sure we start the next search at least from this entry */ + btrfs_ioctl_search_args_set(&args, sh); + + if (sh->objectid != subvol_id) + continue; + if (sh->type != BTRFS_ROOT_ITEM_KEY) + continue; + + /* Older versions of the struct lacked the otime setting */ + if (sh->len < offsetof(struct btrfs_root_item, otime) + sizeof(struct btrfs_timespec)) + continue; + + ri = BTRFS_IOCTL_SEARCH_HEADER_BODY(sh); + + ret->otime = (usec_t) le64toh(ri->otime.sec) * USEC_PER_SEC + + (usec_t) le32toh(ri->otime.nsec) / NSEC_PER_USEC; + + ret->subvol_id = subvol_id; + ret->read_only = le64toh(ri->flags) & BTRFS_ROOT_SUBVOL_RDONLY; + + assert_cc(sizeof(ri->uuid) == sizeof(ret->uuid)); + memcpy(&ret->uuid, ri->uuid, sizeof(ret->uuid)); + memcpy(&ret->parent_uuid, ri->parent_uuid, sizeof(ret->parent_uuid)); + + found = true; + goto finish; + } + + /* Increase search key by one, to read the next item, if we can. */ + if (!btrfs_ioctl_search_args_inc(&args)) + break; + } + +finish: + if (!found) + return -ENODATA; + + return 0; +} + +int btrfs_qgroup_get_quota_fd(int fd, uint64_t qgroupid, BtrfsQuotaInfo *ret) { + + struct btrfs_ioctl_search_args args = { + /* Tree of quota items */ + .key.tree_id = BTRFS_QUOTA_TREE_OBJECTID, + + /* The object ID is always 0 */ + .key.min_objectid = 0, + .key.max_objectid = 0, + + /* Look precisely for the quota items */ + .key.min_type = BTRFS_QGROUP_STATUS_KEY, + .key.max_type = BTRFS_QGROUP_LIMIT_KEY, + + /* No restrictions on the other components */ + .key.min_transid = 0, + .key.max_transid = (uint64_t) -1, + }; + + bool found_info = false, found_limit = false; + int r; + + assert(fd >= 0); + assert(ret); + + if (qgroupid == 0) { + r = btrfs_subvol_get_id_fd(fd, &qgroupid); + if (r < 0) + return r; + } else { + r = btrfs_is_filesystem(fd); + if (r < 0) + return r; + if (!r) + return -ENOTTY; + } + + args.key.min_offset = args.key.max_offset = qgroupid; + + while (btrfs_ioctl_search_args_compare(&args) <= 0) { + const struct btrfs_ioctl_search_header *sh; + unsigned i; + + args.key.nr_items = 256; + if (ioctl(fd, BTRFS_IOC_TREE_SEARCH, &args) < 0) { + if (errno == ENOENT) /* quota tree is missing: quota disabled */ + break; + + return -errno; + } + + if (args.key.nr_items <= 0) + break; + + FOREACH_BTRFS_IOCTL_SEARCH_HEADER(i, sh, args) { + + /* Make sure we start the next search at least from this entry */ + btrfs_ioctl_search_args_set(&args, sh); + + if (sh->objectid != 0) + continue; + if (sh->offset != qgroupid) + continue; + + if (sh->type == BTRFS_QGROUP_INFO_KEY) { + const struct btrfs_qgroup_info_item *qii = BTRFS_IOCTL_SEARCH_HEADER_BODY(sh); + + ret->referenced = le64toh(qii->rfer); + ret->exclusive = le64toh(qii->excl); + + found_info = true; + + } else if (sh->type == BTRFS_QGROUP_LIMIT_KEY) { + const struct btrfs_qgroup_limit_item *qli = BTRFS_IOCTL_SEARCH_HEADER_BODY(sh); + + if (le64toh(qli->flags) & BTRFS_QGROUP_LIMIT_MAX_RFER) + ret->referenced_max = le64toh(qli->max_rfer); + else + ret->referenced_max = (uint64_t) -1; + + if (le64toh(qli->flags) & BTRFS_QGROUP_LIMIT_MAX_EXCL) + ret->exclusive_max = le64toh(qli->max_excl); + else + ret->exclusive_max = (uint64_t) -1; + + found_limit = true; + } + + if (found_info && found_limit) + goto finish; + } + + /* Increase search key by one, to read the next item, if we can. */ + if (!btrfs_ioctl_search_args_inc(&args)) + break; + } + +finish: + if (!found_limit && !found_info) + return -ENODATA; + + if (!found_info) { + ret->referenced = (uint64_t) -1; + ret->exclusive = (uint64_t) -1; + } + + if (!found_limit) { + ret->referenced_max = (uint64_t) -1; + ret->exclusive_max = (uint64_t) -1; + } + + return 0; +} + +int btrfs_qgroup_get_quota(const char *path, uint64_t qgroupid, BtrfsQuotaInfo *ret) { + _cleanup_close_ int fd = -1; + + fd = open(path, O_RDONLY|O_CLOEXEC|O_NOCTTY|O_NOFOLLOW); + if (fd < 0) + return -errno; + + return btrfs_qgroup_get_quota_fd(fd, qgroupid, ret); +} + +int btrfs_subvol_find_subtree_qgroup(int fd, uint64_t subvol_id, uint64_t *ret) { + uint64_t level, lowest = (uint64_t) -1, lowest_qgroupid = 0; + _cleanup_free_ uint64_t *qgroups = NULL; + int r, n, i; + + assert(fd >= 0); + assert(ret); + + /* This finds the "subtree" qgroup for a specific + * subvolume. This only works for subvolumes that have been + * prepared with btrfs_subvol_auto_qgroup_fd() with + * insert_intermediary_qgroup=true (or equivalent). For others + * it will return the leaf qgroup instead. The two cases may + * be distuingished via the return value, which is 1 in case + * an appropriate "subtree" qgroup was found, and 0 + * otherwise. */ + + if (subvol_id == 0) { + r = btrfs_subvol_get_id_fd(fd, &subvol_id); + if (r < 0) + return r; + } + + r = btrfs_qgroupid_split(subvol_id, &level, NULL); + if (r < 0) + return r; + if (level != 0) /* Input must be a leaf qgroup */ + return -EINVAL; + + n = btrfs_qgroup_find_parents(fd, subvol_id, &qgroups); + if (n < 0) + return n; + + for (i = 0; i < n; i++) { + uint64_t id; + + r = btrfs_qgroupid_split(qgroups[i], &level, &id); + if (r < 0) + return r; + + if (id != subvol_id) + continue; + + if (lowest == (uint64_t) -1 || level < lowest) { + lowest_qgroupid = qgroups[i]; + lowest = level; + } + } + + if (lowest == (uint64_t) -1) { + /* No suitable higher-level qgroup found, let's return + * the leaf qgroup instead, and indicate that with the + * return value. */ + + *ret = subvol_id; + return 0; + } + + *ret = lowest_qgroupid; + return 1; +} + +int btrfs_subvol_get_subtree_quota_fd(int fd, uint64_t subvol_id, BtrfsQuotaInfo *ret) { + uint64_t qgroupid; + int r; + + assert(fd >= 0); + assert(ret); + + /* This determines the quota data of the qgroup with the + * lowest level, that shares the id part with the specified + * subvolume. This is useful for determining the quota data + * for entire subvolume subtrees, as long as the subtrees have + * been set up with btrfs_qgroup_subvol_auto_fd() or in a + * compatible way */ + + r = btrfs_subvol_find_subtree_qgroup(fd, subvol_id, &qgroupid); + if (r < 0) + return r; + + return btrfs_qgroup_get_quota_fd(fd, qgroupid, ret); +} + +int btrfs_subvol_get_subtree_quota(const char *path, uint64_t subvol_id, BtrfsQuotaInfo *ret) { + _cleanup_close_ int fd = -1; + + fd = open(path, O_RDONLY|O_CLOEXEC|O_NOCTTY|O_NOFOLLOW); + if (fd < 0) + return -errno; + + return btrfs_subvol_get_subtree_quota_fd(fd, subvol_id, ret); +} + +int btrfs_defrag_fd(int fd) { + int r; + + assert(fd >= 0); + + r = fd_verify_regular(fd); + if (r < 0) + return r; + + if (ioctl(fd, BTRFS_IOC_DEFRAG, NULL) < 0) + return -errno; + + return 0; +} + +int btrfs_defrag(const char *p) { + _cleanup_close_ int fd = -1; + + fd = open(p, O_RDWR|O_CLOEXEC|O_NOCTTY|O_NOFOLLOW); + if (fd < 0) + return -errno; + + return btrfs_defrag_fd(fd); +} + +int btrfs_quota_enable_fd(int fd, bool b) { + struct btrfs_ioctl_quota_ctl_args args = { + .cmd = b ? BTRFS_QUOTA_CTL_ENABLE : BTRFS_QUOTA_CTL_DISABLE, + }; + int r; + + assert(fd >= 0); + + r = btrfs_is_filesystem(fd); + if (r < 0) + return r; + if (!r) + return -ENOTTY; + + if (ioctl(fd, BTRFS_IOC_QUOTA_CTL, &args) < 0) + return -errno; + + return 0; +} + +int btrfs_quota_enable(const char *path, bool b) { + _cleanup_close_ int fd = -1; + + fd = open(path, O_RDONLY|O_CLOEXEC|O_NOCTTY|O_NOFOLLOW); + if (fd < 0) + return -errno; + + return btrfs_quota_enable_fd(fd, b); +} + +int btrfs_qgroup_set_limit_fd(int fd, uint64_t qgroupid, uint64_t referenced_max) { + + struct btrfs_ioctl_qgroup_limit_args args = { + .lim.max_rfer = referenced_max, + .lim.flags = BTRFS_QGROUP_LIMIT_MAX_RFER, + }; + unsigned c; + int r; + + assert(fd >= 0); + + if (qgroupid == 0) { + r = btrfs_subvol_get_id_fd(fd, &qgroupid); + if (r < 0) + return r; + } else { + r = btrfs_is_filesystem(fd); + if (r < 0) + return r; + if (!r) + return -ENOTTY; + } + + args.qgroupid = qgroupid; + + for (c = 0;; c++) { + if (ioctl(fd, BTRFS_IOC_QGROUP_LIMIT, &args) < 0) { + + if (errno == EBUSY && c < 10) { + (void) btrfs_quota_scan_wait(fd); + continue; + } + + return -errno; + } + + break; + } + + return 0; +} + +int btrfs_qgroup_set_limit(const char *path, uint64_t qgroupid, uint64_t referenced_max) { + _cleanup_close_ int fd = -1; + + fd = open(path, O_RDONLY|O_CLOEXEC|O_NOCTTY|O_NOFOLLOW); + if (fd < 0) + return -errno; + + return btrfs_qgroup_set_limit_fd(fd, qgroupid, referenced_max); +} + +int btrfs_subvol_set_subtree_quota_limit_fd(int fd, uint64_t subvol_id, uint64_t referenced_max) { + uint64_t qgroupid; + int r; + + assert(fd >= 0); + + r = btrfs_subvol_find_subtree_qgroup(fd, subvol_id, &qgroupid); + if (r < 0) + return r; + + return btrfs_qgroup_set_limit_fd(fd, qgroupid, referenced_max); +} + +int btrfs_subvol_set_subtree_quota_limit(const char *path, uint64_t subvol_id, uint64_t referenced_max) { + _cleanup_close_ int fd = -1; + + fd = open(path, O_RDONLY|O_CLOEXEC|O_NOCTTY|O_NOFOLLOW); + if (fd < 0) + return -errno; + + return btrfs_subvol_set_subtree_quota_limit_fd(fd, subvol_id, referenced_max); +} + +int btrfs_qgroupid_make(uint64_t level, uint64_t id, uint64_t *ret) { + assert(ret); + + if (level >= (UINT64_C(1) << (64 - BTRFS_QGROUP_LEVEL_SHIFT))) + return -EINVAL; + + if (id >= (UINT64_C(1) << BTRFS_QGROUP_LEVEL_SHIFT)) + return -EINVAL; + + *ret = (level << BTRFS_QGROUP_LEVEL_SHIFT) | id; + return 0; +} + +int btrfs_qgroupid_split(uint64_t qgroupid, uint64_t *level, uint64_t *id) { + assert(level || id); + + if (level) + *level = qgroupid >> BTRFS_QGROUP_LEVEL_SHIFT; + + if (id) + *id = qgroupid & ((UINT64_C(1) << BTRFS_QGROUP_LEVEL_SHIFT) - 1); + + return 0; +} + +static int qgroup_create_or_destroy(int fd, bool b, uint64_t qgroupid) { + + struct btrfs_ioctl_qgroup_create_args args = { + .create = b, + .qgroupid = qgroupid, + }; + unsigned c; + int r; + + r = btrfs_is_filesystem(fd); + if (r < 0) + return r; + if (r == 0) + return -ENOTTY; + + for (c = 0;; c++) { + if (ioctl(fd, BTRFS_IOC_QGROUP_CREATE, &args) < 0) { + + /* If quota is not enabled, we get EINVAL. Turn this into a recognizable error */ + if (errno == EINVAL) + return -ENOPROTOOPT; + + if (errno == EBUSY && c < 10) { + (void) btrfs_quota_scan_wait(fd); + continue; + } + + return -errno; + } + + break; + } + + return 0; +} + +int btrfs_qgroup_create(int fd, uint64_t qgroupid) { + return qgroup_create_or_destroy(fd, true, qgroupid); +} + +int btrfs_qgroup_destroy(int fd, uint64_t qgroupid) { + return qgroup_create_or_destroy(fd, false, qgroupid); +} + +int btrfs_qgroup_destroy_recursive(int fd, uint64_t qgroupid) { + _cleanup_free_ uint64_t *qgroups = NULL; + uint64_t subvol_id; + int i, n, r; + + /* Destroys the specified qgroup, but unassigns it from all + * its parents first. Also, it recursively destroys all + * qgroups it is assigned to that have the same id part of the + * qgroupid as the specified group. */ + + r = btrfs_qgroupid_split(qgroupid, NULL, &subvol_id); + if (r < 0) + return r; + + n = btrfs_qgroup_find_parents(fd, qgroupid, &qgroups); + if (n < 0) + return n; + + for (i = 0; i < n; i++) { + uint64_t id; + + r = btrfs_qgroupid_split(qgroups[i], NULL, &id); + if (r < 0) + return r; + + r = btrfs_qgroup_unassign(fd, qgroupid, qgroups[i]); + if (r < 0) + return r; + + if (id != subvol_id) + continue; + + /* The parent qgroupid shares the same id part with + * us? If so, destroy it too. */ + + (void) btrfs_qgroup_destroy_recursive(fd, qgroups[i]); + } + + return btrfs_qgroup_destroy(fd, qgroupid); +} + +int btrfs_quota_scan_start(int fd) { + struct btrfs_ioctl_quota_rescan_args args = {}; + + assert(fd >= 0); + + if (ioctl(fd, BTRFS_IOC_QUOTA_RESCAN, &args) < 0) + return -errno; + + return 0; +} + +int btrfs_quota_scan_wait(int fd) { + assert(fd >= 0); + + if (ioctl(fd, BTRFS_IOC_QUOTA_RESCAN_WAIT) < 0) + return -errno; + + return 0; +} + +int btrfs_quota_scan_ongoing(int fd) { + struct btrfs_ioctl_quota_rescan_args args = {}; + + assert(fd >= 0); + + if (ioctl(fd, BTRFS_IOC_QUOTA_RESCAN_STATUS, &args) < 0) + return -errno; + + return !!args.flags; +} + +static int qgroup_assign_or_unassign(int fd, bool b, uint64_t child, uint64_t parent) { + struct btrfs_ioctl_qgroup_assign_args args = { + .assign = b, + .src = child, + .dst = parent, + }; + unsigned c; + int r; + + r = btrfs_is_filesystem(fd); + if (r < 0) + return r; + if (r == 0) + return -ENOTTY; + + for (c = 0;; c++) { + r = ioctl(fd, BTRFS_IOC_QGROUP_ASSIGN, &args); + if (r < 0) { + if (errno == EBUSY && c < 10) { + (void) btrfs_quota_scan_wait(fd); + continue; + } + + return -errno; + } + + if (r == 0) + return 0; + + /* If the return value is > 0, we need to request a rescan */ + + (void) btrfs_quota_scan_start(fd); + return 1; + } +} + +int btrfs_qgroup_assign(int fd, uint64_t child, uint64_t parent) { + return qgroup_assign_or_unassign(fd, true, child, parent); +} + +int btrfs_qgroup_unassign(int fd, uint64_t child, uint64_t parent) { + return qgroup_assign_or_unassign(fd, false, child, parent); +} + +static int subvol_remove_children(int fd, const char *subvolume, uint64_t subvol_id, BtrfsRemoveFlags flags) { + struct btrfs_ioctl_search_args args = { + .key.tree_id = BTRFS_ROOT_TREE_OBJECTID, + + .key.min_objectid = BTRFS_FIRST_FREE_OBJECTID, + .key.max_objectid = BTRFS_LAST_FREE_OBJECTID, + + .key.min_type = BTRFS_ROOT_BACKREF_KEY, + .key.max_type = BTRFS_ROOT_BACKREF_KEY, + + .key.min_transid = 0, + .key.max_transid = (uint64_t) -1, + }; + + struct btrfs_ioctl_vol_args vol_args = {}; + _cleanup_close_ int subvol_fd = -1; + struct stat st; + bool made_writable = false; + int r; + + assert(fd >= 0); + assert(subvolume); + + if (fstat(fd, &st) < 0) + return -errno; + + if (!S_ISDIR(st.st_mode)) + return -EINVAL; + + subvol_fd = openat(fd, subvolume, O_RDONLY|O_NOCTTY|O_CLOEXEC|O_DIRECTORY|O_NOFOLLOW); + if (subvol_fd < 0) + return -errno; + + /* Let's check if this is actually a subvolume. Note that this is mostly redundant, as BTRFS_IOC_SNAP_DESTROY + * would fail anyway if it is not. However, it's a good thing to check this ahead of time so that we can return + * ENOTTY unconditionally in this case. This is different from the ioctl() which will return EPERM/EACCES if we + * don't have the privileges to remove subvolumes, regardless if the specified directory is actually a + * subvolume or not. In order to make it easy for callers to cover the "this is not a btrfs subvolume" case + * let's prefer ENOTTY over EPERM/EACCES though. */ + r = btrfs_is_subvol_fd(subvol_fd); + if (r < 0) + return r; + if (r == 0) /* Not a btrfs subvolume */ + return -ENOTTY; + + if (subvol_id == 0) { + r = btrfs_subvol_get_id_fd(subvol_fd, &subvol_id); + if (r < 0) + return r; + } + + /* First, try to remove the subvolume. If it happens to be + * already empty, this will just work. */ + strncpy(vol_args.name, subvolume, sizeof(vol_args.name)-1); + if (ioctl(fd, BTRFS_IOC_SNAP_DESTROY, &vol_args) >= 0) { + (void) btrfs_qgroup_destroy_recursive(fd, subvol_id); /* for the leaf subvolumes, the qgroup id is identical to the subvol id */ + return 0; + } + if (!(flags & BTRFS_REMOVE_RECURSIVE) || errno != ENOTEMPTY) + return -errno; + + /* OK, the subvolume is not empty, let's look for child + * subvolumes, and remove them, first */ + + args.key.min_offset = args.key.max_offset = subvol_id; + + while (btrfs_ioctl_search_args_compare(&args) <= 0) { + const struct btrfs_ioctl_search_header *sh; + unsigned i; + + args.key.nr_items = 256; + if (ioctl(fd, BTRFS_IOC_TREE_SEARCH, &args) < 0) + return -errno; + + if (args.key.nr_items <= 0) + break; + + FOREACH_BTRFS_IOCTL_SEARCH_HEADER(i, sh, args) { + _cleanup_free_ char *p = NULL; + const struct btrfs_root_ref *ref; + struct btrfs_ioctl_ino_lookup_args ino_args; + + btrfs_ioctl_search_args_set(&args, sh); + + if (sh->type != BTRFS_ROOT_BACKREF_KEY) + continue; + if (sh->offset != subvol_id) + continue; + + ref = BTRFS_IOCTL_SEARCH_HEADER_BODY(sh); + + p = strndup((char*) ref + sizeof(struct btrfs_root_ref), le64toh(ref->name_len)); + if (!p) + return -ENOMEM; + + zero(ino_args); + ino_args.treeid = subvol_id; + ino_args.objectid = htole64(ref->dirid); + + if (ioctl(fd, BTRFS_IOC_INO_LOOKUP, &ino_args) < 0) + return -errno; + + if (!made_writable) { + r = btrfs_subvol_set_read_only_fd(subvol_fd, false); + if (r < 0) + return r; + + made_writable = true; + } + + if (isempty(ino_args.name)) + /* Subvolume is in the top-level + * directory of the subvolume. */ + r = subvol_remove_children(subvol_fd, p, sh->objectid, flags); + else { + _cleanup_close_ int child_fd = -1; + + /* Subvolume is somewhere further down, + * hence we need to open the + * containing directory first */ + + child_fd = openat(subvol_fd, ino_args.name, O_RDONLY|O_NOCTTY|O_CLOEXEC|O_DIRECTORY|O_NOFOLLOW); + if (child_fd < 0) + return -errno; + + r = subvol_remove_children(child_fd, p, sh->objectid, flags); + } + if (r < 0) + return r; + } + + /* Increase search key by one, to read the next item, if we can. */ + if (!btrfs_ioctl_search_args_inc(&args)) + break; + } + + /* OK, the child subvolumes should all be gone now, let's try + * again to remove the subvolume */ + if (ioctl(fd, BTRFS_IOC_SNAP_DESTROY, &vol_args) < 0) + return -errno; + + (void) btrfs_qgroup_destroy_recursive(fd, subvol_id); + return 0; +} + +int btrfs_subvol_remove(const char *path, BtrfsRemoveFlags flags) { + _cleanup_close_ int fd = -1; + const char *subvolume; + int r; + + assert(path); + + r = extract_subvolume_name(path, &subvolume); + if (r < 0) + return r; + + fd = open_parent(path, O_CLOEXEC, 0); + if (fd < 0) + return fd; + + return subvol_remove_children(fd, subvolume, 0, flags); +} + +int btrfs_subvol_remove_fd(int fd, const char *subvolume, BtrfsRemoveFlags flags) { + return subvol_remove_children(fd, subvolume, 0, flags); +} + +int btrfs_qgroup_copy_limits(int fd, uint64_t old_qgroupid, uint64_t new_qgroupid) { + + struct btrfs_ioctl_search_args args = { + /* Tree of quota items */ + .key.tree_id = BTRFS_QUOTA_TREE_OBJECTID, + + /* The object ID is always 0 */ + .key.min_objectid = 0, + .key.max_objectid = 0, + + /* Look precisely for the quota items */ + .key.min_type = BTRFS_QGROUP_LIMIT_KEY, + .key.max_type = BTRFS_QGROUP_LIMIT_KEY, + + /* For our qgroup */ + .key.min_offset = old_qgroupid, + .key.max_offset = old_qgroupid, + + /* No restrictions on the other components */ + .key.min_transid = 0, + .key.max_transid = (uint64_t) -1, + }; + + int r; + + r = btrfs_is_filesystem(fd); + if (r < 0) + return r; + if (!r) + return -ENOTTY; + + while (btrfs_ioctl_search_args_compare(&args) <= 0) { + const struct btrfs_ioctl_search_header *sh; + unsigned i; + + args.key.nr_items = 256; + if (ioctl(fd, BTRFS_IOC_TREE_SEARCH, &args) < 0) { + if (errno == ENOENT) /* quota tree missing: quota is not enabled, hence nothing to copy */ + break; + + return -errno; + } + + if (args.key.nr_items <= 0) + break; + + FOREACH_BTRFS_IOCTL_SEARCH_HEADER(i, sh, args) { + const struct btrfs_qgroup_limit_item *qli = BTRFS_IOCTL_SEARCH_HEADER_BODY(sh); + struct btrfs_ioctl_qgroup_limit_args qargs; + unsigned c; + + /* Make sure we start the next search at least from this entry */ + btrfs_ioctl_search_args_set(&args, sh); + + if (sh->objectid != 0) + continue; + if (sh->type != BTRFS_QGROUP_LIMIT_KEY) + continue; + if (sh->offset != old_qgroupid) + continue; + + /* We found the entry, now copy things over. */ + + qargs = (struct btrfs_ioctl_qgroup_limit_args) { + .qgroupid = new_qgroupid, + + .lim.max_rfer = le64toh(qli->max_rfer), + .lim.max_excl = le64toh(qli->max_excl), + .lim.rsv_rfer = le64toh(qli->rsv_rfer), + .lim.rsv_excl = le64toh(qli->rsv_excl), + + .lim.flags = le64toh(qli->flags) & (BTRFS_QGROUP_LIMIT_MAX_RFER| + BTRFS_QGROUP_LIMIT_MAX_EXCL| + BTRFS_QGROUP_LIMIT_RSV_RFER| + BTRFS_QGROUP_LIMIT_RSV_EXCL), + }; + + for (c = 0;; c++) { + if (ioctl(fd, BTRFS_IOC_QGROUP_LIMIT, &qargs) < 0) { + if (errno == EBUSY && c < 10) { + (void) btrfs_quota_scan_wait(fd); + continue; + } + return -errno; + } + + break; + } + + return 1; + } + + /* Increase search key by one, to read the next item, if we can. */ + if (!btrfs_ioctl_search_args_inc(&args)) + break; + } + + return 0; +} + +static int copy_quota_hierarchy(int fd, uint64_t old_subvol_id, uint64_t new_subvol_id) { + _cleanup_free_ uint64_t *old_qgroups = NULL, *old_parent_qgroups = NULL; + bool copy_from_parent = false, insert_intermediary_qgroup = false; + int n_old_qgroups, n_old_parent_qgroups, r, i; + uint64_t old_parent_id; + + assert(fd >= 0); + + /* Copies a reduced form of quota information from the old to + * the new subvolume. */ + + n_old_qgroups = btrfs_qgroup_find_parents(fd, old_subvol_id, &old_qgroups); + if (n_old_qgroups <= 0) /* Nothing to copy */ + return n_old_qgroups; + + r = btrfs_subvol_get_parent(fd, old_subvol_id, &old_parent_id); + if (r == -ENXIO) + /* We have no parent, hence nothing to copy. */ + n_old_parent_qgroups = 0; + else if (r < 0) + return r; + else { + n_old_parent_qgroups = btrfs_qgroup_find_parents(fd, old_parent_id, &old_parent_qgroups); + if (n_old_parent_qgroups < 0) + return n_old_parent_qgroups; + } + + for (i = 0; i < n_old_qgroups; i++) { + uint64_t id; + int j; + + r = btrfs_qgroupid_split(old_qgroups[i], NULL, &id); + if (r < 0) + return r; + + if (id == old_subvol_id) { + /* The old subvolume was member of a qgroup + * that had the same id, but a different level + * as it self. Let's set up something similar + * in the destination. */ + insert_intermediary_qgroup = true; + break; + } + + for (j = 0; j < n_old_parent_qgroups; j++) + if (old_parent_qgroups[j] == old_qgroups[i]) { + /* The old subvolume shared a common + * parent qgroup with its parent + * subvolume. Let's set up something + * similar in the destination. */ + copy_from_parent = true; + } + } + + if (!insert_intermediary_qgroup && !copy_from_parent) + return 0; + + return btrfs_subvol_auto_qgroup_fd(fd, new_subvol_id, insert_intermediary_qgroup); +} + +static int copy_subtree_quota_limits(int fd, uint64_t old_subvol, uint64_t new_subvol) { + uint64_t old_subtree_qgroup, new_subtree_qgroup; + bool changed; + int r; + + /* First copy the leaf limits */ + r = btrfs_qgroup_copy_limits(fd, old_subvol, new_subvol); + if (r < 0) + return r; + changed = r > 0; + + /* Then, try to copy the subtree limits, if there are any. */ + r = btrfs_subvol_find_subtree_qgroup(fd, old_subvol, &old_subtree_qgroup); + if (r < 0) + return r; + if (r == 0) + return changed; + + r = btrfs_subvol_find_subtree_qgroup(fd, new_subvol, &new_subtree_qgroup); + if (r < 0) + return r; + if (r == 0) + return changed; + + r = btrfs_qgroup_copy_limits(fd, old_subtree_qgroup, new_subtree_qgroup); + if (r != 0) + return r; + + return changed; +} + +static int subvol_snapshot_children( + int old_fd, + int new_fd, + const char *subvolume, + uint64_t old_subvol_id, + BtrfsSnapshotFlags flags) { + + struct btrfs_ioctl_search_args args = { + .key.tree_id = BTRFS_ROOT_TREE_OBJECTID, + + .key.min_objectid = BTRFS_FIRST_FREE_OBJECTID, + .key.max_objectid = BTRFS_LAST_FREE_OBJECTID, + + .key.min_type = BTRFS_ROOT_BACKREF_KEY, + .key.max_type = BTRFS_ROOT_BACKREF_KEY, + + .key.min_transid = 0, + .key.max_transid = (uint64_t) -1, + }; + + struct btrfs_ioctl_vol_args_v2 vol_args = { + .flags = flags & BTRFS_SNAPSHOT_READ_ONLY ? BTRFS_SUBVOL_RDONLY : 0, + .fd = old_fd, + }; + _cleanup_close_ int subvolume_fd = -1; + uint64_t new_subvol_id; + int r; + + assert(old_fd >= 0); + assert(new_fd >= 0); + assert(subvolume); + + strncpy(vol_args.name, subvolume, sizeof(vol_args.name)-1); + + if (ioctl(new_fd, BTRFS_IOC_SNAP_CREATE_V2, &vol_args) < 0) + return -errno; + + if (!(flags & BTRFS_SNAPSHOT_RECURSIVE) && + !(flags & BTRFS_SNAPSHOT_QUOTA)) + return 0; + + if (old_subvol_id == 0) { + r = btrfs_subvol_get_id_fd(old_fd, &old_subvol_id); + if (r < 0) + return r; + } + + r = btrfs_subvol_get_id(new_fd, vol_args.name, &new_subvol_id); + if (r < 0) + return r; + + if (flags & BTRFS_SNAPSHOT_QUOTA) + (void) copy_quota_hierarchy(new_fd, old_subvol_id, new_subvol_id); + + if (!(flags & BTRFS_SNAPSHOT_RECURSIVE)) { + + if (flags & BTRFS_SNAPSHOT_QUOTA) + (void) copy_subtree_quota_limits(new_fd, old_subvol_id, new_subvol_id); + + return 0; + } + + args.key.min_offset = args.key.max_offset = old_subvol_id; + + while (btrfs_ioctl_search_args_compare(&args) <= 0) { + const struct btrfs_ioctl_search_header *sh; + unsigned i; + + args.key.nr_items = 256; + if (ioctl(old_fd, BTRFS_IOC_TREE_SEARCH, &args) < 0) + return -errno; + + if (args.key.nr_items <= 0) + break; + + FOREACH_BTRFS_IOCTL_SEARCH_HEADER(i, sh, args) { + _cleanup_free_ char *p = NULL, *c = NULL, *np = NULL; + struct btrfs_ioctl_ino_lookup_args ino_args; + const struct btrfs_root_ref *ref; + _cleanup_close_ int old_child_fd = -1, new_child_fd = -1; + + btrfs_ioctl_search_args_set(&args, sh); + + if (sh->type != BTRFS_ROOT_BACKREF_KEY) + continue; + + /* Avoid finding the source subvolume a second + * time */ + if (sh->offset != old_subvol_id) + continue; + + /* Avoid running into loops if the new + * subvolume is below the old one. */ + if (sh->objectid == new_subvol_id) + continue; + + ref = BTRFS_IOCTL_SEARCH_HEADER_BODY(sh); + p = strndup((char*) ref + sizeof(struct btrfs_root_ref), le64toh(ref->name_len)); + if (!p) + return -ENOMEM; + + zero(ino_args); + ino_args.treeid = old_subvol_id; + ino_args.objectid = htole64(ref->dirid); + + if (ioctl(old_fd, BTRFS_IOC_INO_LOOKUP, &ino_args) < 0) + return -errno; + + /* The kernel returns an empty name if the + * subvolume is in the top-level directory, + * and otherwise appends a slash, so that we + * can just concatenate easily here, without + * adding a slash. */ + c = strappend(ino_args.name, p); + if (!c) + return -ENOMEM; + + old_child_fd = openat(old_fd, c, O_RDONLY|O_NOCTTY|O_CLOEXEC|O_DIRECTORY|O_NOFOLLOW); + if (old_child_fd < 0) + return -errno; + + np = strjoin(subvolume, "/", ino_args.name); + if (!np) + return -ENOMEM; + + new_child_fd = openat(new_fd, np, O_RDONLY|O_NOCTTY|O_CLOEXEC|O_DIRECTORY|O_NOFOLLOW); + if (new_child_fd < 0) + return -errno; + + if (flags & BTRFS_SNAPSHOT_READ_ONLY) { + /* If the snapshot is read-only we + * need to mark it writable + * temporarily, to put the subsnapshot + * into place. */ + + if (subvolume_fd < 0) { + subvolume_fd = openat(new_fd, subvolume, O_RDONLY|O_NOCTTY|O_CLOEXEC|O_DIRECTORY|O_NOFOLLOW); + if (subvolume_fd < 0) + return -errno; + } + + r = btrfs_subvol_set_read_only_fd(subvolume_fd, false); + if (r < 0) + return r; + } + + /* When btrfs clones the subvolumes, child + * subvolumes appear as empty directories. Remove + * them, so that we can create a new snapshot + * in their place */ + if (unlinkat(new_child_fd, p, AT_REMOVEDIR) < 0) { + int k = -errno; + + if (flags & BTRFS_SNAPSHOT_READ_ONLY) + (void) btrfs_subvol_set_read_only_fd(subvolume_fd, true); + + return k; + } + + r = subvol_snapshot_children(old_child_fd, new_child_fd, p, sh->objectid, flags & ~BTRFS_SNAPSHOT_FALLBACK_COPY); + + /* Restore the readonly flag */ + if (flags & BTRFS_SNAPSHOT_READ_ONLY) { + int k; + + k = btrfs_subvol_set_read_only_fd(subvolume_fd, true); + if (r >= 0 && k < 0) + return k; + } + + if (r < 0) + return r; + } + + /* Increase search key by one, to read the next item, if we can. */ + if (!btrfs_ioctl_search_args_inc(&args)) + break; + } + + if (flags & BTRFS_SNAPSHOT_QUOTA) + (void) copy_subtree_quota_limits(new_fd, old_subvol_id, new_subvol_id); + + return 0; +} + +int btrfs_subvol_snapshot_fd_full( + int old_fd, + const char *new_path, + BtrfsSnapshotFlags flags, + copy_progress_path_t progress_path, + copy_progress_bytes_t progress_bytes, + void *userdata) { + + _cleanup_close_ int new_fd = -1; + const char *subvolume; + int r; + + assert(old_fd >= 0); + assert(new_path); + + r = btrfs_is_subvol_fd(old_fd); + if (r < 0) + return r; + if (r == 0) { + bool plain_directory = false; + + /* If the source isn't a proper subvolume, fail unless fallback is requested */ + if (!(flags & BTRFS_SNAPSHOT_FALLBACK_COPY)) + return -EISDIR; + + r = btrfs_subvol_make(new_path); + if (r == -ENOTTY && (flags & BTRFS_SNAPSHOT_FALLBACK_DIRECTORY)) { + /* If the destination doesn't support subvolumes, then use a plain directory, if that's requested. */ + if (mkdir(new_path, 0755) < 0) + return -errno; + + plain_directory = true; + } else if (r < 0) + return r; + + r = copy_directory_fd_full(old_fd, new_path, COPY_MERGE|COPY_REFLINK, progress_path, progress_bytes, userdata); + if (r < 0) + goto fallback_fail; + + if (flags & BTRFS_SNAPSHOT_READ_ONLY) { + + if (plain_directory) { + /* Plain directories have no recursive read-only flag, but something pretty close to + * it: the IMMUTABLE bit. Let's use this here, if this is requested. */ + + if (flags & BTRFS_SNAPSHOT_FALLBACK_IMMUTABLE) + (void) chattr_path(new_path, FS_IMMUTABLE_FL, FS_IMMUTABLE_FL, NULL); + } else { + r = btrfs_subvol_set_read_only(new_path, true); + if (r < 0) + goto fallback_fail; + } + } + + return 0; + + fallback_fail: + (void) rm_rf(new_path, REMOVE_ROOT|REMOVE_PHYSICAL|REMOVE_SUBVOLUME); + return r; + } + + r = extract_subvolume_name(new_path, &subvolume); + if (r < 0) + return r; + + new_fd = open_parent(new_path, O_CLOEXEC, 0); + if (new_fd < 0) + return new_fd; + + return subvol_snapshot_children(old_fd, new_fd, subvolume, 0, flags); +} + +int btrfs_subvol_snapshot_full( + const char *old_path, + const char *new_path, + BtrfsSnapshotFlags flags, + copy_progress_path_t progress_path, + copy_progress_bytes_t progress_bytes, + void *userdata) { + + _cleanup_close_ int old_fd = -1; + + assert(old_path); + assert(new_path); + + old_fd = open(old_path, O_RDONLY|O_NOCTTY|O_CLOEXEC|O_DIRECTORY); + if (old_fd < 0) + return -errno; + + return btrfs_subvol_snapshot_fd_full(old_fd, new_path, flags, progress_path, progress_bytes, userdata); +} + +int btrfs_qgroup_find_parents(int fd, uint64_t qgroupid, uint64_t **ret) { + + struct btrfs_ioctl_search_args args = { + /* Tree of quota items */ + .key.tree_id = BTRFS_QUOTA_TREE_OBJECTID, + + /* Look precisely for the quota relation items */ + .key.min_type = BTRFS_QGROUP_RELATION_KEY, + .key.max_type = BTRFS_QGROUP_RELATION_KEY, + + /* No restrictions on the other components */ + .key.min_offset = 0, + .key.max_offset = (uint64_t) -1, + + .key.min_transid = 0, + .key.max_transid = (uint64_t) -1, + }; + + _cleanup_free_ uint64_t *items = NULL; + size_t n_items = 0, n_allocated = 0; + int r; + + assert(fd >= 0); + assert(ret); + + if (qgroupid == 0) { + r = btrfs_subvol_get_id_fd(fd, &qgroupid); + if (r < 0) + return r; + } else { + r = btrfs_is_filesystem(fd); + if (r < 0) + return r; + if (!r) + return -ENOTTY; + } + + args.key.min_objectid = args.key.max_objectid = qgroupid; + + while (btrfs_ioctl_search_args_compare(&args) <= 0) { + const struct btrfs_ioctl_search_header *sh; + unsigned i; + + args.key.nr_items = 256; + if (ioctl(fd, BTRFS_IOC_TREE_SEARCH, &args) < 0) { + if (errno == ENOENT) /* quota tree missing: quota is disabled */ + break; + + return -errno; + } + + if (args.key.nr_items <= 0) + break; + + FOREACH_BTRFS_IOCTL_SEARCH_HEADER(i, sh, args) { + + /* Make sure we start the next search at least from this entry */ + btrfs_ioctl_search_args_set(&args, sh); + + if (sh->type != BTRFS_QGROUP_RELATION_KEY) + continue; + if (sh->offset < sh->objectid) + continue; + if (sh->objectid != qgroupid) + continue; + + if (!GREEDY_REALLOC(items, n_allocated, n_items+1)) + return -ENOMEM; + + items[n_items++] = sh->offset; + } + + /* Increase search key by one, to read the next item, if we can. */ + if (!btrfs_ioctl_search_args_inc(&args)) + break; + } + + if (n_items <= 0) { + *ret = NULL; + return 0; + } + + *ret = TAKE_PTR(items); + + return (int) n_items; +} + +int btrfs_subvol_auto_qgroup_fd(int fd, uint64_t subvol_id, bool insert_intermediary_qgroup) { + _cleanup_free_ uint64_t *qgroups = NULL; + uint64_t parent_subvol; + bool changed = false; + int n = 0, r; + + assert(fd >= 0); + + /* + * Sets up the specified subvolume's qgroup automatically in + * one of two ways: + * + * If insert_intermediary_qgroup is false, the subvolume's + * leaf qgroup will be assigned to the same parent qgroups as + * the subvolume's parent subvolume. + * + * If insert_intermediary_qgroup is true a new intermediary + * higher-level qgroup is created, with a higher level number, + * but reusing the id of the subvolume. The level number is + * picked as one smaller than the lowest level qgroup the + * parent subvolume is a member of. If the parent subvolume's + * leaf qgroup is assigned to no higher-level qgroup a new + * qgroup of level 255 is created instead. Either way, the new + * qgroup is then assigned to the parent's higher-level + * qgroup, and the subvolume itself is assigned to it. + * + * If the subvolume is already assigned to a higher level + * qgroup, no operation is executed. + * + * Effectively this means: regardless if + * insert_intermediary_qgroup is true or not, after this + * function is invoked the subvolume will be accounted within + * the same qgroups as the parent. However, if it is true, it + * will also get its own higher-level qgroup, which may in + * turn be used by subvolumes created beneath this subvolume + * later on. + * + * This hence defines a simple default qgroup setup for + * subvolumes, as long as this function is invoked on each + * created subvolume: each subvolume is always accounting + * together with its immediate parents. Optionally, if + * insert_intermediary_qgroup is true, it will also get a + * qgroup that then includes all its own child subvolumes. + */ + + if (subvol_id == 0) { + r = btrfs_is_subvol_fd(fd); + if (r < 0) + return r; + if (!r) + return -ENOTTY; + + r = btrfs_subvol_get_id_fd(fd, &subvol_id); + if (r < 0) + return r; + } + + n = btrfs_qgroup_find_parents(fd, subvol_id, &qgroups); + if (n < 0) + return n; + if (n > 0) /* already parent qgroups set up, let's bail */ + return 0; + + qgroups = mfree(qgroups); + + r = btrfs_subvol_get_parent(fd, subvol_id, &parent_subvol); + if (r == -ENXIO) + /* No parent, hence no qgroup memberships */ + n = 0; + else if (r < 0) + return r; + else { + n = btrfs_qgroup_find_parents(fd, parent_subvol, &qgroups); + if (n < 0) + return n; + } + + if (insert_intermediary_qgroup) { + uint64_t lowest = 256, new_qgroupid; + bool created = false; + int i; + + /* Determine the lowest qgroup that the parent + * subvolume is assigned to. */ + + for (i = 0; i < n; i++) { + uint64_t level; + + r = btrfs_qgroupid_split(qgroups[i], &level, NULL); + if (r < 0) + return r; + + if (level < lowest) + lowest = level; + } + + if (lowest <= 1) /* There are no levels left we could use insert an intermediary qgroup at */ + return -EBUSY; + + r = btrfs_qgroupid_make(lowest - 1, subvol_id, &new_qgroupid); + if (r < 0) + return r; + + /* Create the new intermediary group, unless it already exists */ + r = btrfs_qgroup_create(fd, new_qgroupid); + if (r < 0 && r != -EEXIST) + return r; + if (r >= 0) + changed = created = true; + + for (i = 0; i < n; i++) { + r = btrfs_qgroup_assign(fd, new_qgroupid, qgroups[i]); + if (r < 0 && r != -EEXIST) { + if (created) + (void) btrfs_qgroup_destroy_recursive(fd, new_qgroupid); + + return r; + } + if (r >= 0) + changed = true; + } + + r = btrfs_qgroup_assign(fd, subvol_id, new_qgroupid); + if (r < 0 && r != -EEXIST) { + if (created) + (void) btrfs_qgroup_destroy_recursive(fd, new_qgroupid); + return r; + } + if (r >= 0) + changed = true; + + } else { + int i; + + /* Assign our subvolume to all the same qgroups as the parent */ + + for (i = 0; i < n; i++) { + r = btrfs_qgroup_assign(fd, subvol_id, qgroups[i]); + if (r < 0 && r != -EEXIST) + return r; + if (r >= 0) + changed = true; + } + } + + return changed; +} + +int btrfs_subvol_auto_qgroup(const char *path, uint64_t subvol_id, bool create_intermediary_qgroup) { + _cleanup_close_ int fd = -1; + + fd = open(path, O_RDONLY|O_NOCTTY|O_CLOEXEC|O_DIRECTORY); + if (fd < 0) + return -errno; + + return btrfs_subvol_auto_qgroup_fd(fd, subvol_id, create_intermediary_qgroup); +} + +int btrfs_subvol_get_parent(int fd, uint64_t subvol_id, uint64_t *ret) { + + struct btrfs_ioctl_search_args args = { + /* Tree of tree roots */ + .key.tree_id = BTRFS_ROOT_TREE_OBJECTID, + + /* Look precisely for the subvolume items */ + .key.min_type = BTRFS_ROOT_BACKREF_KEY, + .key.max_type = BTRFS_ROOT_BACKREF_KEY, + + /* No restrictions on the other components */ + .key.min_offset = 0, + .key.max_offset = (uint64_t) -1, + + .key.min_transid = 0, + .key.max_transid = (uint64_t) -1, + }; + int r; + + assert(fd >= 0); + assert(ret); + + if (subvol_id == 0) { + r = btrfs_subvol_get_id_fd(fd, &subvol_id); + if (r < 0) + return r; + } else { + r = btrfs_is_filesystem(fd); + if (r < 0) + return r; + if (!r) + return -ENOTTY; + } + + args.key.min_objectid = args.key.max_objectid = subvol_id; + + while (btrfs_ioctl_search_args_compare(&args) <= 0) { + const struct btrfs_ioctl_search_header *sh; + unsigned i; + + args.key.nr_items = 256; + if (ioctl(fd, BTRFS_IOC_TREE_SEARCH, &args) < 0) + return negative_errno(); + + if (args.key.nr_items <= 0) + break; + + FOREACH_BTRFS_IOCTL_SEARCH_HEADER(i, sh, args) { + + if (sh->type != BTRFS_ROOT_BACKREF_KEY) + continue; + if (sh->objectid != subvol_id) + continue; + + *ret = sh->offset; + return 0; + } + } + + return -ENXIO; +} diff --git a/src/basic/btrfs-util.h b/src/basic/btrfs-util.h new file mode 100644 index 0000000..7d848a7 --- /dev/null +++ b/src/basic/btrfs-util.h @@ -0,0 +1,120 @@ +/* SPDX-License-Identifier: LGPL-2.1+ */ +#pragma once + +#include <stdbool.h> +#include <stdint.h> +#include <sys/types.h> + +#include "sd-id128.h" + +#include "copy.h" +#include "time-util.h" + +typedef struct BtrfsSubvolInfo { + uint64_t subvol_id; + usec_t otime; + + sd_id128_t uuid; + sd_id128_t parent_uuid; + + bool read_only; +} BtrfsSubvolInfo; + +typedef struct BtrfsQuotaInfo { + uint64_t referenced; + uint64_t exclusive; + uint64_t referenced_max; + uint64_t exclusive_max; +} BtrfsQuotaInfo; + +typedef enum BtrfsSnapshotFlags { + BTRFS_SNAPSHOT_FALLBACK_COPY = 1 << 0, /* If the source isn't a subvolume, reflink everything */ + BTRFS_SNAPSHOT_READ_ONLY = 1 << 1, + BTRFS_SNAPSHOT_RECURSIVE = 1 << 2, + BTRFS_SNAPSHOT_QUOTA = 1 << 3, + BTRFS_SNAPSHOT_FALLBACK_DIRECTORY = 1 << 4, /* If the destination doesn't support subvolumes, reflink/copy instead */ + BTRFS_SNAPSHOT_FALLBACK_IMMUTABLE = 1 << 5, /* When we can't create a subvolume, use the FS_IMMUTABLE attribute for indicating read-only */ +} BtrfsSnapshotFlags; + +typedef enum BtrfsRemoveFlags { + BTRFS_REMOVE_RECURSIVE = 1 << 0, + BTRFS_REMOVE_QUOTA = 1 << 1, +} BtrfsRemoveFlags; + +int btrfs_is_filesystem(int fd); + +int btrfs_is_subvol_fd(int fd); +int btrfs_is_subvol(const char *path); + +int btrfs_reflink(int infd, int outfd); +int btrfs_clone_range(int infd, uint64_t in_offset, int ofd, uint64_t out_offset, uint64_t sz); + +int btrfs_get_block_device_fd(int fd, dev_t *dev); +int btrfs_get_block_device(const char *path, dev_t *dev); + +int btrfs_defrag_fd(int fd); +int btrfs_defrag(const char *p); + +int btrfs_quota_enable_fd(int fd, bool b); +int btrfs_quota_enable(const char *path, bool b); + +int btrfs_quota_scan_start(int fd); +int btrfs_quota_scan_wait(int fd); +int btrfs_quota_scan_ongoing(int fd); + +int btrfs_subvol_make(const char *path); +int btrfs_subvol_make_fd(int fd, const char *subvolume); + +int btrfs_subvol_snapshot_fd_full(int old_fd, const char *new_path, BtrfsSnapshotFlags flags, copy_progress_path_t progress_path, copy_progress_bytes_t progress_bytes, void *userdata); +static inline int btrfs_subvol_snapshot_fd(int old_fd, const char *new_path, BtrfsSnapshotFlags flags) { + return btrfs_subvol_snapshot_fd_full(old_fd, new_path, flags, NULL, NULL, NULL); +} + +int btrfs_subvol_snapshot_full(const char *old_path, const char *new_path, BtrfsSnapshotFlags flags, copy_progress_path_t progress_path, copy_progress_bytes_t progress_bytes, void *userdata); +static inline int btrfs_subvol_snapshot(const char *old_path, const char *new_path, BtrfsSnapshotFlags flags) { + return btrfs_subvol_snapshot_full(old_path, new_path, flags, NULL, NULL, NULL); +} + +int btrfs_subvol_remove(const char *path, BtrfsRemoveFlags flags); +int btrfs_subvol_remove_fd(int fd, const char *subvolume, BtrfsRemoveFlags flags); + +int btrfs_subvol_set_read_only_fd(int fd, bool b); +int btrfs_subvol_set_read_only(const char *path, bool b); +int btrfs_subvol_get_read_only_fd(int fd); + +int btrfs_subvol_get_id(int fd, const char *subvolume, uint64_t *ret); +int btrfs_subvol_get_id_fd(int fd, uint64_t *ret); +int btrfs_subvol_get_parent(int fd, uint64_t subvol_id, uint64_t *ret); + +int btrfs_subvol_get_info_fd(int fd, uint64_t subvol_id, BtrfsSubvolInfo *info); + +int btrfs_subvol_find_subtree_qgroup(int fd, uint64_t subvol_id, uint64_t *ret); + +int btrfs_subvol_get_subtree_quota(const char *path, uint64_t subvol_id, BtrfsQuotaInfo *quota); +int btrfs_subvol_get_subtree_quota_fd(int fd, uint64_t subvol_id, BtrfsQuotaInfo *quota); + +int btrfs_subvol_set_subtree_quota_limit(const char *path, uint64_t subvol_id, uint64_t referenced_max); +int btrfs_subvol_set_subtree_quota_limit_fd(int fd, uint64_t subvol_id, uint64_t referenced_max); + +int btrfs_subvol_auto_qgroup_fd(int fd, uint64_t subvol_id, bool new_qgroup); +int btrfs_subvol_auto_qgroup(const char *path, uint64_t subvol_id, bool create_intermediary_qgroup); + +int btrfs_qgroupid_make(uint64_t level, uint64_t id, uint64_t *ret); +int btrfs_qgroupid_split(uint64_t qgroupid, uint64_t *level, uint64_t *id); + +int btrfs_qgroup_create(int fd, uint64_t qgroupid); +int btrfs_qgroup_destroy(int fd, uint64_t qgroupid); +int btrfs_qgroup_destroy_recursive(int fd, uint64_t qgroupid); + +int btrfs_qgroup_set_limit_fd(int fd, uint64_t qgroupid, uint64_t referenced_max); +int btrfs_qgroup_set_limit(const char *path, uint64_t qgroupid, uint64_t referenced_max); + +int btrfs_qgroup_copy_limits(int fd, uint64_t old_qgroupid, uint64_t new_qgroupid); + +int btrfs_qgroup_assign(int fd, uint64_t child, uint64_t parent); +int btrfs_qgroup_unassign(int fd, uint64_t child, uint64_t parent); + +int btrfs_qgroup_find_parents(int fd, uint64_t qgroupid, uint64_t **ret); + +int btrfs_qgroup_get_quota_fd(int fd, uint64_t qgroupid, BtrfsQuotaInfo *quota); +int btrfs_qgroup_get_quota(const char *path, uint64_t qgroupid, BtrfsQuotaInfo *quota); diff --git a/src/basic/build.h b/src/basic/build.h new file mode 100644 index 0000000..7a59059 --- /dev/null +++ b/src/basic/build.h @@ -0,0 +1,156 @@ +/* SPDX-License-Identifier: LGPL-2.1+ */ +#pragma once + +#include "version.h" + +#if HAVE_PAM +#define _PAM_FEATURE_ "+PAM" +#else +#define _PAM_FEATURE_ "-PAM" +#endif + +#if HAVE_AUDIT +#define _AUDIT_FEATURE_ "+AUDIT" +#else +#define _AUDIT_FEATURE_ "-AUDIT" +#endif + +#if HAVE_SELINUX +#define _SELINUX_FEATURE_ "+SELINUX" +#else +#define _SELINUX_FEATURE_ "-SELINUX" +#endif + +#if HAVE_APPARMOR +#define _APPARMOR_FEATURE_ "+APPARMOR" +#else +#define _APPARMOR_FEATURE_ "-APPARMOR" +#endif + +#if ENABLE_IMA +#define _IMA_FEATURE_ "+IMA" +#else +#define _IMA_FEATURE_ "-IMA" +#endif + +#if ENABLE_SMACK +#define _SMACK_FEATURE_ "+SMACK" +#else +#define _SMACK_FEATURE_ "-SMACK" +#endif + +#if HAVE_SYSV_COMPAT +#define _SYSVINIT_FEATURE_ "+SYSVINIT" +#else +#define _SYSVINIT_FEATURE_ "-SYSVINIT" +#endif + +#if ENABLE_UTMP +#define _UTMP_FEATURE_ "+UTMP" +#else +#define _UTMP_FEATURE_ "-UTMP" +#endif + +#if HAVE_LIBCRYPTSETUP +#define _LIBCRYPTSETUP_FEATURE_ "+LIBCRYPTSETUP" +#else +#define _LIBCRYPTSETUP_FEATURE_ "-LIBCRYPTSETUP" +#endif + +#if HAVE_GCRYPT +#define _GCRYPT_FEATURE_ "+GCRYPT" +#else +#define _GCRYPT_FEATURE_ "-GCRYPT" +#endif + +#if HAVE_GNUTLS +#define _GNUTLS_FEATURE_ "+GNUTLS" +#else +#define _GNUTLS_FEATURE_ "-GNUTLS" +#endif + +#if HAVE_ACL +#define _ACL_FEATURE_ "+ACL" +#else +#define _ACL_FEATURE_ "-ACL" +#endif + +#if HAVE_XZ +#define _XZ_FEATURE_ "+XZ" +#else +#define _XZ_FEATURE_ "-XZ" +#endif + +#if HAVE_LZ4 +#define _LZ4_FEATURE_ "+LZ4" +#else +#define _LZ4_FEATURE_ "-LZ4" +#endif + +#if HAVE_SECCOMP +#define _SECCOMP_FEATURE_ "+SECCOMP" +#else +#define _SECCOMP_FEATURE_ "-SECCOMP" +#endif + +#if HAVE_BLKID +#define _BLKID_FEATURE_ "+BLKID" +#else +#define _BLKID_FEATURE_ "-BLKID" +#endif + +#if HAVE_ELFUTILS +#define _ELFUTILS_FEATURE_ "+ELFUTILS" +#else +#define _ELFUTILS_FEATURE_ "-ELFUTILS" +#endif + +#if HAVE_KMOD +#define _KMOD_FEATURE_ "+KMOD" +#else +#define _KMOD_FEATURE_ "-KMOD" +#endif + +#if HAVE_LIBIDN2 +#define _IDN2_FEATURE_ "+IDN2" +#else +#define _IDN2_FEATURE_ "-IDN2" +#endif + +#if HAVE_LIBIDN +#define _IDN_FEATURE_ "+IDN" +#else +#define _IDN_FEATURE_ "-IDN" +#endif + +#if HAVE_PCRE2 +#define _PCRE2_FEATURE_ "+PCRE2" +#else +#define _PCRE2_FEATURE_ "-PCRE2" +#endif + +#define _CGROUP_HIEARCHY_ "default-hierarchy=" DEFAULT_HIERARCHY_NAME + +#define SYSTEMD_FEATURES \ + _PAM_FEATURE_ " " \ + _AUDIT_FEATURE_ " " \ + _SELINUX_FEATURE_ " " \ + _IMA_FEATURE_ " " \ + _APPARMOR_FEATURE_ " " \ + _SMACK_FEATURE_ " " \ + _SYSVINIT_FEATURE_ " " \ + _UTMP_FEATURE_ " " \ + _LIBCRYPTSETUP_FEATURE_ " " \ + _GCRYPT_FEATURE_ " " \ + _GNUTLS_FEATURE_ " " \ + _ACL_FEATURE_ " " \ + _XZ_FEATURE_ " " \ + _LZ4_FEATURE_ " " \ + _SECCOMP_FEATURE_ " " \ + _BLKID_FEATURE_ " " \ + _ELFUTILS_FEATURE_ " " \ + _KMOD_FEATURE_ " " \ + _IDN2_FEATURE_ " " \ + _IDN_FEATURE_ " " \ + _PCRE2_FEATURE_ " " \ + _CGROUP_HIEARCHY_ diff --git a/src/basic/bus-label.c b/src/basic/bus-label.c new file mode 100644 index 0000000..1613cf7 --- /dev/null +++ b/src/basic/bus-label.c @@ -0,0 +1,81 @@ +/* SPDX-License-Identifier: LGPL-2.1+ */ + +#include <stdlib.h> + +#include "alloc-util.h" +#include "bus-label.h" +#include "hexdecoct.h" +#include "macro.h" + +char *bus_label_escape(const char *s) { + char *r, *t; + const char *f; + + assert_return(s, NULL); + + /* Escapes all chars that D-Bus' object path cannot deal + * with. Can be reversed with bus_path_unescape(). We special + * case the empty string. */ + + if (*s == 0) + return strdup("_"); + + r = new(char, strlen(s)*3 + 1); + if (!r) + return NULL; + + for (f = s, t = r; *f; f++) { + + /* Escape everything that is not a-zA-Z0-9. We also + * escape 0-9 if it's the first character */ + + if (!(*f >= 'A' && *f <= 'Z') && + !(*f >= 'a' && *f <= 'z') && + !(f > s && *f >= '0' && *f <= '9')) { + *(t++) = '_'; + *(t++) = hexchar(*f >> 4); + *(t++) = hexchar(*f); + } else + *(t++) = *f; + } + + *t = 0; + + return r; +} + +char *bus_label_unescape_n(const char *f, size_t l) { + char *r, *t; + size_t i; + + assert_return(f, NULL); + + /* Special case for the empty string */ + if (l == 1 && *f == '_') + return strdup(""); + + r = new(char, l + 1); + if (!r) + return NULL; + + for (i = 0, t = r; i < l; ++i) { + if (f[i] == '_') { + int a, b; + + if (l - i < 3 || + (a = unhexchar(f[i + 1])) < 0 || + (b = unhexchar(f[i + 2])) < 0) { + /* Invalid escape code, let's take it literal then */ + *(t++) = '_'; + } else { + *(t++) = (char) ((a << 4) | b); + i += 2; + } + } else + *(t++) = f[i]; + } + + *t = 0; + + return r; +} diff --git a/src/basic/bus-label.h b/src/basic/bus-label.h new file mode 100644 index 0000000..664cfaf --- /dev/null +++ b/src/basic/bus-label.h @@ -0,0 +1,15 @@ +/* SPDX-License-Identifier: LGPL-2.1+ */ +#pragma once + +#include <stddef.h> +#include <stdlib.h> +#include <string.h> + +#include "string-util.h" + +char *bus_label_escape(const char *s); +char *bus_label_unescape_n(const char *f, size_t l); + +static inline char *bus_label_unescape(const char *f) { + return bus_label_unescape_n(f, strlen_ptr(f)); +} diff --git a/src/basic/cap-list.c b/src/basic/cap-list.c new file mode 100644 index 0000000..29a17d9 --- /dev/null +++ b/src/basic/cap-list.c @@ -0,0 +1,119 @@ +/* SPDX-License-Identifier: LGPL-2.1+ */ + +#include <errno.h> +#include <string.h> + +#include "alloc-util.h" +#include "capability-util.h" +#include "cap-list.h" +#include "extract-word.h" +#include "macro.h" +#include "missing.h" +#include "parse-util.h" +#include "util.h" + +static const struct capability_name* lookup_capability(register const char *str, register GPERF_LEN_TYPE len); + +#include "cap-from-name.h" +#include "cap-to-name.h" + +const char *capability_to_name(int id) { + + if (id < 0) + return NULL; + + if ((size_t) id >= ELEMENTSOF(capability_names)) + return NULL; + + return capability_names[id]; +} + +int capability_from_name(const char *name) { + const struct capability_name *sc; + int r, i; + + assert(name); + + /* Try to parse numeric capability */ + r = safe_atoi(name, &i); + if (r >= 0) { + if (i >= 0 && (size_t) i < ELEMENTSOF(capability_names)) + return i; + else + return -EINVAL; + } + + /* Try to parse string capability */ + sc = lookup_capability(name, strlen(name)); + if (!sc) + return -EINVAL; + + return sc->id; +} + +int capability_list_length(void) { + return (int) ELEMENTSOF(capability_names); +} + +int capability_set_to_string_alloc(uint64_t set, char **s) { + _cleanup_free_ char *str = NULL; + unsigned long i; + size_t allocated = 0, n = 0; + + assert(s); + + for (i = 0; i < cap_last_cap(); i++) + if (set & (UINT64_C(1) << i)) { + const char *p; + size_t add; + + p = capability_to_name(i); + if (!p) + return -EINVAL; + + add = strlen(p); + + if (!GREEDY_REALLOC(str, allocated, n + add + 2)) + return -ENOMEM; + + strcpy(mempcpy(str + n, p, add), " "); + n += add + 1; + } + + if (!GREEDY_REALLOC(str, allocated, n + 1)) + return -ENOMEM; + + str[n > 0 ? n - 1 : 0] = '\0'; /* truncate the last space, if it's there */ + + *s = TAKE_PTR(str); + + return 0; +} + +int capability_set_from_string(const char *s, uint64_t *set) { + uint64_t val = 0; + const char *p; + + assert(set); + + for (p = s;;) { + _cleanup_free_ char *word = NULL; + int r; + + r = extract_first_word(&p, &word, NULL, EXTRACT_QUOTES); + if (r == -ENOMEM) + return r; + if (r <= 0) + break; + + r = capability_from_name(word); + if (r < 0) + continue; + + val |= ((uint64_t) UINT64_C(1)) << (uint64_t) r; + } + + *set = val; + + return 0; +} diff --git a/src/basic/cap-list.h b/src/basic/cap-list.h new file mode 100644 index 0000000..ab41924 --- /dev/null +++ b/src/basic/cap-list.h @@ -0,0 +1,9 @@ +/* SPDX-License-Identifier: LGPL-2.1+ */ +#pragma once + +const char *capability_to_name(int id); +int capability_from_name(const char *name); +int capability_list_length(void); + +int capability_set_to_string_alloc(uint64_t set, char **s); +int capability_set_from_string(const char *s, uint64_t *set); diff --git a/src/basic/cap-to-name.awk b/src/basic/cap-to-name.awk new file mode 100644 index 0000000..402a782 --- /dev/null +++ b/src/basic/cap-to-name.awk @@ -0,0 +1,9 @@ +BEGIN{ + print "static const char* const capability_names[] = { " +} +{ + printf " [%s] = \"%s\",\n", $1, tolower($1) +} +END{ + print "};" +} diff --git a/src/basic/capability-util.c b/src/basic/capability-util.c new file mode 100644 index 0000000..b944ee6 --- /dev/null +++ b/src/basic/capability-util.c @@ -0,0 +1,487 @@ +/* SPDX-License-Identifier: LGPL-2.1+ */ + +#include <errno.h> +#include <grp.h> +#include <stdio.h> +#include <stdlib.h> +#include <sys/capability.h> +#include <sys/prctl.h> +#include <unistd.h> + +#include "alloc-util.h" +#include "capability-util.h" +#include "fileio.h" +#include "log.h" +#include "macro.h" +#include "missing_prctl.h" +#include "parse-util.h" +#include "user-util.h" +#include "util.h" + +int have_effective_cap(int value) { + _cleanup_cap_free_ cap_t cap; + cap_flag_value_t fv; + + cap = cap_get_proc(); + if (!cap) + return -errno; + + if (cap_get_flag(cap, value, CAP_EFFECTIVE, &fv) < 0) + return -errno; + else + return fv == CAP_SET; +} + +unsigned long cap_last_cap(void) { + static thread_local unsigned long saved; + static thread_local bool valid = false; + _cleanup_free_ char *content = NULL; + unsigned long p = 0; + int r; + + if (valid) + return saved; + + /* available since linux-3.2 */ + r = read_one_line_file("/proc/sys/kernel/cap_last_cap", &content); + if (r >= 0) { + r = safe_atolu(content, &p); + if (r >= 0) { + saved = p; + valid = true; + return p; + } + } + + /* fall back to syscall-probing for pre linux-3.2 */ + p = (unsigned long) CAP_LAST_CAP; + + if (prctl(PR_CAPBSET_READ, p) < 0) { + + /* Hmm, look downwards, until we find one that + * works */ + for (p--; p > 0; p --) + if (prctl(PR_CAPBSET_READ, p) >= 0) + break; + + } else { + + /* Hmm, look upwards, until we find one that doesn't + * work */ + for (;; p++) + if (prctl(PR_CAPBSET_READ, p+1) < 0) + break; + } + + saved = p; + valid = true; + + return p; +} + +int capability_update_inherited_set(cap_t caps, uint64_t set) { + unsigned long i; + + /* Add capabilities in the set to the inherited caps. Do not apply + * them yet. */ + + for (i = 0; i < cap_last_cap(); i++) { + + if (set & (UINT64_C(1) << i)) { + cap_value_t v; + + v = (cap_value_t) i; + + /* Make the capability inheritable. */ + if (cap_set_flag(caps, CAP_INHERITABLE, 1, &v, CAP_SET) < 0) + return -errno; + } + } + + return 0; +} + +int capability_ambient_set_apply(uint64_t set, bool also_inherit) { + unsigned long i; + _cleanup_cap_free_ cap_t caps = NULL; + + /* Add the capabilities to the ambient set. */ + + if (also_inherit) { + int r; + caps = cap_get_proc(); + if (!caps) + return -errno; + + r = capability_update_inherited_set(caps, set); + if (r < 0) + return -errno; + + if (cap_set_proc(caps) < 0) + return -errno; + } + + for (i = 0; i < cap_last_cap(); i++) { + + if (set & (UINT64_C(1) << i)) { + + /* Add the capability to the ambient set. */ + if (prctl(PR_CAP_AMBIENT, PR_CAP_AMBIENT_RAISE, i, 0, 0) < 0) + return -errno; + } + } + + return 0; +} + +int capability_bounding_set_drop(uint64_t keep, bool right_now) { + _cleanup_cap_free_ cap_t before_cap = NULL, after_cap = NULL; + cap_flag_value_t fv; + unsigned long i; + int r; + + /* If we are run as PID 1 we will lack CAP_SETPCAP by default + * in the effective set (yes, the kernel drops that when + * executing init!), so get it back temporarily so that we can + * call PR_CAPBSET_DROP. */ + + before_cap = cap_get_proc(); + if (!before_cap) + return -errno; + + if (cap_get_flag(before_cap, CAP_SETPCAP, CAP_EFFECTIVE, &fv) < 0) + return -errno; + + if (fv != CAP_SET) { + _cleanup_cap_free_ cap_t temp_cap = NULL; + static const cap_value_t v = CAP_SETPCAP; + + temp_cap = cap_dup(before_cap); + if (!temp_cap) + return -errno; + + if (cap_set_flag(temp_cap, CAP_EFFECTIVE, 1, &v, CAP_SET) < 0) + return -errno; + + if (cap_set_proc(temp_cap) < 0) + log_debug_errno(errno, "Can't acquire effective CAP_SETPCAP bit, ignoring: %m"); + + /* If we didn't manage to acquire the CAP_SETPCAP bit, we continue anyway, after all this just means + * we'll fail later, when we actually intend to drop some capabilities. */ + } + + after_cap = cap_dup(before_cap); + if (!after_cap) + return -errno; + + for (i = 0; i <= cap_last_cap(); i++) { + cap_value_t v; + + if ((keep & (UINT64_C(1) << i))) + continue; + + /* Drop it from the bounding set */ + if (prctl(PR_CAPBSET_DROP, i) < 0) { + r = -errno; + + /* If dropping the capability failed, let's see if we didn't have it in the first place. If so, + * continue anyway, as dropping a capability we didn't have in the first place doesn't really + * matter anyway. */ + if (prctl(PR_CAPBSET_READ, i) != 0) + goto finish; + } + v = (cap_value_t) i; + + /* Also drop it from the inheritable set, so + * that anything we exec() loses the + * capability for good. */ + if (cap_set_flag(after_cap, CAP_INHERITABLE, 1, &v, CAP_CLEAR) < 0) { + r = -errno; + goto finish; + } + + /* If we shall apply this right now drop it + * also from our own capability sets. */ + if (right_now) { + if (cap_set_flag(after_cap, CAP_PERMITTED, 1, &v, CAP_CLEAR) < 0 || + cap_set_flag(after_cap, CAP_EFFECTIVE, 1, &v, CAP_CLEAR) < 0) { + r = -errno; + goto finish; + } + } + } + + r = 0; + +finish: + if (cap_set_proc(after_cap) < 0) { + /* If there are no actual changes anyway then let's ignore this error. */ + if (cap_compare(before_cap, after_cap) != 0) + r = -errno; + } + + return r; +} + +static int drop_from_file(const char *fn, uint64_t keep) { + _cleanup_free_ char *p = NULL; + uint64_t current, after; + uint32_t hi, lo; + int r, k; + + r = read_one_line_file(fn, &p); + if (r < 0) + return r; + + assert_cc(sizeof(hi) == sizeof(unsigned)); + assert_cc(sizeof(lo) == sizeof(unsigned)); + + k = sscanf(p, "%u %u", &lo, &hi); + if (k != 2) + return -EIO; + + current = (uint64_t) lo | ((uint64_t) hi << 32ULL); + after = current & keep; + + if (current == after) + return 0; + + lo = (unsigned) (after & 0xFFFFFFFFULL); + hi = (unsigned) ((after >> 32ULL) & 0xFFFFFFFFULL); + + return write_string_filef(fn, WRITE_STRING_FILE_CREATE, "%u %u", lo, hi); +} + +int capability_bounding_set_drop_usermode(uint64_t keep) { + int r; + + r = drop_from_file("/proc/sys/kernel/usermodehelper/inheritable", keep); + if (r < 0) + return r; + + r = drop_from_file("/proc/sys/kernel/usermodehelper/bset", keep); + if (r < 0) + return r; + + return r; +} + +int drop_privileges(uid_t uid, gid_t gid, uint64_t keep_capabilities) { + _cleanup_cap_free_ cap_t d = NULL; + unsigned i, j = 0; + int r; + + /* Unfortunately we cannot leave privilege dropping to PID 1 + * here, since we want to run as user but want to keep some + * capabilities. Since file capabilities have been introduced + * this cannot be done across exec() anymore, unless our + * binary has the capability configured in the file system, + * which we want to avoid. */ + + if (setresgid(gid, gid, gid) < 0) + return log_error_errno(errno, "Failed to change group ID: %m"); + + r = maybe_setgroups(0, NULL); + if (r < 0) + return log_error_errno(r, "Failed to drop auxiliary groups list: %m"); + + /* Ensure we keep the permitted caps across the setresuid() */ + if (prctl(PR_SET_KEEPCAPS, 1) < 0) + return log_error_errno(errno, "Failed to enable keep capabilities flag: %m"); + + if (setresuid(uid, uid, uid) < 0) + return log_error_errno(errno, "Failed to change user ID: %m"); + + if (prctl(PR_SET_KEEPCAPS, 0) < 0) + return log_error_errno(errno, "Failed to disable keep capabilities flag: %m"); + + /* Drop all caps from the bounding set, except the ones we want */ + r = capability_bounding_set_drop(keep_capabilities, true); + if (r < 0) + return log_error_errno(r, "Failed to drop capabilities: %m"); + + /* Now upgrade the permitted caps we still kept to effective caps */ + d = cap_init(); + if (!d) + return log_oom(); + + if (keep_capabilities) { + cap_value_t bits[u64log2(keep_capabilities) + 1]; + + for (i = 0; i < ELEMENTSOF(bits); i++) + if (keep_capabilities & (1ULL << i)) + bits[j++] = i; + + /* use enough bits */ + assert(i == 64 || (keep_capabilities >> i) == 0); + /* don't use too many bits */ + assert(keep_capabilities & (1ULL << (i - 1))); + + if (cap_set_flag(d, CAP_EFFECTIVE, j, bits, CAP_SET) < 0 || + cap_set_flag(d, CAP_PERMITTED, j, bits, CAP_SET) < 0) + return log_error_errno(errno, "Failed to enable capabilities bits: %m"); + + if (cap_set_proc(d) < 0) + return log_error_errno(errno, "Failed to increase capabilities: %m"); + } + + return 0; +} + +int drop_capability(cap_value_t cv) { + _cleanup_cap_free_ cap_t tmp_cap = NULL; + + tmp_cap = cap_get_proc(); + if (!tmp_cap) + return -errno; + + if ((cap_set_flag(tmp_cap, CAP_INHERITABLE, 1, &cv, CAP_CLEAR) < 0) || + (cap_set_flag(tmp_cap, CAP_PERMITTED, 1, &cv, CAP_CLEAR) < 0) || + (cap_set_flag(tmp_cap, CAP_EFFECTIVE, 1, &cv, CAP_CLEAR) < 0)) + return -errno; + + if (cap_set_proc(tmp_cap) < 0) + return -errno; + + return 0; +} + +bool ambient_capabilities_supported(void) { + static int cache = -1; + + if (cache >= 0) + return cache; + + /* If PR_CAP_AMBIENT returns something valid, or an unexpected error code we assume that ambient caps are + * available. */ + + cache = prctl(PR_CAP_AMBIENT, PR_CAP_AMBIENT_IS_SET, CAP_KILL, 0, 0) >= 0 || + !IN_SET(errno, EINVAL, EOPNOTSUPP, ENOSYS); + + return cache; +} + +int capability_quintet_enforce(const CapabilityQuintet *q) { + _cleanup_cap_free_ cap_t c = NULL; + int r; + + if (q->ambient != (uint64_t) -1) { + unsigned long i; + bool changed = false; + + c = cap_get_proc(); + if (!c) + return -errno; + + /* In order to raise the ambient caps set we first need to raise the matching inheritable + permitted + * cap */ + for (i = 0; i <= cap_last_cap(); i++) { + uint64_t m = UINT64_C(1) << i; + cap_value_t cv = (cap_value_t) i; + cap_flag_value_t old_value_inheritable, old_value_permitted; + + if ((q->ambient & m) == 0) + continue; + + if (cap_get_flag(c, cv, CAP_INHERITABLE, &old_value_inheritable) < 0) + return -errno; + if (cap_get_flag(c, cv, CAP_PERMITTED, &old_value_permitted) < 0) + return -errno; + + if (old_value_inheritable == CAP_SET && old_value_permitted == CAP_SET) + continue; + + if (cap_set_flag(c, CAP_INHERITABLE, 1, &cv, CAP_SET) < 0) + return -errno; + + if (cap_set_flag(c, CAP_PERMITTED, 1, &cv, CAP_SET) < 0) + return -errno; + + changed = true; + } + + if (changed) + if (cap_set_proc(c) < 0) + return -errno; + + r = capability_ambient_set_apply(q->ambient, false); + if (r < 0) + return r; + } + + if (q->inheritable != (uint64_t) -1 || q->permitted != (uint64_t) -1 || q->effective != (uint64_t) -1) { + bool changed = false; + unsigned long i; + + if (!c) { + c = cap_get_proc(); + if (!c) + return -errno; + } + + for (i = 0; i <= cap_last_cap(); i++) { + uint64_t m = UINT64_C(1) << i; + cap_value_t cv = (cap_value_t) i; + + if (q->inheritable != (uint64_t) -1) { + cap_flag_value_t old_value, new_value; + + if (cap_get_flag(c, cv, CAP_INHERITABLE, &old_value) < 0) + return -errno; + + new_value = (q->inheritable & m) ? CAP_SET : CAP_CLEAR; + + if (old_value != new_value) { + changed = true; + + if (cap_set_flag(c, CAP_INHERITABLE, 1, &cv, new_value) < 0) + return -errno; + } + } + + if (q->permitted != (uint64_t) -1) { + cap_flag_value_t old_value, new_value; + + if (cap_get_flag(c, cv, CAP_PERMITTED, &old_value) < 0) + return -errno; + + new_value = (q->permitted & m) ? CAP_SET : CAP_CLEAR; + + if (old_value != new_value) { + changed = true; + + if (cap_set_flag(c, CAP_PERMITTED, 1, &cv, new_value) < 0) + return -errno; + } + } + + if (q->effective != (uint64_t) -1) { + cap_flag_value_t old_value, new_value; + + if (cap_get_flag(c, cv, CAP_EFFECTIVE, &old_value) < 0) + return -errno; + + new_value = (q->effective & m) ? CAP_SET : CAP_CLEAR; + + if (old_value != new_value) { + changed = true; + + if (cap_set_flag(c, CAP_EFFECTIVE, 1, &cv, new_value) < 0) + return -errno; + } + } + } + + if (changed) + if (cap_set_proc(c) < 0) + return -errno; + } + + if (q->bounding != (uint64_t) -1) { + r = capability_bounding_set_drop(q->bounding, false); + if (r < 0) + return r; + } + + return 0; +} diff --git a/src/basic/capability-util.h b/src/basic/capability-util.h new file mode 100644 index 0000000..02c7d5c --- /dev/null +++ b/src/basic/capability-util.h @@ -0,0 +1,70 @@ +/* SPDX-License-Identifier: LGPL-2.1+ */ +#pragma once + +#include <stdbool.h> +#include <stdint.h> +#include <sys/capability.h> +#include <sys/types.h> + +#include "macro.h" +#include "missing_capability.h" +#include "util.h" + +#define CAP_ALL (uint64_t) -1 + +unsigned long cap_last_cap(void); +int have_effective_cap(int value); +int capability_bounding_set_drop(uint64_t keep, bool right_now); +int capability_bounding_set_drop_usermode(uint64_t keep); + +int capability_ambient_set_apply(uint64_t set, bool also_inherit); +int capability_update_inherited_set(cap_t caps, uint64_t ambient_set); + +int drop_privileges(uid_t uid, gid_t gid, uint64_t keep_capabilities); + +int drop_capability(cap_value_t cv); + +DEFINE_TRIVIAL_CLEANUP_FUNC(cap_t, cap_free); +#define _cleanup_cap_free_ _cleanup_(cap_freep) + +static inline void cap_free_charpp(char **p) { + if (*p) + cap_free(*p); +} +#define _cleanup_cap_free_charp_ _cleanup_(cap_free_charpp) + +static inline bool cap_test_all(uint64_t caps) { + uint64_t m; + m = (UINT64_C(1) << (cap_last_cap() + 1)) - 1; + return FLAGS_SET(caps, m); +} + +bool ambient_capabilities_supported(void); + +/* Identical to linux/capability.h's CAP_TO_MASK(), but uses an unsigned 1U instead of a signed 1 for shifting left, in + * order to avoid complaints about shifting a signed int left by 31 bits, which would make it negative. */ +#define CAP_TO_MASK_CORRECTED(x) (1U << ((x) & 31U)) + +typedef struct CapabilityQuintet { + /* Stores all five types of capabilities in one go. Note that we use (uint64_t) -1 for unset here. This hence + * needs to be updated as soon as Linux learns more than 63 caps. */ + uint64_t effective; + uint64_t bounding; + uint64_t inheritable; + uint64_t permitted; + uint64_t ambient; +} CapabilityQuintet; + +assert_cc(CAP_LAST_CAP < 64); + +#define CAPABILITY_QUINTET_NULL { (uint64_t) -1, (uint64_t) -1, (uint64_t) -1, (uint64_t) -1, (uint64_t) -1 } + +static inline bool capability_quintet_is_set(const CapabilityQuintet *q) { + return q->effective != (uint64_t) -1 || + q->bounding != (uint64_t) -1 || + q->inheritable != (uint64_t) -1 || + q->permitted != (uint64_t) -1 || + q->ambient != (uint64_t) -1; +} + +int capability_quintet_enforce(const CapabilityQuintet *q); diff --git a/src/basic/cgroup-util.c b/src/basic/cgroup-util.c new file mode 100644 index 0000000..8ce7ccb --- /dev/null +++ b/src/basic/cgroup-util.c @@ -0,0 +1,2933 @@ +/* SPDX-License-Identifier: LGPL-2.1+ */ + +#include <dirent.h> +#include <errno.h> +#include <ftw.h> +#include <limits.h> +#include <signal.h> +#include <stddef.h> +#include <stdio_ext.h> +#include <stdlib.h> +#include <string.h> +#include <sys/stat.h> +#include <sys/statfs.h> +#include <sys/types.h> +#include <sys/utsname.h> +#include <sys/xattr.h> +#include <unistd.h> + +#include "alloc-util.h" +#include "cgroup-util.h" +#include "def.h" +#include "dirent-util.h" +#include "extract-word.h" +#include "fd-util.h" +#include "fileio.h" +#include "format-util.h" +#include "fs-util.h" +#include "log.h" +#include "login-util.h" +#include "macro.h" +#include "missing.h" +#include "mkdir.h" +#include "parse-util.h" +#include "path-util.h" +#include "proc-cmdline.h" +#include "process-util.h" +#include "set.h" +#include "special.h" +#include "stat-util.h" +#include "stdio-util.h" +#include "string-table.h" +#include "string-util.h" +#include "strv.h" +#include "unit-name.h" +#include "user-util.h" + +int cg_enumerate_processes(const char *controller, const char *path, FILE **_f) { + _cleanup_free_ char *fs = NULL; + FILE *f; + int r; + + assert(_f); + + r = cg_get_path(controller, path, "cgroup.procs", &fs); + if (r < 0) + return r; + + f = fopen(fs, "re"); + if (!f) + return -errno; + + *_f = f; + return 0; +} + +int cg_read_pid(FILE *f, pid_t *_pid) { + unsigned long ul; + + /* Note that the cgroup.procs might contain duplicates! See + * cgroups.txt for details. */ + + assert(f); + assert(_pid); + + errno = 0; + if (fscanf(f, "%lu", &ul) != 1) { + + if (feof(f)) + return 0; + + return errno > 0 ? -errno : -EIO; + } + + if (ul <= 0) + return -EIO; + + *_pid = (pid_t) ul; + return 1; +} + +int cg_read_event( + const char *controller, + const char *path, + const char *event, + char **val) { + + _cleanup_free_ char *events = NULL, *content = NULL; + char *p, *line; + int r; + + r = cg_get_path(controller, path, "cgroup.events", &events); + if (r < 0) + return r; + + r = read_full_file(events, &content, NULL); + if (r < 0) + return r; + + p = content; + while ((line = strsep(&p, "\n"))) { + char *key; + + key = strsep(&line, " "); + if (!key || !line) + return -EINVAL; + + if (strcmp(key, event)) + continue; + + *val = strdup(line); + return 0; + } + + return -ENOENT; +} + +bool cg_ns_supported(void) { + static thread_local int enabled = -1; + + if (enabled >= 0) + return enabled; + + if (access("/proc/self/ns/cgroup", F_OK) < 0) { + if (errno != ENOENT) + log_debug_errno(errno, "Failed to check whether /proc/self/ns/cgroup is available, assuming not: %m"); + enabled = false; + } else + enabled = true; + + return enabled; +} + +int cg_enumerate_subgroups(const char *controller, const char *path, DIR **_d) { + _cleanup_free_ char *fs = NULL; + int r; + DIR *d; + + assert(_d); + + /* This is not recursive! */ + + r = cg_get_path(controller, path, NULL, &fs); + if (r < 0) + return r; + + d = opendir(fs); + if (!d) + return -errno; + + *_d = d; + return 0; +} + +int cg_read_subgroup(DIR *d, char **fn) { + struct dirent *de; + + assert(d); + assert(fn); + + FOREACH_DIRENT_ALL(de, d, return -errno) { + char *b; + + if (de->d_type != DT_DIR) + continue; + + if (dot_or_dot_dot(de->d_name)) + continue; + + b = strdup(de->d_name); + if (!b) + return -ENOMEM; + + *fn = b; + return 1; + } + + return 0; +} + +int cg_rmdir(const char *controller, const char *path) { + _cleanup_free_ char *p = NULL; + int r; + + r = cg_get_path(controller, path, NULL, &p); + if (r < 0) + return r; + + r = rmdir(p); + if (r < 0 && errno != ENOENT) + return -errno; + + r = cg_hybrid_unified(); + if (r <= 0) + return r; + + if (streq(controller, SYSTEMD_CGROUP_CONTROLLER)) { + r = cg_rmdir(SYSTEMD_CGROUP_CONTROLLER_LEGACY, path); + if (r < 0) + log_warning_errno(r, "Failed to remove compat systemd cgroup %s: %m", path); + } + + return 0; +} + +int cg_kill( + const char *controller, + const char *path, + int sig, + CGroupFlags flags, + Set *s, + cg_kill_log_func_t log_kill, + void *userdata) { + + _cleanup_set_free_ Set *allocated_set = NULL; + bool done = false; + int r, ret = 0; + pid_t my_pid; + + assert(sig >= 0); + + /* Don't send SIGCONT twice. Also, SIGKILL always works even when process is suspended, hence don't send + * SIGCONT on SIGKILL. */ + if (IN_SET(sig, SIGCONT, SIGKILL)) + flags &= ~CGROUP_SIGCONT; + + /* This goes through the tasks list and kills them all. This + * is repeated until no further processes are added to the + * tasks list, to properly handle forking processes */ + + if (!s) { + s = allocated_set = set_new(NULL); + if (!s) + return -ENOMEM; + } + + my_pid = getpid_cached(); + + do { + _cleanup_fclose_ FILE *f = NULL; + pid_t pid = 0; + done = true; + + r = cg_enumerate_processes(controller, path, &f); + if (r < 0) { + if (ret >= 0 && r != -ENOENT) + return r; + + return ret; + } + + while ((r = cg_read_pid(f, &pid)) > 0) { + + if ((flags & CGROUP_IGNORE_SELF) && pid == my_pid) + continue; + + if (set_get(s, PID_TO_PTR(pid)) == PID_TO_PTR(pid)) + continue; + + if (log_kill) + log_kill(pid, sig, userdata); + + /* If we haven't killed this process yet, kill + * it */ + if (kill(pid, sig) < 0) { + if (ret >= 0 && errno != ESRCH) + ret = -errno; + } else { + if (flags & CGROUP_SIGCONT) + (void) kill(pid, SIGCONT); + + if (ret == 0) + ret = 1; + } + + done = false; + + r = set_put(s, PID_TO_PTR(pid)); + if (r < 0) { + if (ret >= 0) + return r; + + return ret; + } + } + + if (r < 0) { + if (ret >= 0) + return r; + + return ret; + } + + /* To avoid racing against processes which fork + * quicker than we can kill them we repeat this until + * no new pids need to be killed. */ + + } while (!done); + + return ret; +} + +int cg_kill_recursive( + const char *controller, + const char *path, + int sig, + CGroupFlags flags, + Set *s, + cg_kill_log_func_t log_kill, + void *userdata) { + + _cleanup_set_free_ Set *allocated_set = NULL; + _cleanup_closedir_ DIR *d = NULL; + int r, ret; + char *fn; + + assert(path); + assert(sig >= 0); + + if (!s) { + s = allocated_set = set_new(NULL); + if (!s) + return -ENOMEM; + } + + ret = cg_kill(controller, path, sig, flags, s, log_kill, userdata); + + r = cg_enumerate_subgroups(controller, path, &d); + if (r < 0) { + if (ret >= 0 && r != -ENOENT) + return r; + + return ret; + } + + while ((r = cg_read_subgroup(d, &fn)) > 0) { + _cleanup_free_ char *p = NULL; + + p = strjoin(path, "/", fn); + free(fn); + if (!p) + return -ENOMEM; + + r = cg_kill_recursive(controller, p, sig, flags, s, log_kill, userdata); + if (r != 0 && ret >= 0) + ret = r; + } + if (ret >= 0 && r < 0) + ret = r; + + if (flags & CGROUP_REMOVE) { + r = cg_rmdir(controller, path); + if (r < 0 && ret >= 0 && !IN_SET(r, -ENOENT, -EBUSY)) + return r; + } + + return ret; +} + +int cg_migrate( + const char *cfrom, + const char *pfrom, + const char *cto, + const char *pto, + CGroupFlags flags) { + + bool done = false; + _cleanup_set_free_ Set *s = NULL; + int r, ret = 0; + pid_t my_pid; + + assert(cfrom); + assert(pfrom); + assert(cto); + assert(pto); + + s = set_new(NULL); + if (!s) + return -ENOMEM; + + my_pid = getpid_cached(); + + do { + _cleanup_fclose_ FILE *f = NULL; + pid_t pid = 0; + done = true; + + r = cg_enumerate_processes(cfrom, pfrom, &f); + if (r < 0) { + if (ret >= 0 && r != -ENOENT) + return r; + + return ret; + } + + while ((r = cg_read_pid(f, &pid)) > 0) { + + /* This might do weird stuff if we aren't a + * single-threaded program. However, we + * luckily know we are not */ + if ((flags & CGROUP_IGNORE_SELF) && pid == my_pid) + continue; + + if (set_get(s, PID_TO_PTR(pid)) == PID_TO_PTR(pid)) + continue; + + /* Ignore kernel threads. Since they can only + * exist in the root cgroup, we only check for + * them there. */ + if (cfrom && + empty_or_root(pfrom) && + is_kernel_thread(pid) > 0) + continue; + + r = cg_attach(cto, pto, pid); + if (r < 0) { + if (ret >= 0 && r != -ESRCH) + ret = r; + } else if (ret == 0) + ret = 1; + + done = false; + + r = set_put(s, PID_TO_PTR(pid)); + if (r < 0) { + if (ret >= 0) + return r; + + return ret; + } + } + + if (r < 0) { + if (ret >= 0) + return r; + + return ret; + } + } while (!done); + + return ret; +} + +int cg_migrate_recursive( + const char *cfrom, + const char *pfrom, + const char *cto, + const char *pto, + CGroupFlags flags) { + + _cleanup_closedir_ DIR *d = NULL; + int r, ret = 0; + char *fn; + + assert(cfrom); + assert(pfrom); + assert(cto); + assert(pto); + + ret = cg_migrate(cfrom, pfrom, cto, pto, flags); + + r = cg_enumerate_subgroups(cfrom, pfrom, &d); + if (r < 0) { + if (ret >= 0 && r != -ENOENT) + return r; + + return ret; + } + + while ((r = cg_read_subgroup(d, &fn)) > 0) { + _cleanup_free_ char *p = NULL; + + p = strjoin(pfrom, "/", fn); + free(fn); + if (!p) + return -ENOMEM; + + r = cg_migrate_recursive(cfrom, p, cto, pto, flags); + if (r != 0 && ret >= 0) + ret = r; + } + + if (r < 0 && ret >= 0) + ret = r; + + if (flags & CGROUP_REMOVE) { + r = cg_rmdir(cfrom, pfrom); + if (r < 0 && ret >= 0 && !IN_SET(r, -ENOENT, -EBUSY)) + return r; + } + + return ret; +} + +int cg_migrate_recursive_fallback( + const char *cfrom, + const char *pfrom, + const char *cto, + const char *pto, + CGroupFlags flags) { + + int r; + + assert(cfrom); + assert(pfrom); + assert(cto); + assert(pto); + + r = cg_migrate_recursive(cfrom, pfrom, cto, pto, flags); + if (r < 0) { + char prefix[strlen(pto) + 1]; + + /* This didn't work? Then let's try all prefixes of the destination */ + + PATH_FOREACH_PREFIX(prefix, pto) { + int q; + + q = cg_migrate_recursive(cfrom, pfrom, cto, prefix, flags); + if (q >= 0) + return q; + } + } + + return r; +} + +static const char *controller_to_dirname(const char *controller) { + const char *e; + + assert(controller); + + /* Converts a controller name to the directory name below + * /sys/fs/cgroup/ we want to mount it to. Effectively, this + * just cuts off the name= prefixed used for named + * hierarchies, if it is specified. */ + + if (streq(controller, SYSTEMD_CGROUP_CONTROLLER)) { + if (cg_hybrid_unified() > 0) + controller = SYSTEMD_CGROUP_CONTROLLER_HYBRID; + else + controller = SYSTEMD_CGROUP_CONTROLLER_LEGACY; + } + + e = startswith(controller, "name="); + if (e) + return e; + + return controller; +} + +static int join_path_legacy(const char *controller, const char *path, const char *suffix, char **fs) { + const char *dn; + char *t = NULL; + + assert(fs); + assert(controller); + + dn = controller_to_dirname(controller); + + if (isempty(path) && isempty(suffix)) + t = strappend("/sys/fs/cgroup/", dn); + else if (isempty(path)) + t = strjoin("/sys/fs/cgroup/", dn, "/", suffix); + else if (isempty(suffix)) + t = strjoin("/sys/fs/cgroup/", dn, "/", path); + else + t = strjoin("/sys/fs/cgroup/", dn, "/", path, "/", suffix); + if (!t) + return -ENOMEM; + + *fs = t; + return 0; +} + +static int join_path_unified(const char *path, const char *suffix, char **fs) { + char *t; + + assert(fs); + + if (isempty(path) && isempty(suffix)) + t = strdup("/sys/fs/cgroup"); + else if (isempty(path)) + t = strappend("/sys/fs/cgroup/", suffix); + else if (isempty(suffix)) + t = strappend("/sys/fs/cgroup/", path); + else + t = strjoin("/sys/fs/cgroup/", path, "/", suffix); + if (!t) + return -ENOMEM; + + *fs = t; + return 0; +} + +int cg_get_path(const char *controller, const char *path, const char *suffix, char **fs) { + int r; + + assert(fs); + + if (!controller) { + char *t; + + /* If no controller is specified, we return the path + * *below* the controllers, without any prefix. */ + + if (!path && !suffix) + return -EINVAL; + + if (!suffix) + t = strdup(path); + else if (!path) + t = strdup(suffix); + else + t = strjoin(path, "/", suffix); + if (!t) + return -ENOMEM; + + *fs = path_simplify(t, false); + return 0; + } + + if (!cg_controller_is_valid(controller)) + return -EINVAL; + + r = cg_all_unified(); + if (r < 0) + return r; + if (r > 0) + r = join_path_unified(path, suffix, fs); + else + r = join_path_legacy(controller, path, suffix, fs); + if (r < 0) + return r; + + path_simplify(*fs, false); + return 0; +} + +static int controller_is_accessible(const char *controller) { + int r; + + assert(controller); + + /* Checks whether a specific controller is accessible, + * i.e. its hierarchy mounted. In the unified hierarchy all + * controllers are considered accessible, except for the named + * hierarchies */ + + if (!cg_controller_is_valid(controller)) + return -EINVAL; + + r = cg_all_unified(); + if (r < 0) + return r; + if (r > 0) { + /* We don't support named hierarchies if we are using + * the unified hierarchy. */ + + if (streq(controller, SYSTEMD_CGROUP_CONTROLLER)) + return 0; + + if (startswith(controller, "name=")) + return -EOPNOTSUPP; + + } else { + const char *cc, *dn; + + dn = controller_to_dirname(controller); + cc = strjoina("/sys/fs/cgroup/", dn); + + if (laccess(cc, F_OK) < 0) + return -errno; + } + + return 0; +} + +int cg_get_path_and_check(const char *controller, const char *path, const char *suffix, char **fs) { + int r; + + assert(controller); + assert(fs); + + /* Check if the specified controller is actually accessible */ + r = controller_is_accessible(controller); + if (r < 0) + return r; + + return cg_get_path(controller, path, suffix, fs); +} + +static int trim_cb(const char *path, const struct stat *sb, int typeflag, struct FTW *ftwbuf) { + assert(path); + assert(sb); + assert(ftwbuf); + + if (typeflag != FTW_DP) + return 0; + + if (ftwbuf->level < 1) + return 0; + + (void) rmdir(path); + return 0; +} + +int cg_trim(const char *controller, const char *path, bool delete_root) { + _cleanup_free_ char *fs = NULL; + int r = 0, q; + + assert(path); + + r = cg_get_path(controller, path, NULL, &fs); + if (r < 0) + return r; + + errno = 0; + if (nftw(fs, trim_cb, 64, FTW_DEPTH|FTW_MOUNT|FTW_PHYS) != 0) { + if (errno == ENOENT) + r = 0; + else if (errno > 0) + r = -errno; + else + r = -EIO; + } + + if (delete_root) { + if (rmdir(fs) < 0 && errno != ENOENT) + return -errno; + } + + q = cg_hybrid_unified(); + if (q < 0) + return q; + if (q > 0 && streq(controller, SYSTEMD_CGROUP_CONTROLLER)) { + q = cg_trim(SYSTEMD_CGROUP_CONTROLLER_LEGACY, path, delete_root); + if (q < 0) + log_warning_errno(q, "Failed to trim compat systemd cgroup %s: %m", path); + } + + return r; +} + +/* Create a cgroup in the hierarchy of controller. + * Returns 0 if the group already existed, 1 on success, negative otherwise. + */ +int cg_create(const char *controller, const char *path) { + _cleanup_free_ char *fs = NULL; + int r; + + r = cg_get_path_and_check(controller, path, NULL, &fs); + if (r < 0) + return r; + + r = mkdir_parents(fs, 0755); + if (r < 0) + return r; + + r = mkdir_errno_wrapper(fs, 0755); + if (r == -EEXIST) + return 0; + if (r < 0) + return r; + + r = cg_hybrid_unified(); + if (r < 0) + return r; + + if (r > 0 && streq(controller, SYSTEMD_CGROUP_CONTROLLER)) { + r = cg_create(SYSTEMD_CGROUP_CONTROLLER_LEGACY, path); + if (r < 0) + log_warning_errno(r, "Failed to create compat systemd cgroup %s: %m", path); + } + + return 1; +} + +int cg_create_and_attach(const char *controller, const char *path, pid_t pid) { + int r, q; + + assert(pid >= 0); + + r = cg_create(controller, path); + if (r < 0) + return r; + + q = cg_attach(controller, path, pid); + if (q < 0) + return q; + + /* This does not remove the cgroup on failure */ + return r; +} + +int cg_attach(const char *controller, const char *path, pid_t pid) { + _cleanup_free_ char *fs = NULL; + char c[DECIMAL_STR_MAX(pid_t) + 2]; + int r; + + assert(path); + assert(pid >= 0); + + r = cg_get_path_and_check(controller, path, "cgroup.procs", &fs); + if (r < 0) + return r; + + if (pid == 0) + pid = getpid_cached(); + + xsprintf(c, PID_FMT "\n", pid); + + r = write_string_file(fs, c, WRITE_STRING_FILE_DISABLE_BUFFER); + if (r < 0) + return r; + + r = cg_hybrid_unified(); + if (r < 0) + return r; + + if (r > 0 && streq(controller, SYSTEMD_CGROUP_CONTROLLER)) { + r = cg_attach(SYSTEMD_CGROUP_CONTROLLER_LEGACY, path, pid); + if (r < 0) + log_warning_errno(r, "Failed to attach "PID_FMT" to compat systemd cgroup %s: %m", pid, path); + } + + return 0; +} + +int cg_attach_fallback(const char *controller, const char *path, pid_t pid) { + int r; + + assert(controller); + assert(path); + assert(pid >= 0); + + r = cg_attach(controller, path, pid); + if (r < 0) { + char prefix[strlen(path) + 1]; + + /* This didn't work? Then let's try all prefixes of + * the destination */ + + PATH_FOREACH_PREFIX(prefix, path) { + int q; + + q = cg_attach(controller, prefix, pid); + if (q >= 0) + return q; + } + } + + return r; +} + +int cg_set_access( + const char *controller, + const char *path, + uid_t uid, + gid_t gid) { + + struct Attribute { + const char *name; + bool fatal; + }; + + /* cgroup v1, aka legacy/non-unified */ + static const struct Attribute legacy_attributes[] = { + { "cgroup.procs", true }, + { "tasks", false }, + { "cgroup.clone_children", false }, + {}, + }; + + /* cgroup v2, aka unified */ + static const struct Attribute unified_attributes[] = { + { "cgroup.procs", true }, + { "cgroup.subtree_control", true }, + { "cgroup.threads", false }, + {}, + }; + + static const struct Attribute* const attributes[] = { + [false] = legacy_attributes, + [true] = unified_attributes, + }; + + _cleanup_free_ char *fs = NULL; + const struct Attribute *i; + int r, unified; + + assert(path); + + if (uid == UID_INVALID && gid == GID_INVALID) + return 0; + + unified = cg_unified_controller(controller); + if (unified < 0) + return unified; + + /* Configure access to the cgroup itself */ + r = cg_get_path(controller, path, NULL, &fs); + if (r < 0) + return r; + + r = chmod_and_chown(fs, 0755, uid, gid); + if (r < 0) + return r; + + /* Configure access to the cgroup's attributes */ + for (i = attributes[unified]; i->name; i++) { + fs = mfree(fs); + + r = cg_get_path(controller, path, i->name, &fs); + if (r < 0) + return r; + + r = chmod_and_chown(fs, 0644, uid, gid); + if (r < 0) { + if (i->fatal) + return r; + + log_debug_errno(r, "Failed to set access on cgroup %s, ignoring: %m", fs); + } + } + + if (streq(controller, SYSTEMD_CGROUP_CONTROLLER)) { + r = cg_hybrid_unified(); + if (r < 0) + return r; + if (r > 0) { + /* Always propagate access mode from unified to legacy controller */ + r = cg_set_access(SYSTEMD_CGROUP_CONTROLLER_LEGACY, path, uid, gid); + if (r < 0) + log_debug_errno(r, "Failed to set access on compatibility systemd cgroup %s, ignoring: %m", path); + } + } + + return 0; +} + +int cg_set_xattr(const char *controller, const char *path, const char *name, const void *value, size_t size, int flags) { + _cleanup_free_ char *fs = NULL; + int r; + + assert(path); + assert(name); + assert(value || size <= 0); + + r = cg_get_path(controller, path, NULL, &fs); + if (r < 0) + return r; + + if (setxattr(fs, name, value, size, flags) < 0) + return -errno; + + return 0; +} + +int cg_get_xattr(const char *controller, const char *path, const char *name, void *value, size_t size) { + _cleanup_free_ char *fs = NULL; + ssize_t n; + int r; + + assert(path); + assert(name); + + r = cg_get_path(controller, path, NULL, &fs); + if (r < 0) + return r; + + n = getxattr(fs, name, value, size); + if (n < 0) + return -errno; + + return (int) n; +} + +int cg_pid_get_path(const char *controller, pid_t pid, char **path) { + _cleanup_fclose_ FILE *f = NULL; + const char *fs, *controller_str; + int unified, r; + size_t cs = 0; + + assert(path); + assert(pid >= 0); + + if (controller) { + if (!cg_controller_is_valid(controller)) + return -EINVAL; + } else + controller = SYSTEMD_CGROUP_CONTROLLER; + + unified = cg_unified_controller(controller); + if (unified < 0) + return unified; + if (unified == 0) { + if (streq(controller, SYSTEMD_CGROUP_CONTROLLER)) + controller_str = SYSTEMD_CGROUP_CONTROLLER_LEGACY; + else + controller_str = controller; + + cs = strlen(controller_str); + } + + fs = procfs_file_alloca(pid, "cgroup"); + f = fopen(fs, "re"); + if (!f) + return errno == ENOENT ? -ESRCH : -errno; + + (void) __fsetlocking(f, FSETLOCKING_BYCALLER); + + for (;;) { + _cleanup_free_ char *line = NULL; + char *e, *p; + + r = read_line(f, LONG_LINE_MAX, &line); + if (r < 0) + return r; + if (r == 0) + break; + + if (unified) { + e = startswith(line, "0:"); + if (!e) + continue; + + e = strchr(e, ':'); + if (!e) + continue; + } else { + char *l; + size_t k; + const char *word, *state; + bool found = false; + + l = strchr(line, ':'); + if (!l) + continue; + + l++; + e = strchr(l, ':'); + if (!e) + continue; + + *e = 0; + FOREACH_WORD_SEPARATOR(word, k, l, ",", state) + if (k == cs && memcmp(word, controller_str, cs) == 0) { + found = true; + break; + } + if (!found) + continue; + } + + p = strdup(e + 1); + if (!p) + return -ENOMEM; + + /* Truncate suffix indicating the process is a zombie */ + e = endswith(p, " (deleted)"); + if (e) + *e = 0; + + *path = p; + return 0; + } + + return -ENODATA; +} + +int cg_install_release_agent(const char *controller, const char *agent) { + _cleanup_free_ char *fs = NULL, *contents = NULL; + const char *sc; + int r; + + assert(agent); + + r = cg_unified_controller(controller); + if (r < 0) + return r; + if (r > 0) /* doesn't apply to unified hierarchy */ + return -EOPNOTSUPP; + + r = cg_get_path(controller, NULL, "release_agent", &fs); + if (r < 0) + return r; + + r = read_one_line_file(fs, &contents); + if (r < 0) + return r; + + sc = strstrip(contents); + if (isempty(sc)) { + r = write_string_file(fs, agent, WRITE_STRING_FILE_DISABLE_BUFFER); + if (r < 0) + return r; + } else if (!path_equal(sc, agent)) + return -EEXIST; + + fs = mfree(fs); + r = cg_get_path(controller, NULL, "notify_on_release", &fs); + if (r < 0) + return r; + + contents = mfree(contents); + r = read_one_line_file(fs, &contents); + if (r < 0) + return r; + + sc = strstrip(contents); + if (streq(sc, "0")) { + r = write_string_file(fs, "1", WRITE_STRING_FILE_DISABLE_BUFFER); + if (r < 0) + return r; + + return 1; + } + + if (!streq(sc, "1")) + return -EIO; + + return 0; +} + +int cg_uninstall_release_agent(const char *controller) { + _cleanup_free_ char *fs = NULL; + int r; + + r = cg_unified_controller(controller); + if (r < 0) + return r; + if (r > 0) /* Doesn't apply to unified hierarchy */ + return -EOPNOTSUPP; + + r = cg_get_path(controller, NULL, "notify_on_release", &fs); + if (r < 0) + return r; + + r = write_string_file(fs, "0", WRITE_STRING_FILE_DISABLE_BUFFER); + if (r < 0) + return r; + + fs = mfree(fs); + + r = cg_get_path(controller, NULL, "release_agent", &fs); + if (r < 0) + return r; + + r = write_string_file(fs, "", WRITE_STRING_FILE_DISABLE_BUFFER); + if (r < 0) + return r; + + return 0; +} + +int cg_is_empty(const char *controller, const char *path) { + _cleanup_fclose_ FILE *f = NULL; + pid_t pid; + int r; + + assert(path); + + r = cg_enumerate_processes(controller, path, &f); + if (r == -ENOENT) + return true; + if (r < 0) + return r; + + r = cg_read_pid(f, &pid); + if (r < 0) + return r; + + return r == 0; +} + +int cg_is_empty_recursive(const char *controller, const char *path) { + int r; + + assert(path); + + /* The root cgroup is always populated */ + if (controller && empty_or_root(path)) + return false; + + r = cg_unified_controller(controller); + if (r < 0) + return r; + if (r > 0) { + _cleanup_free_ char *t = NULL; + + /* On the unified hierarchy we can check empty state + * via the "populated" attribute of "cgroup.events". */ + + r = cg_read_event(controller, path, "populated", &t); + if (r == -ENOENT) + return true; + if (r < 0) + return r; + + return streq(t, "0"); + } else { + _cleanup_closedir_ DIR *d = NULL; + char *fn; + + r = cg_is_empty(controller, path); + if (r <= 0) + return r; + + r = cg_enumerate_subgroups(controller, path, &d); + if (r == -ENOENT) + return true; + if (r < 0) + return r; + + while ((r = cg_read_subgroup(d, &fn)) > 0) { + _cleanup_free_ char *p = NULL; + + p = strjoin(path, "/", fn); + free(fn); + if (!p) + return -ENOMEM; + + r = cg_is_empty_recursive(controller, p); + if (r <= 0) + return r; + } + if (r < 0) + return r; + + return true; + } +} + +int cg_split_spec(const char *spec, char **controller, char **path) { + char *t = NULL, *u = NULL; + const char *e; + + assert(spec); + + if (*spec == '/') { + if (!path_is_normalized(spec)) + return -EINVAL; + + if (path) { + t = strdup(spec); + if (!t) + return -ENOMEM; + + *path = path_simplify(t, false); + } + + if (controller) + *controller = NULL; + + return 0; + } + + e = strchr(spec, ':'); + if (!e) { + if (!cg_controller_is_valid(spec)) + return -EINVAL; + + if (controller) { + t = strdup(spec); + if (!t) + return -ENOMEM; + + *controller = t; + } + + if (path) + *path = NULL; + + return 0; + } + + t = strndup(spec, e-spec); + if (!t) + return -ENOMEM; + if (!cg_controller_is_valid(t)) { + free(t); + return -EINVAL; + } + + if (isempty(e+1)) + u = NULL; + else { + u = strdup(e+1); + if (!u) { + free(t); + return -ENOMEM; + } + + if (!path_is_normalized(u) || + !path_is_absolute(u)) { + free(t); + free(u); + return -EINVAL; + } + + path_simplify(u, false); + } + + if (controller) + *controller = t; + else + free(t); + + if (path) + *path = u; + else + free(u); + + return 0; +} + +int cg_mangle_path(const char *path, char **result) { + _cleanup_free_ char *c = NULL, *p = NULL; + char *t; + int r; + + assert(path); + assert(result); + + /* First, check if it already is a filesystem path */ + if (path_startswith(path, "/sys/fs/cgroup")) { + + t = strdup(path); + if (!t) + return -ENOMEM; + + *result = path_simplify(t, false); + return 0; + } + + /* Otherwise, treat it as cg spec */ + r = cg_split_spec(path, &c, &p); + if (r < 0) + return r; + + return cg_get_path(c ?: SYSTEMD_CGROUP_CONTROLLER, p ?: "/", NULL, result); +} + +int cg_get_root_path(char **path) { + char *p, *e; + int r; + + assert(path); + + r = cg_pid_get_path(SYSTEMD_CGROUP_CONTROLLER, 1, &p); + if (r < 0) + return r; + + e = endswith(p, "/" SPECIAL_INIT_SCOPE); + if (!e) + e = endswith(p, "/" SPECIAL_SYSTEM_SLICE); /* legacy */ + if (!e) + e = endswith(p, "/system"); /* even more legacy */ + if (e) + *e = 0; + + *path = p; + return 0; +} + +int cg_shift_path(const char *cgroup, const char *root, const char **shifted) { + _cleanup_free_ char *rt = NULL; + char *p; + int r; + + assert(cgroup); + assert(shifted); + + if (!root) { + /* If the root was specified let's use that, otherwise + * let's determine it from PID 1 */ + + r = cg_get_root_path(&rt); + if (r < 0) + return r; + + root = rt; + } + + p = path_startswith(cgroup, root); + if (p && p > cgroup) + *shifted = p - 1; + else + *shifted = cgroup; + + return 0; +} + +int cg_pid_get_path_shifted(pid_t pid, const char *root, char **cgroup) { + _cleanup_free_ char *raw = NULL; + const char *c; + int r; + + assert(pid >= 0); + assert(cgroup); + + r = cg_pid_get_path(SYSTEMD_CGROUP_CONTROLLER, pid, &raw); + if (r < 0) + return r; + + r = cg_shift_path(raw, root, &c); + if (r < 0) + return r; + + if (c == raw) + *cgroup = TAKE_PTR(raw); + else { + char *n; + + n = strdup(c); + if (!n) + return -ENOMEM; + + *cgroup = n; + } + + return 0; +} + +int cg_path_decode_unit(const char *cgroup, char **unit) { + char *c, *s; + size_t n; + + assert(cgroup); + assert(unit); + + n = strcspn(cgroup, "/"); + if (n < 3) + return -ENXIO; + + c = strndupa(cgroup, n); + c = cg_unescape(c); + + if (!unit_name_is_valid(c, UNIT_NAME_PLAIN|UNIT_NAME_INSTANCE)) + return -ENXIO; + + s = strdup(c); + if (!s) + return -ENOMEM; + + *unit = s; + return 0; +} + +static bool valid_slice_name(const char *p, size_t n) { + + if (!p) + return false; + + if (n < STRLEN("x.slice")) + return false; + + if (memcmp(p + n - 6, ".slice", 6) == 0) { + char buf[n+1], *c; + + memcpy(buf, p, n); + buf[n] = 0; + + c = cg_unescape(buf); + + return unit_name_is_valid(c, UNIT_NAME_PLAIN); + } + + return false; +} + +static const char *skip_slices(const char *p) { + assert(p); + + /* Skips over all slice assignments */ + + for (;;) { + size_t n; + + p += strspn(p, "/"); + + n = strcspn(p, "/"); + if (!valid_slice_name(p, n)) + return p; + + p += n; + } +} + +int cg_path_get_unit(const char *path, char **ret) { + const char *e; + char *unit; + int r; + + assert(path); + assert(ret); + + e = skip_slices(path); + + r = cg_path_decode_unit(e, &unit); + if (r < 0) + return r; + + /* We skipped over the slices, don't accept any now */ + if (endswith(unit, ".slice")) { + free(unit); + return -ENXIO; + } + + *ret = unit; + return 0; +} + +int cg_pid_get_unit(pid_t pid, char **unit) { + _cleanup_free_ char *cgroup = NULL; + int r; + + assert(unit); + + r = cg_pid_get_path_shifted(pid, NULL, &cgroup); + if (r < 0) + return r; + + return cg_path_get_unit(cgroup, unit); +} + +/** + * Skip session-*.scope, but require it to be there. + */ +static const char *skip_session(const char *p) { + size_t n; + + if (isempty(p)) + return NULL; + + p += strspn(p, "/"); + + n = strcspn(p, "/"); + if (n < STRLEN("session-x.scope")) + return NULL; + + if (memcmp(p, "session-", 8) == 0 && memcmp(p + n - 6, ".scope", 6) == 0) { + char buf[n - 8 - 6 + 1]; + + memcpy(buf, p + 8, n - 8 - 6); + buf[n - 8 - 6] = 0; + + /* Note that session scopes never need unescaping, + * since they cannot conflict with the kernel's own + * names, hence we don't need to call cg_unescape() + * here. */ + + if (!session_id_valid(buf)) + return false; + + p += n; + p += strspn(p, "/"); + return p; + } + + return NULL; +} + +/** + * Skip user@*.service, but require it to be there. + */ +static const char *skip_user_manager(const char *p) { + size_t n; + + if (isempty(p)) + return NULL; + + p += strspn(p, "/"); + + n = strcspn(p, "/"); + if (n < STRLEN("user@x.service")) + return NULL; + + if (memcmp(p, "user@", 5) == 0 && memcmp(p + n - 8, ".service", 8) == 0) { + char buf[n - 5 - 8 + 1]; + + memcpy(buf, p + 5, n - 5 - 8); + buf[n - 5 - 8] = 0; + + /* Note that user manager services never need unescaping, + * since they cannot conflict with the kernel's own + * names, hence we don't need to call cg_unescape() + * here. */ + + if (parse_uid(buf, NULL) < 0) + return NULL; + + p += n; + p += strspn(p, "/"); + + return p; + } + + return NULL; +} + +static const char *skip_user_prefix(const char *path) { + const char *e, *t; + + assert(path); + + /* Skip slices, if there are any */ + e = skip_slices(path); + + /* Skip the user manager, if it's in the path now... */ + t = skip_user_manager(e); + if (t) + return t; + + /* Alternatively skip the user session if it is in the path... */ + return skip_session(e); +} + +int cg_path_get_user_unit(const char *path, char **ret) { + const char *t; + + assert(path); + assert(ret); + + t = skip_user_prefix(path); + if (!t) + return -ENXIO; + + /* And from here on it looks pretty much the same as for a + * system unit, hence let's use the same parser from here + * on. */ + return cg_path_get_unit(t, ret); +} + +int cg_pid_get_user_unit(pid_t pid, char **unit) { + _cleanup_free_ char *cgroup = NULL; + int r; + + assert(unit); + + r = cg_pid_get_path_shifted(pid, NULL, &cgroup); + if (r < 0) + return r; + + return cg_path_get_user_unit(cgroup, unit); +} + +int cg_path_get_machine_name(const char *path, char **machine) { + _cleanup_free_ char *u = NULL; + const char *sl; + int r; + + r = cg_path_get_unit(path, &u); + if (r < 0) + return r; + + sl = strjoina("/run/systemd/machines/unit:", u); + return readlink_malloc(sl, machine); +} + +int cg_pid_get_machine_name(pid_t pid, char **machine) { + _cleanup_free_ char *cgroup = NULL; + int r; + + assert(machine); + + r = cg_pid_get_path_shifted(pid, NULL, &cgroup); + if (r < 0) + return r; + + return cg_path_get_machine_name(cgroup, machine); +} + +int cg_path_get_session(const char *path, char **session) { + _cleanup_free_ char *unit = NULL; + char *start, *end; + int r; + + assert(path); + + r = cg_path_get_unit(path, &unit); + if (r < 0) + return r; + + start = startswith(unit, "session-"); + if (!start) + return -ENXIO; + end = endswith(start, ".scope"); + if (!end) + return -ENXIO; + + *end = 0; + if (!session_id_valid(start)) + return -ENXIO; + + if (session) { + char *rr; + + rr = strdup(start); + if (!rr) + return -ENOMEM; + + *session = rr; + } + + return 0; +} + +int cg_pid_get_session(pid_t pid, char **session) { + _cleanup_free_ char *cgroup = NULL; + int r; + + r = cg_pid_get_path_shifted(pid, NULL, &cgroup); + if (r < 0) + return r; + + return cg_path_get_session(cgroup, session); +} + +int cg_path_get_owner_uid(const char *path, uid_t *uid) { + _cleanup_free_ char *slice = NULL; + char *start, *end; + int r; + + assert(path); + + r = cg_path_get_slice(path, &slice); + if (r < 0) + return r; + + start = startswith(slice, "user-"); + if (!start) + return -ENXIO; + end = endswith(start, ".slice"); + if (!end) + return -ENXIO; + + *end = 0; + if (parse_uid(start, uid) < 0) + return -ENXIO; + + return 0; +} + +int cg_pid_get_owner_uid(pid_t pid, uid_t *uid) { + _cleanup_free_ char *cgroup = NULL; + int r; + + r = cg_pid_get_path_shifted(pid, NULL, &cgroup); + if (r < 0) + return r; + + return cg_path_get_owner_uid(cgroup, uid); +} + +int cg_path_get_slice(const char *p, char **slice) { + const char *e = NULL; + + assert(p); + assert(slice); + + /* Finds the right-most slice unit from the beginning, but + * stops before we come to the first non-slice unit. */ + + for (;;) { + size_t n; + + p += strspn(p, "/"); + + n = strcspn(p, "/"); + if (!valid_slice_name(p, n)) { + + if (!e) { + char *s; + + s = strdup(SPECIAL_ROOT_SLICE); + if (!s) + return -ENOMEM; + + *slice = s; + return 0; + } + + return cg_path_decode_unit(e, slice); + } + + e = p; + p += n; + } +} + +int cg_pid_get_slice(pid_t pid, char **slice) { + _cleanup_free_ char *cgroup = NULL; + int r; + + assert(slice); + + r = cg_pid_get_path_shifted(pid, NULL, &cgroup); + if (r < 0) + return r; + + return cg_path_get_slice(cgroup, slice); +} + +int cg_path_get_user_slice(const char *p, char **slice) { + const char *t; + assert(p); + assert(slice); + + t = skip_user_prefix(p); + if (!t) + return -ENXIO; + + /* And now it looks pretty much the same as for a system + * slice, so let's just use the same parser from here on. */ + return cg_path_get_slice(t, slice); +} + +int cg_pid_get_user_slice(pid_t pid, char **slice) { + _cleanup_free_ char *cgroup = NULL; + int r; + + assert(slice); + + r = cg_pid_get_path_shifted(pid, NULL, &cgroup); + if (r < 0) + return r; + + return cg_path_get_user_slice(cgroup, slice); +} + +char *cg_escape(const char *p) { + bool need_prefix = false; + + /* This implements very minimal escaping for names to be used + * as file names in the cgroup tree: any name which might + * conflict with a kernel name or is prefixed with '_' is + * prefixed with a '_'. That way, when reading cgroup names it + * is sufficient to remove a single prefixing underscore if + * there is one. */ + + /* The return value of this function (unlike cg_unescape()) + * needs free()! */ + + if (IN_SET(p[0], 0, '_', '.') || + STR_IN_SET(p, "notify_on_release", "release_agent", "tasks") || + startswith(p, "cgroup.")) + need_prefix = true; + else { + const char *dot; + + dot = strrchr(p, '.'); + if (dot) { + CGroupController c; + size_t l = dot - p; + + for (c = 0; c < _CGROUP_CONTROLLER_MAX; c++) { + const char *n; + + n = cgroup_controller_to_string(c); + + if (l != strlen(n)) + continue; + + if (memcmp(p, n, l) != 0) + continue; + + need_prefix = true; + break; + } + } + } + + if (need_prefix) + return strappend("_", p); + + return strdup(p); +} + +char *cg_unescape(const char *p) { + assert(p); + + /* The return value of this function (unlike cg_escape()) + * doesn't need free()! */ + + if (p[0] == '_') + return (char*) p+1; + + return (char*) p; +} + +#define CONTROLLER_VALID \ + DIGITS LETTERS \ + "_" + +bool cg_controller_is_valid(const char *p) { + const char *t, *s; + + if (!p) + return false; + + if (streq(p, SYSTEMD_CGROUP_CONTROLLER)) + return true; + + s = startswith(p, "name="); + if (s) + p = s; + + if (IN_SET(*p, 0, '_')) + return false; + + for (t = p; *t; t++) + if (!strchr(CONTROLLER_VALID, *t)) + return false; + + if (t - p > FILENAME_MAX) + return false; + + return true; +} + +int cg_slice_to_path(const char *unit, char **ret) { + _cleanup_free_ char *p = NULL, *s = NULL, *e = NULL; + const char *dash; + int r; + + assert(unit); + assert(ret); + + if (streq(unit, SPECIAL_ROOT_SLICE)) { + char *x; + + x = strdup(""); + if (!x) + return -ENOMEM; + *ret = x; + return 0; + } + + if (!unit_name_is_valid(unit, UNIT_NAME_PLAIN)) + return -EINVAL; + + if (!endswith(unit, ".slice")) + return -EINVAL; + + r = unit_name_to_prefix(unit, &p); + if (r < 0) + return r; + + dash = strchr(p, '-'); + + /* Don't allow initial dashes */ + if (dash == p) + return -EINVAL; + + while (dash) { + _cleanup_free_ char *escaped = NULL; + char n[dash - p + sizeof(".slice")]; + +#if HAS_FEATURE_MEMORY_SANITIZER + /* msan doesn't instrument stpncpy, so it thinks + * n is later used unitialized: + * https://github.com/google/sanitizers/issues/926 + */ + zero(n); +#endif + + /* Don't allow trailing or double dashes */ + if (IN_SET(dash[1], 0, '-')) + return -EINVAL; + + strcpy(stpncpy(n, p, dash - p), ".slice"); + if (!unit_name_is_valid(n, UNIT_NAME_PLAIN)) + return -EINVAL; + + escaped = cg_escape(n); + if (!escaped) + return -ENOMEM; + + if (!strextend(&s, escaped, "/", NULL)) + return -ENOMEM; + + dash = strchr(dash+1, '-'); + } + + e = cg_escape(unit); + if (!e) + return -ENOMEM; + + if (!strextend(&s, e, NULL)) + return -ENOMEM; + + *ret = TAKE_PTR(s); + + return 0; +} + +int cg_set_attribute(const char *controller, const char *path, const char *attribute, const char *value) { + _cleanup_free_ char *p = NULL; + int r; + + r = cg_get_path(controller, path, attribute, &p); + if (r < 0) + return r; + + return write_string_file(p, value, WRITE_STRING_FILE_DISABLE_BUFFER); +} + +int cg_get_attribute(const char *controller, const char *path, const char *attribute, char **ret) { + _cleanup_free_ char *p = NULL; + int r; + + r = cg_get_path(controller, path, attribute, &p); + if (r < 0) + return r; + + return read_one_line_file(p, ret); +} + +int cg_get_keyed_attribute( + const char *controller, + const char *path, + const char *attribute, + char **keys, + char **ret_values) { + + _cleanup_free_ char *filename = NULL, *contents = NULL; + const char *p; + size_t n, i, n_done = 0; + char **v; + int r; + + /* Reads one or more fields of a cgroup v2 keyed attribute file. The 'keys' parameter should be an strv with + * all keys to retrieve. The 'ret_values' parameter should be passed as string size with the same number of + * entries as 'keys'. On success each entry will be set to the value of the matching key. + * + * If the attribute file doesn't exist at all returns ENOENT, if any key is not found returns ENXIO. */ + + r = cg_get_path(controller, path, attribute, &filename); + if (r < 0) + return r; + + r = read_full_file(filename, &contents, NULL); + if (r < 0) + return r; + + n = strv_length(keys); + if (n == 0) /* No keys to retrieve? That's easy, we are done then */ + return 0; + + /* Let's build this up in a temporary array for now in order not to clobber the return parameter on failure */ + v = newa0(char*, n); + + for (p = contents; *p;) { + const char *w = NULL; + + for (i = 0; i < n; i++) + if (!v[i]) { + w = first_word(p, keys[i]); + if (w) + break; + } + + if (w) { + size_t l; + + l = strcspn(w, NEWLINE); + v[i] = strndup(w, l); + if (!v[i]) { + r = -ENOMEM; + goto fail; + } + + n_done++; + if (n_done >= n) + goto done; + + p = w + l; + } else + p += strcspn(p, NEWLINE); + + p += strspn(p, NEWLINE); + } + + r = -ENXIO; + +fail: + for (i = 0; i < n; i++) + free(v[i]); + + return r; + +done: + memcpy(ret_values, v, sizeof(char*) * n); + return 0; + +} + +int cg_create_everywhere(CGroupMask supported, CGroupMask mask, const char *path) { + CGroupController c; + CGroupMask done; + bool created; + int r; + + /* This one will create a cgroup in our private tree, but also + * duplicate it in the trees specified in mask, and remove it + * in all others. + * + * Returns 0 if the group already existed in the systemd hierarchy, + * 1 on success, negative otherwise. + */ + + /* First create the cgroup in our own hierarchy. */ + r = cg_create(SYSTEMD_CGROUP_CONTROLLER, path); + if (r < 0) + return r; + created = r; + + /* If we are in the unified hierarchy, we are done now */ + r = cg_all_unified(); + if (r < 0) + return r; + if (r > 0) + return created; + + supported &= CGROUP_MASK_V1; + mask = CGROUP_MASK_EXTEND_JOINED(mask); + done = 0; + + /* Otherwise, do the same in the other hierarchies */ + for (c = 0; c < _CGROUP_CONTROLLER_MAX; c++) { + CGroupMask bit = CGROUP_CONTROLLER_TO_MASK(c); + const char *n; + + if (!FLAGS_SET(supported, bit)) + continue; + + if (FLAGS_SET(done, bit)) + continue; + + n = cgroup_controller_to_string(c); + if (FLAGS_SET(mask, bit)) + (void) cg_create(n, path); + else + (void) cg_trim(n, path, true); + + done |= CGROUP_MASK_EXTEND_JOINED(bit); + } + + return created; +} + +int cg_attach_everywhere(CGroupMask supported, const char *path, pid_t pid, cg_migrate_callback_t path_callback, void *userdata) { + CGroupController c; + CGroupMask done; + int r; + + r = cg_attach(SYSTEMD_CGROUP_CONTROLLER, path, pid); + if (r < 0) + return r; + + r = cg_all_unified(); + if (r < 0) + return r; + if (r > 0) + return 0; + + supported &= CGROUP_MASK_V1; + done = 0; + + for (c = 0; c < _CGROUP_CONTROLLER_MAX; c++) { + CGroupMask bit = CGROUP_CONTROLLER_TO_MASK(c); + const char *p = NULL; + + if (!FLAGS_SET(supported, bit)) + continue; + + if (FLAGS_SET(done, bit)) + continue; + + if (path_callback) + p = path_callback(bit, userdata); + if (!p) + p = path; + + (void) cg_attach_fallback(cgroup_controller_to_string(c), p, pid); + done |= CGROUP_MASK_EXTEND_JOINED(bit); + } + + return 0; +} + +int cg_attach_many_everywhere(CGroupMask supported, const char *path, Set* pids, cg_migrate_callback_t path_callback, void *userdata) { + Iterator i; + void *pidp; + int r = 0; + + SET_FOREACH(pidp, pids, i) { + pid_t pid = PTR_TO_PID(pidp); + int q; + + q = cg_attach_everywhere(supported, path, pid, path_callback, userdata); + if (q < 0 && r >= 0) + r = q; + } + + return r; +} + +int cg_migrate_everywhere(CGroupMask supported, const char *from, const char *to, cg_migrate_callback_t to_callback, void *userdata) { + CGroupController c; + CGroupMask done; + int r = 0, q; + + if (!path_equal(from, to)) { + r = cg_migrate_recursive(SYSTEMD_CGROUP_CONTROLLER, from, SYSTEMD_CGROUP_CONTROLLER, to, CGROUP_REMOVE); + if (r < 0) + return r; + } + + q = cg_all_unified(); + if (q < 0) + return q; + if (q > 0) + return r; + + supported &= CGROUP_MASK_V1; + done = 0; + + for (c = 0; c < _CGROUP_CONTROLLER_MAX; c++) { + CGroupMask bit = CGROUP_CONTROLLER_TO_MASK(c); + const char *p = NULL; + + if (!FLAGS_SET(supported, bit)) + continue; + + if (FLAGS_SET(done, bit)) + continue; + + if (to_callback) + p = to_callback(bit, userdata); + if (!p) + p = to; + + (void) cg_migrate_recursive_fallback(SYSTEMD_CGROUP_CONTROLLER, to, cgroup_controller_to_string(c), p, 0); + done |= CGROUP_MASK_EXTEND_JOINED(bit); + } + + return r; +} + +int cg_trim_everywhere(CGroupMask supported, const char *path, bool delete_root) { + CGroupController c; + CGroupMask done; + int r, q; + + r = cg_trim(SYSTEMD_CGROUP_CONTROLLER, path, delete_root); + if (r < 0) + return r; + + q = cg_all_unified(); + if (q < 0) + return q; + if (q > 0) + return r; + + supported &= CGROUP_MASK_V1; + done = 0; + + for (c = 0; c < _CGROUP_CONTROLLER_MAX; c++) { + CGroupMask bit = CGROUP_CONTROLLER_TO_MASK(c); + + if (!FLAGS_SET(supported, bit)) + continue; + + if (FLAGS_SET(done, bit)) + continue; + + (void) cg_trim(cgroup_controller_to_string(c), path, delete_root); + done |= CGROUP_MASK_EXTEND_JOINED(bit); + } + + return r; +} + +int cg_mask_to_string(CGroupMask mask, char **ret) { + _cleanup_free_ char *s = NULL; + size_t n = 0, allocated = 0; + bool space = false; + CGroupController c; + + assert(ret); + + if (mask == 0) { + *ret = NULL; + return 0; + } + + for (c = 0; c < _CGROUP_CONTROLLER_MAX; c++) { + const char *k; + size_t l; + + if (!FLAGS_SET(mask, CGROUP_CONTROLLER_TO_MASK(c))) + continue; + + k = cgroup_controller_to_string(c); + l = strlen(k); + + if (!GREEDY_REALLOC(s, allocated, n + space + l + 1)) + return -ENOMEM; + + if (space) + s[n] = ' '; + memcpy(s + n + space, k, l); + n += space + l; + + space = true; + } + + assert(s); + + s[n] = 0; + *ret = TAKE_PTR(s); + + return 0; +} + +int cg_mask_from_string(const char *value, CGroupMask *ret) { + CGroupMask m = 0; + + assert(ret); + assert(value); + + for (;;) { + _cleanup_free_ char *n = NULL; + CGroupController v; + int r; + + r = extract_first_word(&value, &n, NULL, 0); + if (r < 0) + return r; + if (r == 0) + break; + + v = cgroup_controller_from_string(n); + if (v < 0) + continue; + + m |= CGROUP_CONTROLLER_TO_MASK(v); + } + + *ret = m; + return 0; +} + +int cg_mask_supported(CGroupMask *ret) { + CGroupMask mask; + int r; + + /* Determines the mask of supported cgroup controllers. Only includes controllers we can make sense of and that + * are actually accessible. Only covers real controllers, i.e. not the CGROUP_CONTROLLER_BPF_xyz + * pseudo-controllers. */ + + r = cg_all_unified(); + if (r < 0) + return r; + if (r > 0) { + _cleanup_free_ char *root = NULL, *controllers = NULL, *path = NULL; + + /* In the unified hierarchy we can read the supported + * and accessible controllers from a the top-level + * cgroup attribute */ + + r = cg_get_root_path(&root); + if (r < 0) + return r; + + r = cg_get_path(SYSTEMD_CGROUP_CONTROLLER, root, "cgroup.controllers", &path); + if (r < 0) + return r; + + r = read_one_line_file(path, &controllers); + if (r < 0) + return r; + + r = cg_mask_from_string(controllers, &mask); + if (r < 0) + return r; + + /* Currently, we support the cpu, memory, io and pids controller in the unified hierarchy, mask + * everything else off. */ + mask &= CGROUP_MASK_V2; + + } else { + CGroupController c; + + /* In the legacy hierarchy, we check which hierarchies are mounted. */ + + mask = 0; + for (c = 0; c < _CGROUP_CONTROLLER_MAX; c++) { + CGroupMask bit = CGROUP_CONTROLLER_TO_MASK(c); + const char *n; + + if (!FLAGS_SET(CGROUP_MASK_V1, bit)) + continue; + + n = cgroup_controller_to_string(c); + if (controller_is_accessible(n) >= 0) + mask |= bit; + } + } + + *ret = mask; + return 0; +} + +int cg_kernel_controllers(Set **ret) { + _cleanup_set_free_free_ Set *controllers = NULL; + _cleanup_fclose_ FILE *f = NULL; + int r; + + assert(ret); + + /* Determines the full list of kernel-known controllers. Might include controllers we don't actually support + * and controllers that aren't currently accessible (because not mounted). This does not include "name=" + * pseudo-controllers. */ + + controllers = set_new(&string_hash_ops); + if (!controllers) + return -ENOMEM; + + f = fopen("/proc/cgroups", "re"); + if (!f) { + if (errno == ENOENT) { + *ret = NULL; + return 0; + } + + return -errno; + } + + (void) __fsetlocking(f, FSETLOCKING_BYCALLER); + + /* Ignore the header line */ + (void) read_line(f, (size_t) -1, NULL); + + for (;;) { + char *controller; + int enabled = 0; + + errno = 0; + if (fscanf(f, "%ms %*i %*i %i", &controller, &enabled) != 2) { + + if (feof(f)) + break; + + if (ferror(f) && errno > 0) + return -errno; + + return -EBADMSG; + } + + if (!enabled) { + free(controller); + continue; + } + + if (!cg_controller_is_valid(controller)) { + free(controller); + return -EBADMSG; + } + + r = set_consume(controllers, controller); + if (r < 0) + return r; + } + + *ret = TAKE_PTR(controllers); + + return 0; +} + +static thread_local CGroupUnified unified_cache = CGROUP_UNIFIED_UNKNOWN; + +/* The hybrid mode was initially implemented in v232 and simply mounted cgroup2 on /sys/fs/cgroup/systemd. This + * unfortunately broke other tools (such as docker) which expected the v1 "name=systemd" hierarchy on + * /sys/fs/cgroup/systemd. From v233 and on, the hybrid mode mountnbs v2 on /sys/fs/cgroup/unified and maintains + * "name=systemd" hierarchy on /sys/fs/cgroup/systemd for compatibility with other tools. + * + * To keep live upgrade working, we detect and support v232 layout. When v232 layout is detected, to keep cgroup v2 + * process management but disable the compat dual layout, we return %true on + * cg_unified_controller(SYSTEMD_CGROUP_CONTROLLER) and %false on cg_hybrid_unified(). + */ +static thread_local bool unified_systemd_v232; + +static int cg_unified_update(void) { + + struct statfs fs; + + /* Checks if we support the unified hierarchy. Returns an + * error when the cgroup hierarchies aren't mounted yet or we + * have any other trouble determining if the unified hierarchy + * is supported. */ + + if (unified_cache >= CGROUP_UNIFIED_NONE) + return 0; + + if (statfs("/sys/fs/cgroup/", &fs) < 0) + return log_debug_errno(errno, "statfs(\"/sys/fs/cgroup/\") failed: %m"); + + if (F_TYPE_EQUAL(fs.f_type, CGROUP2_SUPER_MAGIC)) { + log_debug("Found cgroup2 on /sys/fs/cgroup/, full unified hierarchy"); + unified_cache = CGROUP_UNIFIED_ALL; + } else if (F_TYPE_EQUAL(fs.f_type, TMPFS_MAGIC)) { + if (statfs("/sys/fs/cgroup/unified/", &fs) == 0 && + F_TYPE_EQUAL(fs.f_type, CGROUP2_SUPER_MAGIC)) { + log_debug("Found cgroup2 on /sys/fs/cgroup/unified, unified hierarchy for systemd controller"); + unified_cache = CGROUP_UNIFIED_SYSTEMD; + unified_systemd_v232 = false; + } else { + if (statfs("/sys/fs/cgroup/systemd/", &fs) < 0) + return log_debug_errno(errno, "statfs(\"/sys/fs/cgroup/systemd\" failed: %m"); + + if (F_TYPE_EQUAL(fs.f_type, CGROUP2_SUPER_MAGIC)) { + log_debug("Found cgroup2 on /sys/fs/cgroup/systemd, unified hierarchy for systemd controller (v232 variant)"); + unified_cache = CGROUP_UNIFIED_SYSTEMD; + unified_systemd_v232 = true; + } else if (F_TYPE_EQUAL(fs.f_type, CGROUP_SUPER_MAGIC)) { + log_debug("Found cgroup on /sys/fs/cgroup/systemd, legacy hierarchy"); + unified_cache = CGROUP_UNIFIED_NONE; + } else { + log_debug("Unexpected filesystem type %llx mounted on /sys/fs/cgroup/systemd, assuming legacy hierarchy", + (unsigned long long) fs.f_type); + unified_cache = CGROUP_UNIFIED_NONE; + } + } + } else + return log_debug_errno(SYNTHETIC_ERRNO(ENOMEDIUM), + "Unknown filesystem type %llx mounted on /sys/fs/cgroup.", + (unsigned long long)fs.f_type); + + return 0; +} + +int cg_unified_controller(const char *controller) { + int r; + + r = cg_unified_update(); + if (r < 0) + return r; + + if (unified_cache == CGROUP_UNIFIED_NONE) + return false; + + if (unified_cache >= CGROUP_UNIFIED_ALL) + return true; + + return streq_ptr(controller, SYSTEMD_CGROUP_CONTROLLER); +} + +int cg_all_unified(void) { + int r; + + r = cg_unified_update(); + if (r < 0) + return r; + + return unified_cache >= CGROUP_UNIFIED_ALL; +} + +int cg_hybrid_unified(void) { + int r; + + r = cg_unified_update(); + if (r < 0) + return r; + + return unified_cache == CGROUP_UNIFIED_SYSTEMD && !unified_systemd_v232; +} + +int cg_unified_flush(void) { + unified_cache = CGROUP_UNIFIED_UNKNOWN; + + return cg_unified_update(); +} + +int cg_enable_everywhere( + CGroupMask supported, + CGroupMask mask, + const char *p, + CGroupMask *ret_result_mask) { + + _cleanup_fclose_ FILE *f = NULL; + _cleanup_free_ char *fs = NULL; + CGroupController c; + CGroupMask ret = 0; + int r; + + assert(p); + + if (supported == 0) { + if (ret_result_mask) + *ret_result_mask = 0; + return 0; + } + + r = cg_all_unified(); + if (r < 0) + return r; + if (r == 0) { + /* On the legacy hiearchy there's no concept of "enabling" controllers in cgroups defined. Let's claim + * complete success right away. (If you wonder why we return the full mask here, rather than zero: the + * caller tends to use the returned mask later on to compare if all controllers where properly joined, + * and if not requeues realization. This use is the primary purpose of the return value, hence let's + * minimize surprises here and reduce triggers for re-realization by always saying we fully + * succeeded.) */ + if (ret_result_mask) + *ret_result_mask = mask & supported & CGROUP_MASK_V2; /* If you wonder why we mask this with + * CGROUP_MASK_V2: The 'supported' mask + * might contain pure-V1 or BPF + * controllers, and we never want to + * claim that we could enable those with + * cgroup.subtree_control */ + return 0; + } + + r = cg_get_path(SYSTEMD_CGROUP_CONTROLLER, p, "cgroup.subtree_control", &fs); + if (r < 0) + return r; + + for (c = 0; c < _CGROUP_CONTROLLER_MAX; c++) { + CGroupMask bit = CGROUP_CONTROLLER_TO_MASK(c); + const char *n; + + if (!FLAGS_SET(CGROUP_MASK_V2, bit)) + continue; + + if (!FLAGS_SET(supported, bit)) + continue; + + n = cgroup_controller_to_string(c); + { + char s[1 + strlen(n) + 1]; + + s[0] = FLAGS_SET(mask, bit) ? '+' : '-'; + strcpy(s + 1, n); + + if (!f) { + f = fopen(fs, "we"); + if (!f) + return log_debug_errno(errno, "Failed to open cgroup.subtree_control file of %s: %m", p); + } + + r = write_string_stream(f, s, WRITE_STRING_FILE_DISABLE_BUFFER); + if (r < 0) { + log_debug_errno(r, "Failed to %s controller %s for %s (%s): %m", + FLAGS_SET(mask, bit) ? "enable" : "disable", n, p, fs); + clearerr(f); + + /* If we can't turn off a controller, leave it on in the reported resulting mask. This + * happens for example when we attempt to turn off a controller up in the tree that is + * used down in the tree. */ + if (!FLAGS_SET(mask, bit) && r == -EBUSY) /* You might wonder why we check for EBUSY + * only here, and not follow the same logic + * for other errors such as EINVAL or + * EOPNOTSUPP or anything else. That's + * because EBUSY indicates that the + * controllers is currently enabled and + * cannot be disabled because something down + * the hierarchy is still using it. Any other + * error most likely means something like "I + * never heard of this controller" or + * similar. In the former case it's hence + * safe to assume the controller is still on + * after the failed operation, while in the + * latter case it's safer to assume the + * controller is unknown and hence certainly + * not enabled. */ + ret |= bit; + } else { + /* Otherwise, if we managed to turn on a controller, set the bit reflecting that. */ + if (FLAGS_SET(mask, bit)) + ret |= bit; + } + } + } + + /* Let's return the precise set of controllers now enabled for the cgroup. */ + if (ret_result_mask) + *ret_result_mask = ret; + + return 0; +} + +bool cg_is_unified_wanted(void) { + static thread_local int wanted = -1; + int r; + bool b; + const bool is_default = DEFAULT_HIERARCHY == CGROUP_UNIFIED_ALL; + _cleanup_free_ char *c = NULL; + + /* If we have a cached value, return that. */ + if (wanted >= 0) + return wanted; + + /* If the hierarchy is already mounted, then follow whatever + * was chosen for it. */ + if (cg_unified_flush() >= 0) + return (wanted = unified_cache >= CGROUP_UNIFIED_ALL); + + /* If we were explicitly passed systemd.unified_cgroup_hierarchy, + * respect that. */ + r = proc_cmdline_get_bool("systemd.unified_cgroup_hierarchy", &b); + if (r > 0) + return (wanted = b); + + /* If we passed cgroup_no_v1=all with no other instructions, it seems + * highly unlikely that we want to use hybrid or legacy hierarchy. */ + r = proc_cmdline_get_key("cgroup_no_v1", 0, &c); + if (r > 0 && streq_ptr(c, "all")) + return (wanted = true); + + return (wanted = is_default); +} + +bool cg_is_legacy_wanted(void) { + static thread_local int wanted = -1; + + /* If we have a cached value, return that. */ + if (wanted >= 0) + return wanted; + + /* Check if we have cgroup v2 already mounted. */ + if (cg_unified_flush() >= 0 && + unified_cache == CGROUP_UNIFIED_ALL) + return (wanted = false); + + /* Otherwise, assume that at least partial legacy is wanted, + * since cgroup v2 should already be mounted at this point. */ + return (wanted = true); +} + +bool cg_is_hybrid_wanted(void) { + static thread_local int wanted = -1; + int r; + bool b; + const bool is_default = DEFAULT_HIERARCHY >= CGROUP_UNIFIED_SYSTEMD; + /* We default to true if the default is "hybrid", obviously, + * but also when the default is "unified", because if we get + * called, it means that unified hierarchy was not mounted. */ + + /* If we have a cached value, return that. */ + if (wanted >= 0) + return wanted; + + /* If the hierarchy is already mounted, then follow whatever + * was chosen for it. */ + if (cg_unified_flush() >= 0 && + unified_cache == CGROUP_UNIFIED_ALL) + return (wanted = false); + + /* Otherwise, let's see what the kernel command line has to say. + * Since checking is expensive, cache a non-error result. */ + r = proc_cmdline_get_bool("systemd.legacy_systemd_cgroup_controller", &b); + + /* The meaning of the kernel option is reversed wrt. to the return value + * of this function, hence the negation. */ + return (wanted = r > 0 ? !b : is_default); +} + +int cg_weight_parse(const char *s, uint64_t *ret) { + uint64_t u; + int r; + + if (isempty(s)) { + *ret = CGROUP_WEIGHT_INVALID; + return 0; + } + + r = safe_atou64(s, &u); + if (r < 0) + return r; + + if (u < CGROUP_WEIGHT_MIN || u > CGROUP_WEIGHT_MAX) + return -ERANGE; + + *ret = u; + return 0; +} + +const uint64_t cgroup_io_limit_defaults[_CGROUP_IO_LIMIT_TYPE_MAX] = { + [CGROUP_IO_RBPS_MAX] = CGROUP_LIMIT_MAX, + [CGROUP_IO_WBPS_MAX] = CGROUP_LIMIT_MAX, + [CGROUP_IO_RIOPS_MAX] = CGROUP_LIMIT_MAX, + [CGROUP_IO_WIOPS_MAX] = CGROUP_LIMIT_MAX, +}; + +static const char* const cgroup_io_limit_type_table[_CGROUP_IO_LIMIT_TYPE_MAX] = { + [CGROUP_IO_RBPS_MAX] = "IOReadBandwidthMax", + [CGROUP_IO_WBPS_MAX] = "IOWriteBandwidthMax", + [CGROUP_IO_RIOPS_MAX] = "IOReadIOPSMax", + [CGROUP_IO_WIOPS_MAX] = "IOWriteIOPSMax", +}; + +DEFINE_STRING_TABLE_LOOKUP(cgroup_io_limit_type, CGroupIOLimitType); + +int cg_cpu_shares_parse(const char *s, uint64_t *ret) { + uint64_t u; + int r; + + if (isempty(s)) { + *ret = CGROUP_CPU_SHARES_INVALID; + return 0; + } + + r = safe_atou64(s, &u); + if (r < 0) + return r; + + if (u < CGROUP_CPU_SHARES_MIN || u > CGROUP_CPU_SHARES_MAX) + return -ERANGE; + + *ret = u; + return 0; +} + +int cg_blkio_weight_parse(const char *s, uint64_t *ret) { + uint64_t u; + int r; + + if (isempty(s)) { + *ret = CGROUP_BLKIO_WEIGHT_INVALID; + return 0; + } + + r = safe_atou64(s, &u); + if (r < 0) + return r; + + if (u < CGROUP_BLKIO_WEIGHT_MIN || u > CGROUP_BLKIO_WEIGHT_MAX) + return -ERANGE; + + *ret = u; + return 0; +} + +bool is_cgroup_fs(const struct statfs *s) { + return is_fs_type(s, CGROUP_SUPER_MAGIC) || + is_fs_type(s, CGROUP2_SUPER_MAGIC); +} + +bool fd_is_cgroup_fs(int fd) { + struct statfs s; + + if (fstatfs(fd, &s) < 0) + return -errno; + + return is_cgroup_fs(&s); +} + +static const char *cgroup_controller_table[_CGROUP_CONTROLLER_MAX] = { + [CGROUP_CONTROLLER_CPU] = "cpu", + [CGROUP_CONTROLLER_CPUACCT] = "cpuacct", + [CGROUP_CONTROLLER_IO] = "io", + [CGROUP_CONTROLLER_BLKIO] = "blkio", + [CGROUP_CONTROLLER_MEMORY] = "memory", + [CGROUP_CONTROLLER_DEVICES] = "devices", + [CGROUP_CONTROLLER_PIDS] = "pids", + [CGROUP_CONTROLLER_BPF_FIREWALL] = "bpf-firewall", + [CGROUP_CONTROLLER_BPF_DEVICES] = "bpf-devices", +}; + +DEFINE_STRING_TABLE_LOOKUP(cgroup_controller, CGroupController); + +CGroupMask get_cpu_accounting_mask(void) { + static CGroupMask needed_mask = (CGroupMask) -1; + + /* On kernel ≥4.15 with unified hierarchy, cpu.stat's usage_usec is + * provided externally from the CPU controller, which means we don't + * need to enable the CPU controller just to get metrics. This is good, + * because enabling the CPU controller comes at a minor performance + * hit, especially when it's propagated deep into large hierarchies. + * There's also no separate CPU accounting controller available within + * a unified hierarchy. + * + * This combination of factors results in the desired cgroup mask to + * enable for CPU accounting varying as follows: + * + * ╔═════════════════════╤═════════════════════╗ + * ║ Linux ≥4.15 │ Linux <4.15 ║ + * ╔═══════════════╬═════════════════════╪═════════════════════╣ + * ║ Unified ║ nothing │ CGROUP_MASK_CPU ║ + * ╟───────────────╫─────────────────────┼─────────────────────╢ + * ║ Hybrid/Legacy ║ CGROUP_MASK_CPUACCT │ CGROUP_MASK_CPUACCT ║ + * ╚═══════════════╩═════════════════════╧═════════════════════╝ + * + * We check kernel version here instead of manually checking whether + * cpu.stat is present for every cgroup, as that check in itself would + * already be fairly expensive. + * + * Kernels where this patch has been backported will therefore have the + * CPU controller enabled unnecessarily. This is more expensive than + * necessary, but harmless. ☺️ + */ + + if (needed_mask == (CGroupMask) -1) { + if (cg_all_unified()) { + struct utsname u; + assert_se(uname(&u) >= 0); + + if (str_verscmp(u.release, "4.15") < 0) + needed_mask = CGROUP_MASK_CPU; + else + needed_mask = 0; + } else + needed_mask = CGROUP_MASK_CPUACCT; + } + + return needed_mask; +} + +bool cpu_accounting_is_cheap(void) { + return get_cpu_accounting_mask() == 0; +} diff --git a/src/basic/cgroup-util.h b/src/basic/cgroup-util.h new file mode 100644 index 0000000..119b493 --- /dev/null +++ b/src/basic/cgroup-util.h @@ -0,0 +1,275 @@ +/* SPDX-License-Identifier: LGPL-2.1+ */ +#pragma once + +#include <dirent.h> +#include <stdbool.h> +#include <stdint.h> +#include <stdio.h> +#include <sys/statfs.h> +#include <sys/types.h> + +#include "def.h" +#include "set.h" + +#define SYSTEMD_CGROUP_CONTROLLER_LEGACY "name=systemd" +#define SYSTEMD_CGROUP_CONTROLLER_HYBRID "name=unified" +#define SYSTEMD_CGROUP_CONTROLLER "_systemd" + +/* An enum of well known cgroup controllers */ +typedef enum CGroupController { + /* Original cgroup controllers */ + CGROUP_CONTROLLER_CPU, + CGROUP_CONTROLLER_CPUACCT, /* v1 only */ + CGROUP_CONTROLLER_IO, /* v2 only */ + CGROUP_CONTROLLER_BLKIO, /* v1 only */ + CGROUP_CONTROLLER_MEMORY, + CGROUP_CONTROLLER_DEVICES, /* v1 only */ + CGROUP_CONTROLLER_PIDS, + + /* BPF-based pseudo-controllers, v2 only */ + CGROUP_CONTROLLER_BPF_FIREWALL, + CGROUP_CONTROLLER_BPF_DEVICES, + + _CGROUP_CONTROLLER_MAX, + _CGROUP_CONTROLLER_INVALID = -1, +} CGroupController; + +#define CGROUP_CONTROLLER_TO_MASK(c) (1U << (c)) + +/* A bit mask of well known cgroup controllers */ +typedef enum CGroupMask { + CGROUP_MASK_CPU = CGROUP_CONTROLLER_TO_MASK(CGROUP_CONTROLLER_CPU), + CGROUP_MASK_CPUACCT = CGROUP_CONTROLLER_TO_MASK(CGROUP_CONTROLLER_CPUACCT), + CGROUP_MASK_IO = CGROUP_CONTROLLER_TO_MASK(CGROUP_CONTROLLER_IO), + CGROUP_MASK_BLKIO = CGROUP_CONTROLLER_TO_MASK(CGROUP_CONTROLLER_BLKIO), + CGROUP_MASK_MEMORY = CGROUP_CONTROLLER_TO_MASK(CGROUP_CONTROLLER_MEMORY), + CGROUP_MASK_DEVICES = CGROUP_CONTROLLER_TO_MASK(CGROUP_CONTROLLER_DEVICES), + CGROUP_MASK_PIDS = CGROUP_CONTROLLER_TO_MASK(CGROUP_CONTROLLER_PIDS), + CGROUP_MASK_BPF_FIREWALL = CGROUP_CONTROLLER_TO_MASK(CGROUP_CONTROLLER_BPF_FIREWALL), + CGROUP_MASK_BPF_DEVICES = CGROUP_CONTROLLER_TO_MASK(CGROUP_CONTROLLER_BPF_DEVICES), + + /* All real cgroup v1 controllers */ + CGROUP_MASK_V1 = CGROUP_MASK_CPU|CGROUP_MASK_CPUACCT|CGROUP_MASK_BLKIO|CGROUP_MASK_MEMORY|CGROUP_MASK_DEVICES|CGROUP_MASK_PIDS, + + /* All real cgroup v2 controllers */ + CGROUP_MASK_V2 = CGROUP_MASK_CPU|CGROUP_MASK_IO|CGROUP_MASK_MEMORY|CGROUP_MASK_PIDS, + + /* All cgroup v2 BPF pseudo-controllers */ + CGROUP_MASK_BPF = CGROUP_MASK_BPF_FIREWALL|CGROUP_MASK_BPF_DEVICES, + + _CGROUP_MASK_ALL = CGROUP_CONTROLLER_TO_MASK(_CGROUP_CONTROLLER_MAX) - 1 +} CGroupMask; + +static inline CGroupMask CGROUP_MASK_EXTEND_JOINED(CGroupMask mask) { + /* We always mount "cpu" and "cpuacct" in the same hierarchy. Hence, when one bit is set also set the other */ + + if (mask & (CGROUP_MASK_CPU|CGROUP_MASK_CPUACCT)) + mask |= (CGROUP_MASK_CPU|CGROUP_MASK_CPUACCT); + + return mask; +} + +CGroupMask get_cpu_accounting_mask(void); +bool cpu_accounting_is_cheap(void); + +/* Special values for all weight knobs on unified hierarchy */ +#define CGROUP_WEIGHT_INVALID ((uint64_t) -1) +#define CGROUP_WEIGHT_MIN UINT64_C(1) +#define CGROUP_WEIGHT_MAX UINT64_C(10000) +#define CGROUP_WEIGHT_DEFAULT UINT64_C(100) + +#define CGROUP_LIMIT_MIN UINT64_C(0) +#define CGROUP_LIMIT_MAX ((uint64_t) -1) + +static inline bool CGROUP_WEIGHT_IS_OK(uint64_t x) { + return + x == CGROUP_WEIGHT_INVALID || + (x >= CGROUP_WEIGHT_MIN && x <= CGROUP_WEIGHT_MAX); +} + +/* IO limits on unified hierarchy */ +typedef enum CGroupIOLimitType { + CGROUP_IO_RBPS_MAX, + CGROUP_IO_WBPS_MAX, + CGROUP_IO_RIOPS_MAX, + CGROUP_IO_WIOPS_MAX, + + _CGROUP_IO_LIMIT_TYPE_MAX, + _CGROUP_IO_LIMIT_TYPE_INVALID = -1 +} CGroupIOLimitType; + +extern const uint64_t cgroup_io_limit_defaults[_CGROUP_IO_LIMIT_TYPE_MAX]; + +const char* cgroup_io_limit_type_to_string(CGroupIOLimitType t) _const_; +CGroupIOLimitType cgroup_io_limit_type_from_string(const char *s) _pure_; + +/* Special values for the cpu.shares attribute */ +#define CGROUP_CPU_SHARES_INVALID ((uint64_t) -1) +#define CGROUP_CPU_SHARES_MIN UINT64_C(2) +#define CGROUP_CPU_SHARES_MAX UINT64_C(262144) +#define CGROUP_CPU_SHARES_DEFAULT UINT64_C(1024) + +static inline bool CGROUP_CPU_SHARES_IS_OK(uint64_t x) { + return + x == CGROUP_CPU_SHARES_INVALID || + (x >= CGROUP_CPU_SHARES_MIN && x <= CGROUP_CPU_SHARES_MAX); +} + +/* Special values for the blkio.weight attribute */ +#define CGROUP_BLKIO_WEIGHT_INVALID ((uint64_t) -1) +#define CGROUP_BLKIO_WEIGHT_MIN UINT64_C(10) +#define CGROUP_BLKIO_WEIGHT_MAX UINT64_C(1000) +#define CGROUP_BLKIO_WEIGHT_DEFAULT UINT64_C(500) + +static inline bool CGROUP_BLKIO_WEIGHT_IS_OK(uint64_t x) { + return + x == CGROUP_BLKIO_WEIGHT_INVALID || + (x >= CGROUP_BLKIO_WEIGHT_MIN && x <= CGROUP_BLKIO_WEIGHT_MAX); +} + +/* Default resource limits */ +#define DEFAULT_TASKS_MAX_PERCENTAGE 15U /* 15% of PIDs, 4915 on default settings */ +#define DEFAULT_USER_TASKS_MAX_PERCENTAGE 33U /* 33% of PIDs, 10813 on default settings */ + +typedef enum CGroupUnified { + CGROUP_UNIFIED_UNKNOWN = -1, + CGROUP_UNIFIED_NONE = 0, /* Both systemd and controllers on legacy */ + CGROUP_UNIFIED_SYSTEMD = 1, /* Only systemd on unified */ + CGROUP_UNIFIED_ALL = 2, /* Both systemd and controllers on unified */ +} CGroupUnified; + +/* + * General rules: + * + * We accept named hierarchies in the syntax "foo" and "name=foo". + * + * We expect that named hierarchies do not conflict in name with a + * kernel hierarchy, modulo the "name=" prefix. + * + * We always generate "normalized" controller names, i.e. without the + * "name=" prefix. + * + * We require absolute cgroup paths. When returning, we will always + * generate paths with multiple adjacent / removed. + */ + +int cg_enumerate_processes(const char *controller, const char *path, FILE **_f); +int cg_read_pid(FILE *f, pid_t *_pid); +int cg_read_event(const char *controller, const char *path, const char *event, + char **val); + +int cg_enumerate_subgroups(const char *controller, const char *path, DIR **_d); +int cg_read_subgroup(DIR *d, char **fn); + +typedef enum CGroupFlags { + CGROUP_SIGCONT = 1 << 0, + CGROUP_IGNORE_SELF = 1 << 1, + CGROUP_REMOVE = 1 << 2, +} CGroupFlags; + +typedef void (*cg_kill_log_func_t)(pid_t pid, int sig, void *userdata); + +int cg_kill(const char *controller, const char *path, int sig, CGroupFlags flags, Set *s, cg_kill_log_func_t kill_log, void *userdata); +int cg_kill_recursive(const char *controller, const char *path, int sig, CGroupFlags flags, Set *s, cg_kill_log_func_t kill_log, void *userdata); + +int cg_migrate(const char *cfrom, const char *pfrom, const char *cto, const char *pto, CGroupFlags flags); +int cg_migrate_recursive(const char *cfrom, const char *pfrom, const char *cto, const char *pto, CGroupFlags flags); +int cg_migrate_recursive_fallback(const char *cfrom, const char *pfrom, const char *cto, const char *pto, CGroupFlags flags); + +int cg_split_spec(const char *spec, char **controller, char **path); +int cg_mangle_path(const char *path, char **result); + +int cg_get_path(const char *controller, const char *path, const char *suffix, char **fs); +int cg_get_path_and_check(const char *controller, const char *path, const char *suffix, char **fs); + +int cg_pid_get_path(const char *controller, pid_t pid, char **path); + +int cg_trim(const char *controller, const char *path, bool delete_root); + +int cg_rmdir(const char *controller, const char *path); + +int cg_create(const char *controller, const char *path); +int cg_attach(const char *controller, const char *path, pid_t pid); +int cg_attach_fallback(const char *controller, const char *path, pid_t pid); +int cg_create_and_attach(const char *controller, const char *path, pid_t pid); + +int cg_set_attribute(const char *controller, const char *path, const char *attribute, const char *value); +int cg_get_attribute(const char *controller, const char *path, const char *attribute, char **ret); +int cg_get_keyed_attribute(const char *controller, const char *path, const char *attribute, char **keys, char **values); + +int cg_set_access(const char *controller, const char *path, uid_t uid, gid_t gid); + +int cg_set_xattr(const char *controller, const char *path, const char *name, const void *value, size_t size, int flags); +int cg_get_xattr(const char *controller, const char *path, const char *name, void *value, size_t size); + +int cg_install_release_agent(const char *controller, const char *agent); +int cg_uninstall_release_agent(const char *controller); + +int cg_is_empty(const char *controller, const char *path); +int cg_is_empty_recursive(const char *controller, const char *path); + +int cg_get_root_path(char **path); + +int cg_path_get_session(const char *path, char **session); +int cg_path_get_owner_uid(const char *path, uid_t *uid); +int cg_path_get_unit(const char *path, char **unit); +int cg_path_get_user_unit(const char *path, char **unit); +int cg_path_get_machine_name(const char *path, char **machine); +int cg_path_get_slice(const char *path, char **slice); +int cg_path_get_user_slice(const char *path, char **slice); + +int cg_shift_path(const char *cgroup, const char *cached_root, const char **shifted); +int cg_pid_get_path_shifted(pid_t pid, const char *cached_root, char **cgroup); + +int cg_pid_get_session(pid_t pid, char **session); +int cg_pid_get_owner_uid(pid_t pid, uid_t *uid); +int cg_pid_get_unit(pid_t pid, char **unit); +int cg_pid_get_user_unit(pid_t pid, char **unit); +int cg_pid_get_machine_name(pid_t pid, char **machine); +int cg_pid_get_slice(pid_t pid, char **slice); +int cg_pid_get_user_slice(pid_t pid, char **slice); + +int cg_path_decode_unit(const char *cgroup, char **unit); + +char *cg_escape(const char *p); +char *cg_unescape(const char *p) _pure_; + +bool cg_controller_is_valid(const char *p); + +int cg_slice_to_path(const char *unit, char **ret); + +typedef const char* (*cg_migrate_callback_t)(CGroupMask mask, void *userdata); + +int cg_create_everywhere(CGroupMask supported, CGroupMask mask, const char *path); +int cg_attach_everywhere(CGroupMask supported, const char *path, pid_t pid, cg_migrate_callback_t callback, void *userdata); +int cg_attach_many_everywhere(CGroupMask supported, const char *path, Set* pids, cg_migrate_callback_t callback, void *userdata); +int cg_migrate_everywhere(CGroupMask supported, const char *from, const char *to, cg_migrate_callback_t callback, void *userdata); +int cg_trim_everywhere(CGroupMask supported, const char *path, bool delete_root); +int cg_enable_everywhere(CGroupMask supported, CGroupMask mask, const char *p, CGroupMask *ret_result_mask); + +int cg_mask_supported(CGroupMask *ret); +int cg_mask_from_string(const char *s, CGroupMask *ret); +int cg_mask_to_string(CGroupMask mask, char **ret); + +int cg_kernel_controllers(Set **controllers); + +bool cg_ns_supported(void); + +int cg_all_unified(void); +int cg_hybrid_unified(void); +int cg_unified_controller(const char *controller); +int cg_unified_flush(void); + +bool cg_is_unified_wanted(void); +bool cg_is_legacy_wanted(void); +bool cg_is_hybrid_wanted(void); + +const char* cgroup_controller_to_string(CGroupController c) _const_; +CGroupController cgroup_controller_from_string(const char *s) _pure_; + +int cg_weight_parse(const char *s, uint64_t *ret); +int cg_cpu_shares_parse(const char *s, uint64_t *ret); +int cg_blkio_weight_parse(const char *s, uint64_t *ret); + +bool is_cgroup_fs(const struct statfs *s); +bool fd_is_cgroup_fs(int fd); diff --git a/src/basic/chattr-util.c b/src/basic/chattr-util.c new file mode 100644 index 0000000..235cfb9 --- /dev/null +++ b/src/basic/chattr-util.c @@ -0,0 +1,96 @@ +/* SPDX-License-Identifier: LGPL-2.1+ */ + +#include <errno.h> +#include <fcntl.h> +#include <sys/ioctl.h> +#include <sys/stat.h> +#include <linux/fs.h> + +#include "chattr-util.h" +#include "fd-util.h" +#include "macro.h" + +int chattr_fd(int fd, unsigned value, unsigned mask, unsigned *previous) { + unsigned old_attr, new_attr; + struct stat st; + + assert(fd >= 0); + + if (fstat(fd, &st) < 0) + return -errno; + + /* Explicitly check whether this is a regular file or + * directory. If it is anything else (such as a device node or + * fifo), then the ioctl will not hit the file systems but + * possibly drivers, where the ioctl might have different + * effects. Notably, DRM is using the same ioctl() number. */ + + if (!S_ISDIR(st.st_mode) && !S_ISREG(st.st_mode)) + return -ENOTTY; + + if (mask == 0 && !previous) + return 0; + + if (ioctl(fd, FS_IOC_GETFLAGS, &old_attr) < 0) + return -errno; + + new_attr = (old_attr & ~mask) | (value & mask); + if (new_attr == old_attr) { + if (previous) + *previous = old_attr; + return 0; + } + + if (ioctl(fd, FS_IOC_SETFLAGS, &new_attr) < 0) + return -errno; + + if (previous) + *previous = old_attr; + + return 1; +} + +int chattr_path(const char *p, unsigned value, unsigned mask, unsigned *previous) { + _cleanup_close_ int fd = -1; + + assert(p); + + if (mask == 0) + return 0; + + fd = open(p, O_RDONLY|O_CLOEXEC|O_NOCTTY|O_NOFOLLOW); + if (fd < 0) + return -errno; + + return chattr_fd(fd, value, mask, previous); +} + +int read_attr_fd(int fd, unsigned *ret) { + struct stat st; + + assert(fd >= 0); + + if (fstat(fd, &st) < 0) + return -errno; + + if (!S_ISDIR(st.st_mode) && !S_ISREG(st.st_mode)) + return -ENOTTY; + + if (ioctl(fd, FS_IOC_GETFLAGS, ret) < 0) + return -errno; + + return 0; +} + +int read_attr_path(const char *p, unsigned *ret) { + _cleanup_close_ int fd = -1; + + assert(p); + assert(ret); + + fd = open(p, O_RDONLY|O_CLOEXEC|O_NOCTTY|O_NOFOLLOW); + if (fd < 0) + return -errno; + + return read_attr_fd(fd, ret); +} diff --git a/src/basic/chattr-util.h b/src/basic/chattr-util.h new file mode 100644 index 0000000..7570bba --- /dev/null +++ b/src/basic/chattr-util.h @@ -0,0 +1,8 @@ +/* SPDX-License-Identifier: LGPL-2.1+ */ +#pragma once + +int chattr_fd(int fd, unsigned value, unsigned mask, unsigned *previous); +int chattr_path(const char *p, unsigned value, unsigned mask, unsigned *previous); + +int read_attr_fd(int fd, unsigned *ret); +int read_attr_path(const char *p, unsigned *ret); diff --git a/src/basic/conf-files.c b/src/basic/conf-files.c new file mode 100644 index 0000000..b70c6e5 --- /dev/null +++ b/src/basic/conf-files.c @@ -0,0 +1,325 @@ +/* SPDX-License-Identifier: LGPL-2.1+ */ + +#include <dirent.h> +#include <errno.h> +#include <stdarg.h> +#include <stdio.h> +#include <stdlib.h> +#include <string.h> + +#include "conf-files.h" +#include "def.h" +#include "dirent-util.h" +#include "fd-util.h" +#include "hashmap.h" +#include "log.h" +#include "macro.h" +#include "missing.h" +#include "path-util.h" +#include "set.h" +#include "stat-util.h" +#include "string-util.h" +#include "strv.h" +#include "terminal-util.h" +#include "util.h" + +static int files_add( + Hashmap *h, + Set *masked, + const char *suffix, + const char *root, + unsigned flags, + const char *path) { + + _cleanup_closedir_ DIR *dir = NULL; + const char *dirpath; + struct dirent *de; + int r; + + assert(h); + assert((flags & CONF_FILES_FILTER_MASKED) == 0 || masked); + assert(path); + + dirpath = prefix_roota(root, path); + + dir = opendir(dirpath); + if (!dir) { + if (errno == ENOENT) + return 0; + + return log_debug_errno(errno, "Failed to open directory '%s': %m", dirpath); + } + + FOREACH_DIRENT(de, dir, return -errno) { + struct stat st; + char *p, *key; + + /* Does this match the suffix? */ + if (suffix && !endswith(de->d_name, suffix)) + continue; + + /* Has this file already been found in an earlier directory? */ + if (hashmap_contains(h, de->d_name)) { + log_debug("Skipping overridden file '%s/%s'.", dirpath, de->d_name); + continue; + } + + /* Has this been masked in an earlier directory? */ + if ((flags & CONF_FILES_FILTER_MASKED) && set_contains(masked, de->d_name)) { + log_debug("File '%s/%s' is masked by previous entry.", dirpath, de->d_name); + continue; + } + + /* Read file metadata if we shall validate the check for file masks, for node types or whether the node is marked executable. */ + if (flags & (CONF_FILES_FILTER_MASKED|CONF_FILES_REGULAR|CONF_FILES_DIRECTORY|CONF_FILES_EXECUTABLE)) + if (fstatat(dirfd(dir), de->d_name, &st, 0) < 0) { + log_debug_errno(errno, "Failed to stat '%s/%s', ignoring: %m", dirpath, de->d_name); + continue; + } + + /* Is this a masking entry? */ + if ((flags & CONF_FILES_FILTER_MASKED)) + if (null_or_empty(&st)) { + /* Mark this one as masked */ + r = set_put_strdup(masked, de->d_name); + if (r < 0) + return r; + + log_debug("File '%s/%s' is a mask.", dirpath, de->d_name); + continue; + } + + /* Does this node have the right type? */ + if (flags & (CONF_FILES_REGULAR|CONF_FILES_DIRECTORY)) + if (!((flags & CONF_FILES_DIRECTORY) && S_ISDIR(st.st_mode)) && + !((flags & CONF_FILES_REGULAR) && S_ISREG(st.st_mode))) { + log_debug("Ignoring '%s/%s', as it is not a of the right type.", dirpath, de->d_name); + continue; + } + + /* Does this node have the executable bit set? */ + if (flags & CONF_FILES_EXECUTABLE) + /* As requested: check if the file is marked exectuable. Note that we don't check access(X_OK) + * here, as we care about whether the file is marked executable at all, and not whether it is + * executable for us, because if so, such errors are stuff we should log about. */ + + if ((st.st_mode & 0111) == 0) { /* not executable */ + log_debug("Ignoring '%s/%s', as it is not marked executable.", dirpath, de->d_name); + continue; + } + + if (flags & CONF_FILES_BASENAME) { + p = strdup(de->d_name); + if (!p) + return -ENOMEM; + + key = p; + } else { + p = strjoin(dirpath, "/", de->d_name); + if (!p) + return -ENOMEM; + + key = basename(p); + } + + r = hashmap_put(h, key, p); + if (r < 0) { + free(p); + return log_debug_errno(r, "Failed to add item to hashmap: %m"); + } + + assert(r > 0); + } + + return 0; +} + +static int base_cmp(char * const *a, char * const *b) { + return strcmp(basename(*a), basename(*b)); +} + +static int conf_files_list_strv_internal(char ***strv, const char *suffix, const char *root, unsigned flags, char **dirs) { + _cleanup_hashmap_free_ Hashmap *fh = NULL; + _cleanup_set_free_free_ Set *masked = NULL; + char **files, **p; + int r; + + assert(strv); + + /* This alters the dirs string array */ + if (!path_strv_resolve_uniq(dirs, root)) + return -ENOMEM; + + fh = hashmap_new(&path_hash_ops); + if (!fh) + return -ENOMEM; + + if (flags & CONF_FILES_FILTER_MASKED) { + masked = set_new(&path_hash_ops); + if (!masked) + return -ENOMEM; + } + + STRV_FOREACH(p, dirs) { + r = files_add(fh, masked, suffix, root, flags, *p); + if (r == -ENOMEM) + return r; + if (r < 0) + log_debug_errno(r, "Failed to search for files in %s, ignoring: %m", *p); + } + + files = hashmap_get_strv(fh); + if (!files) + return -ENOMEM; + + typesafe_qsort(files, hashmap_size(fh), base_cmp); + *strv = files; + + return 0; +} + +int conf_files_insert(char ***strv, const char *root, char **dirs, const char *path) { + /* Insert a path into strv, at the place honouring the usual sorting rules: + * - we first compare by the basename + * - and then we compare by dirname, allowing just one file with the given + * basename. + * This means that we will + * - add a new entry if basename(path) was not on the list, + * - do nothing if an entry with higher priority was already present, + * - do nothing if our new entry matches the existing entry, + * - replace the existing entry if our new entry has higher priority. + */ + size_t i, n; + char *t; + int r; + + n = strv_length(*strv); + for (i = 0; i < n; i++) { + int c; + + c = base_cmp((char* const*) *strv + i, (char* const*) &path); + if (c == 0) { + char **dir; + + /* Oh, there already is an entry with a matching name (the last component). */ + + STRV_FOREACH(dir, dirs) { + _cleanup_free_ char *rdir = NULL; + char *p1, *p2; + + rdir = prefix_root(root, *dir); + if (!rdir) + return -ENOMEM; + + p1 = path_startswith((*strv)[i], rdir); + if (p1) + /* Existing entry with higher priority + * or same priority, no need to do anything. */ + return 0; + + p2 = path_startswith(path, *dir); + if (p2) { + /* Our new entry has higher priority */ + + t = prefix_root(root, path); + if (!t) + return log_oom(); + + return free_and_replace((*strv)[i], t); + } + } + + } else if (c > 0) + /* Following files have lower priority, let's go insert our + * new entry. */ + break; + + /* … we are not there yet, let's continue */ + } + + /* The new file has lower priority than all the existing entries */ + t = prefix_root(root, path); + if (!t) + return -ENOMEM; + + r = strv_insert(strv, i, t); + if (r < 0) + free(t); + + return r; +} + +int conf_files_list_strv(char ***strv, const char *suffix, const char *root, unsigned flags, const char* const* dirs) { + _cleanup_strv_free_ char **copy = NULL; + + assert(strv); + + copy = strv_copy((char**) dirs); + if (!copy) + return -ENOMEM; + + return conf_files_list_strv_internal(strv, suffix, root, flags, copy); +} + +int conf_files_list(char ***strv, const char *suffix, const char *root, unsigned flags, const char *dir, ...) { + _cleanup_strv_free_ char **dirs = NULL; + va_list ap; + + assert(strv); + + va_start(ap, dir); + dirs = strv_new_ap(dir, ap); + va_end(ap); + + if (!dirs) + return -ENOMEM; + + return conf_files_list_strv_internal(strv, suffix, root, flags, dirs); +} + +int conf_files_list_nulstr(char ***strv, const char *suffix, const char *root, unsigned flags, const char *dirs) { + _cleanup_strv_free_ char **d = NULL; + + assert(strv); + + d = strv_split_nulstr(dirs); + if (!d) + return -ENOMEM; + + return conf_files_list_strv_internal(strv, suffix, root, flags, d); +} + +int conf_files_list_with_replacement( + const char *root, + char **config_dirs, + const char *replacement, + char ***files, + char **replace_file) { + + _cleanup_strv_free_ char **f = NULL; + _cleanup_free_ char *p = NULL; + int r; + + assert(config_dirs); + assert(files); + assert(replace_file || !replacement); + + r = conf_files_list_strv(&f, ".conf", root, 0, (const char* const*) config_dirs); + if (r < 0) + return log_error_errno(r, "Failed to enumerate config files: %m"); + + if (replacement) { + r = conf_files_insert(&f, root, config_dirs, replacement); + if (r < 0) + return log_error_errno(r, "Failed to extend config file list: %m"); + + p = prefix_root(root, replacement); + if (!p) + return log_oom(); + } + + *files = TAKE_PTR(f); + if (replace_file) + *replace_file = TAKE_PTR(p); + return 0; +} diff --git a/src/basic/conf-files.h b/src/basic/conf-files.h new file mode 100644 index 0000000..55ab326 --- /dev/null +++ b/src/basic/conf-files.h @@ -0,0 +1,23 @@ +/* SPDX-License-Identifier: LGPL-2.1+ */ +#pragma once + +#include "macro.h" + +enum { + CONF_FILES_EXECUTABLE = 1 << 0, + CONF_FILES_REGULAR = 1 << 1, + CONF_FILES_DIRECTORY = 1 << 2, + CONF_FILES_BASENAME = 1 << 3, + CONF_FILES_FILTER_MASKED = 1 << 4, +}; + +int conf_files_list(char ***ret, const char *suffix, const char *root, unsigned flags, const char *dir, ...) _sentinel_; +int conf_files_list_strv(char ***ret, const char *suffix, const char *root, unsigned flags, const char* const* dirs); +int conf_files_list_nulstr(char ***ret, const char *suffix, const char *root, unsigned flags, const char *dirs); +int conf_files_insert(char ***strv, const char *root, char **dirs, const char *path); +int conf_files_list_with_replacement( + const char *root, + char **config_dirs, + const char *replacement, + char ***files, + char **replace_file); diff --git a/src/basic/copy.c b/src/basic/copy.c new file mode 100644 index 0000000..46e02a3 --- /dev/null +++ b/src/basic/copy.c @@ -0,0 +1,936 @@ +/* SPDX-License-Identifier: LGPL-2.1+ */ + +#include <dirent.h> +#include <errno.h> +#include <fcntl.h> +#include <stddef.h> +#include <stdio.h> +#include <stdlib.h> +#include <string.h> +#include <sys/sendfile.h> +#include <sys/stat.h> +#include <sys/xattr.h> +#include <time.h> +#include <unistd.h> + +#include "alloc-util.h" +#include "btrfs-util.h" +#include "chattr-util.h" +#include "copy.h" +#include "dirent-util.h" +#include "fd-util.h" +#include "fs-util.h" +#include "io-util.h" +#include "macro.h" +#include "missing.h" +#include "mountpoint-util.h" +#include "stat-util.h" +#include "string-util.h" +#include "strv.h" +#include "time-util.h" +#include "tmpfile-util.h" +#include "umask-util.h" +#include "user-util.h" +#include "xattr-util.h" + +#define COPY_BUFFER_SIZE (16U*1024U) + +/* A safety net for descending recursively into file system trees to copy. On Linux PATH_MAX is 4096, which means the + * deepest valid path one can build is around 2048, which we hence use as a safety net here, to not spin endlessly in + * case of bind mount cycles and suchlike. */ +#define COPY_DEPTH_MAX 2048U + +static ssize_t try_copy_file_range( + int fd_in, loff_t *off_in, + int fd_out, loff_t *off_out, + size_t len, + unsigned flags) { + + static int have = -1; + ssize_t r; + + if (have == 0) + return -ENOSYS; + + r = copy_file_range(fd_in, off_in, fd_out, off_out, len, flags); + if (have < 0) + have = r >= 0 || errno != ENOSYS; + if (r < 0) + return -errno; + + return r; +} + +enum { + FD_IS_NO_PIPE, + FD_IS_BLOCKING_PIPE, + FD_IS_NONBLOCKING_PIPE, +}; + +static int fd_is_nonblock_pipe(int fd) { + struct stat st; + int flags; + + /* Checks whether the specified file descriptor refers to a pipe, and if so if O_NONBLOCK is set. */ + + if (fstat(fd, &st) < 0) + return -errno; + + if (!S_ISFIFO(st.st_mode)) + return FD_IS_NO_PIPE; + + flags = fcntl(fd, F_GETFL); + if (flags < 0) + return -errno; + + return FLAGS_SET(flags, O_NONBLOCK) ? FD_IS_NONBLOCKING_PIPE : FD_IS_BLOCKING_PIPE; +} + +int copy_bytes_full( + int fdf, int fdt, + uint64_t max_bytes, + CopyFlags copy_flags, + void **ret_remains, + size_t *ret_remains_size, + copy_progress_bytes_t progress, + void *userdata) { + + bool try_cfr = true, try_sendfile = true, try_splice = true; + int r, nonblock_pipe = -1; + size_t m = SSIZE_MAX; /* that is the maximum that sendfile and c_f_r accept */ + + assert(fdf >= 0); + assert(fdt >= 0); + + /* Tries to copy bytes from the file descriptor 'fdf' to 'fdt' in the smartest possible way. Copies a maximum + * of 'max_bytes', which may be specified as UINT64_MAX, in which no maximum is applied. Returns negative on + * error, zero if EOF is hit before the bytes limit is hit and positive otherwise. If the copy fails for some + * reason but we read but didn't yet write some data an ret_remains/ret_remains_size is not NULL, then it will + * be initialized with an allocated buffer containing this "remaining" data. Note that these two parameters are + * initialized with a valid buffer only on failure and only if there's actually data already read. Otherwise + * these parameters if non-NULL are set to NULL. */ + + if (ret_remains) + *ret_remains = NULL; + if (ret_remains_size) + *ret_remains_size = 0; + + /* Try btrfs reflinks first. This only works on regular, seekable files, hence let's check the file offsets of + * source and destination first. */ + if ((copy_flags & COPY_REFLINK)) { + off_t foffset; + + foffset = lseek(fdf, 0, SEEK_CUR); + if (foffset >= 0) { + off_t toffset; + + toffset = lseek(fdt, 0, SEEK_CUR); + if (toffset >= 0) { + + if (foffset == 0 && toffset == 0 && max_bytes == UINT64_MAX) + r = btrfs_reflink(fdf, fdt); /* full file reflink */ + else + r = btrfs_clone_range(fdf, foffset, fdt, toffset, max_bytes == UINT64_MAX ? 0 : max_bytes); /* partial reflink */ + if (r >= 0) { + off_t t; + + /* This worked, yay! Now — to be fully correct — let's adjust the file pointers */ + if (max_bytes == UINT64_MAX) { + + /* We cloned to the end of the source file, let's position the read + * pointer there, and query it at the same time. */ + t = lseek(fdf, 0, SEEK_END); + if (t < 0) + return -errno; + if (t < foffset) + return -ESPIPE; + + /* Let's adjust the destination file write pointer by the same number + * of bytes. */ + t = lseek(fdt, toffset + (t - foffset), SEEK_SET); + if (t < 0) + return -errno; + + return 0; /* we copied the whole thing, hence hit EOF, return 0 */ + } else { + t = lseek(fdf, foffset + max_bytes, SEEK_SET); + if (t < 0) + return -errno; + + t = lseek(fdt, toffset + max_bytes, SEEK_SET); + if (t < 0) + return -errno; + + return 1; /* we copied only some number of bytes, which worked, but this means we didn't hit EOF, return 1 */ + } + } + } + } + } + + for (;;) { + ssize_t n; + + if (max_bytes <= 0) + return 1; /* return > 0 if we hit the max_bytes limit */ + + if (max_bytes != UINT64_MAX && m > max_bytes) + m = max_bytes; + + /* First try copy_file_range(), unless we already tried */ + if (try_cfr) { + n = try_copy_file_range(fdf, NULL, fdt, NULL, m, 0u); + if (n < 0) { + if (!IN_SET(n, -EINVAL, -ENOSYS, -EXDEV, -EBADF)) + return n; + + try_cfr = false; + /* use fallback below */ + } else if (n == 0) /* EOF */ + break; + else + /* Success! */ + goto next; + } + + /* First try sendfile(), unless we already tried */ + if (try_sendfile) { + n = sendfile(fdt, fdf, NULL, m); + if (n < 0) { + if (!IN_SET(errno, EINVAL, ENOSYS)) + return -errno; + + try_sendfile = false; + /* use fallback below */ + } else if (n == 0) /* EOF */ + break; + else + /* Success! */ + goto next; + } + + /* Then try splice, unless we already tried. */ + if (try_splice) { + + /* splice()'s asynchronous I/O support is a bit weird. When it encounters a pipe file + * descriptor, then it will ignore its O_NONBLOCK flag and instead only honour the + * SPLICE_F_NONBLOCK flag specified in its flag parameter. Let's hide this behaviour here, and + * check if either of the specified fds are a pipe, and if so, let's pass the flag + * automatically, depending on O_NONBLOCK being set. + * + * Here's a twist though: when we use it to move data between two pipes of which one has + * O_NONBLOCK set and the other has not, then we have no individual control over O_NONBLOCK + * behaviour. Hence in that case we can't use splice() and still guarantee systematic + * O_NONBLOCK behaviour, hence don't. */ + + if (nonblock_pipe < 0) { + int a, b; + + /* Check if either of these fds is a pipe, and if so non-blocking or not */ + a = fd_is_nonblock_pipe(fdf); + if (a < 0) + return a; + + b = fd_is_nonblock_pipe(fdt); + if (b < 0) + return b; + + if ((a == FD_IS_NO_PIPE && b == FD_IS_NO_PIPE) || + (a == FD_IS_BLOCKING_PIPE && b == FD_IS_NONBLOCKING_PIPE) || + (a == FD_IS_NONBLOCKING_PIPE && b == FD_IS_BLOCKING_PIPE)) + + /* splice() only works if one of the fds is a pipe. If neither is, let's skip + * this step right-away. As mentioned above, if one of the two fds refers to a + * blocking pipe and the other to a non-blocking pipe, we can't use splice() + * either, hence don't try either. This hence means we can only use splice() if + * either only one of the two fds is a pipe, or if both are pipes with the same + * nonblocking flag setting. */ + + try_splice = false; + else + nonblock_pipe = a == FD_IS_NONBLOCKING_PIPE || b == FD_IS_NONBLOCKING_PIPE; + } + } + + if (try_splice) { + n = splice(fdf, NULL, fdt, NULL, m, nonblock_pipe ? SPLICE_F_NONBLOCK : 0); + if (n < 0) { + if (!IN_SET(errno, EINVAL, ENOSYS)) + return -errno; + + try_splice = false; + /* use fallback below */ + } else if (n == 0) /* EOF */ + break; + else + /* Success! */ + goto next; + } + + /* As a fallback just copy bits by hand */ + { + uint8_t buf[MIN(m, COPY_BUFFER_SIZE)], *p = buf; + ssize_t z; + + n = read(fdf, buf, sizeof buf); + if (n < 0) + return -errno; + if (n == 0) /* EOF */ + break; + + z = (size_t) n; + do { + ssize_t k; + + k = write(fdt, p, z); + if (k < 0) { + r = -errno; + + if (ret_remains) { + void *copy; + + copy = memdup(p, z); + if (!copy) + return -ENOMEM; + + *ret_remains = copy; + } + + if (ret_remains_size) + *ret_remains_size = z; + + return r; + } + + assert(k <= z); + z -= k; + p += k; + } while (z > 0); + } + + next: + if (progress) { + r = progress(n, userdata); + if (r < 0) + return r; + } + + if (max_bytes != (uint64_t) -1) { + assert(max_bytes >= (uint64_t) n); + max_bytes -= n; + } + + /* sendfile accepts at most SSIZE_MAX-offset bytes to copy, + * so reduce our maximum by the amount we already copied, + * but don't go below our copy buffer size, unless we are + * close the limit of bytes we are allowed to copy. */ + m = MAX(MIN(COPY_BUFFER_SIZE, max_bytes), m - n); + } + + return 0; /* return 0 if we hit EOF earlier than the size limit */ +} + +static int fd_copy_symlink( + int df, + const char *from, + const struct stat *st, + int dt, + const char *to, + uid_t override_uid, + gid_t override_gid, + CopyFlags copy_flags) { + + _cleanup_free_ char *target = NULL; + int r; + + assert(from); + assert(st); + assert(to); + + r = readlinkat_malloc(df, from, &target); + if (r < 0) + return r; + + if (symlinkat(target, dt, to) < 0) + return -errno; + + if (fchownat(dt, to, + uid_is_valid(override_uid) ? override_uid : st->st_uid, + gid_is_valid(override_gid) ? override_gid : st->st_gid, + AT_SYMLINK_NOFOLLOW) < 0) + return -errno; + + return 0; +} + +static int fd_copy_regular( + int df, + const char *from, + const struct stat *st, + int dt, + const char *to, + uid_t override_uid, + gid_t override_gid, + CopyFlags copy_flags, + copy_progress_bytes_t progress, + void *userdata) { + + _cleanup_close_ int fdf = -1, fdt = -1; + struct timespec ts[2]; + int r, q; + + assert(from); + assert(st); + assert(to); + + fdf = openat(df, from, O_RDONLY|O_CLOEXEC|O_NOCTTY|O_NOFOLLOW); + if (fdf < 0) + return -errno; + + fdt = openat(dt, to, O_WRONLY|O_CREAT|O_EXCL|O_CLOEXEC|O_NOCTTY|O_NOFOLLOW, st->st_mode & 07777); + if (fdt < 0) + return -errno; + + r = copy_bytes_full(fdf, fdt, (uint64_t) -1, copy_flags, NULL, NULL, progress, userdata); + if (r < 0) { + (void) unlinkat(dt, to, 0); + return r; + } + + if (fchown(fdt, + uid_is_valid(override_uid) ? override_uid : st->st_uid, + gid_is_valid(override_gid) ? override_gid : st->st_gid) < 0) + r = -errno; + + if (fchmod(fdt, st->st_mode & 07777) < 0) + r = -errno; + + ts[0] = st->st_atim; + ts[1] = st->st_mtim; + (void) futimens(fdt, ts); + (void) copy_xattr(fdf, fdt); + + q = close(fdt); + fdt = -1; + + if (q < 0) { + r = -errno; + (void) unlinkat(dt, to, 0); + } + + return r; +} + +static int fd_copy_fifo( + int df, + const char *from, + const struct stat *st, + int dt, + const char *to, + uid_t override_uid, + gid_t override_gid, + CopyFlags copy_flags) { + int r; + + assert(from); + assert(st); + assert(to); + + r = mkfifoat(dt, to, st->st_mode & 07777); + if (r < 0) + return -errno; + + if (fchownat(dt, to, + uid_is_valid(override_uid) ? override_uid : st->st_uid, + gid_is_valid(override_gid) ? override_gid : st->st_gid, + AT_SYMLINK_NOFOLLOW) < 0) + r = -errno; + + if (fchmodat(dt, to, st->st_mode & 07777, 0) < 0) + r = -errno; + + return r; +} + +static int fd_copy_node( + int df, + const char *from, + const struct stat *st, + int dt, + const char *to, + uid_t override_uid, + gid_t override_gid, + CopyFlags copy_flags) { + int r; + + assert(from); + assert(st); + assert(to); + + r = mknodat(dt, to, st->st_mode, st->st_rdev); + if (r < 0) + return -errno; + + if (fchownat(dt, to, + uid_is_valid(override_uid) ? override_uid : st->st_uid, + gid_is_valid(override_gid) ? override_gid : st->st_gid, + AT_SYMLINK_NOFOLLOW) < 0) + r = -errno; + + if (fchmodat(dt, to, st->st_mode & 07777, 0) < 0) + r = -errno; + + return r; +} + +static int fd_copy_directory( + int df, + const char *from, + const struct stat *st, + int dt, + const char *to, + dev_t original_device, + unsigned depth_left, + uid_t override_uid, + gid_t override_gid, + CopyFlags copy_flags, + const char *display_path, + copy_progress_path_t progress_path, + copy_progress_bytes_t progress_bytes, + void *userdata) { + + _cleanup_close_ int fdf = -1, fdt = -1; + _cleanup_closedir_ DIR *d = NULL; + struct dirent *de; + bool exists, created; + int r; + + assert(st); + assert(to); + + if (depth_left == 0) + return -ENAMETOOLONG; + + if (from) + fdf = openat(df, from, O_RDONLY|O_DIRECTORY|O_CLOEXEC|O_NOCTTY|O_NOFOLLOW); + else + fdf = fcntl(df, F_DUPFD_CLOEXEC, 3); + if (fdf < 0) + return -errno; + + d = fdopendir(fdf); + if (!d) + return -errno; + fdf = -1; + + exists = false; + if (copy_flags & COPY_MERGE_EMPTY) { + r = dir_is_empty_at(dt, to); + if (r < 0 && r != -ENOENT) + return r; + else if (r == 1) + exists = true; + } + + if (exists) + created = false; + else { + r = mkdirat(dt, to, st->st_mode & 07777); + if (r >= 0) + created = true; + else if (errno == EEXIST && (copy_flags & COPY_MERGE)) + created = false; + else + return -errno; + } + + fdt = openat(dt, to, O_RDONLY|O_DIRECTORY|O_CLOEXEC|O_NOCTTY|O_NOFOLLOW); + if (fdt < 0) + return -errno; + + r = 0; + + FOREACH_DIRENT_ALL(de, d, return -errno) { + const char *child_display_path = NULL; + _cleanup_free_ char *dp = NULL; + struct stat buf; + int q; + + if (dot_or_dot_dot(de->d_name)) + continue; + + if (fstatat(dirfd(d), de->d_name, &buf, AT_SYMLINK_NOFOLLOW) < 0) { + r = -errno; + continue; + } + + if (progress_path) { + if (display_path) + child_display_path = dp = strjoin(display_path, "/", de->d_name); + else + child_display_path = de->d_name; + + r = progress_path(child_display_path, &buf, userdata); + if (r < 0) + return r; + } + + if (S_ISDIR(buf.st_mode)) { + /* + * Don't descend into directories on other file systems, if this is requested. We do a simple + * .st_dev check here, which basically comes for free. Note that we do this check only on + * directories, not other kind of file system objects, for two reason: + * + * • The kernel's overlayfs pseudo file system that overlays multiple real file systems + * propagates the .st_dev field of the file system a file originates from all the way up + * through the stack to stat(). It doesn't do that for directories however. This means that + * comparing .st_dev on non-directories suggests that they all are mount points. To avoid + * confusion we hence avoid relying on this check for regular files. + * + * • The main reason we do this check at all is to protect ourselves from bind mount cycles, + * where we really want to avoid descending down in all eternity. However the .st_dev check + * is usually not sufficient for this protection anyway, as bind mount cycles from the same + * file system onto itself can't be detected that way. (Note we also do a recursion depth + * check, which is probably the better protection in this regard, which is why + * COPY_SAME_MOUNT is optional). + */ + + if (FLAGS_SET(copy_flags, COPY_SAME_MOUNT)) { + if (buf.st_dev != original_device) + continue; + + r = fd_is_mount_point(dirfd(d), de->d_name, 0); + if (r < 0) + return r; + if (r > 0) + continue; + } + + q = fd_copy_directory(dirfd(d), de->d_name, &buf, fdt, de->d_name, original_device, depth_left-1, override_uid, override_gid, copy_flags, child_display_path, progress_path, progress_bytes, userdata); + } else if (S_ISREG(buf.st_mode)) + q = fd_copy_regular(dirfd(d), de->d_name, &buf, fdt, de->d_name, override_uid, override_gid, copy_flags, progress_bytes, userdata); + else if (S_ISLNK(buf.st_mode)) + q = fd_copy_symlink(dirfd(d), de->d_name, &buf, fdt, de->d_name, override_uid, override_gid, copy_flags); + else if (S_ISFIFO(buf.st_mode)) + q = fd_copy_fifo(dirfd(d), de->d_name, &buf, fdt, de->d_name, override_uid, override_gid, copy_flags); + else if (S_ISBLK(buf.st_mode) || S_ISCHR(buf.st_mode) || S_ISSOCK(buf.st_mode)) + q = fd_copy_node(dirfd(d), de->d_name, &buf, fdt, de->d_name, override_uid, override_gid, copy_flags); + else + q = -EOPNOTSUPP; + + if (q == -EEXIST && (copy_flags & COPY_MERGE)) + q = 0; + + if (q < 0) + r = q; + } + + if (created) { + struct timespec ut[2] = { + st->st_atim, + st->st_mtim + }; + + if (fchown(fdt, + uid_is_valid(override_uid) ? override_uid : st->st_uid, + gid_is_valid(override_gid) ? override_gid : st->st_gid) < 0) + r = -errno; + + if (fchmod(fdt, st->st_mode & 07777) < 0) + r = -errno; + + (void) copy_xattr(dirfd(d), fdt); + (void) futimens(fdt, ut); + } + + return r; +} + +int copy_tree_at_full( + int fdf, + const char *from, + int fdt, + const char *to, + uid_t override_uid, + gid_t override_gid, + CopyFlags copy_flags, + copy_progress_path_t progress_path, + copy_progress_bytes_t progress_bytes, + void *userdata) { + + struct stat st; + + assert(from); + assert(to); + + if (fstatat(fdf, from, &st, AT_SYMLINK_NOFOLLOW) < 0) + return -errno; + + if (S_ISREG(st.st_mode)) + return fd_copy_regular(fdf, from, &st, fdt, to, override_uid, override_gid, copy_flags, progress_bytes, userdata); + else if (S_ISDIR(st.st_mode)) + return fd_copy_directory(fdf, from, &st, fdt, to, st.st_dev, COPY_DEPTH_MAX, override_uid, override_gid, copy_flags, NULL, progress_path, progress_bytes, userdata); + else if (S_ISLNK(st.st_mode)) + return fd_copy_symlink(fdf, from, &st, fdt, to, override_uid, override_gid, copy_flags); + else if (S_ISFIFO(st.st_mode)) + return fd_copy_fifo(fdf, from, &st, fdt, to, override_uid, override_gid, copy_flags); + else if (S_ISBLK(st.st_mode) || S_ISCHR(st.st_mode) || S_ISSOCK(st.st_mode)) + return fd_copy_node(fdf, from, &st, fdt, to, override_uid, override_gid, copy_flags); + else + return -EOPNOTSUPP; +} + +int copy_directory_fd_full( + int dirfd, + const char *to, + CopyFlags copy_flags, + copy_progress_path_t progress_path, + copy_progress_bytes_t progress_bytes, + void *userdata) { + + struct stat st; + + assert(dirfd >= 0); + assert(to); + + if (fstat(dirfd, &st) < 0) + return -errno; + + if (!S_ISDIR(st.st_mode)) + return -ENOTDIR; + + return fd_copy_directory(dirfd, NULL, &st, AT_FDCWD, to, st.st_dev, COPY_DEPTH_MAX, UID_INVALID, GID_INVALID, copy_flags, NULL, progress_path, progress_bytes, userdata); +} + +int copy_directory_full( + const char *from, + const char *to, + CopyFlags copy_flags, + copy_progress_path_t progress_path, + copy_progress_bytes_t progress_bytes, + void *userdata) { + + struct stat st; + + assert(from); + assert(to); + + if (lstat(from, &st) < 0) + return -errno; + + if (!S_ISDIR(st.st_mode)) + return -ENOTDIR; + + return fd_copy_directory(AT_FDCWD, from, &st, AT_FDCWD, to, st.st_dev, COPY_DEPTH_MAX, UID_INVALID, GID_INVALID, copy_flags, NULL, progress_path, progress_bytes, userdata); +} + +int copy_file_fd_full( + const char *from, + int fdt, + CopyFlags copy_flags, + copy_progress_bytes_t progress_bytes, + void *userdata) { + + _cleanup_close_ int fdf = -1; + int r; + + assert(from); + assert(fdt >= 0); + + fdf = open(from, O_RDONLY|O_CLOEXEC|O_NOCTTY); + if (fdf < 0) + return -errno; + + r = copy_bytes_full(fdf, fdt, (uint64_t) -1, copy_flags, NULL, NULL, progress_bytes, userdata); + + (void) copy_times(fdf, fdt); + (void) copy_xattr(fdf, fdt); + + return r; +} + +int copy_file_full( + const char *from, + const char *to, + int flags, + mode_t mode, + unsigned chattr_flags, + CopyFlags copy_flags, + copy_progress_bytes_t progress_bytes, + void *userdata) { + + int fdt = -1, r; + + assert(from); + assert(to); + + RUN_WITH_UMASK(0000) { + fdt = open(to, flags|O_WRONLY|O_CREAT|O_CLOEXEC|O_NOCTTY, mode); + if (fdt < 0) + return -errno; + } + + if (chattr_flags != 0) + (void) chattr_fd(fdt, chattr_flags, (unsigned) -1, NULL); + + r = copy_file_fd_full(from, fdt, copy_flags, progress_bytes, userdata); + if (r < 0) { + close(fdt); + (void) unlink(to); + return r; + } + + if (close(fdt) < 0) { + unlink_noerrno(to); + return -errno; + } + + return 0; +} + +int copy_file_atomic_full( + const char *from, + const char *to, + mode_t mode, + unsigned chattr_flags, + CopyFlags copy_flags, + copy_progress_bytes_t progress_bytes, + void *userdata) { + + _cleanup_(unlink_and_freep) char *t = NULL; + _cleanup_close_ int fdt = -1; + int r; + + assert(from); + assert(to); + + /* We try to use O_TMPFILE here to create the file if we can. Note that that only works if COPY_REPLACE is not + * set though as we need to use linkat() for linking the O_TMPFILE file into the file system but that system + * call can't replace existing files. Hence, if COPY_REPLACE is set we create a temporary name in the file + * system right-away and unconditionally which we then can renameat() to the right name after we completed + * writing it. */ + + if (copy_flags & COPY_REPLACE) { + r = tempfn_random(to, NULL, &t); + if (r < 0) + return r; + + fdt = open(t, O_CREAT|O_EXCL|O_NOFOLLOW|O_NOCTTY|O_WRONLY|O_CLOEXEC, 0600); + if (fdt < 0) { + t = mfree(t); + return -errno; + } + } else { + fdt = open_tmpfile_linkable(to, O_WRONLY|O_CLOEXEC, &t); + if (fdt < 0) + return fdt; + } + + if (chattr_flags != 0) + (void) chattr_fd(fdt, chattr_flags, (unsigned) -1, NULL); + + r = copy_file_fd_full(from, fdt, copy_flags, progress_bytes, userdata); + if (r < 0) + return r; + + if (fchmod(fdt, mode) < 0) + return -errno; + + if (copy_flags & COPY_REPLACE) { + if (renameat(AT_FDCWD, t, AT_FDCWD, to) < 0) + return -errno; + } else { + r = link_tmpfile(fdt, t, to); + if (r < 0) + return r; + } + + t = mfree(t); + return 0; +} + +int copy_times(int fdf, int fdt) { + struct timespec ut[2]; + struct stat st; + usec_t crtime = 0; + + assert(fdf >= 0); + assert(fdt >= 0); + + if (fstat(fdf, &st) < 0) + return -errno; + + ut[0] = st.st_atim; + ut[1] = st.st_mtim; + + if (futimens(fdt, ut) < 0) + return -errno; + + if (fd_getcrtime(fdf, &crtime) >= 0) + (void) fd_setcrtime(fdt, crtime); + + return 0; +} + +int copy_xattr(int fdf, int fdt) { + _cleanup_free_ char *bufa = NULL, *bufb = NULL; + size_t sza = 100, szb = 100; + ssize_t n; + int ret = 0; + const char *p; + + for (;;) { + bufa = malloc(sza); + if (!bufa) + return -ENOMEM; + + n = flistxattr(fdf, bufa, sza); + if (n == 0) + return 0; + if (n > 0) + break; + if (errno != ERANGE) + return -errno; + + sza *= 2; + + bufa = mfree(bufa); + } + + p = bufa; + while (n > 0) { + size_t l; + + l = strlen(p); + assert(l < (size_t) n); + + if (startswith(p, "user.")) { + ssize_t m; + + if (!bufb) { + bufb = malloc(szb); + if (!bufb) + return -ENOMEM; + } + + m = fgetxattr(fdf, p, bufb, szb); + if (m < 0) { + if (errno == ERANGE) { + szb *= 2; + bufb = mfree(bufb); + continue; + } + + return -errno; + } + + if (fsetxattr(fdt, p, bufb, m, 0) < 0) + ret = -errno; + } + + p += l + 1; + n -= l + 1; + } + + return ret; +} diff --git a/src/basic/copy.h b/src/basic/copy.h new file mode 100644 index 0000000..f677021 --- /dev/null +++ b/src/basic/copy.h @@ -0,0 +1,61 @@ +/* SPDX-License-Identifier: LGPL-2.1+ */ +#pragma once + +#include <fcntl.h> +#include <inttypes.h> +#include <stdbool.h> +#include <stdint.h> +#include <sys/stat.h> +#include <sys/types.h> + +typedef enum CopyFlags { + COPY_REFLINK = 1 << 0, /* Try to reflink */ + COPY_MERGE = 1 << 1, /* Merge existing trees with our new one to copy */ + COPY_REPLACE = 1 << 2, /* Replace an existing file if there's one */ + COPY_SAME_MOUNT = 1 << 3, /* Don't descend recursively into other file systems, across mount point boundaries */ + COPY_MERGE_EMPTY = 1 << 4, /* Merge an existing, empty directory with our new tree to copy */ +} CopyFlags; + +typedef int (*copy_progress_bytes_t)(uint64_t n_bytes, void *userdata); +typedef int (*copy_progress_path_t)(const char *path, const struct stat *st, void *userdata); + +int copy_file_fd_full(const char *from, int to, CopyFlags copy_flags, copy_progress_bytes_t progress, void *userdata); +static inline int copy_file_fd(const char *from, int to, CopyFlags copy_flags) { + return copy_file_fd_full(from, to, copy_flags, NULL, NULL); +} + +int copy_file_full(const char *from, const char *to, int open_flags, mode_t mode, unsigned chattr_flags, CopyFlags copy_flags, copy_progress_bytes_t progress, void *userdata); +static inline int copy_file(const char *from, const char *to, int open_flags, mode_t mode, unsigned chattr_flags, CopyFlags copy_flags) { + return copy_file_full(from, to, open_flags, mode, chattr_flags, copy_flags, NULL, NULL); +} + +int copy_file_atomic_full(const char *from, const char *to, mode_t mode, unsigned chattr_flags, CopyFlags copy_flags, copy_progress_bytes_t progress, void *userdata); +static inline int copy_file_atomic(const char *from, const char *to, mode_t mode, unsigned chattr_flags, CopyFlags copy_flags) { + return copy_file_atomic_full(from, to, mode, chattr_flags, copy_flags, NULL, NULL); +} + +int copy_tree_at_full(int fdf, const char *from, int fdt, const char *to, uid_t override_uid, gid_t override_gid, CopyFlags copy_flags, copy_progress_path_t progress_path, copy_progress_bytes_t progress_bytes, void *userdata); +static inline int copy_tree_at(int fdf, const char *from, int fdt, const char *to, uid_t override_uid, gid_t override_gid, CopyFlags copy_flags) { + return copy_tree_at_full(fdf, from, fdt, to, override_uid, override_gid, copy_flags, NULL, NULL, NULL); +} +static inline int copy_tree(const char *from, const char *to, uid_t override_uid, gid_t override_gid, CopyFlags copy_flags) { + return copy_tree_at_full(AT_FDCWD, from, AT_FDCWD, to, override_uid, override_gid, copy_flags, NULL, NULL, NULL); +} + +int copy_directory_fd_full(int dirfd, const char *to, CopyFlags copy_flags, copy_progress_path_t progress_path, copy_progress_bytes_t progress_bytes, void *userdata); +static inline int copy_directory_fd(int dirfd, const char *to, CopyFlags copy_flags) { + return copy_directory_fd_full(dirfd, to, copy_flags, NULL, NULL, NULL); +} + +int copy_directory_full(const char *from, const char *to, CopyFlags copy_flags, copy_progress_path_t progress_path, copy_progress_bytes_t progress_bytes, void *userdata); +static inline int copy_directory(const char *from, const char *to, CopyFlags copy_flags) { + return copy_directory_full(from, to, copy_flags, NULL, NULL, NULL); +} + +int copy_bytes_full(int fdf, int fdt, uint64_t max_bytes, CopyFlags copy_flags, void **ret_remains, size_t *ret_remains_size, copy_progress_bytes_t progress, void *userdata); +static inline int copy_bytes(int fdf, int fdt, uint64_t max_bytes, CopyFlags copy_flags) { + return copy_bytes_full(fdf, fdt, max_bytes, copy_flags, NULL, NULL, NULL, NULL); +} + +int copy_times(int fdf, int fdt); +int copy_xattr(int fdf, int fdt); diff --git a/src/basic/def.h b/src/basic/def.h new file mode 100644 index 0000000..5be018d --- /dev/null +++ b/src/basic/def.h @@ -0,0 +1,77 @@ +/* SPDX-License-Identifier: LGPL-2.1+ */ +#pragma once + +#include "util.h" + +#define DEFAULT_TIMEOUT_USEC (90*USEC_PER_SEC) +#define DEFAULT_RESTART_USEC (100*USEC_PER_MSEC) +#define DEFAULT_CONFIRM_USEC (30*USEC_PER_SEC) + +#define DEFAULT_START_LIMIT_INTERVAL (10*USEC_PER_SEC) +#define DEFAULT_START_LIMIT_BURST 5 + +/* The default time after which exit-on-idle services exit. This + * should be kept lower than the watchdog timeout, because otherwise + * the watchdog pings will keep the loop busy. */ +#define DEFAULT_EXIT_USEC (30*USEC_PER_SEC) + +/* The default value for the net.unix.max_dgram_qlen sysctl */ +#define DEFAULT_UNIX_MAX_DGRAM_QLEN 512UL + +#define SIGNALS_CRASH_HANDLER SIGSEGV,SIGILL,SIGFPE,SIGBUS,SIGQUIT,SIGABRT +#define SIGNALS_IGNORE SIGPIPE + +#if HAVE_SPLIT_USR +#define KBD_KEYMAP_DIRS \ + "/usr/share/keymaps/\0" \ + "/usr/share/kbd/keymaps/\0" \ + "/usr/lib/kbd/keymaps/\0" \ + "/lib/kbd/keymaps/\0" +#else +#define KBD_KEYMAP_DIRS \ + "/usr/share/keymaps/\0" \ + "/usr/share/kbd/keymaps/\0" \ + "/usr/lib/kbd/keymaps/\0" +#endif + +/* Note that we use the new /run prefix here (instead of /var/run) since we require them to be aliases and that way we + * become independent of /var being mounted */ +#define DEFAULT_SYSTEM_BUS_ADDRESS "unix:path=/run/dbus/system_bus_socket" +#define DEFAULT_USER_BUS_ADDRESS_FMT "unix:path=%s/bus" + +#define PLYMOUTH_SOCKET { \ + .un.sun_family = AF_UNIX, \ + .un.sun_path = "\0/org/freedesktop/plymouthd", \ + } + +#define NOTIFY_FD_MAX 768 +#define NOTIFY_BUFFER_MAX PIPE_BUF + +#if HAVE_SPLIT_USR +# define _CONF_PATHS_SPLIT_USR_NULSTR(n) "/lib/" n "\0" +# define _CONF_PATHS_SPLIT_USR(n) , "/lib/" n +#else +# define _CONF_PATHS_SPLIT_USR_NULSTR(n) +# define _CONF_PATHS_SPLIT_USR(n) +#endif + +/* Return a nulstr for a standard cascade of configuration paths, + * suitable to pass to conf_files_list_nulstr() or config_parse_many_nulstr() + * to implement drop-in directories for extending configuration + * files. */ +#define CONF_PATHS_NULSTR(n) \ + "/etc/" n "\0" \ + "/run/" n "\0" \ + "/usr/local/lib/" n "\0" \ + "/usr/lib/" n "\0" \ + _CONF_PATHS_SPLIT_USR_NULSTR(n) + +#define CONF_PATHS_STRV(n) \ + STRV_MAKE( \ + "/etc/" n, \ + "/run/" n, \ + "/usr/local/lib/" n, \ + "/usr/lib/" n \ + _CONF_PATHS_SPLIT_USR(n)) + +#define HIGH_RLIMIT_MEMLOCK (1024ULL*1024ULL*64ULL) diff --git a/src/basic/device-nodes.c b/src/basic/device-nodes.c new file mode 100644 index 0000000..5fcdf24 --- /dev/null +++ b/src/basic/device-nodes.c @@ -0,0 +1,63 @@ +/* SPDX-License-Identifier: LGPL-2.1+ */ + +#include <errno.h> +#include <stdio.h> +#include <string.h> + +#include "device-nodes.h" +#include "utf8.h" + +int whitelisted_char_for_devnode(char c, const char *white) { + + if ((c >= '0' && c <= '9') || + (c >= 'A' && c <= 'Z') || + (c >= 'a' && c <= 'z') || + strchr("#+-.:=@_", c) != NULL || + (white != NULL && strchr(white, c) != NULL)) + return 1; + + return 0; +} + +int encode_devnode_name(const char *str, char *str_enc, size_t len) { + size_t i, j; + + if (!str || !str_enc) + return -EINVAL; + + for (i = 0, j = 0; str[i] != '\0'; i++) { + int seqlen; + + seqlen = utf8_encoded_valid_unichar(&str[i]); + if (seqlen > 1) { + + if (len-j < (size_t)seqlen) + return -EINVAL; + + memcpy(&str_enc[j], &str[i], seqlen); + j += seqlen; + i += (seqlen-1); + + } else if (str[i] == '\\' || !whitelisted_char_for_devnode(str[i], NULL)) { + + if (len-j < 4) + return -EINVAL; + + sprintf(&str_enc[j], "\\x%02x", (unsigned char) str[i]); + j += 4; + + } else { + if (len-j < 1) + return -EINVAL; + + str_enc[j] = str[i]; + j++; + } + } + + if (len-j < 1) + return -EINVAL; + + str_enc[j] = '\0'; + return 0; +} diff --git a/src/basic/device-nodes.h b/src/basic/device-nodes.h new file mode 100644 index 0000000..3840e6d --- /dev/null +++ b/src/basic/device-nodes.h @@ -0,0 +1,16 @@ +/* SPDX-License-Identifier: LGPL-2.1+ */ +#pragma once + +#include <stddef.h> +#include <sys/types.h> + +#include "macro.h" +#include "stdio-util.h" + +int encode_devnode_name(const char *str, char *str_enc, size_t len); +int whitelisted_char_for_devnode(char c, const char *additional); + +#define DEV_NUM_PATH_MAX \ + (STRLEN("/dev/block/") + DECIMAL_STR_MAX(dev_t) + 1 + DECIMAL_STR_MAX(dev_t)) +#define xsprintf_dev_num_path(buf, type, devno) \ + xsprintf(buf, "/dev/%s/%u:%u", type, major(devno), minor(devno)) diff --git a/src/basic/dirent-util.c b/src/basic/dirent-util.c new file mode 100644 index 0000000..d1d2c0e --- /dev/null +++ b/src/basic/dirent-util.c @@ -0,0 +1,71 @@ +/* SPDX-License-Identifier: LGPL-2.1+ */ + +#include <fcntl.h> +#include <sys/stat.h> + +#include "dirent-util.h" +#include "path-util.h" +#include "string-util.h" + +int dirent_ensure_type(DIR *d, struct dirent *de) { + struct stat st; + + assert(d); + assert(de); + + if (de->d_type != DT_UNKNOWN) + return 0; + + if (fstatat(dirfd(d), de->d_name, &st, AT_SYMLINK_NOFOLLOW) < 0) + return -errno; + + de->d_type = + S_ISREG(st.st_mode) ? DT_REG : + S_ISDIR(st.st_mode) ? DT_DIR : + S_ISLNK(st.st_mode) ? DT_LNK : + S_ISFIFO(st.st_mode) ? DT_FIFO : + S_ISSOCK(st.st_mode) ? DT_SOCK : + S_ISCHR(st.st_mode) ? DT_CHR : + S_ISBLK(st.st_mode) ? DT_BLK : + DT_UNKNOWN; + + return 0; +} + +bool dirent_is_file(const struct dirent *de) { + assert(de); + + if (!IN_SET(de->d_type, DT_REG, DT_LNK, DT_UNKNOWN)) + return false; + + if (hidden_or_backup_file(de->d_name)) + return false; + + return true; +} + +bool dirent_is_file_with_suffix(const struct dirent *de, const char *suffix) { + assert(de); + + if (!IN_SET(de->d_type, DT_REG, DT_LNK, DT_UNKNOWN)) + return false; + + if (de->d_name[0] == '.') + return false; + + if (!suffix) + return true; + + return endswith(de->d_name, suffix); +} + +struct dirent* readdir_no_dot(DIR *dirp) { + struct dirent* d; + + for (;;) { + d = readdir(dirp); + if (d && dot_or_dot_dot(d->d_name)) + continue; + return d; + } +} diff --git a/src/basic/dirent-util.h b/src/basic/dirent-util.h new file mode 100644 index 0000000..b1b8767 --- /dev/null +++ b/src/basic/dirent-util.h @@ -0,0 +1,36 @@ +/* SPDX-License-Identifier: LGPL-2.1+ */ +#pragma once + +#include <dirent.h> +#include <errno.h> +#include <stdbool.h> + +#include "macro.h" +#include "path-util.h" + +int dirent_ensure_type(DIR *d, struct dirent *de); + +bool dirent_is_file(const struct dirent *de) _pure_; +bool dirent_is_file_with_suffix(const struct dirent *de, const char *suffix) _pure_; + +struct dirent* readdir_no_dot(DIR *dirp); + +#define FOREACH_DIRENT(de, d, on_error) \ + for (errno = 0, de = readdir(d);; errno = 0, de = readdir(d)) \ + if (!de) { \ + if (errno > 0) { \ + on_error; \ + } \ + break; \ + } else if (hidden_or_backup_file((de)->d_name)) \ + continue; \ + else + +#define FOREACH_DIRENT_ALL(de, d, on_error) \ + for (errno = 0, de = readdir(d);; errno = 0, de = readdir(d)) \ + if (!de) { \ + if (errno > 0) { \ + on_error; \ + } \ + break; \ + } else diff --git a/src/basic/env-file.c b/src/basic/env-file.c new file mode 100644 index 0000000..7f10f9a --- /dev/null +++ b/src/basic/env-file.c @@ -0,0 +1,564 @@ +/* SPDX-License-Identifier: LGPL-2.1+ */ + +#include <stdio_ext.h> + +#include "alloc-util.h" +#include "env-file.h" +#include "env-util.h" +#include "escape.h" +#include "fd-util.h" +#include "fileio.h" +#include "fs-util.h" +#include "string-util.h" +#include "strv.h" +#include "tmpfile-util.h" +#include "utf8.h" + +static int parse_env_file_internal( + FILE *f, + const char *fname, + int (*push) (const char *filename, unsigned line, + const char *key, char *value, void *userdata, int *n_pushed), + void *userdata, + int *n_pushed) { + + size_t key_alloc = 0, n_key = 0, value_alloc = 0, n_value = 0, last_value_whitespace = (size_t) -1, last_key_whitespace = (size_t) -1; + _cleanup_free_ char *contents = NULL, *key = NULL, *value = NULL; + unsigned line = 1; + char *p; + int r; + + enum { + PRE_KEY, + KEY, + PRE_VALUE, + VALUE, + VALUE_ESCAPE, + SINGLE_QUOTE_VALUE, + DOUBLE_QUOTE_VALUE, + DOUBLE_QUOTE_VALUE_ESCAPE, + COMMENT, + COMMENT_ESCAPE + } state = PRE_KEY; + + if (f) + r = read_full_stream(f, &contents, NULL); + else + r = read_full_file(fname, &contents, NULL); + if (r < 0) + return r; + + for (p = contents; *p; p++) { + char c = *p; + + switch (state) { + + case PRE_KEY: + if (strchr(COMMENTS, c)) + state = COMMENT; + else if (!strchr(WHITESPACE, c)) { + state = KEY; + last_key_whitespace = (size_t) -1; + + if (!GREEDY_REALLOC(key, key_alloc, n_key+2)) + return -ENOMEM; + + key[n_key++] = c; + } + break; + + case KEY: + if (strchr(NEWLINE, c)) { + state = PRE_KEY; + line++; + n_key = 0; + } else if (c == '=') { + state = PRE_VALUE; + last_value_whitespace = (size_t) -1; + } else { + if (!strchr(WHITESPACE, c)) + last_key_whitespace = (size_t) -1; + else if (last_key_whitespace == (size_t) -1) + last_key_whitespace = n_key; + + if (!GREEDY_REALLOC(key, key_alloc, n_key+2)) + return -ENOMEM; + + key[n_key++] = c; + } + + break; + + case PRE_VALUE: + if (strchr(NEWLINE, c)) { + state = PRE_KEY; + line++; + key[n_key] = 0; + + if (value) + value[n_value] = 0; + + /* strip trailing whitespace from key */ + if (last_key_whitespace != (size_t) -1) + key[last_key_whitespace] = 0; + + r = push(fname, line, key, value, userdata, n_pushed); + if (r < 0) + return r; + + n_key = 0; + value = NULL; + value_alloc = n_value = 0; + + } else if (c == '\'') + state = SINGLE_QUOTE_VALUE; + else if (c == '"') + state = DOUBLE_QUOTE_VALUE; + else if (c == '\\') + state = VALUE_ESCAPE; + else if (!strchr(WHITESPACE, c)) { + state = VALUE; + + if (!GREEDY_REALLOC(value, value_alloc, n_value+2)) + return -ENOMEM; + + value[n_value++] = c; + } + + break; + + case VALUE: + if (strchr(NEWLINE, c)) { + state = PRE_KEY; + line++; + + key[n_key] = 0; + + if (value) + value[n_value] = 0; + + /* Chomp off trailing whitespace from value */ + if (last_value_whitespace != (size_t) -1) + value[last_value_whitespace] = 0; + + /* strip trailing whitespace from key */ + if (last_key_whitespace != (size_t) -1) + key[last_key_whitespace] = 0; + + r = push(fname, line, key, value, userdata, n_pushed); + if (r < 0) + return r; + + n_key = 0; + value = NULL; + value_alloc = n_value = 0; + + } else if (c == '\\') { + state = VALUE_ESCAPE; + last_value_whitespace = (size_t) -1; + } else { + if (!strchr(WHITESPACE, c)) + last_value_whitespace = (size_t) -1; + else if (last_value_whitespace == (size_t) -1) + last_value_whitespace = n_value; + + if (!GREEDY_REALLOC(value, value_alloc, n_value+2)) + return -ENOMEM; + + value[n_value++] = c; + } + + break; + + case VALUE_ESCAPE: + state = VALUE; + + if (!strchr(NEWLINE, c)) { + /* Escaped newlines we eat up entirely */ + if (!GREEDY_REALLOC(value, value_alloc, n_value+2)) + return -ENOMEM; + + value[n_value++] = c; + } + break; + + case SINGLE_QUOTE_VALUE: + if (c == '\'') + state = PRE_VALUE; + else { + if (!GREEDY_REALLOC(value, value_alloc, n_value+2)) + return -ENOMEM; + + value[n_value++] = c; + } + + break; + + case DOUBLE_QUOTE_VALUE: + if (c == '"') + state = PRE_VALUE; + else if (c == '\\') + state = DOUBLE_QUOTE_VALUE_ESCAPE; + else { + if (!GREEDY_REALLOC(value, value_alloc, n_value+2)) + return -ENOMEM; + + value[n_value++] = c; + } + + break; + + case DOUBLE_QUOTE_VALUE_ESCAPE: + state = DOUBLE_QUOTE_VALUE; + + if (c == '"') { + if (!GREEDY_REALLOC(value, value_alloc, n_value+2)) + return -ENOMEM; + value[n_value++] = '"'; + } else if (!strchr(NEWLINE, c)) { + if (!GREEDY_REALLOC(value, value_alloc, n_value+3)) + return -ENOMEM; + value[n_value++] = '\\'; + value[n_value++] = c; + } + + break; + + case COMMENT: + if (c == '\\') + state = COMMENT_ESCAPE; + else if (strchr(NEWLINE, c)) { + state = PRE_KEY; + line++; + } + break; + + case COMMENT_ESCAPE: + state = COMMENT; + break; + } + } + + if (IN_SET(state, + PRE_VALUE, + VALUE, + VALUE_ESCAPE, + SINGLE_QUOTE_VALUE, + DOUBLE_QUOTE_VALUE, + DOUBLE_QUOTE_VALUE_ESCAPE)) { + + key[n_key] = 0; + + if (value) + value[n_value] = 0; + + if (state == VALUE) + if (last_value_whitespace != (size_t) -1) + value[last_value_whitespace] = 0; + + /* strip trailing whitespace from key */ + if (last_key_whitespace != (size_t) -1) + key[last_key_whitespace] = 0; + + r = push(fname, line, key, value, userdata, n_pushed); + if (r < 0) + return r; + + value = NULL; + } + + return 0; +} + +static int check_utf8ness_and_warn( + const char *filename, unsigned line, + const char *key, char *value) { + + if (!utf8_is_valid(key)) { + _cleanup_free_ char *p = NULL; + + p = utf8_escape_invalid(key); + return log_error_errno(SYNTHETIC_ERRNO(EINVAL), + "%s:%u: invalid UTF-8 in key '%s', ignoring.", + strna(filename), line, p); + } + + if (value && !utf8_is_valid(value)) { + _cleanup_free_ char *p = NULL; + + p = utf8_escape_invalid(value); + return log_error_errno(SYNTHETIC_ERRNO(EINVAL), + "%s:%u: invalid UTF-8 value for key %s: '%s', ignoring.", + strna(filename), line, key, p); + } + + return 0; +} + +static int parse_env_file_push( + const char *filename, unsigned line, + const char *key, char *value, + void *userdata, + int *n_pushed) { + + const char *k; + va_list aq, *ap = userdata; + int r; + + r = check_utf8ness_and_warn(filename, line, key, value); + if (r < 0) + return r; + + va_copy(aq, *ap); + + while ((k = va_arg(aq, const char *))) { + char **v; + + v = va_arg(aq, char **); + + if (streq(key, k)) { + va_end(aq); + free(*v); + *v = value; + + if (n_pushed) + (*n_pushed)++; + + return 1; + } + } + + va_end(aq); + free(value); + + return 0; +} + +int parse_env_filev( + FILE *f, + const char *fname, + va_list ap) { + + int r, n_pushed = 0; + va_list aq; + + va_copy(aq, ap); + r = parse_env_file_internal(f, fname, parse_env_file_push, &aq, &n_pushed); + va_end(aq); + if (r < 0) + return r; + + return n_pushed; +} + +int parse_env_file_sentinel( + FILE *f, + const char *fname, + ...) { + + va_list ap; + int r; + + va_start(ap, fname); + r = parse_env_filev(f, fname, ap); + va_end(ap); + + return r; +} + +static int load_env_file_push( + const char *filename, unsigned line, + const char *key, char *value, + void *userdata, + int *n_pushed) { + char ***m = userdata; + char *p; + int r; + + r = check_utf8ness_and_warn(filename, line, key, value); + if (r < 0) + return r; + + p = strjoin(key, "=", value); + if (!p) + return -ENOMEM; + + r = strv_env_replace(m, p); + if (r < 0) { + free(p); + return r; + } + + if (n_pushed) + (*n_pushed)++; + + free(value); + return 0; +} + +int load_env_file(FILE *f, const char *fname, char ***rl) { + char **m = NULL; + int r; + + r = parse_env_file_internal(f, fname, load_env_file_push, &m, NULL); + if (r < 0) { + strv_free(m); + return r; + } + + *rl = m; + return 0; +} + +static int load_env_file_push_pairs( + const char *filename, unsigned line, + const char *key, char *value, + void *userdata, + int *n_pushed) { + char ***m = userdata; + int r; + + r = check_utf8ness_and_warn(filename, line, key, value); + if (r < 0) + return r; + + r = strv_extend(m, key); + if (r < 0) + return -ENOMEM; + + if (!value) { + r = strv_extend(m, ""); + if (r < 0) + return -ENOMEM; + } else { + r = strv_push(m, value); + if (r < 0) + return r; + } + + if (n_pushed) + (*n_pushed)++; + + return 0; +} + +int load_env_file_pairs(FILE *f, const char *fname, char ***rl) { + char **m = NULL; + int r; + + r = parse_env_file_internal(f, fname, load_env_file_push_pairs, &m, NULL); + if (r < 0) { + strv_free(m); + return r; + } + + *rl = m; + return 0; +} + +static int merge_env_file_push( + const char *filename, unsigned line, + const char *key, char *value, + void *userdata, + int *n_pushed) { + + char ***env = userdata; + char *expanded_value; + + assert(env); + + if (!value) { + log_error("%s:%u: invalid syntax (around \"%s\"), ignoring.", strna(filename), line, key); + return 0; + } + + if (!env_name_is_valid(key)) { + log_error("%s:%u: invalid variable name \"%s\", ignoring.", strna(filename), line, key); + free(value); + return 0; + } + + expanded_value = replace_env(value, *env, + REPLACE_ENV_USE_ENVIRONMENT| + REPLACE_ENV_ALLOW_BRACELESS| + REPLACE_ENV_ALLOW_EXTENDED); + if (!expanded_value) + return -ENOMEM; + + free_and_replace(value, expanded_value); + + return load_env_file_push(filename, line, key, value, env, n_pushed); +} + +int merge_env_file( + char ***env, + FILE *f, + const char *fname) { + + /* NOTE: this function supports braceful and braceless variable expansions, + * plus "extended" substitutions, unlike other exported parsing functions. + */ + + return parse_env_file_internal(f, fname, merge_env_file_push, env, NULL); +} + +static void write_env_var(FILE *f, const char *v) { + const char *p; + + p = strchr(v, '='); + if (!p) { + /* Fallback */ + fputs_unlocked(v, f); + fputc_unlocked('\n', f); + return; + } + + p++; + fwrite_unlocked(v, 1, p-v, f); + + if (string_has_cc(p, NULL) || chars_intersect(p, WHITESPACE SHELL_NEED_QUOTES)) { + fputc_unlocked('"', f); + + for (; *p; p++) { + if (strchr(SHELL_NEED_ESCAPE, *p)) + fputc_unlocked('\\', f); + + fputc_unlocked(*p, f); + } + + fputc_unlocked('"', f); + } else + fputs_unlocked(p, f); + + fputc_unlocked('\n', f); +} + +int write_env_file(const char *fname, char **l) { + _cleanup_fclose_ FILE *f = NULL; + _cleanup_free_ char *p = NULL; + char **i; + int r; + + assert(fname); + + r = fopen_temporary(fname, &f, &p); + if (r < 0) + return r; + + (void) __fsetlocking(f, FSETLOCKING_BYCALLER); + (void) fchmod_umask(fileno(f), 0644); + + STRV_FOREACH(i, l) + write_env_var(f, *i); + + r = fflush_and_check(f); + if (r >= 0) { + if (rename(p, fname) >= 0) + return 0; + + r = -errno; + } + + unlink(p); + return r; +} diff --git a/src/basic/env-file.h b/src/basic/env-file.h new file mode 100644 index 0000000..e1ca195 --- /dev/null +++ b/src/basic/env-file.h @@ -0,0 +1,17 @@ +/* SPDX-License-Identifier: LGPL-2.1+ */ +#pragma once + +#include <stdarg.h> +#include <stdio.h> + +#include "macro.h" + +int parse_env_filev(FILE *f, const char *fname, va_list ap); +int parse_env_file_sentinel(FILE *f, const char *fname, ...) _sentinel_; +#define parse_env_file(f, fname, ...) parse_env_file_sentinel(f, fname, __VA_ARGS__, NULL) +int load_env_file(FILE *f, const char *fname, char ***l); +int load_env_file_pairs(FILE *f, const char *fname, char ***l); + +int merge_env_file(char ***env, FILE *f, const char *fname); + +int write_env_file(const char *fname, char **l); diff --git a/src/basic/env-util.c b/src/basic/env-util.c new file mode 100644 index 0000000..fd449dc --- /dev/null +++ b/src/basic/env-util.c @@ -0,0 +1,752 @@ +/* SPDX-License-Identifier: LGPL-2.1+ */ + +#include <errno.h> +#include <limits.h> +#include <stdarg.h> +#include <stdlib.h> +#include <string.h> +#include <unistd.h> + +#include "alloc-util.h" +#include "env-util.h" +#include "escape.h" +#include "extract-word.h" +#include "macro.h" +#include "parse-util.h" +#include "string-util.h" +#include "strv.h" +#include "utf8.h" + +#define VALID_CHARS_ENV_NAME \ + DIGITS LETTERS \ + "_" + +static bool env_name_is_valid_n(const char *e, size_t n) { + const char *p; + + if (!e) + return false; + + if (n <= 0) + return false; + + if (e[0] >= '0' && e[0] <= '9') + return false; + + /* POSIX says the overall size of the environment block cannot + * be > ARG_MAX, an individual assignment hence cannot be + * either. Discounting the equal sign and trailing NUL this + * hence leaves ARG_MAX-2 as longest possible variable + * name. */ + if (n > (size_t) sysconf(_SC_ARG_MAX) - 2) + return false; + + for (p = e; p < e + n; p++) + if (!strchr(VALID_CHARS_ENV_NAME, *p)) + return false; + + return true; +} + +bool env_name_is_valid(const char *e) { + if (!e) + return false; + + return env_name_is_valid_n(e, strlen(e)); +} + +bool env_value_is_valid(const char *e) { + if (!e) + return false; + + if (!utf8_is_valid(e)) + return false; + + /* bash allows tabs and newlines in environment variables, and so + * should we */ + if (string_has_cc(e, "\t\n")) + return false; + + /* POSIX says the overall size of the environment block cannot + * be > ARG_MAX, an individual assignment hence cannot be + * either. Discounting the shortest possible variable name of + * length 1, the equal sign and trailing NUL this hence leaves + * ARG_MAX-3 as longest possible variable value. */ + if (strlen(e) > (size_t) sysconf(_SC_ARG_MAX) - 3) + return false; + + return true; +} + +bool env_assignment_is_valid(const char *e) { + const char *eq; + + eq = strchr(e, '='); + if (!eq) + return false; + + if (!env_name_is_valid_n(e, eq - e)) + return false; + + if (!env_value_is_valid(eq + 1)) + return false; + + /* POSIX says the overall size of the environment block cannot + * be > ARG_MAX, hence the individual variable assignments + * cannot be either, but let's leave room for one trailing NUL + * byte. */ + if (strlen(e) > (size_t) sysconf(_SC_ARG_MAX) - 1) + return false; + + return true; +} + +bool strv_env_is_valid(char **e) { + char **p, **q; + + STRV_FOREACH(p, e) { + size_t k; + + if (!env_assignment_is_valid(*p)) + return false; + + /* Check if there are duplicate assignments */ + k = strcspn(*p, "="); + STRV_FOREACH(q, p + 1) + if (strneq(*p, *q, k) && (*q)[k] == '=') + return false; + } + + return true; +} + +bool strv_env_name_is_valid(char **l) { + char **p; + + STRV_FOREACH(p, l) { + if (!env_name_is_valid(*p)) + return false; + + if (strv_contains(p + 1, *p)) + return false; + } + + return true; +} + +bool strv_env_name_or_assignment_is_valid(char **l) { + char **p; + + STRV_FOREACH(p, l) { + if (!env_assignment_is_valid(*p) && !env_name_is_valid(*p)) + return false; + + if (strv_contains(p + 1, *p)) + return false; + } + + return true; +} + +static int env_append(char **r, char ***k, char **a) { + assert(r); + assert(k); + assert(*k >= r); + + if (!a) + return 0; + + /* Expects the following arguments: 'r' shall point to the beginning of an strv we are going to append to, 'k' + * to a pointer pointing to the NULL entry at the end of the same array. 'a' shall point to another strv. + * + * This call adds every entry of 'a' to 'r', either overriding an existing matching entry, or appending to it. + * + * This call assumes 'r' has enough pre-allocated space to grow by all of 'a''s items. */ + + for (; *a; a++) { + char **j, *c; + size_t n; + + n = strcspn(*a, "="); + if ((*a)[n] == '=') + n++; + + for (j = r; j < *k; j++) + if (strneq(*j, *a, n)) + break; + + c = strdup(*a); + if (!c) + return -ENOMEM; + + if (j >= *k) { /* Append to the end? */ + (*k)[0] = c; + (*k)[1] = NULL; + (*k)++; + } else + free_and_replace(*j, c); /* Override existing item */ + } + + return 0; +} + +char **strv_env_merge(size_t n_lists, ...) { + _cleanup_strv_free_ char **ret = NULL; + size_t n = 0, i; + char **l, **k; + va_list ap; + + /* Merges an arbitrary number of environment sets */ + + va_start(ap, n_lists); + for (i = 0; i < n_lists; i++) { + l = va_arg(ap, char**); + n += strv_length(l); + } + va_end(ap); + + ret = new(char*, n+1); + if (!ret) + return NULL; + + *ret = NULL; + k = ret; + + va_start(ap, n_lists); + for (i = 0; i < n_lists; i++) { + l = va_arg(ap, char**); + if (env_append(ret, &k, l) < 0) { + va_end(ap); + return NULL; + } + } + va_end(ap); + + return TAKE_PTR(ret); +} + +static bool env_match(const char *t, const char *pattern) { + assert(t); + assert(pattern); + + /* pattern a matches string a + * a matches a= + * a matches a=b + * a= matches a= + * a=b matches a=b + * a= does not match a + * a=b does not match a= + * a=b does not match a + * a=b does not match a=c */ + + if (streq(t, pattern)) + return true; + + if (!strchr(pattern, '=')) { + size_t l = strlen(pattern); + + return strneq(t, pattern, l) && t[l] == '='; + } + + return false; +} + +static bool env_entry_has_name(const char *entry, const char *name) { + const char *t; + + assert(entry); + assert(name); + + t = startswith(entry, name); + if (!t) + return false; + + return *t == '='; +} + +char **strv_env_delete(char **x, size_t n_lists, ...) { + size_t n, i = 0; + char **k, **r; + va_list ap; + + /* Deletes every entry from x that is mentioned in the other + * string lists */ + + n = strv_length(x); + + r = new(char*, n+1); + if (!r) + return NULL; + + STRV_FOREACH(k, x) { + size_t v; + + va_start(ap, n_lists); + for (v = 0; v < n_lists; v++) { + char **l, **j; + + l = va_arg(ap, char**); + STRV_FOREACH(j, l) + if (env_match(*k, *j)) + goto skip; + } + va_end(ap); + + r[i] = strdup(*k); + if (!r[i]) { + strv_free(r); + return NULL; + } + + i++; + continue; + + skip: + va_end(ap); + } + + r[i] = NULL; + + assert(i <= n); + + return r; +} + +char **strv_env_unset(char **l, const char *p) { + + char **f, **t; + + if (!l) + return NULL; + + assert(p); + + /* Drops every occurrence of the env var setting p in the + * string list. Edits in-place. */ + + for (f = t = l; *f; f++) { + + if (env_match(*f, p)) { + free(*f); + continue; + } + + *(t++) = *f; + } + + *t = NULL; + return l; +} + +char **strv_env_unset_many(char **l, ...) { + char **f, **t; + + if (!l) + return NULL; + + /* Like strv_env_unset() but applies many at once. Edits in-place. */ + + for (f = t = l; *f; f++) { + bool found = false; + const char *p; + va_list ap; + + va_start(ap, l); + + while ((p = va_arg(ap, const char*))) { + if (env_match(*f, p)) { + found = true; + break; + } + } + + va_end(ap); + + if (found) { + free(*f); + continue; + } + + *(t++) = *f; + } + + *t = NULL; + return l; +} + +int strv_env_replace(char ***l, char *p) { + const char *t, *name; + char **f; + int r; + + assert(p); + + /* Replace first occurrence of the env var or add a new one in the string list. Drop other occurrences. Edits + * in-place. Does not copy p. p must be a valid key=value assignment. + */ + + t = strchr(p, '='); + if (!t) + return -EINVAL; + + name = strndupa(p, t - p); + + STRV_FOREACH(f, *l) + if (env_entry_has_name(*f, name)) { + free_and_replace(*f, p); + strv_env_unset(f + 1, *f); + return 0; + } + + /* We didn't find a match, we need to append p or create a new strv */ + r = strv_push(l, p); + if (r < 0) + return r; + + return 1; +} + +char **strv_env_set(char **x, const char *p) { + _cleanup_strv_free_ char **ret = NULL; + size_t n, m; + char **k; + + /* Overrides the env var setting of p, returns a new copy */ + + n = strv_length(x); + m = n + 2; + if (m < n) /* overflow? */ + return NULL; + + ret = new(char*, m); + if (!ret) + return NULL; + + *ret = NULL; + k = ret; + + if (env_append(ret, &k, x) < 0) + return NULL; + + if (env_append(ret, &k, STRV_MAKE(p)) < 0) + return NULL; + + return TAKE_PTR(ret); +} + +char *strv_env_get_n(char **l, const char *name, size_t k, unsigned flags) { + char **i; + + assert(name); + + if (k <= 0) + return NULL; + + STRV_FOREACH_BACKWARDS(i, l) + if (strneq(*i, name, k) && + (*i)[k] == '=') + return *i + k + 1; + + if (flags & REPLACE_ENV_USE_ENVIRONMENT) { + const char *t; + + t = strndupa(name, k); + return getenv(t); + }; + + return NULL; +} + +char *strv_env_get(char **l, const char *name) { + assert(name); + + return strv_env_get_n(l, name, strlen(name), 0); +} + +char **strv_env_clean_with_callback(char **e, void (*invalid_callback)(const char *p, void *userdata), void *userdata) { + char **p, **q; + int k = 0; + + STRV_FOREACH(p, e) { + size_t n; + bool duplicate = false; + + if (!env_assignment_is_valid(*p)) { + if (invalid_callback) + invalid_callback(*p, userdata); + free(*p); + continue; + } + + n = strcspn(*p, "="); + STRV_FOREACH(q, p + 1) + if (strneq(*p, *q, n) && (*q)[n] == '=') { + duplicate = true; + break; + } + + if (duplicate) { + free(*p); + continue; + } + + e[k++] = *p; + } + + if (e) + e[k] = NULL; + + return e; +} + +char *replace_env_n(const char *format, size_t n, char **env, unsigned flags) { + enum { + WORD, + CURLY, + VARIABLE, + VARIABLE_RAW, + TEST, + DEFAULT_VALUE, + ALTERNATE_VALUE, + } state = WORD; + + const char *e, *word = format, *test_value; + char *k; + _cleanup_free_ char *r = NULL; + size_t i, len; + int nest = 0; + + assert(format); + + for (e = format, i = 0; *e && i < n; e ++, i ++) + switch (state) { + + case WORD: + if (*e == '$') + state = CURLY; + break; + + case CURLY: + if (*e == '{') { + k = strnappend(r, word, e-word-1); + if (!k) + return NULL; + + free_and_replace(r, k); + + word = e-1; + state = VARIABLE; + nest++; + } else if (*e == '$') { + k = strnappend(r, word, e-word); + if (!k) + return NULL; + + free_and_replace(r, k); + + word = e+1; + state = WORD; + + } else if (flags & REPLACE_ENV_ALLOW_BRACELESS && strchr(VALID_CHARS_ENV_NAME, *e)) { + k = strnappend(r, word, e-word-1); + if (!k) + return NULL; + + free_and_replace(r, k); + + word = e-1; + state = VARIABLE_RAW; + + } else + state = WORD; + break; + + case VARIABLE: + if (*e == '}') { + const char *t; + + t = strv_env_get_n(env, word+2, e-word-2, flags); + + k = strappend(r, t); + if (!k) + return NULL; + + free_and_replace(r, k); + + word = e+1; + state = WORD; + } else if (*e == ':') { + if (!(flags & REPLACE_ENV_ALLOW_EXTENDED)) + /* Treat this as unsupported syntax, i.e. do no replacement */ + state = WORD; + else { + len = e-word-2; + state = TEST; + } + } + break; + + case TEST: + if (*e == '-') + state = DEFAULT_VALUE; + else if (*e == '+') + state = ALTERNATE_VALUE; + else { + state = WORD; + break; + } + + test_value = e+1; + break; + + case DEFAULT_VALUE: /* fall through */ + case ALTERNATE_VALUE: + assert(flags & REPLACE_ENV_ALLOW_EXTENDED); + + if (*e == '{') { + nest++; + break; + } + + if (*e != '}') + break; + + nest--; + if (nest == 0) { + const char *t; + _cleanup_free_ char *v = NULL; + + t = strv_env_get_n(env, word+2, len, flags); + + if (t && state == ALTERNATE_VALUE) + t = v = replace_env_n(test_value, e-test_value, env, flags); + else if (!t && state == DEFAULT_VALUE) + t = v = replace_env_n(test_value, e-test_value, env, flags); + + k = strappend(r, t); + if (!k) + return NULL; + + free_and_replace(r, k); + + word = e+1; + state = WORD; + } + break; + + case VARIABLE_RAW: + assert(flags & REPLACE_ENV_ALLOW_BRACELESS); + + if (!strchr(VALID_CHARS_ENV_NAME, *e)) { + const char *t; + + t = strv_env_get_n(env, word+1, e-word-1, flags); + + k = strappend(r, t); + if (!k) + return NULL; + + free_and_replace(r, k); + + word = e--; + i--; + state = WORD; + } + break; + } + + if (state == VARIABLE_RAW) { + const char *t; + + assert(flags & REPLACE_ENV_ALLOW_BRACELESS); + + t = strv_env_get_n(env, word+1, e-word-1, flags); + return strappend(r, t); + } else + return strnappend(r, word, e-word); +} + +char **replace_env_argv(char **argv, char **env) { + char **ret, **i; + size_t k = 0, l = 0; + + l = strv_length(argv); + + ret = new(char*, l+1); + if (!ret) + return NULL; + + STRV_FOREACH(i, argv) { + + /* If $FOO appears as single word, replace it by the split up variable */ + if ((*i)[0] == '$' && !IN_SET((*i)[1], '{', '$')) { + char *e; + char **w, **m = NULL; + size_t q; + + e = strv_env_get(env, *i+1); + if (e) { + int r; + + r = strv_split_extract(&m, e, WHITESPACE, EXTRACT_RELAX|EXTRACT_QUOTES); + if (r < 0) { + ret[k] = NULL; + strv_free(ret); + return NULL; + } + } else + m = NULL; + + q = strv_length(m); + l = l + q - 1; + + w = reallocarray(ret, l + 1, sizeof(char *)); + if (!w) { + ret[k] = NULL; + strv_free(ret); + strv_free(m); + return NULL; + } + + ret = w; + if (m) { + memcpy(ret + k, m, q * sizeof(char*)); + free(m); + } + + k += q; + continue; + } + + /* If ${FOO} appears as part of a word, replace it by the variable as-is */ + ret[k] = replace_env(*i, env, 0); + if (!ret[k]) { + strv_free(ret); + return NULL; + } + k++; + } + + ret[k] = NULL; + return ret; +} + +int getenv_bool(const char *p) { + const char *e; + + e = getenv(p); + if (!e) + return -ENXIO; + + return parse_boolean(e); +} + +int getenv_bool_secure(const char *p) { + const char *e; + + e = secure_getenv(p); + if (!e) + return -ENXIO; + + return parse_boolean(e); +} diff --git a/src/basic/env-util.h b/src/basic/env-util.h new file mode 100644 index 0000000..d54f996 --- /dev/null +++ b/src/basic/env-util.h @@ -0,0 +1,47 @@ +/* SPDX-License-Identifier: LGPL-2.1+ */ +#pragma once + +#include <stdbool.h> +#include <stddef.h> +#include <stdio.h> + +#include "macro.h" +#include "string.h" + +bool env_name_is_valid(const char *e); +bool env_value_is_valid(const char *e); +bool env_assignment_is_valid(const char *e); + +enum { + REPLACE_ENV_USE_ENVIRONMENT = 1 << 0, + REPLACE_ENV_ALLOW_BRACELESS = 1 << 1, + REPLACE_ENV_ALLOW_EXTENDED = 1 << 2, +}; + +char *replace_env_n(const char *format, size_t n, char **env, unsigned flags); +char **replace_env_argv(char **argv, char **env); + +static inline char *replace_env(const char *format, char **env, unsigned flags) { + return replace_env_n(format, strlen(format), env, flags); +} + +bool strv_env_is_valid(char **e); +#define strv_env_clean(l) strv_env_clean_with_callback(l, NULL, NULL) +char **strv_env_clean_with_callback(char **l, void (*invalid_callback)(const char *p, void *userdata), void *userdata); + +bool strv_env_name_is_valid(char **l); +bool strv_env_name_or_assignment_is_valid(char **l); + +char **strv_env_merge(size_t n_lists, ...); +char **strv_env_delete(char **x, size_t n_lists, ...); /* New copy */ + +char **strv_env_set(char **x, const char *p); /* New copy ... */ +char **strv_env_unset(char **l, const char *p); /* In place ... */ +char **strv_env_unset_many(char **l, ...) _sentinel_; +int strv_env_replace(char ***l, char *p); /* In place ... */ + +char *strv_env_get_n(char **l, const char *name, size_t k, unsigned flags) _pure_; +char *strv_env_get(char **x, const char *n) _pure_; + +int getenv_bool(const char *p); +int getenv_bool_secure(const char *p); diff --git a/src/basic/errno-list.c b/src/basic/errno-list.c new file mode 100644 index 0000000..44cc570 --- /dev/null +++ b/src/basic/errno-list.c @@ -0,0 +1,37 @@ +/* SPDX-License-Identifier: LGPL-2.1+ */ + +#include <errno.h> +#include <string.h> + +#include "errno-list.h" +#include "macro.h" + +static const struct errno_name* lookup_errno(register const char *str, + register GPERF_LEN_TYPE len); + +#include "errno-from-name.h" +#include "errno-to-name.h" + +const char *errno_to_name(int id) { + + if (id < 0) + id = -id; + + if ((size_t) id >= ELEMENTSOF(errno_names)) + return NULL; + + return errno_names[id]; +} + +int errno_from_name(const char *name) { + const struct errno_name *sc; + + assert(name); + + sc = lookup_errno(name, strlen(name)); + if (!sc) + return -EINVAL; + + assert(sc->id > 0); + return sc->id; +} diff --git a/src/basic/errno-list.h b/src/basic/errno-list.h new file mode 100644 index 0000000..9c639b4 --- /dev/null +++ b/src/basic/errno-list.h @@ -0,0 +1,15 @@ +/* SPDX-License-Identifier: LGPL-2.1+ */ +#pragma once + +#include <stdbool.h> +/* + * MAX_ERRNO is defined as 4095 in linux/err.h + * We use the same value here. + */ +#define ERRNO_MAX 4095 + +const char *errno_to_name(int id); +int errno_from_name(const char *name); +static inline bool errno_is_valid(int n) { + return n > 0 && n <= ERRNO_MAX; +} diff --git a/src/basic/errno-to-name.awk b/src/basic/errno-to-name.awk new file mode 100644 index 0000000..0878aba --- /dev/null +++ b/src/basic/errno-to-name.awk @@ -0,0 +1,9 @@ +BEGIN{ + print "static const char* const errno_names[] = { " +} +!/EDEADLOCK/ && !/EWOULDBLOCK/ && !/ENOTSUP/ { + printf " [%s] = \"%s\",\n", $1, $1 +} +END{ + print "};" +} diff --git a/src/basic/escape.c b/src/basic/escape.c new file mode 100644 index 0000000..5f71515 --- /dev/null +++ b/src/basic/escape.c @@ -0,0 +1,506 @@ +/* SPDX-License-Identifier: LGPL-2.1+ */ + +#include <errno.h> +#include <stdlib.h> +#include <string.h> + +#include "alloc-util.h" +#include "escape.h" +#include "hexdecoct.h" +#include "macro.h" +#include "utf8.h" + +int cescape_char(char c, char *buf) { + char *buf_old = buf; + + /* Needs space for 4 characters in the buffer */ + + switch (c) { + + case '\a': + *(buf++) = '\\'; + *(buf++) = 'a'; + break; + case '\b': + *(buf++) = '\\'; + *(buf++) = 'b'; + break; + case '\f': + *(buf++) = '\\'; + *(buf++) = 'f'; + break; + case '\n': + *(buf++) = '\\'; + *(buf++) = 'n'; + break; + case '\r': + *(buf++) = '\\'; + *(buf++) = 'r'; + break; + case '\t': + *(buf++) = '\\'; + *(buf++) = 't'; + break; + case '\v': + *(buf++) = '\\'; + *(buf++) = 'v'; + break; + case '\\': + *(buf++) = '\\'; + *(buf++) = '\\'; + break; + case '"': + *(buf++) = '\\'; + *(buf++) = '"'; + break; + case '\'': + *(buf++) = '\\'; + *(buf++) = '\''; + break; + + default: + /* For special chars we prefer octal over + * hexadecimal encoding, simply because glib's + * g_strescape() does the same */ + if ((c < ' ') || (c >= 127)) { + *(buf++) = '\\'; + *(buf++) = octchar((unsigned char) c >> 6); + *(buf++) = octchar((unsigned char) c >> 3); + *(buf++) = octchar((unsigned char) c); + } else + *(buf++) = c; + break; + } + + return buf - buf_old; +} + +char *cescape_length(const char *s, size_t n) { + const char *f; + char *r, *t; + + assert(s || n == 0); + + /* Does C style string escaping. May be reversed with + * cunescape(). */ + + r = new(char, n*4 + 1); + if (!r) + return NULL; + + for (f = s, t = r; f < s + n; f++) + t += cescape_char(*f, t); + + *t = 0; + + return r; +} + +char *cescape(const char *s) { + assert(s); + + return cescape_length(s, strlen(s)); +} + +int cunescape_one(const char *p, size_t length, char32_t *ret, bool *eight_bit) { + int r = 1; + + assert(p); + assert(ret); + + /* Unescapes C style. Returns the unescaped character in ret. + * Sets *eight_bit to true if the escaped sequence either fits in + * one byte in UTF-8 or is a non-unicode literal byte and should + * instead be copied directly. + */ + + if (length != (size_t) -1 && length < 1) + return -EINVAL; + + switch (p[0]) { + + case 'a': + *ret = '\a'; + break; + case 'b': + *ret = '\b'; + break; + case 'f': + *ret = '\f'; + break; + case 'n': + *ret = '\n'; + break; + case 'r': + *ret = '\r'; + break; + case 't': + *ret = '\t'; + break; + case 'v': + *ret = '\v'; + break; + case '\\': + *ret = '\\'; + break; + case '"': + *ret = '"'; + break; + case '\'': + *ret = '\''; + break; + + case 's': + /* This is an extension of the XDG syntax files */ + *ret = ' '; + break; + + case 'x': { + /* hexadecimal encoding */ + int a, b; + + if (length != (size_t) -1 && length < 3) + return -EINVAL; + + a = unhexchar(p[1]); + if (a < 0) + return -EINVAL; + + b = unhexchar(p[2]); + if (b < 0) + return -EINVAL; + + /* Don't allow NUL bytes */ + if (a == 0 && b == 0) + return -EINVAL; + + *ret = (a << 4U) | b; + *eight_bit = true; + r = 3; + break; + } + + case 'u': { + /* C++11 style 16bit unicode */ + + int a[4]; + size_t i; + uint32_t c; + + if (length != (size_t) -1 && length < 5) + return -EINVAL; + + for (i = 0; i < 4; i++) { + a[i] = unhexchar(p[1 + i]); + if (a[i] < 0) + return a[i]; + } + + c = ((uint32_t) a[0] << 12U) | ((uint32_t) a[1] << 8U) | ((uint32_t) a[2] << 4U) | (uint32_t) a[3]; + + /* Don't allow 0 chars */ + if (c == 0) + return -EINVAL; + + *ret = c; + r = 5; + break; + } + + case 'U': { + /* C++11 style 32bit unicode */ + + int a[8]; + size_t i; + char32_t c; + + if (length != (size_t) -1 && length < 9) + return -EINVAL; + + for (i = 0; i < 8; i++) { + a[i] = unhexchar(p[1 + i]); + if (a[i] < 0) + return a[i]; + } + + c = ((uint32_t) a[0] << 28U) | ((uint32_t) a[1] << 24U) | ((uint32_t) a[2] << 20U) | ((uint32_t) a[3] << 16U) | + ((uint32_t) a[4] << 12U) | ((uint32_t) a[5] << 8U) | ((uint32_t) a[6] << 4U) | (uint32_t) a[7]; + + /* Don't allow 0 chars */ + if (c == 0) + return -EINVAL; + + /* Don't allow invalid code points */ + if (!unichar_is_valid(c)) + return -EINVAL; + + *ret = c; + r = 9; + break; + } + + case '0': + case '1': + case '2': + case '3': + case '4': + case '5': + case '6': + case '7': { + /* octal encoding */ + int a, b, c; + char32_t m; + + if (length != (size_t) -1 && length < 3) + return -EINVAL; + + a = unoctchar(p[0]); + if (a < 0) + return -EINVAL; + + b = unoctchar(p[1]); + if (b < 0) + return -EINVAL; + + c = unoctchar(p[2]); + if (c < 0) + return -EINVAL; + + /* don't allow NUL bytes */ + if (a == 0 && b == 0 && c == 0) + return -EINVAL; + + /* Don't allow bytes above 255 */ + m = ((uint32_t) a << 6U) | ((uint32_t) b << 3U) | (uint32_t) c; + if (m > 255) + return -EINVAL; + + *ret = m; + *eight_bit = true; + r = 3; + break; + } + + default: + return -EINVAL; + } + + return r; +} + +int cunescape_length_with_prefix(const char *s, size_t length, const char *prefix, UnescapeFlags flags, char **ret) { + char *r, *t; + const char *f; + size_t pl; + + assert(s); + assert(ret); + + /* Undoes C style string escaping, and optionally prefixes it. */ + + pl = strlen_ptr(prefix); + + r = new(char, pl+length+1); + if (!r) + return -ENOMEM; + + if (prefix) + memcpy(r, prefix, pl); + + for (f = s, t = r + pl; f < s + length; f++) { + size_t remaining; + bool eight_bit = false; + char32_t u; + int k; + + remaining = s + length - f; + assert(remaining > 0); + + if (*f != '\\') { + /* A literal, copy verbatim */ + *(t++) = *f; + continue; + } + + if (remaining == 1) { + if (flags & UNESCAPE_RELAX) { + /* A trailing backslash, copy verbatim */ + *(t++) = *f; + continue; + } + + free(r); + return -EINVAL; + } + + k = cunescape_one(f + 1, remaining - 1, &u, &eight_bit); + if (k < 0) { + if (flags & UNESCAPE_RELAX) { + /* Invalid escape code, let's take it literal then */ + *(t++) = '\\'; + continue; + } + + free(r); + return k; + } + + f += k; + if (eight_bit) + /* One byte? Set directly as specified */ + *(t++) = u; + else + /* Otherwise encode as multi-byte UTF-8 */ + t += utf8_encode_unichar(t, u); + } + + *t = 0; + + *ret = r; + return t - r; +} + +int cunescape_length(const char *s, size_t length, UnescapeFlags flags, char **ret) { + return cunescape_length_with_prefix(s, length, NULL, flags, ret); +} + +int cunescape(const char *s, UnescapeFlags flags, char **ret) { + return cunescape_length(s, strlen(s), flags, ret); +} + +char *xescape(const char *s, const char *bad) { + char *r, *t; + const char *f; + + /* Escapes all chars in bad, in addition to \ and all special + * chars, in \xFF style escaping. May be reversed with + * cunescape(). */ + + r = new(char, strlen(s) * 4 + 1); + if (!r) + return NULL; + + for (f = s, t = r; *f; f++) { + + if ((*f < ' ') || (*f >= 127) || + (*f == '\\') || strchr(bad, *f)) { + *(t++) = '\\'; + *(t++) = 'x'; + *(t++) = hexchar(*f >> 4); + *(t++) = hexchar(*f); + } else + *(t++) = *f; + } + + *t = 0; + + return r; +} + +char *octescape(const char *s, size_t len) { + char *r, *t; + const char *f; + + /* Escapes all chars in bad, in addition to \ and " chars, + * in \nnn style escaping. */ + + r = new(char, len * 4 + 1); + if (!r) + return NULL; + + for (f = s, t = r; f < s + len; f++) { + + if (*f < ' ' || *f >= 127 || IN_SET(*f, '\\', '"')) { + *(t++) = '\\'; + *(t++) = '0' + (*f >> 6); + *(t++) = '0' + ((*f >> 3) & 8); + *(t++) = '0' + (*f & 8); + } else + *(t++) = *f; + } + + *t = 0; + + return r; + +} + +static char *strcpy_backslash_escaped(char *t, const char *s, const char *bad, bool escape_tab_nl) { + assert(bad); + + for (; *s; s++) { + if (escape_tab_nl && IN_SET(*s, '\n', '\t')) { + *(t++) = '\\'; + *(t++) = *s == '\n' ? 'n' : 't'; + continue; + } + + if (*s == '\\' || strchr(bad, *s)) + *(t++) = '\\'; + + *(t++) = *s; + } + + return t; +} + +char *shell_escape(const char *s, const char *bad) { + char *r, *t; + + r = new(char, strlen(s)*2+1); + if (!r) + return NULL; + + t = strcpy_backslash_escaped(r, s, bad, false); + *t = 0; + + return r; +} + +char* shell_maybe_quote(const char *s, EscapeStyle style) { + const char *p; + char *r, *t; + + assert(s); + + /* Encloses a string in quotes if necessary to make it OK as a shell + * string. Note that we treat benign UTF-8 characters as needing + * escaping too, but that should be OK. */ + + for (p = s; *p; p++) + if (*p <= ' ' || + *p >= 127 || + strchr(SHELL_NEED_QUOTES, *p)) + break; + + if (!*p) + return strdup(s); + + r = new(char, (style == ESCAPE_POSIX) + 1 + strlen(s)*2 + 1 + 1); + if (!r) + return NULL; + + t = r; + if (style == ESCAPE_BACKSLASH) + *(t++) = '"'; + else if (style == ESCAPE_POSIX) { + *(t++) = '$'; + *(t++) = '\''; + } else + assert_not_reached("Bad EscapeStyle"); + + t = mempcpy(t, s, p - s); + + if (style == ESCAPE_BACKSLASH) + t = strcpy_backslash_escaped(t, p, SHELL_NEED_ESCAPE, false); + else + t = strcpy_backslash_escaped(t, p, SHELL_NEED_ESCAPE_POSIX, true); + + if (style == ESCAPE_BACKSLASH) + *(t++) = '"'; + else + *(t++) = '\''; + *t = 0; + + return r; +} diff --git a/src/basic/escape.h b/src/basic/escape.h new file mode 100644 index 0000000..5156209 --- /dev/null +++ b/src/basic/escape.h @@ -0,0 +1,53 @@ +/* SPDX-License-Identifier: LGPL-2.1+ */ +#pragma once + +#include <inttypes.h> +#include <stddef.h> +#include <stdint.h> +#include <sys/types.h> +#include <uchar.h> + +#include "string-util.h" +#include "missing_type.h" + +/* What characters are special in the shell? */ +/* must be escaped outside and inside double-quotes */ +#define SHELL_NEED_ESCAPE "\"\\`$" + +/* Those that can be escaped or double-quoted. + * + * Stricly speaking, ! does not need to be escaped, except in interactive + * mode, but let's be extra nice to the user and quote ! in case this + * output is ever used in interactive mode. */ +#define SHELL_NEED_QUOTES SHELL_NEED_ESCAPE GLOB_CHARS "'()<>|&;!" + +/* Note that we assume control characters would need to be escaped too in + * addition to the "special" characters listed here, if they appear in the + * string. Current users disallow control characters. Also '"' shall not + * be escaped. + */ +#define SHELL_NEED_ESCAPE_POSIX "\\\'" + +typedef enum UnescapeFlags { + UNESCAPE_RELAX = 1, +} UnescapeFlags; + +typedef enum EscapeStyle { + ESCAPE_BACKSLASH = 1, + ESCAPE_POSIX = 2, +} EscapeStyle; + +char *cescape(const char *s); +char *cescape_length(const char *s, size_t n); +int cescape_char(char c, char *buf); + +int cunescape(const char *s, UnescapeFlags flags, char **ret); +int cunescape_length(const char *s, size_t length, UnescapeFlags flags, char **ret); +int cunescape_length_with_prefix(const char *s, size_t length, const char *prefix, UnescapeFlags flags, char **ret); +int cunescape_one(const char *p, size_t length, char32_t *ret, bool *eight_bit); + +char *xescape(const char *s, const char *bad); +char *octescape(const char *s, size_t len); + +char *shell_escape(const char *s, const char *bad); +char* shell_maybe_quote(const char *s, EscapeStyle style); diff --git a/src/basic/ether-addr-util.c b/src/basic/ether-addr-util.c new file mode 100644 index 0000000..e875696 --- /dev/null +++ b/src/basic/ether-addr-util.c @@ -0,0 +1,111 @@ +/* SPDX-License-Identifier: LGPL-2.1+ */ + +#include <errno.h> +#include <net/ethernet.h> +#include <stdio.h> +#include <sys/types.h> + +#include "ether-addr-util.h" +#include "macro.h" +#include "string-util.h" + +char* ether_addr_to_string(const struct ether_addr *addr, char buffer[ETHER_ADDR_TO_STRING_MAX]) { + assert(addr); + assert(buffer); + + /* Like ether_ntoa() but uses %02x instead of %x to print + * ethernet addresses, which makes them look less funny. Also, + * doesn't use a static buffer. */ + + sprintf(buffer, "%02x:%02x:%02x:%02x:%02x:%02x", + addr->ether_addr_octet[0], + addr->ether_addr_octet[1], + addr->ether_addr_octet[2], + addr->ether_addr_octet[3], + addr->ether_addr_octet[4], + addr->ether_addr_octet[5]); + + return buffer; +} + +int ether_addr_compare(const struct ether_addr *a, const struct ether_addr *b) { + return memcmp(a, b, ETH_ALEN); +} + +static void ether_addr_hash_func(const struct ether_addr *p, struct siphash *state) { + siphash24_compress(p, sizeof(struct ether_addr), state); +} + +DEFINE_HASH_OPS(ether_addr_hash_ops, struct ether_addr, ether_addr_hash_func, ether_addr_compare); + +int ether_addr_from_string(const char *s, struct ether_addr *ret) { + size_t pos = 0, n, field; + char sep = '\0'; + const char *hex = HEXDIGITS, *hexoff; + size_t x; + bool touched; + +#define parse_fields(v) \ + for (field = 0; field < ELEMENTSOF(v); field++) { \ + touched = false; \ + for (n = 0; n < (2 * sizeof(v[0])); n++) { \ + if (s[pos] == '\0') \ + break; \ + hexoff = strchr(hex, s[pos]); \ + if (!hexoff) \ + break; \ + assert(hexoff >= hex); \ + x = hexoff - hex; \ + if (x >= 16) \ + x -= 6; /* A-F */ \ + assert(x < 16); \ + touched = true; \ + v[field] <<= 4; \ + v[field] += x; \ + pos++; \ + } \ + if (!touched) \ + return -EINVAL; \ + if (field < (ELEMENTSOF(v)-1)) { \ + if (s[pos] != sep) \ + return -EINVAL; \ + else \ + pos++; \ + } \ + } + + assert(s); + assert(ret); + + s += strspn(s, WHITESPACE); + sep = s[strspn(s, hex)]; + + if (sep == '.') { + uint16_t shorts[3] = { 0 }; + + parse_fields(shorts); + + if (s[pos] != '\0') + return -EINVAL; + + for (n = 0; n < ELEMENTSOF(shorts); n++) { + ret->ether_addr_octet[2*n] = ((shorts[n] & (uint16_t)0xff00) >> 8); + ret->ether_addr_octet[2*n + 1] = (shorts[n] & (uint16_t)0x00ff); + } + + } else if (IN_SET(sep, ':', '-')) { + struct ether_addr out = ETHER_ADDR_NULL; + + parse_fields(out.ether_addr_octet); + + if (s[pos] != '\0') + return -EINVAL; + + for (n = 0; n < ELEMENTSOF(out.ether_addr_octet); n++) + ret->ether_addr_octet[n] = out.ether_addr_octet[n]; + + } else + return -EINVAL; + + return 0; +} diff --git a/src/basic/ether-addr-util.h b/src/basic/ether-addr-util.h new file mode 100644 index 0000000..4e44b30 --- /dev/null +++ b/src/basic/ether-addr-util.h @@ -0,0 +1,28 @@ +/* SPDX-License-Identifier: LGPL-2.1+ */ +#pragma once + +#include <net/ethernet.h> +#include <stdbool.h> + +#include "hash-funcs.h" + +#define ETHER_ADDR_FORMAT_STR "%02X%02X%02X%02X%02X%02X" +#define ETHER_ADDR_FORMAT_VAL(x) (x).ether_addr_octet[0], (x).ether_addr_octet[1], (x).ether_addr_octet[2], (x).ether_addr_octet[3], (x).ether_addr_octet[4], (x).ether_addr_octet[5] + +#define ETHER_ADDR_TO_STRING_MAX (3*6) +char* ether_addr_to_string(const struct ether_addr *addr, char buffer[ETHER_ADDR_TO_STRING_MAX]); + +int ether_addr_compare(const struct ether_addr *a, const struct ether_addr *b); +static inline bool ether_addr_equal(const struct ether_addr *a, const struct ether_addr *b) { + return ether_addr_compare(a, b) == 0; +} + +#define ETHER_ADDR_NULL ((const struct ether_addr){}) + +static inline bool ether_addr_is_null(const struct ether_addr *addr) { + return ether_addr_equal(addr, ÐER_ADDR_NULL); +} + +int ether_addr_from_string(const char *s, struct ether_addr *ret); + +extern const struct hash_ops ether_addr_hash_ops; diff --git a/src/basic/extract-word.c b/src/basic/extract-word.c new file mode 100644 index 0000000..a861b56 --- /dev/null +++ b/src/basic/extract-word.c @@ -0,0 +1,285 @@ +/* SPDX-License-Identifier: LGPL-2.1+ */ + +#include <errno.h> +#include <stdarg.h> +#include <stdbool.h> +#include <stddef.h> +#include <stdint.h> +#include <stdlib.h> +#include <string.h> +#include <syslog.h> + +#include "alloc-util.h" +#include "escape.h" +#include "extract-word.h" +#include "log.h" +#include "macro.h" +#include "string-util.h" +#include "utf8.h" + +int extract_first_word(const char **p, char **ret, const char *separators, ExtractFlags flags) { + _cleanup_free_ char *s = NULL; + size_t allocated = 0, sz = 0; + char c; + int r; + + char quote = 0; /* 0 or ' or " */ + bool backslash = false; /* whether we've just seen a backslash */ + + assert(p); + assert(ret); + + /* Bail early if called after last value or with no input */ + if (!*p) + goto finish; + c = **p; + + if (!separators) + separators = WHITESPACE; + + /* Parses the first word of a string, and returns it in + * *ret. Removes all quotes in the process. When parsing fails + * (because of an uneven number of quotes or similar), leaves + * the pointer *p at the first invalid character. */ + + if (flags & EXTRACT_DONT_COALESCE_SEPARATORS) + if (!GREEDY_REALLOC(s, allocated, sz+1)) + return -ENOMEM; + + for (;; (*p)++, c = **p) { + if (c == 0) + goto finish_force_terminate; + else if (strchr(separators, c)) { + if (flags & EXTRACT_DONT_COALESCE_SEPARATORS) { + (*p)++; + goto finish_force_next; + } + } else { + /* We found a non-blank character, so we will always + * want to return a string (even if it is empty), + * allocate it here. */ + if (!GREEDY_REALLOC(s, allocated, sz+1)) + return -ENOMEM; + break; + } + } + + for (;; (*p)++, c = **p) { + if (backslash) { + if (!GREEDY_REALLOC(s, allocated, sz+7)) + return -ENOMEM; + + if (c == 0) { + if ((flags & EXTRACT_CUNESCAPE_RELAX) && + (!quote || flags & EXTRACT_RELAX)) { + /* If we find an unquoted trailing backslash and we're in + * EXTRACT_CUNESCAPE_RELAX mode, keep it verbatim in the + * output. + * + * Unbalanced quotes will only be allowed in EXTRACT_RELAX + * mode, EXTRACT_CUNESCAPE_RELAX mode does not allow them. + */ + s[sz++] = '\\'; + goto finish_force_terminate; + } + if (flags & EXTRACT_RELAX) + goto finish_force_terminate; + return -EINVAL; + } + + if (flags & EXTRACT_CUNESCAPE) { + bool eight_bit = false; + char32_t u; + + r = cunescape_one(*p, (size_t) -1, &u, &eight_bit); + if (r < 0) { + if (flags & EXTRACT_CUNESCAPE_RELAX) { + s[sz++] = '\\'; + s[sz++] = c; + } else + return -EINVAL; + } else { + (*p) += r - 1; + + if (eight_bit) + s[sz++] = u; + else + sz += utf8_encode_unichar(s + sz, u); + } + } else + s[sz++] = c; + + backslash = false; + + } else if (quote) { /* inside either single or double quotes */ + for (;; (*p)++, c = **p) { + if (c == 0) { + if (flags & EXTRACT_RELAX) + goto finish_force_terminate; + return -EINVAL; + } else if (c == quote) { /* found the end quote */ + quote = 0; + break; + } else if (c == '\\' && !(flags & EXTRACT_RETAIN_ESCAPE)) { + backslash = true; + break; + } else { + if (!GREEDY_REALLOC(s, allocated, sz+2)) + return -ENOMEM; + + s[sz++] = c; + } + } + + } else { + for (;; (*p)++, c = **p) { + if (c == 0) + goto finish_force_terminate; + else if (IN_SET(c, '\'', '"') && (flags & EXTRACT_QUOTES)) { + quote = c; + break; + } else if (c == '\\' && !(flags & EXTRACT_RETAIN_ESCAPE)) { + backslash = true; + break; + } else if (strchr(separators, c)) { + if (flags & EXTRACT_DONT_COALESCE_SEPARATORS) { + (*p)++; + goto finish_force_next; + } + /* Skip additional coalesced separators. */ + for (;; (*p)++, c = **p) { + if (c == 0) + goto finish_force_terminate; + if (!strchr(separators, c)) + break; + } + goto finish; + + } else { + if (!GREEDY_REALLOC(s, allocated, sz+2)) + return -ENOMEM; + + s[sz++] = c; + } + } + } + } + +finish_force_terminate: + *p = NULL; +finish: + if (!s) { + *p = NULL; + *ret = NULL; + return 0; + } + +finish_force_next: + s[sz] = 0; + *ret = TAKE_PTR(s); + + return 1; +} + +int extract_first_word_and_warn( + const char **p, + char **ret, + const char *separators, + ExtractFlags flags, + const char *unit, + const char *filename, + unsigned line, + const char *rvalue) { + + /* Try to unquote it, if it fails, warn about it and try again + * but this time using EXTRACT_CUNESCAPE_RELAX to keep the + * backslashes verbatim in invalid escape sequences. */ + + const char *save; + int r; + + save = *p; + r = extract_first_word(p, ret, separators, flags); + if (r >= 0) + return r; + + if (r == -EINVAL && !(flags & EXTRACT_CUNESCAPE_RELAX)) { + + /* Retry it with EXTRACT_CUNESCAPE_RELAX. */ + *p = save; + r = extract_first_word(p, ret, separators, flags|EXTRACT_CUNESCAPE_RELAX); + if (r >= 0) { + /* It worked this time, hence it must have been an invalid escape sequence. */ + log_syntax(unit, LOG_WARNING, filename, line, EINVAL, "Ignoring unknown escape sequences: \"%s\"", *ret); + return r; + } + + /* If it's still EINVAL; then it must be unbalanced quoting, report this. */ + if (r == -EINVAL) + return log_syntax(unit, LOG_ERR, filename, line, r, "Unbalanced quoting, ignoring: \"%s\"", rvalue); + } + + /* Can be any error, report it */ + return log_syntax(unit, LOG_ERR, filename, line, r, "Unable to decode word \"%s\", ignoring: %m", rvalue); +} + +/* We pass ExtractFlags as unsigned int (to avoid undefined behaviour when passing + * an object that undergoes default argument promotion as an argument to va_start). + * Let's make sure that ExtractFlags fits into an unsigned int. */ +assert_cc(sizeof(enum ExtractFlags) <= sizeof(unsigned)); + +int extract_many_words(const char **p, const char *separators, unsigned flags, ...) { + va_list ap; + char **l; + int n = 0, i, c, r; + + /* Parses a number of words from a string, stripping any + * quotes if necessary. */ + + assert(p); + + /* Count how many words are expected */ + va_start(ap, flags); + for (;;) { + if (!va_arg(ap, char **)) + break; + n++; + } + va_end(ap); + + if (n <= 0) + return 0; + + /* Read all words into a temporary array */ + l = newa0(char*, n); + for (c = 0; c < n; c++) { + + r = extract_first_word(p, &l[c], separators, flags); + if (r < 0) { + int j; + + for (j = 0; j < c; j++) + free(l[j]); + + return r; + } + + if (r == 0) + break; + } + + /* If we managed to parse all words, return them in the passed + * in parameters */ + va_start(ap, flags); + for (i = 0; i < n; i++) { + char **v; + + v = va_arg(ap, char **); + assert(v); + + *v = l[i]; + } + va_end(ap); + + return c; +} diff --git a/src/basic/extract-word.h b/src/basic/extract-word.h new file mode 100644 index 0000000..705ebbe --- /dev/null +++ b/src/basic/extract-word.h @@ -0,0 +1,17 @@ +/* SPDX-License-Identifier: LGPL-2.1+ */ +#pragma once + +#include "macro.h" + +typedef enum ExtractFlags { + EXTRACT_RELAX = 1 << 0, + EXTRACT_CUNESCAPE = 1 << 1, + EXTRACT_CUNESCAPE_RELAX = 1 << 2, + EXTRACT_QUOTES = 1 << 3, + EXTRACT_DONT_COALESCE_SEPARATORS = 1 << 4, + EXTRACT_RETAIN_ESCAPE = 1 << 5, +} ExtractFlags; + +int extract_first_word(const char **p, char **ret, const char *separators, ExtractFlags flags); +int extract_first_word_and_warn(const char **p, char **ret, const char *separators, ExtractFlags flags, const char *unit, const char *filename, unsigned line, const char *rvalue); +int extract_many_words(const char **p, const char *separators, unsigned flags, ...) _sentinel_; diff --git a/src/basic/fd-util.c b/src/basic/fd-util.c new file mode 100644 index 0000000..3e6ef5a --- /dev/null +++ b/src/basic/fd-util.c @@ -0,0 +1,967 @@ +/* SPDX-License-Identifier: LGPL-2.1+ */ + +#include <errno.h> +#include <fcntl.h> +#include <sys/resource.h> +#include <sys/socket.h> +#include <sys/stat.h> +#include <unistd.h> + +#include "alloc-util.h" +#include "copy.h" +#include "dirent-util.h" +#include "fd-util.h" +#include "fileio.h" +#include "fs-util.h" +#include "io-util.h" +#include "macro.h" +#include "memfd-util.h" +#include "missing.h" +#include "parse-util.h" +#include "path-util.h" +#include "process-util.h" +#include "socket-util.h" +#include "stdio-util.h" +#include "util.h" +#include "tmpfile-util.h" + +int close_nointr(int fd) { + assert(fd >= 0); + + if (close(fd) >= 0) + return 0; + + /* + * Just ignore EINTR; a retry loop is the wrong thing to do on + * Linux. + * + * http://lkml.indiana.edu/hypermail/linux/kernel/0509.1/0877.html + * https://bugzilla.gnome.org/show_bug.cgi?id=682819 + * http://utcc.utoronto.ca/~cks/space/blog/unix/CloseEINTR + * https://sites.google.com/site/michaelsafyan/software-engineering/checkforeintrwheninvokingclosethinkagain + */ + if (errno == EINTR) + return 0; + + return -errno; +} + +int safe_close(int fd) { + + /* + * Like close_nointr() but cannot fail. Guarantees errno is + * unchanged. Is a NOP with negative fds passed, and returns + * -1, so that it can be used in this syntax: + * + * fd = safe_close(fd); + */ + + if (fd >= 0) { + PROTECT_ERRNO; + + /* The kernel might return pretty much any error code + * via close(), but the fd will be closed anyway. The + * only condition we want to check for here is whether + * the fd was invalid at all... */ + + assert_se(close_nointr(fd) != -EBADF); + } + + return -1; +} + +void safe_close_pair(int p[static 2]) { + assert(p); + + if (p[0] == p[1]) { + /* Special case pairs which use the same fd in both + * directions... */ + p[0] = p[1] = safe_close(p[0]); + return; + } + + p[0] = safe_close(p[0]); + p[1] = safe_close(p[1]); +} + +void close_many(const int fds[], size_t n_fd) { + size_t i; + + assert(fds || n_fd <= 0); + + for (i = 0; i < n_fd; i++) + safe_close(fds[i]); +} + +int fclose_nointr(FILE *f) { + assert(f); + + /* Same as close_nointr(), but for fclose() */ + + if (fclose(f) == 0) + return 0; + + if (errno == EINTR) + return 0; + + return -errno; +} + +FILE* safe_fclose(FILE *f) { + + /* Same as safe_close(), but for fclose() */ + + if (f) { + PROTECT_ERRNO; + + assert_se(fclose_nointr(f) != -EBADF); + } + + return NULL; +} + +DIR* safe_closedir(DIR *d) { + + if (d) { + PROTECT_ERRNO; + + assert_se(closedir(d) >= 0 || errno != EBADF); + } + + return NULL; +} + +int fd_nonblock(int fd, bool nonblock) { + int flags, nflags; + + assert(fd >= 0); + + flags = fcntl(fd, F_GETFL, 0); + if (flags < 0) + return -errno; + + if (nonblock) + nflags = flags | O_NONBLOCK; + else + nflags = flags & ~O_NONBLOCK; + + if (nflags == flags) + return 0; + + if (fcntl(fd, F_SETFL, nflags) < 0) + return -errno; + + return 0; +} + +int fd_cloexec(int fd, bool cloexec) { + int flags, nflags; + + assert(fd >= 0); + + flags = fcntl(fd, F_GETFD, 0); + if (flags < 0) + return -errno; + + if (cloexec) + nflags = flags | FD_CLOEXEC; + else + nflags = flags & ~FD_CLOEXEC; + + if (nflags == flags) + return 0; + + if (fcntl(fd, F_SETFD, nflags) < 0) + return -errno; + + return 0; +} + +_pure_ static bool fd_in_set(int fd, const int fdset[], size_t n_fdset) { + size_t i; + + assert(n_fdset == 0 || fdset); + + for (i = 0; i < n_fdset; i++) + if (fdset[i] == fd) + return true; + + return false; +} + +static int get_max_fd(void) { + struct rlimit rl; + rlim_t m; + + /* Return the highest possible fd, based RLIMIT_NOFILE, but enforcing FD_SETSIZE-1 as lower boundary + * and INT_MAX as upper boundary. */ + + if (getrlimit(RLIMIT_NOFILE, &rl) < 0) + return -errno; + + m = MAX(rl.rlim_cur, rl.rlim_max); + if (m < FD_SETSIZE) /* Let's always cover at least 1024 fds */ + return FD_SETSIZE-1; + + if (m == RLIM_INFINITY || m > INT_MAX) /* Saturate on overflow. After all fds are "int", hence can + * never be above INT_MAX */ + return INT_MAX; + + return (int) (m - 1); +} + +int close_all_fds(const int except[], size_t n_except) { + _cleanup_closedir_ DIR *d = NULL; + struct dirent *de; + int r = 0; + + assert(n_except == 0 || except); + + d = opendir("/proc/self/fd"); + if (!d) { + int fd, max_fd; + + /* When /proc isn't available (for example in chroots) the fallback is brute forcing through + * the fd table */ + + max_fd = get_max_fd(); + if (max_fd < 0) + return max_fd; + + for (fd = 3; fd >= 0; fd = fd < max_fd ? fd + 1 : -1) { + int q; + + if (fd_in_set(fd, except, n_except)) + continue; + + q = close_nointr(fd); + if (q < 0 && q != -EBADF && r >= 0) + r = q; + } + + return r; + } + + FOREACH_DIRENT(de, d, return -errno) { + int fd = -1, q; + + if (safe_atoi(de->d_name, &fd) < 0) + /* Let's better ignore this, just in case */ + continue; + + if (fd < 3) + continue; + + if (fd == dirfd(d)) + continue; + + if (fd_in_set(fd, except, n_except)) + continue; + + q = close_nointr(fd); + if (q < 0 && q != -EBADF && r >= 0) /* Valgrind has its own FD and doesn't want to have it closed */ + r = q; + } + + return r; +} + +int same_fd(int a, int b) { + struct stat sta, stb; + pid_t pid; + int r, fa, fb; + + assert(a >= 0); + assert(b >= 0); + + /* Compares two file descriptors. Note that semantics are + * quite different depending on whether we have kcmp() or we + * don't. If we have kcmp() this will only return true for + * dup()ed file descriptors, but not otherwise. If we don't + * have kcmp() this will also return true for two fds of the same + * file, created by separate open() calls. Since we use this + * call mostly for filtering out duplicates in the fd store + * this difference hopefully doesn't matter too much. */ + + if (a == b) + return true; + + /* Try to use kcmp() if we have it. */ + pid = getpid_cached(); + r = kcmp(pid, pid, KCMP_FILE, a, b); + if (r == 0) + return true; + if (r > 0) + return false; + if (!IN_SET(errno, ENOSYS, EACCES, EPERM)) + return -errno; + + /* We don't have kcmp(), use fstat() instead. */ + if (fstat(a, &sta) < 0) + return -errno; + + if (fstat(b, &stb) < 0) + return -errno; + + if ((sta.st_mode & S_IFMT) != (stb.st_mode & S_IFMT)) + return false; + + /* We consider all device fds different, since two device fds + * might refer to quite different device contexts even though + * they share the same inode and backing dev_t. */ + + if (S_ISCHR(sta.st_mode) || S_ISBLK(sta.st_mode)) + return false; + + if (sta.st_dev != stb.st_dev || sta.st_ino != stb.st_ino) + return false; + + /* The fds refer to the same inode on disk, let's also check + * if they have the same fd flags. This is useful to + * distinguish the read and write side of a pipe created with + * pipe(). */ + fa = fcntl(a, F_GETFL); + if (fa < 0) + return -errno; + + fb = fcntl(b, F_GETFL); + if (fb < 0) + return -errno; + + return fa == fb; +} + +void cmsg_close_all(struct msghdr *mh) { + struct cmsghdr *cmsg; + + assert(mh); + + CMSG_FOREACH(cmsg, mh) + if (cmsg->cmsg_level == SOL_SOCKET && cmsg->cmsg_type == SCM_RIGHTS) + close_many((int*) CMSG_DATA(cmsg), (cmsg->cmsg_len - CMSG_LEN(0)) / sizeof(int)); +} + +bool fdname_is_valid(const char *s) { + const char *p; + + /* Validates a name for $LISTEN_FDNAMES. We basically allow + * everything ASCII that's not a control character. Also, as + * special exception the ":" character is not allowed, as we + * use that as field separator in $LISTEN_FDNAMES. + * + * Note that the empty string is explicitly allowed + * here. However, we limit the length of the names to 255 + * characters. */ + + if (!s) + return false; + + for (p = s; *p; p++) { + if (*p < ' ') + return false; + if (*p >= 127) + return false; + if (*p == ':') + return false; + } + + return p - s < 256; +} + +int fd_get_path(int fd, char **ret) { + char procfs_path[STRLEN("/proc/self/fd/") + DECIMAL_STR_MAX(int)]; + int r; + + xsprintf(procfs_path, "/proc/self/fd/%i", fd); + r = readlink_malloc(procfs_path, ret); + if (r == -ENOENT) { + /* ENOENT can mean two things: that the fd does not exist or that /proc is not mounted. Let's make + * things debuggable and distuingish the two. */ + + if (access("/proc/self/fd/", F_OK) < 0) + /* /proc is not available or not set up properly, we're most likely in some chroot + * environment. */ + return errno == ENOENT ? -EOPNOTSUPP : -errno; + + return -EBADF; /* The directory exists, hence it's the fd that doesn't. */ + } + + return r; +} + +int move_fd(int from, int to, int cloexec) { + int r; + + /* Move fd 'from' to 'to', make sure FD_CLOEXEC remains equal if requested, and release the old fd. If + * 'cloexec' is passed as -1, the original FD_CLOEXEC is inherited for the new fd. If it is 0, it is turned + * off, if it is > 0 it is turned on. */ + + if (from < 0) + return -EBADF; + if (to < 0) + return -EBADF; + + if (from == to) { + + if (cloexec >= 0) { + r = fd_cloexec(to, cloexec); + if (r < 0) + return r; + } + + return to; + } + + if (cloexec < 0) { + int fl; + + fl = fcntl(from, F_GETFD, 0); + if (fl < 0) + return -errno; + + cloexec = !!(fl & FD_CLOEXEC); + } + + r = dup3(from, to, cloexec ? O_CLOEXEC : 0); + if (r < 0) + return -errno; + + assert(r == to); + + safe_close(from); + + return to; +} + +int acquire_data_fd(const void *data, size_t size, unsigned flags) { + + _cleanup_close_pair_ int pipefds[2] = { -1, -1 }; + char pattern[] = "/dev/shm/data-fd-XXXXXX"; + _cleanup_close_ int fd = -1; + int isz = 0, r; + ssize_t n; + off_t f; + + assert(data || size == 0); + + /* Acquire a read-only file descriptor that when read from returns the specified data. This is much more + * complex than I wish it was. But here's why: + * + * a) First we try to use memfds. They are the best option, as we can seal them nicely to make them + * read-only. Unfortunately they require kernel 3.17, and – at the time of writing – we still support 3.14. + * + * b) Then, we try classic pipes. They are the second best options, as we can close the writing side, retaining + * a nicely read-only fd in the reading side. However, they are by default quite small, and unprivileged + * clients can only bump their size to a system-wide limit, which might be quite low. + * + * c) Then, we try an O_TMPFILE file in /dev/shm (that dir is the only suitable one known to exist from + * earliest boot on). To make it read-only we open the fd a second time with O_RDONLY via + * /proc/self/<fd>. Unfortunately O_TMPFILE is not available on older kernels on tmpfs. + * + * d) Finally, we try creating a regular file in /dev/shm, which we then delete. + * + * It sucks a bit that depending on the situation we return very different objects here, but that's Linux I + * figure. */ + + if (size == 0 && ((flags & ACQUIRE_NO_DEV_NULL) == 0)) { + /* As a special case, return /dev/null if we have been called for an empty data block */ + r = open("/dev/null", O_RDONLY|O_CLOEXEC|O_NOCTTY); + if (r < 0) + return -errno; + + return r; + } + + if ((flags & ACQUIRE_NO_MEMFD) == 0) { + fd = memfd_new("data-fd"); + if (fd < 0) + goto try_pipe; + + n = write(fd, data, size); + if (n < 0) + return -errno; + if ((size_t) n != size) + return -EIO; + + f = lseek(fd, 0, SEEK_SET); + if (f != 0) + return -errno; + + r = memfd_set_sealed(fd); + if (r < 0) + return r; + + return TAKE_FD(fd); + } + +try_pipe: + if ((flags & ACQUIRE_NO_PIPE) == 0) { + if (pipe2(pipefds, O_CLOEXEC|O_NONBLOCK) < 0) + return -errno; + + isz = fcntl(pipefds[1], F_GETPIPE_SZ, 0); + if (isz < 0) + return -errno; + + if ((size_t) isz < size) { + isz = (int) size; + if (isz < 0 || (size_t) isz != size) + return -E2BIG; + + /* Try to bump the pipe size */ + (void) fcntl(pipefds[1], F_SETPIPE_SZ, isz); + + /* See if that worked */ + isz = fcntl(pipefds[1], F_GETPIPE_SZ, 0); + if (isz < 0) + return -errno; + + if ((size_t) isz < size) + goto try_dev_shm; + } + + n = write(pipefds[1], data, size); + if (n < 0) + return -errno; + if ((size_t) n != size) + return -EIO; + + (void) fd_nonblock(pipefds[0], false); + + return TAKE_FD(pipefds[0]); + } + +try_dev_shm: + if ((flags & ACQUIRE_NO_TMPFILE) == 0) { + fd = open("/dev/shm", O_RDWR|O_TMPFILE|O_CLOEXEC, 0500); + if (fd < 0) + goto try_dev_shm_without_o_tmpfile; + + n = write(fd, data, size); + if (n < 0) + return -errno; + if ((size_t) n != size) + return -EIO; + + /* Let's reopen the thing, in order to get an O_RDONLY fd for the original O_RDWR one */ + return fd_reopen(fd, O_RDONLY|O_CLOEXEC); + } + +try_dev_shm_without_o_tmpfile: + if ((flags & ACQUIRE_NO_REGULAR) == 0) { + fd = mkostemp_safe(pattern); + if (fd < 0) + return fd; + + n = write(fd, data, size); + if (n < 0) { + r = -errno; + goto unlink_and_return; + } + if ((size_t) n != size) { + r = -EIO; + goto unlink_and_return; + } + + /* Let's reopen the thing, in order to get an O_RDONLY fd for the original O_RDWR one */ + r = open(pattern, O_RDONLY|O_CLOEXEC); + if (r < 0) + r = -errno; + + unlink_and_return: + (void) unlink(pattern); + return r; + } + + return -EOPNOTSUPP; +} + +/* When the data is smaller or equal to 64K, try to place the copy in a memfd/pipe */ +#define DATA_FD_MEMORY_LIMIT (64U*1024U) + +/* If memfd/pipe didn't work out, then let's use a file in /tmp up to a size of 1M. If it's large than that use /var/tmp instead. */ +#define DATA_FD_TMP_LIMIT (1024U*1024U) + +int fd_duplicate_data_fd(int fd) { + + _cleanup_close_ int copy_fd = -1, tmp_fd = -1; + _cleanup_free_ void *remains = NULL; + size_t remains_size = 0; + const char *td; + struct stat st; + int r; + + /* Creates a 'data' fd from the specified source fd, containing all the same data in a read-only fashion, but + * independent of it (i.e. the source fd can be closed and unmounted after this call succeeded). Tries to be + * somewhat smart about where to place the data. In the best case uses a memfd(). If memfd() are not supported + * uses a pipe instead. For larger data will use an unlinked file in /tmp, and for even larger data one in + * /var/tmp. */ + + if (fstat(fd, &st) < 0) + return -errno; + + /* For now, let's only accept regular files, sockets, pipes and char devices */ + if (S_ISDIR(st.st_mode)) + return -EISDIR; + if (S_ISLNK(st.st_mode)) + return -ELOOP; + if (!S_ISREG(st.st_mode) && !S_ISSOCK(st.st_mode) && !S_ISFIFO(st.st_mode) && !S_ISCHR(st.st_mode)) + return -EBADFD; + + /* If we have reason to believe the data is bounded in size, then let's use memfds or pipes as backing fd. Note + * that we use the reported regular file size only as a hint, given that there are plenty special files in + * /proc and /sys which report a zero file size but can be read from. */ + + if (!S_ISREG(st.st_mode) || st.st_size < DATA_FD_MEMORY_LIMIT) { + + /* Try a memfd first */ + copy_fd = memfd_new("data-fd"); + if (copy_fd >= 0) { + off_t f; + + r = copy_bytes(fd, copy_fd, DATA_FD_MEMORY_LIMIT, 0); + if (r < 0) + return r; + + f = lseek(copy_fd, 0, SEEK_SET); + if (f != 0) + return -errno; + + if (r == 0) { + /* Did it fit into the limit? If so, we are done. */ + r = memfd_set_sealed(copy_fd); + if (r < 0) + return r; + + return TAKE_FD(copy_fd); + } + + /* Hmm, pity, this didn't fit. Let's fall back to /tmp then, see below */ + + } else { + _cleanup_(close_pairp) int pipefds[2] = { -1, -1 }; + int isz; + + /* If memfds aren't available, use a pipe. Set O_NONBLOCK so that we will get EAGAIN rather + * then block indefinitely when we hit the pipe size limit */ + + if (pipe2(pipefds, O_CLOEXEC|O_NONBLOCK) < 0) + return -errno; + + isz = fcntl(pipefds[1], F_GETPIPE_SZ, 0); + if (isz < 0) + return -errno; + + /* Try to enlarge the pipe size if necessary */ + if ((size_t) isz < DATA_FD_MEMORY_LIMIT) { + + (void) fcntl(pipefds[1], F_SETPIPE_SZ, DATA_FD_MEMORY_LIMIT); + + isz = fcntl(pipefds[1], F_GETPIPE_SZ, 0); + if (isz < 0) + return -errno; + } + + if ((size_t) isz >= DATA_FD_MEMORY_LIMIT) { + + r = copy_bytes_full(fd, pipefds[1], DATA_FD_MEMORY_LIMIT, 0, &remains, &remains_size, NULL, NULL); + if (r < 0 && r != -EAGAIN) + return r; /* If we get EAGAIN it could be because of the source or because of + * the destination fd, we can't know, as sendfile() and friends won't + * tell us. Hence, treat this as reason to fall back, just to be + * sure. */ + if (r == 0) { + /* Everything fit in, yay! */ + (void) fd_nonblock(pipefds[0], false); + + return TAKE_FD(pipefds[0]); + } + + /* Things didn't fit in. But we read data into the pipe, let's remember that, so that + * when writing the new file we incorporate this first. */ + copy_fd = TAKE_FD(pipefds[0]); + } + } + } + + /* If we have reason to believe this will fit fine in /tmp, then use that as first fallback. */ + if ((!S_ISREG(st.st_mode) || st.st_size < DATA_FD_TMP_LIMIT) && + (DATA_FD_MEMORY_LIMIT + remains_size) < DATA_FD_TMP_LIMIT) { + off_t f; + + tmp_fd = open_tmpfile_unlinkable(NULL /* NULL as directory means /tmp */, O_RDWR|O_CLOEXEC); + if (tmp_fd < 0) + return tmp_fd; + + if (copy_fd >= 0) { + /* If we tried a memfd/pipe first and it ended up being too large, then copy this into the + * temporary file first. */ + + r = copy_bytes(copy_fd, tmp_fd, UINT64_MAX, 0); + if (r < 0) + return r; + + assert(r == 0); + } + + if (remains_size > 0) { + /* If there were remaining bytes (i.e. read into memory, but not written out yet) from the + * failed copy operation, let's flush them out next. */ + + r = loop_write(tmp_fd, remains, remains_size, false); + if (r < 0) + return r; + } + + r = copy_bytes(fd, tmp_fd, DATA_FD_TMP_LIMIT - DATA_FD_MEMORY_LIMIT - remains_size, COPY_REFLINK); + if (r < 0) + return r; + if (r == 0) + goto finish; /* Yay, it fit in */ + + /* It didn't fit in. Let's not forget to use what we already used */ + f = lseek(tmp_fd, 0, SEEK_SET); + if (f != 0) + return -errno; + + safe_close(copy_fd); + copy_fd = TAKE_FD(tmp_fd); + + remains = mfree(remains); + remains_size = 0; + } + + /* As last fallback use /var/tmp */ + r = var_tmp_dir(&td); + if (r < 0) + return r; + + tmp_fd = open_tmpfile_unlinkable(td, O_RDWR|O_CLOEXEC); + if (tmp_fd < 0) + return tmp_fd; + + if (copy_fd >= 0) { + /* If we tried a memfd/pipe first, or a file in /tmp, and it ended up being too large, than copy this + * into the temporary file first. */ + r = copy_bytes(copy_fd, tmp_fd, UINT64_MAX, COPY_REFLINK); + if (r < 0) + return r; + + assert(r == 0); + } + + if (remains_size > 0) { + /* Then, copy in any read but not yet written bytes. */ + r = loop_write(tmp_fd, remains, remains_size, false); + if (r < 0) + return r; + } + + /* Copy in the rest */ + r = copy_bytes(fd, tmp_fd, UINT64_MAX, COPY_REFLINK); + if (r < 0) + return r; + + assert(r == 0); + +finish: + /* Now convert the O_RDWR file descriptor into an O_RDONLY one (and as side effect seek to the beginning of the + * file again */ + + return fd_reopen(tmp_fd, O_RDONLY|O_CLOEXEC); +} + +int fd_move_above_stdio(int fd) { + int flags, copy; + PROTECT_ERRNO; + + /* Moves the specified file descriptor if possible out of the range [0…2], i.e. the range of + * stdin/stdout/stderr. If it can't be moved outside of this range the original file descriptor is + * returned. This call is supposed to be used for long-lasting file descriptors we allocate in our code that + * might get loaded into foreign code, and where we want ensure our fds are unlikely used accidentally as + * stdin/stdout/stderr of unrelated code. + * + * Note that this doesn't fix any real bugs, it just makes it less likely that our code will be affected by + * buggy code from others that mindlessly invokes 'fprintf(stderr, …' or similar in places where stderr has + * been closed before. + * + * This function is written in a "best-effort" and "least-impact" style. This means whenever we encounter an + * error we simply return the original file descriptor, and we do not touch errno. */ + + if (fd < 0 || fd > 2) + return fd; + + flags = fcntl(fd, F_GETFD, 0); + if (flags < 0) + return fd; + + if (flags & FD_CLOEXEC) + copy = fcntl(fd, F_DUPFD_CLOEXEC, 3); + else + copy = fcntl(fd, F_DUPFD, 3); + if (copy < 0) + return fd; + + assert(copy > 2); + + (void) close(fd); + return copy; +} + +int rearrange_stdio(int original_input_fd, int original_output_fd, int original_error_fd) { + + int fd[3] = { /* Put together an array of fds we work on */ + original_input_fd, + original_output_fd, + original_error_fd + }; + + int r, i, + null_fd = -1, /* if we open /dev/null, we store the fd to it here */ + copy_fd[3] = { -1, -1, -1 }; /* This contains all fds we duplicate here temporarily, and hence need to close at the end */ + bool null_readable, null_writable; + + /* Sets up stdin, stdout, stderr with the three file descriptors passed in. If any of the descriptors is + * specified as -1 it will be connected with /dev/null instead. If any of the file descriptors is passed as + * itself (e.g. stdin as STDIN_FILENO) it is left unmodified, but the O_CLOEXEC bit is turned off should it be + * on. + * + * Note that if any of the passed file descriptors are > 2 they will be closed — both on success and on + * failure! Thus, callers should assume that when this function returns the input fds are invalidated. + * + * Note that when this function fails stdin/stdout/stderr might remain half set up! + * + * O_CLOEXEC is turned off for all three file descriptors (which is how it should be for + * stdin/stdout/stderr). */ + + null_readable = original_input_fd < 0; + null_writable = original_output_fd < 0 || original_error_fd < 0; + + /* First step, open /dev/null once, if we need it */ + if (null_readable || null_writable) { + + /* Let's open this with O_CLOEXEC first, and convert it to non-O_CLOEXEC when we move the fd to the final position. */ + null_fd = open("/dev/null", (null_readable && null_writable ? O_RDWR : + null_readable ? O_RDONLY : O_WRONLY) | O_CLOEXEC); + if (null_fd < 0) { + r = -errno; + goto finish; + } + + /* If this fd is in the 0…2 range, let's move it out of it */ + if (null_fd < 3) { + int copy; + + copy = fcntl(null_fd, F_DUPFD_CLOEXEC, 3); /* Duplicate this with O_CLOEXEC set */ + if (copy < 0) { + r = -errno; + goto finish; + } + + safe_close(null_fd); + null_fd = copy; + } + } + + /* Let's assemble fd[] with the fds to install in place of stdin/stdout/stderr */ + for (i = 0; i < 3; i++) { + + if (fd[i] < 0) + fd[i] = null_fd; /* A negative parameter means: connect this one to /dev/null */ + else if (fd[i] != i && fd[i] < 3) { + /* This fd is in the 0…2 territory, but not at its intended place, move it out of there, so that we can work there. */ + copy_fd[i] = fcntl(fd[i], F_DUPFD_CLOEXEC, 3); /* Duplicate this with O_CLOEXEC set */ + if (copy_fd[i] < 0) { + r = -errno; + goto finish; + } + + fd[i] = copy_fd[i]; + } + } + + /* At this point we now have the fds to use in fd[], and they are all above the stdio range, so that we + * have freedom to move them around. If the fds already were at the right places then the specific fds are + * -1. Let's now move them to the right places. This is the point of no return. */ + for (i = 0; i < 3; i++) { + + if (fd[i] == i) { + + /* fd is already in place, but let's make sure O_CLOEXEC is off */ + r = fd_cloexec(i, false); + if (r < 0) + goto finish; + + } else { + assert(fd[i] > 2); + + if (dup2(fd[i], i) < 0) { /* Turns off O_CLOEXEC on the new fd. */ + r = -errno; + goto finish; + } + } + } + + r = 0; + +finish: + /* Close the original fds, but only if they were outside of the stdio range. Also, properly check for the same + * fd passed in multiple times. */ + safe_close_above_stdio(original_input_fd); + if (original_output_fd != original_input_fd) + safe_close_above_stdio(original_output_fd); + if (original_error_fd != original_input_fd && original_error_fd != original_output_fd) + safe_close_above_stdio(original_error_fd); + + /* Close the copies we moved > 2 */ + for (i = 0; i < 3; i++) + safe_close(copy_fd[i]); + + /* Close our null fd, if it's > 2 */ + safe_close_above_stdio(null_fd); + + return r; +} + +int fd_reopen(int fd, int flags) { + char procfs_path[STRLEN("/proc/self/fd/") + DECIMAL_STR_MAX(int)]; + int new_fd; + + /* Reopens the specified fd with new flags. This is useful for convert an O_PATH fd into a regular one, or to + * turn O_RDWR fds into O_RDONLY fds. + * + * This doesn't work on sockets (since they cannot be open()ed, ever). + * + * This implicitly resets the file read index to 0. */ + + xsprintf(procfs_path, "/proc/self/fd/%i", fd); + new_fd = open(procfs_path, flags); + if (new_fd < 0) + return -errno; + + return new_fd; +} + +int read_nr_open(void) { + _cleanup_free_ char *nr_open = NULL; + int r; + + /* Returns the kernel's current fd limit, either by reading it of /proc/sys if that works, or using the + * hard-coded default compiled-in value of current kernels (1M) if not. This call will never fail. */ + + r = read_one_line_file("/proc/sys/fs/nr_open", &nr_open); + if (r < 0) + log_debug_errno(r, "Failed to read /proc/sys/fs/nr_open, ignoring: %m"); + else { + int v; + + r = safe_atoi(nr_open, &v); + if (r < 0) + log_debug_errno(r, "Failed to parse /proc/sys/fs/nr_open value '%s', ignoring: %m", nr_open); + else + return v; + } + + /* If we fail, fallback to the hard-coded kernel limit of 1024 * 1024. */ + return 1024 * 1024; +} diff --git a/src/basic/fd-util.h b/src/basic/fd-util.h new file mode 100644 index 0000000..4085a24 --- /dev/null +++ b/src/basic/fd-util.h @@ -0,0 +1,110 @@ +/* SPDX-License-Identifier: LGPL-2.1+ */ +#pragma once + +#include <dirent.h> +#include <stdbool.h> +#include <stdio.h> +#include <sys/socket.h> + +#include "macro.h" + +/* Make sure we can distinguish fd 0 and NULL */ +#define FD_TO_PTR(fd) INT_TO_PTR((fd)+1) +#define PTR_TO_FD(p) (PTR_TO_INT(p)-1) + +int close_nointr(int fd); +int safe_close(int fd); +void safe_close_pair(int p[static 2]); + +static inline int safe_close_above_stdio(int fd) { + if (fd < 3) /* Don't close stdin/stdout/stderr, but still invalidate the fd by returning -1 */ + return -1; + + return safe_close(fd); +} + +void close_many(const int fds[], size_t n_fd); + +int fclose_nointr(FILE *f); +FILE* safe_fclose(FILE *f); +DIR* safe_closedir(DIR *f); + +static inline void closep(int *fd) { + safe_close(*fd); +} + +static inline void close_pairp(int (*p)[2]) { + safe_close_pair(*p); +} + +static inline void fclosep(FILE **f) { + safe_fclose(*f); +} + +DEFINE_TRIVIAL_CLEANUP_FUNC(FILE*, pclose); +DEFINE_TRIVIAL_CLEANUP_FUNC(DIR*, closedir); + +#define _cleanup_close_ _cleanup_(closep) +#define _cleanup_fclose_ _cleanup_(fclosep) +#define _cleanup_pclose_ _cleanup_(pclosep) +#define _cleanup_closedir_ _cleanup_(closedirp) +#define _cleanup_close_pair_ _cleanup_(close_pairp) + +int fd_nonblock(int fd, bool nonblock); +int fd_cloexec(int fd, bool cloexec); + +int close_all_fds(const int except[], size_t n_except); + +int same_fd(int a, int b); + +void cmsg_close_all(struct msghdr *mh); + +bool fdname_is_valid(const char *s); + +int fd_get_path(int fd, char **ret); + +int move_fd(int from, int to, int cloexec); + +enum { + ACQUIRE_NO_DEV_NULL = 1 << 0, + ACQUIRE_NO_MEMFD = 1 << 1, + ACQUIRE_NO_PIPE = 1 << 2, + ACQUIRE_NO_TMPFILE = 1 << 3, + ACQUIRE_NO_REGULAR = 1 << 4, +}; + +int acquire_data_fd(const void *data, size_t size, unsigned flags); + +int fd_duplicate_data_fd(int fd); + +/* Hint: ENETUNREACH happens if we try to connect to "non-existing" special IP addresses, such as ::5 */ +/* The kernel sends e.g., EHOSTUNREACH or ENONET to userspace in some ICMP error cases. + * See the icmp_err_convert[] in net/ipv4/icmp.c in the kernel sources */ +#define ERRNO_IS_DISCONNECT(r) \ + IN_SET(r, \ + ENOTCONN, ECONNRESET, ECONNREFUSED, ECONNABORTED, EPIPE, \ + ENETUNREACH, EHOSTUNREACH, ENOPROTOOPT, EHOSTDOWN, ENONET) + +/* Resource exhaustion, could be our fault or general system trouble */ +#define ERRNO_IS_RESOURCE(r) \ + IN_SET(r, ENOMEM, EMFILE, ENFILE) + +int fd_move_above_stdio(int fd); + +int rearrange_stdio(int original_input_fd, int original_output_fd, int original_error_fd); + +static inline int make_null_stdio(void) { + return rearrange_stdio(-1, -1, -1); +} + +/* Like TAKE_PTR() but for file descriptors, resetting them to -1 */ +#define TAKE_FD(fd) \ + ({ \ + int _fd_ = (fd); \ + (fd) = -1; \ + _fd_; \ + }) + +int fd_reopen(int fd, int flags); + +int read_nr_open(void); diff --git a/src/basic/fileio.c b/src/basic/fileio.c new file mode 100644 index 0000000..e18b842 --- /dev/null +++ b/src/basic/fileio.c @@ -0,0 +1,822 @@ +/* SPDX-License-Identifier: LGPL-2.1+ */ + +#include <ctype.h> +#include <errno.h> +#include <fcntl.h> +#include <limits.h> +#include <stdarg.h> +#include <stdint.h> +#include <stdio_ext.h> +#include <stdlib.h> +#include <string.h> +#include <sys/stat.h> +#include <sys/types.h> +#include <unistd.h> + +#include "alloc-util.h" +#include "fd-util.h" +#include "fileio.h" +#include "fs-util.h" +#include "log.h" +#include "macro.h" +#include "missing.h" +#include "parse-util.h" +#include "path-util.h" +#include "stdio-util.h" +#include "string-util.h" +#include "tmpfile-util.h" + +#define READ_FULL_BYTES_MAX (4U*1024U*1024U) + +int write_string_stream_ts( + FILE *f, + const char *line, + WriteStringFileFlags flags, + struct timespec *ts) { + + bool needs_nl; + int r; + + assert(f); + assert(line); + + if (ferror(f)) + return -EIO; + + needs_nl = !(flags & WRITE_STRING_FILE_AVOID_NEWLINE) && !endswith(line, "\n"); + + if (needs_nl && (flags & WRITE_STRING_FILE_DISABLE_BUFFER)) { + /* If STDIO buffering was disabled, then let's append the newline character to the string itself, so + * that the write goes out in one go, instead of two */ + + line = strjoina(line, "\n"); + needs_nl = false; + } + + if (fputs(line, f) == EOF) + return -errno; + + if (needs_nl) + if (fputc('\n', f) == EOF) + return -errno; + + if (flags & WRITE_STRING_FILE_SYNC) + r = fflush_sync_and_check(f); + else + r = fflush_and_check(f); + if (r < 0) + return r; + + if (ts) { + struct timespec twice[2] = {*ts, *ts}; + + if (futimens(fileno(f), twice) < 0) + return -errno; + } + + return 0; +} + +static int write_string_file_atomic( + const char *fn, + const char *line, + WriteStringFileFlags flags, + struct timespec *ts) { + + _cleanup_fclose_ FILE *f = NULL; + _cleanup_free_ char *p = NULL; + int r; + + assert(fn); + assert(line); + + r = fopen_temporary(fn, &f, &p); + if (r < 0) + return r; + + (void) __fsetlocking(f, FSETLOCKING_BYCALLER); + (void) fchmod_umask(fileno(f), 0644); + + r = write_string_stream_ts(f, line, flags, ts); + if (r < 0) + goto fail; + + if (rename(p, fn) < 0) { + r = -errno; + goto fail; + } + + return 0; + +fail: + (void) unlink(p); + return r; +} + +int write_string_file_ts( + const char *fn, + const char *line, + WriteStringFileFlags flags, + struct timespec *ts) { + + _cleanup_fclose_ FILE *f = NULL; + int q, r; + + assert(fn); + assert(line); + + /* We don't know how to verify whether the file contents was already on-disk. */ + assert(!((flags & WRITE_STRING_FILE_VERIFY_ON_FAILURE) && (flags & WRITE_STRING_FILE_SYNC))); + + if (flags & WRITE_STRING_FILE_ATOMIC) { + assert(flags & WRITE_STRING_FILE_CREATE); + + r = write_string_file_atomic(fn, line, flags, ts); + if (r < 0) + goto fail; + + return r; + } else + assert(!ts); + + if (flags & WRITE_STRING_FILE_CREATE) { + f = fopen(fn, "we"); + if (!f) { + r = -errno; + goto fail; + } + } else { + int fd; + + /* We manually build our own version of fopen(..., "we") that + * works without O_CREAT */ + fd = open(fn, O_WRONLY|O_CLOEXEC|O_NOCTTY | ((flags & WRITE_STRING_FILE_NOFOLLOW) ? O_NOFOLLOW : 0)); + if (fd < 0) { + r = -errno; + goto fail; + } + + f = fdopen(fd, "w"); + if (!f) { + r = -errno; + safe_close(fd); + goto fail; + } + } + + (void) __fsetlocking(f, FSETLOCKING_BYCALLER); + + if (flags & WRITE_STRING_FILE_DISABLE_BUFFER) + setvbuf(f, NULL, _IONBF, 0); + + r = write_string_stream_ts(f, line, flags, ts); + if (r < 0) + goto fail; + + return 0; + +fail: + if (!(flags & WRITE_STRING_FILE_VERIFY_ON_FAILURE)) + return r; + + f = safe_fclose(f); + + /* OK, the operation failed, but let's see if the right + * contents in place already. If so, eat up the error. */ + + q = verify_file(fn, line, !(flags & WRITE_STRING_FILE_AVOID_NEWLINE)); + if (q <= 0) + return r; + + return 0; +} + +int write_string_filef( + const char *fn, + WriteStringFileFlags flags, + const char *format, ...) { + + _cleanup_free_ char *p = NULL; + va_list ap; + int r; + + va_start(ap, format); + r = vasprintf(&p, format, ap); + va_end(ap); + + if (r < 0) + return -ENOMEM; + + return write_string_file(fn, p, flags); +} + +int read_one_line_file(const char *fn, char **line) { + _cleanup_fclose_ FILE *f = NULL; + int r; + + assert(fn); + assert(line); + + f = fopen(fn, "re"); + if (!f) + return -errno; + + (void) __fsetlocking(f, FSETLOCKING_BYCALLER); + + r = read_line(f, LONG_LINE_MAX, line); + return r < 0 ? r : 0; +} + +int verify_file(const char *fn, const char *blob, bool accept_extra_nl) { + _cleanup_fclose_ FILE *f = NULL; + _cleanup_free_ char *buf = NULL; + size_t l, k; + + assert(fn); + assert(blob); + + l = strlen(blob); + + if (accept_extra_nl && endswith(blob, "\n")) + accept_extra_nl = false; + + buf = malloc(l + accept_extra_nl + 1); + if (!buf) + return -ENOMEM; + + f = fopen(fn, "re"); + if (!f) + return -errno; + + (void) __fsetlocking(f, FSETLOCKING_BYCALLER); + + /* We try to read one byte more than we need, so that we know whether we hit eof */ + errno = 0; + k = fread(buf, 1, l + accept_extra_nl + 1, f); + if (ferror(f)) + return errno > 0 ? -errno : -EIO; + + if (k != l && k != l + accept_extra_nl) + return 0; + if (memcmp(buf, blob, l) != 0) + return 0; + if (k > l && buf[l] != '\n') + return 0; + + return 1; +} + +int read_full_stream( + FILE *f, + char **ret_contents, + size_t *ret_size) { + + _cleanup_free_ char *buf = NULL; + struct stat st; + size_t n, l; + int fd; + + assert(f); + assert(ret_contents); + + n = LINE_MAX; /* Start size */ + + fd = fileno(f); + if (fd >= 0) { /* If the FILE* object is backed by an fd (as opposed to memory or such, see fmemopen(), let's + * optimize our buffering) */ + + if (fstat(fileno(f), &st) < 0) + return -errno; + + if (S_ISREG(st.st_mode)) { + + /* Safety check */ + if (st.st_size > READ_FULL_BYTES_MAX) + return -E2BIG; + + /* Start with the right file size, but be prepared for files from /proc which generally report a file + * size of 0. Note that we increase the size to read here by one, so that the first read attempt + * already makes us notice the EOF. */ + if (st.st_size > 0) + n = st.st_size + 1; + } + } + + l = 0; + for (;;) { + char *t; + size_t k; + + t = realloc(buf, n + 1); + if (!t) + return -ENOMEM; + + buf = t; + errno = 0; + k = fread(buf + l, 1, n - l, f); + if (k > 0) + l += k; + + if (ferror(f)) + return errno > 0 ? -errno : -EIO; + + if (feof(f)) + break; + + /* We aren't expecting fread() to return a short read outside + * of (error && eof), assert buffer is full and enlarge buffer. + */ + assert(l == n); + + /* Safety check */ + if (n >= READ_FULL_BYTES_MAX) + return -E2BIG; + + n = MIN(n * 2, READ_FULL_BYTES_MAX); + } + + if (!ret_size) { + /* Safety check: if the caller doesn't want to know the size of what we just read it will rely on the + * trailing NUL byte. But if there's an embedded NUL byte, then we should refuse operation as otherwise + * there'd be ambiguity about what we just read. */ + + if (memchr(buf, 0, l)) + return -EBADMSG; + } + + buf[l] = 0; + *ret_contents = TAKE_PTR(buf); + + if (ret_size) + *ret_size = l; + + return 0; +} + +int read_full_file(const char *fn, char **contents, size_t *size) { + _cleanup_fclose_ FILE *f = NULL; + + assert(fn); + assert(contents); + + f = fopen(fn, "re"); + if (!f) + return -errno; + + (void) __fsetlocking(f, FSETLOCKING_BYCALLER); + + return read_full_stream(f, contents, size); +} + +int executable_is_script(const char *path, char **interpreter) { + _cleanup_free_ char *line = NULL; + size_t len; + char *ans; + int r; + + assert(path); + + r = read_one_line_file(path, &line); + if (r == -ENOBUFS) /* First line overly long? if so, then it's not a script */ + return 0; + if (r < 0) + return r; + + if (!startswith(line, "#!")) + return 0; + + ans = strstrip(line + 2); + len = strcspn(ans, " \t"); + + if (len == 0) + return 0; + + ans = strndup(ans, len); + if (!ans) + return -ENOMEM; + + *interpreter = ans; + return 1; +} + +/** + * Retrieve one field from a file like /proc/self/status. pattern + * should not include whitespace or the delimiter (':'). pattern matches only + * the beginning of a line. Whitespace before ':' is skipped. Whitespace and + * zeros after the ':' will be skipped. field must be freed afterwards. + * terminator specifies the terminating characters of the field value (not + * included in the value). + */ +int get_proc_field(const char *filename, const char *pattern, const char *terminator, char **field) { + _cleanup_free_ char *status = NULL; + char *t, *f; + size_t len; + int r; + + assert(terminator); + assert(filename); + assert(pattern); + assert(field); + + r = read_full_file(filename, &status, NULL); + if (r < 0) + return r; + + t = status; + + do { + bool pattern_ok; + + do { + t = strstr(t, pattern); + if (!t) + return -ENOENT; + + /* Check that pattern occurs in beginning of line. */ + pattern_ok = (t == status || t[-1] == '\n'); + + t += strlen(pattern); + + } while (!pattern_ok); + + t += strspn(t, " \t"); + if (!*t) + return -ENOENT; + + } while (*t != ':'); + + t++; + + if (*t) { + t += strspn(t, " \t"); + + /* Also skip zeros, because when this is used for + * capabilities, we don't want the zeros. This way the + * same capability set always maps to the same string, + * irrespective of the total capability set size. For + * other numbers it shouldn't matter. */ + t += strspn(t, "0"); + /* Back off one char if there's nothing but whitespace + and zeros */ + if (!*t || isspace(*t)) + t--; + } + + len = strcspn(t, terminator); + + f = strndup(t, len); + if (!f) + return -ENOMEM; + + *field = f; + return 0; +} + +DIR *xopendirat(int fd, const char *name, int flags) { + int nfd; + DIR *d; + + assert(!(flags & O_CREAT)); + + nfd = openat(fd, name, O_RDONLY|O_NONBLOCK|O_DIRECTORY|O_CLOEXEC|flags, 0); + if (nfd < 0) + return NULL; + + d = fdopendir(nfd); + if (!d) { + safe_close(nfd); + return NULL; + } + + return d; +} + +static int search_and_fopen_internal(const char *path, const char *mode, const char *root, char **search, FILE **_f) { + char **i; + + assert(path); + assert(mode); + assert(_f); + + if (!path_strv_resolve_uniq(search, root)) + return -ENOMEM; + + STRV_FOREACH(i, search) { + _cleanup_free_ char *p = NULL; + FILE *f; + + if (root) + p = strjoin(root, *i, "/", path); + else + p = strjoin(*i, "/", path); + if (!p) + return -ENOMEM; + + f = fopen(p, mode); + if (f) { + *_f = f; + return 0; + } + + if (errno != ENOENT) + return -errno; + } + + return -ENOENT; +} + +int search_and_fopen(const char *path, const char *mode, const char *root, const char **search, FILE **_f) { + _cleanup_strv_free_ char **copy = NULL; + + assert(path); + assert(mode); + assert(_f); + + if (path_is_absolute(path)) { + FILE *f; + + f = fopen(path, mode); + if (f) { + *_f = f; + return 0; + } + + return -errno; + } + + copy = strv_copy((char**) search); + if (!copy) + return -ENOMEM; + + return search_and_fopen_internal(path, mode, root, copy, _f); +} + +int search_and_fopen_nulstr(const char *path, const char *mode, const char *root, const char *search, FILE **_f) { + _cleanup_strv_free_ char **s = NULL; + + if (path_is_absolute(path)) { + FILE *f; + + f = fopen(path, mode); + if (f) { + *_f = f; + return 0; + } + + return -errno; + } + + s = strv_split_nulstr(search); + if (!s) + return -ENOMEM; + + return search_and_fopen_internal(path, mode, root, s, _f); +} + +int fflush_and_check(FILE *f) { + assert(f); + + errno = 0; + fflush(f); + + if (ferror(f)) + return errno > 0 ? -errno : -EIO; + + return 0; +} + +int fflush_sync_and_check(FILE *f) { + int r; + + assert(f); + + r = fflush_and_check(f); + if (r < 0) + return r; + + if (fsync(fileno(f)) < 0) + return -errno; + + r = fsync_directory_of_file(fileno(f)); + if (r < 0) + return r; + + return 0; +} + +int write_timestamp_file_atomic(const char *fn, usec_t n) { + char ln[DECIMAL_STR_MAX(n)+2]; + + /* Creates a "timestamp" file, that contains nothing but a + * usec_t timestamp, formatted in ASCII. */ + + if (n <= 0 || n >= USEC_INFINITY) + return -ERANGE; + + xsprintf(ln, USEC_FMT "\n", n); + + return write_string_file(fn, ln, WRITE_STRING_FILE_CREATE|WRITE_STRING_FILE_ATOMIC); +} + +int read_timestamp_file(const char *fn, usec_t *ret) { + _cleanup_free_ char *ln = NULL; + uint64_t t; + int r; + + r = read_one_line_file(fn, &ln); + if (r < 0) + return r; + + r = safe_atou64(ln, &t); + if (r < 0) + return r; + + if (t <= 0 || t >= (uint64_t) USEC_INFINITY) + return -ERANGE; + + *ret = (usec_t) t; + return 0; +} + +int fputs_with_space(FILE *f, const char *s, const char *separator, bool *space) { + int r; + + assert(s); + + /* Outputs the specified string with fputs(), but optionally prefixes it with a separator. The *space parameter + * when specified shall initially point to a boolean variable initialized to false. It is set to true after the + * first invocation. This call is supposed to be use in loops, where a separator shall be inserted between each + * element, but not before the first one. */ + + if (!f) + f = stdout; + + if (space) { + if (!separator) + separator = " "; + + if (*space) { + r = fputs(separator, f); + if (r < 0) + return r; + } + + *space = true; + } + + return fputs(s, f); +} + +/* A bitmask of the EOL markers we know */ +typedef enum EndOfLineMarker { + EOL_NONE = 0, + EOL_ZERO = 1 << 0, /* \0 (aka NUL) */ + EOL_TEN = 1 << 1, /* \n (aka NL, aka LF) */ + EOL_THIRTEEN = 1 << 2, /* \r (aka CR) */ +} EndOfLineMarker; + +static EndOfLineMarker categorize_eol(char c, ReadLineFlags flags) { + + if (!IN_SET(flags, READ_LINE_ONLY_NUL)) { + if (c == '\n') + return EOL_TEN; + if (c == '\r') + return EOL_THIRTEEN; + } + + if (c == '\0') + return EOL_ZERO; + + return EOL_NONE; +} + +DEFINE_TRIVIAL_CLEANUP_FUNC(FILE*, funlockfile); + +int read_line_full(FILE *f, size_t limit, ReadLineFlags flags, char **ret) { + size_t n = 0, allocated = 0, count = 0; + _cleanup_free_ char *buffer = NULL; + int r; + + assert(f); + + /* Something like a bounded version of getline(). + * + * Considers EOF, \n, \r and \0 end of line delimiters (or combinations of these), and does not include these + * delimiters in the string returned. Specifically, recognizes the following combinations of markers as line + * endings: + * + * • \n (UNIX) + * • \r (old MacOS) + * • \0 (C strings) + * • \n\0 + * • \r\0 + * • \r\n (Windows) + * • \n\r + * • \r\n\0 + * • \n\r\0 + * + * Returns the number of bytes read from the files (i.e. including delimiters — this hence usually differs from + * the number of characters in the returned string). When EOF is hit, 0 is returned. + * + * The input parameter limit is the maximum numbers of characters in the returned string, i.e. excluding + * delimiters. If the limit is hit we fail and return -ENOBUFS. + * + * If a line shall be skipped ret may be initialized as NULL. */ + + if (ret) { + if (!GREEDY_REALLOC(buffer, allocated, 1)) + return -ENOMEM; + } + + { + _unused_ _cleanup_(funlockfilep) FILE *flocked = f; + EndOfLineMarker previous_eol = EOL_NONE; + flockfile(f); + + for (;;) { + EndOfLineMarker eol; + char c; + + if (n >= limit) + return -ENOBUFS; + + if (count >= INT_MAX) /* We couldn't return the counter anymore as "int", hence refuse this */ + return -ENOBUFS; + + r = safe_fgetc(f, &c); + if (r < 0) + return r; + if (r == 0) /* EOF is definitely EOL */ + break; + + eol = categorize_eol(c, flags); + + if (FLAGS_SET(previous_eol, EOL_ZERO) || + (eol == EOL_NONE && previous_eol != EOL_NONE) || + (eol != EOL_NONE && (previous_eol & eol) != 0)) { + /* Previous char was a NUL? This is not an EOL, but the previous char was? This type of + * EOL marker has been seen right before? In either of these three cases we are + * done. But first, let's put this character back in the queue. (Note that we have to + * cast this to (unsigned char) here as ungetc() expects a positive 'int', and if we + * are on an architecture where 'char' equals 'signed char' we need to ensure we don't + * pass a negative value here. That said, to complicate things further ungetc() is + * actually happy with most negative characters and implicitly casts them back to + * positive ones as needed, except for \xff (aka -1, aka EOF), which it refuses. What a + * godawful API!) */ + assert_se(ungetc((unsigned char) c, f) != EOF); + break; + } + + count++; + + if (eol != EOL_NONE) { + previous_eol |= eol; + continue; + } + + if (ret) { + if (!GREEDY_REALLOC(buffer, allocated, n + 2)) + return -ENOMEM; + + buffer[n] = c; + } + + n++; + } + } + + if (ret) { + buffer[n] = 0; + + *ret = TAKE_PTR(buffer); + } + + return (int) count; +} + +int safe_fgetc(FILE *f, char *ret) { + int k; + + assert(f); + + /* A safer version of plain fgetc(): let's propagate the error that happened while reading as such, and + * separate the EOF condition from the byte read, to avoid those confusion signed/unsigned issues fgetc() + * has. */ + + errno = 0; + k = fgetc(f); + if (k == EOF) { + if (ferror(f)) + return errno > 0 ? -errno : -EIO; + + if (ret) + *ret = 0; + + return 0; + } + + if (ret) + *ret = k; + + return 1; +} diff --git a/src/basic/fileio.h b/src/basic/fileio.h new file mode 100644 index 0000000..53e3f4e --- /dev/null +++ b/src/basic/fileio.h @@ -0,0 +1,78 @@ +/* SPDX-License-Identifier: LGPL-2.1+ */ +#pragma once + +#include <dirent.h> +#include <stdbool.h> +#include <stddef.h> +#include <stdio.h> +#include <sys/types.h> + +#include "macro.h" +#include "time-util.h" + +#define LONG_LINE_MAX (1U*1024U*1024U) + +typedef enum { + WRITE_STRING_FILE_CREATE = 1 << 0, + WRITE_STRING_FILE_ATOMIC = 1 << 1, + WRITE_STRING_FILE_AVOID_NEWLINE = 1 << 2, + WRITE_STRING_FILE_VERIFY_ON_FAILURE = 1 << 3, + WRITE_STRING_FILE_SYNC = 1 << 4, + WRITE_STRING_FILE_DISABLE_BUFFER = 1 << 5, + WRITE_STRING_FILE_NOFOLLOW = 1 << 6, + + /* And before you wonder, why write_string_file_atomic_label_ts() is a separate function instead of just one + more flag here: it's about linking: we don't want to pull -lselinux into all users of write_string_file() + and friends. */ + +} WriteStringFileFlags; + +int write_string_stream_ts(FILE *f, const char *line, WriteStringFileFlags flags, struct timespec *ts); +static inline int write_string_stream(FILE *f, const char *line, WriteStringFileFlags flags) { + return write_string_stream_ts(f, line, flags, NULL); +} +int write_string_file_ts(const char *fn, const char *line, WriteStringFileFlags flags, struct timespec *ts); +static inline int write_string_file(const char *fn, const char *line, WriteStringFileFlags flags) { + return write_string_file_ts(fn, line, flags, NULL); +} + +int write_string_filef(const char *fn, WriteStringFileFlags flags, const char *format, ...) _printf_(3, 4); + +int read_one_line_file(const char *fn, char **line); +int read_full_file(const char *fn, char **contents, size_t *size); +int read_full_stream(FILE *f, char **contents, size_t *size); + +int verify_file(const char *fn, const char *blob, bool accept_extra_nl); + +int executable_is_script(const char *path, char **interpreter); + +int get_proc_field(const char *filename, const char *pattern, const char *terminator, char **field); + +DIR *xopendirat(int dirfd, const char *name, int flags); + +int search_and_fopen(const char *path, const char *mode, const char *root, const char **search, FILE **_f); +int search_and_fopen_nulstr(const char *path, const char *mode, const char *root, const char *search, FILE **_f); + +int fflush_and_check(FILE *f); +int fflush_sync_and_check(FILE *f); + +int write_timestamp_file_atomic(const char *fn, usec_t n); +int read_timestamp_file(const char *fn, usec_t *ret); + +int fputs_with_space(FILE *f, const char *s, const char *separator, bool *space); + +typedef enum ReadLineFlags { + READ_LINE_ONLY_NUL = 1 << 0, +} ReadLineFlags; + +int read_line_full(FILE *f, size_t limit, ReadLineFlags flags, char **ret); + +static inline int read_line(FILE *f, size_t limit, char **ret) { + return read_line_full(f, limit, 0, ret); +} + +static inline int read_nul_string(FILE *f, size_t limit, char **ret) { + return read_line_full(f, limit, READ_LINE_ONLY_NUL, ret); +} + +int safe_fgetc(FILE *f, char *ret); diff --git a/src/basic/format-util.h b/src/basic/format-util.h new file mode 100644 index 0000000..dece5d3 --- /dev/null +++ b/src/basic/format-util.h @@ -0,0 +1,67 @@ +/* SPDX-License-Identifier: LGPL-2.1+ */ +#pragma once + +#include <inttypes.h> + +#if SIZEOF_PID_T == 4 +# define PID_PRI PRIi32 +#elif SIZEOF_PID_T == 2 +# define PID_PRI PRIi16 +#else +# error Unknown pid_t size +#endif +#define PID_FMT "%" PID_PRI + +#if SIZEOF_UID_T == 4 +# define UID_FMT "%" PRIu32 +#elif SIZEOF_UID_T == 2 +# define UID_FMT "%" PRIu16 +#else +# error Unknown uid_t size +#endif + +#if SIZEOF_GID_T == 4 +# define GID_FMT "%" PRIu32 +#elif SIZEOF_GID_T == 2 +# define GID_FMT "%" PRIu16 +#else +# error Unknown gid_t size +#endif + +#if SIZEOF_TIME_T == 8 +# define PRI_TIME PRIi64 +#elif SIZEOF_TIME_T == 4 +# define PRI_TIME "li" +#else +# error Unknown time_t size +#endif + +#if defined __x86_64__ && defined __ILP32__ +# define PRI_TIMEX PRIi64 +#else +# define PRI_TIMEX "li" +#endif + +#if SIZEOF_RLIM_T == 8 +# define RLIM_FMT "%" PRIu64 +#elif SIZEOF_RLIM_T == 4 +# define RLIM_FMT "%" PRIu32 +#else +# error Unknown rlim_t size +#endif + +#if SIZEOF_DEV_T == 8 +# define DEV_FMT "%" PRIu64 +#elif SIZEOF_DEV_T == 4 +# define DEV_FMT "%" PRIu32 +#else +# error Unknown dev_t size +#endif + +#if SIZEOF_INO_T == 8 +# define INO_FMT "%" PRIu64 +#elif SIZEOF_INO_T == 4 +# define INO_FMT "%" PRIu32 +#else +# error Unknown ino_t size +#endif diff --git a/src/basic/fs-util.c b/src/basic/fs-util.c new file mode 100644 index 0000000..f25bf2c --- /dev/null +++ b/src/basic/fs-util.c @@ -0,0 +1,1358 @@ +/* SPDX-License-Identifier: LGPL-2.1+ */ + +#include <errno.h> +#include <stddef.h> +#include <stdio.h> +#include <stdlib.h> +#include <string.h> +#include <sys/stat.h> +#include <linux/magic.h> +#include <time.h> +#include <unistd.h> + +#include "alloc-util.h" +#include "dirent-util.h" +#include "fd-util.h" +#include "fs-util.h" +#include "locale-util.h" +#include "log.h" +#include "macro.h" +#include "missing.h" +#include "mkdir.h" +#include "parse-util.h" +#include "path-util.h" +#include "process-util.h" +#include "stat-util.h" +#include "stdio-util.h" +#include "string-util.h" +#include "strv.h" +#include "time-util.h" +#include "tmpfile-util.h" +#include "user-util.h" +#include "util.h" + +int unlink_noerrno(const char *path) { + PROTECT_ERRNO; + int r; + + r = unlink(path); + if (r < 0) + return -errno; + + return 0; +} + +int rmdir_parents(const char *path, const char *stop) { + size_t l; + int r = 0; + + assert(path); + assert(stop); + + l = strlen(path); + + /* Skip trailing slashes */ + while (l > 0 && path[l-1] == '/') + l--; + + while (l > 0) { + char *t; + + /* Skip last component */ + while (l > 0 && path[l-1] != '/') + l--; + + /* Skip trailing slashes */ + while (l > 0 && path[l-1] == '/') + l--; + + if (l <= 0) + break; + + t = strndup(path, l); + if (!t) + return -ENOMEM; + + if (path_startswith(stop, t)) { + free(t); + return 0; + } + + r = rmdir(t); + free(t); + + if (r < 0) + if (errno != ENOENT) + return -errno; + } + + return 0; +} + +int rename_noreplace(int olddirfd, const char *oldpath, int newdirfd, const char *newpath) { + int r; + + /* Try the ideal approach first */ + if (renameat2(olddirfd, oldpath, newdirfd, newpath, RENAME_NOREPLACE) >= 0) + return 0; + + /* renameat2() exists since Linux 3.15, btrfs and FAT added support for it later. If it is not implemented, + * fall back to a different method. */ + if (!IN_SET(errno, EINVAL, ENOSYS, ENOTTY)) + return -errno; + + /* Let's try to use linkat()+unlinkat() as fallback. This doesn't work on directories and on some file systems + * that do not support hard links (such as FAT, most prominently), but for files it's pretty close to what we + * want — though not atomic (i.e. for a short period both the new and the old filename will exist). */ + if (linkat(olddirfd, oldpath, newdirfd, newpath, 0) >= 0) { + + if (unlinkat(olddirfd, oldpath, 0) < 0) { + r = -errno; /* Backup errno before the following unlinkat() alters it */ + (void) unlinkat(newdirfd, newpath, 0); + return r; + } + + return 0; + } + + if (!IN_SET(errno, EINVAL, ENOSYS, ENOTTY, EPERM)) /* FAT returns EPERM on link()… */ + return -errno; + + /* OK, neither RENAME_NOREPLACE nor linkat()+unlinkat() worked. Let's then fallback to the racy TOCTOU + * vulnerable accessat(F_OK) check followed by classic, replacing renameat(), we have nothing better. */ + + if (faccessat(newdirfd, newpath, F_OK, AT_SYMLINK_NOFOLLOW) >= 0) + return -EEXIST; + if (errno != ENOENT) + return -errno; + + if (renameat(olddirfd, oldpath, newdirfd, newpath) < 0) + return -errno; + + return 0; +} + +int readlinkat_malloc(int fd, const char *p, char **ret) { + size_t l = FILENAME_MAX+1; + int r; + + assert(p); + assert(ret); + + for (;;) { + char *c; + ssize_t n; + + c = new(char, l); + if (!c) + return -ENOMEM; + + n = readlinkat(fd, p, c, l-1); + if (n < 0) { + r = -errno; + free(c); + return r; + } + + if ((size_t) n < l-1) { + c[n] = 0; + *ret = c; + return 0; + } + + free(c); + l *= 2; + } +} + +int readlink_malloc(const char *p, char **ret) { + return readlinkat_malloc(AT_FDCWD, p, ret); +} + +int readlink_value(const char *p, char **ret) { + _cleanup_free_ char *link = NULL; + char *value; + int r; + + r = readlink_malloc(p, &link); + if (r < 0) + return r; + + value = basename(link); + if (!value) + return -ENOENT; + + value = strdup(value); + if (!value) + return -ENOMEM; + + *ret = value; + + return 0; +} + +int readlink_and_make_absolute(const char *p, char **r) { + _cleanup_free_ char *target = NULL; + char *k; + int j; + + assert(p); + assert(r); + + j = readlink_malloc(p, &target); + if (j < 0) + return j; + + k = file_in_same_dir(p, target); + if (!k) + return -ENOMEM; + + *r = k; + return 0; +} + +int chmod_and_chown(const char *path, mode_t mode, uid_t uid, gid_t gid) { + char fd_path[STRLEN("/proc/self/fd/") + DECIMAL_STR_MAX(int) + 1]; + _cleanup_close_ int fd = -1; + assert(path); + + /* Under the assumption that we are running privileged we first change the access mode and only then hand out + * ownership to avoid a window where access is too open. */ + + fd = open(path, O_PATH|O_CLOEXEC|O_NOFOLLOW); /* Let's acquire an O_PATH fd, as precaution to change mode/owner + * on the same file */ + if (fd < 0) + return -errno; + + xsprintf(fd_path, "/proc/self/fd/%i", fd); + + if (mode != MODE_INVALID) { + + if ((mode & S_IFMT) != 0) { + struct stat st; + + if (stat(fd_path, &st) < 0) + return -errno; + + if ((mode & S_IFMT) != (st.st_mode & S_IFMT)) + return -EINVAL; + } + + if (chmod(fd_path, mode & 07777) < 0) + return -errno; + } + + if (uid != UID_INVALID || gid != GID_INVALID) + if (chown(fd_path, uid, gid) < 0) + return -errno; + + return 0; +} + +int fchmod_and_chown(int fd, mode_t mode, uid_t uid, gid_t gid) { + /* Under the assumption that we are running privileged we first change the access mode and only then hand out + * ownership to avoid a window where access is too open. */ + + if (mode != MODE_INVALID) { + + if ((mode & S_IFMT) != 0) { + struct stat st; + + if (fstat(fd, &st) < 0) + return -errno; + + if ((mode & S_IFMT) != (st.st_mode & S_IFMT)) + return -EINVAL; + } + + if (fchmod(fd, mode & 0777) < 0) + return -errno; + } + + if (uid != UID_INVALID || gid != GID_INVALID) + if (fchown(fd, uid, gid) < 0) + return -errno; + + return 0; +} + +int fchmod_umask(int fd, mode_t m) { + mode_t u; + int r; + + u = umask(0777); + r = fchmod(fd, m & (~u)) < 0 ? -errno : 0; + umask(u); + + return r; +} + +int fchmod_opath(int fd, mode_t m) { + char procfs_path[STRLEN("/proc/self/fd/") + DECIMAL_STR_MAX(int)]; + + /* This function operates also on fd that might have been opened with + * O_PATH. Indeed fchmodat() doesn't have the AT_EMPTY_PATH flag like + * fchownat() does. */ + + xsprintf(procfs_path, "/proc/self/fd/%i", fd); + if (chmod(procfs_path, m) < 0) + return -errno; + + return 0; +} + +int fd_warn_permissions(const char *path, int fd) { + struct stat st; + + if (fstat(fd, &st) < 0) + return -errno; + + if (st.st_mode & 0111) + log_warning("Configuration file %s is marked executable. Please remove executable permission bits. Proceeding anyway.", path); + + if (st.st_mode & 0002) + log_warning("Configuration file %s is marked world-writable. Please remove world writability permission bits. Proceeding anyway.", path); + + if (getpid_cached() == 1 && (st.st_mode & 0044) != 0044) + log_warning("Configuration file %s is marked world-inaccessible. This has no effect as configuration data is accessible via APIs without restrictions. Proceeding anyway.", path); + + return 0; +} + +int touch_file(const char *path, bool parents, usec_t stamp, uid_t uid, gid_t gid, mode_t mode) { + char fdpath[STRLEN("/proc/self/fd/") + DECIMAL_STR_MAX(int)]; + _cleanup_close_ int fd = -1; + int r, ret = 0; + + assert(path); + + /* Note that touch_file() does not follow symlinks: if invoked on an existing symlink, then it is the symlink + * itself which is updated, not its target + * + * Returns the first error we encounter, but tries to apply as much as possible. */ + + if (parents) + (void) mkdir_parents(path, 0755); + + /* Initially, we try to open the node with O_PATH, so that we get a reference to the node. This is useful in + * case the path refers to an existing device or socket node, as we can open it successfully in all cases, and + * won't trigger any driver magic or so. */ + fd = open(path, O_PATH|O_CLOEXEC|O_NOFOLLOW); + if (fd < 0) { + if (errno != ENOENT) + return -errno; + + /* if the node doesn't exist yet, we create it, but with O_EXCL, so that we only create a regular file + * here, and nothing else */ + fd = open(path, O_WRONLY|O_CREAT|O_EXCL|O_CLOEXEC, IN_SET(mode, 0, MODE_INVALID) ? 0644 : mode); + if (fd < 0) + return -errno; + } + + /* Let's make a path from the fd, and operate on that. With this logic, we can adjust the access mode, + * ownership and time of the file node in all cases, even if the fd refers to an O_PATH object — which is + * something fchown(), fchmod(), futimensat() don't allow. */ + xsprintf(fdpath, "/proc/self/fd/%i", fd); + + if (mode != MODE_INVALID) + if (chmod(fdpath, mode) < 0) + ret = -errno; + + if (uid_is_valid(uid) || gid_is_valid(gid)) + if (chown(fdpath, uid, gid) < 0 && ret >= 0) + ret = -errno; + + if (stamp != USEC_INFINITY) { + struct timespec ts[2]; + + timespec_store(&ts[0], stamp); + ts[1] = ts[0]; + r = utimensat(AT_FDCWD, fdpath, ts, 0); + } else + r = utimensat(AT_FDCWD, fdpath, NULL, 0); + if (r < 0 && ret >= 0) + return -errno; + + return ret; +} + +int touch(const char *path) { + return touch_file(path, false, USEC_INFINITY, UID_INVALID, GID_INVALID, MODE_INVALID); +} + +int symlink_idempotent(const char *from, const char *to, bool make_relative) { + _cleanup_free_ char *relpath = NULL; + int r; + + assert(from); + assert(to); + + if (make_relative) { + _cleanup_free_ char *parent = NULL; + + parent = dirname_malloc(to); + if (!parent) + return -ENOMEM; + + r = path_make_relative(parent, from, &relpath); + if (r < 0) + return r; + + from = relpath; + } + + if (symlink(from, to) < 0) { + _cleanup_free_ char *p = NULL; + + if (errno != EEXIST) + return -errno; + + r = readlink_malloc(to, &p); + if (r == -EINVAL) /* Not a symlink? In that case return the original error we encountered: -EEXIST */ + return -EEXIST; + if (r < 0) /* Any other error? In that case propagate it as is */ + return r; + + if (!streq(p, from)) /* Not the symlink we want it to be? In that case, propagate the original -EEXIST */ + return -EEXIST; + } + + return 0; +} + +int symlink_atomic(const char *from, const char *to) { + _cleanup_free_ char *t = NULL; + int r; + + assert(from); + assert(to); + + r = tempfn_random(to, NULL, &t); + if (r < 0) + return r; + + if (symlink(from, t) < 0) + return -errno; + + if (rename(t, to) < 0) { + unlink_noerrno(t); + return -errno; + } + + return 0; +} + +int mknod_atomic(const char *path, mode_t mode, dev_t dev) { + _cleanup_free_ char *t = NULL; + int r; + + assert(path); + + r = tempfn_random(path, NULL, &t); + if (r < 0) + return r; + + if (mknod(t, mode, dev) < 0) + return -errno; + + if (rename(t, path) < 0) { + unlink_noerrno(t); + return -errno; + } + + return 0; +} + +int mkfifo_atomic(const char *path, mode_t mode) { + _cleanup_free_ char *t = NULL; + int r; + + assert(path); + + r = tempfn_random(path, NULL, &t); + if (r < 0) + return r; + + if (mkfifo(t, mode) < 0) + return -errno; + + if (rename(t, path) < 0) { + unlink_noerrno(t); + return -errno; + } + + return 0; +} + +int mkfifoat_atomic(int dirfd, const char *path, mode_t mode) { + _cleanup_free_ char *t = NULL; + int r; + + assert(path); + + if (path_is_absolute(path)) + return mkfifo_atomic(path, mode); + + /* We're only interested in the (random) filename. */ + r = tempfn_random_child("", NULL, &t); + if (r < 0) + return r; + + if (mkfifoat(dirfd, t, mode) < 0) + return -errno; + + if (renameat(dirfd, t, dirfd, path) < 0) { + unlink_noerrno(t); + return -errno; + } + + return 0; +} + +int get_files_in_directory(const char *path, char ***list) { + _cleanup_closedir_ DIR *d = NULL; + struct dirent *de; + size_t bufsize = 0, n = 0; + _cleanup_strv_free_ char **l = NULL; + + assert(path); + + /* Returns all files in a directory in *list, and the number + * of files as return value. If list is NULL returns only the + * number. */ + + d = opendir(path); + if (!d) + return -errno; + + FOREACH_DIRENT_ALL(de, d, return -errno) { + dirent_ensure_type(d, de); + + if (!dirent_is_file(de)) + continue; + + if (list) { + /* one extra slot is needed for the terminating NULL */ + if (!GREEDY_REALLOC(l, bufsize, n + 2)) + return -ENOMEM; + + l[n] = strdup(de->d_name); + if (!l[n]) + return -ENOMEM; + + l[++n] = NULL; + } else + n++; + } + + if (list) + *list = TAKE_PTR(l); + + return n; +} + +static int getenv_tmp_dir(const char **ret_path) { + const char *n; + int r, ret = 0; + + assert(ret_path); + + /* We use the same order of environment variables python uses in tempfile.gettempdir(): + * https://docs.python.org/3/library/tempfile.html#tempfile.gettempdir */ + FOREACH_STRING(n, "TMPDIR", "TEMP", "TMP") { + const char *e; + + e = secure_getenv(n); + if (!e) + continue; + if (!path_is_absolute(e)) { + r = -ENOTDIR; + goto next; + } + if (!path_is_normalized(e)) { + r = -EPERM; + goto next; + } + + r = is_dir(e, true); + if (r < 0) + goto next; + if (r == 0) { + r = -ENOTDIR; + goto next; + } + + *ret_path = e; + return 1; + + next: + /* Remember first error, to make this more debuggable */ + if (ret >= 0) + ret = r; + } + + if (ret < 0) + return ret; + + *ret_path = NULL; + return ret; +} + +static int tmp_dir_internal(const char *def, const char **ret) { + const char *e; + int r, k; + + assert(def); + assert(ret); + + r = getenv_tmp_dir(&e); + if (r > 0) { + *ret = e; + return 0; + } + + k = is_dir(def, true); + if (k == 0) + k = -ENOTDIR; + if (k < 0) + return r < 0 ? r : k; + + *ret = def; + return 0; +} + +int var_tmp_dir(const char **ret) { + + /* Returns the location for "larger" temporary files, that is backed by physical storage if available, and thus + * even might survive a boot: /var/tmp. If $TMPDIR (or related environment variables) are set, its value is + * returned preferably however. Note that both this function and tmp_dir() below are affected by $TMPDIR, + * making it a variable that overrides all temporary file storage locations. */ + + return tmp_dir_internal("/var/tmp", ret); +} + +int tmp_dir(const char **ret) { + + /* Similar to var_tmp_dir() above, but returns the location for "smaller" temporary files, which is usually + * backed by an in-memory file system: /tmp. */ + + return tmp_dir_internal("/tmp", ret); +} + +int unlink_or_warn(const char *filename) { + if (unlink(filename) < 0 && errno != ENOENT) + /* If the file doesn't exist and the fs simply was read-only (in which + * case unlink() returns EROFS even if the file doesn't exist), don't + * complain */ + if (errno != EROFS || access(filename, F_OK) >= 0) + return log_error_errno(errno, "Failed to remove \"%s\": %m", filename); + + return 0; +} + +int inotify_add_watch_fd(int fd, int what, uint32_t mask) { + char path[STRLEN("/proc/self/fd/") + DECIMAL_STR_MAX(int) + 1]; + int r; + + /* This is like inotify_add_watch(), except that the file to watch is not referenced by a path, but by an fd */ + xsprintf(path, "/proc/self/fd/%i", what); + + r = inotify_add_watch(fd, path, mask); + if (r < 0) + return -errno; + + return r; +} + +static bool unsafe_transition(const struct stat *a, const struct stat *b) { + /* Returns true if the transition from a to b is safe, i.e. that we never transition from unprivileged to + * privileged files or directories. Why bother? So that unprivileged code can't symlink to privileged files + * making us believe we read something safe even though it isn't safe in the specific context we open it in. */ + + if (a->st_uid == 0) /* Transitioning from privileged to unprivileged is always fine */ + return false; + + return a->st_uid != b->st_uid; /* Otherwise we need to stay within the same UID */ +} + +static int log_unsafe_transition(int a, int b, const char *path, unsigned flags) { + _cleanup_free_ char *n1 = NULL, *n2 = NULL; + + if (!FLAGS_SET(flags, CHASE_WARN)) + return -ENOLINK; + + (void) fd_get_path(a, &n1); + (void) fd_get_path(b, &n2); + + return log_warning_errno(SYNTHETIC_ERRNO(ENOLINK), + "Detected unsafe path transition %s %s %s during canonicalization of %s.", + n1, special_glyph(SPECIAL_GLYPH_ARROW), n2, path); +} + +static int log_autofs_mount_point(int fd, const char *path, unsigned flags) { + _cleanup_free_ char *n1 = NULL; + + if (!FLAGS_SET(flags, CHASE_WARN)) + return -EREMOTE; + + (void) fd_get_path(fd, &n1); + + return log_warning_errno(SYNTHETIC_ERRNO(EREMOTE), + "Detected autofs mount point %s during canonicalization of %s.", + n1, path); +} + +int chase_symlinks(const char *path, const char *original_root, unsigned flags, char **ret) { + _cleanup_free_ char *buffer = NULL, *done = NULL, *root = NULL; + _cleanup_close_ int fd = -1; + unsigned max_follow = CHASE_SYMLINKS_MAX; /* how many symlinks to follow before giving up and returning ELOOP */ + struct stat previous_stat; + bool exists = true; + char *todo; + int r; + + assert(path); + + /* Either the file may be missing, or we return an fd to the final object, but both make no sense */ + if (FLAGS_SET(flags, CHASE_NONEXISTENT | CHASE_OPEN)) + return -EINVAL; + + if (FLAGS_SET(flags, CHASE_STEP | CHASE_OPEN)) + return -EINVAL; + + if (isempty(path)) + return -EINVAL; + + /* This is a lot like canonicalize_file_name(), but takes an additional "root" parameter, that allows following + * symlinks relative to a root directory, instead of the root of the host. + * + * Note that "root" primarily matters if we encounter an absolute symlink. It is also used when following + * relative symlinks to ensure they cannot be used to "escape" the root directory. The path parameter passed is + * assumed to be already prefixed by it, except if the CHASE_PREFIX_ROOT flag is set, in which case it is first + * prefixed accordingly. + * + * Algorithmically this operates on two path buffers: "done" are the components of the path we already + * processed and resolved symlinks, "." and ".." of. "todo" are the components of the path we still need to + * process. On each iteration, we move one component from "todo" to "done", processing it's special meaning + * each time. The "todo" path always starts with at least one slash, the "done" path always ends in no + * slash. We always keep an O_PATH fd to the component we are currently processing, thus keeping lookup races + * at a minimum. + * + * Suggested usage: whenever you want to canonicalize a path, use this function. Pass the absolute path you got + * as-is: fully qualified and relative to your host's root. Optionally, specify the root parameter to tell this + * function what to do when encountering a symlink with an absolute path as directory: prefix it by the + * specified path. + * + * There are three ways to invoke this function: + * + * 1. Without CHASE_STEP or CHASE_OPEN: in this case the path is resolved and the normalized path is returned + * in `ret`. The return value is < 0 on error. If CHASE_NONEXISTENT is also set 0 is returned if the file + * doesn't exist, > 0 otherwise. If CHASE_NONEXISTENT is not set >= 0 is returned if the destination was + * found, -ENOENT if it doesn't. + * + * 2. With CHASE_OPEN: in this case the destination is opened after chasing it as O_PATH and this file + * descriptor is returned as return value. This is useful to open files relative to some root + * directory. Note that the returned O_PATH file descriptors must be converted into a regular one (using + * fd_reopen() or such) before it can be used for reading/writing. CHASE_OPEN may not be combined with + * CHASE_NONEXISTENT. + * + * 3. With CHASE_STEP: in this case only a single step of the normalization is executed, i.e. only the first + * symlink or ".." component of the path is resolved, and the resulting path is returned. This is useful if + * a caller wants to trace the a path through the file system verbosely. Returns < 0 on error, > 0 if the + * path is fully normalized, and == 0 for each normalization step. This may be combined with + * CHASE_NONEXISTENT, in which case 1 is returned when a component is not found. + * + * 4. With CHASE_SAFE: in this case the path must not contain unsafe transitions, i.e. transitions from + * unprivileged to privileged files or directories. In such cases the return value is -ENOLINK. If + * CHASE_WARN is also set a warning describing the unsafe transition is emitted. + * + * 5. With CHASE_NO_AUTOFS: in this case if an autofs mount point is encountered, the path normalization is + * aborted and -EREMOTE is returned. If CHASE_WARN is also set a warning showing the path of the mount point + * is emitted. + * + * */ + + /* A root directory of "/" or "" is identical to none */ + if (empty_or_root(original_root)) + original_root = NULL; + + if (!original_root && !ret && (flags & (CHASE_NONEXISTENT|CHASE_NO_AUTOFS|CHASE_SAFE|CHASE_OPEN|CHASE_STEP)) == CHASE_OPEN) { + /* Shortcut the CHASE_OPEN case if the caller isn't interested in the actual path and has no root set + * and doesn't care about any of the other special features we provide either. */ + r = open(path, O_PATH|O_CLOEXEC|((flags & CHASE_NOFOLLOW) ? O_NOFOLLOW : 0)); + if (r < 0) + return -errno; + + return r; + } + + if (original_root) { + r = path_make_absolute_cwd(original_root, &root); + if (r < 0) + return r; + + if (flags & CHASE_PREFIX_ROOT) { + + /* We don't support relative paths in combination with a root directory */ + if (!path_is_absolute(path)) + return -EINVAL; + + path = prefix_roota(root, path); + } + } + + r = path_make_absolute_cwd(path, &buffer); + if (r < 0) + return r; + + fd = open("/", O_CLOEXEC|O_NOFOLLOW|O_PATH); + if (fd < 0) + return -errno; + + if (flags & CHASE_SAFE) { + if (fstat(fd, &previous_stat) < 0) + return -errno; + } + + todo = buffer; + for (;;) { + _cleanup_free_ char *first = NULL; + _cleanup_close_ int child = -1; + struct stat st; + size_t n, m; + + /* Determine length of first component in the path */ + n = strspn(todo, "/"); /* The slashes */ + m = n + strcspn(todo + n, "/"); /* The entire length of the component */ + + /* Extract the first component. */ + first = strndup(todo, m); + if (!first) + return -ENOMEM; + + todo += m; + + /* Empty? Then we reached the end. */ + if (isempty(first)) + break; + + /* Just a single slash? Then we reached the end. */ + if (path_equal(first, "/")) { + /* Preserve the trailing slash */ + + if (flags & CHASE_TRAIL_SLASH) + if (!strextend(&done, "/", NULL)) + return -ENOMEM; + + break; + } + + /* Just a dot? Then let's eat this up. */ + if (path_equal(first, "/.")) + continue; + + /* Two dots? Then chop off the last bit of what we already found out. */ + if (path_equal(first, "/..")) { + _cleanup_free_ char *parent = NULL; + _cleanup_close_ int fd_parent = -1; + + /* If we already are at the top, then going up will not change anything. This is in-line with + * how the kernel handles this. */ + if (empty_or_root(done)) + continue; + + parent = dirname_malloc(done); + if (!parent) + return -ENOMEM; + + /* Don't allow this to leave the root dir. */ + if (root && + path_startswith(done, root) && + !path_startswith(parent, root)) + continue; + + free_and_replace(done, parent); + + if (flags & CHASE_STEP) + goto chased_one; + + fd_parent = openat(fd, "..", O_CLOEXEC|O_NOFOLLOW|O_PATH); + if (fd_parent < 0) + return -errno; + + if (flags & CHASE_SAFE) { + if (fstat(fd_parent, &st) < 0) + return -errno; + + if (unsafe_transition(&previous_stat, &st)) + return log_unsafe_transition(fd, fd_parent, path, flags); + + previous_stat = st; + } + + safe_close(fd); + fd = TAKE_FD(fd_parent); + + continue; + } + + /* Otherwise let's see what this is. */ + child = openat(fd, first + n, O_CLOEXEC|O_NOFOLLOW|O_PATH); + if (child < 0) { + + if (errno == ENOENT && + (flags & CHASE_NONEXISTENT) && + (isempty(todo) || path_is_normalized(todo))) { + + /* If CHASE_NONEXISTENT is set, and the path does not exist, then that's OK, return + * what we got so far. But don't allow this if the remaining path contains "../ or "./" + * or something else weird. */ + + /* If done is "/", as first also contains slash at the head, then remove this redundant slash. */ + if (streq_ptr(done, "/")) + *done = '\0'; + + if (!strextend(&done, first, todo, NULL)) + return -ENOMEM; + + exists = false; + break; + } + + return -errno; + } + + if (fstat(child, &st) < 0) + return -errno; + if ((flags & CHASE_SAFE) && + unsafe_transition(&previous_stat, &st)) + return log_unsafe_transition(fd, child, path, flags); + + previous_stat = st; + + if ((flags & CHASE_NO_AUTOFS) && + fd_is_fs_type(child, AUTOFS_SUPER_MAGIC) > 0) + return log_autofs_mount_point(child, path, flags); + + if (S_ISLNK(st.st_mode) && !((flags & CHASE_NOFOLLOW) && isempty(todo))) { + char *joined; + + _cleanup_free_ char *destination = NULL; + + /* This is a symlink, in this case read the destination. But let's make sure we don't follow + * symlinks without bounds. */ + if (--max_follow <= 0) + return -ELOOP; + + r = readlinkat_malloc(fd, first + n, &destination); + if (r < 0) + return r; + if (isempty(destination)) + return -EINVAL; + + if (path_is_absolute(destination)) { + + /* An absolute destination. Start the loop from the beginning, but use the root + * directory as base. */ + + safe_close(fd); + fd = open(root ?: "/", O_CLOEXEC|O_NOFOLLOW|O_PATH); + if (fd < 0) + return -errno; + + if (flags & CHASE_SAFE) { + if (fstat(fd, &st) < 0) + return -errno; + + if (unsafe_transition(&previous_stat, &st)) + return log_unsafe_transition(child, fd, path, flags); + + previous_stat = st; + } + + free(done); + + /* Note that we do not revalidate the root, we take it as is. */ + if (isempty(root)) + done = NULL; + else { + done = strdup(root); + if (!done) + return -ENOMEM; + } + + /* Prefix what's left to do with what we just read, and start the loop again, but + * remain in the current directory. */ + joined = strjoin(destination, todo); + } else + joined = strjoin("/", destination, todo); + if (!joined) + return -ENOMEM; + + free(buffer); + todo = buffer = joined; + + if (flags & CHASE_STEP) + goto chased_one; + + continue; + } + + /* If this is not a symlink, then let's just add the name we read to what we already verified. */ + if (!done) + done = TAKE_PTR(first); + else { + /* If done is "/", as first also contains slash at the head, then remove this redundant slash. */ + if (streq(done, "/")) + *done = '\0'; + + if (!strextend(&done, first, NULL)) + return -ENOMEM; + } + + /* And iterate again, but go one directory further down. */ + safe_close(fd); + fd = TAKE_FD(child); + } + + if (!done) { + /* Special case, turn the empty string into "/", to indicate the root directory. */ + done = strdup("/"); + if (!done) + return -ENOMEM; + } + + if (ret) + *ret = TAKE_PTR(done); + + if (flags & CHASE_OPEN) { + /* Return the O_PATH fd we currently are looking to the caller. It can translate it to a proper fd by + * opening /proc/self/fd/xyz. */ + + assert(fd >= 0); + return TAKE_FD(fd); + } + + if (flags & CHASE_STEP) + return 1; + + return exists; + +chased_one: + if (ret) { + char *c; + + c = strjoin(strempty(done), todo); + if (!c) + return -ENOMEM; + + *ret = c; + } + + return 0; +} + +int chase_symlinks_and_open( + const char *path, + const char *root, + unsigned chase_flags, + int open_flags, + char **ret_path) { + + _cleanup_close_ int path_fd = -1; + _cleanup_free_ char *p = NULL; + int r; + + if (chase_flags & CHASE_NONEXISTENT) + return -EINVAL; + + if (empty_or_root(root) && !ret_path && (chase_flags & (CHASE_NO_AUTOFS|CHASE_SAFE)) == 0) { + /* Shortcut this call if none of the special features of this call are requested */ + r = open(path, open_flags); + if (r < 0) + return -errno; + + return r; + } + + path_fd = chase_symlinks(path, root, chase_flags|CHASE_OPEN, ret_path ? &p : NULL); + if (path_fd < 0) + return path_fd; + + r = fd_reopen(path_fd, open_flags); + if (r < 0) + return r; + + if (ret_path) + *ret_path = TAKE_PTR(p); + + return r; +} + +int chase_symlinks_and_opendir( + const char *path, + const char *root, + unsigned chase_flags, + char **ret_path, + DIR **ret_dir) { + + char procfs_path[STRLEN("/proc/self/fd/") + DECIMAL_STR_MAX(int)]; + _cleanup_close_ int path_fd = -1; + _cleanup_free_ char *p = NULL; + DIR *d; + + if (!ret_dir) + return -EINVAL; + if (chase_flags & CHASE_NONEXISTENT) + return -EINVAL; + + if (empty_or_root(root) && !ret_path && (chase_flags & (CHASE_NO_AUTOFS|CHASE_SAFE)) == 0) { + /* Shortcut this call if none of the special features of this call are requested */ + d = opendir(path); + if (!d) + return -errno; + + *ret_dir = d; + return 0; + } + + path_fd = chase_symlinks(path, root, chase_flags|CHASE_OPEN, ret_path ? &p : NULL); + if (path_fd < 0) + return path_fd; + + xsprintf(procfs_path, "/proc/self/fd/%i", path_fd); + d = opendir(procfs_path); + if (!d) + return -errno; + + if (ret_path) + *ret_path = TAKE_PTR(p); + + *ret_dir = d; + return 0; +} + +int chase_symlinks_and_stat( + const char *path, + const char *root, + unsigned chase_flags, + char **ret_path, + struct stat *ret_stat) { + + _cleanup_close_ int path_fd = -1; + _cleanup_free_ char *p = NULL; + + assert(path); + assert(ret_stat); + + if (chase_flags & CHASE_NONEXISTENT) + return -EINVAL; + + if (empty_or_root(root) && !ret_path && (chase_flags & (CHASE_NO_AUTOFS|CHASE_SAFE)) == 0) { + /* Shortcut this call if none of the special features of this call are requested */ + if (stat(path, ret_stat) < 0) + return -errno; + + return 1; + } + + path_fd = chase_symlinks(path, root, chase_flags|CHASE_OPEN, ret_path ? &p : NULL); + if (path_fd < 0) + return path_fd; + + if (fstat(path_fd, ret_stat) < 0) + return -errno; + + if (ret_path) + *ret_path = TAKE_PTR(p); + + if (chase_flags & CHASE_OPEN) + return TAKE_FD(path_fd); + + return 1; +} + +int access_fd(int fd, int mode) { + char p[STRLEN("/proc/self/fd/") + DECIMAL_STR_MAX(fd) + 1]; + int r; + + /* Like access() but operates on an already open fd */ + + xsprintf(p, "/proc/self/fd/%i", fd); + r = access(p, mode); + if (r < 0) + return -errno; + + return r; +} + +void unlink_tempfilep(char (*p)[]) { + /* If the file is created with mkstemp(), it will (almost always) + * change the suffix. Treat this as a sign that the file was + * successfully created. We ignore both the rare case where the + * original suffix is used and unlink failures. */ + if (!endswith(*p, ".XXXXXX")) + (void) unlink_noerrno(*p); +} + +int unlinkat_deallocate(int fd, const char *name, int flags) { + _cleanup_close_ int truncate_fd = -1; + struct stat st; + off_t l, bs; + + /* Operates like unlinkat() but also deallocates the file contents if it is a regular file and there's no other + * link to it. This is useful to ensure that other processes that might have the file open for reading won't be + * able to keep the data pinned on disk forever. This call is particular useful whenever we execute clean-up + * jobs ("vacuuming"), where we want to make sure the data is really gone and the disk space released and + * returned to the free pool. + * + * Deallocation is preferably done by FALLOC_FL_PUNCH_HOLE|FALLOC_FL_KEEP_SIZE (👊) if supported, which means + * the file won't change size. That's a good thing since we shouldn't needlessly trigger SIGBUS in other + * programs that have mmap()ed the file. (The assumption here is that changing file contents to all zeroes + * underneath those programs is the better choice than simply triggering SIGBUS in them which truncation does.) + * However if hole punching is not implemented in the kernel or file system we'll fall back to normal file + * truncation (🔪), as our goal of deallocating the data space trumps our goal of being nice to readers (💐). + * + * Note that we attempt deallocation, but failure to succeed with that is not considered fatal, as long as the + * primary job – to delete the file – is accomplished. */ + + if ((flags & AT_REMOVEDIR) == 0) { + truncate_fd = openat(fd, name, O_WRONLY|O_CLOEXEC|O_NOCTTY|O_NOFOLLOW|O_NONBLOCK); + if (truncate_fd < 0) { + + /* If this failed because the file doesn't exist propagate the error right-away. Also, + * AT_REMOVEDIR wasn't set, and we tried to open the file for writing, which means EISDIR is + * returned when this is a directory but we are not supposed to delete those, hence propagate + * the error right-away too. */ + if (IN_SET(errno, ENOENT, EISDIR)) + return -errno; + + if (errno != ELOOP) /* don't complain if this is a symlink */ + log_debug_errno(errno, "Failed to open file '%s' for deallocation, ignoring: %m", name); + } + } + + if (unlinkat(fd, name, flags) < 0) + return -errno; + + if (truncate_fd < 0) /* Don't have a file handle, can't do more ☹️ */ + return 0; + + if (fstat(truncate_fd, &st) < 0) { + log_debug_errno(errno, "Failed to stat file '%s' for deallocation, ignoring: %m", name); + return 0; + } + + if (!S_ISREG(st.st_mode) || st.st_blocks == 0 || st.st_nlink > 0) + return 0; + + /* If this is a regular file, it actually took up space on disk and there are no other links it's time to + * punch-hole/truncate this to release the disk space. */ + + bs = MAX(st.st_blksize, 512); + l = DIV_ROUND_UP(st.st_size, bs) * bs; /* Round up to next block size */ + + if (fallocate(truncate_fd, FALLOC_FL_PUNCH_HOLE|FALLOC_FL_KEEP_SIZE, 0, l) >= 0) + return 0; /* Successfully punched a hole! 😊 */ + + /* Fall back to truncation */ + if (ftruncate(truncate_fd, 0) < 0) { + log_debug_errno(errno, "Failed to truncate file to 0, ignoring: %m"); + return 0; + } + + return 0; +} + +int fsync_directory_of_file(int fd) { + _cleanup_free_ char *path = NULL; + _cleanup_close_ int dfd = -1; + int r; + + r = fd_verify_regular(fd); + if (r < 0) + return r; + + r = fd_get_path(fd, &path); + if (r < 0) { + log_debug_errno(r, "Failed to query /proc/self/fd/%d%s: %m", + fd, + r == -EOPNOTSUPP ? ", ignoring" : ""); + + if (r == -EOPNOTSUPP) + /* If /proc is not available, we're most likely running in some + * chroot environment, and syncing the directory is not very + * important in that case. Let's just silently do nothing. */ + return 0; + + return r; + } + + if (!path_is_absolute(path)) + return -EINVAL; + + dfd = open_parent(path, O_CLOEXEC, 0); + if (dfd < 0) + return dfd; + + if (fsync(dfd) < 0) + return -errno; + + return 0; +} + +int fsync_path_at(int at_fd, const char *path) { + _cleanup_close_ int opened_fd = -1; + int fd; + + if (isempty(path)) { + if (at_fd == AT_FDCWD) { + opened_fd = open(".", O_RDONLY|O_DIRECTORY|O_CLOEXEC); + if (opened_fd < 0) + return -errno; + + fd = opened_fd; + } else + fd = at_fd; + } else { + + opened_fd = openat(at_fd, path, O_RDONLY|O_CLOEXEC); + if (opened_fd < 0) + return -errno; + + fd = opened_fd; + } + + if (fsync(fd) < 0) + return -errno; + + return 0; +} + +int open_parent(const char *path, int flags, mode_t mode) { + _cleanup_free_ char *parent = NULL; + int fd; + + if (isempty(path)) + return -EINVAL; + if (path_equal(path, "/")) /* requesting the parent of the root dir is fishy, let's prohibit that */ + return -EINVAL; + + parent = dirname_malloc(path); + if (!parent) + return -ENOMEM; + + /* Let's insist on O_DIRECTORY since the parent of a file or directory is a directory. Except if we open an + * O_TMPFILE file, because in that case we are actually create a regular file below the parent directory. */ + + if ((flags & O_PATH) == O_PATH) + flags |= O_DIRECTORY; + else if ((flags & O_TMPFILE) != O_TMPFILE) + flags |= O_DIRECTORY|O_RDONLY; + + fd = open(parent, flags, mode); + if (fd < 0) + return -errno; + + return fd; +} diff --git a/src/basic/fs-util.h b/src/basic/fs-util.h new file mode 100644 index 0000000..7ad030b --- /dev/null +++ b/src/basic/fs-util.h @@ -0,0 +1,111 @@ +/* SPDX-License-Identifier: LGPL-2.1+ */ +#pragma once + +#include <dirent.h> +#include <fcntl.h> +#include <limits.h> +#include <stdbool.h> +#include <stdint.h> +#include <sys/inotify.h> +#include <sys/types.h> +#include <unistd.h> + +#include "time-util.h" +#include "util.h" + +int unlink_noerrno(const char *path); + +int rmdir_parents(const char *path, const char *stop); + +int rename_noreplace(int olddirfd, const char *oldpath, int newdirfd, const char *newpath); + +int readlinkat_malloc(int fd, const char *p, char **ret); +int readlink_malloc(const char *p, char **r); +int readlink_value(const char *p, char **ret); +int readlink_and_make_absolute(const char *p, char **r); + +int chmod_and_chown(const char *path, mode_t mode, uid_t uid, gid_t gid); +int fchmod_and_chown(int fd, mode_t mode, uid_t uid, gid_t gid); + +int fchmod_umask(int fd, mode_t mode); +int fchmod_opath(int fd, mode_t m); + +int fd_warn_permissions(const char *path, int fd); + +#define laccess(path, mode) faccessat(AT_FDCWD, (path), (mode), AT_SYMLINK_NOFOLLOW) + +int touch_file(const char *path, bool parents, usec_t stamp, uid_t uid, gid_t gid, mode_t mode); +int touch(const char *path); + +int symlink_idempotent(const char *from, const char *to, bool make_relative); + +int symlink_atomic(const char *from, const char *to); +int mknod_atomic(const char *path, mode_t mode, dev_t dev); +int mkfifo_atomic(const char *path, mode_t mode); +int mkfifoat_atomic(int dir_fd, const char *path, mode_t mode); + +int get_files_in_directory(const char *path, char ***list); + +int tmp_dir(const char **ret); +int var_tmp_dir(const char **ret); + +int unlink_or_warn(const char *filename); + +#define INOTIFY_EVENT_MAX (sizeof(struct inotify_event) + NAME_MAX + 1) + +#define FOREACH_INOTIFY_EVENT(e, buffer, sz) \ + for ((e) = &buffer.ev; \ + (uint8_t*) (e) < (uint8_t*) (buffer.raw) + (sz); \ + (e) = (struct inotify_event*) ((uint8_t*) (e) + sizeof(struct inotify_event) + (e)->len)) + +union inotify_event_buffer { + struct inotify_event ev; + uint8_t raw[INOTIFY_EVENT_MAX]; +}; + +int inotify_add_watch_fd(int fd, int what, uint32_t mask); + +enum { + CHASE_PREFIX_ROOT = 1 << 0, /* If set, the specified path will be prefixed by the specified root before beginning the iteration */ + CHASE_NONEXISTENT = 1 << 1, /* If set, it's OK if the path doesn't actually exist. */ + CHASE_NO_AUTOFS = 1 << 2, /* If set, return -EREMOTE if autofs mount point found */ + CHASE_SAFE = 1 << 3, /* If set, return EPERM if we ever traverse from unprivileged to privileged files or directories */ + CHASE_OPEN = 1 << 4, /* If set, return an O_PATH object to the final component */ + CHASE_TRAIL_SLASH = 1 << 5, /* If set, any trailing slash will be preserved */ + CHASE_STEP = 1 << 6, /* If set, just execute a single step of the normalization */ + CHASE_NOFOLLOW = 1 << 7, /* Only valid with CHASE_OPEN: when the path's right-most component refers to symlink return O_PATH fd of the symlink, rather than following it. */ + CHASE_WARN = 1 << 8, /* Emit an appropriate warning when an error is encountered */ +}; + +/* How many iterations to execute before returning -ELOOP */ +#define CHASE_SYMLINKS_MAX 32 + +int chase_symlinks(const char *path_with_prefix, const char *root, unsigned flags, char **ret); + +int chase_symlinks_and_open(const char *path, const char *root, unsigned chase_flags, int open_flags, char **ret_path); +int chase_symlinks_and_opendir(const char *path, const char *root, unsigned chase_flags, char **ret_path, DIR **ret_dir); +int chase_symlinks_and_stat(const char *path, const char *root, unsigned chase_flags, char **ret_path, struct stat *ret_stat); + +/* Useful for usage with _cleanup_(), removes a directory and frees the pointer */ +static inline void rmdir_and_free(char *p) { + PROTECT_ERRNO; + (void) rmdir(p); + free(p); +} +DEFINE_TRIVIAL_CLEANUP_FUNC(char*, rmdir_and_free); + +static inline void unlink_and_free(char *p) { + (void) unlink_noerrno(p); + free(p); +} +DEFINE_TRIVIAL_CLEANUP_FUNC(char*, unlink_and_free); + +int access_fd(int fd, int mode); + +void unlink_tempfilep(char (*p)[]); +int unlinkat_deallocate(int fd, const char *name, int flags); + +int fsync_directory_of_file(int fd); +int fsync_path_at(int at_fd, const char *path); + +int open_parent(const char *path, int flags, mode_t mode); diff --git a/src/basic/gcrypt-util.c b/src/basic/gcrypt-util.c new file mode 100644 index 0000000..f304a2b --- /dev/null +++ b/src/basic/gcrypt-util.c @@ -0,0 +1,52 @@ +/* SPDX-License-Identifier: LGPL-2.1+ */ + +#if HAVE_GCRYPT +#include <gcrypt.h> + +#include "gcrypt-util.h" +#include "hexdecoct.h" + +void initialize_libgcrypt(bool secmem) { + const char *p; + if (gcry_control(GCRYCTL_INITIALIZATION_FINISHED_P)) + return; + + p = gcry_check_version("1.4.5"); + assert(p); + + /* Turn off "secmem". Clients which wish to make use of this + * feature should initialize the library manually */ + if (!secmem) + gcry_control(GCRYCTL_DISABLE_SECMEM); + gcry_control(GCRYCTL_INITIALIZATION_FINISHED, 0); +} + +int string_hashsum(const char *s, size_t len, int md_algorithm, char **out) { + _cleanup_(gcry_md_closep) gcry_md_hd_t md = NULL; + size_t hash_size; + void *hash; + char *enc; + + initialize_libgcrypt(false); + + hash_size = gcry_md_get_algo_dlen(md_algorithm); + assert(hash_size > 0); + + gcry_md_open(&md, md_algorithm, 0); + if (!md) + return -EIO; + + gcry_md_write(md, s, len); + + hash = gcry_md_read(md, 0); + if (!hash) + return -EIO; + + enc = hexmem(hash, hash_size); + if (!enc) + return -ENOMEM; + + *out = enc; + return 0; +} +#endif diff --git a/src/basic/gcrypt-util.h b/src/basic/gcrypt-util.h new file mode 100644 index 0000000..87eb606 --- /dev/null +++ b/src/basic/gcrypt-util.h @@ -0,0 +1,34 @@ +/* SPDX-License-Identifier: LGPL-2.1+ */ + +#pragma once + +#include <errno.h> +#include <stdbool.h> +#include <stddef.h> + +#if HAVE_GCRYPT +#include <gcrypt.h> + +#include "macro.h" + +void initialize_libgcrypt(bool secmem); +int string_hashsum(const char *s, size_t len, int md_algorithm, char **out); + +DEFINE_TRIVIAL_CLEANUP_FUNC(gcry_md_hd_t, gcry_md_close); +#endif + +static inline int string_hashsum_sha224(const char *s, size_t len, char **out) { +#if HAVE_GCRYPT + return string_hashsum(s, len, GCRY_MD_SHA224, out); +#else + return -EOPNOTSUPP; +#endif +} + +static inline int string_hashsum_sha256(const char *s, size_t len, char **out) { +#if HAVE_GCRYPT + return string_hashsum(s, len, GCRY_MD_SHA256, out); +#else + return -EOPNOTSUPP; +#endif +} diff --git a/src/basic/generate-af-list.sh b/src/basic/generate-af-list.sh new file mode 100755 index 0000000..5bf244c --- /dev/null +++ b/src/basic/generate-af-list.sh @@ -0,0 +1,6 @@ +#!/bin/sh +set -eu + +$1 -E -dM -include sys/socket.h -include "$2" -include "$3" - </dev/null | \ + grep -Ev 'AF_UNSPEC|AF_MAX' | \ + awk '/^#define[ \t]+AF_[^ \t]+[ \t]+[AP]F_[^ \t]/ { print $2; }' diff --git a/src/basic/generate-arphrd-list.sh b/src/basic/generate-arphrd-list.sh new file mode 100755 index 0000000..e6e874a --- /dev/null +++ b/src/basic/generate-arphrd-list.sh @@ -0,0 +1,6 @@ +#!/bin/sh +set -eu + +$1 -dM -include net/if_arp.h -include "$2" -include "$3" - </dev/null | \ + awk '/^#define[ \t]+ARPHRD_[^ \t]+[ \t]+[^ \t]/ { print $2; }' | \ + sed -e 's/ARPHRD_//' diff --git a/src/basic/generate-cap-list.sh b/src/basic/generate-cap-list.sh new file mode 100755 index 0000000..0628d24 --- /dev/null +++ b/src/basic/generate-cap-list.sh @@ -0,0 +1,6 @@ +#!/bin/sh +set -eu + +$1 -dM -include linux/capability.h -include "$2" -include "$3" - </dev/null | \ + awk '/^#define[ \t]+CAP_[A-Z_]+[ \t]+/ { print $2; }' | \ + grep -v CAP_LAST_CAP diff --git a/src/basic/generate-errno-list.sh b/src/basic/generate-errno-list.sh new file mode 100755 index 0000000..953d5e3 --- /dev/null +++ b/src/basic/generate-errno-list.sh @@ -0,0 +1,5 @@ +#!/bin/sh +set -eu + +$1 -dM -include errno.h - </dev/null | \ + awk '/^#define[ \t]+E[^ _]+[ \t]+/ { print $2; }' diff --git a/src/basic/glob-util.c b/src/basic/glob-util.c new file mode 100644 index 0000000..9fac676 --- /dev/null +++ b/src/basic/glob-util.c @@ -0,0 +1,73 @@ +/* SPDX-License-Identifier: LGPL-2.1+ */ + +#include <dirent.h> +#include <errno.h> +#include <glob.h> +#include <sys/types.h> + +#include "dirent-util.h" +#include "glob-util.h" +#include "macro.h" +#include "path-util.h" +#include "strv.h" + +static void closedir_wrapper(void* v) { + (void) closedir(v); +} + +int safe_glob(const char *path, int flags, glob_t *pglob) { + int k; + + /* We want to set GLOB_ALTDIRFUNC ourselves, don't allow it to be set. */ + assert(!(flags & GLOB_ALTDIRFUNC)); + + if (!pglob->gl_closedir) + pglob->gl_closedir = closedir_wrapper; + if (!pglob->gl_readdir) + pglob->gl_readdir = (struct dirent *(*)(void *)) readdir_no_dot; + if (!pglob->gl_opendir) + pglob->gl_opendir = (void *(*)(const char *)) opendir; + if (!pglob->gl_lstat) + pglob->gl_lstat = lstat; + if (!pglob->gl_stat) + pglob->gl_stat = stat; + + errno = 0; + k = glob(path, flags | GLOB_ALTDIRFUNC, NULL, pglob); + + if (k == GLOB_NOMATCH) + return -ENOENT; + if (k == GLOB_NOSPACE) + return -ENOMEM; + if (k != 0) + return errno > 0 ? -errno : -EIO; + if (strv_isempty(pglob->gl_pathv)) + return -ENOENT; + + return 0; +} + +int glob_exists(const char *path) { + _cleanup_globfree_ glob_t g = {}; + int k; + + assert(path); + + k = safe_glob(path, GLOB_NOSORT|GLOB_BRACE, &g); + if (k == -ENOENT) + return false; + if (k < 0) + return k; + return true; +} + +int glob_extend(char ***strv, const char *path) { + _cleanup_globfree_ glob_t g = {}; + int k; + + k = safe_glob(path, GLOB_NOSORT|GLOB_BRACE, &g); + if (k < 0) + return k; + + return strv_extend_strv(strv, g.gl_pathv, false); +} diff --git a/src/basic/glob-util.h b/src/basic/glob-util.h new file mode 100644 index 0000000..8e226c1 --- /dev/null +++ b/src/basic/glob-util.h @@ -0,0 +1,22 @@ +/* SPDX-License-Identifier: LGPL-2.1+ */ +#pragma once + +#include <glob.h> +#include <stdbool.h> +#include <string.h> + +#include "macro.h" +#include "string-util.h" + +/* Note: this function modifies pglob to set various functions. */ +int safe_glob(const char *path, int flags, glob_t *pglob); + +int glob_exists(const char *path); +int glob_extend(char ***strv, const char *path); + +#define _cleanup_globfree_ _cleanup_(globfree) + +_pure_ static inline bool string_is_glob(const char *p) { + /* Check if a string contains any glob patterns. */ + return !!strpbrk(p, GLOB_CHARS); +} diff --git a/src/basic/gunicode.c b/src/basic/gunicode.c new file mode 100644 index 0000000..c51b1a7 --- /dev/null +++ b/src/basic/gunicode.c @@ -0,0 +1,110 @@ +/* gunicode.c - Unicode manipulation functions + * + * Copyright (C) 1999, 2000 Tom Tromey + * Copyright © 2000, 2005 Red Hat, Inc. + */ + +#include "gunicode.h" + +#define unichar uint32_t + +/** + * g_utf8_prev_char: + * @p: a pointer to a position within a UTF-8 encoded string + * + * Finds the previous UTF-8 character in the string before @p. + * + * @p does not have to be at the beginning of a UTF-8 character. No check + * is made to see if the character found is actually valid other than + * it starts with an appropriate byte. If @p might be the first + * character of the string, you must use g_utf8_find_prev_char() instead. + * + * Return value: a pointer to the found character. + **/ +char * +utf8_prev_char (const char *p) +{ + for (;;) + { + p--; + if ((*p & 0xc0) != 0x80) + return (char *)p; + } +} + +struct Interval +{ + unichar start, end; +}; + +static int +interval_compare (const void *key, const void *elt) +{ + unichar c = (unichar) (long) (key); + struct Interval *interval = (struct Interval *)elt; + + if (c < interval->start) + return -1; + if (c > interval->end) + return +1; + + return 0; +} + +/* + * NOTE: + * + * The tables for g_unichar_iswide() and g_unichar_iswide_cjk() are + * generated from the Unicode Character Database's file + * extracted/DerivedEastAsianWidth.txt using the gen-iswide-table.py + * in this way: + * + * ./gen-iswide-table.py < path/to/ucd/extracted/DerivedEastAsianWidth.txt | fmt + * + * Last update for Unicode 6.0. + */ + +/** + * g_unichar_iswide: + * @c: a Unicode character + * + * Determines if a character is typically rendered in a double-width + * cell. + * + * Return value: %TRUE if the character is wide + **/ +bool +unichar_iswide (unichar c) +{ + /* See NOTE earlier for how to update this table. */ + static const struct Interval wide[] = { + {0x1100, 0x115F}, {0x2329, 0x232A}, {0x2E80, 0x2E99}, {0x2E9B, 0x2EF3}, + {0x2F00, 0x2FD5}, {0x2FF0, 0x2FFB}, {0x3000, 0x303E}, {0x3041, 0x3096}, + {0x3099, 0x30FF}, {0x3105, 0x312D}, {0x3131, 0x318E}, {0x3190, 0x31BA}, + {0x31C0, 0x31E3}, {0x31F0, 0x321E}, {0x3220, 0x3247}, {0x3250, 0x32FE}, + {0x3300, 0x4DBF}, {0x4E00, 0xA48C}, {0xA490, 0xA4C6}, {0xA960, 0xA97C}, + {0xAC00, 0xD7A3}, {0xF900, 0xFAFF}, {0xFE10, 0xFE19}, {0xFE30, 0xFE52}, + {0xFE54, 0xFE66}, {0xFE68, 0xFE6B}, {0xFF01, 0xFF60}, {0xFFE0, 0xFFE6}, + {0x1B000, 0x1B001}, {0x1F200, 0x1F202}, {0x1F210, 0x1F23A}, + {0x1F240, 0x1F248}, {0x1F250, 0x1F251}, + {0x1F300, 0x1F567}, /* Miscellaneous Symbols and Pictographs */ + {0x20000, 0x2FFFD}, {0x30000, 0x3FFFD}, + }; + + if (bsearch ((void *)(uintptr_t)c, wide, (sizeof (wide) / sizeof ((wide)[0])), sizeof wide[0], + interval_compare)) + return true; + + return false; +} + +const char utf8_skip_data[256] = { + 1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1, + 1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1, + 1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1, + 1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1, + 1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1, + 1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1, + 2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2, + 3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,4,4,4,4,4,4,4,4,5,5,5,5,6,6,1,1 +}; diff --git a/src/basic/gunicode.h b/src/basic/gunicode.h new file mode 100644 index 0000000..a16b7b6 --- /dev/null +++ b/src/basic/gunicode.h @@ -0,0 +1,30 @@ +#pragma once + +/* gunicode.h - Unicode manipulation functions + * + * Copyright (C) 1999, 2000 Tom Tromey + * Copyright © 2000, 2005 Red Hat, Inc. + */ + +#include <stdbool.h> +#include <stdint.h> +#include <stdlib.h> + +char *utf8_prev_char (const char *p); + +extern const char utf8_skip_data[256]; + +/** + * g_utf8_next_char: + * @p: Pointer to the start of a valid UTF-8 character + * + * Skips to the next character in a UTF-8 string. The string must be + * valid; this macro is as fast as possible, and has no error-checking. + * You would use this macro to iterate over a string character by + * character. The macro returns the start of the next UTF-8 character. + * Before using this macro, use g_utf8_validate() to validate strings + * that may contain invalid UTF-8. + */ +#define utf8_next_char(p) (char *)((p) + utf8_skip_data[*(const unsigned char *)(p)]) + +bool unichar_iswide (uint32_t c); diff --git a/src/basic/hash-funcs.c b/src/basic/hash-funcs.c new file mode 100644 index 0000000..1be43d4 --- /dev/null +++ b/src/basic/hash-funcs.c @@ -0,0 +1,91 @@ +/* SPDX-License-Identifier: LGPL-2.1+ */ + +#include <string.h> + +#include "hash-funcs.h" +#include "path-util.h" + +void string_hash_func(const char *p, struct siphash *state) { + siphash24_compress(p, strlen(p) + 1, state); +} + +DEFINE_HASH_OPS(string_hash_ops, char, string_hash_func, string_compare_func); + +void path_hash_func(const char *q, struct siphash *state) { + size_t n; + + assert(q); + assert(state); + + /* Calculates a hash for a path in a way this duplicate inner slashes don't make a differences, and also + * whether there's a trailing slash or not. This fits well with the semantics of path_compare(), which does + * similar checks and also doesn't care for trailing slashes. Note that relative and absolute paths (i.e. those + * which begin in a slash or not) will hash differently though. */ + + n = strspn(q, "/"); + if (n > 0) { /* Eat up initial slashes, and add one "/" to the hash for all of them */ + siphash24_compress(q, 1, state); + q += n; + } + + for (;;) { + /* Determine length of next component */ + n = strcspn(q, "/"); + if (n == 0) /* Reached the end? */ + break; + + /* Add this component to the hash and skip over it */ + siphash24_compress(q, n, state); + q += n; + + /* How many slashes follow this component? */ + n = strspn(q, "/"); + if (q[n] == 0) /* Is this a trailing slash? If so, we are at the end, and don't care about the slashes anymore */ + break; + + /* We are not add the end yet. Hash exactly one slash for all of the ones we just encountered. */ + siphash24_compress(q, 1, state); + q += n; + } +} + +int path_compare_func(const char *a, const char *b) { + return path_compare(a, b); +} + +DEFINE_HASH_OPS(path_hash_ops, char, path_hash_func, path_compare_func); + +void trivial_hash_func(const void *p, struct siphash *state) { + siphash24_compress(&p, sizeof(p), state); +} + +int trivial_compare_func(const void *a, const void *b) { + return CMP(a, b); +} + +const struct hash_ops trivial_hash_ops = { + .hash = trivial_hash_func, + .compare = trivial_compare_func, +}; + +void uint64_hash_func(const uint64_t *p, struct siphash *state) { + siphash24_compress(p, sizeof(uint64_t), state); +} + +int uint64_compare_func(const uint64_t *a, const uint64_t *b) { + return CMP(*a, *b); +} + +DEFINE_HASH_OPS(uint64_hash_ops, uint64_t, uint64_hash_func, uint64_compare_func); + +#if SIZEOF_DEV_T != 8 +void devt_hash_func(const dev_t *p, struct siphash *state) { + siphash24_compress(p, sizeof(dev_t), state); +} + +int devt_compare_func(const dev_t *a, const dev_t *b) { + return CMP(*a, *b); +} + +DEFINE_HASH_OPS(devt_hash_ops, dev_t, devt_hash_func, devt_compare_func); +#endif diff --git a/src/basic/hash-funcs.h b/src/basic/hash-funcs.h new file mode 100644 index 0000000..3d2ae4b --- /dev/null +++ b/src/basic/hash-funcs.h @@ -0,0 +1,106 @@ +/* SPDX-License-Identifier: LGPL-2.1+ */ +#pragma once + +#include "alloc-util.h" +#include "macro.h" +#include "siphash24.h" + +typedef void (*hash_func_t)(const void *p, struct siphash *state); +typedef int (*compare_func_t)(const void *a, const void *b); + +struct hash_ops { + hash_func_t hash; + compare_func_t compare; + free_func_t free_key; + free_func_t free_value; +}; + +#define _DEFINE_HASH_OPS(uq, name, type, hash_func, compare_func, free_key_func, free_value_func, scope) \ + _unused_ static void (* UNIQ_T(static_hash_wrapper, uq))(const type *, struct siphash *) = hash_func; \ + _unused_ static int (* UNIQ_T(static_compare_wrapper, uq))(const type *, const type *) = compare_func; \ + scope const struct hash_ops name = { \ + .hash = (hash_func_t) hash_func, \ + .compare = (compare_func_t) compare_func, \ + .free_key = free_key_func, \ + .free_value = free_value_func, \ + } + +#define _DEFINE_FREE_FUNC(uq, type, wrapper_name, func) \ + /* Type-safe free function */ \ + static void UNIQ_T(wrapper_name, uq)(void *a) { \ + type *_a = a; \ + func(_a); \ + } + +#define _DEFINE_HASH_OPS_WITH_KEY_DESTRUCTOR(uq, name, type, hash_func, compare_func, free_func, scope) \ + _DEFINE_FREE_FUNC(uq, type, static_free_wrapper, free_func); \ + _DEFINE_HASH_OPS(uq, name, type, hash_func, compare_func, \ + UNIQ_T(static_free_wrapper, uq), NULL, scope) + +#define _DEFINE_HASH_OPS_WITH_VALUE_DESTRUCTOR(uq, name, type, hash_func, compare_func, type_value, free_func, scope) \ + _DEFINE_FREE_FUNC(uq, type_value, static_free_wrapper, free_func); \ + _DEFINE_HASH_OPS(uq, name, type, hash_func, compare_func, \ + NULL, UNIQ_T(static_free_wrapper, uq), scope) + +#define _DEFINE_HASH_OPS_FULL(uq, name, type, hash_func, compare_func, free_key_func, type_value, free_value_func, scope) \ + _DEFINE_FREE_FUNC(uq, type, static_free_key_wrapper, free_key_func); \ + _DEFINE_FREE_FUNC(uq, type_value, static_free_value_wrapper, free_value_func); \ + _DEFINE_HASH_OPS(uq, name, type, hash_func, compare_func, \ + UNIQ_T(static_free_key_wrapper, uq), \ + UNIQ_T(static_free_value_wrapper, uq), scope) + +#define DEFINE_HASH_OPS(name, type, hash_func, compare_func) \ + _DEFINE_HASH_OPS(UNIQ, name, type, hash_func, compare_func, NULL, NULL,) + +#define DEFINE_PRIVATE_HASH_OPS(name, type, hash_func, compare_func) \ + _DEFINE_HASH_OPS(UNIQ, name, type, hash_func, compare_func, NULL, NULL, static) + +#define DEFINE_HASH_OPS_WITH_KEY_DESTRUCTOR(name, type, hash_func, compare_func, free_func) \ + _DEFINE_HASH_OPS_WITH_KEY_DESTRUCTOR(UNIQ, name, type, hash_func, compare_func, free_func,) + +#define DEFINE_PRIVATE_HASH_OPS_WITH_KEY_DESTRUCTOR(name, type, hash_func, compare_func, free_func) \ + _DEFINE_HASH_OPS_WITH_KEY_DESTRUCTOR(UNIQ, name, type, hash_func, compare_func, free_func, static) + +#define DEFINE_HASH_OPS_WITH_VALUE_DESTRUCTOR(name, type, hash_func, compare_func, value_type, free_func) \ + _DEFINE_HASH_OPS_WITH_VALUE_DESTRUCTOR(UNIQ, name, type, hash_func, compare_func, value_type, free_func,) + +#define DEFINE_PRIVATE_HASH_OPS_WITH_VALUE_DESTRUCTOR(name, type, hash_func, compare_func, value_type, free_func) \ + _DEFINE_HASH_OPS_WITH_VALUE_DESTRUCTOR(UNIQ, name, type, hash_func, compare_func, value_type, free_func, static) + +#define DEFINE_HASH_OPS_FULL(name, type, hash_func, compare_func, free_key_func, value_type, free_value_func) \ + _DEFINE_HASH_OPS_FULL(UNIQ, name, type, hash_func, compare_func, free_key_func, value_type, free_value_func,) + +#define DEFINE_PRIVATE_HASH_OPS_FULL(name, type, hash_func, compare_func, free_key_func, value_type, free_value_func) \ + _DEFINE_HASH_OPS_FULL(UNIQ, name, type, hash_func, compare_func, free_key_func, value_type, free_value_func, static) + +void string_hash_func(const char *p, struct siphash *state); +#define string_compare_func strcmp +extern const struct hash_ops string_hash_ops; + +void path_hash_func(const char *p, struct siphash *state); +int path_compare_func(const char *a, const char *b) _pure_; +extern const struct hash_ops path_hash_ops; + +/* This will compare the passed pointers directly, and will not dereference them. This is hence not useful for strings + * or suchlike. */ +void trivial_hash_func(const void *p, struct siphash *state); +int trivial_compare_func(const void *a, const void *b) _const_; +extern const struct hash_ops trivial_hash_ops; + +/* 32bit values we can always just embed in the pointer itself, but in order to support 32bit archs we need store 64bit + * values indirectly, since they don't fit in a pointer. */ +void uint64_hash_func(const uint64_t *p, struct siphash *state); +int uint64_compare_func(const uint64_t *a, const uint64_t *b) _pure_; +extern const struct hash_ops uint64_hash_ops; + +/* On some archs dev_t is 32bit, and on others 64bit. And sometimes it's 64bit on 32bit archs, and sometimes 32bit on + * 64bit archs. Yuck! */ +#if SIZEOF_DEV_T != 8 +void devt_hash_func(const dev_t *p, struct siphash *state) _pure_; +int devt_compare_func(const dev_t *a, const dev_t *b) _pure_; +extern const struct hash_ops devt_hash_ops; +#else +#define devt_hash_func uint64_hash_func +#define devt_compare_func uint64_compare_func +#define devt_hash_ops uint64_hash_ops +#endif diff --git a/src/basic/hashmap.c b/src/basic/hashmap.c new file mode 100644 index 0000000..66e9e00 --- /dev/null +++ b/src/basic/hashmap.c @@ -0,0 +1,1911 @@ +/* SPDX-License-Identifier: LGPL-2.1+ */ + +#include <errno.h> +#include <stdint.h> +#include <stdlib.h> +#include <string.h> + +#include "alloc-util.h" +#include "fileio.h" +#include "hashmap.h" +#include "macro.h" +#include "mempool.h" +#include "process-util.h" +#include "random-util.h" +#include "set.h" +#include "siphash24.h" +#include "string-util.h" +#include "strv.h" +#include "util.h" + +#if ENABLE_DEBUG_HASHMAP +#include <pthread.h> +#include "list.h" +#endif + +/* + * Implementation of hashmaps. + * Addressing: open + * - uses less RAM compared to closed addressing (chaining), because + * our entries are small (especially in Sets, which tend to contain + * the majority of entries in systemd). + * Collision resolution: Robin Hood + * - tends to equalize displacement of entries from their optimal buckets. + * Probe sequence: linear + * - though theoretically worse than random probing/uniform hashing/double + * hashing, it is good for cache locality. + * + * References: + * Celis, P. 1986. Robin Hood Hashing. + * Ph.D. Dissertation. University of Waterloo, Waterloo, Ont., Canada, Canada. + * https://cs.uwaterloo.ca/research/tr/1986/CS-86-14.pdf + * - The results are derived for random probing. Suggests deletion with + * tombstones and two mean-centered search methods. None of that works + * well for linear probing. + * + * Janson, S. 2005. Individual displacements for linear probing hashing with different insertion policies. + * ACM Trans. Algorithms 1, 2 (October 2005), 177-213. + * DOI=10.1145/1103963.1103964 http://doi.acm.org/10.1145/1103963.1103964 + * http://www.math.uu.se/~svante/papers/sj157.pdf + * - Applies to Robin Hood with linear probing. Contains remarks on + * the unsuitability of mean-centered search with linear probing. + * + * Viola, A. 2005. Exact distribution of individual displacements in linear probing hashing. + * ACM Trans. Algorithms 1, 2 (October 2005), 214-242. + * DOI=10.1145/1103963.1103965 http://doi.acm.org/10.1145/1103963.1103965 + * - Similar to Janson. Note that Viola writes about C_{m,n} (number of probes + * in a successful search), and Janson writes about displacement. C = d + 1. + * + * Goossaert, E. 2013. Robin Hood hashing: backward shift deletion. + * http://codecapsule.com/2013/11/17/robin-hood-hashing-backward-shift-deletion/ + * - Explanation of backward shift deletion with pictures. + * + * Khuong, P. 2013. The Other Robin Hood Hashing. + * http://www.pvk.ca/Blog/2013/11/26/the-other-robin-hood-hashing/ + * - Short summary of random vs. linear probing, and tombstones vs. backward shift. + */ + +/* + * XXX Ideas for improvement: + * For unordered hashmaps, randomize iteration order, similarly to Perl: + * http://blog.booking.com/hardening-perls-hash-function.html + */ + +/* INV_KEEP_FREE = 1 / (1 - max_load_factor) + * e.g. 1 / (1 - 0.8) = 5 ... keep one fifth of the buckets free. */ +#define INV_KEEP_FREE 5U + +/* Fields common to entries of all hashmap/set types */ +struct hashmap_base_entry { + const void *key; +}; + +/* Entry types for specific hashmap/set types + * hashmap_base_entry must be at the beginning of each entry struct. */ + +struct plain_hashmap_entry { + struct hashmap_base_entry b; + void *value; +}; + +struct ordered_hashmap_entry { + struct plain_hashmap_entry p; + unsigned iterate_next, iterate_previous; +}; + +struct set_entry { + struct hashmap_base_entry b; +}; + +/* In several functions it is advantageous to have the hash table extended + * virtually by a couple of additional buckets. We reserve special index values + * for these "swap" buckets. */ +#define _IDX_SWAP_BEGIN (UINT_MAX - 3) +#define IDX_PUT (_IDX_SWAP_BEGIN + 0) +#define IDX_TMP (_IDX_SWAP_BEGIN + 1) +#define _IDX_SWAP_END (_IDX_SWAP_BEGIN + 2) + +#define IDX_FIRST (UINT_MAX - 1) /* special index for freshly initialized iterators */ +#define IDX_NIL UINT_MAX /* special index value meaning "none" or "end" */ + +assert_cc(IDX_FIRST == _IDX_SWAP_END); +assert_cc(IDX_FIRST == _IDX_ITERATOR_FIRST); + +/* Storage space for the "swap" buckets. + * All entry types can fit into a ordered_hashmap_entry. */ +struct swap_entries { + struct ordered_hashmap_entry e[_IDX_SWAP_END - _IDX_SWAP_BEGIN]; +}; + +/* Distance from Initial Bucket */ +typedef uint8_t dib_raw_t; +#define DIB_RAW_OVERFLOW ((dib_raw_t)0xfdU) /* indicates DIB value is greater than representable */ +#define DIB_RAW_REHASH ((dib_raw_t)0xfeU) /* entry yet to be rehashed during in-place resize */ +#define DIB_RAW_FREE ((dib_raw_t)0xffU) /* a free bucket */ +#define DIB_RAW_INIT ((char)DIB_RAW_FREE) /* a byte to memset a DIB store with when initializing */ + +#define DIB_FREE UINT_MAX + +#if ENABLE_DEBUG_HASHMAP +struct hashmap_debug_info { + LIST_FIELDS(struct hashmap_debug_info, debug_list); + unsigned max_entries; /* high watermark of n_entries */ + + /* who allocated this hashmap */ + int line; + const char *file; + const char *func; + + /* fields to detect modification while iterating */ + unsigned put_count; /* counts puts into the hashmap */ + unsigned rem_count; /* counts removals from hashmap */ + unsigned last_rem_idx; /* remembers last removal index */ +}; + +/* Tracks all existing hashmaps. Get at it from gdb. See sd_dump_hashmaps.py */ +static LIST_HEAD(struct hashmap_debug_info, hashmap_debug_list); +static pthread_mutex_t hashmap_debug_list_mutex = PTHREAD_MUTEX_INITIALIZER; + +#define HASHMAP_DEBUG_FIELDS struct hashmap_debug_info debug; + +#else /* !ENABLE_DEBUG_HASHMAP */ +#define HASHMAP_DEBUG_FIELDS +#endif /* ENABLE_DEBUG_HASHMAP */ + +enum HashmapType { + HASHMAP_TYPE_PLAIN, + HASHMAP_TYPE_ORDERED, + HASHMAP_TYPE_SET, + _HASHMAP_TYPE_MAX +}; + +struct _packed_ indirect_storage { + void *storage; /* where buckets and DIBs are stored */ + uint8_t hash_key[HASH_KEY_SIZE]; /* hash key; changes during resize */ + + unsigned n_entries; /* number of stored entries */ + unsigned n_buckets; /* number of buckets */ + + unsigned idx_lowest_entry; /* Index below which all buckets are free. + Makes "while(hashmap_steal_first())" loops + O(n) instead of O(n^2) for unordered hashmaps. */ + uint8_t _pad[3]; /* padding for the whole HashmapBase */ + /* The bitfields in HashmapBase complete the alignment of the whole thing. */ +}; + +struct direct_storage { + /* This gives us 39 bytes on 64bit, or 35 bytes on 32bit. + * That's room for 4 set_entries + 4 DIB bytes + 3 unused bytes on 64bit, + * or 7 set_entries + 7 DIB bytes + 0 unused bytes on 32bit. */ + uint8_t storage[sizeof(struct indirect_storage)]; +}; + +#define DIRECT_BUCKETS(entry_t) \ + (sizeof(struct direct_storage) / (sizeof(entry_t) + sizeof(dib_raw_t))) + +/* We should be able to store at least one entry directly. */ +assert_cc(DIRECT_BUCKETS(struct ordered_hashmap_entry) >= 1); + +/* We have 3 bits for n_direct_entries. */ +assert_cc(DIRECT_BUCKETS(struct set_entry) < (1 << 3)); + +/* Hashmaps with directly stored entries all use this shared hash key. + * It's no big deal if the key is guessed, because there can be only + * a handful of directly stored entries in a hashmap. When a hashmap + * outgrows direct storage, it gets its own key for indirect storage. */ +static uint8_t shared_hash_key[HASH_KEY_SIZE]; +static bool shared_hash_key_initialized; + +/* Fields that all hashmap/set types must have */ +struct HashmapBase { + const struct hash_ops *hash_ops; /* hash and compare ops to use */ + + union _packed_ { + struct indirect_storage indirect; /* if has_indirect */ + struct direct_storage direct; /* if !has_indirect */ + }; + + enum HashmapType type:2; /* HASHMAP_TYPE_* */ + bool has_indirect:1; /* whether indirect storage is used */ + unsigned n_direct_entries:3; /* Number of entries in direct storage. + * Only valid if !has_indirect. */ + bool from_pool:1; /* whether was allocated from mempool */ + bool dirty:1; /* whether dirtied since last iterated_cache_get() */ + bool cached:1; /* whether this hashmap is being cached */ + HASHMAP_DEBUG_FIELDS /* optional hashmap_debug_info */ +}; + +/* Specific hash types + * HashmapBase must be at the beginning of each hashmap struct. */ + +struct Hashmap { + struct HashmapBase b; +}; + +struct OrderedHashmap { + struct HashmapBase b; + unsigned iterate_list_head, iterate_list_tail; +}; + +struct Set { + struct HashmapBase b; +}; + +typedef struct CacheMem { + const void **ptr; + size_t n_populated, n_allocated; + bool active:1; +} CacheMem; + +struct IteratedCache { + HashmapBase *hashmap; + CacheMem keys, values; +}; + +DEFINE_MEMPOOL(hashmap_pool, Hashmap, 8); +DEFINE_MEMPOOL(ordered_hashmap_pool, OrderedHashmap, 8); +/* No need for a separate Set pool */ +assert_cc(sizeof(Hashmap) == sizeof(Set)); + +struct hashmap_type_info { + size_t head_size; + size_t entry_size; + struct mempool *mempool; + unsigned n_direct_buckets; +}; + +static const struct hashmap_type_info hashmap_type_info[_HASHMAP_TYPE_MAX] = { + [HASHMAP_TYPE_PLAIN] = { + .head_size = sizeof(Hashmap), + .entry_size = sizeof(struct plain_hashmap_entry), + .mempool = &hashmap_pool, + .n_direct_buckets = DIRECT_BUCKETS(struct plain_hashmap_entry), + }, + [HASHMAP_TYPE_ORDERED] = { + .head_size = sizeof(OrderedHashmap), + .entry_size = sizeof(struct ordered_hashmap_entry), + .mempool = &ordered_hashmap_pool, + .n_direct_buckets = DIRECT_BUCKETS(struct ordered_hashmap_entry), + }, + [HASHMAP_TYPE_SET] = { + .head_size = sizeof(Set), + .entry_size = sizeof(struct set_entry), + .mempool = &hashmap_pool, + .n_direct_buckets = DIRECT_BUCKETS(struct set_entry), + }, +}; + +#if VALGRIND +_destructor_ static void cleanup_pools(void) { + _cleanup_free_ char *t = NULL; + int r; + + /* Be nice to valgrind */ + + /* The pool is only allocated by the main thread, but the memory can + * be passed to other threads. Let's clean up if we are the main thread + * and no other threads are live. */ + if (!is_main_thread()) + return; + + r = get_proc_field("/proc/self/status", "Threads", WHITESPACE, &t); + if (r < 0 || !streq(t, "1")) + return; + + mempool_drop(&hashmap_pool); + mempool_drop(&ordered_hashmap_pool); +} +#endif + +static unsigned n_buckets(HashmapBase *h) { + return h->has_indirect ? h->indirect.n_buckets + : hashmap_type_info[h->type].n_direct_buckets; +} + +static unsigned n_entries(HashmapBase *h) { + return h->has_indirect ? h->indirect.n_entries + : h->n_direct_entries; +} + +static void n_entries_inc(HashmapBase *h) { + if (h->has_indirect) + h->indirect.n_entries++; + else + h->n_direct_entries++; +} + +static void n_entries_dec(HashmapBase *h) { + if (h->has_indirect) + h->indirect.n_entries--; + else + h->n_direct_entries--; +} + +static void *storage_ptr(HashmapBase *h) { + return h->has_indirect ? h->indirect.storage + : h->direct.storage; +} + +static uint8_t *hash_key(HashmapBase *h) { + return h->has_indirect ? h->indirect.hash_key + : shared_hash_key; +} + +static unsigned base_bucket_hash(HashmapBase *h, const void *p) { + struct siphash state; + uint64_t hash; + + siphash24_init(&state, hash_key(h)); + + h->hash_ops->hash(p, &state); + + hash = siphash24_finalize(&state); + + return (unsigned) (hash % n_buckets(h)); +} +#define bucket_hash(h, p) base_bucket_hash(HASHMAP_BASE(h), p) + +static void base_set_dirty(HashmapBase *h) { + h->dirty = true; +} +#define hashmap_set_dirty(h) base_set_dirty(HASHMAP_BASE(h)) + +static void get_hash_key(uint8_t hash_key[HASH_KEY_SIZE], bool reuse_is_ok) { + static uint8_t current[HASH_KEY_SIZE]; + static bool current_initialized = false; + + /* Returns a hash function key to use. In order to keep things + * fast we will not generate a new key each time we allocate a + * new hash table. Instead, we'll just reuse the most recently + * generated one, except if we never generated one or when we + * are rehashing an entire hash table because we reached a + * fill level */ + + if (!current_initialized || !reuse_is_ok) { + random_bytes(current, sizeof(current)); + current_initialized = true; + } + + memcpy(hash_key, current, sizeof(current)); +} + +static struct hashmap_base_entry *bucket_at(HashmapBase *h, unsigned idx) { + return (struct hashmap_base_entry*) + ((uint8_t*) storage_ptr(h) + idx * hashmap_type_info[h->type].entry_size); +} + +static struct plain_hashmap_entry *plain_bucket_at(Hashmap *h, unsigned idx) { + return (struct plain_hashmap_entry*) bucket_at(HASHMAP_BASE(h), idx); +} + +static struct ordered_hashmap_entry *ordered_bucket_at(OrderedHashmap *h, unsigned idx) { + return (struct ordered_hashmap_entry*) bucket_at(HASHMAP_BASE(h), idx); +} + +static struct set_entry *set_bucket_at(Set *h, unsigned idx) { + return (struct set_entry*) bucket_at(HASHMAP_BASE(h), idx); +} + +static struct ordered_hashmap_entry *bucket_at_swap(struct swap_entries *swap, unsigned idx) { + return &swap->e[idx - _IDX_SWAP_BEGIN]; +} + +/* Returns a pointer to the bucket at index idx. + * Understands real indexes and swap indexes, hence "_virtual". */ +static struct hashmap_base_entry *bucket_at_virtual(HashmapBase *h, struct swap_entries *swap, + unsigned idx) { + if (idx < _IDX_SWAP_BEGIN) + return bucket_at(h, idx); + + if (idx < _IDX_SWAP_END) + return &bucket_at_swap(swap, idx)->p.b; + + assert_not_reached("Invalid index"); +} + +static dib_raw_t *dib_raw_ptr(HashmapBase *h) { + return (dib_raw_t*) + ((uint8_t*) storage_ptr(h) + hashmap_type_info[h->type].entry_size * n_buckets(h)); +} + +static unsigned bucket_distance(HashmapBase *h, unsigned idx, unsigned from) { + return idx >= from ? idx - from + : n_buckets(h) + idx - from; +} + +static unsigned bucket_calculate_dib(HashmapBase *h, unsigned idx, dib_raw_t raw_dib) { + unsigned initial_bucket; + + if (raw_dib == DIB_RAW_FREE) + return DIB_FREE; + + if (_likely_(raw_dib < DIB_RAW_OVERFLOW)) + return raw_dib; + + /* + * Having an overflow DIB value is very unlikely. The hash function + * would have to be bad. For example, in a table of size 2^24 filled + * to load factor 0.9 the maximum observed DIB is only about 60. + * In theory (assuming I used Maxima correctly), for an infinite size + * hash table with load factor 0.8 the probability of a given entry + * having DIB > 40 is 1.9e-8. + * This returns the correct DIB value by recomputing the hash value in + * the unlikely case. XXX Hitting this case could be a hint to rehash. + */ + initial_bucket = bucket_hash(h, bucket_at(h, idx)->key); + return bucket_distance(h, idx, initial_bucket); +} + +static void bucket_set_dib(HashmapBase *h, unsigned idx, unsigned dib) { + dib_raw_ptr(h)[idx] = dib != DIB_FREE ? MIN(dib, DIB_RAW_OVERFLOW) : DIB_RAW_FREE; +} + +static unsigned skip_free_buckets(HashmapBase *h, unsigned idx) { + dib_raw_t *dibs; + + dibs = dib_raw_ptr(h); + + for ( ; idx < n_buckets(h); idx++) + if (dibs[idx] != DIB_RAW_FREE) + return idx; + + return IDX_NIL; +} + +static void bucket_mark_free(HashmapBase *h, unsigned idx) { + memzero(bucket_at(h, idx), hashmap_type_info[h->type].entry_size); + bucket_set_dib(h, idx, DIB_FREE); +} + +static void bucket_move_entry(HashmapBase *h, struct swap_entries *swap, + unsigned from, unsigned to) { + struct hashmap_base_entry *e_from, *e_to; + + assert(from != to); + + e_from = bucket_at_virtual(h, swap, from); + e_to = bucket_at_virtual(h, swap, to); + + memcpy(e_to, e_from, hashmap_type_info[h->type].entry_size); + + if (h->type == HASHMAP_TYPE_ORDERED) { + OrderedHashmap *lh = (OrderedHashmap*) h; + struct ordered_hashmap_entry *le, *le_to; + + le_to = (struct ordered_hashmap_entry*) e_to; + + if (le_to->iterate_next != IDX_NIL) { + le = (struct ordered_hashmap_entry*) + bucket_at_virtual(h, swap, le_to->iterate_next); + le->iterate_previous = to; + } + + if (le_to->iterate_previous != IDX_NIL) { + le = (struct ordered_hashmap_entry*) + bucket_at_virtual(h, swap, le_to->iterate_previous); + le->iterate_next = to; + } + + if (lh->iterate_list_head == from) + lh->iterate_list_head = to; + if (lh->iterate_list_tail == from) + lh->iterate_list_tail = to; + } +} + +static unsigned next_idx(HashmapBase *h, unsigned idx) { + return (idx + 1U) % n_buckets(h); +} + +static unsigned prev_idx(HashmapBase *h, unsigned idx) { + return (n_buckets(h) + idx - 1U) % n_buckets(h); +} + +static void *entry_value(HashmapBase *h, struct hashmap_base_entry *e) { + switch (h->type) { + + case HASHMAP_TYPE_PLAIN: + case HASHMAP_TYPE_ORDERED: + return ((struct plain_hashmap_entry*)e)->value; + + case HASHMAP_TYPE_SET: + return (void*) e->key; + + default: + assert_not_reached("Unknown hashmap type"); + } +} + +static void base_remove_entry(HashmapBase *h, unsigned idx) { + unsigned left, right, prev, dib; + dib_raw_t raw_dib, *dibs; + + dibs = dib_raw_ptr(h); + assert(dibs[idx] != DIB_RAW_FREE); + +#if ENABLE_DEBUG_HASHMAP + h->debug.rem_count++; + h->debug.last_rem_idx = idx; +#endif + + left = idx; + /* Find the stop bucket ("right"). It is either free or has DIB == 0. */ + for (right = next_idx(h, left); ; right = next_idx(h, right)) { + raw_dib = dibs[right]; + if (IN_SET(raw_dib, 0, DIB_RAW_FREE)) + break; + + /* The buckets are not supposed to be all occupied and with DIB > 0. + * That would mean we could make everyone better off by shifting them + * backward. This scenario is impossible. */ + assert(left != right); + } + + if (h->type == HASHMAP_TYPE_ORDERED) { + OrderedHashmap *lh = (OrderedHashmap*) h; + struct ordered_hashmap_entry *le = ordered_bucket_at(lh, idx); + + if (le->iterate_next != IDX_NIL) + ordered_bucket_at(lh, le->iterate_next)->iterate_previous = le->iterate_previous; + else + lh->iterate_list_tail = le->iterate_previous; + + if (le->iterate_previous != IDX_NIL) + ordered_bucket_at(lh, le->iterate_previous)->iterate_next = le->iterate_next; + else + lh->iterate_list_head = le->iterate_next; + } + + /* Now shift all buckets in the interval (left, right) one step backwards */ + for (prev = left, left = next_idx(h, left); left != right; + prev = left, left = next_idx(h, left)) { + dib = bucket_calculate_dib(h, left, dibs[left]); + assert(dib != 0); + bucket_move_entry(h, NULL, left, prev); + bucket_set_dib(h, prev, dib - 1); + } + + bucket_mark_free(h, prev); + n_entries_dec(h); + base_set_dirty(h); +} +#define remove_entry(h, idx) base_remove_entry(HASHMAP_BASE(h), idx) + +static unsigned hashmap_iterate_in_insertion_order(OrderedHashmap *h, Iterator *i) { + struct ordered_hashmap_entry *e; + unsigned idx; + + assert(h); + assert(i); + + if (i->idx == IDX_NIL) + goto at_end; + + if (i->idx == IDX_FIRST && h->iterate_list_head == IDX_NIL) + goto at_end; + + if (i->idx == IDX_FIRST) { + idx = h->iterate_list_head; + e = ordered_bucket_at(h, idx); + } else { + idx = i->idx; + e = ordered_bucket_at(h, idx); + /* + * We allow removing the current entry while iterating, but removal may cause + * a backward shift. The next entry may thus move one bucket to the left. + * To detect when it happens, we remember the key pointer of the entry we were + * going to iterate next. If it does not match, there was a backward shift. + */ + if (e->p.b.key != i->next_key) { + idx = prev_idx(HASHMAP_BASE(h), idx); + e = ordered_bucket_at(h, idx); + } + assert(e->p.b.key == i->next_key); + } + +#if ENABLE_DEBUG_HASHMAP + i->prev_idx = idx; +#endif + + if (e->iterate_next != IDX_NIL) { + struct ordered_hashmap_entry *n; + i->idx = e->iterate_next; + n = ordered_bucket_at(h, i->idx); + i->next_key = n->p.b.key; + } else + i->idx = IDX_NIL; + + return idx; + +at_end: + i->idx = IDX_NIL; + return IDX_NIL; +} + +static unsigned hashmap_iterate_in_internal_order(HashmapBase *h, Iterator *i) { + unsigned idx; + + assert(h); + assert(i); + + if (i->idx == IDX_NIL) + goto at_end; + + if (i->idx == IDX_FIRST) { + /* fast forward to the first occupied bucket */ + if (h->has_indirect) { + i->idx = skip_free_buckets(h, h->indirect.idx_lowest_entry); + h->indirect.idx_lowest_entry = i->idx; + } else + i->idx = skip_free_buckets(h, 0); + + if (i->idx == IDX_NIL) + goto at_end; + } else { + struct hashmap_base_entry *e; + + assert(i->idx > 0); + + e = bucket_at(h, i->idx); + /* + * We allow removing the current entry while iterating, but removal may cause + * a backward shift. The next entry may thus move one bucket to the left. + * To detect when it happens, we remember the key pointer of the entry we were + * going to iterate next. If it does not match, there was a backward shift. + */ + if (e->key != i->next_key) + e = bucket_at(h, --i->idx); + + assert(e->key == i->next_key); + } + + idx = i->idx; +#if ENABLE_DEBUG_HASHMAP + i->prev_idx = idx; +#endif + + i->idx = skip_free_buckets(h, i->idx + 1); + if (i->idx != IDX_NIL) + i->next_key = bucket_at(h, i->idx)->key; + else + i->idx = IDX_NIL; + + return idx; + +at_end: + i->idx = IDX_NIL; + return IDX_NIL; +} + +static unsigned hashmap_iterate_entry(HashmapBase *h, Iterator *i) { + if (!h) { + i->idx = IDX_NIL; + return IDX_NIL; + } + +#if ENABLE_DEBUG_HASHMAP + if (i->idx == IDX_FIRST) { + i->put_count = h->debug.put_count; + i->rem_count = h->debug.rem_count; + } else { + /* While iterating, must not add any new entries */ + assert(i->put_count == h->debug.put_count); + /* ... or remove entries other than the current one */ + assert(i->rem_count == h->debug.rem_count || + (i->rem_count == h->debug.rem_count - 1 && + i->prev_idx == h->debug.last_rem_idx)); + /* Reset our removals counter */ + i->rem_count = h->debug.rem_count; + } +#endif + + return h->type == HASHMAP_TYPE_ORDERED ? hashmap_iterate_in_insertion_order((OrderedHashmap*) h, i) + : hashmap_iterate_in_internal_order(h, i); +} + +bool internal_hashmap_iterate(HashmapBase *h, Iterator *i, void **value, const void **key) { + struct hashmap_base_entry *e; + void *data; + unsigned idx; + + idx = hashmap_iterate_entry(h, i); + if (idx == IDX_NIL) { + if (value) + *value = NULL; + if (key) + *key = NULL; + + return false; + } + + e = bucket_at(h, idx); + data = entry_value(h, e); + if (value) + *value = data; + if (key) + *key = e->key; + + return true; +} + +bool set_iterate(Set *s, Iterator *i, void **value) { + return internal_hashmap_iterate(HASHMAP_BASE(s), i, value, NULL); +} + +#define HASHMAP_FOREACH_IDX(idx, h, i) \ + for ((i) = ITERATOR_FIRST, (idx) = hashmap_iterate_entry((h), &(i)); \ + (idx != IDX_NIL); \ + (idx) = hashmap_iterate_entry((h), &(i))) + +IteratedCache *internal_hashmap_iterated_cache_new(HashmapBase *h) { + IteratedCache *cache; + + assert(h); + assert(!h->cached); + + if (h->cached) + return NULL; + + cache = new0(IteratedCache, 1); + if (!cache) + return NULL; + + cache->hashmap = h; + h->cached = true; + + return cache; +} + +static void reset_direct_storage(HashmapBase *h) { + const struct hashmap_type_info *hi = &hashmap_type_info[h->type]; + void *p; + + assert(!h->has_indirect); + + p = mempset(h->direct.storage, 0, hi->entry_size * hi->n_direct_buckets); + memset(p, DIB_RAW_INIT, sizeof(dib_raw_t) * hi->n_direct_buckets); +} + +static struct HashmapBase *hashmap_base_new(const struct hash_ops *hash_ops, enum HashmapType type HASHMAP_DEBUG_PARAMS) { + HashmapBase *h; + const struct hashmap_type_info *hi = &hashmap_type_info[type]; + bool up; + + up = mempool_enabled(); + + h = up ? mempool_alloc0_tile(hi->mempool) : malloc0(hi->head_size); + if (!h) + return NULL; + + h->type = type; + h->from_pool = up; + h->hash_ops = hash_ops ?: &trivial_hash_ops; + + if (type == HASHMAP_TYPE_ORDERED) { + OrderedHashmap *lh = (OrderedHashmap*)h; + lh->iterate_list_head = lh->iterate_list_tail = IDX_NIL; + } + + reset_direct_storage(h); + + if (!shared_hash_key_initialized) { + random_bytes(shared_hash_key, sizeof(shared_hash_key)); + shared_hash_key_initialized= true; + } + +#if ENABLE_DEBUG_HASHMAP + h->debug.func = func; + h->debug.file = file; + h->debug.line = line; + assert_se(pthread_mutex_lock(&hashmap_debug_list_mutex) == 0); + LIST_PREPEND(debug_list, hashmap_debug_list, &h->debug); + assert_se(pthread_mutex_unlock(&hashmap_debug_list_mutex) == 0); +#endif + + return h; +} + +Hashmap *internal_hashmap_new(const struct hash_ops *hash_ops HASHMAP_DEBUG_PARAMS) { + return (Hashmap*) hashmap_base_new(hash_ops, HASHMAP_TYPE_PLAIN HASHMAP_DEBUG_PASS_ARGS); +} + +OrderedHashmap *internal_ordered_hashmap_new(const struct hash_ops *hash_ops HASHMAP_DEBUG_PARAMS) { + return (OrderedHashmap*) hashmap_base_new(hash_ops, HASHMAP_TYPE_ORDERED HASHMAP_DEBUG_PASS_ARGS); +} + +Set *internal_set_new(const struct hash_ops *hash_ops HASHMAP_DEBUG_PARAMS) { + return (Set*) hashmap_base_new(hash_ops, HASHMAP_TYPE_SET HASHMAP_DEBUG_PASS_ARGS); +} + +static int hashmap_base_ensure_allocated(HashmapBase **h, const struct hash_ops *hash_ops, + enum HashmapType type HASHMAP_DEBUG_PARAMS) { + HashmapBase *q; + + assert(h); + + if (*h) + return 0; + + q = hashmap_base_new(hash_ops, type HASHMAP_DEBUG_PASS_ARGS); + if (!q) + return -ENOMEM; + + *h = q; + return 0; +} + +int internal_hashmap_ensure_allocated(Hashmap **h, const struct hash_ops *hash_ops HASHMAP_DEBUG_PARAMS) { + return hashmap_base_ensure_allocated((HashmapBase**)h, hash_ops, HASHMAP_TYPE_PLAIN HASHMAP_DEBUG_PASS_ARGS); +} + +int internal_ordered_hashmap_ensure_allocated(OrderedHashmap **h, const struct hash_ops *hash_ops HASHMAP_DEBUG_PARAMS) { + return hashmap_base_ensure_allocated((HashmapBase**)h, hash_ops, HASHMAP_TYPE_ORDERED HASHMAP_DEBUG_PASS_ARGS); +} + +int internal_set_ensure_allocated(Set **s, const struct hash_ops *hash_ops HASHMAP_DEBUG_PARAMS) { + return hashmap_base_ensure_allocated((HashmapBase**)s, hash_ops, HASHMAP_TYPE_SET HASHMAP_DEBUG_PASS_ARGS); +} + +static void hashmap_free_no_clear(HashmapBase *h) { + assert(!h->has_indirect); + assert(h->n_direct_entries == 0); + +#if ENABLE_DEBUG_HASHMAP + assert_se(pthread_mutex_lock(&hashmap_debug_list_mutex) == 0); + LIST_REMOVE(debug_list, hashmap_debug_list, &h->debug); + assert_se(pthread_mutex_unlock(&hashmap_debug_list_mutex) == 0); +#endif + + if (h->from_pool) { + /* Ensure that the object didn't get migrated between threads. */ + assert_se(is_main_thread()); + mempool_free_tile(hashmap_type_info[h->type].mempool, h); + } else + free(h); +} + +HashmapBase *internal_hashmap_free(HashmapBase *h, free_func_t default_free_key, free_func_t default_free_value) { + if (h) { + internal_hashmap_clear(h, default_free_key, default_free_value); + hashmap_free_no_clear(h); + } + + return NULL; +} + +void internal_hashmap_clear(HashmapBase *h, free_func_t default_free_key, free_func_t default_free_value) { + free_func_t free_key, free_value; + if (!h) + return; + + free_key = h->hash_ops->free_key ?: default_free_key; + free_value = h->hash_ops->free_value ?: default_free_value; + + if (free_key || free_value) { + + /* If destructor calls are defined, let's destroy things defensively: let's take the item out of the + * hash table, and only then call the destructor functions. If these destructors then try to unregister + * themselves from our hash table a second time, the entry is already gone. */ + + while (internal_hashmap_size(h) > 0) { + void *k = NULL; + void *v; + + v = internal_hashmap_first_key_and_value(h, true, &k); + + if (free_key) + free_key(k); + + if (free_value) + free_value(v); + } + } + + if (h->has_indirect) { + free(h->indirect.storage); + h->has_indirect = false; + } + + h->n_direct_entries = 0; + reset_direct_storage(h); + + if (h->type == HASHMAP_TYPE_ORDERED) { + OrderedHashmap *lh = (OrderedHashmap*) h; + lh->iterate_list_head = lh->iterate_list_tail = IDX_NIL; + } + + base_set_dirty(h); +} + +static int resize_buckets(HashmapBase *h, unsigned entries_add); + +/* + * Finds an empty bucket to put an entry into, starting the scan at 'idx'. + * Performs Robin Hood swaps as it goes. The entry to put must be placed + * by the caller into swap slot IDX_PUT. + * If used for in-place resizing, may leave a displaced entry in swap slot + * IDX_PUT. Caller must rehash it next. + * Returns: true if it left a displaced entry to rehash next in IDX_PUT, + * false otherwise. + */ +static bool hashmap_put_robin_hood(HashmapBase *h, unsigned idx, + struct swap_entries *swap) { + dib_raw_t raw_dib, *dibs; + unsigned dib, distance; + +#if ENABLE_DEBUG_HASHMAP + h->debug.put_count++; +#endif + + dibs = dib_raw_ptr(h); + + for (distance = 0; ; distance++) { + raw_dib = dibs[idx]; + if (IN_SET(raw_dib, DIB_RAW_FREE, DIB_RAW_REHASH)) { + if (raw_dib == DIB_RAW_REHASH) + bucket_move_entry(h, swap, idx, IDX_TMP); + + if (h->has_indirect && h->indirect.idx_lowest_entry > idx) + h->indirect.idx_lowest_entry = idx; + + bucket_set_dib(h, idx, distance); + bucket_move_entry(h, swap, IDX_PUT, idx); + if (raw_dib == DIB_RAW_REHASH) { + bucket_move_entry(h, swap, IDX_TMP, IDX_PUT); + return true; + } + + return false; + } + + dib = bucket_calculate_dib(h, idx, raw_dib); + + if (dib < distance) { + /* Found a wealthier entry. Go Robin Hood! */ + bucket_set_dib(h, idx, distance); + + /* swap the entries */ + bucket_move_entry(h, swap, idx, IDX_TMP); + bucket_move_entry(h, swap, IDX_PUT, idx); + bucket_move_entry(h, swap, IDX_TMP, IDX_PUT); + + distance = dib; + } + + idx = next_idx(h, idx); + } +} + +/* + * Puts an entry into a hashmap, boldly - no check whether key already exists. + * The caller must place the entry (only its key and value, not link indexes) + * in swap slot IDX_PUT. + * Caller must ensure: the key does not exist yet in the hashmap. + * that resize is not needed if !may_resize. + * Returns: 1 if entry was put successfully. + * -ENOMEM if may_resize==true and resize failed with -ENOMEM. + * Cannot return -ENOMEM if !may_resize. + */ +static int hashmap_base_put_boldly(HashmapBase *h, unsigned idx, + struct swap_entries *swap, bool may_resize) { + struct ordered_hashmap_entry *new_entry; + int r; + + assert(idx < n_buckets(h)); + + new_entry = bucket_at_swap(swap, IDX_PUT); + + if (may_resize) { + r = resize_buckets(h, 1); + if (r < 0) + return r; + if (r > 0) + idx = bucket_hash(h, new_entry->p.b.key); + } + assert(n_entries(h) < n_buckets(h)); + + if (h->type == HASHMAP_TYPE_ORDERED) { + OrderedHashmap *lh = (OrderedHashmap*) h; + + new_entry->iterate_next = IDX_NIL; + new_entry->iterate_previous = lh->iterate_list_tail; + + if (lh->iterate_list_tail != IDX_NIL) { + struct ordered_hashmap_entry *old_tail; + + old_tail = ordered_bucket_at(lh, lh->iterate_list_tail); + assert(old_tail->iterate_next == IDX_NIL); + old_tail->iterate_next = IDX_PUT; + } + + lh->iterate_list_tail = IDX_PUT; + if (lh->iterate_list_head == IDX_NIL) + lh->iterate_list_head = IDX_PUT; + } + + assert_se(hashmap_put_robin_hood(h, idx, swap) == false); + + n_entries_inc(h); +#if ENABLE_DEBUG_HASHMAP + h->debug.max_entries = MAX(h->debug.max_entries, n_entries(h)); +#endif + + base_set_dirty(h); + + return 1; +} +#define hashmap_put_boldly(h, idx, swap, may_resize) \ + hashmap_base_put_boldly(HASHMAP_BASE(h), idx, swap, may_resize) + +/* + * Returns 0 if resize is not needed. + * 1 if successfully resized. + * -ENOMEM on allocation failure. + */ +static int resize_buckets(HashmapBase *h, unsigned entries_add) { + struct swap_entries swap; + void *new_storage; + dib_raw_t *old_dibs, *new_dibs; + const struct hashmap_type_info *hi; + unsigned idx, optimal_idx; + unsigned old_n_buckets, new_n_buckets, n_rehashed, new_n_entries; + uint8_t new_shift; + bool rehash_next; + + assert(h); + + hi = &hashmap_type_info[h->type]; + new_n_entries = n_entries(h) + entries_add; + + /* overflow? */ + if (_unlikely_(new_n_entries < entries_add)) + return -ENOMEM; + + /* For direct storage we allow 100% load, because it's tiny. */ + if (!h->has_indirect && new_n_entries <= hi->n_direct_buckets) + return 0; + + /* + * Load factor = n/m = 1 - (1/INV_KEEP_FREE). + * From it follows: m = n + n/(INV_KEEP_FREE - 1) + */ + new_n_buckets = new_n_entries + new_n_entries / (INV_KEEP_FREE - 1); + /* overflow? */ + if (_unlikely_(new_n_buckets < new_n_entries)) + return -ENOMEM; + + if (_unlikely_(new_n_buckets > UINT_MAX / (hi->entry_size + sizeof(dib_raw_t)))) + return -ENOMEM; + + old_n_buckets = n_buckets(h); + + if (_likely_(new_n_buckets <= old_n_buckets)) + return 0; + + new_shift = log2u_round_up(MAX( + new_n_buckets * (hi->entry_size + sizeof(dib_raw_t)), + 2 * sizeof(struct direct_storage))); + + /* Realloc storage (buckets and DIB array). */ + new_storage = realloc(h->has_indirect ? h->indirect.storage : NULL, + 1U << new_shift); + if (!new_storage) + return -ENOMEM; + + /* Must upgrade direct to indirect storage. */ + if (!h->has_indirect) { + memcpy(new_storage, h->direct.storage, + old_n_buckets * (hi->entry_size + sizeof(dib_raw_t))); + h->indirect.n_entries = h->n_direct_entries; + h->indirect.idx_lowest_entry = 0; + h->n_direct_entries = 0; + } + + /* Get a new hash key. If we've just upgraded to indirect storage, + * allow reusing a previously generated key. It's still a different key + * from the shared one that we used for direct storage. */ + get_hash_key(h->indirect.hash_key, !h->has_indirect); + + h->has_indirect = true; + h->indirect.storage = new_storage; + h->indirect.n_buckets = (1U << new_shift) / + (hi->entry_size + sizeof(dib_raw_t)); + + old_dibs = (dib_raw_t*)((uint8_t*) new_storage + hi->entry_size * old_n_buckets); + new_dibs = dib_raw_ptr(h); + + /* + * Move the DIB array to the new place, replacing valid DIB values with + * DIB_RAW_REHASH to indicate all of the used buckets need rehashing. + * Note: Overlap is not possible, because we have at least doubled the + * number of buckets and dib_raw_t is smaller than any entry type. + */ + for (idx = 0; idx < old_n_buckets; idx++) { + assert(old_dibs[idx] != DIB_RAW_REHASH); + new_dibs[idx] = old_dibs[idx] == DIB_RAW_FREE ? DIB_RAW_FREE + : DIB_RAW_REHASH; + } + + /* Zero the area of newly added entries (including the old DIB area) */ + memzero(bucket_at(h, old_n_buckets), + (n_buckets(h) - old_n_buckets) * hi->entry_size); + + /* The upper half of the new DIB array needs initialization */ + memset(&new_dibs[old_n_buckets], DIB_RAW_INIT, + (n_buckets(h) - old_n_buckets) * sizeof(dib_raw_t)); + + /* Rehash entries that need it */ + n_rehashed = 0; + for (idx = 0; idx < old_n_buckets; idx++) { + if (new_dibs[idx] != DIB_RAW_REHASH) + continue; + + optimal_idx = bucket_hash(h, bucket_at(h, idx)->key); + + /* + * Not much to do if by luck the entry hashes to its current + * location. Just set its DIB. + */ + if (optimal_idx == idx) { + new_dibs[idx] = 0; + n_rehashed++; + continue; + } + + new_dibs[idx] = DIB_RAW_FREE; + bucket_move_entry(h, &swap, idx, IDX_PUT); + /* bucket_move_entry does not clear the source */ + memzero(bucket_at(h, idx), hi->entry_size); + + do { + /* + * Find the new bucket for the current entry. This may make + * another entry homeless and load it into IDX_PUT. + */ + rehash_next = hashmap_put_robin_hood(h, optimal_idx, &swap); + n_rehashed++; + + /* Did the current entry displace another one? */ + if (rehash_next) + optimal_idx = bucket_hash(h, bucket_at_swap(&swap, IDX_PUT)->p.b.key); + } while (rehash_next); + } + + assert(n_rehashed == n_entries(h)); + + return 1; +} + +/* + * Finds an entry with a matching key + * Returns: index of the found entry, or IDX_NIL if not found. + */ +static unsigned base_bucket_scan(HashmapBase *h, unsigned idx, const void *key) { + struct hashmap_base_entry *e; + unsigned dib, distance; + dib_raw_t *dibs = dib_raw_ptr(h); + + assert(idx < n_buckets(h)); + + for (distance = 0; ; distance++) { + if (dibs[idx] == DIB_RAW_FREE) + return IDX_NIL; + + dib = bucket_calculate_dib(h, idx, dibs[idx]); + + if (dib < distance) + return IDX_NIL; + if (dib == distance) { + e = bucket_at(h, idx); + if (h->hash_ops->compare(e->key, key) == 0) + return idx; + } + + idx = next_idx(h, idx); + } +} +#define bucket_scan(h, idx, key) base_bucket_scan(HASHMAP_BASE(h), idx, key) + +int hashmap_put(Hashmap *h, const void *key, void *value) { + struct swap_entries swap; + struct plain_hashmap_entry *e; + unsigned hash, idx; + + assert(h); + + hash = bucket_hash(h, key); + idx = bucket_scan(h, hash, key); + if (idx != IDX_NIL) { + e = plain_bucket_at(h, idx); + if (e->value == value) + return 0; + return -EEXIST; + } + + e = &bucket_at_swap(&swap, IDX_PUT)->p; + e->b.key = key; + e->value = value; + return hashmap_put_boldly(h, hash, &swap, true); +} + +int set_put(Set *s, const void *key) { + struct swap_entries swap; + struct hashmap_base_entry *e; + unsigned hash, idx; + + assert(s); + + hash = bucket_hash(s, key); + idx = bucket_scan(s, hash, key); + if (idx != IDX_NIL) + return 0; + + e = &bucket_at_swap(&swap, IDX_PUT)->p.b; + e->key = key; + return hashmap_put_boldly(s, hash, &swap, true); +} + +int hashmap_replace(Hashmap *h, const void *key, void *value) { + struct swap_entries swap; + struct plain_hashmap_entry *e; + unsigned hash, idx; + + assert(h); + + hash = bucket_hash(h, key); + idx = bucket_scan(h, hash, key); + if (idx != IDX_NIL) { + e = plain_bucket_at(h, idx); +#if ENABLE_DEBUG_HASHMAP + /* Although the key is equal, the key pointer may have changed, + * and this would break our assumption for iterating. So count + * this operation as incompatible with iteration. */ + if (e->b.key != key) { + h->b.debug.put_count++; + h->b.debug.rem_count++; + h->b.debug.last_rem_idx = idx; + } +#endif + e->b.key = key; + e->value = value; + hashmap_set_dirty(h); + + return 0; + } + + e = &bucket_at_swap(&swap, IDX_PUT)->p; + e->b.key = key; + e->value = value; + return hashmap_put_boldly(h, hash, &swap, true); +} + +int hashmap_update(Hashmap *h, const void *key, void *value) { + struct plain_hashmap_entry *e; + unsigned hash, idx; + + assert(h); + + hash = bucket_hash(h, key); + idx = bucket_scan(h, hash, key); + if (idx == IDX_NIL) + return -ENOENT; + + e = plain_bucket_at(h, idx); + e->value = value; + hashmap_set_dirty(h); + + return 0; +} + +void *internal_hashmap_get(HashmapBase *h, const void *key) { + struct hashmap_base_entry *e; + unsigned hash, idx; + + if (!h) + return NULL; + + hash = bucket_hash(h, key); + idx = bucket_scan(h, hash, key); + if (idx == IDX_NIL) + return NULL; + + e = bucket_at(h, idx); + return entry_value(h, e); +} + +void *hashmap_get2(Hashmap *h, const void *key, void **key2) { + struct plain_hashmap_entry *e; + unsigned hash, idx; + + if (!h) + return NULL; + + hash = bucket_hash(h, key); + idx = bucket_scan(h, hash, key); + if (idx == IDX_NIL) + return NULL; + + e = plain_bucket_at(h, idx); + if (key2) + *key2 = (void*) e->b.key; + + return e->value; +} + +bool internal_hashmap_contains(HashmapBase *h, const void *key) { + unsigned hash; + + if (!h) + return false; + + hash = bucket_hash(h, key); + return bucket_scan(h, hash, key) != IDX_NIL; +} + +void *internal_hashmap_remove(HashmapBase *h, const void *key) { + struct hashmap_base_entry *e; + unsigned hash, idx; + void *data; + + if (!h) + return NULL; + + hash = bucket_hash(h, key); + idx = bucket_scan(h, hash, key); + if (idx == IDX_NIL) + return NULL; + + e = bucket_at(h, idx); + data = entry_value(h, e); + remove_entry(h, idx); + + return data; +} + +void *hashmap_remove2(Hashmap *h, const void *key, void **rkey) { + struct plain_hashmap_entry *e; + unsigned hash, idx; + void *data; + + if (!h) { + if (rkey) + *rkey = NULL; + return NULL; + } + + hash = bucket_hash(h, key); + idx = bucket_scan(h, hash, key); + if (idx == IDX_NIL) { + if (rkey) + *rkey = NULL; + return NULL; + } + + e = plain_bucket_at(h, idx); + data = e->value; + if (rkey) + *rkey = (void*) e->b.key; + + remove_entry(h, idx); + + return data; +} + +int hashmap_remove_and_put(Hashmap *h, const void *old_key, const void *new_key, void *value) { + struct swap_entries swap; + struct plain_hashmap_entry *e; + unsigned old_hash, new_hash, idx; + + if (!h) + return -ENOENT; + + old_hash = bucket_hash(h, old_key); + idx = bucket_scan(h, old_hash, old_key); + if (idx == IDX_NIL) + return -ENOENT; + + new_hash = bucket_hash(h, new_key); + if (bucket_scan(h, new_hash, new_key) != IDX_NIL) + return -EEXIST; + + remove_entry(h, idx); + + e = &bucket_at_swap(&swap, IDX_PUT)->p; + e->b.key = new_key; + e->value = value; + assert_se(hashmap_put_boldly(h, new_hash, &swap, false) == 1); + + return 0; +} + +int set_remove_and_put(Set *s, const void *old_key, const void *new_key) { + struct swap_entries swap; + struct hashmap_base_entry *e; + unsigned old_hash, new_hash, idx; + + if (!s) + return -ENOENT; + + old_hash = bucket_hash(s, old_key); + idx = bucket_scan(s, old_hash, old_key); + if (idx == IDX_NIL) + return -ENOENT; + + new_hash = bucket_hash(s, new_key); + if (bucket_scan(s, new_hash, new_key) != IDX_NIL) + return -EEXIST; + + remove_entry(s, idx); + + e = &bucket_at_swap(&swap, IDX_PUT)->p.b; + e->key = new_key; + assert_se(hashmap_put_boldly(s, new_hash, &swap, false) == 1); + + return 0; +} + +int hashmap_remove_and_replace(Hashmap *h, const void *old_key, const void *new_key, void *value) { + struct swap_entries swap; + struct plain_hashmap_entry *e; + unsigned old_hash, new_hash, idx_old, idx_new; + + if (!h) + return -ENOENT; + + old_hash = bucket_hash(h, old_key); + idx_old = bucket_scan(h, old_hash, old_key); + if (idx_old == IDX_NIL) + return -ENOENT; + + old_key = bucket_at(HASHMAP_BASE(h), idx_old)->key; + + new_hash = bucket_hash(h, new_key); + idx_new = bucket_scan(h, new_hash, new_key); + if (idx_new != IDX_NIL) + if (idx_old != idx_new) { + remove_entry(h, idx_new); + /* Compensate for a possible backward shift. */ + if (old_key != bucket_at(HASHMAP_BASE(h), idx_old)->key) + idx_old = prev_idx(HASHMAP_BASE(h), idx_old); + assert(old_key == bucket_at(HASHMAP_BASE(h), idx_old)->key); + } + + remove_entry(h, idx_old); + + e = &bucket_at_swap(&swap, IDX_PUT)->p; + e->b.key = new_key; + e->value = value; + assert_se(hashmap_put_boldly(h, new_hash, &swap, false) == 1); + + return 0; +} + +void *internal_hashmap_remove_value(HashmapBase *h, const void *key, void *value) { + struct hashmap_base_entry *e; + unsigned hash, idx; + + if (!h) + return NULL; + + hash = bucket_hash(h, key); + idx = bucket_scan(h, hash, key); + if (idx == IDX_NIL) + return NULL; + + e = bucket_at(h, idx); + if (entry_value(h, e) != value) + return NULL; + + remove_entry(h, idx); + + return value; +} + +static unsigned find_first_entry(HashmapBase *h) { + Iterator i = ITERATOR_FIRST; + + if (!h || !n_entries(h)) + return IDX_NIL; + + return hashmap_iterate_entry(h, &i); +} + +void *internal_hashmap_first_key_and_value(HashmapBase *h, bool remove, void **ret_key) { + struct hashmap_base_entry *e; + void *key, *data; + unsigned idx; + + idx = find_first_entry(h); + if (idx == IDX_NIL) { + if (ret_key) + *ret_key = NULL; + return NULL; + } + + e = bucket_at(h, idx); + key = (void*) e->key; + data = entry_value(h, e); + + if (remove) + remove_entry(h, idx); + + if (ret_key) + *ret_key = key; + + return data; +} + +unsigned internal_hashmap_size(HashmapBase *h) { + + if (!h) + return 0; + + return n_entries(h); +} + +unsigned internal_hashmap_buckets(HashmapBase *h) { + + if (!h) + return 0; + + return n_buckets(h); +} + +int internal_hashmap_merge(Hashmap *h, Hashmap *other) { + Iterator i; + unsigned idx; + + assert(h); + + HASHMAP_FOREACH_IDX(idx, HASHMAP_BASE(other), i) { + struct plain_hashmap_entry *pe = plain_bucket_at(other, idx); + int r; + + r = hashmap_put(h, pe->b.key, pe->value); + if (r < 0 && r != -EEXIST) + return r; + } + + return 0; +} + +int set_merge(Set *s, Set *other) { + Iterator i; + unsigned idx; + + assert(s); + + HASHMAP_FOREACH_IDX(idx, HASHMAP_BASE(other), i) { + struct set_entry *se = set_bucket_at(other, idx); + int r; + + r = set_put(s, se->b.key); + if (r < 0) + return r; + } + + return 0; +} + +int internal_hashmap_reserve(HashmapBase *h, unsigned entries_add) { + int r; + + assert(h); + + r = resize_buckets(h, entries_add); + if (r < 0) + return r; + + return 0; +} + +/* + * The same as hashmap_merge(), but every new item from other is moved to h. + * Keys already in h are skipped and stay in other. + * Returns: 0 on success. + * -ENOMEM on alloc failure, in which case no move has been done. + */ +int internal_hashmap_move(HashmapBase *h, HashmapBase *other) { + struct swap_entries swap; + struct hashmap_base_entry *e, *n; + Iterator i; + unsigned idx; + int r; + + assert(h); + + if (!other) + return 0; + + assert(other->type == h->type); + + /* + * This reserves buckets for the worst case, where none of other's + * entries are yet present in h. This is preferable to risking + * an allocation failure in the middle of the moving and having to + * rollback or return a partial result. + */ + r = resize_buckets(h, n_entries(other)); + if (r < 0) + return r; + + HASHMAP_FOREACH_IDX(idx, other, i) { + unsigned h_hash; + + e = bucket_at(other, idx); + h_hash = bucket_hash(h, e->key); + if (bucket_scan(h, h_hash, e->key) != IDX_NIL) + continue; + + n = &bucket_at_swap(&swap, IDX_PUT)->p.b; + n->key = e->key; + if (h->type != HASHMAP_TYPE_SET) + ((struct plain_hashmap_entry*) n)->value = + ((struct plain_hashmap_entry*) e)->value; + assert_se(hashmap_put_boldly(h, h_hash, &swap, false) == 1); + + remove_entry(other, idx); + } + + return 0; +} + +int internal_hashmap_move_one(HashmapBase *h, HashmapBase *other, const void *key) { + struct swap_entries swap; + unsigned h_hash, other_hash, idx; + struct hashmap_base_entry *e, *n; + int r; + + assert(h); + + h_hash = bucket_hash(h, key); + if (bucket_scan(h, h_hash, key) != IDX_NIL) + return -EEXIST; + + if (!other) + return -ENOENT; + + assert(other->type == h->type); + + other_hash = bucket_hash(other, key); + idx = bucket_scan(other, other_hash, key); + if (idx == IDX_NIL) + return -ENOENT; + + e = bucket_at(other, idx); + + n = &bucket_at_swap(&swap, IDX_PUT)->p.b; + n->key = e->key; + if (h->type != HASHMAP_TYPE_SET) + ((struct plain_hashmap_entry*) n)->value = + ((struct plain_hashmap_entry*) e)->value; + r = hashmap_put_boldly(h, h_hash, &swap, true); + if (r < 0) + return r; + + remove_entry(other, idx); + return 0; +} + +HashmapBase *internal_hashmap_copy(HashmapBase *h) { + HashmapBase *copy; + int r; + + assert(h); + + copy = hashmap_base_new(h->hash_ops, h->type HASHMAP_DEBUG_SRC_ARGS); + if (!copy) + return NULL; + + switch (h->type) { + case HASHMAP_TYPE_PLAIN: + case HASHMAP_TYPE_ORDERED: + r = hashmap_merge((Hashmap*)copy, (Hashmap*)h); + break; + case HASHMAP_TYPE_SET: + r = set_merge((Set*)copy, (Set*)h); + break; + default: + assert_not_reached("Unknown hashmap type"); + } + + if (r < 0) { + internal_hashmap_free(copy, false, false); + return NULL; + } + + return copy; +} + +char **internal_hashmap_get_strv(HashmapBase *h) { + char **sv; + Iterator i; + unsigned idx, n; + + sv = new(char*, n_entries(h)+1); + if (!sv) + return NULL; + + n = 0; + HASHMAP_FOREACH_IDX(idx, h, i) + sv[n++] = entry_value(h, bucket_at(h, idx)); + sv[n] = NULL; + + return sv; +} + +void *ordered_hashmap_next(OrderedHashmap *h, const void *key) { + struct ordered_hashmap_entry *e; + unsigned hash, idx; + + if (!h) + return NULL; + + hash = bucket_hash(h, key); + idx = bucket_scan(h, hash, key); + if (idx == IDX_NIL) + return NULL; + + e = ordered_bucket_at(h, idx); + if (e->iterate_next == IDX_NIL) + return NULL; + return ordered_bucket_at(h, e->iterate_next)->p.value; +} + +int set_consume(Set *s, void *value) { + int r; + + assert(s); + assert(value); + + r = set_put(s, value); + if (r <= 0) + free(value); + + return r; +} + +int set_put_strdup(Set *s, const char *p) { + char *c; + + assert(s); + assert(p); + + if (set_contains(s, (char*) p)) + return 0; + + c = strdup(p); + if (!c) + return -ENOMEM; + + return set_consume(s, c); +} + +int set_put_strdupv(Set *s, char **l) { + int n = 0, r; + char **i; + + assert(s); + + STRV_FOREACH(i, l) { + r = set_put_strdup(s, *i); + if (r < 0) + return r; + + n += r; + } + + return n; +} + +int set_put_strsplit(Set *s, const char *v, const char *separators, ExtractFlags flags) { + const char *p = v; + int r; + + assert(s); + assert(v); + + for (;;) { + char *word; + + r = extract_first_word(&p, &word, separators, flags); + if (r <= 0) + return r; + + r = set_consume(s, word); + if (r < 0) + return r; + } +} + +/* expand the cachemem if needed, return true if newly (re)activated. */ +static int cachemem_maintain(CacheMem *mem, unsigned size) { + assert(mem); + + if (!GREEDY_REALLOC(mem->ptr, mem->n_allocated, size)) { + if (size > 0) + return -ENOMEM; + } + + if (!mem->active) { + mem->active = true; + return true; + } + + return false; +} + +int iterated_cache_get(IteratedCache *cache, const void ***res_keys, const void ***res_values, unsigned *res_n_entries) { + bool sync_keys = false, sync_values = false; + unsigned size; + int r; + + assert(cache); + assert(cache->hashmap); + + size = n_entries(cache->hashmap); + + if (res_keys) { + r = cachemem_maintain(&cache->keys, size); + if (r < 0) + return r; + + sync_keys = r; + } else + cache->keys.active = false; + + if (res_values) { + r = cachemem_maintain(&cache->values, size); + if (r < 0) + return r; + + sync_values = r; + } else + cache->values.active = false; + + if (cache->hashmap->dirty) { + if (cache->keys.active) + sync_keys = true; + if (cache->values.active) + sync_values = true; + + cache->hashmap->dirty = false; + } + + if (sync_keys || sync_values) { + unsigned i, idx; + Iterator iter; + + i = 0; + HASHMAP_FOREACH_IDX(idx, cache->hashmap, iter) { + struct hashmap_base_entry *e; + + e = bucket_at(cache->hashmap, idx); + + if (sync_keys) + cache->keys.ptr[i] = e->key; + if (sync_values) + cache->values.ptr[i] = entry_value(cache->hashmap, e); + i++; + } + } + + if (res_keys) + *res_keys = cache->keys.ptr; + if (res_values) + *res_values = cache->values.ptr; + if (res_n_entries) + *res_n_entries = size; + + return 0; +} + +IteratedCache *iterated_cache_free(IteratedCache *cache) { + if (cache) { + free(cache->keys.ptr); + free(cache->values.ptr); + free(cache); + } + + return NULL; +} diff --git a/src/basic/hashmap.h b/src/basic/hashmap.h new file mode 100644 index 0000000..e16a9f9 --- /dev/null +++ b/src/basic/hashmap.h @@ -0,0 +1,429 @@ +/* SPDX-License-Identifier: LGPL-2.1+ */ +#pragma once + +#include <limits.h> +#include <stdbool.h> +#include <stddef.h> + +#include "hash-funcs.h" +#include "macro.h" +#include "util.h" + +/* + * A hash table implementation. As a minor optimization a NULL hashmap object + * will be treated as empty hashmap for all read operations. That way it is not + * necessary to instantiate an object for each Hashmap use. + * + * If ENABLE_DEBUG_HASHMAP is defined (by configuring with --enable-debug=hashmap), + * the implementation will: + * - store extra data for debugging and statistics (see tools/gdb-sd_dump_hashmaps.py) + * - perform extra checks for invalid use of iterators + */ + +#define HASH_KEY_SIZE 16 + +typedef void* (*hashmap_destroy_t)(void *p); + +/* The base type for all hashmap and set types. Many functions in the + * implementation take (HashmapBase*) parameters and are run-time polymorphic, + * though the API is not meant to be polymorphic (do not call functions + * internal_*() directly). */ +typedef struct HashmapBase HashmapBase; + +/* Specific hashmap/set types */ +typedef struct Hashmap Hashmap; /* Maps keys to values */ +typedef struct OrderedHashmap OrderedHashmap; /* Like Hashmap, but also remembers entry insertion order */ +typedef struct Set Set; /* Stores just keys */ + +typedef struct IteratedCache IteratedCache; /* Caches the iterated order of one of the above */ + +/* Ideally the Iterator would be an opaque struct, but it is instantiated + * by hashmap users, so the definition has to be here. Do not use its fields + * directly. */ +typedef struct { + unsigned idx; /* index of an entry to be iterated next */ + const void *next_key; /* expected value of that entry's key pointer */ +#if ENABLE_DEBUG_HASHMAP + unsigned put_count; /* hashmap's put_count recorded at start of iteration */ + unsigned rem_count; /* hashmap's rem_count in previous iteration */ + unsigned prev_idx; /* idx in previous iteration */ +#endif +} Iterator; + +#define _IDX_ITERATOR_FIRST (UINT_MAX - 1) +#define ITERATOR_FIRST ((Iterator) { .idx = _IDX_ITERATOR_FIRST, .next_key = NULL }) + +/* Macros for type checking */ +#define PTR_COMPATIBLE_WITH_HASHMAP_BASE(h) \ + (__builtin_types_compatible_p(typeof(h), HashmapBase*) || \ + __builtin_types_compatible_p(typeof(h), Hashmap*) || \ + __builtin_types_compatible_p(typeof(h), OrderedHashmap*) || \ + __builtin_types_compatible_p(typeof(h), Set*)) + +#define PTR_COMPATIBLE_WITH_PLAIN_HASHMAP(h) \ + (__builtin_types_compatible_p(typeof(h), Hashmap*) || \ + __builtin_types_compatible_p(typeof(h), OrderedHashmap*)) \ + +#define HASHMAP_BASE(h) \ + __builtin_choose_expr(PTR_COMPATIBLE_WITH_HASHMAP_BASE(h), \ + (HashmapBase*)(h), \ + (void)0) + +#define PLAIN_HASHMAP(h) \ + __builtin_choose_expr(PTR_COMPATIBLE_WITH_PLAIN_HASHMAP(h), \ + (Hashmap*)(h), \ + (void)0) + +#if ENABLE_DEBUG_HASHMAP +# define HASHMAP_DEBUG_PARAMS , const char *func, const char *file, int line +# define HASHMAP_DEBUG_SRC_ARGS , __func__, __FILE__, __LINE__ +# define HASHMAP_DEBUG_PASS_ARGS , func, file, line +#else +# define HASHMAP_DEBUG_PARAMS +# define HASHMAP_DEBUG_SRC_ARGS +# define HASHMAP_DEBUG_PASS_ARGS +#endif + +Hashmap *internal_hashmap_new(const struct hash_ops *hash_ops HASHMAP_DEBUG_PARAMS); +OrderedHashmap *internal_ordered_hashmap_new(const struct hash_ops *hash_ops HASHMAP_DEBUG_PARAMS); +#define hashmap_new(ops) internal_hashmap_new(ops HASHMAP_DEBUG_SRC_ARGS) +#define ordered_hashmap_new(ops) internal_ordered_hashmap_new(ops HASHMAP_DEBUG_SRC_ARGS) + +HashmapBase *internal_hashmap_free(HashmapBase *h, free_func_t default_free_key, free_func_t default_free_value); +static inline Hashmap *hashmap_free(Hashmap *h) { + return (void*) internal_hashmap_free(HASHMAP_BASE(h), NULL, NULL); +} +static inline OrderedHashmap *ordered_hashmap_free(OrderedHashmap *h) { + return (void*) internal_hashmap_free(HASHMAP_BASE(h), NULL, NULL); +} + +static inline Hashmap *hashmap_free_free(Hashmap *h) { + return (void*) internal_hashmap_free(HASHMAP_BASE(h), NULL, free); +} +static inline OrderedHashmap *ordered_hashmap_free_free(OrderedHashmap *h) { + return (void*) internal_hashmap_free(HASHMAP_BASE(h), NULL, free); +} + +static inline Hashmap *hashmap_free_free_key(Hashmap *h) { + return (void*) internal_hashmap_free(HASHMAP_BASE(h), free, NULL); +} +static inline OrderedHashmap *ordered_hashmap_free_free_key(OrderedHashmap *h) { + return (void*) internal_hashmap_free(HASHMAP_BASE(h), free, NULL); +} + +static inline Hashmap *hashmap_free_free_free(Hashmap *h) { + return (void*) internal_hashmap_free(HASHMAP_BASE(h), free, free); +} +static inline OrderedHashmap *ordered_hashmap_free_free_free(OrderedHashmap *h) { + return (void*) internal_hashmap_free(HASHMAP_BASE(h), free, free); +} + +IteratedCache *iterated_cache_free(IteratedCache *cache); +int iterated_cache_get(IteratedCache *cache, const void ***res_keys, const void ***res_values, unsigned *res_n_entries); + +HashmapBase *internal_hashmap_copy(HashmapBase *h); +static inline Hashmap *hashmap_copy(Hashmap *h) { + return (Hashmap*) internal_hashmap_copy(HASHMAP_BASE(h)); +} +static inline OrderedHashmap *ordered_hashmap_copy(OrderedHashmap *h) { + return (OrderedHashmap*) internal_hashmap_copy(HASHMAP_BASE(h)); +} + +int internal_hashmap_ensure_allocated(Hashmap **h, const struct hash_ops *hash_ops HASHMAP_DEBUG_PARAMS); +int internal_ordered_hashmap_ensure_allocated(OrderedHashmap **h, const struct hash_ops *hash_ops HASHMAP_DEBUG_PARAMS); +#define hashmap_ensure_allocated(h, ops) internal_hashmap_ensure_allocated(h, ops HASHMAP_DEBUG_SRC_ARGS) +#define ordered_hashmap_ensure_allocated(h, ops) internal_ordered_hashmap_ensure_allocated(h, ops HASHMAP_DEBUG_SRC_ARGS) + +IteratedCache *internal_hashmap_iterated_cache_new(HashmapBase *h); +static inline IteratedCache *hashmap_iterated_cache_new(Hashmap *h) { + return (IteratedCache*) internal_hashmap_iterated_cache_new(HASHMAP_BASE(h)); +} +static inline IteratedCache *ordered_hashmap_iterated_cache_new(OrderedHashmap *h) { + return (IteratedCache*) internal_hashmap_iterated_cache_new(HASHMAP_BASE(h)); +} + +int hashmap_put(Hashmap *h, const void *key, void *value); +static inline int ordered_hashmap_put(OrderedHashmap *h, const void *key, void *value) { + return hashmap_put(PLAIN_HASHMAP(h), key, value); +} + +int hashmap_update(Hashmap *h, const void *key, void *value); +static inline int ordered_hashmap_update(OrderedHashmap *h, const void *key, void *value) { + return hashmap_update(PLAIN_HASHMAP(h), key, value); +} + +int hashmap_replace(Hashmap *h, const void *key, void *value); +static inline int ordered_hashmap_replace(OrderedHashmap *h, const void *key, void *value) { + return hashmap_replace(PLAIN_HASHMAP(h), key, value); +} + +void *internal_hashmap_get(HashmapBase *h, const void *key); +static inline void *hashmap_get(Hashmap *h, const void *key) { + return internal_hashmap_get(HASHMAP_BASE(h), key); +} +static inline void *ordered_hashmap_get(OrderedHashmap *h, const void *key) { + return internal_hashmap_get(HASHMAP_BASE(h), key); +} + +void *hashmap_get2(Hashmap *h, const void *key, void **rkey); +static inline void *ordered_hashmap_get2(OrderedHashmap *h, const void *key, void **rkey) { + return hashmap_get2(PLAIN_HASHMAP(h), key, rkey); +} + +bool internal_hashmap_contains(HashmapBase *h, const void *key); +static inline bool hashmap_contains(Hashmap *h, const void *key) { + return internal_hashmap_contains(HASHMAP_BASE(h), key); +} +static inline bool ordered_hashmap_contains(OrderedHashmap *h, const void *key) { + return internal_hashmap_contains(HASHMAP_BASE(h), key); +} + +void *internal_hashmap_remove(HashmapBase *h, const void *key); +static inline void *hashmap_remove(Hashmap *h, const void *key) { + return internal_hashmap_remove(HASHMAP_BASE(h), key); +} +static inline void *ordered_hashmap_remove(OrderedHashmap *h, const void *key) { + return internal_hashmap_remove(HASHMAP_BASE(h), key); +} + +void *hashmap_remove2(Hashmap *h, const void *key, void **rkey); +static inline void *ordered_hashmap_remove2(OrderedHashmap *h, const void *key, void **rkey) { + return hashmap_remove2(PLAIN_HASHMAP(h), key, rkey); +} + +void *internal_hashmap_remove_value(HashmapBase *h, const void *key, void *value); +static inline void *hashmap_remove_value(Hashmap *h, const void *key, void *value) { + return internal_hashmap_remove_value(HASHMAP_BASE(h), key, value); +} + +static inline void *ordered_hashmap_remove_value(OrderedHashmap *h, const void *key, void *value) { + return hashmap_remove_value(PLAIN_HASHMAP(h), key, value); +} + +int hashmap_remove_and_put(Hashmap *h, const void *old_key, const void *new_key, void *value); +static inline int ordered_hashmap_remove_and_put(OrderedHashmap *h, const void *old_key, const void *new_key, void *value) { + return hashmap_remove_and_put(PLAIN_HASHMAP(h), old_key, new_key, value); +} + +int hashmap_remove_and_replace(Hashmap *h, const void *old_key, const void *new_key, void *value); +static inline int ordered_hashmap_remove_and_replace(OrderedHashmap *h, const void *old_key, const void *new_key, void *value) { + return hashmap_remove_and_replace(PLAIN_HASHMAP(h), old_key, new_key, value); +} + +/* Since merging data from a OrderedHashmap into a Hashmap or vice-versa + * should just work, allow this by having looser type-checking here. */ +int internal_hashmap_merge(Hashmap *h, Hashmap *other); +#define hashmap_merge(h, other) internal_hashmap_merge(PLAIN_HASHMAP(h), PLAIN_HASHMAP(other)) +#define ordered_hashmap_merge(h, other) hashmap_merge(h, other) + +int internal_hashmap_reserve(HashmapBase *h, unsigned entries_add); +static inline int hashmap_reserve(Hashmap *h, unsigned entries_add) { + return internal_hashmap_reserve(HASHMAP_BASE(h), entries_add); +} +static inline int ordered_hashmap_reserve(OrderedHashmap *h, unsigned entries_add) { + return internal_hashmap_reserve(HASHMAP_BASE(h), entries_add); +} + +int internal_hashmap_move(HashmapBase *h, HashmapBase *other); +/* Unlike hashmap_merge, hashmap_move does not allow mixing the types. */ +static inline int hashmap_move(Hashmap *h, Hashmap *other) { + return internal_hashmap_move(HASHMAP_BASE(h), HASHMAP_BASE(other)); +} +static inline int ordered_hashmap_move(OrderedHashmap *h, OrderedHashmap *other) { + return internal_hashmap_move(HASHMAP_BASE(h), HASHMAP_BASE(other)); +} + +int internal_hashmap_move_one(HashmapBase *h, HashmapBase *other, const void *key); +static inline int hashmap_move_one(Hashmap *h, Hashmap *other, const void *key) { + return internal_hashmap_move_one(HASHMAP_BASE(h), HASHMAP_BASE(other), key); +} +static inline int ordered_hashmap_move_one(OrderedHashmap *h, OrderedHashmap *other, const void *key) { + return internal_hashmap_move_one(HASHMAP_BASE(h), HASHMAP_BASE(other), key); +} + +unsigned internal_hashmap_size(HashmapBase *h) _pure_; +static inline unsigned hashmap_size(Hashmap *h) { + return internal_hashmap_size(HASHMAP_BASE(h)); +} +static inline unsigned ordered_hashmap_size(OrderedHashmap *h) { + return internal_hashmap_size(HASHMAP_BASE(h)); +} + +static inline bool hashmap_isempty(Hashmap *h) { + return hashmap_size(h) == 0; +} +static inline bool ordered_hashmap_isempty(OrderedHashmap *h) { + return ordered_hashmap_size(h) == 0; +} + +unsigned internal_hashmap_buckets(HashmapBase *h) _pure_; +static inline unsigned hashmap_buckets(Hashmap *h) { + return internal_hashmap_buckets(HASHMAP_BASE(h)); +} +static inline unsigned ordered_hashmap_buckets(OrderedHashmap *h) { + return internal_hashmap_buckets(HASHMAP_BASE(h)); +} + +bool internal_hashmap_iterate(HashmapBase *h, Iterator *i, void **value, const void **key); +static inline bool hashmap_iterate(Hashmap *h, Iterator *i, void **value, const void **key) { + return internal_hashmap_iterate(HASHMAP_BASE(h), i, value, key); +} +static inline bool ordered_hashmap_iterate(OrderedHashmap *h, Iterator *i, void **value, const void **key) { + return internal_hashmap_iterate(HASHMAP_BASE(h), i, value, key); +} + +void internal_hashmap_clear(HashmapBase *h, free_func_t default_free_key, free_func_t default_free_value); +static inline void hashmap_clear(Hashmap *h) { + internal_hashmap_clear(HASHMAP_BASE(h), NULL, NULL); +} +static inline void ordered_hashmap_clear(OrderedHashmap *h) { + internal_hashmap_clear(HASHMAP_BASE(h), NULL, NULL); +} + +static inline void hashmap_clear_free(Hashmap *h) { + internal_hashmap_clear(HASHMAP_BASE(h), NULL, free); +} +static inline void ordered_hashmap_clear_free(OrderedHashmap *h) { + internal_hashmap_clear(HASHMAP_BASE(h), NULL, free); +} + +static inline void hashmap_clear_free_key(Hashmap *h) { + internal_hashmap_clear(HASHMAP_BASE(h), free, NULL); +} +static inline void ordered_hashmap_clear_free_key(OrderedHashmap *h) { + internal_hashmap_clear(HASHMAP_BASE(h), free, NULL); +} + +static inline void hashmap_clear_free_free(Hashmap *h) { + internal_hashmap_clear(HASHMAP_BASE(h), free, free); +} +static inline void ordered_hashmap_clear_free_free(OrderedHashmap *h) { + internal_hashmap_clear(HASHMAP_BASE(h), free, free); +} + +/* + * Note about all *_first*() functions + * + * For plain Hashmaps and Sets the order of entries is undefined. + * The functions find whatever entry is first in the implementation + * internal order. + * + * Only for OrderedHashmaps the order is well defined and finding + * the first entry is O(1). + */ + +void *internal_hashmap_first_key_and_value(HashmapBase *h, bool remove, void **ret_key); +static inline void *hashmap_steal_first_key_and_value(Hashmap *h, void **ret) { + return internal_hashmap_first_key_and_value(HASHMAP_BASE(h), true, ret); +} +static inline void *ordered_hashmap_steal_first_key_and_value(OrderedHashmap *h, void **ret) { + return internal_hashmap_first_key_and_value(HASHMAP_BASE(h), true, ret); +} +static inline void *hashmap_first_key_and_value(Hashmap *h, void **ret) { + return internal_hashmap_first_key_and_value(HASHMAP_BASE(h), false, ret); +} +static inline void *ordered_hashmap_first_key_and_value(OrderedHashmap *h, void **ret) { + return internal_hashmap_first_key_and_value(HASHMAP_BASE(h), false, ret); +} + +static inline void *hashmap_steal_first(Hashmap *h) { + return internal_hashmap_first_key_and_value(HASHMAP_BASE(h), true, NULL); +} +static inline void *ordered_hashmap_steal_first(OrderedHashmap *h) { + return internal_hashmap_first_key_and_value(HASHMAP_BASE(h), true, NULL); +} +static inline void *hashmap_first(Hashmap *h) { + return internal_hashmap_first_key_and_value(HASHMAP_BASE(h), false, NULL); +} +static inline void *ordered_hashmap_first(OrderedHashmap *h) { + return internal_hashmap_first_key_and_value(HASHMAP_BASE(h), false, NULL); +} + +static inline void *internal_hashmap_first_key(HashmapBase *h, bool remove) { + void *key = NULL; + + (void) internal_hashmap_first_key_and_value(HASHMAP_BASE(h), remove, &key); + return key; +} +static inline void *hashmap_steal_first_key(Hashmap *h) { + return internal_hashmap_first_key(HASHMAP_BASE(h), true); +} +static inline void *ordered_hashmap_steal_first_key(OrderedHashmap *h) { + return internal_hashmap_first_key(HASHMAP_BASE(h), true); +} +static inline void *hashmap_first_key(Hashmap *h) { + return internal_hashmap_first_key(HASHMAP_BASE(h), false); +} +static inline void *ordered_hashmap_first_key(OrderedHashmap *h) { + return internal_hashmap_first_key(HASHMAP_BASE(h), false); +} + +#define hashmap_clear_with_destructor(_s, _f) \ + ({ \ + void *_item; \ + while ((_item = hashmap_steal_first(_s))) \ + _f(_item); \ + }) +#define hashmap_free_with_destructor(_s, _f) \ + ({ \ + hashmap_clear_with_destructor(_s, _f); \ + hashmap_free(_s); \ + }) +#define ordered_hashmap_clear_with_destructor(_s, _f) \ + ({ \ + void *_item; \ + while ((_item = ordered_hashmap_steal_first(_s))) \ + _f(_item); \ + }) +#define ordered_hashmap_free_with_destructor(_s, _f) \ + ({ \ + ordered_hashmap_clear_with_destructor(_s, _f); \ + ordered_hashmap_free(_s); \ + }) + +/* no hashmap_next */ +void *ordered_hashmap_next(OrderedHashmap *h, const void *key); + +char **internal_hashmap_get_strv(HashmapBase *h); +static inline char **hashmap_get_strv(Hashmap *h) { + return internal_hashmap_get_strv(HASHMAP_BASE(h)); +} +static inline char **ordered_hashmap_get_strv(OrderedHashmap *h) { + return internal_hashmap_get_strv(HASHMAP_BASE(h)); +} + +/* + * Hashmaps are iterated in unpredictable order. + * OrderedHashmaps are an exception to this. They are iterated in the order + * the entries were inserted. + * It is safe to remove the current entry. + */ +#define HASHMAP_FOREACH(e, h, i) \ + for ((i) = ITERATOR_FIRST; hashmap_iterate((h), &(i), (void**)&(e), NULL); ) + +#define ORDERED_HASHMAP_FOREACH(e, h, i) \ + for ((i) = ITERATOR_FIRST; ordered_hashmap_iterate((h), &(i), (void**)&(e), NULL); ) + +#define HASHMAP_FOREACH_KEY(e, k, h, i) \ + for ((i) = ITERATOR_FIRST; hashmap_iterate((h), &(i), (void**)&(e), (const void**) &(k)); ) + +#define ORDERED_HASHMAP_FOREACH_KEY(e, k, h, i) \ + for ((i) = ITERATOR_FIRST; ordered_hashmap_iterate((h), &(i), (void**)&(e), (const void**) &(k)); ) + +DEFINE_TRIVIAL_CLEANUP_FUNC(Hashmap*, hashmap_free); +DEFINE_TRIVIAL_CLEANUP_FUNC(Hashmap*, hashmap_free_free); +DEFINE_TRIVIAL_CLEANUP_FUNC(Hashmap*, hashmap_free_free_free); +DEFINE_TRIVIAL_CLEANUP_FUNC(OrderedHashmap*, ordered_hashmap_free); +DEFINE_TRIVIAL_CLEANUP_FUNC(OrderedHashmap*, ordered_hashmap_free_free); +DEFINE_TRIVIAL_CLEANUP_FUNC(OrderedHashmap*, ordered_hashmap_free_free_free); + +#define _cleanup_hashmap_free_ _cleanup_(hashmap_freep) +#define _cleanup_hashmap_free_free_ _cleanup_(hashmap_free_freep) +#define _cleanup_hashmap_free_free_free_ _cleanup_(hashmap_free_free_freep) +#define _cleanup_ordered_hashmap_free_ _cleanup_(ordered_hashmap_freep) +#define _cleanup_ordered_hashmap_free_free_ _cleanup_(ordered_hashmap_free_freep) +#define _cleanup_ordered_hashmap_free_free_free_ _cleanup_(ordered_hashmap_free_free_freep) + +DEFINE_TRIVIAL_CLEANUP_FUNC(IteratedCache*, iterated_cache_free); + +#define _cleanup_iterated_cache_free_ _cleanup_(iterated_cache_freep) diff --git a/src/basic/hexdecoct.c b/src/basic/hexdecoct.c new file mode 100644 index 0000000..c0f9640 --- /dev/null +++ b/src/basic/hexdecoct.c @@ -0,0 +1,819 @@ +/* SPDX-License-Identifier: LGPL-2.1+ */ + +#include <ctype.h> +#include <errno.h> +#include <stdint.h> +#include <stdlib.h> + +#include "alloc-util.h" +#include "hexdecoct.h" +#include "macro.h" +#include "string-util.h" +#include "util.h" + +char octchar(int x) { + return '0' + (x & 7); +} + +int unoctchar(char c) { + + if (c >= '0' && c <= '7') + return c - '0'; + + return -EINVAL; +} + +char decchar(int x) { + return '0' + (x % 10); +} + +int undecchar(char c) { + + if (c >= '0' && c <= '9') + return c - '0'; + + return -EINVAL; +} + +char hexchar(int x) { + static const char table[16] = "0123456789abcdef"; + + return table[x & 15]; +} + +int unhexchar(char c) { + + if (c >= '0' && c <= '9') + return c - '0'; + + if (c >= 'a' && c <= 'f') + return c - 'a' + 10; + + if (c >= 'A' && c <= 'F') + return c - 'A' + 10; + + return -EINVAL; +} + +char *hexmem(const void *p, size_t l) { + const uint8_t *x; + char *r, *z; + + z = r = new(char, l * 2 + 1); + if (!r) + return NULL; + + for (x = p; x < (const uint8_t*) p + l; x++) { + *(z++) = hexchar(*x >> 4); + *(z++) = hexchar(*x & 15); + } + + *z = 0; + return r; +} + +static int unhex_next(const char **p, size_t *l) { + int r; + + assert(p); + assert(l); + + /* Find the next non-whitespace character, and decode it. We + * greedily skip all preceding and all following whitespace. */ + + for (;;) { + if (*l == 0) + return -EPIPE; + + if (!strchr(WHITESPACE, **p)) + break; + + /* Skip leading whitespace */ + (*p)++, (*l)--; + } + + r = unhexchar(**p); + if (r < 0) + return r; + + for (;;) { + (*p)++, (*l)--; + + if (*l == 0 || !strchr(WHITESPACE, **p)) + break; + + /* Skip following whitespace */ + } + + return r; +} + +int unhexmem(const char *p, size_t l, void **ret, size_t *ret_len) { + _cleanup_free_ uint8_t *buf = NULL; + const char *x; + uint8_t *z; + + assert(ret); + assert(ret_len); + assert(p || l == 0); + + if (l == (size_t) -1) + l = strlen(p); + + /* Note that the calculation of memory size is an upper boundary, as we ignore whitespace while decoding */ + buf = malloc((l + 1) / 2 + 1); + if (!buf) + return -ENOMEM; + + for (x = p, z = buf;;) { + int a, b; + + a = unhex_next(&x, &l); + if (a == -EPIPE) /* End of string */ + break; + if (a < 0) + return a; + + b = unhex_next(&x, &l); + if (b < 0) + return b; + + *(z++) = (uint8_t) a << 4 | (uint8_t) b; + } + + *z = 0; + + *ret_len = (size_t) (z - buf); + *ret = TAKE_PTR(buf); + + return 0; +} + +/* https://tools.ietf.org/html/rfc4648#section-6 + * Notice that base32hex differs from base32 in the alphabet it uses. + * The distinction is that the base32hex representation preserves the + * order of the underlying data when compared as bytestrings, this is + * useful when representing NSEC3 hashes, as one can then verify the + * order of hashes directly from their representation. */ +char base32hexchar(int x) { + static const char table[32] = "0123456789" + "ABCDEFGHIJKLMNOPQRSTUV"; + + return table[x & 31]; +} + +int unbase32hexchar(char c) { + unsigned offset; + + if (c >= '0' && c <= '9') + return c - '0'; + + offset = '9' - '0' + 1; + + if (c >= 'A' && c <= 'V') + return c - 'A' + offset; + + return -EINVAL; +} + +char *base32hexmem(const void *p, size_t l, bool padding) { + char *r, *z; + const uint8_t *x; + size_t len; + + assert(p || l == 0); + + if (padding) + /* five input bytes makes eight output bytes, padding is added so we must round up */ + len = 8 * (l + 4) / 5; + else { + /* same, but round down as there is no padding */ + len = 8 * l / 5; + + switch (l % 5) { + case 4: + len += 7; + break; + case 3: + len += 5; + break; + case 2: + len += 4; + break; + case 1: + len += 2; + break; + } + } + + z = r = malloc(len + 1); + if (!r) + return NULL; + + for (x = p; x < (const uint8_t*) p + (l / 5) * 5; x += 5) { + /* x[0] == XXXXXXXX; x[1] == YYYYYYYY; x[2] == ZZZZZZZZ + * x[3] == QQQQQQQQ; x[4] == WWWWWWWW */ + *(z++) = base32hexchar(x[0] >> 3); /* 000XXXXX */ + *(z++) = base32hexchar((x[0] & 7) << 2 | x[1] >> 6); /* 000XXXYY */ + *(z++) = base32hexchar((x[1] & 63) >> 1); /* 000YYYYY */ + *(z++) = base32hexchar((x[1] & 1) << 4 | x[2] >> 4); /* 000YZZZZ */ + *(z++) = base32hexchar((x[2] & 15) << 1 | x[3] >> 7); /* 000ZZZZQ */ + *(z++) = base32hexchar((x[3] & 127) >> 2); /* 000QQQQQ */ + *(z++) = base32hexchar((x[3] & 3) << 3 | x[4] >> 5); /* 000QQWWW */ + *(z++) = base32hexchar((x[4] & 31)); /* 000WWWWW */ + } + + switch (l % 5) { + case 4: + *(z++) = base32hexchar(x[0] >> 3); /* 000XXXXX */ + *(z++) = base32hexchar((x[0] & 7) << 2 | x[1] >> 6); /* 000XXXYY */ + *(z++) = base32hexchar((x[1] & 63) >> 1); /* 000YYYYY */ + *(z++) = base32hexchar((x[1] & 1) << 4 | x[2] >> 4); /* 000YZZZZ */ + *(z++) = base32hexchar((x[2] & 15) << 1 | x[3] >> 7); /* 000ZZZZQ */ + *(z++) = base32hexchar((x[3] & 127) >> 2); /* 000QQQQQ */ + *(z++) = base32hexchar((x[3] & 3) << 3); /* 000QQ000 */ + if (padding) + *(z++) = '='; + + break; + + case 3: + *(z++) = base32hexchar(x[0] >> 3); /* 000XXXXX */ + *(z++) = base32hexchar((x[0] & 7) << 2 | x[1] >> 6); /* 000XXXYY */ + *(z++) = base32hexchar((x[1] & 63) >> 1); /* 000YYYYY */ + *(z++) = base32hexchar((x[1] & 1) << 4 | x[2] >> 4); /* 000YZZZZ */ + *(z++) = base32hexchar((x[2] & 15) << 1); /* 000ZZZZ0 */ + if (padding) { + *(z++) = '='; + *(z++) = '='; + *(z++) = '='; + } + + break; + + case 2: + *(z++) = base32hexchar(x[0] >> 3); /* 000XXXXX */ + *(z++) = base32hexchar((x[0] & 7) << 2 | x[1] >> 6); /* 000XXXYY */ + *(z++) = base32hexchar((x[1] & 63) >> 1); /* 000YYYYY */ + *(z++) = base32hexchar((x[1] & 1) << 4); /* 000Y0000 */ + if (padding) { + *(z++) = '='; + *(z++) = '='; + *(z++) = '='; + *(z++) = '='; + } + + break; + + case 1: + *(z++) = base32hexchar(x[0] >> 3); /* 000XXXXX */ + *(z++) = base32hexchar((x[0] & 7) << 2); /* 000XXX00 */ + if (padding) { + *(z++) = '='; + *(z++) = '='; + *(z++) = '='; + *(z++) = '='; + *(z++) = '='; + *(z++) = '='; + } + + break; + } + + *z = 0; + return r; +} + +int unbase32hexmem(const char *p, size_t l, bool padding, void **mem, size_t *_len) { + _cleanup_free_ uint8_t *r = NULL; + int a, b, c, d, e, f, g, h; + uint8_t *z; + const char *x; + size_t len; + unsigned pad = 0; + + assert(p || l == 0); + assert(mem); + assert(_len); + + if (l == (size_t) -1) + l = strlen(p); + + /* padding ensures any base32hex input has input divisible by 8 */ + if (padding && l % 8 != 0) + return -EINVAL; + + if (padding) { + /* strip the padding */ + while (l > 0 && p[l - 1] == '=' && pad < 7) { + pad++; + l--; + } + } + + /* a group of eight input bytes needs five output bytes, in case of + * padding we need to add some extra bytes */ + len = (l / 8) * 5; + + switch (l % 8) { + case 7: + len += 4; + break; + case 5: + len += 3; + break; + case 4: + len += 2; + break; + case 2: + len += 1; + break; + case 0: + break; + default: + return -EINVAL; + } + + z = r = malloc(len + 1); + if (!r) + return -ENOMEM; + + for (x = p; x < p + (l / 8) * 8; x += 8) { + /* a == 000XXXXX; b == 000YYYYY; c == 000ZZZZZ; d == 000WWWWW + * e == 000SSSSS; f == 000QQQQQ; g == 000VVVVV; h == 000RRRRR */ + a = unbase32hexchar(x[0]); + if (a < 0) + return -EINVAL; + + b = unbase32hexchar(x[1]); + if (b < 0) + return -EINVAL; + + c = unbase32hexchar(x[2]); + if (c < 0) + return -EINVAL; + + d = unbase32hexchar(x[3]); + if (d < 0) + return -EINVAL; + + e = unbase32hexchar(x[4]); + if (e < 0) + return -EINVAL; + + f = unbase32hexchar(x[5]); + if (f < 0) + return -EINVAL; + + g = unbase32hexchar(x[6]); + if (g < 0) + return -EINVAL; + + h = unbase32hexchar(x[7]); + if (h < 0) + return -EINVAL; + + *(z++) = (uint8_t) a << 3 | (uint8_t) b >> 2; /* XXXXXYYY */ + *(z++) = (uint8_t) b << 6 | (uint8_t) c << 1 | (uint8_t) d >> 4; /* YYZZZZZW */ + *(z++) = (uint8_t) d << 4 | (uint8_t) e >> 1; /* WWWWSSSS */ + *(z++) = (uint8_t) e << 7 | (uint8_t) f << 2 | (uint8_t) g >> 3; /* SQQQQQVV */ + *(z++) = (uint8_t) g << 5 | (uint8_t) h; /* VVVRRRRR */ + } + + switch (l % 8) { + case 7: + a = unbase32hexchar(x[0]); + if (a < 0) + return -EINVAL; + + b = unbase32hexchar(x[1]); + if (b < 0) + return -EINVAL; + + c = unbase32hexchar(x[2]); + if (c < 0) + return -EINVAL; + + d = unbase32hexchar(x[3]); + if (d < 0) + return -EINVAL; + + e = unbase32hexchar(x[4]); + if (e < 0) + return -EINVAL; + + f = unbase32hexchar(x[5]); + if (f < 0) + return -EINVAL; + + g = unbase32hexchar(x[6]); + if (g < 0) + return -EINVAL; + + /* g == 000VV000 */ + if (g & 7) + return -EINVAL; + + *(z++) = (uint8_t) a << 3 | (uint8_t) b >> 2; /* XXXXXYYY */ + *(z++) = (uint8_t) b << 6 | (uint8_t) c << 1 | (uint8_t) d >> 4; /* YYZZZZZW */ + *(z++) = (uint8_t) d << 4 | (uint8_t) e >> 1; /* WWWWSSSS */ + *(z++) = (uint8_t) e << 7 | (uint8_t) f << 2 | (uint8_t) g >> 3; /* SQQQQQVV */ + + break; + case 5: + a = unbase32hexchar(x[0]); + if (a < 0) + return -EINVAL; + + b = unbase32hexchar(x[1]); + if (b < 0) + return -EINVAL; + + c = unbase32hexchar(x[2]); + if (c < 0) + return -EINVAL; + + d = unbase32hexchar(x[3]); + if (d < 0) + return -EINVAL; + + e = unbase32hexchar(x[4]); + if (e < 0) + return -EINVAL; + + /* e == 000SSSS0 */ + if (e & 1) + return -EINVAL; + + *(z++) = (uint8_t) a << 3 | (uint8_t) b >> 2; /* XXXXXYYY */ + *(z++) = (uint8_t) b << 6 | (uint8_t) c << 1 | (uint8_t) d >> 4; /* YYZZZZZW */ + *(z++) = (uint8_t) d << 4 | (uint8_t) e >> 1; /* WWWWSSSS */ + + break; + case 4: + a = unbase32hexchar(x[0]); + if (a < 0) + return -EINVAL; + + b = unbase32hexchar(x[1]); + if (b < 0) + return -EINVAL; + + c = unbase32hexchar(x[2]); + if (c < 0) + return -EINVAL; + + d = unbase32hexchar(x[3]); + if (d < 0) + return -EINVAL; + + /* d == 000W0000 */ + if (d & 15) + return -EINVAL; + + *(z++) = (uint8_t) a << 3 | (uint8_t) b >> 2; /* XXXXXYYY */ + *(z++) = (uint8_t) b << 6 | (uint8_t) c << 1 | (uint8_t) d >> 4; /* YYZZZZZW */ + + break; + case 2: + a = unbase32hexchar(x[0]); + if (a < 0) + return -EINVAL; + + b = unbase32hexchar(x[1]); + if (b < 0) + return -EINVAL; + + /* b == 000YYY00 */ + if (b & 3) + return -EINVAL; + + *(z++) = (uint8_t) a << 3 | (uint8_t) b >> 2; /* XXXXXYYY */ + + break; + case 0: + break; + default: + return -EINVAL; + } + + *z = 0; + + *mem = TAKE_PTR(r); + *_len = len; + + return 0; +} + +/* https://tools.ietf.org/html/rfc4648#section-4 */ +char base64char(int x) { + static const char table[64] = "ABCDEFGHIJKLMNOPQRSTUVWXYZ" + "abcdefghijklmnopqrstuvwxyz" + "0123456789+/"; + return table[x & 63]; +} + +int unbase64char(char c) { + unsigned offset; + + if (c >= 'A' && c <= 'Z') + return c - 'A'; + + offset = 'Z' - 'A' + 1; + + if (c >= 'a' && c <= 'z') + return c - 'a' + offset; + + offset += 'z' - 'a' + 1; + + if (c >= '0' && c <= '9') + return c - '0' + offset; + + offset += '9' - '0' + 1; + + if (c == '+') + return offset; + + offset++; + + if (c == '/') + return offset; + + return -EINVAL; +} + +ssize_t base64mem(const void *p, size_t l, char **out) { + char *r, *z; + const uint8_t *x; + + assert(p || l == 0); + assert(out); + + /* three input bytes makes four output bytes, padding is added so we must round up */ + z = r = malloc(4 * (l + 2) / 3 + 1); + if (!r) + return -ENOMEM; + + for (x = p; x < (const uint8_t*) p + (l / 3) * 3; x += 3) { + /* x[0] == XXXXXXXX; x[1] == YYYYYYYY; x[2] == ZZZZZZZZ */ + *(z++) = base64char(x[0] >> 2); /* 00XXXXXX */ + *(z++) = base64char((x[0] & 3) << 4 | x[1] >> 4); /* 00XXYYYY */ + *(z++) = base64char((x[1] & 15) << 2 | x[2] >> 6); /* 00YYYYZZ */ + *(z++) = base64char(x[2] & 63); /* 00ZZZZZZ */ + } + + switch (l % 3) { + case 2: + *(z++) = base64char(x[0] >> 2); /* 00XXXXXX */ + *(z++) = base64char((x[0] & 3) << 4 | x[1] >> 4); /* 00XXYYYY */ + *(z++) = base64char((x[1] & 15) << 2); /* 00YYYY00 */ + *(z++) = '='; + + break; + case 1: + *(z++) = base64char(x[0] >> 2); /* 00XXXXXX */ + *(z++) = base64char((x[0] & 3) << 4); /* 00XX0000 */ + *(z++) = '='; + *(z++) = '='; + + break; + } + + *z = 0; + *out = r; + return z - r; +} + +static int base64_append_width( + char **prefix, int plen, + const char *sep, int indent, + const void *p, size_t l, + int width) { + + _cleanup_free_ char *x = NULL; + char *t, *s; + ssize_t len, slen, avail, line, lines; + + len = base64mem(p, l, &x); + if (len <= 0) + return len; + + lines = DIV_ROUND_UP(len, width); + + slen = strlen_ptr(sep); + if (lines > (SSIZE_MAX - plen - 1 - slen) / (indent + width + 1)) + return -ENOMEM; + + t = realloc(*prefix, plen + 1 + slen + (indent + width + 1) * lines); + if (!t) + return -ENOMEM; + + memcpy_safe(t + plen, sep, slen); + + for (line = 0, s = t + plen + slen, avail = len; line < lines; line++) { + int act = MIN(width, avail); + + if (line > 0 || sep) { + memset(s, ' ', indent); + s += indent; + } + + memcpy(s, x + width * line, act); + s += act; + *(s++) = line < lines - 1 ? '\n' : '\0'; + avail -= act; + } + assert(avail == 0); + + *prefix = t; + return 0; +} + +int base64_append( + char **prefix, int plen, + const void *p, size_t l, + int indent, int width) { + + if (plen > width / 2 || plen + indent > width) + /* leave indent on the left, keep last column free */ + return base64_append_width(prefix, plen, "\n", indent, p, l, width - indent - 1); + else + /* leave plen on the left, keep last column free */ + return base64_append_width(prefix, plen, NULL, plen, p, l, width - plen - 1); +} + +static int unbase64_next(const char **p, size_t *l) { + int ret; + + assert(p); + assert(l); + + /* Find the next non-whitespace character, and decode it. If we find padding, we return it as INT_MAX. We + * greedily skip all preceding and all following whitespace. */ + + for (;;) { + if (*l == 0) + return -EPIPE; + + if (!strchr(WHITESPACE, **p)) + break; + + /* Skip leading whitespace */ + (*p)++, (*l)--; + } + + if (**p == '=') + ret = INT_MAX; /* return padding as INT_MAX */ + else { + ret = unbase64char(**p); + if (ret < 0) + return ret; + } + + for (;;) { + (*p)++, (*l)--; + + if (*l == 0) + break; + if (!strchr(WHITESPACE, **p)) + break; + + /* Skip following whitespace */ + } + + return ret; +} + +int unbase64mem(const char *p, size_t l, void **ret, size_t *ret_size) { + _cleanup_free_ uint8_t *buf = NULL; + const char *x; + uint8_t *z; + size_t len; + + assert(p || l == 0); + assert(ret); + assert(ret_size); + + if (l == (size_t) -1) + l = strlen(p); + + /* A group of four input bytes needs three output bytes, in case of padding we need to add two or three extra + * bytes. Note that this calculation is an upper boundary, as we ignore whitespace while decoding */ + len = (l / 4) * 3 + (l % 4 != 0 ? (l % 4) - 1 : 0); + + buf = malloc(len + 1); + if (!buf) + return -ENOMEM; + + for (x = p, z = buf;;) { + int a, b, c, d; /* a == 00XXXXXX; b == 00YYYYYY; c == 00ZZZZZZ; d == 00WWWWWW */ + + a = unbase64_next(&x, &l); + if (a == -EPIPE) /* End of string */ + break; + if (a < 0) + return a; + if (a == INT_MAX) /* Padding is not allowed at the beginning of a 4ch block */ + return -EINVAL; + + b = unbase64_next(&x, &l); + if (b < 0) + return b; + if (b == INT_MAX) /* Padding is not allowed at the second character of a 4ch block either */ + return -EINVAL; + + c = unbase64_next(&x, &l); + if (c < 0) + return c; + + d = unbase64_next(&x, &l); + if (d < 0) + return d; + + if (c == INT_MAX) { /* Padding at the third character */ + + if (d != INT_MAX) /* If the third character is padding, the fourth must be too */ + return -EINVAL; + + /* b == 00YY0000 */ + if (b & 15) + return -EINVAL; + + if (l > 0) /* Trailing rubbish? */ + return -ENAMETOOLONG; + + *(z++) = (uint8_t) a << 2 | (uint8_t) (b >> 4); /* XXXXXXYY */ + break; + } + + if (d == INT_MAX) { + /* c == 00ZZZZ00 */ + if (c & 3) + return -EINVAL; + + if (l > 0) /* Trailing rubbish? */ + return -ENAMETOOLONG; + + *(z++) = (uint8_t) a << 2 | (uint8_t) b >> 4; /* XXXXXXYY */ + *(z++) = (uint8_t) b << 4 | (uint8_t) c >> 2; /* YYYYZZZZ */ + break; + } + + *(z++) = (uint8_t) a << 2 | (uint8_t) b >> 4; /* XXXXXXYY */ + *(z++) = (uint8_t) b << 4 | (uint8_t) c >> 2; /* YYYYZZZZ */ + *(z++) = (uint8_t) c << 6 | (uint8_t) d; /* ZZWWWWWW */ + } + + *z = 0; + + *ret_size = (size_t) (z - buf); + *ret = TAKE_PTR(buf); + + return 0; +} + +void hexdump(FILE *f, const void *p, size_t s) { + const uint8_t *b = p; + unsigned n = 0; + + assert(b || s == 0); + + if (!f) + f = stdout; + + while (s > 0) { + size_t i; + + fprintf(f, "%04x ", n); + + for (i = 0; i < 16; i++) { + + if (i >= s) + fputs(" ", f); + else + fprintf(f, "%02x ", b[i]); + + if (i == 7) + fputc(' ', f); + } + + fputc(' ', f); + + for (i = 0; i < 16; i++) { + + if (i >= s) + fputc(' ', f); + else + fputc(isprint(b[i]) ? (char) b[i] : '.', f); + } + + fputc('\n', f); + + if (s < 16) + break; + + n += 16; + b += 16; + s -= 16; + } +} diff --git a/src/basic/hexdecoct.h b/src/basic/hexdecoct.h new file mode 100644 index 0000000..9477d16 --- /dev/null +++ b/src/basic/hexdecoct.h @@ -0,0 +1,38 @@ +/* SPDX-License-Identifier: LGPL-2.1+ */ +#pragma once + +#include <stdbool.h> +#include <stddef.h> +#include <stdio.h> +#include <sys/types.h> + +#include "macro.h" + +char octchar(int x) _const_; +int unoctchar(char c) _const_; + +char decchar(int x) _const_; +int undecchar(char c) _const_; + +char hexchar(int x) _const_; +int unhexchar(char c) _const_; + +char *hexmem(const void *p, size_t l); +int unhexmem(const char *p, size_t l, void **mem, size_t *len); + +char base32hexchar(int x) _const_; +int unbase32hexchar(char c) _const_; + +char base64char(int x) _const_; +int unbase64char(char c) _const_; + +char *base32hexmem(const void *p, size_t l, bool padding); +int unbase32hexmem(const char *p, size_t l, bool padding, void **mem, size_t *len); + +ssize_t base64mem(const void *p, size_t l, char **out); +int base64_append(char **prefix, int plen, + const void *p, size_t l, + int margin, int width); +int unbase64mem(const char *p, size_t l, void **mem, size_t *len); + +void hexdump(FILE *f, const void *p, size_t s); diff --git a/src/basic/hostname-util.c b/src/basic/hostname-util.c new file mode 100644 index 0000000..5bfa028 --- /dev/null +++ b/src/basic/hostname-util.c @@ -0,0 +1,308 @@ +/* SPDX-License-Identifier: LGPL-2.1+ */ + +#include <errno.h> +#include <limits.h> +#include <stdio.h> +#include <string.h> +#include <sys/utsname.h> +#include <unistd.h> + +#include "alloc-util.h" +#include "fd-util.h" +#include "fileio.h" +#include "hostname-util.h" +#include "macro.h" +#include "string-util.h" + +bool hostname_is_set(void) { + struct utsname u; + + assert_se(uname(&u) >= 0); + + if (isempty(u.nodename)) + return false; + + /* This is the built-in kernel default host name */ + if (streq(u.nodename, "(none)")) + return false; + + return true; +} + +char* gethostname_malloc(void) { + struct utsname u; + + /* This call tries to return something useful, either the actual hostname + * or it makes something up. The only reason it might fail is OOM. + * It might even return "localhost" if that's set. */ + + assert_se(uname(&u) >= 0); + + if (isempty(u.nodename) || streq(u.nodename, "(none)")) + return strdup(FALLBACK_HOSTNAME); + + return strdup(u.nodename); +} + +int gethostname_strict(char **ret) { + struct utsname u; + char *k; + + /* This call will rather fail than make up a name. It will not return "localhost" either. */ + + assert_se(uname(&u) >= 0); + + if (isempty(u.nodename)) + return -ENXIO; + + if (streq(u.nodename, "(none)")) + return -ENXIO; + + if (is_localhost(u.nodename)) + return -ENXIO; + + k = strdup(u.nodename); + if (!k) + return -ENOMEM; + + *ret = k; + return 0; +} + +bool valid_ldh_char(char c) { + return + (c >= 'a' && c <= 'z') || + (c >= 'A' && c <= 'Z') || + (c >= '0' && c <= '9') || + c == '-'; +} + +/** + * Check if s looks like a valid host name or FQDN. This does not do + * full DNS validation, but only checks if the name is composed of + * allowed characters and the length is not above the maximum allowed + * by Linux (c.f. dns_name_is_valid()). Trailing dot is allowed if + * allow_trailing_dot is true and at least two components are present + * in the name. Note that due to the restricted charset and length + * this call is substantially more conservative than + * dns_name_is_valid(). + */ +bool hostname_is_valid(const char *s, bool allow_trailing_dot) { + unsigned n_dots = 0; + const char *p; + bool dot, hyphen; + + if (isempty(s)) + return false; + + /* Doesn't accept empty hostnames, hostnames with + * leading dots, and hostnames with multiple dots in a + * sequence. Also ensures that the length stays below + * HOST_NAME_MAX. */ + + for (p = s, dot = hyphen = true; *p; p++) + if (*p == '.') { + if (dot || hyphen) + return false; + + dot = true; + hyphen = false; + n_dots++; + + } else if (*p == '-') { + if (dot) + return false; + + dot = false; + hyphen = true; + + } else { + if (!valid_ldh_char(*p)) + return false; + + dot = false; + hyphen = false; + } + + if (dot && (n_dots < 2 || !allow_trailing_dot)) + return false; + if (hyphen) + return false; + + if (p-s > HOST_NAME_MAX) /* Note that HOST_NAME_MAX is 64 on + * Linux, but DNS allows domain names + * up to 255 characters */ + return false; + + return true; +} + +char* hostname_cleanup(char *s) { + char *p, *d; + bool dot, hyphen; + + assert(s); + + for (p = s, d = s, dot = hyphen = true; *p && d - s < HOST_NAME_MAX; p++) + if (*p == '.') { + if (dot || hyphen) + continue; + + *(d++) = '.'; + dot = true; + hyphen = false; + + } else if (*p == '-') { + if (dot) + continue; + + *(d++) = '-'; + dot = false; + hyphen = true; + + } else if (valid_ldh_char(*p)) { + *(d++) = *p; + dot = false; + hyphen = false; + } + + if (d > s && IN_SET(d[-1], '-', '.')) + /* The dot can occur at most once, but we might have multiple + * hyphens, hence the loop */ + d--; + *d = 0; + + return s; +} + +bool is_localhost(const char *hostname) { + assert(hostname); + + /* This tries to identify local host and domain names + * described in RFC6761 plus the redhatism of localdomain */ + + return strcaseeq(hostname, "localhost") || + strcaseeq(hostname, "localhost.") || + strcaseeq(hostname, "localhost.localdomain") || + strcaseeq(hostname, "localhost.localdomain.") || + endswith_no_case(hostname, ".localhost") || + endswith_no_case(hostname, ".localhost.") || + endswith_no_case(hostname, ".localhost.localdomain") || + endswith_no_case(hostname, ".localhost.localdomain."); +} + +bool is_gateway_hostname(const char *hostname) { + assert(hostname); + + /* This tries to identify the valid syntaxes for the our + * synthetic "gateway" host. */ + + return + strcaseeq(hostname, "_gateway") || strcaseeq(hostname, "_gateway.") +#if ENABLE_COMPAT_GATEWAY_HOSTNAME + || strcaseeq(hostname, "gateway") || strcaseeq(hostname, "gateway.") +#endif + ; +} + +int sethostname_idempotent(const char *s) { + char buf[HOST_NAME_MAX + 1] = {}; + + assert(s); + + if (gethostname(buf, sizeof(buf)) < 0) + return -errno; + + if (streq(buf, s)) + return 0; + + if (sethostname(s, strlen(s)) < 0) + return -errno; + + return 1; +} + +int shorten_overlong(const char *s, char **ret) { + char *h, *p; + + /* Shorten an overlong name to HOST_NAME_MAX or to the first dot, + * whatever comes earlier. */ + + assert(s); + + h = strdup(s); + if (!h) + return -ENOMEM; + + if (hostname_is_valid(h, false)) { + *ret = h; + return 0; + } + + p = strchr(h, '.'); + if (p) + *p = 0; + + strshorten(h, HOST_NAME_MAX); + + if (!hostname_is_valid(h, false)) { + free(h); + return -EDOM; + } + + *ret = h; + return 1; +} + +int read_etc_hostname_stream(FILE *f, char **ret) { + int r; + + assert(f); + assert(ret); + + for (;;) { + _cleanup_free_ char *line = NULL; + char *p; + + r = read_line(f, LONG_LINE_MAX, &line); + if (r < 0) + return r; + if (r == 0) /* EOF without any hostname? the file is empty, let's treat that exactly like no file at all: ENOENT */ + return -ENOENT; + + p = strstrip(line); + + /* File may have empty lines or comments, ignore them */ + if (!IN_SET(*p, '\0', '#')) { + char *copy; + + hostname_cleanup(p); /* normalize the hostname */ + + if (!hostname_is_valid(p, true)) /* check that the hostname we return is valid */ + return -EBADMSG; + + copy = strdup(p); + if (!copy) + return -ENOMEM; + + *ret = copy; + return 0; + } + } +} + +int read_etc_hostname(const char *path, char **ret) { + _cleanup_fclose_ FILE *f = NULL; + + assert(ret); + + if (!path) + path = "/etc/hostname"; + + f = fopen(path, "re"); + if (!f) + return -errno; + + return read_etc_hostname_stream(f, ret); + +} diff --git a/src/basic/hostname-util.h b/src/basic/hostname-util.h new file mode 100644 index 0000000..7ba386a --- /dev/null +++ b/src/basic/hostname-util.h @@ -0,0 +1,28 @@ +/* SPDX-License-Identifier: LGPL-2.1+ */ +#pragma once + +#include <stdbool.h> +#include <stdio.h> + +#include "macro.h" + +bool hostname_is_set(void); + +char* gethostname_malloc(void); +int gethostname_strict(char **ret); + +bool valid_ldh_char(char c) _const_; +bool hostname_is_valid(const char *s, bool allow_trailing_dot) _pure_; +char* hostname_cleanup(char *s); + +#define machine_name_is_valid(s) hostname_is_valid(s, false) + +bool is_localhost(const char *hostname); +bool is_gateway_hostname(const char *hostname); + +int sethostname_idempotent(const char *s); + +int shorten_overlong(const char *s, char **ret); + +int read_etc_hostname_stream(FILE *f, char **ret); +int read_etc_hostname(const char *path, char **ret); diff --git a/src/basic/in-addr-util.c b/src/basic/in-addr-util.c new file mode 100644 index 0000000..2bffe47 --- /dev/null +++ b/src/basic/in-addr-util.c @@ -0,0 +1,622 @@ +/* SPDX-License-Identifier: LGPL-2.1+ */ + +#include <arpa/inet.h> +#include <endian.h> +#include <errno.h> +#include <net/if.h> +#include <stdint.h> +#include <stdlib.h> + +#include "alloc-util.h" +#include "in-addr-util.h" +#include "macro.h" +#include "parse-util.h" +#include "util.h" + +bool in4_addr_is_null(const struct in_addr *a) { + assert(a); + + return a->s_addr == 0; +} + +int in_addr_is_null(int family, const union in_addr_union *u) { + assert(u); + + if (family == AF_INET) + return in4_addr_is_null(&u->in); + + if (family == AF_INET6) + return IN6_IS_ADDR_UNSPECIFIED(&u->in6); + + return -EAFNOSUPPORT; +} + +bool in4_addr_is_link_local(const struct in_addr *a) { + assert(a); + + return (be32toh(a->s_addr) & UINT32_C(0xFFFF0000)) == (UINT32_C(169) << 24 | UINT32_C(254) << 16); +} + +int in_addr_is_link_local(int family, const union in_addr_union *u) { + assert(u); + + if (family == AF_INET) + return in4_addr_is_link_local(&u->in); + + if (family == AF_INET6) + return IN6_IS_ADDR_LINKLOCAL(&u->in6); + + return -EAFNOSUPPORT; +} + +int in_addr_is_multicast(int family, const union in_addr_union *u) { + assert(u); + + if (family == AF_INET) + return IN_MULTICAST(be32toh(u->in.s_addr)); + + if (family == AF_INET6) + return IN6_IS_ADDR_MULTICAST(&u->in6); + + return -EAFNOSUPPORT; +} + +bool in4_addr_is_localhost(const struct in_addr *a) { + assert(a); + + /* All of 127.x.x.x is localhost. */ + return (be32toh(a->s_addr) & UINT32_C(0xFF000000)) == UINT32_C(127) << 24; +} + +int in_addr_is_localhost(int family, const union in_addr_union *u) { + assert(u); + + if (family == AF_INET) + return in4_addr_is_localhost(&u->in); + + if (family == AF_INET6) + return IN6_IS_ADDR_LOOPBACK(&u->in6); + + return -EAFNOSUPPORT; +} + +int in_addr_equal(int family, const union in_addr_union *a, const union in_addr_union *b) { + assert(a); + assert(b); + + if (family == AF_INET) + return a->in.s_addr == b->in.s_addr; + + if (family == AF_INET6) + return + a->in6.s6_addr32[0] == b->in6.s6_addr32[0] && + a->in6.s6_addr32[1] == b->in6.s6_addr32[1] && + a->in6.s6_addr32[2] == b->in6.s6_addr32[2] && + a->in6.s6_addr32[3] == b->in6.s6_addr32[3]; + + return -EAFNOSUPPORT; +} + +int in_addr_prefix_intersect( + int family, + const union in_addr_union *a, + unsigned aprefixlen, + const union in_addr_union *b, + unsigned bprefixlen) { + + unsigned m; + + assert(a); + assert(b); + + /* Checks whether there are any addresses that are in both + * networks */ + + m = MIN(aprefixlen, bprefixlen); + + if (family == AF_INET) { + uint32_t x, nm; + + x = be32toh(a->in.s_addr ^ b->in.s_addr); + nm = (m == 0) ? 0 : 0xFFFFFFFFUL << (32 - m); + + return (x & nm) == 0; + } + + if (family == AF_INET6) { + unsigned i; + + if (m > 128) + m = 128; + + for (i = 0; i < 16; i++) { + uint8_t x, nm; + + x = a->in6.s6_addr[i] ^ b->in6.s6_addr[i]; + + if (m < 8) + nm = 0xFF << (8 - m); + else + nm = 0xFF; + + if ((x & nm) != 0) + return 0; + + if (m > 8) + m -= 8; + else + m = 0; + } + + return 1; + } + + return -EAFNOSUPPORT; +} + +int in_addr_prefix_next(int family, union in_addr_union *u, unsigned prefixlen) { + assert(u); + + /* Increases the network part of an address by one. Returns + * positive it that succeeds, or 0 if this overflows. */ + + if (prefixlen <= 0) + return 0; + + if (family == AF_INET) { + uint32_t c, n; + + if (prefixlen > 32) + prefixlen = 32; + + c = be32toh(u->in.s_addr); + n = c + (1UL << (32 - prefixlen)); + if (n < c) + return 0; + n &= 0xFFFFFFFFUL << (32 - prefixlen); + + u->in.s_addr = htobe32(n); + return 1; + } + + if (family == AF_INET6) { + struct in6_addr add = {}, result; + uint8_t overflow = 0; + unsigned i; + + if (prefixlen > 128) + prefixlen = 128; + + /* First calculate what we have to add */ + add.s6_addr[(prefixlen-1) / 8] = 1 << (7 - (prefixlen-1) % 8); + + for (i = 16; i > 0; i--) { + unsigned j = i - 1; + + result.s6_addr[j] = u->in6.s6_addr[j] + add.s6_addr[j] + overflow; + overflow = (result.s6_addr[j] < u->in6.s6_addr[j]); + } + + if (overflow) + return 0; + + u->in6 = result; + return 1; + } + + return -EAFNOSUPPORT; +} + +int in_addr_to_string(int family, const union in_addr_union *u, char **ret) { + char *x; + size_t l; + + assert(u); + assert(ret); + + if (family == AF_INET) + l = INET_ADDRSTRLEN; + else if (family == AF_INET6) + l = INET6_ADDRSTRLEN; + else + return -EAFNOSUPPORT; + + x = new(char, l); + if (!x) + return -ENOMEM; + + errno = 0; + if (!inet_ntop(family, u, x, l)) { + free(x); + return errno > 0 ? -errno : -EINVAL; + } + + *ret = x; + return 0; +} + +int in_addr_ifindex_to_string(int family, const union in_addr_union *u, int ifindex, char **ret) { + size_t l; + char *x; + int r; + + assert(u); + assert(ret); + + /* Much like in_addr_to_string(), but optionally appends the zone interface index to the address, to properly + * handle IPv6 link-local addresses. */ + + if (family != AF_INET6) + goto fallback; + if (ifindex <= 0) + goto fallback; + + r = in_addr_is_link_local(family, u); + if (r < 0) + return r; + if (r == 0) + goto fallback; + + l = INET6_ADDRSTRLEN + 1 + DECIMAL_STR_MAX(ifindex) + 1; + x = new(char, l); + if (!x) + return -ENOMEM; + + errno = 0; + if (!inet_ntop(family, u, x, l)) { + free(x); + return errno > 0 ? -errno : -EINVAL; + } + + sprintf(strchr(x, 0), "%%%i", ifindex); + *ret = x; + + return 0; + +fallback: + return in_addr_to_string(family, u, ret); +} + +int in_addr_from_string(int family, const char *s, union in_addr_union *ret) { + union in_addr_union buffer; + assert(s); + + if (!IN_SET(family, AF_INET, AF_INET6)) + return -EAFNOSUPPORT; + + errno = 0; + if (inet_pton(family, s, ret ?: &buffer) <= 0) + return errno > 0 ? -errno : -EINVAL; + + return 0; +} + +int in_addr_from_string_auto(const char *s, int *ret_family, union in_addr_union *ret) { + int r; + + assert(s); + + r = in_addr_from_string(AF_INET, s, ret); + if (r >= 0) { + if (ret_family) + *ret_family = AF_INET; + return 0; + } + + r = in_addr_from_string(AF_INET6, s, ret); + if (r >= 0) { + if (ret_family) + *ret_family = AF_INET6; + return 0; + } + + return -EINVAL; +} + +int in_addr_ifindex_from_string_auto(const char *s, int *family, union in_addr_union *ret, int *ifindex) { + _cleanup_free_ char *buf = NULL; + const char *suffix; + int r, ifi = 0; + + assert(s); + assert(family); + assert(ret); + + /* Similar to in_addr_from_string_auto() but also parses an optionally appended IPv6 zone suffix ("scope id") + * if one is found. */ + + suffix = strchr(s, '%'); + if (suffix) { + + if (ifindex) { + /* If we shall return the interface index, try to parse it */ + r = parse_ifindex(suffix + 1, &ifi); + if (r < 0) { + unsigned u; + + u = if_nametoindex(suffix + 1); + if (u <= 0) + return -errno; + + ifi = (int) u; + } + } + + buf = strndup(s, suffix - s); + if (!buf) + return -ENOMEM; + + s = buf; + } + + r = in_addr_from_string_auto(s, family, ret); + if (r < 0) + return r; + + if (ifindex) + *ifindex = ifi; + + return r; +} + +unsigned char in4_addr_netmask_to_prefixlen(const struct in_addr *addr) { + assert(addr); + + return 32U - u32ctz(be32toh(addr->s_addr)); +} + +struct in_addr* in4_addr_prefixlen_to_netmask(struct in_addr *addr, unsigned char prefixlen) { + assert(addr); + assert(prefixlen <= 32); + + /* Shifting beyond 32 is not defined, handle this specially. */ + if (prefixlen == 0) + addr->s_addr = 0; + else + addr->s_addr = htobe32((0xffffffff << (32 - prefixlen)) & 0xffffffff); + + return addr; +} + +int in4_addr_default_prefixlen(const struct in_addr *addr, unsigned char *prefixlen) { + uint8_t msb_octet = *(uint8_t*) addr; + + /* addr may not be aligned, so make sure we only access it byte-wise */ + + assert(addr); + assert(prefixlen); + + if (msb_octet < 128) + /* class A, leading bits: 0 */ + *prefixlen = 8; + else if (msb_octet < 192) + /* class B, leading bits 10 */ + *prefixlen = 16; + else if (msb_octet < 224) + /* class C, leading bits 110 */ + *prefixlen = 24; + else + /* class D or E, no default prefixlen */ + return -ERANGE; + + return 0; +} + +int in4_addr_default_subnet_mask(const struct in_addr *addr, struct in_addr *mask) { + unsigned char prefixlen; + int r; + + assert(addr); + assert(mask); + + r = in4_addr_default_prefixlen(addr, &prefixlen); + if (r < 0) + return r; + + in4_addr_prefixlen_to_netmask(mask, prefixlen); + return 0; +} + +int in_addr_mask(int family, union in_addr_union *addr, unsigned char prefixlen) { + assert(addr); + + if (family == AF_INET) { + struct in_addr mask; + + if (!in4_addr_prefixlen_to_netmask(&mask, prefixlen)) + return -EINVAL; + + addr->in.s_addr &= mask.s_addr; + return 0; + } + + if (family == AF_INET6) { + unsigned i; + + for (i = 0; i < 16; i++) { + uint8_t mask; + + if (prefixlen >= 8) { + mask = 0xFF; + prefixlen -= 8; + } else { + mask = 0xFF << (8 - prefixlen); + prefixlen = 0; + } + + addr->in6.s6_addr[i] &= mask; + } + + return 0; + } + + return -EAFNOSUPPORT; +} + +int in_addr_prefix_covers(int family, + const union in_addr_union *prefix, + unsigned char prefixlen, + const union in_addr_union *address) { + + union in_addr_union masked_prefix, masked_address; + int r; + + assert(prefix); + assert(address); + + masked_prefix = *prefix; + r = in_addr_mask(family, &masked_prefix, prefixlen); + if (r < 0) + return r; + + masked_address = *address; + r = in_addr_mask(family, &masked_address, prefixlen); + if (r < 0) + return r; + + return in_addr_equal(family, &masked_prefix, &masked_address); +} + +int in_addr_parse_prefixlen(int family, const char *p, unsigned char *ret) { + uint8_t u; + int r; + + if (!IN_SET(family, AF_INET, AF_INET6)) + return -EAFNOSUPPORT; + + r = safe_atou8(p, &u); + if (r < 0) + return r; + + if (u > FAMILY_ADDRESS_SIZE(family) * 8) + return -ERANGE; + + *ret = u; + return 0; +} + +int in_addr_prefix_from_string( + const char *p, + int family, + union in_addr_union *ret_prefix, + unsigned char *ret_prefixlen) { + + _cleanup_free_ char *str = NULL; + union in_addr_union buffer; + const char *e, *l; + unsigned char k; + int r; + + assert(p); + + if (!IN_SET(family, AF_INET, AF_INET6)) + return -EAFNOSUPPORT; + + e = strchr(p, '/'); + if (e) { + str = strndup(p, e - p); + if (!str) + return -ENOMEM; + + l = str; + } else + l = p; + + r = in_addr_from_string(family, l, &buffer); + if (r < 0) + return r; + + if (e) { + r = in_addr_parse_prefixlen(family, e+1, &k); + if (r < 0) + return r; + } else + k = FAMILY_ADDRESS_SIZE(family) * 8; + + if (ret_prefix) + *ret_prefix = buffer; + if (ret_prefixlen) + *ret_prefixlen = k; + + return 0; +} + +int in_addr_prefix_from_string_auto_internal( + const char *p, + InAddrPrefixLenMode mode, + int *ret_family, + union in_addr_union *ret_prefix, + unsigned char *ret_prefixlen) { + + _cleanup_free_ char *str = NULL; + union in_addr_union buffer; + const char *e, *l; + unsigned char k; + int family, r; + + assert(p); + + e = strchr(p, '/'); + if (e) { + str = strndup(p, e - p); + if (!str) + return -ENOMEM; + + l = str; + } else + l = p; + + r = in_addr_from_string_auto(l, &family, &buffer); + if (r < 0) + return r; + + if (e) { + r = in_addr_parse_prefixlen(family, e+1, &k); + if (r < 0) + return r; + } else + switch (mode) { + case PREFIXLEN_FULL: + k = FAMILY_ADDRESS_SIZE(family) * 8; + break; + case PREFIXLEN_REFUSE: + return -ENOANO; /* To distinguish this error from others. */ + case PREFIXLEN_LEGACY: + if (family == AF_INET) { + r = in4_addr_default_prefixlen(&buffer.in, &k); + if (r < 0) + return r; + } else + k = 0; + break; + default: + assert_not_reached("Invalid prefixlen mode"); + } + + if (ret_family) + *ret_family = family; + if (ret_prefix) + *ret_prefix = buffer; + if (ret_prefixlen) + *ret_prefixlen = k; + + return 0; + +} + +static void in_addr_data_hash_func(const struct in_addr_data *a, struct siphash *state) { + siphash24_compress(&a->family, sizeof(a->family), state); + siphash24_compress(&a->address, FAMILY_ADDRESS_SIZE(a->family), state); +} + +static int in_addr_data_compare_func(const struct in_addr_data *x, const struct in_addr_data *y) { + int r; + + r = CMP(x->family, y->family); + if (r != 0) + return r; + + return memcmp(&x->address, &y->address, FAMILY_ADDRESS_SIZE(x->family)); +} + +DEFINE_HASH_OPS(in_addr_data_hash_ops, struct in_addr_data, in_addr_data_hash_func, in_addr_data_compare_func); diff --git a/src/basic/in-addr-util.h b/src/basic/in-addr-util.h new file mode 100644 index 0000000..3069790 --- /dev/null +++ b/src/basic/in-addr-util.h @@ -0,0 +1,70 @@ +/* SPDX-License-Identifier: LGPL-2.1+ */ +#pragma once + +#include <netinet/in.h> +#include <stddef.h> +#include <sys/socket.h> + +#include "hash-funcs.h" +#include "macro.h" +#include "util.h" + +union in_addr_union { + struct in_addr in; + struct in6_addr in6; +}; + +struct in_addr_data { + int family; + union in_addr_union address; +}; + +bool in4_addr_is_null(const struct in_addr *a); +int in_addr_is_null(int family, const union in_addr_union *u); + +int in_addr_is_multicast(int family, const union in_addr_union *u); + +bool in4_addr_is_link_local(const struct in_addr *a); +int in_addr_is_link_local(int family, const union in_addr_union *u); + +bool in4_addr_is_localhost(const struct in_addr *a); +int in_addr_is_localhost(int family, const union in_addr_union *u); + +int in_addr_equal(int family, const union in_addr_union *a, const union in_addr_union *b); +int in_addr_prefix_intersect(int family, const union in_addr_union *a, unsigned aprefixlen, const union in_addr_union *b, unsigned bprefixlen); +int in_addr_prefix_next(int family, union in_addr_union *u, unsigned prefixlen); +int in_addr_to_string(int family, const union in_addr_union *u, char **ret); +int in_addr_ifindex_to_string(int family, const union in_addr_union *u, int ifindex, char **ret); +int in_addr_from_string(int family, const char *s, union in_addr_union *ret); +int in_addr_from_string_auto(const char *s, int *ret_family, union in_addr_union *ret); +int in_addr_ifindex_from_string_auto(const char *s, int *family, union in_addr_union *ret, int *ifindex); +unsigned char in4_addr_netmask_to_prefixlen(const struct in_addr *addr); +struct in_addr* in4_addr_prefixlen_to_netmask(struct in_addr *addr, unsigned char prefixlen); +int in4_addr_default_prefixlen(const struct in_addr *addr, unsigned char *prefixlen); +int in4_addr_default_subnet_mask(const struct in_addr *addr, struct in_addr *mask); +int in_addr_mask(int family, union in_addr_union *addr, unsigned char prefixlen); +int in_addr_prefix_covers(int family, const union in_addr_union *prefix, unsigned char prefixlen, const union in_addr_union *address); +int in_addr_parse_prefixlen(int family, const char *p, unsigned char *ret); +int in_addr_prefix_from_string(const char *p, int family, union in_addr_union *ret_prefix, unsigned char *ret_prefixlen); + +typedef enum InAddrPrefixLenMode { + PREFIXLEN_FULL, /* Default to prefixlen of address size, 32 for IPv4 or 128 for IPv6, if not specified. */ + PREFIXLEN_REFUSE, /* Fail with -ENOANO if prefixlen is not specified. */ + PREFIXLEN_LEGACY, /* Default to legacy default prefixlen calculation from address if not specified. */ +} InAddrPrefixLenMode; + +int in_addr_prefix_from_string_auto_internal(const char *p, InAddrPrefixLenMode mode, int *ret_family, union in_addr_union *ret_prefix, unsigned char *ret_prefixlen); +static inline int in_addr_prefix_from_string_auto(const char *p, int *ret_family, union in_addr_union *ret_prefix, unsigned char *ret_prefixlen) { + return in_addr_prefix_from_string_auto_internal(p, PREFIXLEN_FULL, ret_family, ret_prefix, ret_prefixlen); +} + +static inline size_t FAMILY_ADDRESS_SIZE(int family) { + assert(IN_SET(family, AF_INET, AF_INET6)); + return family == AF_INET6 ? 16 : 4; +} + +/* Workaround for clang, explicitly specify the maximum-size element here. + * See also oss-fuzz#11344. */ +#define IN_ADDR_NULL ((union in_addr_union) { .in6 = {} }) + +extern const struct hash_ops in_addr_data_hash_ops; diff --git a/src/basic/io-util.c b/src/basic/io-util.c new file mode 100644 index 0000000..575398f --- /dev/null +++ b/src/basic/io-util.c @@ -0,0 +1,264 @@ +/* SPDX-License-Identifier: LGPL-2.1+ */ + +#include <errno.h> +#include <limits.h> +#include <poll.h> +#include <stdio.h> +#include <time.h> +#include <unistd.h> + +#include "io-util.h" +#include "string-util.h" +#include "time-util.h" + +int flush_fd(int fd) { + struct pollfd pollfd = { + .fd = fd, + .events = POLLIN, + }; + int count = 0; + + /* Read from the specified file descriptor, until POLLIN is not set anymore, throwing away everything + * read. Note that some file descriptors (notable IP sockets) will trigger POLLIN even when no data can be read + * (due to IP packet checksum mismatches), hence this function is only safe to be non-blocking if the fd used + * was set to non-blocking too. */ + + for (;;) { + char buf[LINE_MAX]; + ssize_t l; + int r; + + r = poll(&pollfd, 1, 0); + if (r < 0) { + if (errno == EINTR) + continue; + + return -errno; + + } else if (r == 0) + return count; + + l = read(fd, buf, sizeof(buf)); + if (l < 0) { + + if (errno == EINTR) + continue; + + if (errno == EAGAIN) + return count; + + return -errno; + } else if (l == 0) + return count; + + count += (int) l; + } +} + +ssize_t loop_read(int fd, void *buf, size_t nbytes, bool do_poll) { + uint8_t *p = buf; + ssize_t n = 0; + + assert(fd >= 0); + assert(buf); + + /* If called with nbytes == 0, let's call read() at least + * once, to validate the operation */ + + if (nbytes > (size_t) SSIZE_MAX) + return -EINVAL; + + do { + ssize_t k; + + k = read(fd, p, nbytes); + if (k < 0) { + if (errno == EINTR) + continue; + + if (errno == EAGAIN && do_poll) { + + /* We knowingly ignore any return value here, + * and expect that any error/EOF is reported + * via read() */ + + (void) fd_wait_for_event(fd, POLLIN, USEC_INFINITY); + continue; + } + + return n > 0 ? n : -errno; + } + + if (k == 0) + return n; + + assert((size_t) k <= nbytes); + + p += k; + nbytes -= k; + n += k; + } while (nbytes > 0); + + return n; +} + +int loop_read_exact(int fd, void *buf, size_t nbytes, bool do_poll) { + ssize_t n; + + n = loop_read(fd, buf, nbytes, do_poll); + if (n < 0) + return (int) n; + if ((size_t) n != nbytes) + return -EIO; + + return 0; +} + +int loop_write(int fd, const void *buf, size_t nbytes, bool do_poll) { + const uint8_t *p = buf; + + assert(fd >= 0); + assert(buf); + + if (_unlikely_(nbytes > (size_t) SSIZE_MAX)) + return -EINVAL; + + do { + ssize_t k; + + k = write(fd, p, nbytes); + if (k < 0) { + if (errno == EINTR) + continue; + + if (errno == EAGAIN && do_poll) { + /* We knowingly ignore any return value here, + * and expect that any error/EOF is reported + * via write() */ + + (void) fd_wait_for_event(fd, POLLOUT, USEC_INFINITY); + continue; + } + + return -errno; + } + + if (_unlikely_(nbytes > 0 && k == 0)) /* Can't really happen */ + return -EIO; + + assert((size_t) k <= nbytes); + + p += k; + nbytes -= k; + } while (nbytes > 0); + + return 0; +} + +int pipe_eof(int fd) { + struct pollfd pollfd = { + .fd = fd, + .events = POLLIN|POLLHUP, + }; + + int r; + + r = poll(&pollfd, 1, 0); + if (r < 0) + return -errno; + + if (r == 0) + return 0; + + return pollfd.revents & POLLHUP; +} + +int fd_wait_for_event(int fd, int event, usec_t t) { + + struct pollfd pollfd = { + .fd = fd, + .events = event, + }; + + struct timespec ts; + int r; + + r = ppoll(&pollfd, 1, t == USEC_INFINITY ? NULL : timespec_store(&ts, t), NULL); + if (r < 0) + return -errno; + if (r == 0) + return 0; + + return pollfd.revents; +} + +static size_t nul_length(const uint8_t *p, size_t sz) { + size_t n = 0; + + while (sz > 0) { + if (*p != 0) + break; + + n++; + p++; + sz--; + } + + return n; +} + +ssize_t sparse_write(int fd, const void *p, size_t sz, size_t run_length) { + const uint8_t *q, *w, *e; + ssize_t l; + + q = w = p; + e = q + sz; + while (q < e) { + size_t n; + + n = nul_length(q, e - q); + + /* If there are more than the specified run length of + * NUL bytes, or if this is the beginning or the end + * of the buffer, then seek instead of write */ + if ((n > run_length) || + (n > 0 && q == p) || + (n > 0 && q + n >= e)) { + if (q > w) { + l = write(fd, w, q - w); + if (l < 0) + return -errno; + if (l != q -w) + return -EIO; + } + + if (lseek(fd, n, SEEK_CUR) == (off_t) -1) + return -errno; + + q += n; + w = q; + } else if (n > 0) + q += n; + else + q++; + } + + if (q > w) { + l = write(fd, w, q - w); + if (l < 0) + return -errno; + if (l != q - w) + return -EIO; + } + + return q - (const uint8_t*) p; +} + +char* set_iovec_string_field(struct iovec *iovec, size_t *n_iovec, const char *field, const char *value) { + char *x; + + x = strappend(field, value); + if (x) + iovec[(*n_iovec)++] = IOVEC_MAKE_STRING(x); + return x; +} diff --git a/src/basic/io-util.h b/src/basic/io-util.h new file mode 100644 index 0000000..792a64a --- /dev/null +++ b/src/basic/io-util.h @@ -0,0 +1,75 @@ +/* SPDX-License-Identifier: LGPL-2.1+ */ +#pragma once + +#include <stdbool.h> +#include <stddef.h> +#include <stdint.h> +#include <sys/types.h> +#include <sys/uio.h> + +#include "macro.h" +#include "time-util.h" + +int flush_fd(int fd); + +ssize_t loop_read(int fd, void *buf, size_t nbytes, bool do_poll); +int loop_read_exact(int fd, void *buf, size_t nbytes, bool do_poll); +int loop_write(int fd, const void *buf, size_t nbytes, bool do_poll); + +int pipe_eof(int fd); + +int fd_wait_for_event(int fd, int event, usec_t timeout); + +ssize_t sparse_write(int fd, const void *p, size_t sz, size_t run_length); + +static inline size_t IOVEC_TOTAL_SIZE(const struct iovec *i, size_t n) { + size_t j, r = 0; + + for (j = 0; j < n; j++) + r += i[j].iov_len; + + return r; +} + +static inline size_t IOVEC_INCREMENT(struct iovec *i, size_t n, size_t k) { + size_t j; + + for (j = 0; j < n; j++) { + size_t sub; + + if (_unlikely_(k <= 0)) + break; + + sub = MIN(i[j].iov_len, k); + i[j].iov_len -= sub; + i[j].iov_base = (uint8_t*) i[j].iov_base + sub; + k -= sub; + } + + return k; +} + +static inline bool FILE_SIZE_VALID(uint64_t l) { + /* ftruncate() and friends take an unsigned file size, but actually cannot deal with file sizes larger than + * 2^63 since the kernel internally handles it as signed value. This call allows checking for this early. */ + + return (l >> 63) == 0; +} + +static inline bool FILE_SIZE_VALID_OR_INFINITY(uint64_t l) { + + /* Same as above, but allows one extra value: -1 as indication for infinity. */ + + if (l == (uint64_t) -1) + return true; + + return FILE_SIZE_VALID(l); + +} + +#define IOVEC_INIT(base, len) { .iov_base = (base), .iov_len = (len) } +#define IOVEC_MAKE(base, len) (struct iovec) IOVEC_INIT(base, len) +#define IOVEC_INIT_STRING(string) IOVEC_INIT((char*) string, strlen(string)) +#define IOVEC_MAKE_STRING(string) (struct iovec) IOVEC_INIT_STRING(string) + +char* set_iovec_string_field(struct iovec *iovec, size_t *n_iovec, const char *field, const char *value); diff --git a/src/basic/ioprio.h b/src/basic/ioprio.h new file mode 100644 index 0000000..3fb168d --- /dev/null +++ b/src/basic/ioprio.h @@ -0,0 +1,56 @@ +#ifndef IOPRIO_H +#define IOPRIO_H + +/* This is minimal version of Linux' linux/ioprio.h header file, which + * is licensed GPL2 */ + +#include <sys/syscall.h> +#include <unistd.h> + +/* + * Gives us 8 prio classes with 13-bits of data for each class + */ +#define IOPRIO_BITS 16 +#define IOPRIO_N_CLASSES 8 +#define IOPRIO_CLASS_SHIFT 13 +#define IOPRIO_PRIO_MASK ((1UL << IOPRIO_CLASS_SHIFT) - 1) + +#define IOPRIO_PRIO_CLASS(mask) ((mask) >> IOPRIO_CLASS_SHIFT) +#define IOPRIO_PRIO_DATA(mask) ((mask) & IOPRIO_PRIO_MASK) +#define IOPRIO_PRIO_VALUE(class, data) (((class) << IOPRIO_CLASS_SHIFT) | data) + +#define ioprio_valid(mask) (IOPRIO_PRIO_CLASS((mask)) != IOPRIO_CLASS_NONE) + +/* + * These are the io priority groups as implemented by CFQ. RT is the realtime + * class, it always gets premium service. BE is the best-effort scheduling + * class, the default for any process. IDLE is the idle scheduling class, it + * is only served when no one else is using the disk. + */ +enum { + IOPRIO_CLASS_NONE, + IOPRIO_CLASS_RT, + IOPRIO_CLASS_BE, + IOPRIO_CLASS_IDLE, +}; + +/* + * 8 best effort priority levels are supported + */ +#define IOPRIO_BE_NR (8) + +enum { + IOPRIO_WHO_PROCESS = 1, + IOPRIO_WHO_PGRP, + IOPRIO_WHO_USER, +}; + +static inline int ioprio_set(int which, int who, int ioprio) { + return syscall(__NR_ioprio_set, which, who, ioprio); +} + +static inline int ioprio_get(int which, int who) { + return syscall(__NR_ioprio_get, which, who); +} + +#endif diff --git a/src/basic/khash.c b/src/basic/khash.c new file mode 100644 index 0000000..847f257 --- /dev/null +++ b/src/basic/khash.c @@ -0,0 +1,322 @@ +/* SPDX-License-Identifier: LGPL-2.1+ */ + +#include <linux/if_alg.h> +#include <stdbool.h> +#include <sys/socket.h> + +#include "alloc-util.h" +#include "fd-util.h" +#include "hexdecoct.h" +#include "khash.h" +#include "macro.h" +#include "missing.h" +#include "string-util.h" +#include "util.h" + +/* On current kernels the maximum digest (according to "grep digestsize /proc/crypto | sort -u") is actually 32, but + * let's add some extra room, the few wasted bytes don't really matter... */ +#define LONGEST_DIGEST 128 + +struct khash { + int fd; + char *algorithm; + uint8_t digest[LONGEST_DIGEST+1]; + size_t digest_size; + bool digest_valid; +}; + +int khash_supported(void) { + static const union { + struct sockaddr sa; + struct sockaddr_alg alg; + } sa = { + .alg.salg_family = AF_ALG, + .alg.salg_type = "hash", + .alg.salg_name = "sha256", /* a very common algorithm */ + }; + + static int cached = -1; + + if (cached < 0) { + _cleanup_close_ int fd1 = -1, fd2 = -1; + uint8_t buf[LONGEST_DIGEST+1]; + + fd1 = socket(AF_ALG, SOCK_SEQPACKET|SOCK_CLOEXEC, 0); + if (fd1 < 0) { + /* The kernel returns EAFNOSUPPORT if AF_ALG is not supported at all */ + if (IN_SET(errno, EAFNOSUPPORT, EOPNOTSUPP)) + return (cached = false); + + return -errno; + } + + if (bind(fd1, &sa.sa, sizeof(sa)) < 0) { + /* The kernel returns ENOENT if the selected algorithm is not supported at all. We use a check + * for SHA256 as a proxy for whether the whole API is supported at all. After all it's one of + * the most common hash functions, and if it isn't supported, that's ample indication that + * something is really off. */ + + if (IN_SET(errno, ENOENT, EOPNOTSUPP)) + return (cached = false); + + return -errno; + } + + fd2 = accept4(fd1, NULL, 0, SOCK_CLOEXEC); + if (fd2 < 0) { + if (errno == EOPNOTSUPP) + return (cached = false); + + return -errno; + } + + if (recv(fd2, buf, sizeof(buf), 0) < 0) { + /* On some kernels we get ENOKEY for non-keyed hash functions (such as sha256), let's refuse + * using the API in those cases, since the kernel is + * broken. https://github.com/systemd/systemd/issues/8278 */ + + if (IN_SET(errno, ENOKEY, EOPNOTSUPP)) + return (cached = false); + } + + cached = true; + } + + return cached; +} + +int khash_new_with_key(khash **ret, const char *algorithm, const void *key, size_t key_size) { + union { + struct sockaddr sa; + struct sockaddr_alg alg; + } sa = { + .alg.salg_family = AF_ALG, + .alg.salg_type = "hash", + }; + + _cleanup_(khash_unrefp) khash *h = NULL; + _cleanup_close_ int fd = -1; + int supported; + ssize_t n; + + assert(ret); + assert(key || key_size == 0); + + /* Filter out an empty algorithm early, as we do not support an algorithm by that name. */ + if (isempty(algorithm)) + return -EINVAL; + + /* Overly long hash algorithm names we definitely do not support */ + if (strlen(algorithm) >= sizeof(sa.alg.salg_name)) + return -EOPNOTSUPP; + + supported = khash_supported(); + if (supported < 0) + return supported; + if (supported == 0) + return -EOPNOTSUPP; + + fd = socket(AF_ALG, SOCK_SEQPACKET|SOCK_CLOEXEC, 0); + if (fd < 0) + return -errno; + + strcpy((char*) sa.alg.salg_name, algorithm); + if (bind(fd, &sa.sa, sizeof(sa)) < 0) { + if (errno == ENOENT) + return -EOPNOTSUPP; + return -errno; + } + + if (key) { + if (setsockopt(fd, SOL_ALG, ALG_SET_KEY, key, key_size) < 0) + return -errno; + } + + h = new0(khash, 1); + if (!h) + return -ENOMEM; + + h->fd = accept4(fd, NULL, 0, SOCK_CLOEXEC); + if (h->fd < 0) + return -errno; + + h->algorithm = strdup(algorithm); + if (!h->algorithm) + return -ENOMEM; + + /* Temporary fix for rc kernel bug: https://bugzilla.redhat.com/show_bug.cgi?id=1395896 */ + (void) send(h->fd, NULL, 0, 0); + + /* Figure out the digest size */ + n = recv(h->fd, h->digest, sizeof(h->digest), 0); + if (n < 0) + return -errno; + if (n >= LONGEST_DIGEST) /* longer than what we expected? If so, we don't support this */ + return -EOPNOTSUPP; + + h->digest_size = (size_t) n; + h->digest_valid = true; + + /* Temporary fix for rc kernel bug: https://bugzilla.redhat.com/show_bug.cgi?id=1395896 */ + (void) send(h->fd, NULL, 0, 0); + + *ret = h; + h = NULL; + + return 0; +} + +int khash_new(khash **ret, const char *algorithm) { + return khash_new_with_key(ret, algorithm, NULL, 0); +} + +khash* khash_unref(khash *h) { + if (!h) + return NULL; + + safe_close(h->fd); + free(h->algorithm); + return mfree(h); +} + +int khash_dup(khash *h, khash **ret) { + _cleanup_(khash_unrefp) khash *copy = NULL; + + assert(h); + assert(ret); + + copy = newdup(khash, h, 1); + if (!copy) + return -ENOMEM; + + copy->fd = -1; + copy->algorithm = strdup(h->algorithm); + if (!copy->algorithm) + return -ENOMEM; + + copy->fd = accept4(h->fd, NULL, 0, SOCK_CLOEXEC); + if (copy->fd < 0) + return -errno; + + *ret = TAKE_PTR(copy); + + return 0; +} + +const char *khash_get_algorithm(khash *h) { + assert(h); + + return h->algorithm; +} + +size_t khash_get_size(khash *h) { + assert(h); + + return h->digest_size; +} + +int khash_reset(khash *h) { + ssize_t n; + + assert(h); + + n = send(h->fd, NULL, 0, 0); + if (n < 0) + return -errno; + + h->digest_valid = false; + + return 0; +} + +int khash_put(khash *h, const void *buffer, size_t size) { + ssize_t n; + + assert(h); + assert(buffer || size == 0); + + if (size <= 0) + return 0; + + n = send(h->fd, buffer, size, MSG_MORE); + if (n < 0) + return -errno; + + h->digest_valid = false; + + return 0; +} + +int khash_put_iovec(khash *h, const struct iovec *iovec, size_t n) { + struct msghdr mh = { + mh.msg_iov = (struct iovec*) iovec, + mh.msg_iovlen = n, + }; + ssize_t k; + + assert(h); + assert(iovec || n == 0); + + if (n <= 0) + return 0; + + k = sendmsg(h->fd, &mh, MSG_MORE); + if (k < 0) + return -errno; + + h->digest_valid = false; + + return 0; +} + +static int retrieve_digest(khash *h) { + ssize_t n; + + assert(h); + + if (h->digest_valid) + return 0; + + n = recv(h->fd, h->digest, h->digest_size, 0); + if (n < 0) + return n; + if ((size_t) n != h->digest_size) /* digest size changed? */ + return -EIO; + + h->digest_valid = true; + + return 0; +} + +int khash_digest_data(khash *h, const void **ret) { + int r; + + assert(h); + assert(ret); + + r = retrieve_digest(h); + if (r < 0) + return r; + + *ret = h->digest; + return 0; +} + +int khash_digest_string(khash *h, char **ret) { + int r; + char *p; + + assert(h); + assert(ret); + + r = retrieve_digest(h); + if (r < 0) + return r; + + p = hexmem(h->digest, h->digest_size); + if (!p) + return -ENOMEM; + + *ret = p; + return 0; +} diff --git a/src/basic/khash.h b/src/basic/khash.h new file mode 100644 index 0000000..a3013b9 --- /dev/null +++ b/src/basic/khash.h @@ -0,0 +1,37 @@ +/* SPDX-License-Identifier: LGPL-2.1+ */ +#pragma once + +#include <inttypes.h> +#include <sys/types.h> +#include <sys/uio.h> + +#include "macro.h" + +typedef struct khash khash; + +int khash_supported(void); + +/* For plain hash functions. Hash functions commonly supported on today's kernels are: crc32c, crct10dif, crc32, + * sha224, sha256, sha512, sha384, sha1, md5, md4, sha3-224, sha3-256, sha3-384, sha3-512, and more. */ +int khash_new(khash **ret, const char *algorithm); + +/* For keyed hash functions. Hash functions commonly supported on today's kernels are: hmac(sha256), cmac(aes), + * cmac(des3_ede), hmac(sha3-512), hmac(sha3-384), hmac(sha3-256), hmac(sha3-224), hmac(rmd160), hmac(rmd128), + * hmac(sha224), hmac(sha512), hmac(sha384), hmac(sha1), hmac(md5), and more. */ +int khash_new_with_key(khash **ret, const char *algorithm, const void *key, size_t key_size); + +int khash_dup(khash *h, khash **ret); +khash* khash_unref(khash *h); + +const char *khash_get_algorithm(khash *h); +size_t khash_get_size(khash *h); + +int khash_reset(khash *h); + +int khash_put(khash *h, const void *buffer, size_t size); +int khash_put_iovec(khash *h, const struct iovec *iovec, size_t n); + +int khash_digest_data(khash *h, const void **ret); +int khash_digest_string(khash *h, char **ret); + +DEFINE_TRIVIAL_CLEANUP_FUNC(khash*, khash_unref); diff --git a/src/basic/label.c b/src/basic/label.c new file mode 100644 index 0000000..12a7fb0 --- /dev/null +++ b/src/basic/label.c @@ -0,0 +1,64 @@ +/* SPDX-License-Identifier: LGPL-2.1+ */ + +#include <errno.h> +#include <sys/stat.h> +#include <unistd.h> + +#include "btrfs-util.h" +#include "label.h" +#include "macro.h" +#include "selinux-util.h" +#include "smack-util.h" + +int label_fix(const char *path, LabelFixFlags flags) { + int r, q; + + r = mac_selinux_fix(path, flags); + q = mac_smack_fix(path, flags); + + if (r < 0) + return r; + if (q < 0) + return q; + + return 0; +} + +int symlink_label(const char *old_path, const char *new_path) { + int r; + + assert(old_path); + assert(new_path); + + r = mac_selinux_create_file_prepare(new_path, S_IFLNK); + if (r < 0) + return r; + + if (symlink(old_path, new_path) < 0) + r = -errno; + + mac_selinux_create_file_clear(); + + if (r < 0) + return r; + + return mac_smack_fix(new_path, 0); +} + +int btrfs_subvol_make_label(const char *path) { + int r; + + assert(path); + + r = mac_selinux_create_file_prepare(path, S_IFDIR); + if (r < 0) + return r; + + r = btrfs_subvol_make(path); + mac_selinux_create_file_clear(); + + if (r < 0) + return r; + + return mac_smack_fix(path, 0); +} diff --git a/src/basic/label.h b/src/basic/label.h new file mode 100644 index 0000000..594fd65 --- /dev/null +++ b/src/basic/label.h @@ -0,0 +1,18 @@ +/* SPDX-License-Identifier: LGPL-2.1+ */ +#pragma once + +#include <stdbool.h> +#include <sys/types.h> + +typedef enum LabelFixFlags { + LABEL_IGNORE_ENOENT = 1 << 0, + LABEL_IGNORE_EROFS = 1 << 1, +} LabelFixFlags; + +int label_fix(const char *path, LabelFixFlags flags); + +int mkdir_label(const char *path, mode_t mode); +int mkdirat_label(int dirfd, const char *path, mode_t mode); +int symlink_label(const char *old_path, const char *new_path); + +int btrfs_subvol_make_label(const char *path); diff --git a/src/basic/list.h b/src/basic/list.h new file mode 100644 index 0000000..f7f9700 --- /dev/null +++ b/src/basic/list.h @@ -0,0 +1,171 @@ +/* SPDX-License-Identifier: LGPL-2.1+ */ +#pragma once + +#include "macro.h" + +/* The head of the linked list. Use this in the structure that shall + * contain the head of the linked list */ +#define LIST_HEAD(t,name) \ + t *name + +/* The pointers in the linked list's items. Use this in the item structure */ +#define LIST_FIELDS(t,name) \ + t *name##_next, *name##_prev + +/* Initialize the list's head */ +#define LIST_HEAD_INIT(head) \ + do { \ + (head) = NULL; \ + } while (false) + +/* Initialize a list item */ +#define LIST_INIT(name,item) \ + do { \ + typeof(*(item)) *_item = (item); \ + assert(_item); \ + _item->name##_prev = _item->name##_next = NULL; \ + } while (false) + +/* Prepend an item to the list */ +#define LIST_PREPEND(name,head,item) \ + do { \ + typeof(*(head)) **_head = &(head), *_item = (item); \ + assert(_item); \ + if ((_item->name##_next = *_head)) \ + _item->name##_next->name##_prev = _item; \ + _item->name##_prev = NULL; \ + *_head = _item; \ + } while (false) + +/* Append an item to the list */ +#define LIST_APPEND(name,head,item) \ + do { \ + typeof(*(head)) **_hhead = &(head), *_tail; \ + LIST_FIND_TAIL(name, *_hhead, _tail); \ + LIST_INSERT_AFTER(name, *_hhead, _tail, item); \ + } while (false) + +/* Remove an item from the list */ +#define LIST_REMOVE(name,head,item) \ + do { \ + typeof(*(head)) **_head = &(head), *_item = (item); \ + assert(_item); \ + if (_item->name##_next) \ + _item->name##_next->name##_prev = _item->name##_prev; \ + if (_item->name##_prev) \ + _item->name##_prev->name##_next = _item->name##_next; \ + else { \ + assert(*_head == _item); \ + *_head = _item->name##_next; \ + } \ + _item->name##_next = _item->name##_prev = NULL; \ + } while (false) + +/* Find the head of the list */ +#define LIST_FIND_HEAD(name,item,head) \ + do { \ + typeof(*(item)) *_item = (item); \ + if (!_item) \ + (head) = NULL; \ + else { \ + while (_item->name##_prev) \ + _item = _item->name##_prev; \ + (head) = _item; \ + } \ + } while (false) + +/* Find the tail of the list */ +#define LIST_FIND_TAIL(name,item,tail) \ + do { \ + typeof(*(item)) *_item = (item); \ + if (!_item) \ + (tail) = NULL; \ + else { \ + while (_item->name##_next) \ + _item = _item->name##_next; \ + (tail) = _item; \ + } \ + } while (false) + +/* Insert an item after another one (a = where, b = what) */ +#define LIST_INSERT_AFTER(name,head,a,b) \ + do { \ + typeof(*(head)) **_head = &(head), *_a = (a), *_b = (b); \ + assert(_b); \ + if (!_a) { \ + if ((_b->name##_next = *_head)) \ + _b->name##_next->name##_prev = _b; \ + _b->name##_prev = NULL; \ + *_head = _b; \ + } else { \ + if ((_b->name##_next = _a->name##_next)) \ + _b->name##_next->name##_prev = _b; \ + _b->name##_prev = _a; \ + _a->name##_next = _b; \ + } \ + } while (false) + +/* Insert an item before another one (a = where, b = what) */ +#define LIST_INSERT_BEFORE(name,head,a,b) \ + do { \ + typeof(*(head)) **_head = &(head), *_a = (a), *_b = (b); \ + assert(_b); \ + if (!_a) { \ + if (!*_head) { \ + _b->name##_next = NULL; \ + _b->name##_prev = NULL; \ + *_head = _b; \ + } else { \ + typeof(*(head)) *_tail = (head); \ + while (_tail->name##_next) \ + _tail = _tail->name##_next; \ + _b->name##_next = NULL; \ + _b->name##_prev = _tail; \ + _tail->name##_next = _b; \ + } \ + } else { \ + if ((_b->name##_prev = _a->name##_prev)) \ + _b->name##_prev->name##_next = _b; \ + else \ + *_head = _b; \ + _b->name##_next = _a; \ + _a->name##_prev = _b; \ + } \ + } while (false) + +#define LIST_JUST_US(name,item) \ + (!(item)->name##_prev && !(item)->name##_next) \ + +#define LIST_FOREACH(name,i,head) \ + for ((i) = (head); (i); (i) = (i)->name##_next) + +#define LIST_FOREACH_SAFE(name,i,n,head) \ + for ((i) = (head); (i) && (((n) = (i)->name##_next), 1); (i) = (n)) + +#define LIST_FOREACH_BEFORE(name,i,p) \ + for ((i) = (p)->name##_prev; (i); (i) = (i)->name##_prev) + +#define LIST_FOREACH_AFTER(name,i,p) \ + for ((i) = (p)->name##_next; (i); (i) = (i)->name##_next) + +/* Iterate through all the members of the list p is included in, but skip over p */ +#define LIST_FOREACH_OTHERS(name,i,p) \ + for (({ \ + (i) = (p); \ + while ((i) && (i)->name##_prev) \ + (i) = (i)->name##_prev; \ + if ((i) == (p)) \ + (i) = (p)->name##_next; \ + }); \ + (i); \ + (i) = (i)->name##_next == (p) ? (p)->name##_next : (i)->name##_next) + +/* Loop starting from p->next until p->prev. + p can be adjusted meanwhile. */ +#define LIST_LOOP_BUT_ONE(name,i,head,p) \ + for ((i) = (p)->name##_next ? (p)->name##_next : (head); \ + (i) != (p); \ + (i) = (i)->name##_next ? (i)->name##_next : (head)) + +#define LIST_IS_EMPTY(head) \ + (!(head)) diff --git a/src/basic/locale-util.c b/src/basic/locale-util.c new file mode 100644 index 0000000..fc1577a --- /dev/null +++ b/src/basic/locale-util.c @@ -0,0 +1,460 @@ +/* SPDX-License-Identifier: LGPL-2.1+ */ + +#include <dirent.h> +#include <errno.h> +#include <fcntl.h> +#include <ftw.h> +#include <langinfo.h> +#include <libintl.h> +#include <locale.h> +#include <stddef.h> +#include <stdint.h> +#include <stdlib.h> +#include <string.h> +#include <sys/mman.h> +#include <sys/stat.h> + +#include "def.h" +#include "dirent-util.h" +#include "env-util.h" +#include "fd-util.h" +#include "hashmap.h" +#include "locale-util.h" +#include "path-util.h" +#include "set.h" +#include "string-table.h" +#include "string-util.h" +#include "strv.h" +#include "utf8.h" + +static int add_locales_from_archive(Set *locales) { + /* Stolen from glibc... */ + + struct locarhead { + uint32_t magic; + /* Serial number. */ + uint32_t serial; + /* Name hash table. */ + uint32_t namehash_offset; + uint32_t namehash_used; + uint32_t namehash_size; + /* String table. */ + uint32_t string_offset; + uint32_t string_used; + uint32_t string_size; + /* Table with locale records. */ + uint32_t locrectab_offset; + uint32_t locrectab_used; + uint32_t locrectab_size; + /* MD5 sum hash table. */ + uint32_t sumhash_offset; + uint32_t sumhash_used; + uint32_t sumhash_size; + }; + + struct namehashent { + /* Hash value of the name. */ + uint32_t hashval; + /* Offset of the name in the string table. */ + uint32_t name_offset; + /* Offset of the locale record. */ + uint32_t locrec_offset; + }; + + const struct locarhead *h; + const struct namehashent *e; + const void *p = MAP_FAILED; + _cleanup_close_ int fd = -1; + size_t sz = 0; + struct stat st; + size_t i; + int r; + + fd = open("/usr/lib/locale/locale-archive", O_RDONLY|O_NOCTTY|O_CLOEXEC); + if (fd < 0) + return errno == ENOENT ? 0 : -errno; + + if (fstat(fd, &st) < 0) + return -errno; + + if (!S_ISREG(st.st_mode)) + return -EBADMSG; + + if (st.st_size < (off_t) sizeof(struct locarhead)) + return -EBADMSG; + + p = mmap(NULL, st.st_size, PROT_READ, MAP_SHARED, fd, 0); + if (p == MAP_FAILED) + return -errno; + + h = (const struct locarhead *) p; + if (h->magic != 0xde020109 || + h->namehash_offset + h->namehash_size > st.st_size || + h->string_offset + h->string_size > st.st_size || + h->locrectab_offset + h->locrectab_size > st.st_size || + h->sumhash_offset + h->sumhash_size > st.st_size) { + r = -EBADMSG; + goto finish; + } + + e = (const struct namehashent*) ((const uint8_t*) p + h->namehash_offset); + for (i = 0; i < h->namehash_size; i++) { + char *z; + + if (e[i].locrec_offset == 0) + continue; + + if (!utf8_is_valid((char*) p + e[i].name_offset)) + continue; + + z = strdup((char*) p + e[i].name_offset); + if (!z) { + r = -ENOMEM; + goto finish; + } + + r = set_consume(locales, z); + if (r < 0) + goto finish; + } + + r = 0; + + finish: + if (p != MAP_FAILED) + munmap((void*) p, sz); + + return r; +} + +static int add_locales_from_libdir (Set *locales) { + _cleanup_closedir_ DIR *dir = NULL; + struct dirent *entry; + int r; + + dir = opendir("/usr/lib/locale"); + if (!dir) + return errno == ENOENT ? 0 : -errno; + + FOREACH_DIRENT(entry, dir, return -errno) { + char *z; + + dirent_ensure_type(dir, entry); + + if (entry->d_type != DT_DIR) + continue; + + z = strdup(entry->d_name); + if (!z) + return -ENOMEM; + + r = set_consume(locales, z); + if (r < 0 && r != -EEXIST) + return r; + } + + return 0; +} + +int get_locales(char ***ret) { + _cleanup_set_free_ Set *locales = NULL; + _cleanup_strv_free_ char **l = NULL; + int r; + + locales = set_new(&string_hash_ops); + if (!locales) + return -ENOMEM; + + r = add_locales_from_archive(locales); + if (r < 0 && r != -ENOENT) + return r; + + r = add_locales_from_libdir(locales); + if (r < 0) + return r; + + l = set_get_strv(locales); + if (!l) + return -ENOMEM; + + strv_sort(l); + + *ret = TAKE_PTR(l); + + return 0; +} + +bool locale_is_valid(const char *name) { + + if (isempty(name)) + return false; + + if (strlen(name) >= 128) + return false; + + if (!utf8_is_valid(name)) + return false; + + if (!filename_is_valid(name)) + return false; + + if (!string_is_safe(name)) + return false; + + return true; +} + +void init_gettext(void) { + setlocale(LC_ALL, ""); + textdomain(GETTEXT_PACKAGE); +} + +bool is_locale_utf8(void) { + const char *set; + static int cached_answer = -1; + + /* Note that we default to 'true' here, since today UTF8 is + * pretty much supported everywhere. */ + + if (cached_answer >= 0) + goto out; + + if (!setlocale(LC_ALL, "")) { + cached_answer = true; + goto out; + } + + set = nl_langinfo(CODESET); + if (!set) { + cached_answer = true; + goto out; + } + + if (streq(set, "UTF-8")) { + cached_answer = true; + goto out; + } + + /* For LC_CTYPE=="C" return true, because CTYPE is effectly + * unset and everything can do to UTF-8 nowadays. */ + set = setlocale(LC_CTYPE, NULL); + if (!set) { + cached_answer = true; + goto out; + } + + /* Check result, but ignore the result if C was set + * explicitly. */ + cached_answer = + STR_IN_SET(set, "C", "POSIX") && + !getenv("LC_ALL") && + !getenv("LC_CTYPE") && + !getenv("LANG"); + +out: + return (bool) cached_answer; +} + +static thread_local Set *keymaps = NULL; + +static int nftw_cb( + const char *fpath, + const struct stat *sb, + int tflag, + struct FTW *ftwbuf) { + + char *p, *e; + int r; + + if (tflag != FTW_F) + return 0; + + if (!endswith(fpath, ".map") && + !endswith(fpath, ".map.gz")) + return 0; + + p = strdup(basename(fpath)); + if (!p) + return FTW_STOP; + + e = endswith(p, ".map"); + if (e) + *e = 0; + + e = endswith(p, ".map.gz"); + if (e) + *e = 0; + + r = set_consume(keymaps, p); + if (r < 0 && r != -EEXIST) + return r; + + return 0; +} + +int get_keymaps(char ***ret) { + _cleanup_strv_free_ char **l = NULL; + const char *dir; + int r; + + keymaps = set_new(&string_hash_ops); + if (!keymaps) + return -ENOMEM; + + NULSTR_FOREACH(dir, KBD_KEYMAP_DIRS) { + r = nftw(dir, nftw_cb, 20, FTW_PHYS|FTW_ACTIONRETVAL); + + if (r == FTW_STOP) + log_debug("Directory not found %s", dir); + else if (r < 0) + log_debug_errno(r, "Can't add keymap: %m"); + } + + l = set_get_strv(keymaps); + if (!l) { + set_free_free(keymaps); + return -ENOMEM; + } + + set_free(keymaps); + + if (strv_isempty(l)) + return -ENOENT; + + strv_sort(l); + + *ret = TAKE_PTR(l); + + return 0; +} + +bool keymap_is_valid(const char *name) { + + if (isempty(name)) + return false; + + if (strlen(name) >= 128) + return false; + + if (!utf8_is_valid(name)) + return false; + + if (!filename_is_valid(name)) + return false; + + if (!string_is_safe(name)) + return false; + + return true; +} + +static bool emoji_enabled(void) { + static int cached_emoji_enabled = -1; + + if (cached_emoji_enabled < 0) { + int val; + + val = getenv_bool("SYSTEMD_EMOJI"); + if (val < 0) + cached_emoji_enabled = + is_locale_utf8() && + !STRPTR_IN_SET(getenv("TERM"), "dumb", "linux"); + else + cached_emoji_enabled = val; + } + + return cached_emoji_enabled; +} + +const char *special_glyph(SpecialGlyph code) { + + /* A list of a number of interesting unicode glyphs we can use to decorate our output. It's probably wise to be + * conservative here, and primarily stick to the glyphs defined in the eurlatgr font, so that display still + * works reasonably well on the Linux console. For details see: + * + * http://git.altlinux.org/people/legion/packages/kbd.git?p=kbd.git;a=blob;f=data/consolefonts/README.eurlatgr + */ + + static const char* const draw_table[2][_SPECIAL_GLYPH_MAX] = { + /* ASCII fallback */ + [false] = { + [SPECIAL_GLYPH_TREE_VERTICAL] = "| ", + [SPECIAL_GLYPH_TREE_BRANCH] = "|-", + [SPECIAL_GLYPH_TREE_RIGHT] = "`-", + [SPECIAL_GLYPH_TREE_SPACE] = " ", + [SPECIAL_GLYPH_TRIANGULAR_BULLET] = ">", + [SPECIAL_GLYPH_BLACK_CIRCLE] = "*", + [SPECIAL_GLYPH_BULLET] = "*", + [SPECIAL_GLYPH_ARROW] = "->", + [SPECIAL_GLYPH_MDASH] = "-", + [SPECIAL_GLYPH_ELLIPSIS] = "...", + [SPECIAL_GLYPH_MU] = "u", + [SPECIAL_GLYPH_CHECK_MARK] = "+", + [SPECIAL_GLYPH_CROSS_MARK] = "-", + [SPECIAL_GLYPH_ECSTATIC_SMILEY] = ":-]", + [SPECIAL_GLYPH_HAPPY_SMILEY] = ":-}", + [SPECIAL_GLYPH_SLIGHTLY_HAPPY_SMILEY] = ":-)", + [SPECIAL_GLYPH_NEUTRAL_SMILEY] = ":-|", + [SPECIAL_GLYPH_SLIGHTLY_UNHAPPY_SMILEY] = ":-(", + [SPECIAL_GLYPH_UNHAPPY_SMILEY] = ":-{️", + [SPECIAL_GLYPH_DEPRESSED_SMILEY] = ":-[", + }, + + /* UTF-8 */ + [true] = { + [SPECIAL_GLYPH_TREE_VERTICAL] = "\342\224\202 ", /* │ */ + [SPECIAL_GLYPH_TREE_BRANCH] = "\342\224\234\342\224\200", /* ├─ */ + [SPECIAL_GLYPH_TREE_RIGHT] = "\342\224\224\342\224\200", /* └─ */ + [SPECIAL_GLYPH_TREE_SPACE] = " ", /* */ + [SPECIAL_GLYPH_TRIANGULAR_BULLET] = "\342\200\243", /* ‣ */ + [SPECIAL_GLYPH_BLACK_CIRCLE] = "\342\227\217", /* ● */ + [SPECIAL_GLYPH_BULLET] = "\342\200\242", /* • */ + [SPECIAL_GLYPH_ARROW] = "\342\206\222", /* → */ + [SPECIAL_GLYPH_MDASH] = "\342\200\223", /* – */ + [SPECIAL_GLYPH_ELLIPSIS] = "\342\200\246", /* … */ + [SPECIAL_GLYPH_MU] = "\316\274", /* μ */ + [SPECIAL_GLYPH_CHECK_MARK] = "\342\234\223", /* ✓ */ + [SPECIAL_GLYPH_CROSS_MARK] = "\342\234\227", /* ✗ */ + [SPECIAL_GLYPH_ECSTATIC_SMILEY] = "\360\237\230\207", /* 😇 */ + [SPECIAL_GLYPH_HAPPY_SMILEY] = "\360\237\230\200", /* 😀 */ + [SPECIAL_GLYPH_SLIGHTLY_HAPPY_SMILEY] = "\360\237\231\202", /* 🙂 */ + [SPECIAL_GLYPH_NEUTRAL_SMILEY] = "\360\237\230\220", /* 😐 */ + [SPECIAL_GLYPH_SLIGHTLY_UNHAPPY_SMILEY] = "\360\237\231\201", /* 🙁 */ + [SPECIAL_GLYPH_UNHAPPY_SMILEY] = "\360\237\230\250", /* 😨️️ */ + [SPECIAL_GLYPH_DEPRESSED_SMILEY] = "\360\237\244\242", /* 🤢 */ + }, + }; + + assert(code < _SPECIAL_GLYPH_MAX); + + return draw_table[code >= _SPECIAL_GLYPH_FIRST_SMILEY ? emoji_enabled() : is_locale_utf8()][code]; +} + +void locale_variables_free(char *l[_VARIABLE_LC_MAX]) { + LocaleVariable i; + + if (!l) + return; + + for (i = 0; i < _VARIABLE_LC_MAX; i++) + l[i] = mfree(l[i]); +} + +static const char * const locale_variable_table[_VARIABLE_LC_MAX] = { + [VARIABLE_LANG] = "LANG", + [VARIABLE_LANGUAGE] = "LANGUAGE", + [VARIABLE_LC_CTYPE] = "LC_CTYPE", + [VARIABLE_LC_NUMERIC] = "LC_NUMERIC", + [VARIABLE_LC_TIME] = "LC_TIME", + [VARIABLE_LC_COLLATE] = "LC_COLLATE", + [VARIABLE_LC_MONETARY] = "LC_MONETARY", + [VARIABLE_LC_MESSAGES] = "LC_MESSAGES", + [VARIABLE_LC_PAPER] = "LC_PAPER", + [VARIABLE_LC_NAME] = "LC_NAME", + [VARIABLE_LC_ADDRESS] = "LC_ADDRESS", + [VARIABLE_LC_TELEPHONE] = "LC_TELEPHONE", + [VARIABLE_LC_MEASUREMENT] = "LC_MEASUREMENT", + [VARIABLE_LC_IDENTIFICATION] = "LC_IDENTIFICATION" +}; + +DEFINE_STRING_TABLE_LOOKUP(locale_variable, LocaleVariable); diff --git a/src/basic/locale-util.h b/src/basic/locale-util.h new file mode 100644 index 0000000..e64f0ce --- /dev/null +++ b/src/basic/locale-util.h @@ -0,0 +1,84 @@ +/* SPDX-License-Identifier: LGPL-2.1+ */ +#pragma once + +#include <libintl.h> +#include <stdbool.h> +#include <locale.h> + +#include "macro.h" + +typedef enum LocaleVariable { + /* We don't list LC_ALL here on purpose. People should be + * using LANG instead. */ + + VARIABLE_LANG, + VARIABLE_LANGUAGE, + VARIABLE_LC_CTYPE, + VARIABLE_LC_NUMERIC, + VARIABLE_LC_TIME, + VARIABLE_LC_COLLATE, + VARIABLE_LC_MONETARY, + VARIABLE_LC_MESSAGES, + VARIABLE_LC_PAPER, + VARIABLE_LC_NAME, + VARIABLE_LC_ADDRESS, + VARIABLE_LC_TELEPHONE, + VARIABLE_LC_MEASUREMENT, + VARIABLE_LC_IDENTIFICATION, + _VARIABLE_LC_MAX, + _VARIABLE_LC_INVALID = -1 +} LocaleVariable; + +int get_locales(char ***l); +bool locale_is_valid(const char *name); + +#define _(String) gettext(String) +#define N_(String) String +void init_gettext(void); + +bool is_locale_utf8(void); + +typedef enum { + SPECIAL_GLYPH_TREE_VERTICAL, + SPECIAL_GLYPH_TREE_BRANCH, + SPECIAL_GLYPH_TREE_RIGHT, + SPECIAL_GLYPH_TREE_SPACE, + SPECIAL_GLYPH_TRIANGULAR_BULLET, + SPECIAL_GLYPH_BLACK_CIRCLE, + SPECIAL_GLYPH_BULLET, + SPECIAL_GLYPH_ARROW, + SPECIAL_GLYPH_MDASH, + SPECIAL_GLYPH_ELLIPSIS, + SPECIAL_GLYPH_MU, + SPECIAL_GLYPH_CHECK_MARK, + SPECIAL_GLYPH_CROSS_MARK, + _SPECIAL_GLYPH_FIRST_SMILEY, + SPECIAL_GLYPH_ECSTATIC_SMILEY = _SPECIAL_GLYPH_FIRST_SMILEY, + SPECIAL_GLYPH_HAPPY_SMILEY, + SPECIAL_GLYPH_SLIGHTLY_HAPPY_SMILEY, + SPECIAL_GLYPH_NEUTRAL_SMILEY, + SPECIAL_GLYPH_SLIGHTLY_UNHAPPY_SMILEY, + SPECIAL_GLYPH_UNHAPPY_SMILEY, + SPECIAL_GLYPH_DEPRESSED_SMILEY, + _SPECIAL_GLYPH_MAX +} SpecialGlyph; + +const char *special_glyph(SpecialGlyph code) _const_; + +const char* locale_variable_to_string(LocaleVariable i) _const_; +LocaleVariable locale_variable_from_string(const char *s) _pure_; + +int get_keymaps(char ***l); +bool keymap_is_valid(const char *name); + +static inline void freelocalep(locale_t *p) { + if (*p == (locale_t) 0) + return; + + freelocale(*p); +} + +void locale_variables_free(char* l[_VARIABLE_LC_MAX]); +static inline void locale_variables_freep(char*(*l)[_VARIABLE_LC_MAX]) { + locale_variables_free(*l); +} diff --git a/src/basic/log.c b/src/basic/log.c new file mode 100644 index 0000000..0486027 --- /dev/null +++ b/src/basic/log.c @@ -0,0 +1,1324 @@ +/* SPDX-License-Identifier: LGPL-2.1+ */ + +#include <errno.h> +#include <fcntl.h> +#include <inttypes.h> +#include <limits.h> +#include <stdarg.h> +#include <stddef.h> +#include <stdio.h> +#include <string.h> +#include <sys/signalfd.h> +#include <sys/socket.h> +#include <sys/time.h> +#include <sys/uio.h> +#include <sys/un.h> +#include <time.h> +#include <unistd.h> + +#include "sd-messages.h" + +#include "alloc-util.h" +#include "fd-util.h" +#include "format-util.h" +#include "io-util.h" +#include "log.h" +#include "macro.h" +#include "missing.h" +#include "parse-util.h" +#include "proc-cmdline.h" +#include "process-util.h" +#include "signal-util.h" +#include "socket-util.h" +#include "stdio-util.h" +#include "string-table.h" +#include "string-util.h" +#include "syslog-util.h" +#include "terminal-util.h" +#include "time-util.h" +#include "utf8.h" +#include "util.h" + +#define SNDBUF_SIZE (8*1024*1024) + +static LogTarget log_target = LOG_TARGET_CONSOLE; +static int log_max_level[] = {LOG_INFO, LOG_INFO}; +assert_cc(ELEMENTSOF(log_max_level) == _LOG_REALM_MAX); +static int log_facility = LOG_DAEMON; + +static int console_fd = STDERR_FILENO; +static int syslog_fd = -1; +static int kmsg_fd = -1; +static int journal_fd = -1; + +static bool syslog_is_stream = false; + +static bool show_color = false; +static bool show_location = false; + +static bool upgrade_syslog_to_journal = false; +static bool always_reopen_console = false; +static bool open_when_needed = false; +static bool prohibit_ipc = false; + +/* Akin to glibc's __abort_msg; which is private and we hence cannot + * use here. */ +static char *log_abort_msg = NULL; + +/* An assert to use in logging functions that does not call recursively + * into our logging functions (since that might lead to a loop). */ +#define assert_raw(expr) \ + do { \ + if (_unlikely_(!(expr))) { \ + fputs(#expr "\n", stderr); \ + abort(); \ + } \ + } while (false) + +static void log_close_console(void) { + console_fd = safe_close_above_stdio(console_fd); +} + +static int log_open_console(void) { + + if (!always_reopen_console) { + console_fd = STDERR_FILENO; + return 0; + } + + if (console_fd < 3) { + console_fd = open_terminal("/dev/console", O_WRONLY|O_NOCTTY|O_CLOEXEC); + if (console_fd < 0) + return console_fd; + + console_fd = fd_move_above_stdio(console_fd); + } + + return 0; +} + +static void log_close_kmsg(void) { + kmsg_fd = safe_close(kmsg_fd); +} + +static int log_open_kmsg(void) { + + if (kmsg_fd >= 0) + return 0; + + kmsg_fd = open("/dev/kmsg", O_WRONLY|O_NOCTTY|O_CLOEXEC); + if (kmsg_fd < 0) + return -errno; + + kmsg_fd = fd_move_above_stdio(kmsg_fd); + return 0; +} + +static void log_close_syslog(void) { + syslog_fd = safe_close(syslog_fd); +} + +static int create_log_socket(int type) { + struct timeval tv; + int fd; + + fd = socket(AF_UNIX, type|SOCK_CLOEXEC, 0); + if (fd < 0) + return -errno; + + fd = fd_move_above_stdio(fd); + (void) fd_inc_sndbuf(fd, SNDBUF_SIZE); + + /* We need a blocking fd here since we'd otherwise lose messages way too early. However, let's not hang forever + * in the unlikely case of a deadlock. */ + if (getpid_cached() == 1) + timeval_store(&tv, 10 * USEC_PER_MSEC); + else + timeval_store(&tv, 10 * USEC_PER_SEC); + (void) setsockopt(fd, SOL_SOCKET, SO_SNDTIMEO, &tv, sizeof(tv)); + + return fd; +} + +static int log_open_syslog(void) { + + static const union sockaddr_union sa = { + .un.sun_family = AF_UNIX, + .un.sun_path = "/dev/log", + }; + + int r; + + if (syslog_fd >= 0) + return 0; + + syslog_fd = create_log_socket(SOCK_DGRAM); + if (syslog_fd < 0) { + r = syslog_fd; + goto fail; + } + + if (connect(syslog_fd, &sa.sa, SOCKADDR_UN_LEN(sa.un)) < 0) { + safe_close(syslog_fd); + + /* Some legacy syslog systems still use stream + * sockets. They really shouldn't. But what can we + * do... */ + syslog_fd = create_log_socket(SOCK_STREAM); + if (syslog_fd < 0) { + r = syslog_fd; + goto fail; + } + + if (connect(syslog_fd, &sa.sa, SOCKADDR_UN_LEN(sa.un)) < 0) { + r = -errno; + goto fail; + } + + syslog_is_stream = true; + } else + syslog_is_stream = false; + + return 0; + +fail: + log_close_syslog(); + return r; +} + +static void log_close_journal(void) { + journal_fd = safe_close(journal_fd); +} + +static int log_open_journal(void) { + + static const union sockaddr_union sa = { + .un.sun_family = AF_UNIX, + .un.sun_path = "/run/systemd/journal/socket", + }; + + int r; + + if (journal_fd >= 0) + return 0; + + journal_fd = create_log_socket(SOCK_DGRAM); + if (journal_fd < 0) { + r = journal_fd; + goto fail; + } + + if (connect(journal_fd, &sa.sa, SOCKADDR_UN_LEN(sa.un)) < 0) { + r = -errno; + goto fail; + } + + return 0; + +fail: + log_close_journal(); + return r; +} + +int log_open(void) { + int r; + + /* Do not call from library code. */ + + /* If we don't use the console we close it here, to not get + * killed by SAK. If we don't use syslog we close it here so + * that we are not confused by somebody deleting the socket in + * the fs, and to make sure we don't use it if prohibit_ipc is + * set. If we don't use /dev/kmsg we still keep it open, + * because there is no reason to close it. */ + + if (log_target == LOG_TARGET_NULL) { + log_close_journal(); + log_close_syslog(); + log_close_console(); + return 0; + } + + if (log_target != LOG_TARGET_AUTO || + getpid_cached() == 1 || + isatty(STDERR_FILENO) <= 0) { + + if (!prohibit_ipc && + IN_SET(log_target, LOG_TARGET_AUTO, + LOG_TARGET_JOURNAL_OR_KMSG, + LOG_TARGET_JOURNAL)) { + r = log_open_journal(); + if (r >= 0) { + log_close_syslog(); + log_close_console(); + return r; + } + } + + if (!prohibit_ipc && + IN_SET(log_target, LOG_TARGET_SYSLOG_OR_KMSG, + LOG_TARGET_SYSLOG)) { + r = log_open_syslog(); + if (r >= 0) { + log_close_journal(); + log_close_console(); + return r; + } + } + + if (IN_SET(log_target, LOG_TARGET_AUTO, + LOG_TARGET_JOURNAL_OR_KMSG, + LOG_TARGET_SYSLOG_OR_KMSG, + LOG_TARGET_KMSG)) { + r = log_open_kmsg(); + if (r >= 0) { + log_close_journal(); + log_close_syslog(); + log_close_console(); + return r; + } + } + } + + log_close_journal(); + log_close_syslog(); + + return log_open_console(); +} + +void log_set_target(LogTarget target) { + assert(target >= 0); + assert(target < _LOG_TARGET_MAX); + + if (upgrade_syslog_to_journal) { + if (target == LOG_TARGET_SYSLOG) + target = LOG_TARGET_JOURNAL; + else if (target == LOG_TARGET_SYSLOG_OR_KMSG) + target = LOG_TARGET_JOURNAL_OR_KMSG; + } + + log_target = target; +} + +void log_close(void) { + /* Do not call from library code. */ + + log_close_journal(); + log_close_syslog(); + log_close_kmsg(); + log_close_console(); +} + +void log_forget_fds(void) { + /* Do not call from library code. */ + + console_fd = kmsg_fd = syslog_fd = journal_fd = -1; +} + +void log_set_max_level_realm(LogRealm realm, int level) { + assert((level & LOG_PRIMASK) == level); + assert(realm < ELEMENTSOF(log_max_level)); + + log_max_level[realm] = level; +} + +void log_set_facility(int facility) { + log_facility = facility; +} + +static int write_to_console( + int level, + int error, + const char *file, + int line, + const char *func, + const char *buffer) { + + char location[256], prefix[1 + DECIMAL_STR_MAX(int) + 2]; + struct iovec iovec[6] = {}; + bool highlight; + size_t n = 0; + + if (console_fd < 0) + return 0; + + if (log_target == LOG_TARGET_CONSOLE_PREFIXED) { + xsprintf(prefix, "<%i>", level); + iovec[n++] = IOVEC_MAKE_STRING(prefix); + } + + highlight = LOG_PRI(level) <= LOG_ERR && show_color; + + if (show_location) { + (void) snprintf(location, sizeof location, "(%s:%i) ", file, line); + iovec[n++] = IOVEC_MAKE_STRING(location); + } + + if (highlight) + iovec[n++] = IOVEC_MAKE_STRING(ANSI_HIGHLIGHT_RED); + iovec[n++] = IOVEC_MAKE_STRING(buffer); + if (highlight) + iovec[n++] = IOVEC_MAKE_STRING(ANSI_NORMAL); + iovec[n++] = IOVEC_MAKE_STRING("\n"); + + if (writev(console_fd, iovec, n) < 0) { + + if (errno == EIO && getpid_cached() == 1) { + + /* If somebody tried to kick us from our + * console tty (via vhangup() or suchlike), + * try to reconnect */ + + log_close_console(); + log_open_console(); + + if (console_fd < 0) + return 0; + + if (writev(console_fd, iovec, n) < 0) + return -errno; + } else + return -errno; + } + + return 1; +} + +static int write_to_syslog( + int level, + int error, + const char *file, + int line, + const char *func, + const char *buffer) { + + char header_priority[2 + DECIMAL_STR_MAX(int) + 1], + header_time[64], + header_pid[4 + DECIMAL_STR_MAX(pid_t) + 1]; + struct iovec iovec[5] = {}; + struct msghdr msghdr = { + .msg_iov = iovec, + .msg_iovlen = ELEMENTSOF(iovec), + }; + time_t t; + struct tm tm; + + if (syslog_fd < 0) + return 0; + + xsprintf(header_priority, "<%i>", level); + + t = (time_t) (now(CLOCK_REALTIME) / USEC_PER_SEC); + if (!localtime_r(&t, &tm)) + return -EINVAL; + + if (strftime(header_time, sizeof(header_time), "%h %e %T ", &tm) <= 0) + return -EINVAL; + + xsprintf(header_pid, "["PID_FMT"]: ", getpid_cached()); + + iovec[0] = IOVEC_MAKE_STRING(header_priority); + iovec[1] = IOVEC_MAKE_STRING(header_time); + iovec[2] = IOVEC_MAKE_STRING(program_invocation_short_name); + iovec[3] = IOVEC_MAKE_STRING(header_pid); + iovec[4] = IOVEC_MAKE_STRING(buffer); + + /* When using syslog via SOCK_STREAM separate the messages by NUL chars */ + if (syslog_is_stream) + iovec[4].iov_len++; + + for (;;) { + ssize_t n; + + n = sendmsg(syslog_fd, &msghdr, MSG_NOSIGNAL); + if (n < 0) + return -errno; + + if (!syslog_is_stream || + (size_t) n >= IOVEC_TOTAL_SIZE(iovec, ELEMENTSOF(iovec))) + break; + + IOVEC_INCREMENT(iovec, ELEMENTSOF(iovec), n); + } + + return 1; +} + +static int write_to_kmsg( + int level, + int error, + const char *file, + int line, + const char *func, + const char *buffer) { + + char header_priority[2 + DECIMAL_STR_MAX(int) + 1], + header_pid[4 + DECIMAL_STR_MAX(pid_t) + 1]; + struct iovec iovec[5] = {}; + + if (kmsg_fd < 0) + return 0; + + xsprintf(header_priority, "<%i>", level); + xsprintf(header_pid, "["PID_FMT"]: ", getpid_cached()); + + iovec[0] = IOVEC_MAKE_STRING(header_priority); + iovec[1] = IOVEC_MAKE_STRING(program_invocation_short_name); + iovec[2] = IOVEC_MAKE_STRING(header_pid); + iovec[3] = IOVEC_MAKE_STRING(buffer); + iovec[4] = IOVEC_MAKE_STRING("\n"); + + if (writev(kmsg_fd, iovec, ELEMENTSOF(iovec)) < 0) + return -errno; + + return 1; +} + +static int log_do_header( + char *header, + size_t size, + int level, + int error, + const char *file, int line, const char *func, + const char *object_field, const char *object, + const char *extra_field, const char *extra) { + int r; + + error = IS_SYNTHETIC_ERRNO(error) ? 0 : ERRNO_VALUE(error); + + r = snprintf(header, size, + "PRIORITY=%i\n" + "SYSLOG_FACILITY=%i\n" + "%s%.256s%s" /* CODE_FILE */ + "%s%.*i%s" /* CODE_LINE */ + "%s%.256s%s" /* CODE_FUNC */ + "%s%.*i%s" /* ERRNO */ + "%s%.256s%s" /* object */ + "%s%.256s%s" /* extra */ + "SYSLOG_IDENTIFIER=%.256s\n", + LOG_PRI(level), + LOG_FAC(level), + isempty(file) ? "" : "CODE_FILE=", + isempty(file) ? "" : file, + isempty(file) ? "" : "\n", + line ? "CODE_LINE=" : "", + line ? 1 : 0, line, /* %.0d means no output too, special case for 0 */ + line ? "\n" : "", + isempty(func) ? "" : "CODE_FUNC=", + isempty(func) ? "" : func, + isempty(func) ? "" : "\n", + error ? "ERRNO=" : "", + error ? 1 : 0, error, + error ? "\n" : "", + isempty(object) ? "" : object_field, + isempty(object) ? "" : object, + isempty(object) ? "" : "\n", + isempty(extra) ? "" : extra_field, + isempty(extra) ? "" : extra, + isempty(extra) ? "" : "\n", + program_invocation_short_name); + assert_raw((size_t) r < size); + + return 0; +} + +static int write_to_journal( + int level, + int error, + const char *file, + int line, + const char *func, + const char *object_field, + const char *object, + const char *extra_field, + const char *extra, + const char *buffer) { + + char header[LINE_MAX]; + struct iovec iovec[4] = {}; + struct msghdr mh = {}; + + if (journal_fd < 0) + return 0; + + log_do_header(header, sizeof(header), level, error, file, line, func, object_field, object, extra_field, extra); + + iovec[0] = IOVEC_MAKE_STRING(header); + iovec[1] = IOVEC_MAKE_STRING("MESSAGE="); + iovec[2] = IOVEC_MAKE_STRING(buffer); + iovec[3] = IOVEC_MAKE_STRING("\n"); + + mh.msg_iov = iovec; + mh.msg_iovlen = ELEMENTSOF(iovec); + + if (sendmsg(journal_fd, &mh, MSG_NOSIGNAL) < 0) + return -errno; + + return 1; +} + +int log_dispatch_internal( + int level, + int error, + const char *file, + int line, + const char *func, + const char *object_field, + const char *object, + const char *extra_field, + const char *extra, + char *buffer) { + + assert_raw(buffer); + + if (log_target == LOG_TARGET_NULL) + return -ERRNO_VALUE(error); + + /* Patch in LOG_DAEMON facility if necessary */ + if ((level & LOG_FACMASK) == 0) + level |= log_facility; + + if (open_when_needed) + log_open(); + + do { + char *e; + int k = 0; + + buffer += strspn(buffer, NEWLINE); + + if (buffer[0] == 0) + break; + + if ((e = strpbrk(buffer, NEWLINE))) + *(e++) = 0; + + if (IN_SET(log_target, LOG_TARGET_AUTO, + LOG_TARGET_JOURNAL_OR_KMSG, + LOG_TARGET_JOURNAL)) { + + k = write_to_journal(level, error, file, line, func, object_field, object, extra_field, extra, buffer); + if (k < 0 && k != -EAGAIN) + log_close_journal(); + } + + if (IN_SET(log_target, LOG_TARGET_SYSLOG_OR_KMSG, + LOG_TARGET_SYSLOG)) { + + k = write_to_syslog(level, error, file, line, func, buffer); + if (k < 0 && k != -EAGAIN) + log_close_syslog(); + } + + if (k <= 0 && + IN_SET(log_target, LOG_TARGET_AUTO, + LOG_TARGET_SYSLOG_OR_KMSG, + LOG_TARGET_JOURNAL_OR_KMSG, + LOG_TARGET_KMSG)) { + + if (k < 0) + log_open_kmsg(); + + k = write_to_kmsg(level, error, file, line, func, buffer); + if (k < 0) { + log_close_kmsg(); + log_open_console(); + } + } + + if (k <= 0) + (void) write_to_console(level, error, file, line, func, buffer); + + buffer = e; + } while (buffer); + + if (open_when_needed) + log_close(); + + return -ERRNO_VALUE(error); +} + +int log_dump_internal( + int level, + int error, + const char *file, + int line, + const char *func, + char *buffer) { + + LogRealm realm = LOG_REALM_REMOVE_LEVEL(level); + PROTECT_ERRNO; + + /* This modifies the buffer... */ + + if (_likely_(LOG_PRI(level) > log_max_level[realm])) + return -ERRNO_VALUE(error); + + return log_dispatch_internal(level, error, file, line, func, NULL, NULL, NULL, NULL, buffer); +} + +int log_internalv_realm( + int level, + int error, + const char *file, + int line, + const char *func, + const char *format, + va_list ap) { + + LogRealm realm = LOG_REALM_REMOVE_LEVEL(level); + char buffer[LINE_MAX]; + PROTECT_ERRNO; + + if (_likely_(LOG_PRI(level) > log_max_level[realm])) + return -ERRNO_VALUE(error); + + /* Make sure that %m maps to the specified error (or "Success"). */ + errno = ERRNO_VALUE(error); + + (void) vsnprintf(buffer, sizeof buffer, format, ap); + + return log_dispatch_internal(level, error, file, line, func, NULL, NULL, NULL, NULL, buffer); +} + +int log_internal_realm( + int level, + int error, + const char *file, + int line, + const char *func, + const char *format, ...) { + + va_list ap; + int r; + + va_start(ap, format); + r = log_internalv_realm(level, error, file, line, func, format, ap); + va_end(ap); + + return r; +} + +_printf_(10,0) +static int log_object_internalv( + int level, + int error, + const char *file, + int line, + const char *func, + const char *object_field, + const char *object, + const char *extra_field, + const char *extra, + const char *format, + va_list ap) { + + PROTECT_ERRNO; + char *buffer, *b; + + if (_likely_(LOG_PRI(level) > log_max_level[LOG_REALM_SYSTEMD])) + return -ERRNO_VALUE(error); + + /* Make sure that %m maps to the specified error (or "Success"). */ + errno = ERRNO_VALUE(error); + + /* Prepend the object name before the message */ + if (object) { + size_t n; + + n = strlen(object); + buffer = newa(char, n + 2 + LINE_MAX); + b = stpcpy(stpcpy(buffer, object), ": "); + } else + b = buffer = newa(char, LINE_MAX); + + (void) vsnprintf(b, LINE_MAX, format, ap); + + return log_dispatch_internal(level, error, file, line, func, + object_field, object, extra_field, extra, buffer); +} + +int log_object_internal( + int level, + int error, + const char *file, + int line, + const char *func, + const char *object_field, + const char *object, + const char *extra_field, + const char *extra, + const char *format, ...) { + + va_list ap; + int r; + + va_start(ap, format); + r = log_object_internalv(level, error, file, line, func, object_field, object, extra_field, extra, format, ap); + va_end(ap); + + return r; +} + +static void log_assert( + int level, + const char *text, + const char *file, + int line, + const char *func, + const char *format) { + + static char buffer[LINE_MAX]; + LogRealm realm = LOG_REALM_REMOVE_LEVEL(level); + + if (_likely_(LOG_PRI(level) > log_max_level[realm])) + return; + + DISABLE_WARNING_FORMAT_NONLITERAL; + (void) snprintf(buffer, sizeof buffer, format, text, file, line, func); + REENABLE_WARNING; + + log_abort_msg = buffer; + + log_dispatch_internal(level, 0, file, line, func, NULL, NULL, NULL, NULL, buffer); +} + +_noreturn_ void log_assert_failed_realm( + LogRealm realm, + const char *text, + const char *file, + int line, + const char *func) { + log_open(); + log_assert(LOG_REALM_PLUS_LEVEL(realm, LOG_CRIT), text, file, line, func, + "Assertion '%s' failed at %s:%u, function %s(). Aborting."); + abort(); +} + +_noreturn_ void log_assert_failed_unreachable_realm( + LogRealm realm, + const char *text, + const char *file, + int line, + const char *func) { + log_open(); + log_assert(LOG_REALM_PLUS_LEVEL(realm, LOG_CRIT), text, file, line, func, + "Code should not be reached '%s' at %s:%u, function %s(). Aborting."); + abort(); +} + +void log_assert_failed_return_realm( + LogRealm realm, + const char *text, + const char *file, + int line, + const char *func) { + PROTECT_ERRNO; + log_assert(LOG_REALM_PLUS_LEVEL(realm, LOG_DEBUG), text, file, line, func, + "Assertion '%s' failed at %s:%u, function %s(). Ignoring."); +} + +int log_oom_internal(LogRealm realm, const char *file, int line, const char *func) { + return log_internal_realm(LOG_REALM_PLUS_LEVEL(realm, LOG_ERR), + ENOMEM, file, line, func, "Out of memory."); +} + +int log_format_iovec( + struct iovec *iovec, + size_t iovec_len, + size_t *n, + bool newline_separator, + int error, + const char *format, + va_list ap) { + + static const char nl = '\n'; + + while (format && *n + 1 < iovec_len) { + va_list aq; + char *m; + int r; + + /* We need to copy the va_list structure, + * since vasprintf() leaves it afterwards at + * an undefined location */ + + errno = ERRNO_VALUE(error); + + va_copy(aq, ap); + r = vasprintf(&m, format, aq); + va_end(aq); + if (r < 0) + return -EINVAL; + + /* Now, jump enough ahead, so that we point to + * the next format string */ + VA_FORMAT_ADVANCE(format, ap); + + iovec[(*n)++] = IOVEC_MAKE_STRING(m); + + if (newline_separator) { + iovec[*n] = IOVEC_MAKE((char *)&nl, 1); + (*n)++; + } + + format = va_arg(ap, char *); + } + return 0; +} + +int log_struct_internal( + int level, + int error, + const char *file, + int line, + const char *func, + const char *format, ...) { + + LogRealm realm = LOG_REALM_REMOVE_LEVEL(level); + char buf[LINE_MAX]; + bool found = false; + PROTECT_ERRNO; + va_list ap; + + if (_likely_(LOG_PRI(level) > log_max_level[realm]) || + log_target == LOG_TARGET_NULL) + return -ERRNO_VALUE(error); + + if ((level & LOG_FACMASK) == 0) + level |= log_facility; + + if (IN_SET(log_target, + LOG_TARGET_AUTO, + LOG_TARGET_JOURNAL_OR_KMSG, + LOG_TARGET_JOURNAL)) { + + if (open_when_needed) + log_open_journal(); + + if (journal_fd >= 0) { + char header[LINE_MAX]; + struct iovec iovec[17] = {}; + size_t n = 0, i; + int r; + struct msghdr mh = { + .msg_iov = iovec, + }; + bool fallback = false; + + /* If the journal is available do structured logging. + * Do not report the errno if it is synthetic. */ + log_do_header(header, sizeof(header), level, error, file, line, func, NULL, NULL, NULL, NULL); + iovec[n++] = IOVEC_MAKE_STRING(header); + + va_start(ap, format); + r = log_format_iovec(iovec, ELEMENTSOF(iovec), &n, true, error, format, ap); + if (r < 0) + fallback = true; + else { + mh.msg_iovlen = n; + (void) sendmsg(journal_fd, &mh, MSG_NOSIGNAL); + } + + va_end(ap); + for (i = 1; i < n; i += 2) + free(iovec[i].iov_base); + + if (!fallback) { + if (open_when_needed) + log_close(); + + return -ERRNO_VALUE(error); + } + } + } + + /* Fallback if journal logging is not available or didn't work. */ + + va_start(ap, format); + while (format) { + va_list aq; + + errno = ERRNO_VALUE(error); + + va_copy(aq, ap); + (void) vsnprintf(buf, sizeof buf, format, aq); + va_end(aq); + + if (startswith(buf, "MESSAGE=")) { + found = true; + break; + } + + VA_FORMAT_ADVANCE(format, ap); + + format = va_arg(ap, char *); + } + va_end(ap); + + if (!found) { + if (open_when_needed) + log_close(); + + return -ERRNO_VALUE(error); + } + + return log_dispatch_internal(level, error, file, line, func, NULL, NULL, NULL, NULL, buf + 8); +} + +int log_struct_iovec_internal( + int level, + int error, + const char *file, + int line, + const char *func, + const struct iovec input_iovec[], + size_t n_input_iovec) { + + LogRealm realm = LOG_REALM_REMOVE_LEVEL(level); + PROTECT_ERRNO; + size_t i; + char *m; + + if (_likely_(LOG_PRI(level) > log_max_level[realm]) || + log_target == LOG_TARGET_NULL) + return -ERRNO_VALUE(error); + + if ((level & LOG_FACMASK) == 0) + level |= log_facility; + + if (IN_SET(log_target, LOG_TARGET_AUTO, + LOG_TARGET_JOURNAL_OR_KMSG, + LOG_TARGET_JOURNAL) && + journal_fd >= 0) { + + struct iovec iovec[1 + n_input_iovec*2]; + char header[LINE_MAX]; + struct msghdr mh = { + .msg_iov = iovec, + .msg_iovlen = 1 + n_input_iovec*2, + }; + + log_do_header(header, sizeof(header), level, error, file, line, func, NULL, NULL, NULL, NULL); + iovec[0] = IOVEC_MAKE_STRING(header); + + for (i = 0; i < n_input_iovec; i++) { + iovec[1+i*2] = input_iovec[i]; + iovec[1+i*2+1] = IOVEC_MAKE_STRING("\n"); + } + + if (sendmsg(journal_fd, &mh, MSG_NOSIGNAL) >= 0) + return -ERRNO_VALUE(error); + } + + for (i = 0; i < n_input_iovec; i++) + if (memory_startswith(input_iovec[i].iov_base, input_iovec[i].iov_len, "MESSAGE=")) + break; + + if (_unlikely_(i >= n_input_iovec)) /* Couldn't find MESSAGE=? */ + return -ERRNO_VALUE(error); + + m = strndupa(input_iovec[i].iov_base + STRLEN("MESSAGE="), + input_iovec[i].iov_len - STRLEN("MESSAGE=")); + + return log_dispatch_internal(level, error, file, line, func, NULL, NULL, NULL, NULL, m); +} + +int log_set_target_from_string(const char *e) { + LogTarget t; + + t = log_target_from_string(e); + if (t < 0) + return -EINVAL; + + log_set_target(t); + return 0; +} + +int log_set_max_level_from_string_realm(LogRealm realm, const char *e) { + int t; + + t = log_level_from_string(e); + if (t < 0) + return -EINVAL; + + log_set_max_level_realm(realm, t); + return 0; +} + +static int parse_proc_cmdline_item(const char *key, const char *value, void *data) { + + /* + * The systemd.log_xyz= settings are parsed by all tools, and + * so is "debug". + * + * However, "quiet" is only parsed by PID 1, and only turns of + * status output to /dev/console, but does not alter the log + * level. + */ + + if (streq(key, "debug") && !value) + log_set_max_level(LOG_DEBUG); + + else if (proc_cmdline_key_streq(key, "systemd.log_target")) { + + if (proc_cmdline_value_missing(key, value)) + return 0; + + if (log_set_target_from_string(value) < 0) + log_warning("Failed to parse log target '%s'. Ignoring.", value); + + } else if (proc_cmdline_key_streq(key, "systemd.log_level")) { + + if (proc_cmdline_value_missing(key, value)) + return 0; + + if (log_set_max_level_from_string(value) < 0) + log_warning("Failed to parse log level '%s'. Ignoring.", value); + + } else if (proc_cmdline_key_streq(key, "systemd.log_color")) { + + if (log_show_color_from_string(value ?: "1") < 0) + log_warning("Failed to parse log color setting '%s'. Ignoring.", value); + + } else if (proc_cmdline_key_streq(key, "systemd.log_location")) { + + if (log_show_location_from_string(value ?: "1") < 0) + log_warning("Failed to parse log location setting '%s'. Ignoring.", value); + } + + return 0; +} + +void log_parse_environment_realm(LogRealm realm) { + /* Do not call from library code. */ + + const char *e; + + if (get_ctty_devnr(0, NULL) < 0) + /* Only try to read the command line in daemons. We assume that anything that has a controlling tty is + user stuff. */ + (void) proc_cmdline_parse(parse_proc_cmdline_item, NULL, PROC_CMDLINE_STRIP_RD_PREFIX); + + e = getenv("SYSTEMD_LOG_TARGET"); + if (e && log_set_target_from_string(e) < 0) + log_warning("Failed to parse log target '%s'. Ignoring.", e); + + e = getenv("SYSTEMD_LOG_LEVEL"); + if (e && log_set_max_level_from_string_realm(realm, e) < 0) + log_warning("Failed to parse log level '%s'. Ignoring.", e); + + e = getenv("SYSTEMD_LOG_COLOR"); + if (e && log_show_color_from_string(e) < 0) + log_warning("Failed to parse bool '%s'. Ignoring.", e); + + e = getenv("SYSTEMD_LOG_LOCATION"); + if (e && log_show_location_from_string(e) < 0) + log_warning("Failed to parse bool '%s'. Ignoring.", e); +} + +LogTarget log_get_target(void) { + return log_target; +} + +int log_get_max_level_realm(LogRealm realm) { + return log_max_level[realm]; +} + +void log_show_color(bool b) { + show_color = b; +} + +bool log_get_show_color(void) { + return show_color; +} + +void log_show_location(bool b) { + show_location = b; +} + +bool log_get_show_location(void) { + return show_location; +} + +int log_show_color_from_string(const char *e) { + int t; + + t = parse_boolean(e); + if (t < 0) + return t; + + log_show_color(t); + return 0; +} + +int log_show_location_from_string(const char *e) { + int t; + + t = parse_boolean(e); + if (t < 0) + return t; + + log_show_location(t); + return 0; +} + +bool log_on_console(void) { + if (IN_SET(log_target, LOG_TARGET_CONSOLE, + LOG_TARGET_CONSOLE_PREFIXED)) + return true; + + return syslog_fd < 0 && kmsg_fd < 0 && journal_fd < 0; +} + +static const char *const log_target_table[_LOG_TARGET_MAX] = { + [LOG_TARGET_CONSOLE] = "console", + [LOG_TARGET_CONSOLE_PREFIXED] = "console-prefixed", + [LOG_TARGET_KMSG] = "kmsg", + [LOG_TARGET_JOURNAL] = "journal", + [LOG_TARGET_JOURNAL_OR_KMSG] = "journal-or-kmsg", + [LOG_TARGET_SYSLOG] = "syslog", + [LOG_TARGET_SYSLOG_OR_KMSG] = "syslog-or-kmsg", + [LOG_TARGET_AUTO] = "auto", + [LOG_TARGET_NULL] = "null", +}; + +DEFINE_STRING_TABLE_LOOKUP(log_target, LogTarget); + +void log_received_signal(int level, const struct signalfd_siginfo *si) { + assert(si); + + if (pid_is_valid(si->ssi_pid)) { + _cleanup_free_ char *p = NULL; + + (void) get_process_comm(si->ssi_pid, &p); + + log_full(level, + "Received SIG%s from PID %"PRIu32" (%s).", + signal_to_string(si->ssi_signo), + si->ssi_pid, strna(p)); + } else + log_full(level, + "Received SIG%s.", + signal_to_string(si->ssi_signo)); +} + +int log_syntax_internal( + const char *unit, + int level, + const char *config_file, + unsigned config_line, + int error, + const char *file, + int line, + const char *func, + const char *format, ...) { + + PROTECT_ERRNO; + char buffer[LINE_MAX]; + va_list ap; + const char *unit_fmt = NULL; + + if (_likely_(LOG_PRI(level) > log_max_level[LOG_REALM_SYSTEMD]) || + log_target == LOG_TARGET_NULL) + return -ERRNO_VALUE(error); + + errno = error; + + va_start(ap, format); + (void) vsnprintf(buffer, sizeof buffer, format, ap); + va_end(ap); + + if (unit) + unit_fmt = getpid_cached() == 1 ? "UNIT=%s" : "USER_UNIT=%s"; + + return log_struct_internal( + LOG_REALM_PLUS_LEVEL(LOG_REALM_SYSTEMD, level), + error, + file, line, func, + "MESSAGE_ID=" SD_MESSAGE_INVALID_CONFIGURATION_STR, + "CONFIG_FILE=%s", config_file, + "CONFIG_LINE=%u", config_line, + LOG_MESSAGE("%s:%u: %s", config_file, config_line, buffer), + unit_fmt, unit, + NULL); +} + +int log_syntax_invalid_utf8_internal( + const char *unit, + int level, + const char *config_file, + unsigned config_line, + const char *file, + int line, + const char *func, + const char *rvalue) { + + _cleanup_free_ char *p = NULL; + + if (rvalue) + p = utf8_escape_invalid(rvalue); + + log_syntax_internal(unit, level, config_file, config_line, 0, file, line, func, + "String is not UTF-8 clean, ignoring assignment: %s", strna(p)); + + return -EINVAL; +} + +void log_set_upgrade_syslog_to_journal(bool b) { + upgrade_syslog_to_journal = b; + + /* Make the change effective immediately */ + if (b) { + if (log_target == LOG_TARGET_SYSLOG) + log_target = LOG_TARGET_JOURNAL; + else if (log_target == LOG_TARGET_SYSLOG_OR_KMSG) + log_target = LOG_TARGET_JOURNAL_OR_KMSG; + } +} + +void log_set_always_reopen_console(bool b) { + always_reopen_console = b; +} + +void log_set_open_when_needed(bool b) { + open_when_needed = b; +} + +void log_set_prohibit_ipc(bool b) { + prohibit_ipc = b; +} + +int log_emergency_level(void) { + /* Returns the log level to use for log_emergency() logging. We use LOG_EMERG only when we are PID 1, as only + * then the system of the whole system is obviously affected. */ + + return getpid_cached() == 1 ? LOG_EMERG : LOG_ERR; +} + +int log_dup_console(void) { + int copy; + + /* Duplicate the fd we use for fd logging if it's < 3 and use the copy from now on. This call is useful + * whenever we want to continue logging through the original fd, but want to rearrange stderr. */ + + if (console_fd >= 3) + return 0; + + copy = fcntl(console_fd, F_DUPFD_CLOEXEC, 3); + if (copy < 0) + return -errno; + + console_fd = copy; + return 0; +} + +void log_setup_service(void) { + /* Sets up logging the way it is most appropriate for running a program as a service. Note that using this + * doesn't make the binary unsuitable for invocation on the command line, as log output will still go to the + * terminal if invoked interactively. */ + + log_set_target(LOG_TARGET_AUTO); + log_parse_environment(); + log_open(); +} diff --git a/src/basic/log.h b/src/basic/log.h new file mode 100644 index 0000000..17438d7 --- /dev/null +++ b/src/basic/log.h @@ -0,0 +1,324 @@ +/* SPDX-License-Identifier: LGPL-2.1+ */ +#pragma once + +#include <stdarg.h> +#include <stdbool.h> +#include <stdlib.h> +#include <syslog.h> + +#include "macro.h" + +/* Some structures we reference but don't want to pull in headers for */ +struct iovec; +struct signalfd_siginfo; + +typedef enum LogRealm { + LOG_REALM_SYSTEMD, + LOG_REALM_UDEV, + _LOG_REALM_MAX, +} LogRealm; + +#ifndef LOG_REALM +# define LOG_REALM LOG_REALM_SYSTEMD +#endif + +typedef enum LogTarget{ + LOG_TARGET_CONSOLE, + LOG_TARGET_CONSOLE_PREFIXED, + LOG_TARGET_KMSG, + LOG_TARGET_JOURNAL, + LOG_TARGET_JOURNAL_OR_KMSG, + LOG_TARGET_SYSLOG, + LOG_TARGET_SYSLOG_OR_KMSG, + LOG_TARGET_AUTO, /* console if stderr is tty, JOURNAL_OR_KMSG otherwise */ + LOG_TARGET_NULL, + _LOG_TARGET_MAX, + _LOG_TARGET_INVALID = -1 +} LogTarget; + +/* Note to readers: << and >> have lower precedence than & and | */ +#define LOG_REALM_PLUS_LEVEL(realm, level) ((realm) << 10 | (level)) +#define LOG_REALM_REMOVE_LEVEL(realm_level) ((realm_level) >> 10) +#define SYNTHETIC_ERRNO(num) (1 << 30 | (num)) +#define IS_SYNTHETIC_ERRNO(val) ((val) >> 30 & 1) +#define ERRNO_VALUE(val) (abs(val) & 255) + +void log_set_target(LogTarget target); +void log_set_max_level_realm(LogRealm realm, int level); +#define log_set_max_level(level) \ + log_set_max_level_realm(LOG_REALM, (level)) + +void log_set_facility(int facility); + +int log_set_target_from_string(const char *e); +int log_set_max_level_from_string_realm(LogRealm realm, const char *e); +#define log_set_max_level_from_string(e) \ + log_set_max_level_from_string_realm(LOG_REALM, (e)) + +void log_show_color(bool b); +bool log_get_show_color(void) _pure_; +void log_show_location(bool b); +bool log_get_show_location(void) _pure_; + +int log_show_color_from_string(const char *e); +int log_show_location_from_string(const char *e); + +LogTarget log_get_target(void) _pure_; +int log_get_max_level_realm(LogRealm realm) _pure_; +#define log_get_max_level() \ + log_get_max_level_realm(LOG_REALM) + +/* Functions below that open and close logs or configure logging based on the + * environment should not be called from library code — this is always a job + * for the application itself. + */ + +int log_open(void); +void log_close(void); +void log_forget_fds(void); + +void log_parse_environment_realm(LogRealm realm); +#define log_parse_environment() \ + log_parse_environment_realm(LOG_REALM) + +int log_dispatch_internal( + int level, + int error, + const char *file, + int line, + const char *func, + const char *object_field, + const char *object, + const char *extra, + const char *extra_field, + char *buffer); + +int log_internal_realm( + int level, + int error, + const char *file, + int line, + const char *func, + const char *format, ...) _printf_(6,7); +#define log_internal(level, ...) \ + log_internal_realm(LOG_REALM_PLUS_LEVEL(LOG_REALM, (level)), __VA_ARGS__) + +int log_internalv_realm( + int level, + int error, + const char *file, + int line, + const char *func, + const char *format, + va_list ap) _printf_(6,0); +#define log_internalv(level, ...) \ + log_internalv_realm(LOG_REALM_PLUS_LEVEL(LOG_REALM, (level)), __VA_ARGS__) + +/* Realm is fixed to LOG_REALM_SYSTEMD for those */ +int log_object_internal( + int level, + int error, + const char *file, + int line, + const char *func, + const char *object_field, + const char *object, + const char *extra_field, + const char *extra, + const char *format, ...) _printf_(10,11); + +int log_struct_internal( + int level, + int error, + const char *file, + int line, + const char *func, + const char *format, ...) _printf_(6,0) _sentinel_; + +int log_oom_internal( + LogRealm realm, + const char *file, + int line, + const char *func); + +int log_format_iovec( + struct iovec *iovec, + size_t iovec_len, + size_t *n, + bool newline_separator, + int error, + const char *format, + va_list ap) _printf_(6, 0); + +int log_struct_iovec_internal( + int level, + int error, + const char *file, + int line, + const char *func, + const struct iovec *input_iovec, + size_t n_input_iovec); + +/* This modifies the buffer passed! */ +int log_dump_internal( + int level, + int error, + const char *file, + int line, + const char *func, + char *buffer); + +/* Logging for various assertions */ +_noreturn_ void log_assert_failed_realm( + LogRealm realm, + const char *text, + const char *file, + int line, + const char *func); +#define log_assert_failed(text, ...) \ + log_assert_failed_realm(LOG_REALM, (text), __VA_ARGS__) + +_noreturn_ void log_assert_failed_unreachable_realm( + LogRealm realm, + const char *text, + const char *file, + int line, + const char *func); +#define log_assert_failed_unreachable(text, ...) \ + log_assert_failed_unreachable_realm(LOG_REALM, (text), __VA_ARGS__) + +void log_assert_failed_return_realm( + LogRealm realm, + const char *text, + const char *file, + int line, + const char *func); +#define log_assert_failed_return(text, ...) \ + log_assert_failed_return_realm(LOG_REALM, (text), __VA_ARGS__) + +#define log_dispatch(level, error, buffer) \ + log_dispatch_internal(level, error, __FILE__, __LINE__, __func__, NULL, NULL, NULL, NULL, buffer) + +/* Logging with level */ +#define log_full_errno_realm(realm, level, error, ...) \ + ({ \ + int _level = (level), _e = (error), _realm = (realm); \ + (log_get_max_level_realm(_realm) >= LOG_PRI(_level)) \ + ? log_internal_realm(LOG_REALM_PLUS_LEVEL(_realm, _level), _e, \ + __FILE__, __LINE__, __func__, __VA_ARGS__) \ + : -ERRNO_VALUE(_e); \ + }) + +#define log_full_errno(level, error, ...) \ + log_full_errno_realm(LOG_REALM, (level), (error), __VA_ARGS__) + +#define log_full(level, ...) log_full_errno((level), 0, __VA_ARGS__) + +int log_emergency_level(void); + +/* Normal logging */ +#define log_debug(...) log_full(LOG_DEBUG, __VA_ARGS__) +#define log_info(...) log_full(LOG_INFO, __VA_ARGS__) +#define log_notice(...) log_full(LOG_NOTICE, __VA_ARGS__) +#define log_warning(...) log_full(LOG_WARNING, __VA_ARGS__) +#define log_error(...) log_full(LOG_ERR, __VA_ARGS__) +#define log_emergency(...) log_full(log_emergency_level(), __VA_ARGS__) + +/* Logging triggered by an errno-like error */ +#define log_debug_errno(error, ...) log_full_errno(LOG_DEBUG, error, __VA_ARGS__) +#define log_info_errno(error, ...) log_full_errno(LOG_INFO, error, __VA_ARGS__) +#define log_notice_errno(error, ...) log_full_errno(LOG_NOTICE, error, __VA_ARGS__) +#define log_warning_errno(error, ...) log_full_errno(LOG_WARNING, error, __VA_ARGS__) +#define log_error_errno(error, ...) log_full_errno(LOG_ERR, error, __VA_ARGS__) +#define log_emergency_errno(error, ...) log_full_errno(log_emergency_level(), error, __VA_ARGS__) + +#ifdef LOG_TRACE +# define log_trace(...) log_debug(__VA_ARGS__) +#else +# define log_trace(...) do {} while (0) +#endif + +/* Structured logging */ +#define log_struct_errno(level, error, ...) \ + log_struct_internal(LOG_REALM_PLUS_LEVEL(LOG_REALM, level), \ + error, __FILE__, __LINE__, __func__, __VA_ARGS__, NULL) +#define log_struct(level, ...) log_struct_errno(level, 0, __VA_ARGS__) + +#define log_struct_iovec_errno(level, error, iovec, n_iovec) \ + log_struct_iovec_internal(LOG_REALM_PLUS_LEVEL(LOG_REALM, level), \ + error, __FILE__, __LINE__, __func__, iovec, n_iovec) +#define log_struct_iovec(level, iovec, n_iovec) log_struct_iovec_errno(level, 0, iovec, n_iovec) + +/* This modifies the buffer passed! */ +#define log_dump(level, buffer) \ + log_dump_internal(LOG_REALM_PLUS_LEVEL(LOG_REALM, level), \ + 0, __FILE__, __LINE__, __func__, buffer) + +#define log_oom() log_oom_internal(LOG_REALM, __FILE__, __LINE__, __func__) + +bool log_on_console(void) _pure_; + +const char *log_target_to_string(LogTarget target) _const_; +LogTarget log_target_from_string(const char *s) _pure_; + +/* Helper to prepare various field for structured logging */ +#define LOG_MESSAGE(fmt, ...) "MESSAGE=" fmt, ##__VA_ARGS__ + +void log_received_signal(int level, const struct signalfd_siginfo *si); + +/* If turned on, any requests for a log target involving "syslog" will be implicitly upgraded to the equivalent journal target */ +void log_set_upgrade_syslog_to_journal(bool b); + +/* If turned on, and log_open() is called, we'll not use STDERR_FILENO for logging ever, but rather open /dev/console */ +void log_set_always_reopen_console(bool b); + +/* If turned on, we'll open the log stream implicitly if needed on each individual log call. This is normally not + * desired as we want to reuse our logging streams. It is useful however */ +void log_set_open_when_needed(bool b); + +/* If turned on, then we'll never use IPC-based logging, i.e. never log to syslog or the journal. We'll only log to + * stderr, the console or kmsg */ +void log_set_prohibit_ipc(bool b); + +int log_dup_console(void); + +int log_syntax_internal( + const char *unit, + int level, + const char *config_file, + unsigned config_line, + int error, + const char *file, + int line, + const char *func, + const char *format, ...) _printf_(9, 10); + +int log_syntax_invalid_utf8_internal( + const char *unit, + int level, + const char *config_file, + unsigned config_line, + const char *file, + int line, + const char *func, + const char *rvalue); + +#define log_syntax(unit, level, config_file, config_line, error, ...) \ + ({ \ + int _level = (level), _e = (error); \ + (log_get_max_level() >= LOG_PRI(_level)) \ + ? log_syntax_internal(unit, _level, config_file, config_line, _e, __FILE__, __LINE__, __func__, __VA_ARGS__) \ + : -abs(_e); \ + }) + +#define log_syntax_invalid_utf8(unit, level, config_file, config_line, rvalue) \ + ({ \ + int _level = (level); \ + (log_get_max_level() >= LOG_PRI(_level)) \ + ? log_syntax_invalid_utf8_internal(unit, _level, config_file, config_line, __FILE__, __LINE__, __func__, rvalue) \ + : -EINVAL; \ + }) + +#define DEBUG_LOGGING _unlikely_(log_get_max_level() >= LOG_DEBUG) + +void log_setup_service(void); diff --git a/src/basic/login-util.c b/src/basic/login-util.c new file mode 100644 index 0000000..085ccd0 --- /dev/null +++ b/src/basic/login-util.c @@ -0,0 +1,14 @@ +/* SPDX-License-Identifier: LGPL-2.1+ */ + +#include <string.h> + +#include "login-util.h" +#include "string-util.h" + +bool session_id_valid(const char *id) { + + if (isempty(id)) + return false; + + return id[strspn(id, LETTERS DIGITS)] == '\0'; +} diff --git a/src/basic/login-util.h b/src/basic/login-util.h new file mode 100644 index 0000000..e1e62e1 --- /dev/null +++ b/src/basic/login-util.h @@ -0,0 +1,11 @@ +/* SPDX-License-Identifier: LGPL-2.1+ */ +#pragma once + +#include <stdbool.h> +#include <unistd.h> + +bool session_id_valid(const char *id); + +static inline bool logind_running(void) { + return access("/run/systemd/seats/", F_OK) >= 0; +} diff --git a/src/basic/macro.h b/src/basic/macro.h new file mode 100644 index 0000000..1971e91 --- /dev/null +++ b/src/basic/macro.h @@ -0,0 +1,549 @@ +/* SPDX-License-Identifier: LGPL-2.1+ */ +#pragma once + +#include <assert.h> +#include <errno.h> +#include <inttypes.h> +#include <stdbool.h> +#include <sys/param.h> +#include <sys/sysmacros.h> +#include <sys/types.h> + +#define _printf_(a, b) __attribute__((__format__(printf, a, b))) +#ifdef __clang__ +# define _alloc_(...) +#else +# define _alloc_(...) __attribute__((__alloc_size__(__VA_ARGS__))) +#endif +#define _sentinel_ __attribute__((__sentinel__)) +#define _section_(x) __attribute__((__section__(x))) +#define _used_ __attribute__((__used__)) +#define _unused_ __attribute__((__unused__)) +#define _destructor_ __attribute__((__destructor__)) +#define _pure_ __attribute__((__pure__)) +#define _const_ __attribute__((__const__)) +#define _deprecated_ __attribute__((__deprecated__)) +#define _packed_ __attribute__((__packed__)) +#define _malloc_ __attribute__((__malloc__)) +#define _weak_ __attribute__((__weak__)) +#define _likely_(x) (__builtin_expect(!!(x), 1)) +#define _unlikely_(x) (__builtin_expect(!!(x), 0)) +#define _public_ __attribute__((__visibility__("default"))) +#define _hidden_ __attribute__((__visibility__("hidden"))) +#define _weakref_(x) __attribute__((__weakref__(#x))) +#define _align_(x) __attribute__((__aligned__(x))) +#define _alignas_(x) __attribute__((__aligned__(__alignof(x)))) +#define _alignptr_ __attribute__((__aligned__(sizeof(void*)))) +#define _cleanup_(x) __attribute__((__cleanup__(x))) +#if __GNUC__ >= 7 +#define _fallthrough_ __attribute__((__fallthrough__)) +#else +#define _fallthrough_ +#endif +/* Define C11 noreturn without <stdnoreturn.h> and even on older gcc + * compiler versions */ +#ifndef _noreturn_ +#if __STDC_VERSION__ >= 201112L +#define _noreturn_ _Noreturn +#else +#define _noreturn_ __attribute__((__noreturn__)) +#endif +#endif + +#if !defined(HAS_FEATURE_MEMORY_SANITIZER) +# if defined(__has_feature) +# if __has_feature(memory_sanitizer) +# define HAS_FEATURE_MEMORY_SANITIZER 1 +# endif +# endif +# if !defined(HAS_FEATURE_MEMORY_SANITIZER) +# define HAS_FEATURE_MEMORY_SANITIZER 0 +# endif +#endif + +#if !defined(HAS_FEATURE_ADDRESS_SANITIZER) +# ifdef __SANITIZE_ADDRESS__ +# define HAS_FEATURE_ADDRESS_SANITIZER 1 +# elif defined(__has_feature) +# if __has_feature(address_sanitizer) +# define HAS_FEATURE_ADDRESS_SANITIZER 1 +# endif +# endif +# if !defined(HAS_FEATURE_ADDRESS_SANITIZER) +# define HAS_FEATURE_ADDRESS_SANITIZER 0 +# endif +#endif + +/* Note: on GCC "no_sanitize_address" is a function attribute only, on llvm it may also be applied to global + * variables. We define a specific macro which knows this. Note that on GCC we don't need this decorator so much, since + * our primary usecase for this attribute is registration structures placed in named ELF sections which shall not be + * padded, but GCC doesn't pad those anyway if AddressSanitizer is enabled. */ +#if HAS_FEATURE_ADDRESS_SANITIZER && defined(__clang__) +#define _variable_no_sanitize_address_ __attribute__((__no_sanitize_address__)) +#else +#define _variable_no_sanitize_address_ +#endif + +/* Temporarily disable some warnings */ +#define DISABLE_WARNING_FORMAT_NONLITERAL \ + _Pragma("GCC diagnostic push"); \ + _Pragma("GCC diagnostic ignored \"-Wformat-nonliteral\"") + +#define DISABLE_WARNING_MISSING_PROTOTYPES \ + _Pragma("GCC diagnostic push"); \ + _Pragma("GCC diagnostic ignored \"-Wmissing-prototypes\"") + +#define DISABLE_WARNING_NONNULL \ + _Pragma("GCC diagnostic push"); \ + _Pragma("GCC diagnostic ignored \"-Wnonnull\"") + +#define DISABLE_WARNING_SHADOW \ + _Pragma("GCC diagnostic push"); \ + _Pragma("GCC diagnostic ignored \"-Wshadow\"") + +#define DISABLE_WARNING_INCOMPATIBLE_POINTER_TYPES \ + _Pragma("GCC diagnostic push"); \ + _Pragma("GCC diagnostic ignored \"-Wincompatible-pointer-types\"") + +#define REENABLE_WARNING \ + _Pragma("GCC diagnostic pop") + +/* automake test harness */ +#define EXIT_TEST_SKIP 77 + +#define XSTRINGIFY(x) #x +#define STRINGIFY(x) XSTRINGIFY(x) + +#define XCONCATENATE(x, y) x ## y +#define CONCATENATE(x, y) XCONCATENATE(x, y) + +#define UNIQ_T(x, uniq) CONCATENATE(__unique_prefix_, CONCATENATE(x, uniq)) +#define UNIQ __COUNTER__ + +/* builtins */ +#if __SIZEOF_INT__ == 4 +#define BUILTIN_FFS_U32(x) __builtin_ffs(x); +#elif __SIZEOF_LONG__ == 4 +#define BUILTIN_FFS_U32(x) __builtin_ffsl(x); +#else +#error "neither int nor long are four bytes long?!?" +#endif + +/* Rounds up */ + +#define ALIGN4(l) (((l) + 3) & ~3) +#define ALIGN8(l) (((l) + 7) & ~7) + +#if __SIZEOF_POINTER__ == 8 +#define ALIGN(l) ALIGN8(l) +#elif __SIZEOF_POINTER__ == 4 +#define ALIGN(l) ALIGN4(l) +#else +#error "Wut? Pointers are neither 4 nor 8 bytes long?" +#endif + +#define ALIGN_PTR(p) ((void*) ALIGN((unsigned long) (p))) +#define ALIGN4_PTR(p) ((void*) ALIGN4((unsigned long) (p))) +#define ALIGN8_PTR(p) ((void*) ALIGN8((unsigned long) (p))) + +static inline size_t ALIGN_TO(size_t l, size_t ali) { + return ((l + ali - 1) & ~(ali - 1)); +} + +#define ALIGN_TO_PTR(p, ali) ((void*) ALIGN_TO((unsigned long) (p), (ali))) + +/* align to next higher power-of-2 (except for: 0 => 0, overflow => 0) */ +static inline unsigned long ALIGN_POWER2(unsigned long u) { + /* clz(0) is undefined */ + if (u == 1) + return 1; + + /* left-shift overflow is undefined */ + if (__builtin_clzl(u - 1UL) < 1) + return 0; + + return 1UL << (sizeof(u) * 8 - __builtin_clzl(u - 1UL)); +} + +#ifndef __COVERITY__ +# define VOID_0 ((void)0) +#else +# define VOID_0 ((void*)0) +#endif + +#define ELEMENTSOF(x) \ + (__builtin_choose_expr( \ + !__builtin_types_compatible_p(typeof(x), typeof(&*(x))), \ + sizeof(x)/sizeof((x)[0]), \ + VOID_0)) + +/* + * STRLEN - return the length of a string literal, minus the trailing NUL byte. + * Contrary to strlen(), this is a constant expression. + * @x: a string literal. + */ +#define STRLEN(x) (sizeof(""x"") - 1) + +/* + * container_of - cast a member of a structure out to the containing structure + * @ptr: the pointer to the member. + * @type: the type of the container struct this is embedded in. + * @member: the name of the member within the struct. + */ +#define container_of(ptr, type, member) __container_of(UNIQ, (ptr), type, member) +#define __container_of(uniq, ptr, type, member) \ + ({ \ + const typeof( ((type*)0)->member ) *UNIQ_T(A, uniq) = (ptr); \ + (type*)( (char *)UNIQ_T(A, uniq) - offsetof(type, member) ); \ + }) + +#undef MAX +#define MAX(a, b) __MAX(UNIQ, (a), UNIQ, (b)) +#define __MAX(aq, a, bq, b) \ + ({ \ + const typeof(a) UNIQ_T(A, aq) = (a); \ + const typeof(b) UNIQ_T(B, bq) = (b); \ + UNIQ_T(A, aq) > UNIQ_T(B, bq) ? UNIQ_T(A, aq) : UNIQ_T(B, bq); \ + }) + +/* evaluates to (void) if _A or _B are not constant or of different types */ +#define CONST_MAX(_A, _B) \ + (__builtin_choose_expr( \ + __builtin_constant_p(_A) && \ + __builtin_constant_p(_B) && \ + __builtin_types_compatible_p(typeof(_A), typeof(_B)), \ + ((_A) > (_B)) ? (_A) : (_B), \ + VOID_0)) + +/* takes two types and returns the size of the larger one */ +#define MAXSIZE(A, B) (sizeof(union _packed_ { typeof(A) a; typeof(B) b; })) + +#define MAX3(x, y, z) \ + ({ \ + const typeof(x) _c = MAX(x, y); \ + MAX(_c, z); \ + }) + +#undef MIN +#define MIN(a, b) __MIN(UNIQ, (a), UNIQ, (b)) +#define __MIN(aq, a, bq, b) \ + ({ \ + const typeof(a) UNIQ_T(A, aq) = (a); \ + const typeof(b) UNIQ_T(B, bq) = (b); \ + UNIQ_T(A, aq) < UNIQ_T(B, bq) ? UNIQ_T(A, aq) : UNIQ_T(B, bq); \ + }) + +#define MIN3(x, y, z) \ + ({ \ + const typeof(x) _c = MIN(x, y); \ + MIN(_c, z); \ + }) + +#define LESS_BY(a, b) __LESS_BY(UNIQ, (a), UNIQ, (b)) +#define __LESS_BY(aq, a, bq, b) \ + ({ \ + const typeof(a) UNIQ_T(A, aq) = (a); \ + const typeof(b) UNIQ_T(B, bq) = (b); \ + UNIQ_T(A, aq) > UNIQ_T(B, bq) ? UNIQ_T(A, aq) - UNIQ_T(B, bq) : 0; \ + }) + +#define CMP(a, b) __CMP(UNIQ, (a), UNIQ, (b)) +#define __CMP(aq, a, bq, b) \ + ({ \ + const typeof(a) UNIQ_T(A, aq) = (a); \ + const typeof(b) UNIQ_T(B, bq) = (b); \ + UNIQ_T(A, aq) < UNIQ_T(B, bq) ? -1 : \ + UNIQ_T(A, aq) > UNIQ_T(B, bq) ? 1 : 0; \ + }) + +#undef CLAMP +#define CLAMP(x, low, high) __CLAMP(UNIQ, (x), UNIQ, (low), UNIQ, (high)) +#define __CLAMP(xq, x, lowq, low, highq, high) \ + ({ \ + const typeof(x) UNIQ_T(X, xq) = (x); \ + const typeof(low) UNIQ_T(LOW, lowq) = (low); \ + const typeof(high) UNIQ_T(HIGH, highq) = (high); \ + UNIQ_T(X, xq) > UNIQ_T(HIGH, highq) ? \ + UNIQ_T(HIGH, highq) : \ + UNIQ_T(X, xq) < UNIQ_T(LOW, lowq) ? \ + UNIQ_T(LOW, lowq) : \ + UNIQ_T(X, xq); \ + }) + +/* [(x + y - 1) / y] suffers from an integer overflow, even though the + * computation should be possible in the given type. Therefore, we use + * [x / y + !!(x % y)]. Note that on "Real CPUs" a division returns both the + * quotient and the remainder, so both should be equally fast. */ +#define DIV_ROUND_UP(x, y) __DIV_ROUND_UP(UNIQ, (x), UNIQ, (y)) +#define __DIV_ROUND_UP(xq, x, yq, y) \ + ({ \ + const typeof(x) UNIQ_T(X, xq) = (x); \ + const typeof(y) UNIQ_T(Y, yq) = (y); \ + (UNIQ_T(X, xq) / UNIQ_T(Y, yq) + !!(UNIQ_T(X, xq) % UNIQ_T(Y, yq))); \ + }) + +#ifdef __COVERITY__ + +/* Use special definitions of assertion macros in order to prevent + * false positives of ASSERT_SIDE_EFFECT on Coverity static analyzer + * for uses of assert_se() and assert_return(). + * + * These definitions make expression go through a (trivial) function + * call to ensure they are not discarded. Also use ! or !! to ensure + * the boolean expressions are seen as such. + * + * This technique has been described and recommended in: + * https://community.synopsys.com/s/question/0D534000046Yuzb/suppressing-assertsideeffect-for-functions-that-allow-for-sideeffects + */ + +extern void __coverity_panic__(void); + +static inline int __coverity_check__(int condition) { + return condition; +} + +#define assert_message_se(expr, message) \ + do { \ + if (__coverity_check__(!(expr))) \ + __coverity_panic__(); \ + } while (false) + +#define assert_log(expr, message) __coverity_check__(!!(expr)) + +#else /* ! __COVERITY__ */ + +#define assert_message_se(expr, message) \ + do { \ + if (_unlikely_(!(expr))) \ + log_assert_failed(message, __FILE__, __LINE__, __PRETTY_FUNCTION__); \ + } while (false) + +#define assert_log(expr, message) ((_likely_(expr)) \ + ? (true) \ + : (log_assert_failed_return(message, __FILE__, __LINE__, __PRETTY_FUNCTION__), false)) + +#endif /* __COVERITY__ */ + +#define assert_se(expr) assert_message_se(expr, #expr) + +/* We override the glibc assert() here. */ +#undef assert +#ifdef NDEBUG +#define assert(expr) do {} while (false) +#else +#define assert(expr) assert_message_se(expr, #expr) +#endif + +#define assert_not_reached(t) \ + do { \ + log_assert_failed_unreachable(t, __FILE__, __LINE__, __PRETTY_FUNCTION__); \ + } while (false) + +#if defined(static_assert) +#define assert_cc(expr) \ + static_assert(expr, #expr); +#else +#define assert_cc(expr) \ + struct CONCATENATE(_assert_struct_, __COUNTER__) { \ + char x[(expr) ? 0 : -1]; \ + }; +#endif + +#define assert_return(expr, r) \ + do { \ + if (!assert_log(expr, #expr)) \ + return (r); \ + } while (false) + +#define assert_return_errno(expr, r, err) \ + do { \ + if (!assert_log(expr, #expr)) { \ + errno = err; \ + return (r); \ + } \ + } while (false) + +#define return_with_errno(r, err) \ + do { \ + errno = abs(err); \ + return r; \ + } while (false) + +#define PTR_TO_INT(p) ((int) ((intptr_t) (p))) +#define INT_TO_PTR(u) ((void *) ((intptr_t) (u))) +#define PTR_TO_UINT(p) ((unsigned) ((uintptr_t) (p))) +#define UINT_TO_PTR(u) ((void *) ((uintptr_t) (u))) + +#define PTR_TO_LONG(p) ((long) ((intptr_t) (p))) +#define LONG_TO_PTR(u) ((void *) ((intptr_t) (u))) +#define PTR_TO_ULONG(p) ((unsigned long) ((uintptr_t) (p))) +#define ULONG_TO_PTR(u) ((void *) ((uintptr_t) (u))) + +#define PTR_TO_INT32(p) ((int32_t) ((intptr_t) (p))) +#define INT32_TO_PTR(u) ((void *) ((intptr_t) (u))) +#define PTR_TO_UINT32(p) ((uint32_t) ((uintptr_t) (p))) +#define UINT32_TO_PTR(u) ((void *) ((uintptr_t) (u))) + +#define PTR_TO_INT64(p) ((int64_t) ((intptr_t) (p))) +#define INT64_TO_PTR(u) ((void *) ((intptr_t) (u))) +#define PTR_TO_UINT64(p) ((uint64_t) ((uintptr_t) (p))) +#define UINT64_TO_PTR(u) ((void *) ((uintptr_t) (u))) + +#define PTR_TO_SIZE(p) ((size_t) ((uintptr_t) (p))) +#define SIZE_TO_PTR(u) ((void *) ((uintptr_t) (u))) + +#define CHAR_TO_STR(x) ((char[2]) { x, 0 }) + +#define char_array_0(x) x[sizeof(x)-1] = 0; + +/* Returns the number of chars needed to format variables of the + * specified type as a decimal string. Adds in extra space for a + * negative '-' prefix (hence works correctly on signed + * types). Includes space for the trailing NUL. */ +#define DECIMAL_STR_MAX(type) \ + (2+(sizeof(type) <= 1 ? 3 : \ + sizeof(type) <= 2 ? 5 : \ + sizeof(type) <= 4 ? 10 : \ + sizeof(type) <= 8 ? 20 : sizeof(int[-2*(sizeof(type) > 8)]))) + +#define DECIMAL_STR_WIDTH(x) \ + ({ \ + typeof(x) _x_ = (x); \ + unsigned ans = 1; \ + while ((_x_ /= 10) != 0) \ + ans++; \ + ans; \ + }) + +#define SET_FLAG(v, flag, b) \ + (v) = (b) ? ((v) | (flag)) : ((v) & ~(flag)) +#define FLAGS_SET(v, flags) \ + ((~(v) & (flags)) == 0) + +#define CASE_F(X) case X: +#define CASE_F_1(CASE, X) CASE_F(X) +#define CASE_F_2(CASE, X, ...) CASE(X) CASE_F_1(CASE, __VA_ARGS__) +#define CASE_F_3(CASE, X, ...) CASE(X) CASE_F_2(CASE, __VA_ARGS__) +#define CASE_F_4(CASE, X, ...) CASE(X) CASE_F_3(CASE, __VA_ARGS__) +#define CASE_F_5(CASE, X, ...) CASE(X) CASE_F_4(CASE, __VA_ARGS__) +#define CASE_F_6(CASE, X, ...) CASE(X) CASE_F_5(CASE, __VA_ARGS__) +#define CASE_F_7(CASE, X, ...) CASE(X) CASE_F_6(CASE, __VA_ARGS__) +#define CASE_F_8(CASE, X, ...) CASE(X) CASE_F_7(CASE, __VA_ARGS__) +#define CASE_F_9(CASE, X, ...) CASE(X) CASE_F_8(CASE, __VA_ARGS__) +#define CASE_F_10(CASE, X, ...) CASE(X) CASE_F_9(CASE, __VA_ARGS__) +#define CASE_F_11(CASE, X, ...) CASE(X) CASE_F_10(CASE, __VA_ARGS__) +#define CASE_F_12(CASE, X, ...) CASE(X) CASE_F_11(CASE, __VA_ARGS__) +#define CASE_F_13(CASE, X, ...) CASE(X) CASE_F_12(CASE, __VA_ARGS__) +#define CASE_F_14(CASE, X, ...) CASE(X) CASE_F_13(CASE, __VA_ARGS__) +#define CASE_F_15(CASE, X, ...) CASE(X) CASE_F_14(CASE, __VA_ARGS__) +#define CASE_F_16(CASE, X, ...) CASE(X) CASE_F_15(CASE, __VA_ARGS__) +#define CASE_F_17(CASE, X, ...) CASE(X) CASE_F_16(CASE, __VA_ARGS__) +#define CASE_F_18(CASE, X, ...) CASE(X) CASE_F_17(CASE, __VA_ARGS__) +#define CASE_F_19(CASE, X, ...) CASE(X) CASE_F_18(CASE, __VA_ARGS__) +#define CASE_F_20(CASE, X, ...) CASE(X) CASE_F_19(CASE, __VA_ARGS__) + +#define GET_CASE_F(_1,_2,_3,_4,_5,_6,_7,_8,_9,_10,_11,_12,_13,_14,_15,_16,_17,_18,_19,_20,NAME,...) NAME +#define FOR_EACH_MAKE_CASE(...) \ + GET_CASE_F(__VA_ARGS__,CASE_F_20,CASE_F_19,CASE_F_18,CASE_F_17,CASE_F_16,CASE_F_15,CASE_F_14,CASE_F_13,CASE_F_12,CASE_F_11, \ + CASE_F_10,CASE_F_9,CASE_F_8,CASE_F_7,CASE_F_6,CASE_F_5,CASE_F_4,CASE_F_3,CASE_F_2,CASE_F_1) \ + (CASE_F,__VA_ARGS__) + +#define IN_SET(x, ...) \ + ({ \ + bool _found = false; \ + /* If the build breaks in the line below, you need to extend the case macros. (We use "long double" as \ + * type for the array, in the hope that checkers such as ubsan don't complain that the initializers for \ + * the array are not representable by the base type. Ideally we'd use typeof(x) as base type, but that \ + * doesn't work, as we want to use this on bitfields and gcc refuses typeof() on bitfields.) */ \ + assert_cc((sizeof((long double[]){__VA_ARGS__})/sizeof(long double)) <= 20); \ + switch(x) { \ + FOR_EACH_MAKE_CASE(__VA_ARGS__) \ + _found = true; \ + break; \ + default: \ + break; \ + } \ + _found; \ + }) + +#define SWAP_TWO(x, y) do { \ + typeof(x) _t = (x); \ + (x) = (y); \ + (y) = (_t); \ + } while (false) + +/* Define C11 thread_local attribute even on older gcc compiler + * version */ +#ifndef thread_local +/* + * Don't break on glibc < 2.16 that doesn't define __STDC_NO_THREADS__ + * see http://gcc.gnu.org/bugzilla/show_bug.cgi?id=53769 + */ +#if __STDC_VERSION__ >= 201112L && !(defined(__STDC_NO_THREADS__) || (defined(__GNU_LIBRARY__) && __GLIBC__ == 2 && __GLIBC_MINOR__ < 16)) +#define thread_local _Thread_local +#else +#define thread_local __thread +#endif +#endif + +#define DEFINE_TRIVIAL_DESTRUCTOR(name, type, func) \ + static inline void name(type *p) { \ + func(p); \ + } + +#define DEFINE_TRIVIAL_CLEANUP_FUNC(type, func) \ + static inline void func##p(type *p) { \ + if (*p) \ + func(*p); \ + } + +#define _DEFINE_TRIVIAL_REF_FUNC(type, name, scope) \ + scope type *name##_ref(type *p) { \ + if (!p) \ + return NULL; \ + \ + assert(p->n_ref > 0); \ + p->n_ref++; \ + return p; \ + } + +#define _DEFINE_TRIVIAL_UNREF_FUNC(type, name, free_func, scope) \ + scope type *name##_unref(type *p) { \ + if (!p) \ + return NULL; \ + \ + assert(p->n_ref > 0); \ + p->n_ref--; \ + if (p->n_ref > 0) \ + return NULL; \ + \ + return free_func(p); \ + } + +#define DEFINE_TRIVIAL_REF_FUNC(type, name) \ + _DEFINE_TRIVIAL_REF_FUNC(type, name,) +#define DEFINE_PRIVATE_TRIVIAL_REF_FUNC(type, name) \ + _DEFINE_TRIVIAL_REF_FUNC(type, name, static) +#define DEFINE_PUBLIC_TRIVIAL_REF_FUNC(type, name) \ + _DEFINE_TRIVIAL_REF_FUNC(type, name, _public_) + +#define DEFINE_TRIVIAL_UNREF_FUNC(type, name, free_func) \ + _DEFINE_TRIVIAL_UNREF_FUNC(type, name, free_func,) +#define DEFINE_PRIVATE_TRIVIAL_UNREF_FUNC(type, name, free_func) \ + _DEFINE_TRIVIAL_UNREF_FUNC(type, name, free_func, static) +#define DEFINE_PUBLIC_TRIVIAL_UNREF_FUNC(type, name, free_func) \ + _DEFINE_TRIVIAL_UNREF_FUNC(type, name, free_func, _public_) + +#define DEFINE_TRIVIAL_REF_UNREF_FUNC(type, name, free_func) \ + DEFINE_TRIVIAL_REF_FUNC(type, name); \ + DEFINE_TRIVIAL_UNREF_FUNC(type, name, free_func); + +#define DEFINE_PRIVATE_TRIVIAL_REF_UNREF_FUNC(type, name, free_func) \ + DEFINE_PRIVATE_TRIVIAL_REF_FUNC(type, name); \ + DEFINE_PRIVATE_TRIVIAL_UNREF_FUNC(type, name, free_func); + +#define DEFINE_PUBLIC_TRIVIAL_REF_UNREF_FUNC(type, name, free_func) \ + DEFINE_PUBLIC_TRIVIAL_REF_FUNC(type, name); \ + DEFINE_PUBLIC_TRIVIAL_UNREF_FUNC(type, name, free_func); + +#include "log.h" diff --git a/src/basic/memfd-util.c b/src/basic/memfd-util.c new file mode 100644 index 0000000..f88f0fc --- /dev/null +++ b/src/basic/memfd-util.c @@ -0,0 +1,154 @@ +/* SPDX-License-Identifier: LGPL-2.1+ */ + +#include <errno.h> +#include <fcntl.h> +#include <sys/stat.h> +#include <unistd.h> +#if HAVE_LINUX_MEMFD_H +#include <linux/memfd.h> +#endif +#include <stdio.h> +#include <sys/mman.h> +#include <sys/prctl.h> + +#include "alloc-util.h" +#include "fd-util.h" +#include "macro.h" +#include "memfd-util.h" +#include "missing.h" +#include "string-util.h" +#include "utf8.h" + +int memfd_new(const char *name) { + _cleanup_free_ char *g = NULL; + int fd; + + if (!name) { + char pr[17] = {}; + + /* If no name is specified we generate one. We include + * a hint indicating our library implementation, and + * add the thread name to it */ + + assert_se(prctl(PR_GET_NAME, (unsigned long) pr) >= 0); + + if (isempty(pr)) + name = "sd"; + else { + _cleanup_free_ char *e = NULL; + + e = utf8_escape_invalid(pr); + if (!e) + return -ENOMEM; + + g = strappend("sd-", e); + if (!g) + return -ENOMEM; + + name = g; + } + } + + fd = memfd_create(name, MFD_ALLOW_SEALING | MFD_CLOEXEC); + if (fd < 0) + return -errno; + + return fd; +} + +int memfd_map(int fd, uint64_t offset, size_t size, void **p) { + void *q; + int sealed; + + assert(fd >= 0); + assert(size > 0); + assert(p); + + sealed = memfd_get_sealed(fd); + if (sealed < 0) + return sealed; + + if (sealed) + q = mmap(NULL, size, PROT_READ, MAP_PRIVATE, fd, offset); + else + q = mmap(NULL, size, PROT_READ | PROT_WRITE, MAP_SHARED, fd, offset); + + if (q == MAP_FAILED) + return -errno; + + *p = q; + return 0; +} + +int memfd_set_sealed(int fd) { + int r; + + assert(fd >= 0); + + r = fcntl(fd, F_ADD_SEALS, F_SEAL_SHRINK | F_SEAL_GROW | F_SEAL_WRITE | F_SEAL_SEAL); + if (r < 0) + return -errno; + + return 0; +} + +int memfd_get_sealed(int fd) { + int r; + + assert(fd >= 0); + + r = fcntl(fd, F_GET_SEALS); + if (r < 0) + return -errno; + + return r == (F_SEAL_SHRINK | F_SEAL_GROW | F_SEAL_WRITE | F_SEAL_SEAL); +} + +int memfd_get_size(int fd, uint64_t *sz) { + struct stat stat; + int r; + + assert(fd >= 0); + assert(sz); + + r = fstat(fd, &stat); + if (r < 0) + return -errno; + + *sz = stat.st_size; + return 0; +} + +int memfd_set_size(int fd, uint64_t sz) { + int r; + + assert(fd >= 0); + + r = ftruncate(fd, sz); + if (r < 0) + return -errno; + + return 0; +} + +int memfd_new_and_map(const char *name, size_t sz, void **p) { + _cleanup_close_ int fd = -1; + int r; + + assert(sz > 0); + assert(p); + + fd = memfd_new(name); + if (fd < 0) + return fd; + + r = memfd_set_size(fd, sz); + if (r < 0) + return r; + + r = memfd_map(fd, 0, sz, p); + if (r < 0) + return r; + + return TAKE_FD(fd); +} diff --git a/src/basic/memfd-util.h b/src/basic/memfd-util.h new file mode 100644 index 0000000..5ebb519 --- /dev/null +++ b/src/basic/memfd-util.h @@ -0,0 +1,18 @@ +/* SPDX-License-Identifier: LGPL-2.1+ */ +#pragma once + +#include <inttypes.h> +#include <stddef.h> +#include <stdint.h> +#include <sys/types.h> + +int memfd_new(const char *name); +int memfd_new_and_map(const char *name, size_t sz, void **p); + +int memfd_map(int fd, uint64_t offset, size_t size, void **p); + +int memfd_set_sealed(int fd); +int memfd_get_sealed(int fd); + +int memfd_get_size(int fd, uint64_t *sz); +int memfd_set_size(int fd, uint64_t sz); diff --git a/src/basic/mempool.c b/src/basic/mempool.c new file mode 100644 index 0000000..159c963 --- /dev/null +++ b/src/basic/mempool.c @@ -0,0 +1,99 @@ +/* SPDX-License-Identifier: LGPL-2.1+ */ + +#include <stdint.h> +#include <stdlib.h> + +#include "env-util.h" +#include "macro.h" +#include "mempool.h" +#include "process-util.h" +#include "util.h" + +struct pool { + struct pool *next; + size_t n_tiles; + size_t n_used; +}; + +void* mempool_alloc_tile(struct mempool *mp) { + size_t i; + + /* When a tile is released we add it to the list and simply + * place the next pointer at its offset 0. */ + + assert(mp->tile_size >= sizeof(void*)); + assert(mp->at_least > 0); + + if (mp->freelist) { + void *r; + + r = mp->freelist; + mp->freelist = * (void**) mp->freelist; + return r; + } + + if (_unlikely_(!mp->first_pool) || + _unlikely_(mp->first_pool->n_used >= mp->first_pool->n_tiles)) { + size_t size, n; + struct pool *p; + + n = mp->first_pool ? mp->first_pool->n_tiles : 0; + n = MAX(mp->at_least, n * 2); + size = PAGE_ALIGN(ALIGN(sizeof(struct pool)) + n*mp->tile_size); + n = (size - ALIGN(sizeof(struct pool))) / mp->tile_size; + + p = malloc(size); + if (!p) + return NULL; + + p->next = mp->first_pool; + p->n_tiles = n; + p->n_used = 0; + + mp->first_pool = p; + } + + i = mp->first_pool->n_used++; + + return ((uint8_t*) mp->first_pool) + ALIGN(sizeof(struct pool)) + i*mp->tile_size; +} + +void* mempool_alloc0_tile(struct mempool *mp) { + void *p; + + p = mempool_alloc_tile(mp); + if (p) + memzero(p, mp->tile_size); + return p; +} + +void mempool_free_tile(struct mempool *mp, void *p) { + * (void**) p = mp->freelist; + mp->freelist = p; +} + +bool mempool_enabled(void) { + static int b = -1; + + if (!is_main_thread()) + return false; + + if (!mempool_use_allowed) + b = false; + if (b < 0) + b = getenv_bool("SYSTEMD_MEMPOOL") != 0; + + return b; +} + +#if VALGRIND +void mempool_drop(struct mempool *mp) { + struct pool *p = mp->first_pool; + while (p) { + struct pool *n; + n = p->next; + free(p); + p = n; + } +} +#endif diff --git a/src/basic/mempool.h b/src/basic/mempool.h new file mode 100644 index 0000000..0eecca0 --- /dev/null +++ b/src/basic/mempool.h @@ -0,0 +1,31 @@ +/* SPDX-License-Identifier: LGPL-2.1+ */ +#pragma once + +#include <stdbool.h> +#include <stddef.h> + +struct pool; + +struct mempool { + struct pool *first_pool; + void *freelist; + size_t tile_size; + unsigned at_least; +}; + +void* mempool_alloc_tile(struct mempool *mp); +void* mempool_alloc0_tile(struct mempool *mp); +void mempool_free_tile(struct mempool *mp, void *p); + +#define DEFINE_MEMPOOL(pool_name, tile_type, alloc_at_least) \ +static struct mempool pool_name = { \ + .tile_size = sizeof(tile_type), \ + .at_least = alloc_at_least, \ +} + +extern const bool mempool_use_allowed; +bool mempool_enabled(void); + +#if VALGRIND +void mempool_drop(struct mempool *mp); +#endif diff --git a/src/basic/meson.build b/src/basic/meson.build new file mode 100644 index 0000000..e5852f3 --- /dev/null +++ b/src/basic/meson.build @@ -0,0 +1,310 @@ +# SPDX-License-Identifier: LGPL-2.1+ + +basic_sources = files(''' + MurmurHash2.c + MurmurHash2.h + af-list.c + af-list.h + alloc-util.c + alloc-util.h + architecture.c + architecture.h + arphrd-list.c + arphrd-list.h + async.c + async.h + audit-util.c + audit-util.h + blockdev-util.c + blockdev-util.h + btrfs-util.c + btrfs-util.h + build.h + bus-label.c + bus-label.h + cap-list.c + cap-list.h + capability-util.c + capability-util.h + cgroup-util.c + cgroup-util.h + chattr-util.c + chattr-util.h + conf-files.c + conf-files.h + copy.c + copy.h + def.h + device-nodes.c + device-nodes.h + dirent-util.c + dirent-util.h + env-file.c + env-file.h + env-util.c + env-util.h + errno-list.c + errno-list.h + escape.c + escape.h + ether-addr-util.c + ether-addr-util.h + extract-word.c + extract-word.h + fd-util.c + fd-util.h + fileio.c + fileio.h + format-util.h + fs-util.c + fs-util.h + glob-util.c + glob-util.h + gunicode.c + gunicode.h + hash-funcs.c + hash-funcs.h + hashmap.c + hashmap.h + hexdecoct.c + hexdecoct.h + hostname-util.c + hostname-util.h + in-addr-util.c + in-addr-util.h + io-util.c + io-util.h + ioprio.h + khash.c + khash.h + label.c + label.h + list.h + locale-util.c + locale-util.h + log.c + log.h + login-util.c + login-util.h + macro.h + memfd-util.c + memfd-util.h + mempool.c + mempool.h + missing.h + missing_audit.h + missing_btrfs.h + missing_btrfs_tree.h + missing_capability.h + missing_drm.h + missing_ethtool.h + missing_fcntl.h + missing_fib_rules.h + missing_fou.h + missing_fs.h + missing_if_bridge.h + missing_if_link.h + missing_if_tunnel.h + missing_input.h + missing_keyctl.h + missing_magic.h + missing_mman.h + missing_network.h + missing_prctl.h + missing_random.h + missing_resource.h + missing_sched.h + missing_securebits.h + missing_socket.h + missing_stat.h + missing_stdlib.h + missing_syscall.h + missing_timerfd.h + missing_type.h + missing_vxcan.h + mkdir-label.c + mkdir.c + mkdir.h + mountpoint-util.c + mountpoint-util.h + nss-util.h + ordered-set.c + ordered-set.h + parse-util.c + parse-util.h + path-util.c + path-util.h + prioq.c + prioq.h + proc-cmdline.c + proc-cmdline.h + process-util.c + process-util.h + procfs-util.c + procfs-util.h + random-util.c + random-util.h + ratelimit.c + ratelimit.h + raw-clone.h + raw-reboot.h + refcnt.h + replace-var.c + replace-var.h + rlimit-util.c + rlimit-util.h + rm-rf.c + rm-rf.h + selinux-util.c + selinux-util.h + set.h + sigbus.c + sigbus.h + signal-util.c + signal-util.h + siphash24.c + siphash24.h + smack-util.c + smack-util.h + socket-label.c + socket-util.c + socket-util.h + sparse-endian.h + special.h + stat-util.c + stat-util.h + static-destruct.h + stdio-util.h + strbuf.c + strbuf.h + string-table.c + string-table.h + string-util.c + string-util.h + strv.c + strv.h + strxcpyx.c + strxcpyx.h + syslog-util.c + syslog-util.h + terminal-util.c + terminal-util.h + time-util.c + time-util.h + tmpfile-util.c + tmpfile-util.h + umask-util.h + unaligned.h + unit-def.c + unit-def.h + unit-name.c + unit-name.h + user-util.c + user-util.h + utf8.c + utf8.h + util.c + util.h + virt.c + virt.h + xattr-util.c + xattr-util.h +'''.split()) + +missing_audit_h = files('missing_audit.h') +missing_capability_h = files('missing_capability.h') +missing_network_h = files('missing_network.h') +missing_socket_h = files('missing_socket.h') + +generate_af_list = find_program('generate-af-list.sh') +af_list_txt = custom_target( + 'af-list.txt', + output : 'af-list.txt', + command : [generate_af_list, cpp, config_h, missing_socket_h], + capture : true) + +generate_arphrd_list = find_program('generate-arphrd-list.sh') +arphrd_list_txt = custom_target( + 'arphrd-list.txt', + output : 'arphrd-list.txt', + command : [generate_arphrd_list, cpp, config_h, missing_network_h], + capture : true) + +generate_cap_list = find_program('generate-cap-list.sh') +cap_list_txt = custom_target( + 'cap-list.txt', + output : 'cap-list.txt', + command : [generate_cap_list, cpp, config_h, missing_capability_h], + capture : true) + +generate_errno_list = find_program('generate-errno-list.sh') +errno_list_txt = custom_target( + 'errno-list.txt', + output : 'errno-list.txt', + command : [generate_errno_list, cpp], + capture : true) + +generated_gperf_headers = [] +foreach item : [['af', af_list_txt, 'af', ''], + ['arphrd', arphrd_list_txt, 'arphrd', 'ARPHRD_'], + ['cap', cap_list_txt, 'capability', ''], + ['errno', errno_list_txt, 'errno', '']] + + fname = '@0@-from-name.gperf'.format(item[0]) + gperf_file = custom_target( + fname, + input : item[1], + output : fname, + command : [generate_gperfs, item[2], item[3], '@INPUT@'], + capture : true) + + fname = '@0@-from-name.h'.format(item[0]) + target1 = custom_target( + fname, + input : gperf_file, + output : fname, + command : [gperf, + '-L', 'ANSI-C', '-t', '--ignore-case', + '-N', 'lookup_@0@'.format(item[2]), + '-H', 'hash_@0@_name'.format(item[2]), + '-p', '-C', + '@INPUT@'], + capture : true) + + fname = '@0@-to-name.h'.format(item[0]) + awkscript = '@0@-to-name.awk'.format(item[0]) + target2 = custom_target( + fname, + input : [awkscript, item[1]], + output : fname, + command : [awk, '-f', '@INPUT0@', '@INPUT1@'], + capture : true) + + generated_gperf_headers += [target1, target2] +endforeach + +basic_sources += generated_gperf_headers +basic_gcrypt_sources = files( + 'gcrypt-util.c', + 'gcrypt-util.h') + +libbasic = static_library( + 'basic', + basic_sources, + include_directories : includes, + dependencies : [versiondep, + threads, + libcap, + libselinux, + libm], + c_args : ['-fvisibility=default'], + install : false) + +# A convenience library that is separate from libbasic to avoid +# unnecessary linking to libgcrypt. +libbasic_gcrypt = static_library( + 'basic-gcrypt', + basic_gcrypt_sources, + include_directories : includes, + dependencies : [libgcrypt], + c_args : ['-fvisibility=default']) diff --git a/src/basic/missing.h b/src/basic/missing.h new file mode 100644 index 0000000..5067c8f --- /dev/null +++ b/src/basic/missing.h @@ -0,0 +1,25 @@ +/* SPDX-License-Identifier: LGPL-2.1+ */ +#pragma once + +/* Missing glibc definitions to access certain kernel APIs */ + +#include "missing_audit.h" +#include "missing_btrfs_tree.h" +#include "missing_capability.h" +#include "missing_drm.h" +#include "missing_fcntl.h" +#include "missing_fs.h" +#include "missing_input.h" +#include "missing_magic.h" +#include "missing_mman.h" +#include "missing_network.h" +#include "missing_prctl.h" +#include "missing_random.h" +#include "missing_resource.h" +#include "missing_sched.h" +#include "missing_socket.h" +#include "missing_stdlib.h" +#include "missing_timerfd.h" +#include "missing_type.h" + +#include "missing_syscall.h" diff --git a/src/basic/missing_audit.h b/src/basic/missing_audit.h new file mode 100644 index 0000000..b00d537 --- /dev/null +++ b/src/basic/missing_audit.h @@ -0,0 +1,24 @@ +/* SPDX-License-Identifier: LGPL-2.1+ */ +#pragma once + +#include <linux/audit.h> + +#if HAVE_AUDIT +#include <libaudit.h> +#endif + +#ifndef AUDIT_SERVICE_START +#define AUDIT_SERVICE_START 1130 /* Service (daemon) start */ +#endif + +#ifndef AUDIT_SERVICE_STOP +#define AUDIT_SERVICE_STOP 1131 /* Service (daemon) stop */ +#endif + +#ifndef MAX_AUDIT_MESSAGE_LENGTH +#define MAX_AUDIT_MESSAGE_LENGTH 8970 +#endif + +#ifndef AUDIT_NLGRP_MAX +#define AUDIT_NLGRP_READLOG 1 +#endif diff --git a/src/basic/missing_btrfs.h b/src/basic/missing_btrfs.h new file mode 100644 index 0000000..34c382f --- /dev/null +++ b/src/basic/missing_btrfs.h @@ -0,0 +1,22 @@ +/* SPDX-License-Identifier: LGPL-2.1+ */ +#pragma once + +/* Old btrfs.h requires stddef.h to be included before btrfs.h */ +#include <stddef.h> + +#include <linux/btrfs.h> + +/* linux@57254b6ebce4ceca02d9c8b615f6059c56c19238 (3.11) */ +#ifndef BTRFS_IOC_QUOTA_RESCAN_WAIT +#define BTRFS_IOC_QUOTA_RESCAN_WAIT _IO(BTRFS_IOCTL_MAGIC, 46) +#endif + +/* linux@83288b60bf6668933689078973136e0c9d387b38 (4.7) */ +#ifndef BTRFS_QGROUP_LIMIT_MAX_RFER +#define BTRFS_QGROUP_LIMIT_MAX_RFER (1ULL << 0) +#define BTRFS_QGROUP_LIMIT_MAX_EXCL (1ULL << 1) +#define BTRFS_QGROUP_LIMIT_RSV_RFER (1ULL << 2) +#define BTRFS_QGROUP_LIMIT_RSV_EXCL (1ULL << 3) +#define BTRFS_QGROUP_LIMIT_RFER_CMPR (1ULL << 4) +#define BTRFS_QGROUP_LIMIT_EXCL_CMPR (1ULL << 5) +#endif diff --git a/src/basic/missing_btrfs_tree.h b/src/basic/missing_btrfs_tree.h new file mode 100644 index 0000000..555f90f --- /dev/null +++ b/src/basic/missing_btrfs_tree.h @@ -0,0 +1,109 @@ +/* SPDX-License-Identifier: LGPL-2.1+ */ +#pragma once + +#include <linux/types.h> + +#include "missing_btrfs.h" + +/* linux@db6711600e27c885aed89751f04e727f3af26715 (4.7) */ +#if HAVE_LINUX_BTRFS_TREE_H +#include <linux/btrfs_tree.h> +#else +#define BTRFS_ROOT_TREE_OBJECTID 1 +#define BTRFS_QUOTA_TREE_OBJECTID 8 +#define BTRFS_FIRST_FREE_OBJECTID 256 +#define BTRFS_LAST_FREE_OBJECTID -256ULL + +#define BTRFS_ROOT_ITEM_KEY 132 +#define BTRFS_ROOT_BACKREF_KEY 144 +#define BTRFS_QGROUP_STATUS_KEY 240 +#define BTRFS_QGROUP_INFO_KEY 242 +#define BTRFS_QGROUP_LIMIT_KEY 244 +#define BTRFS_QGROUP_RELATION_KEY 246 + +struct btrfs_disk_key { + __le64 objectid; + __u8 type; + __le64 offset; +} __attribute__ ((__packed__)); + +struct btrfs_timespec { + __le64 sec; + __le32 nsec; +} __attribute__ ((__packed__)); + +struct btrfs_inode_item { + __le64 generation; + __le64 transid; + __le64 size; + __le64 nbytes; + __le64 block_group; + __le32 nlink; + __le32 uid; + __le32 gid; + __le32 mode; + __le64 rdev; + __le64 flags; + __le64 sequence; + __le64 reserved[4]; + struct btrfs_timespec atime; + struct btrfs_timespec ctime; + struct btrfs_timespec mtime; + struct btrfs_timespec otime; +} __attribute__ ((__packed__)); + +#define BTRFS_ROOT_SUBVOL_RDONLY (1ULL << 0) + +struct btrfs_root_item { + struct btrfs_inode_item inode; + __le64 generation; + __le64 root_dirid; + __le64 bytenr; + __le64 byte_limit; + __le64 bytes_used; + __le64 last_snapshot; + __le64 flags; + __le32 refs; + struct btrfs_disk_key drop_progress; + __u8 drop_level; + __u8 level; + + __le64 generation_v2; + __u8 uuid[BTRFS_UUID_SIZE]; + __u8 parent_uuid[BTRFS_UUID_SIZE]; + __u8 received_uuid[BTRFS_UUID_SIZE]; + __le64 ctransid; /* updated when an inode changes */ + __le64 otransid; /* trans when created */ + __le64 stransid; /* trans when sent. non-zero for received subvol */ + __le64 rtransid; /* trans when received. non-zero for received subvol */ + struct btrfs_timespec ctime; + struct btrfs_timespec otime; + struct btrfs_timespec stime; + struct btrfs_timespec rtime; + __le64 reserved[8]; /* for future */ +} __attribute__ ((__packed__)); + +struct btrfs_root_ref { + __le64 dirid; + __le64 sequence; + __le16 name_len; +} __attribute__ ((__packed__)); + +#define BTRFS_QGROUP_LEVEL_SHIFT 48 + +struct btrfs_qgroup_info_item { + __le64 generation; + __le64 rfer; + __le64 rfer_cmpr; + __le64 excl; + __le64 excl_cmpr; +} __attribute__ ((__packed__)); + +struct btrfs_qgroup_limit_item { + __le64 flags; + __le64 max_rfer; + __le64 max_excl; + __le64 rsv_rfer; + __le64 rsv_excl; +} __attribute__ ((__packed__)); +#endif diff --git a/src/basic/missing_capability.h b/src/basic/missing_capability.h new file mode 100644 index 0000000..1308a3d --- /dev/null +++ b/src/basic/missing_capability.h @@ -0,0 +1,12 @@ +/* SPDX-License-Identifier: LGPL-2.1+ */ +#pragma once + +#include <linux/capability.h> + +/* 3a101b8de0d39403b2c7e5c23fd0b005668acf48 (3.16) */ +#ifndef CAP_AUDIT_READ +#define CAP_AUDIT_READ 37 + +#undef CAP_LAST_CAP +#define CAP_LAST_CAP CAP_AUDIT_READ +#endif diff --git a/src/basic/missing_drm.h b/src/basic/missing_drm.h new file mode 100644 index 0000000..a64f74e --- /dev/null +++ b/src/basic/missing_drm.h @@ -0,0 +1,10 @@ +/* SPDX-License-Identifier: LGPL-2.1+ */ +#pragma once + +#ifndef DRM_IOCTL_SET_MASTER +#define DRM_IOCTL_SET_MASTER _IO('d', 0x1e) +#endif + +#ifndef DRM_IOCTL_DROP_MASTER +#define DRM_IOCTL_DROP_MASTER _IO('d', 0x1f) +#endif diff --git a/src/basic/missing_ethtool.h b/src/basic/missing_ethtool.h new file mode 100644 index 0000000..9ba929c --- /dev/null +++ b/src/basic/missing_ethtool.h @@ -0,0 +1,131 @@ +/* SPDX-License-Identifier: LGPL-2.1+ */ +#pragma once + +#include <linux/types.h> + +/* Missing definitions in ethtool.h */ + +#if !HAVE_ETHTOOL_LINK_MODE_10baseT_Half_BIT /* linux@3f1ac7a700d039c61d8d8b99f28d605d489a60cf (4.6) */ + +#define ETHTOOL_GLINKSETTINGS 0x0000004c /* Get ethtool_link_settings */ +#define ETHTOOL_SLINKSETTINGS 0x0000004d /* Set ethtool_link_settings */ + +struct ethtool_link_settings { + __u32 cmd; + __u32 speed; + __u8 duplex; + __u8 port; + __u8 phy_address; + __u8 autoneg; + __u8 mdio_support; + __u8 eth_tp_mdix; + __u8 eth_tp_mdix_ctrl; + __s8 link_mode_masks_nwords; + __u8 transceiver; + __u8 reserved1[3]; + __u32 reserved[7]; + __u32 link_mode_masks[0]; + /* layout of link_mode_masks fields: + * __u32 map_supported[link_mode_masks_nwords]; + * __u32 map_advertising[link_mode_masks_nwords]; + * __u32 map_lp_advertising[link_mode_masks_nwords]; + */ +}; + +enum ethtool_link_mode_bit_indices { + ETHTOOL_LINK_MODE_10baseT_Half_BIT = 0, + ETHTOOL_LINK_MODE_10baseT_Full_BIT = 1, + ETHTOOL_LINK_MODE_100baseT_Half_BIT = 2, + ETHTOOL_LINK_MODE_100baseT_Full_BIT = 3, + ETHTOOL_LINK_MODE_1000baseT_Half_BIT = 4, + ETHTOOL_LINK_MODE_1000baseT_Full_BIT = 5, + ETHTOOL_LINK_MODE_Autoneg_BIT = 6, + ETHTOOL_LINK_MODE_TP_BIT = 7, + ETHTOOL_LINK_MODE_AUI_BIT = 8, + ETHTOOL_LINK_MODE_MII_BIT = 9, + ETHTOOL_LINK_MODE_FIBRE_BIT = 10, + ETHTOOL_LINK_MODE_BNC_BIT = 11, + ETHTOOL_LINK_MODE_10000baseT_Full_BIT = 12, + ETHTOOL_LINK_MODE_Pause_BIT = 13, + ETHTOOL_LINK_MODE_Asym_Pause_BIT = 14, + ETHTOOL_LINK_MODE_2500baseX_Full_BIT = 15, + ETHTOOL_LINK_MODE_Backplane_BIT = 16, + ETHTOOL_LINK_MODE_1000baseKX_Full_BIT = 17, + ETHTOOL_LINK_MODE_10000baseKX4_Full_BIT = 18, + ETHTOOL_LINK_MODE_10000baseKR_Full_BIT = 19, + ETHTOOL_LINK_MODE_10000baseR_FEC_BIT = 20, + ETHTOOL_LINK_MODE_20000baseMLD2_Full_BIT = 21, + ETHTOOL_LINK_MODE_20000baseKR2_Full_BIT = 22, + ETHTOOL_LINK_MODE_40000baseKR4_Full_BIT = 23, + ETHTOOL_LINK_MODE_40000baseCR4_Full_BIT = 24, + ETHTOOL_LINK_MODE_40000baseSR4_Full_BIT = 25, + ETHTOOL_LINK_MODE_40000baseLR4_Full_BIT = 26, + ETHTOOL_LINK_MODE_56000baseKR4_Full_BIT = 27, + ETHTOOL_LINK_MODE_56000baseCR4_Full_BIT = 28, + ETHTOOL_LINK_MODE_56000baseSR4_Full_BIT = 29, + ETHTOOL_LINK_MODE_56000baseLR4_Full_BIT = 30, + ETHTOOL_LINK_MODE_25000baseCR_Full_BIT = 31, + ETHTOOL_LINK_MODE_25000baseKR_Full_BIT = 32, + ETHTOOL_LINK_MODE_25000baseSR_Full_BIT = 33, + ETHTOOL_LINK_MODE_50000baseCR2_Full_BIT = 34, + ETHTOOL_LINK_MODE_50000baseKR2_Full_BIT = 35, + ETHTOOL_LINK_MODE_100000baseKR4_Full_BIT = 36, + ETHTOOL_LINK_MODE_100000baseSR4_Full_BIT = 37, + ETHTOOL_LINK_MODE_100000baseCR4_Full_BIT = 38, + ETHTOOL_LINK_MODE_100000baseLR4_ER4_Full_BIT = 39, + ETHTOOL_LINK_MODE_50000baseSR2_Full_BIT = 40, + ETHTOOL_LINK_MODE_1000baseX_Full_BIT = 41, + ETHTOOL_LINK_MODE_10000baseCR_Full_BIT = 42, + ETHTOOL_LINK_MODE_10000baseSR_Full_BIT = 43, + ETHTOOL_LINK_MODE_10000baseLR_Full_BIT = 44, + ETHTOOL_LINK_MODE_10000baseLRM_Full_BIT = 45, + ETHTOOL_LINK_MODE_10000baseER_Full_BIT = 46, + ETHTOOL_LINK_MODE_2500baseT_Full_BIT = 47, + ETHTOOL_LINK_MODE_5000baseT_Full_BIT = 48, + + ETHTOOL_LINK_MODE_FEC_NONE_BIT = 49, + ETHTOOL_LINK_MODE_FEC_RS_BIT = 50, + ETHTOOL_LINK_MODE_FEC_BASER_BIT = 51, + + /* Last allowed bit for __ETHTOOL_LINK_MODE_LEGACY_MASK is bit + * 31. Please do NOT define any SUPPORTED_* or ADVERTISED_* + * macro for bits > 31. The only way to use indices > 31 is to + * use the new ETHTOOL_GLINKSETTINGS/ETHTOOL_SLINKSETTINGS API. + */ + + __ETHTOOL_LINK_MODE_LAST + = ETHTOOL_LINK_MODE_FEC_BASER_BIT, +}; +#else +#if !HAVE_ETHTOOL_LINK_MODE_25000baseCR_Full_BIT /* linux@3851112e4737cd52aaeda0ce8d084be9ee128106 (4.7) */ +#define ETHTOOL_LINK_MODE_25000baseCR_Full_BIT 31 +#define ETHTOOL_LINK_MODE_25000baseKR_Full_BIT 32 +#define ETHTOOL_LINK_MODE_25000baseSR_Full_BIT 33 +#define ETHTOOL_LINK_MODE_50000baseCR2_Full_BIT 34 +#define ETHTOOL_LINK_MODE_50000baseKR2_Full_BIT 35 +#define ETHTOOL_LINK_MODE_100000baseKR4_Full_BIT 36 +#define ETHTOOL_LINK_MODE_100000baseSR4_Full_BIT 37 +#define ETHTOOL_LINK_MODE_100000baseCR4_Full_BIT 38 +#define ETHTOOL_LINK_MODE_100000baseLR4_ER4_Full_BIT 39 +#endif +#if !HAVE_ETHTOOL_LINK_MODE_50000baseSR2_Full_BIT /* linux@89da45b8b5b2187734a11038b8593714f964ffd1 (4.8) */ +#define ETHTOOL_LINK_MODE_50000baseSR2_Full_BIT 40 +#endif +#if !HAVE_ETHTOOL_LINK_MODE_1000baseX_Full_BIT /* linux@5711a98221443aec54c4c81ee98c6ae46acccb65 (4.9) */ +#define ETHTOOL_LINK_MODE_1000baseX_Full_BIT 41 +#define ETHTOOL_LINK_MODE_10000baseCR_Full_BIT 42 +#define ETHTOOL_LINK_MODE_10000baseSR_Full_BIT 43 +#define ETHTOOL_LINK_MODE_10000baseLR_Full_BIT 44 +#define ETHTOOL_LINK_MODE_10000baseLRM_Full_BIT 45 +#define ETHTOOL_LINK_MODE_10000baseER_Full_BIT 46 +#endif +#if !HAVE_ETHTOOL_LINK_MODE_2500baseT_Full_BIT /* linux@94842b4fc4d6b1691cfc86c6f5251f299d27f4ba (4.10) */ +#define ETHTOOL_LINK_MODE_2500baseT_Full_BIT 47 +#define ETHTOOL_LINK_MODE_5000baseT_Full_BIT 48 +#endif +#if !HAVE_ETHTOOL_LINK_MODE_FEC_NONE_BIT /* linux@1a5f3da20bd966220931239fbd31e6ac6ff42251 (4.14) */ +#define ETHTOOL_LINK_MODE_FEC_NONE_BIT 49 +#define ETHTOOL_LINK_MODE_FEC_RS_BIT 50 +#define ETHTOOL_LINK_MODE_FEC_BASER_BIT 51 +#endif +#endif diff --git a/src/basic/missing_fcntl.h b/src/basic/missing_fcntl.h new file mode 100644 index 0000000..5d1c635 --- /dev/null +++ b/src/basic/missing_fcntl.h @@ -0,0 +1,60 @@ +/* SPDX-License-Identifier: LGPL-2.1+ */ +#pragma once + +#include <fcntl.h> + +#ifndef F_LINUX_SPECIFIC_BASE +#define F_LINUX_SPECIFIC_BASE 1024 +#endif + +#ifndef F_SETPIPE_SZ +#define F_SETPIPE_SZ (F_LINUX_SPECIFIC_BASE + 7) +#endif + +#ifndef F_GETPIPE_SZ +#define F_GETPIPE_SZ (F_LINUX_SPECIFIC_BASE + 8) +#endif + +#ifndef F_ADD_SEALS +#define F_ADD_SEALS (F_LINUX_SPECIFIC_BASE + 9) +#define F_GET_SEALS (F_LINUX_SPECIFIC_BASE + 10) + +#define F_SEAL_SEAL 0x0001 /* prevent further seals from being set */ +#define F_SEAL_SHRINK 0x0002 /* prevent file from shrinking */ +#define F_SEAL_GROW 0x0004 /* prevent file from growing */ +#define F_SEAL_WRITE 0x0008 /* prevent writes */ +#endif + +#ifndef F_OFD_GETLK +#define F_OFD_GETLK 36 +#define F_OFD_SETLK 37 +#define F_OFD_SETLKW 38 +#endif + +#ifndef MAX_HANDLE_SZ +#define MAX_HANDLE_SZ 128 +#endif + +/* The precise definition of __O_TMPFILE is arch specific; use the + * values defined by the kernel (note: some are hexa, some are octal, + * duplicated as-is from the kernel definitions): + * - alpha, parisc, sparc: each has a specific value; + * - others: they use the "generic" value. + */ + +#ifndef __O_TMPFILE +#if defined(__alpha__) +#define __O_TMPFILE 0100000000 +#elif defined(__parisc__) || defined(__hppa__) +#define __O_TMPFILE 0400000000 +#elif defined(__sparc__) || defined(__sparc64__) +#define __O_TMPFILE 0x2000000 +#else +#define __O_TMPFILE 020000000 +#endif +#endif + +/* a horrid kludge trying to make sure that this will fail on old kernels */ +#ifndef O_TMPFILE +#define O_TMPFILE (__O_TMPFILE | O_DIRECTORY) +#endif diff --git a/src/basic/missing_fib_rules.h b/src/basic/missing_fib_rules.h new file mode 100644 index 0000000..df120d7 --- /dev/null +++ b/src/basic/missing_fib_rules.h @@ -0,0 +1,45 @@ +/* SPDX-License-Identifier: LGPL-2.1+ */ +#pragma once + +#include <linux/types.h> + +#if !HAVE_FRA_TUN_ID /* linux@e7030878fc8448492b6e5cecd574043f63271298 (4.3) */ +#define FRA_TUN_ID 12 +#endif + +#if !HAVE_FRA_SUPPRESS_PREFIXLEN /* linux@6ef94cfafba159d6b1a902ccb3349ac6a34ff6ad, 73f5698e77219bfc3ea1903759fe8e20ab5b285e (3.12) */ +#define FRA_SUPPRESS_IFGROUP 13 +#define FRA_SUPPRESS_PREFIXLEN 14 +#endif + +#if !HAVE_FRA_PAD /* linux@b46f6ded906ef0be52a4881ba50a084aeca64d7e (4.7) */ +#define FRA_PAD 18 +#endif + +#if !HAVE_FRA_L3MDEV /* linux@96c63fa7393d0a346acfe5a91e0c7d4c7782641b (4.8) */ +#define FRA_L3MDEV 19 +#endif + +#if !HAVE_FRA_UID_RANGE /* linux@622ec2c9d52405973c9f1ca5116eb1c393adfc7d (4.10) */ +#define FRA_UID_RANGE 20 + +struct fib_rule_uid_range { + __u32 start; + __u32 end; +}; +#endif + +#if !HAVE_FRA_DPORT_RANGE /* linux@1b71af6053af1bd2f849e9fda4f71c1e3f145dcf, bfff4862653bb96001ab57c1edd6d03f48e5f035 (4.17) */ +#define FRA_PROTOCOL 21 +#define FRA_IP_PROTO 22 +#define FRA_SPORT_RANGE 23 +#define FRA_DPORT_RANGE 24 + +#undef FRA_MAX +#define FRA_MAX 24 + +struct fib_rule_port_range { + __u16 start; + __u16 end; +}; +#endif diff --git a/src/basic/missing_fou.h b/src/basic/missing_fou.h new file mode 100644 index 0000000..d8c7435 --- /dev/null +++ b/src/basic/missing_fou.h @@ -0,0 +1,55 @@ +/* SPDX-License-Identifier: LGPL-2.1+ */ +#pragma once + +#if !HAVE_LINUX_FOU_H /* linux@23461551c00628c3f3fe9cf837bf53cf8f212b63 (3.18) */ + +#define FOU_GENL_NAME "fou" +#define FOU_GENL_VERSION 0x1 + +enum { + FOU_ATTR_UNSPEC, + FOU_ATTR_PORT, /* u16 */ + FOU_ATTR_AF, /* u8 */ + FOU_ATTR_IPPROTO, /* u8 */ + FOU_ATTR_TYPE, /* u8 */ + FOU_ATTR_REMCSUM_NOPARTIAL, /* flag */ + + __FOU_ATTR_MAX, +}; + +#define FOU_ATTR_MAX (__FOU_ATTR_MAX - 1) + +enum { + FOU_CMD_UNSPEC, + FOU_CMD_ADD, + FOU_CMD_DEL, + FOU_CMD_GET, + + __FOU_CMD_MAX, +}; + +enum { + FOU_ENCAP_UNSPEC, + FOU_ENCAP_DIRECT, + FOU_ENCAP_GUE, +}; + +#define FOU_CMD_MAX (__FOU_CMD_MAX - 1) + +#else + +#if !HAVE_FOU_ATTR_REMCSUM_NOPARTIAL /* linux@fe881ef11cf0220f118816181930494d484c4883 (4.0) */ +#define FOU_ATTR_REMCSUM_NOPARTIAL 5 + +#undef FOU_ATTR_MAX +#define FOU_ATTR_MAX 5 +#endif + +#if !HAVE_FOU_CMD_GET /* linux@7a6c8c34e5b71ac50e39588e20b39494a9e1d8e5 (4.1) */ +#define FOU_CMD_GET 3 + +#undef FOU_CMD_MAX +#define FOU_CMD_MAX 3 +#endif + +#endif diff --git a/src/basic/missing_fs.h b/src/basic/missing_fs.h new file mode 100644 index 0000000..48c1af0 --- /dev/null +++ b/src/basic/missing_fs.h @@ -0,0 +1,63 @@ +/* SPDX-License-Identifier: LGPL-2.1+ */ +#pragma once + +/* linux/fs.h */ +#ifndef RENAME_NOREPLACE /* 0a7c3937a1f23f8cb5fc77ae01661e9968a51d0c (3.15) */ +#define RENAME_NOREPLACE (1 << 0) +#endif + +/* linux/fs.h or sys/mount.h */ +#ifndef MS_MOVE +#define MS_MOVE 8192 +#endif + +#ifndef MS_REC +#define MS_REC 16384 +#endif + +#ifndef MS_PRIVATE +#define MS_PRIVATE (1<<18) +#endif + +#ifndef MS_SLAVE +#define MS_SLAVE (1<<19) +#endif + +#ifndef MS_SHARED +#define MS_SHARED (1<<20) +#endif + +#ifndef MS_RELATIME +#define MS_RELATIME (1<<21) +#endif + +#ifndef MS_KERNMOUNT +#define MS_KERNMOUNT (1<<22) +#endif + +#ifndef MS_I_VERSION +#define MS_I_VERSION (1<<23) +#endif + +#ifndef MS_STRICTATIME +#define MS_STRICTATIME (1<<24) +#endif + +#ifndef MS_LAZYTIME +#define MS_LAZYTIME (1<<25) +#endif + +/* Not exposed yet. Defined at fs/ext4/ext4.h */ +#ifndef EXT4_IOC_RESIZE_FS +#define EXT4_IOC_RESIZE_FS _IOW('f', 16, __u64) +#endif + +/* Not exposed yet. Defined at fs/cifs/cifsglob.h */ +#ifndef CIFS_MAGIC_NUMBER +#define CIFS_MAGIC_NUMBER 0xFF534D42 +#endif + +/* linux/nsfs.h */ +#ifndef NS_GET_NSTYPE /* d95fa3c76a66b6d76b1e109ea505c55e66360f3c (4.11) */ +#define NS_GET_NSTYPE _IO(0xb7, 0x3) +#endif diff --git a/src/basic/missing_if_bridge.h b/src/basic/missing_if_bridge.h new file mode 100644 index 0000000..9306062 --- /dev/null +++ b/src/basic/missing_if_bridge.h @@ -0,0 +1,21 @@ +/* SPDX-License-Identifier: LGPL-2.1+ */ +#pragma once + +#if !HAVE_IFLA_BRIDGE_VLAN_TUNNEL_INFO /* linux@b3c7ef0adadc5768e0baa786213c6bd1ce521a77 (4.11) */ +#define IFLA_BRIDGE_VLAN_TUNNEL_INFO 3 + +#undef IFLA_BRIDGE_MAX +#define IFLA_BRIDGE_MAX 3 +#endif + +#ifndef BRIDGE_VLAN_INFO_RANGE_BEGIN +#define BRIDGE_VLAN_INFO_RANGE_BEGIN (1<<3) /* VLAN is start of vlan range */ +#endif + +#ifndef BRIDGE_VLAN_INFO_RANGE_END +#define BRIDGE_VLAN_INFO_RANGE_END (1<<4) /* VLAN is end of vlan range */ +#endif + +#ifndef BRIDGE_VLAN_INFO_BRENTRY +#define BRIDGE_VLAN_INFO_BRENTRY (1<<5) /* Global bridge VLAN entry */ +#endif diff --git a/src/basic/missing_if_link.h b/src/basic/missing_if_link.h new file mode 100644 index 0000000..761797f --- /dev/null +++ b/src/basic/missing_if_link.h @@ -0,0 +1,393 @@ +/* SPDX-License-Identifier: LGPL-2.1+ */ +#pragma once + +#if !HAVE_IFLA_INET6_ADDR_GEN_MODE /* linux@bc91b0f07ada5535427373a4e2050877bcc12218 (3.17) */ +#define IFLA_INET6_ADDR_GEN_MODE 8 + +#undef IFLA_INET6_MAX +#define IFLA_INET6_MAX 8 + +enum in6_addr_gen_mode { + IN6_ADDR_GEN_MODE_EUI64, + IN6_ADDR_GEN_MODE_NONE, + IN6_ADDR_GEN_MODE_STABLE_PRIVACY, + IN6_ADDR_GEN_MODE_RANDOM, +}; +#else +#if !HAVE_IN6_ADDR_GEN_MODE_STABLE_PRIVACY /* linux@622c81d57b392cc9be836670eb464a4dfaa9adfe (4.1) */ +#define IN6_ADDR_GEN_MODE_STABLE_PRIVACY 2 +#endif +#if !HAVE_IN6_ADDR_GEN_MODE_RANDOM /* linux@cc9da6cc4f56e05cc9e591459fe0192727ff58b3 (4.5) */ +#define IN6_ADDR_GEN_MODE_RANDOM 3 +#endif +#endif /* !HAVE_IFLA_INET6_ADDR_GEN_MODE */ + +#if !HAVE_IFLA_IPVLAN_MODE /* linux@2ad7bf3638411cb547f2823df08166c13ab04269 (3.19) */ +enum { + IFLA_IPVLAN_UNSPEC, + IFLA_IPVLAN_MODE, + IFLA_IPVLAN_FLAGS, + __IFLA_IPVLAN_MAX +}; +#define IFLA_IPVLAN_MAX (__IFLA_IPVLAN_MAX - 1) +enum ipvlan_mode { + IPVLAN_MODE_L2 = 0, + IPVLAN_MODE_L3, + IPVLAN_MODE_L3S, + IPVLAN_MODE_MAX +}; +#else +#if !HAVE_IPVLAN_MODE_L3S /* linux@4fbae7d83c98c30efcf0a2a2ac55fbb75ef5a1a5 (4.9) */ +#define IPVLAN_MODE_L3S 2 +#define IPVLAN_MODE_MAX 3 +#endif +#if !HAVE_IFLA_IPVLAN_FLAGS /* linux@a190d04db93710ae166749055b6985397c6d13f5 (4.15) */ +#define IFLA_IPVLAN_FLAGS 2 + +#undef IFLA_IPVLAN_MAX +#define IFLA_IPVLAN_MAX 2 +#endif +#endif /* !HAVE_IFLA_IPVLAN_MODE */ + +/* linux@a190d04db93710ae166749055b6985397c6d13f5 (4.15) */ +#ifndef IPVLAN_F_PRIVATE +#define IPVLAN_F_PRIVATE 0x01 +#endif + +/* linux@fe89aa6b250c1011ccf425fbb7998e96bd54263f (4.15) */ +#ifndef IPVLAN_F_VEPA +#define IPVLAN_F_VEPA 0x02 +#endif + +#if !HAVE_IFLA_PHYS_PORT_ID /* linux@66cae9ed6bc46b8cc57a9693f99f69926f3cc7ef (3.12) */ +#define IFLA_PHYS_PORT_ID 34 +#endif +#if !HAVE_IFLA_CARRIER_CHANGES /* linux@2d3b479df41a10e2f41f9259fcba775bd34de6e4 (3.15) */ +#define IFLA_CARRIER_CHANGES 35 +#endif +#if !HAVE_IFLA_PHYS_SWITCH_ID /* linux@82f2841291cfaf4d225aa1766424280254d3e3b2 (3.19) */ +#define IFLA_PHYS_SWITCH_ID 36 +#endif +#if !HAVE_IFLA_LINK_NETNSID /* linux@d37512a277dfb2cef8a578e25a3246f61399a55a (4.0) */ +#define IFLA_LINK_NETNSID 37 +#endif +#if !HAVE_IFLA_PHYS_PORT_NAME /* linux@db24a9044ee191c397dcd1c6574f56d67d7c8df5 (4.1) */ +#define IFLA_PHYS_PORT_NAME 38 +#endif +#if !HAVE_IFLA_PROTO_DOWN /* linux@88d6378bd6c096cb8440face3ae3f33d55a2e6e4 (4.3) */ +#define IFLA_PROTO_DOWN 39 +#endif +#if !HAVE_IFLA_GSO_MAX_SIZE /* linux@c70ce028e834f8e51306217dbdbd441d851c64d3 (4.6) */ +#define IFLA_GSO_MAX_SEGS 40 +#define IFLA_GSO_MAX_SIZE 41 +#endif +#if !HAVE_IFLA_PAD /* linux@18402843bf88c2e9674e1a3a05c73b7d9b09ee05 (4.7) */ +#define IFLA_PAD 42 +#endif +#if !HAVE_IFLA_XDP /* linux@d1fdd9138682e0f272beee0cb08b6328c5478b26 (4.8) */ +#define IFLA_XDP 43 +#endif +#if !HAVE_IFLA_EVENT /* linux@3d3ea5af5c0b382bc9d9aed378fd814fb5d4a011 (4.13) */ +#define IFLA_EVENT 44 +#endif +#if !HAVE_IFLA_IF_NETNSID /* linux@6621dd29eb9b5e6774ec7a9a75161352fdea47fc, 79e1ad148c844f5c8b9d76b36b26e3886dca95ae (4.15) */ +#define IFLA_IF_NETNSID 45 +#define IFLA_NEW_NETNSID 46 +#endif +#if !HAVE_IFLA_TARGET_NETNSID /* linux@19d8f1ad12fd746e60707a58d954980013c7a35a (4.20) */ +#define IFLA_TARGET_NETNSID IFLA_IF_NETNSID +#endif +#if !HAVE_IFLA_NEW_IFINDEX /* linux@b2d3bcfa26a7a8de41f358a6cae8b848673b3c6e, 38e01b30563a5b5ade7b54e5d739d16a2b02fe82 (4.16) */ +#define IFLA_CARRIER_UP_COUNT 47 +#define IFLA_CARRIER_DOWN_COUNT 48 +#define IFLA_NEW_IFINDEX 49 +#endif +#if !HAVE_IFLA_MAX_MTU /* linux@3e7a50ceb11ea75c27e944f1a01e478fd62a2d8d (4.19) */ +#define IFLA_MIN_MTU 50 +#define IFLA_MAX_MTU 51 + +#undef IFLA_MAX +#define IFLA_MAX 51 +#endif + +#if !HAVE_IFLA_BOND_MODE /* linux@90af231106c0b8d223c27d35464af95cb3d9cacf (3.13) */ +#define IFLA_BOND_MODE 1 +#endif +#if !HAVE_IFLA_BOND_ACTIVE_SLAVE /* linux@ec76aa49855f6d6fea5e01de179fb57dd47c619d (3.13) */ +#define IFLA_BOND_ACTIVE_SLAVE 2 +#endif +#if !HAVE_IFLA_BOND_AD_INFO /* linux@4ee7ac7526d4a9413cafa733d824edfe49fdcc46 (3.14) */ +#define IFLA_BOND_MIIMON 3 +#define IFLA_BOND_UPDELAY 4 +#define IFLA_BOND_DOWNDELAY 5 +#define IFLA_BOND_USE_CARRIER 6 +#define IFLA_BOND_ARP_INTERVAL 7 +#define IFLA_BOND_ARP_IP_TARGET 8 +#define IFLA_BOND_ARP_VALIDATE 9 +#define IFLA_BOND_ARP_ALL_TARGETS 10 +#define IFLA_BOND_PRIMARY 11 +#define IFLA_BOND_PRIMARY_RESELECT 12 +#define IFLA_BOND_FAIL_OVER_MAC 13 +#define IFLA_BOND_XMIT_HASH_POLICY 14 +#define IFLA_BOND_RESEND_IGMP 15 +#define IFLA_BOND_NUM_PEER_NOTIF 16 +#define IFLA_BOND_ALL_SLAVES_ACTIVE 17 +#define IFLA_BOND_MIN_LINKS 18 +#define IFLA_BOND_LP_INTERVAL 19 +#define IFLA_BOND_PACKETS_PER_SLAVE 20 +#define IFLA_BOND_AD_LACP_RATE 21 +#define IFLA_BOND_AD_SELECT 22 +#define IFLA_BOND_AD_INFO 23 +#endif +#if !HAVE_IFLA_BOND_AD_ACTOR_SYSTEM /* linux@171a42c38c6e1a5a076d6276e94e55a0b5b7868c (4.2) */ +#define IFLA_BOND_AD_ACTOR_SYS_PRIO 24 +#define IFLA_BOND_AD_USER_PORT_KEY 25 +#define IFLA_BOND_AD_ACTOR_SYSTEM 26 +#endif +#if !HAVE_IFLA_BOND_TLB_DYNAMIC_LB /* linux@0f7bffd9e512b77279bbce704fad3cb1d6887958 (4.3) */ +#define IFLA_BOND_TLB_DYNAMIC_LB 27 + +#undef IFLA_BOND_MAX +#define IFLA_BOND_MAX 27 +#endif + +#if !HAVE_IFLA_VXLAN_UDP_ZERO_CSUM6_RX /* linux@359a0ea9875ef4f32c8425bbe1ae348e1fd2ed2a (3.16) */ +#define IFLA_VXLAN_UDP_CSUM 18 +#define IFLA_VXLAN_UDP_ZERO_CSUM6_TX 19 +#define IFLA_VXLAN_UDP_ZERO_CSUM6_RX 20 +#endif +#if !HAVE_IFLA_VXLAN_REMCSUM_NOPARTIAL /* linux@dfd8645ea1bd91277f841e74c33e1f4dbbede808..0ace2ca89cbd6bcdf2b9d2df1fa0fa24ea9d1653 (4.0) */ +#define IFLA_VXLAN_REMCSUM_TX 21 +#define IFLA_VXLAN_REMCSUM_RX 22 +#define IFLA_VXLAN_GBP 23 +#define IFLA_VXLAN_REMCSUM_NOPARTIAL 24 +#endif +#if !HAVE_IFLA_VXLAN_COLLECT_METADATA /* linux@f8a9b1bc1b238eed9987da747a0e52f5bb009980 (4.3) */ +#define IFLA_VXLAN_COLLECT_METADATA 25 +#endif +#if !HAVE_IFLA_VXLAN_LABEL /* linux@e7f70af111f086a20800ad2e17f544b2e3e0f375 (4.6) */ +#define IFLA_VXLAN_LABEL 26 +#endif +#if !HAVE_IFLA_VXLAN_GPE /* linux@e1e5314de08ba6003b358125eafc9ad9e75a950c (4.7) */ +#define IFLA_VXLAN_GPE 27 +#endif +#if !HAVE_IFLA_VXLAN_TTL_INHERIT /* linux@72f6d71e491e6ce269b564865b21fab0a4402dd3 (4.18) */ +#define IFLA_VXLAN_TTL_INHERIT 28 + +#undef IFLA_VXLAN_MAX +#define IFLA_VXLAN_MAX 28 +#endif + +#if !HAVE_IFLA_GENEVE_TOS /* linux@2d07dc79fe04a43d82a346ced6bbf07bdb523f1b..d89511251f6519599b109dc6cda87a6ab314ed8c (4.2) */ +enum { + IFLA_GENEVE_UNSPEC, + IFLA_GENEVE_ID, + IFLA_GENEVE_REMOTE, + IFLA_GENEVE_TTL, + IFLA_GENEVE_TOS, + IFLA_GENEVE_PORT, /* destination port */ + IFLA_GENEVE_COLLECT_METADATA, + IFLA_GENEVE_REMOTE6, + IFLA_GENEVE_UDP_CSUM, + IFLA_GENEVE_UDP_ZERO_CSUM6_TX, + IFLA_GENEVE_UDP_ZERO_CSUM6_RX, + IFLA_GENEVE_LABEL, + IFLA_GENEVE_TTL_INHERIT, + __IFLA_GENEVE_MAX +}; +#define IFLA_GENEVE_MAX (__IFLA_GENEVE_MAX - 1) +#else +#if !HAVE_IFLA_GENEVE_COLLECT_METADATA /* linux@e305ac6cf5a1e1386aedce7ef9cb773635d5845c (4.3) */ +#define IFLA_GENEVE_PORT 5 +#define IFLA_GENEVE_COLLECT_METADATA 6 +#endif +#if !HAVE_IFLA_GENEVE_REMOTE6 /* linux@8ed66f0e8235118a31720acdab3bbbe9debd0f6a (4.4) */ +#define IFLA_GENEVE_REMOTE6 7 +#endif +#if !HAVE_IFLA_GENEVE_UDP_ZERO_CSUM6_RX /* linux@abe492b4f50c3ae2ebcfaa2f5c16176aebaa1c68 (4.5) */ +#define IFLA_GENEVE_UDP_CSUM 8 +#define IFLA_GENEVE_UDP_ZERO_CSUM6_TX 9 +#define IFLA_GENEVE_UDP_ZERO_CSUM6_RX 10 +#endif +#if !HAVE_IFLA_GENEVE_LABEL /* linux@8eb3b99554b82da968d1fbc00df9f3156c5e2d63 (4.6) */ +#define IFLA_GENEVE_LABEL 11 +#endif +#if !HAVE_IFLA_GENEVE_TTL_INHERIT /* linux@52d0d404d39dd9eac71a181615d6ca15e23d8e38 (4.20) */ +#define IFLA_GENEVE_TTL_INHERIT 12 + +#undef IFLA_GENEVE_MAX +#define IFLA_GENEVE_MAX 12 +#endif +#endif + +#if !HAVE_IFLA_BR_MAX_AGE /* linux@e5c3ea5c668033b303e7ac835d7d91da32d97958 (3.18) */ +enum { + IFLA_BR_UNSPEC, + IFLA_BR_FORWARD_DELAY, + IFLA_BR_HELLO_TIME, + IFLA_BR_MAX_AGE, + IFLA_BR_AGEING_TIME, + IFLA_BR_STP_STATE, + IFLA_BR_PRIORITY, + IFLA_BR_VLAN_FILTERING, + IFLA_BR_VLAN_PROTOCOL, + IFLA_BR_GROUP_FWD_MASK, + IFLA_BR_ROOT_ID, + IFLA_BR_BRIDGE_ID, + IFLA_BR_ROOT_PORT, + IFLA_BR_ROOT_PATH_COST, + IFLA_BR_TOPOLOGY_CHANGE, + IFLA_BR_TOPOLOGY_CHANGE_DETECTED, + IFLA_BR_HELLO_TIMER, + IFLA_BR_TCN_TIMER, + IFLA_BR_TOPOLOGY_CHANGE_TIMER, + IFLA_BR_GC_TIMER, + IFLA_BR_GROUP_ADDR, + IFLA_BR_FDB_FLUSH, + IFLA_BR_MCAST_ROUTER, + IFLA_BR_MCAST_SNOOPING, + IFLA_BR_MCAST_QUERY_USE_IFADDR, + IFLA_BR_MCAST_QUERIER, + IFLA_BR_MCAST_HASH_ELASTICITY, + IFLA_BR_MCAST_HASH_MAX, + IFLA_BR_MCAST_LAST_MEMBER_CNT, + IFLA_BR_MCAST_STARTUP_QUERY_CNT, + IFLA_BR_MCAST_LAST_MEMBER_INTVL, + IFLA_BR_MCAST_MEMBERSHIP_INTVL, + IFLA_BR_MCAST_QUERIER_INTVL, + IFLA_BR_MCAST_QUERY_INTVL, + IFLA_BR_MCAST_QUERY_RESPONSE_INTVL, + IFLA_BR_MCAST_STARTUP_QUERY_INTVL, + IFLA_BR_NF_CALL_IPTABLES, + IFLA_BR_NF_CALL_IP6TABLES, + IFLA_BR_NF_CALL_ARPTABLES, + IFLA_BR_VLAN_DEFAULT_PVID, + IFLA_BR_PAD, + IFLA_BR_VLAN_STATS_ENABLED, + IFLA_BR_MCAST_STATS_ENABLED, + IFLA_BR_MCAST_IGMP_VERSION, + IFLA_BR_MCAST_MLD_VERSION, + IFLA_BR_VLAN_STATS_PER_PORT, + __IFLA_BR_MAX, +}; + +#define IFLA_BR_MAX (__IFLA_BR_MAX - 1) +#else +#if !HAVE_IFLA_BR_PRIORITY /* linux@af615762e972be0c66cf1d156ca4fac13b93c0b0 (4.1) */ +#define IFLA_BR_AGEING_TIME 4 +#define IFLA_BR_STP_STATE 5 +#define IFLA_BR_PRIORITY 6 +#endif +#if !HAVE_IFLA_BR_VLAN_PROTOCOL /* linux@a7854037da006a7472c48773e3190db55217ec9b, d2d427b3927bd7a0348fc7f323d0e291f79a2779 (4.3) */ +#define IFLA_BR_VLAN_FILTERING 7 +#define IFLA_BR_VLAN_PROTOCOL 8 +#endif +#if !HAVE_IFLA_BR_VLAN_DEFAULT_PVID /* linux@7910228b6bb35f3c8e0bc72a8d84c29616cb1b90..0f963b7592ef9e054974b6672b86ec1edd84b4bc (4.4) */ +#define IFLA_BR_GROUP_FWD_MASK 9 +#define IFLA_BR_ROOT_ID 10 +#define IFLA_BR_BRIDGE_ID 11 +#define IFLA_BR_ROOT_PORT 12 +#define IFLA_BR_ROOT_PATH_COST 13 +#define IFLA_BR_TOPOLOGY_CHANGE 14 +#define IFLA_BR_TOPOLOGY_CHANGE_DETECTED 15 +#define IFLA_BR_HELLO_TIMER 16 +#define IFLA_BR_TCN_TIMER 17 +#define IFLA_BR_TOPOLOGY_CHANGE_TIMER 18 +#define IFLA_BR_GC_TIMER 19 +#define IFLA_BR_GROUP_ADDR 20 +#define IFLA_BR_FDB_FLUSH 21 +#define IFLA_BR_MCAST_ROUTER 22 +#define IFLA_BR_MCAST_SNOOPING 23 +#define IFLA_BR_MCAST_QUERY_USE_IFADDR 24 +#define IFLA_BR_MCAST_QUERIER 25 +#define IFLA_BR_MCAST_HASH_ELASTICITY 26 +#define IFLA_BR_MCAST_HASH_MAX 27 +#define IFLA_BR_MCAST_LAST_MEMBER_CNT 28 +#define IFLA_BR_MCAST_STARTUP_QUERY_CNT 29 +#define IFLA_BR_MCAST_LAST_MEMBER_INTVL 30 +#define IFLA_BR_MCAST_MEMBERSHIP_INTVL 31 +#define IFLA_BR_MCAST_QUERIER_INTVL 32 +#define IFLA_BR_MCAST_QUERY_INTVL 33 +#define IFLA_BR_MCAST_QUERY_RESPONSE_INTVL 34 +#define IFLA_BR_MCAST_STARTUP_QUERY_INTVL 35 +#define IFLA_BR_NF_CALL_IPTABLES 36 +#define IFLA_BR_NF_CALL_IP6TABLES 37 +#define IFLA_BR_NF_CALL_ARPTABLES 38 +#define IFLA_BR_VLAN_DEFAULT_PVID 39 +#endif +#if !HAVE_IFLA_BR_VLAN_STATS_ENABLED /* linux@12a0faa3bd76157b9dc096758d6818ff535e4586, 6dada9b10a0818ba72c249526a742c8c41274a73 (4.7) */ +#define IFLA_BR_PAD 40 +#define IFLA_BR_VLAN_STATS_ENABLED 41 +#endif +#if !HAVE_IFLA_BR_MCAST_STATS_ENABLED /* linux@1080ab95e3c7bdd77870e209aff83c763fdcf439 (4.8) */ +#define IFLA_BR_MCAST_STATS_ENABLED 42 +#endif +#if !HAVE_IFLA_BR_MCAST_MLD_VERSION /* linux@5e9235853d652a295d5f56cb8652950b6b5bf56b, aa2ae3e71c74cc00ec22f133dc900b3817415785 (4.10) */ +#define IFLA_BR_MCAST_IGMP_VERSION 43 +#define IFLA_BR_MCAST_MLD_VERSION 44 +#endif +#if !HAVE_IFLA_BR_VLAN_STATS_PER_PORT /* linux@9163a0fc1f0c0980f117cc25f4fa6ba9b0750a36 (4.20) */ +#define IFLA_BR_VLAN_STATS_PER_PORT 45 + +#undef IFLA_BR_MAX +#define IFLA_BR_MAX 45 +#endif +#endif + +#if !HAVE_IFLA_BRPORT_LEARNING_SYNC /* linux@958501163ddd6ea22a98f94fa0e7ce6d4734e5c4, efacacdaf7cb5a0592ed772e3731636b2742e34a (3.19)*/ +#define IFLA_BRPORT_PROXYARP 10 +#define IFLA_BRPORT_LEARNING_SYNC 11 +#endif +#if !HAVE_IFLA_BRPORT_PROXYARP_WIFI /* linux@842a9ae08a25671db3d4f689eed68b4d64be15b5 (4.1) */ +#define IFLA_BRPORT_PROXYARP_WIFI 12 +#endif +#if !HAVE_IFLA_BRPORT_MULTICAST_ROUTER /* linux@4ebc7660ab4559cad10b6595e05f70562bb26dc5..5d6ae479ab7ddf77bb22bdf739268581453ff886 (4.4) */ +#define IFLA_BRPORT_ROOT_ID 13 +#define IFLA_BRPORT_BRIDGE_ID 14 +#define IFLA_BRPORT_DESIGNATED_PORT 15 +#define IFLA_BRPORT_DESIGNATED_COST 16 +#define IFLA_BRPORT_ID 17 +#define IFLA_BRPORT_NO 18 +#define IFLA_BRPORT_TOPOLOGY_CHANGE_ACK 19 +#define IFLA_BRPORT_CONFIG_PENDING 20 +#define IFLA_BRPORT_MESSAGE_AGE_TIMER 21 +#define IFLA_BRPORT_FORWARD_DELAY_TIMER 22 +#define IFLA_BRPORT_HOLD_TIMER 23 +#define IFLA_BRPORT_FLUSH 24 +#define IFLA_BRPORT_MULTICAST_ROUTER 25 +#endif +#if !HAVE_IFLA_BRPORT_PAD /* linux@12a0faa3bd76157b9dc096758d6818ff535e4586 (4.7) */ +#define IFLA_BRPORT_PAD 26 +#endif +#if !HAVE_IFLA_BRPORT_MCAST_FLOOD /* linux@b6cb5ac8331b6bcfe9ce38c7f7f58db6e1d6270a (4.9) */ +#define IFLA_BRPORT_MCAST_FLOOD 27 +#endif +#if !HAVE_IFLA_BRPORT_VLAN_TUNNEL /* linux@6db6f0eae6052b70885562e1733896647ec1d807, b3c7ef0adadc5768e0baa786213c6bd1ce521a77 (4.11) */ +#define IFLA_BRPORT_MCAST_TO_UCAST 28 +#define IFLA_BRPORT_VLAN_TUNNEL 29 +#endif +#if !HAVE_IFLA_BRPORT_BCAST_FLOOD /* linux@99f906e9ad7b6e79ffeda30f45906a8448b9d6a2 (4.12) */ +#define IFLA_BRPORT_BCAST_FLOOD 30 +#endif +#if !HAVE_IFLA_BRPORT_NEIGH_SUPPRESS /* linux@5af48b59f35cf712793badabe1a574a0d0ce3bd3, 821f1b21cabb46827ce39ddf82e2789680b5042a (4.15) */ +#define IFLA_BRPORT_GROUP_FWD_MASK 31 +#define IFLA_BRPORT_NEIGH_SUPPRESS 32 +#endif +#if !HAVE_IFLA_BRPORT_ISOLATED /* linux@7d850abd5f4edb1b1ca4b4141a4453305736f564 (4.18) */ +#define IFLA_BRPORT_ISOLATED 33 +#endif +#if !HAVE_IFLA_BRPORT_BACKUP_PORT /* linux@2756f68c314917d03eb348084edb08bb929139d9 (4.19) */ +#define IFLA_BRPORT_BACKUP_PORT 34 + +#undef IFLA_BRPORT_MAX +#define IFLA_BRPORT_MAX 34 +#endif + +#if !HAVE_IFLA_VRF_TABLE /* linux@4e3c89920cd3a6cfce22c6f537690747c26128dd (4.3) */ +enum { + IFLA_VRF_UNSPEC, + IFLA_VRF_TABLE, + __IFLA_VRF_MAX +}; +#define IFLA_VRF_MAX (__IFLA_VRF_MAX - 1) +#endif diff --git a/src/basic/missing_if_tunnel.h b/src/basic/missing_if_tunnel.h new file mode 100644 index 0000000..f51fdd1 --- /dev/null +++ b/src/basic/missing_if_tunnel.h @@ -0,0 +1,59 @@ +/* SPDX-License-Identifier: LGPL-2.1+ */ +#pragma once + +#if !HAVE_IFLA_VTI_FWMARK /* linux@0a473b82cb23e7a35c4be6e9765c8487a65e8f55 (4.12) */ +#define IFLA_VTI_FWMARK 6 + +#undef IFLA_VTI_MAX +#define IFLA_VTI_MAX 6 +#endif + +#if !HAVE_IFLA_IPTUN_ENCAP_DPORT /* linux@56328486539ddd07cbaafec7a542a2c8a3043623 (3.18)*/ +#define IFLA_IPTUN_ENCAP_TYPE 15 +#define IFLA_IPTUN_ENCAP_FLAGS 16 +#define IFLA_IPTUN_ENCAP_SPORT 17 +#define IFLA_IPTUN_ENCAP_DPORT 18 +#endif + +#if !HAVE_IFLA_IPTUN_COLLECT_METADATA /* linux@cfc7381b3002756b1dcada32979e942aa3126e31 (4.9) */ +#define IFLA_IPTUN_COLLECT_METADATA 19 +#endif + +#if !HAVE_IFLA_IPTUN_FWMARK /* linux@0a473b82cb23e7a35c4be6e9765c8487a65e8f55 (4.12) */ +#define IFLA_IPTUN_FWMARK 20 + +#undef IFLA_IPTUN_MAX +#define IFLA_IPTUN_MAX 20 +#endif + +#if !HAVE_IFLA_GRE_ENCAP_DPORT /* linux@4565e9919cda747815547e2e5d7b78f15efbffdf (3.18) */ +#define IFLA_GRE_ENCAP_TYPE 14 +#define IFLA_GRE_ENCAP_FLAGS 15 +#define IFLA_GRE_ENCAP_SPORT 16 +#define IFLA_GRE_ENCAP_DPORT 17 +#endif + +#if !HAVE_IFLA_GRE_COLLECT_METADATA /* linux@2e15ea390e6f4466655066d97e22ec66870a042c (4.3) */ +#define IFLA_GRE_COLLECT_METADATA 18 +#endif + +#if !HAVE_IFLA_GRE_IGNORE_DF /* linux@22a59be8b7693eb2d0897a9638f5991f2f8e4ddd (4.8) */ +#define IFLA_GRE_IGNORE_DF 19 +#endif + +#if !HAVE_IFLA_GRE_FWMARK /* linux@0a473b82cb23e7a35c4be6e9765c8487a65e8f55 (4.12) */ +#define IFLA_GRE_FWMARK 20 +#endif + +#if !HAVE_IFLA_GRE_ERSPAN_INDEX /* linux@84e54fe0a5eaed696dee4019c396f8396f5a908b (4.14) */ +#define IFLA_GRE_ERSPAN_INDEX 21 +#endif + +#if !HAVE_IFLA_GRE_ERSPAN_HWID /* linux@f551c91de262ba36b20c3ac19538afb4f4507441 (4.16) */ +#define IFLA_GRE_ERSPAN_VER 22 +#define IFLA_GRE_ERSPAN_DIR 23 +#define IFLA_GRE_ERSPAN_HWID 24 + +#undef IFLA_GRE_MAX +#define IFLA_GRE_MAX 24 +#endif diff --git a/src/basic/missing_input.h b/src/basic/missing_input.h new file mode 100644 index 0000000..b91ccb6 --- /dev/null +++ b/src/basic/missing_input.h @@ -0,0 +1,45 @@ +/* SPDX-License-Identifier: LGPL-2.1+ */ +#pragma once + +#include <linux/input.h> +#include <linux/types.h> + +/* linux@c7dc65737c9a607d3e6f8478659876074ad129b8 (3.12) */ +#ifndef EVIOCREVOKE +#define EVIOCREVOKE _IOW('E', 0x91, int) +#endif + +/* linux@06a16293f71927f756dcf37558a79c0b05a91641 (4.4) */ +#ifndef EVIOCSMASK +struct input_mask { + __u32 type; + __u32 codes_size; + __u64 codes_ptr; +}; + +#define EVIOCGMASK _IOR('E', 0x92, struct input_mask) +#define EVIOCSMASK _IOW('E', 0x93, struct input_mask) +#endif + +/* linux@7611392fe8ff95ecae528b01a815ae3d72ca6b95 (3.17) */ +#ifndef INPUT_PROP_POINTING_STICK +#define INPUT_PROP_POINTING_STICK 0x05 +#endif + +/* linux@500d4160abe9a2e88b12e319c13ae3ebd1e18108 (4.0) */ +#ifndef INPUT_PROP_ACCELEROMETER +#define INPUT_PROP_ACCELEROMETER 0x06 +#endif + +/* linux@d09bbfd2a8408a995419dff0d2ba906013cf4cc9 (3.11) */ +#ifndef BTN_DPAD_UP +#define BTN_DPAD_UP 0x220 +#define BTN_DPAD_DOWN 0x221 +#define BTN_DPAD_LEFT 0x222 +#define BTN_DPAD_RIGHT 0x223 +#endif + +/* linux@358f24704f2f016af7d504b357cdf32606091d07 (3.13) */ +#ifndef KEY_ALS_TOGGLE +#define KEY_ALS_TOGGLE 0x230 +#endif diff --git a/src/basic/missing_keyctl.h b/src/basic/missing_keyctl.h new file mode 100644 index 0000000..7eb7095 --- /dev/null +++ b/src/basic/missing_keyctl.h @@ -0,0 +1,78 @@ +/* SPDX-License-Identifier: LGPL-2.1+ */ +#pragma once + +#include <linux/keyctl.h> + +#ifndef KEYCTL_JOIN_SESSION_KEYRING +#define KEYCTL_JOIN_SESSION_KEYRING 1 +#endif + +#ifndef KEYCTL_CHOWN +#define KEYCTL_CHOWN 4 +#endif + +#ifndef KEYCTL_SETPERM +#define KEYCTL_SETPERM 5 +#endif + +#ifndef KEYCTL_DESCRIBE +#define KEYCTL_DESCRIBE 6 +#endif + +#ifndef KEYCTL_LINK +#define KEYCTL_LINK 8 +#endif + +#ifndef KEYCTL_READ +#define KEYCTL_READ 11 +#endif + +#ifndef KEYCTL_SET_TIMEOUT +#define KEYCTL_SET_TIMEOUT 15 +#endif + +#ifndef KEY_SPEC_USER_KEYRING +#define KEY_SPEC_USER_KEYRING -4 +#endif + +#ifndef KEY_SPEC_SESSION_KEYRING +#define KEY_SPEC_SESSION_KEYRING -3 +#endif + +/* From linux/key.h */ +#ifndef KEY_POS_VIEW + +typedef int32_t key_serial_t; + +#define KEY_POS_VIEW 0x01000000 +#define KEY_POS_READ 0x02000000 +#define KEY_POS_WRITE 0x04000000 +#define KEY_POS_SEARCH 0x08000000 +#define KEY_POS_LINK 0x10000000 +#define KEY_POS_SETATTR 0x20000000 +#define KEY_POS_ALL 0x3f000000 + +#define KEY_USR_VIEW 0x00010000 +#define KEY_USR_READ 0x00020000 +#define KEY_USR_WRITE 0x00040000 +#define KEY_USR_SEARCH 0x00080000 +#define KEY_USR_LINK 0x00100000 +#define KEY_USR_SETATTR 0x00200000 +#define KEY_USR_ALL 0x003f0000 + +#define KEY_GRP_VIEW 0x00000100 +#define KEY_GRP_READ 0x00000200 +#define KEY_GRP_WRITE 0x00000400 +#define KEY_GRP_SEARCH 0x00000800 +#define KEY_GRP_LINK 0x00001000 +#define KEY_GRP_SETATTR 0x00002000 +#define KEY_GRP_ALL 0x00003f00 + +#define KEY_OTH_VIEW 0x00000001 +#define KEY_OTH_READ 0x00000002 +#define KEY_OTH_WRITE 0x00000004 +#define KEY_OTH_SEARCH 0x00000008 +#define KEY_OTH_LINK 0x00000010 +#define KEY_OTH_SETATTR 0x00000020 +#define KEY_OTH_ALL 0x0000003f +#endif diff --git a/src/basic/missing_magic.h b/src/basic/missing_magic.h new file mode 100644 index 0000000..4910cd3 --- /dev/null +++ b/src/basic/missing_magic.h @@ -0,0 +1,34 @@ +/* SPDX-License-Identifier: LGPL-2.1+ */ +#pragma once + +#include <linux/magic.h> + +/* 62aa81d7c4c24b90fdb61da70ac0dbbc414f9939 (4.13) */ +#ifndef OCFS2_SUPER_MAGIC +#define OCFS2_SUPER_MAGIC 0x7461636f +#endif + +/* 67e9c74b8a873408c27ac9a8e4c1d1c8d72c93ff (4.5) */ +#ifndef CGROUP2_SUPER_MAGIC +#define CGROUP2_SUPER_MAGIC 0x63677270 +#endif + +/* 4282d60689d4f21b40692029080440cc58e8a17d (4.1) */ +#ifndef TRACEFS_MAGIC +#define TRACEFS_MAGIC 0x74726163 +#endif + +/* e149ed2b805fefdccf7ccdfc19eca22fdd4514ac (3.19) */ +#ifndef NSFS_MAGIC +#define NSFS_MAGIC 0x6e736673 +#endif + +/* b2197755b2633e164a439682fb05a9b5ea48f706 (4.4) */ +#ifndef BPF_FS_MAGIC +#define BPF_FS_MAGIC 0xcafe4a11 +#endif + +/* Not exposed yet (4.20). Defined at ipc/mqueue.c */ +#ifndef MQUEUE_MAGIC +#define MQUEUE_MAGIC 0x19800202 +#endif diff --git a/src/basic/missing_mman.h b/src/basic/missing_mman.h new file mode 100644 index 0000000..7ff12f7 --- /dev/null +++ b/src/basic/missing_mman.h @@ -0,0 +1,12 @@ +/* SPDX-License-Identifier: LGPL-2.1+ */ +#pragma once + +#include <sys/mman.h> + +#ifndef MFD_ALLOW_SEALING +#define MFD_ALLOW_SEALING 0x0002U +#endif + +#ifndef MFD_CLOEXEC +#define MFD_CLOEXEC 0x0001U +#endif diff --git a/src/basic/missing_network.h b/src/basic/missing_network.h new file mode 100644 index 0000000..59a8cd2 --- /dev/null +++ b/src/basic/missing_network.h @@ -0,0 +1,155 @@ +/* SPDX-License-Identifier: LGPL-2.1+ */ +#pragma once + +#include <linux/loop.h> +#include <linux/rtnetlink.h> +#include <net/ethernet.h> + +#include "missing_ethtool.h" +#include "missing_fib_rules.h" +#include "missing_fou.h" +#include "missing_if_bridge.h" +#include "missing_if_link.h" +#include "missing_if_tunnel.h" +#include "missing_vxcan.h" + +/* if.h */ +/* The following two defines are actually available in the kernel headers for longer, but we define them here anyway, + * since that makes it easier to use them in conjunction with the glibc net/if.h header which conflicts with + * linux/if.h. */ +#ifndef IF_OPER_UNKNOWN +#define IF_OPER_UNKNOWN 0 +#endif + +#ifndef IF_OPER_UP +#define IF_OPER_UP 6 +#endif + +#ifndef IFF_LOWER_UP +#define IFF_LOWER_UP 0x10000 +#endif + +#ifndef IFF_DORMANT +#define IFF_DORMANT 0x20000 +#endif + +/* if_addr.h */ +#if !HAVE_IFA_FLAGS +#define IFA_FLAGS 8 +#endif + +#ifndef IFA_F_MANAGETEMPADDR +#define IFA_F_MANAGETEMPADDR 0x100 +#endif + +#ifndef IFA_F_NOPREFIXROUTE +#define IFA_F_NOPREFIXROUTE 0x200 +#endif + +#ifndef IFA_F_MCAUTOJOIN +#define IFA_F_MCAUTOJOIN 0x400 +#endif + +/* if_arp.h */ +#ifndef ARPHRD_IP6GRE +#define ARPHRD_IP6GRE 823 +#endif + +/* if_bonding.h */ +#ifndef BOND_XMIT_POLICY_ENCAP23 +#define BOND_XMIT_POLICY_ENCAP23 3 +#endif + +#ifndef BOND_XMIT_POLICY_ENCAP34 +#define BOND_XMIT_POLICY_ENCAP34 4 +#endif + +/* if_tun.h */ +#ifndef IFF_MULTI_QUEUE +#define IFF_MULTI_QUEUE 0x100 +#endif + +/* in6.h */ +#ifndef IPV6_UNICAST_IF +#define IPV6_UNICAST_IF 76 +#endif + +/* ip.h */ +#ifndef IPV4_MIN_MTU +#define IPV4_MIN_MTU 68 +#endif + +/* ipv6.h */ +#ifndef IPV6_MIN_MTU +#define IPV6_MIN_MTU 1280 +#endif + +/* loop.h */ +#if !HAVE_LO_FLAGS_PARTSCAN +#define LO_FLAGS_PARTSCAN 8 +#endif + +#ifndef LOOP_CTL_REMOVE +#define LOOP_CTL_REMOVE 0x4C81 +#endif + +#ifndef LOOP_CTL_GET_FREE +#define LOOP_CTL_GET_FREE 0x4C82 +#endif + +/* netdevice.h */ +#ifndef NET_ADDR_RANDOM +#define NET_ADDR_RANDOM 1 +#endif + +#ifndef NET_NAME_UNKNOWN +#define NET_NAME_UNKNOWN 0 +#endif + +#ifndef NET_NAME_ENUM +#define NET_NAME_ENUM 1 +#endif + +#ifndef NET_NAME_PREDICTABLE +#define NET_NAME_PREDICTABLE 2 +#endif + +#ifndef NET_NAME_USER +#define NET_NAME_USER 3 +#endif + +#ifndef NET_NAME_RENAMED +#define NET_NAME_RENAMED 4 +#endif + +/* netlink.h */ +#ifndef NETLINK_LIST_MEMBERSHIPS /* b42be38b2778eda2237fc759e55e3b698b05b315 (4.2) */ +#define NETLINK_LIST_MEMBERSHIPS 9 +#endif + +/* rtnetlink.h */ +#ifndef RTA_PREF +#define RTA_PREF 20 +#endif + +#ifndef RTAX_QUICKACK +#define RTAX_QUICKACK 15 +#endif + +#ifndef RTA_EXPIRES +#define RTA_EXPIRES 23 +#endif + +/* Note that LOOPBACK_IFINDEX is currently not exported by the + * kernel/glibc, but hardcoded internally by the kernel. However, as + * it is exported to userspace indirectly via rtnetlink and the + * ioctls, and made use of widely we define it here too, in a way that + * is compatible with the kernel's internal definition. */ +#ifndef LOOPBACK_IFINDEX +#define LOOPBACK_IFINDEX 1 +#endif + +/* Not exposed yet. Similar values are defined in net/ethernet.h */ +#ifndef ETHERTYPE_LLDP +#define ETHERTYPE_LLDP 0x88cc +#endif diff --git a/src/basic/missing_prctl.h b/src/basic/missing_prctl.h new file mode 100644 index 0000000..f80cd17 --- /dev/null +++ b/src/basic/missing_prctl.h @@ -0,0 +1,14 @@ +/* SPDX-License-Identifier: LGPL-2.1+ */ +#pragma once + +#include <linux/prctl.h> + +/* 58319057b7847667f0c9585b9de0e8932b0fdb08 (4.3) */ +#ifndef PR_CAP_AMBIENT +#define PR_CAP_AMBIENT 47 + +#define PR_CAP_AMBIENT_IS_SET 1 +#define PR_CAP_AMBIENT_RAISE 2 +#define PR_CAP_AMBIENT_LOWER 3 +#define PR_CAP_AMBIENT_CLEAR_ALL 4 +#endif diff --git a/src/basic/missing_random.h b/src/basic/missing_random.h new file mode 100644 index 0000000..2e76031 --- /dev/null +++ b/src/basic/missing_random.h @@ -0,0 +1,16 @@ +/* SPDX-License-Identifier: LGPL-2.1+ */ +#pragma once + +#if USE_SYS_RANDOM_H +# include <sys/random.h> +#else +# include <linux/random.h> +#endif + +#ifndef GRND_NONBLOCK +#define GRND_NONBLOCK 0x0001 +#endif + +#ifndef GRND_RANDOM +#define GRND_RANDOM 0x0002 +#endif diff --git a/src/basic/missing_resource.h b/src/basic/missing_resource.h new file mode 100644 index 0000000..22ba8ab --- /dev/null +++ b/src/basic/missing_resource.h @@ -0,0 +1,11 @@ +/* SPDX-License-Identifier: LGPL-2.1+ */ +#pragma once + +#include <sys/resource.h> + +#ifndef RLIMIT_RTTIME +#define RLIMIT_RTTIME 15 +#endif + +/* If RLIMIT_RTTIME is not defined, then we cannot use RLIMIT_NLIMITS as is */ +#define _RLIMIT_MAX (RLIMIT_RTTIME+1 > RLIMIT_NLIMITS ? RLIMIT_RTTIME+1 : RLIMIT_NLIMITS) diff --git a/src/basic/missing_sched.h b/src/basic/missing_sched.h new file mode 100644 index 0000000..baa3913 --- /dev/null +++ b/src/basic/missing_sched.h @@ -0,0 +1,21 @@ +/* SPDX-License-Identifier: LGPL-2.1+ */ +#pragma once + +#include <sched.h> + +#ifndef CLONE_NEWCGROUP +#define CLONE_NEWCGROUP 0x02000000 +#endif + +/* Not exposed yet. Defined at include/linux/sched.h */ +#ifndef PF_KTHREAD +#define PF_KTHREAD 0x00200000 +#endif + +/* The maximum thread/process name length including trailing NUL byte. This mimics the kernel definition of the same + * name, which we need in userspace at various places but is not defined in userspace currently, neither under this + * name nor any other. */ +/* Not exposed yet. Defined at include/linux/sched.h */ +#ifndef TASK_COMM_LEN +#define TASK_COMM_LEN 16 +#endif diff --git a/src/basic/missing_securebits.h b/src/basic/missing_securebits.h new file mode 100644 index 0000000..40d6ec9 --- /dev/null +++ b/src/basic/missing_securebits.h @@ -0,0 +1,17 @@ +#pragma once + +#include <linux/securebits.h> + +/* 746bf6d64275be0c65b0631d8a72b16f1454cfa1 (4.3) */ +#ifndef SECURE_NO_CAP_AMBIENT_RAISE +#define SECURE_NO_CAP_AMBIENT_RAISE 6 +#define SECURE_NO_CAP_AMBIENT_RAISE_LOCKED 7 /* make bit-6 immutable */ +#define SECBIT_NO_CAP_AMBIENT_RAISE (issecure_mask(SECURE_NO_CAP_AMBIENT_RAISE)) +#define SECBIT_NO_CAP_AMBIENT_RAISE_LOCKED (issecure_mask(SECURE_NO_CAP_AMBIENT_RAISE_LOCKED)) + +#undef SECURE_ALL_BITS +#define SECURE_ALL_BITS (issecure_mask(SECURE_NOROOT) | \ + issecure_mask(SECURE_NO_SETUID_FIXUP) | \ + issecure_mask(SECURE_KEEP_CAPS) | \ + issecure_mask(SECURE_NO_CAP_AMBIENT_RAISE)) +#endif diff --git a/src/basic/missing_socket.h b/src/basic/missing_socket.h new file mode 100644 index 0000000..a5fd457 --- /dev/null +++ b/src/basic/missing_socket.h @@ -0,0 +1,60 @@ +/* SPDX-License-Identifier: LGPL-2.1+ */ +#pragma once + +#include <sys/socket.h> + +#if HAVE_LINUX_VM_SOCKETS_H +#include <linux/vm_sockets.h> +#else +#define VMADDR_CID_ANY -1U +struct sockaddr_vm { + unsigned short svm_family; + unsigned short svm_reserved1; + unsigned int svm_port; + unsigned int svm_cid; + unsigned char svm_zero[sizeof(struct sockaddr) - + sizeof(unsigned short) - + sizeof(unsigned short) - + sizeof(unsigned int) - + sizeof(unsigned int)]; +}; +#endif /* !HAVE_LINUX_VM_SOCKETS_H */ + +#ifndef AF_VSOCK +#define AF_VSOCK 40 +#endif + +#ifndef SO_REUSEPORT +#define SO_REUSEPORT 15 +#endif + +#ifndef SO_PEERGROUPS +#define SO_PEERGROUPS 59 +#endif + +#ifndef SOL_NETLINK +#define SOL_NETLINK 270 +#endif + +#ifndef SOL_ALG +#define SOL_ALG 279 +#endif + +/* Not exposed yet. Defined in include/linux/socket.h. */ +#ifndef SOL_SCTP +#define SOL_SCTP 132 +#endif + +/* Not exposed yet. Defined in include/linux/socket.h */ +#ifndef SCM_SECURITY +#define SCM_SECURITY 0x03 +#endif + +/* netinet/in.h */ +#ifndef IP_FREEBIND +#define IP_FREEBIND 15 +#endif + +#ifndef IP_TRANSPARENT +#define IP_TRANSPARENT 19 +#endif diff --git a/src/basic/missing_stat.h b/src/basic/missing_stat.h new file mode 100644 index 0000000..5116206 --- /dev/null +++ b/src/basic/missing_stat.h @@ -0,0 +1,51 @@ +/* SPDX-License-Identifier: LGPL-2.1+ */ +#pragma once + +#include <linux/types.h> +#include <sys/stat.h> + +#if WANT_LINUX_STAT_H +#include <linux/stat.h> +#endif + +/* a528d35e8bfcc521d7cb70aaf03e1bd296c8493f (4.11) */ +#if !HAVE_STRUCT_STATX +struct statx_timestamp { + __s64 tv_sec; + __u32 tv_nsec; + __s32 __reserved; +}; +struct statx { + __u32 stx_mask; + __u32 stx_blksize; + __u64 stx_attributes; + __u32 stx_nlink; + __u32 stx_uid; + __u32 stx_gid; + __u16 stx_mode; + __u16 __spare0[1]; + __u64 stx_ino; + __u64 stx_size; + __u64 stx_blocks; + __u64 stx_attributes_mask; + struct statx_timestamp stx_atime; + struct statx_timestamp stx_btime; + struct statx_timestamp stx_ctime; + struct statx_timestamp stx_mtime; + __u32 stx_rdev_major; + __u32 stx_rdev_minor; + __u32 stx_dev_major; + __u32 stx_dev_minor; + __u64 __spare2[14]; +}; +#endif + +/* a528d35e8bfcc521d7cb70aaf03e1bd296c8493f (4.11) */ +#ifndef STATX_BTIME +#define STATX_BTIME 0x00000800U +#endif + +/* a528d35e8bfcc521d7cb70aaf03e1bd296c8493f (4.11) */ +#ifndef AT_STATX_DONT_SYNC +#define AT_STATX_DONT_SYNC 0x4000 +#endif diff --git a/src/basic/missing_stdlib.h b/src/basic/missing_stdlib.h new file mode 100644 index 0000000..188a8d4 --- /dev/null +++ b/src/basic/missing_stdlib.h @@ -0,0 +1,13 @@ +/* SPDX-License-Identifier: LGPL-2.1+ */ +#pragma once + +#include <stdlib.h> + +/* stdlib.h */ +#if !HAVE_SECURE_GETENV +# if HAVE___SECURE_GETENV +# define secure_getenv __secure_getenv +# else +# error "neither secure_getenv nor __secure_getenv are available" +# endif +#endif diff --git a/src/basic/missing_syscall.h b/src/basic/missing_syscall.h new file mode 100644 index 0000000..d1aa322 --- /dev/null +++ b/src/basic/missing_syscall.h @@ -0,0 +1,446 @@ +/* SPDX-License-Identifier: LGPL-2.1+ */ +#pragma once + +/* Missing glibc definitions to access certain kernel APIs */ + +#include <errno.h> +#include <fcntl.h> +#include <sys/syscall.h> +#include <sys/types.h> +#include <unistd.h> + +#ifdef ARCH_MIPS +#include <asm/sgidefs.h> +#endif + +#include "missing_keyctl.h" +#include "missing_stat.h" + +/* linux/kcmp.h */ +#ifndef KCMP_FILE /* 3f4994cfc15f38a3159c6e3a4b3ab2e1481a6b02 (3.19) */ +#define KCMP_FILE 0 +#endif + +#if !HAVE_PIVOT_ROOT +static inline int missing_pivot_root(const char *new_root, const char *put_old) { + return syscall(__NR_pivot_root, new_root, put_old); +} + +# define pivot_root missing_pivot_root +#endif + +/* ======================================================================= */ + +#if !HAVE_MEMFD_CREATE +# ifndef __NR_memfd_create +# if defined __x86_64__ +# define __NR_memfd_create 319 +# elif defined __arm__ +# define __NR_memfd_create 385 +# elif defined __aarch64__ +# define __NR_memfd_create 279 +# elif defined __s390__ +# define __NR_memfd_create 350 +# elif defined _MIPS_SIM +# if _MIPS_SIM == _MIPS_SIM_ABI32 +# define __NR_memfd_create 4354 +# endif +# if _MIPS_SIM == _MIPS_SIM_NABI32 +# define __NR_memfd_create 6318 +# endif +# if _MIPS_SIM == _MIPS_SIM_ABI64 +# define __NR_memfd_create 5314 +# endif +# elif defined __i386__ +# define __NR_memfd_create 356 +# elif defined __arc__ +# define __NR_memfd_create 279 +# else +# warning "__NR_memfd_create unknown for your architecture" +# endif +# endif + +static inline int missing_memfd_create(const char *name, unsigned int flags) { +# ifdef __NR_memfd_create + return syscall(__NR_memfd_create, name, flags); +# else + errno = ENOSYS; + return -1; +# endif +} + +# define memfd_create missing_memfd_create +#endif + +/* ======================================================================= */ + +#if !HAVE_GETRANDOM +# ifndef __NR_getrandom +# if defined __x86_64__ +# define __NR_getrandom 318 +# elif defined(__i386__) +# define __NR_getrandom 355 +# elif defined(__arm__) +# define __NR_getrandom 384 +# elif defined(__aarch64__) +# define __NR_getrandom 278 +# elif defined(__ia64__) +# define __NR_getrandom 1339 +# elif defined(__m68k__) +# define __NR_getrandom 352 +# elif defined(__s390x__) +# define __NR_getrandom 349 +# elif defined(__powerpc__) +# define __NR_getrandom 359 +# elif defined _MIPS_SIM +# if _MIPS_SIM == _MIPS_SIM_ABI32 +# define __NR_getrandom 4353 +# endif +# if _MIPS_SIM == _MIPS_SIM_NABI32 +# define __NR_getrandom 6317 +# endif +# if _MIPS_SIM == _MIPS_SIM_ABI64 +# define __NR_getrandom 5313 +# endif +# elif defined(__arc__) +# define __NR_getrandom 278 +# else +# warning "__NR_getrandom unknown for your architecture" +# endif +# endif + +static inline int missing_getrandom(void *buffer, size_t count, unsigned flags) { +# ifdef __NR_getrandom + return syscall(__NR_getrandom, buffer, count, flags); +# else + errno = ENOSYS; + return -1; +# endif +} + +# define getrandom missing_getrandom +#endif + +/* ======================================================================= */ + +#if !HAVE_GETTID +static inline pid_t missing_gettid(void) { + return (pid_t) syscall(__NR_gettid); +} + +# define gettid missing_gettid +#endif + +/* ======================================================================= */ + +#if !HAVE_NAME_TO_HANDLE_AT +# ifndef __NR_name_to_handle_at +# if defined(__x86_64__) +# define __NR_name_to_handle_at 303 +# elif defined(__i386__) +# define __NR_name_to_handle_at 341 +# elif defined(__arm__) +# define __NR_name_to_handle_at 370 +# elif defined(__powerpc__) +# define __NR_name_to_handle_at 345 +# elif defined(__arc__) +# define __NR_name_to_handle_at 264 +# else +# error "__NR_name_to_handle_at is not defined" +# endif +# endif + +struct file_handle { + unsigned int handle_bytes; + int handle_type; + unsigned char f_handle[0]; +}; + +static inline int missing_name_to_handle_at(int fd, const char *name, struct file_handle *handle, int *mnt_id, int flags) { +# ifdef __NR_name_to_handle_at + return syscall(__NR_name_to_handle_at, fd, name, handle, mnt_id, flags); +# else + errno = ENOSYS; + return -1; +# endif +} + +# define name_to_handle_at missing_name_to_handle_at +#endif + +/* ======================================================================= */ + +#if !HAVE_SETNS +# ifndef __NR_setns +# if defined(__x86_64__) +# define __NR_setns 308 +# elif defined(__i386__) +# define __NR_setns 346 +# elif defined(__arc__) +# define __NR_setns 268 +# else +# error "__NR_setns is not defined" +# endif +# endif + +static inline int missing_setns(int fd, int nstype) { +# ifdef __NR_setns + return syscall(__NR_setns, fd, nstype); +# else + errno = ENOSYS; + return -1; +# endif +} + +# define setns missing_setns +#endif + +/* ======================================================================= */ + +static inline pid_t raw_getpid(void) { +#if defined(__alpha__) + return (pid_t) syscall(__NR_getxpid); +#else + return (pid_t) syscall(__NR_getpid); +#endif +} + +/* ======================================================================= */ + +#if !HAVE_RENAMEAT2 +# ifndef __NR_renameat2 +# if defined __x86_64__ +# define __NR_renameat2 316 +# elif defined __arm__ +# define __NR_renameat2 382 +# elif defined __aarch64__ +# define __NR_renameat2 276 +# elif defined _MIPS_SIM +# if _MIPS_SIM == _MIPS_SIM_ABI32 +# define __NR_renameat2 4351 +# endif +# if _MIPS_SIM == _MIPS_SIM_NABI32 +# define __NR_renameat2 6315 +# endif +# if _MIPS_SIM == _MIPS_SIM_ABI64 +# define __NR_renameat2 5311 +# endif +# elif defined __i386__ +# define __NR_renameat2 353 +# elif defined __powerpc64__ +# define __NR_renameat2 357 +# elif defined __s390__ || defined __s390x__ +# define __NR_renameat2 347 +# elif defined __arc__ +# define __NR_renameat2 276 +# else +# warning "__NR_renameat2 unknown for your architecture" +# endif +# endif + +static inline int missing_renameat2(int oldfd, const char *oldname, int newfd, const char *newname, unsigned flags) { +# ifdef __NR_renameat2 + return syscall(__NR_renameat2, oldfd, oldname, newfd, newname, flags); +# else + errno = ENOSYS; + return -1; +# endif +} + +# define renameat2 missing_renameat2 +#endif + +/* ======================================================================= */ + +#if !HAVE_KCMP +static inline int missing_kcmp(pid_t pid1, pid_t pid2, int type, unsigned long idx1, unsigned long idx2) { +# ifdef __NR_kcmp + return syscall(__NR_kcmp, pid1, pid2, type, idx1, idx2); +# else + errno = ENOSYS; + return -1; +# endif +} + +# define kcmp missing_kcmp +#endif + +/* ======================================================================= */ + +#if !HAVE_KEYCTL +static inline long missing_keyctl(int cmd, unsigned long arg2, unsigned long arg3, unsigned long arg4, unsigned long arg5) { +# ifdef __NR_keyctl + return syscall(__NR_keyctl, cmd, arg2, arg3, arg4, arg5); +# else + errno = ENOSYS; + return -1; +# endif + +# define keyctl missing_keyctl +} + +static inline key_serial_t missing_add_key(const char *type, const char *description, const void *payload, size_t plen, key_serial_t ringid) { +# ifdef __NR_add_key + return syscall(__NR_add_key, type, description, payload, plen, ringid); +# else + errno = ENOSYS; + return -1; +# endif + +# define add_key missing_add_key +} + +static inline key_serial_t missing_request_key(const char *type, const char *description, const char * callout_info, key_serial_t destringid) { +# ifdef __NR_request_key + return syscall(__NR_request_key, type, description, callout_info, destringid); +# else + errno = ENOSYS; + return -1; +# endif + +# define request_key missing_request_key +} +#endif + +/* ======================================================================= */ + +#if !HAVE_COPY_FILE_RANGE +# ifndef __NR_copy_file_range +# if defined(__x86_64__) +# define __NR_copy_file_range 326 +# elif defined(__i386__) +# define __NR_copy_file_range 377 +# elif defined __s390__ +# define __NR_copy_file_range 375 +# elif defined __arm__ +# define __NR_copy_file_range 391 +# elif defined __aarch64__ +# define __NR_copy_file_range 285 +# elif defined __powerpc__ +# define __NR_copy_file_range 379 +# elif defined __arc__ +# define __NR_copy_file_range 285 +# else +# warning "__NR_copy_file_range not defined for your architecture" +# endif +# endif + +static inline ssize_t missing_copy_file_range(int fd_in, loff_t *off_in, + int fd_out, loff_t *off_out, + size_t len, + unsigned int flags) { +# ifdef __NR_copy_file_range + return syscall(__NR_copy_file_range, fd_in, off_in, fd_out, off_out, len, flags); +# else + errno = ENOSYS; + return -1; +# endif +} + +# define copy_file_range missing_copy_file_range +#endif + +/* ======================================================================= */ + +#if !HAVE_BPF +# ifndef __NR_bpf +# if defined __i386__ +# define __NR_bpf 357 +# elif defined __x86_64__ +# define __NR_bpf 321 +# elif defined __aarch64__ +# define __NR_bpf 280 +# elif defined __arm__ +# define __NR_bpf 386 +# elif defined __sparc__ +# define __NR_bpf 349 +# elif defined __s390__ +# define __NR_bpf 351 +# elif defined __tilegx__ +# define __NR_bpf 280 +# else +# warning "__NR_bpf not defined for your architecture" +# endif +# endif + +union bpf_attr; + +static inline int missing_bpf(int cmd, union bpf_attr *attr, size_t size) { +#ifdef __NR_bpf + return (int) syscall(__NR_bpf, cmd, attr, size); +#else + errno = ENOSYS; + return -1; +#endif +} + +# define bpf missing_bpf +#endif + +/* ======================================================================= */ + +#ifndef __IGNORE_pkey_mprotect +# ifndef __NR_pkey_mprotect +# if defined __i386__ +# define __NR_pkey_mprotect 380 +# elif defined __x86_64__ +# define __NR_pkey_mprotect 329 +# elif defined __arm__ +# define __NR_pkey_mprotect 394 +# elif defined __aarch64__ +# define __NR_pkey_mprotect 394 +# elif defined __powerpc__ +# define __NR_pkey_mprotect 386 +# elif defined _MIPS_SIM +# if _MIPS_SIM == _MIPS_SIM_ABI32 +# define __NR_pkey_mprotect 4363 +# endif +# if _MIPS_SIM == _MIPS_SIM_NABI32 +# define __NR_pkey_mprotect 6327 +# endif +# if _MIPS_SIM == _MIPS_SIM_ABI64 +# define __NR_pkey_mprotect 5323 +# endif +# else +# warning "__NR_pkey_mprotect not defined for your architecture" +# endif +# endif +#endif + +/* ======================================================================= */ + +#if !HAVE_STATX +# ifndef __NR_statx +# if defined __aarch64__ || defined __arm__ +# define __NR_statx 397 +# elif defined __alpha__ +# define __NR_statx 522 +# elif defined __i386__ || defined __powerpc64__ +# define __NR_statx 383 +# elif defined __sparc__ +# define __NR_statx 360 +# elif defined __x86_64__ +# define __NR_statx 332 +# else +# warning "__NR_statx not defined for your architecture" +# endif +# endif + +struct statx; +#endif + +/* This typedef is supposed to be always defined. */ +typedef struct statx struct_statx; + +#if !HAVE_STATX +static inline ssize_t missing_statx(int dfd, const char *filename, unsigned flags, unsigned int mask, struct statx *buffer) { +# ifdef __NR_statx + return syscall(__NR_statx, dfd, filename, flags, mask, buffer); +# else + errno = ENOSYS; + return -1; +# endif +} + +# define statx missing_statx +#endif diff --git a/src/basic/missing_timerfd.h b/src/basic/missing_timerfd.h new file mode 100644 index 0000000..6b04044 --- /dev/null +++ b/src/basic/missing_timerfd.h @@ -0,0 +1,8 @@ +/* SPDX-License-Identifier: LGPL-2.1+ */ +#pragma once + +#include <sys/timerfd.h> + +#ifndef TFD_TIMER_CANCEL_ON_SET +#define TFD_TIMER_CANCEL_ON_SET (1 << 1) +#endif diff --git a/src/basic/missing_type.h b/src/basic/missing_type.h new file mode 100644 index 0000000..bf8a6ca --- /dev/null +++ b/src/basic/missing_type.h @@ -0,0 +1,12 @@ +/* SPDX-License-Identifier: LGPL-2.1+ */ +#pragma once + +#include <uchar.h> + +#if !HAVE_CHAR32_T +#define char32_t uint32_t +#endif + +#if !HAVE_CHAR16_T +#define char16_t uint16_t +#endif diff --git a/src/basic/missing_vxcan.h b/src/basic/missing_vxcan.h new file mode 100644 index 0000000..be430f7 --- /dev/null +++ b/src/basic/missing_vxcan.h @@ -0,0 +1,12 @@ +/* SPDX-License-Identifier: LGPL-2.1+ */ +#pragma once + +#if !HAVE_LINUX_CAN_VXCAN_H /* linux@a8f820a380a2a06fc4fe1a54159067958f800929 (4.12) */ +enum { + VXCAN_INFO_UNSPEC, + VXCAN_INFO_PEER, + + __VXCAN_INFO_MAX +#define VXCAN_INFO_MAX (__VXCAN_INFO_MAX - 1) +}; +#endif diff --git a/src/basic/mkdir-label.c b/src/basic/mkdir-label.c new file mode 100644 index 0000000..0eba7fc --- /dev/null +++ b/src/basic/mkdir-label.c @@ -0,0 +1,58 @@ +/* SPDX-License-Identifier: LGPL-2.1+ */ + +#include <stdio.h> +#include <sys/stat.h> +#include <sys/types.h> +#include <unistd.h> + +#include "label.h" +#include "macro.h" +#include "mkdir.h" +#include "selinux-util.h" +#include "smack-util.h" + +int mkdir_label(const char *path, mode_t mode) { + int r; + + assert(path); + + r = mac_selinux_create_file_prepare(path, S_IFDIR); + if (r < 0) + return r; + + r = mkdir_errno_wrapper(path, mode); + mac_selinux_create_file_clear(); + if (r < 0) + return r; + + return mac_smack_fix(path, 0); +} + +int mkdirat_label(int dirfd, const char *path, mode_t mode) { + int r; + + assert(path); + + r = mac_selinux_create_file_prepare_at(dirfd, path, S_IFDIR); + if (r < 0) + return r; + + r = mkdirat_errno_wrapper(dirfd, path, mode); + mac_selinux_create_file_clear(); + if (r < 0) + return r; + + return mac_smack_fix_at(dirfd, path, 0); +} + +int mkdir_safe_label(const char *path, mode_t mode, uid_t uid, gid_t gid, MkdirFlags flags) { + return mkdir_safe_internal(path, mode, uid, gid, flags, mkdir_label); +} + +int mkdir_parents_label(const char *path, mode_t mode) { + return mkdir_parents_internal(NULL, path, mode, mkdir_label); +} + +int mkdir_p_label(const char *path, mode_t mode) { + return mkdir_p_internal(NULL, path, mode, mkdir_label); +} diff --git a/src/basic/mkdir.c b/src/basic/mkdir.c new file mode 100644 index 0000000..4bb65d5 --- /dev/null +++ b/src/basic/mkdir.c @@ -0,0 +1,166 @@ +/* SPDX-License-Identifier: LGPL-2.1+ */ + +#include <errno.h> +#include <stdbool.h> +#include <string.h> +#include <sys/stat.h> + +#include "alloc-util.h" +#include "fs-util.h" +#include "macro.h" +#include "mkdir.h" +#include "path-util.h" +#include "stat-util.h" +#include "stdio-util.h" +#include "user-util.h" + +int mkdir_safe_internal(const char *path, mode_t mode, uid_t uid, gid_t gid, MkdirFlags flags, mkdir_func_t _mkdir) { + struct stat st; + int r; + + assert(_mkdir != mkdir); + + if (_mkdir(path, mode) >= 0) { + r = chmod_and_chown(path, mode, uid, gid); + if (r < 0) + return r; + } + + if (lstat(path, &st) < 0) + return -errno; + + if ((flags & MKDIR_FOLLOW_SYMLINK) && S_ISLNK(st.st_mode)) { + _cleanup_free_ char *p = NULL; + + r = chase_symlinks(path, NULL, CHASE_NONEXISTENT, &p); + if (r < 0) + return r; + if (r == 0) + return mkdir_safe_internal(p, mode, uid, gid, + flags & ~MKDIR_FOLLOW_SYMLINK, + _mkdir); + + if (lstat(p, &st) < 0) + return -errno; + } + + if (!S_ISDIR(st.st_mode)) { + log_full(flags & MKDIR_WARN_MODE ? LOG_WARNING : LOG_DEBUG, + "Path \"%s\" already exists and is not a directory, refusing.", path); + return -ENOTDIR; + } + if ((st.st_mode & 0007) > (mode & 0007) || + (st.st_mode & 0070) > (mode & 0070) || + (st.st_mode & 0700) > (mode & 0700)) { + log_full(flags & MKDIR_WARN_MODE ? LOG_WARNING : LOG_DEBUG, + "Directory \"%s\" already exists, but has mode %04o that is too permissive (%04o was requested), refusing.", + path, st.st_mode & 0777, mode); + return -EEXIST; + } + if ((uid != UID_INVALID && st.st_uid != uid) || + (gid != GID_INVALID && st.st_gid != gid)) { + char u[DECIMAL_STR_MAX(uid_t)] = "-", g[DECIMAL_STR_MAX(gid_t)] = "-"; + + if (uid != UID_INVALID) + xsprintf(u, UID_FMT, uid); + if (gid != UID_INVALID) + xsprintf(g, GID_FMT, gid); + log_full(flags & MKDIR_WARN_MODE ? LOG_WARNING : LOG_DEBUG, + "Directory \"%s\" already exists, but is owned by "UID_FMT":"GID_FMT" (%s:%s was requested), refusing.", + path, st.st_uid, st.st_gid, u, g); + return -EEXIST; + } + + return 0; +} + +int mkdir_errno_wrapper(const char *pathname, mode_t mode) { + if (mkdir(pathname, mode) < 0) + return -errno; + return 0; +} + +int mkdirat_errno_wrapper(int dirfd, const char *pathname, mode_t mode) { + if (mkdirat(dirfd, pathname, mode) < 0) + return -errno; + return 0; +} + +int mkdir_safe(const char *path, mode_t mode, uid_t uid, gid_t gid, MkdirFlags flags) { + return mkdir_safe_internal(path, mode, uid, gid, flags, mkdir_errno_wrapper); +} + +int mkdir_parents_internal(const char *prefix, const char *path, mode_t mode, mkdir_func_t _mkdir) { + const char *p, *e; + int r; + + assert(path); + assert(_mkdir != mkdir); + + if (prefix && !path_startswith(path, prefix)) + return -ENOTDIR; + + /* return immediately if directory exists */ + e = strrchr(path, '/'); + if (!e) + return -EINVAL; + + if (e == path) + return 0; + + p = strndupa(path, e - path); + r = is_dir(p, true); + if (r > 0) + return 0; + if (r == 0) + return -ENOTDIR; + + /* create every parent directory in the path, except the last component */ + p = path + strspn(path, "/"); + for (;;) { + char t[strlen(path) + 1]; + + e = p + strcspn(p, "/"); + p = e + strspn(e, "/"); + + /* Is this the last component? If so, then we're done */ + if (*p == 0) + return 0; + + memcpy(t, path, e - path); + t[e-path] = 0; + + if (prefix && path_startswith(prefix, t)) + continue; + + r = _mkdir(t, mode); + if (r < 0 && r != -EEXIST) + return r; + } +} + +int mkdir_parents(const char *path, mode_t mode) { + return mkdir_parents_internal(NULL, path, mode, mkdir_errno_wrapper); +} + +int mkdir_p_internal(const char *prefix, const char *path, mode_t mode, mkdir_func_t _mkdir) { + int r; + + /* Like mkdir -p */ + + assert(_mkdir != mkdir); + + r = mkdir_parents_internal(prefix, path, mode, _mkdir); + if (r < 0) + return r; + + r = _mkdir(path, mode); + if (r < 0 && (r != -EEXIST || is_dir(path, true) <= 0)) + return r; + + return 0; +} + +int mkdir_p(const char *path, mode_t mode) { + return mkdir_p_internal(NULL, path, mode, mkdir_errno_wrapper); +} diff --git a/src/basic/mkdir.h b/src/basic/mkdir.h new file mode 100644 index 0000000..eb54853 --- /dev/null +++ b/src/basic/mkdir.h @@ -0,0 +1,26 @@ +/* SPDX-License-Identifier: LGPL-2.1+ */ +#pragma once + +#include <sys/types.h> + +typedef enum MkdirFlags { + MKDIR_FOLLOW_SYMLINK = 1 << 0, + MKDIR_WARN_MODE = 1 << 1, +} MkdirFlags; + +int mkdir_errno_wrapper(const char *pathname, mode_t mode); +int mkdirat_errno_wrapper(int dirfd, const char *pathname, mode_t mode); +int mkdir_safe(const char *path, mode_t mode, uid_t uid, gid_t gid, MkdirFlags flags); +int mkdir_parents(const char *path, mode_t mode); +int mkdir_p(const char *path, mode_t mode); + +/* mandatory access control(MAC) versions */ +int mkdir_safe_label(const char *path, mode_t mode, uid_t uid, gid_t gid, MkdirFlags flags); +int mkdir_parents_label(const char *path, mode_t mode); +int mkdir_p_label(const char *path, mode_t mode); + +/* internally used */ +typedef int (*mkdir_func_t)(const char *pathname, mode_t mode); +int mkdir_safe_internal(const char *path, mode_t mode, uid_t uid, gid_t gid, MkdirFlags flags, mkdir_func_t _mkdir); +int mkdir_parents_internal(const char *prefix, const char *path, mode_t mode, mkdir_func_t _mkdir); +int mkdir_p_internal(const char *prefix, const char *path, mode_t mode, mkdir_func_t _mkdir); diff --git a/src/basic/mountpoint-util.c b/src/basic/mountpoint-util.c new file mode 100644 index 0000000..1e946a0 --- /dev/null +++ b/src/basic/mountpoint-util.c @@ -0,0 +1,444 @@ +/* SPDX-License-Identifier: LGPL-2.1+ */ + +#include <errno.h> +#include <fcntl.h> +#include <stdio_ext.h> +#include <sys/mount.h> + +#include "alloc-util.h" +#include "fd-util.h" +#include "fileio.h" +#include "fs-util.h" +#include "missing.h" +#include "mountpoint-util.h" +#include "parse-util.h" +#include "path-util.h" +#include "stdio-util.h" +#include "strv.h" + +/* This is the original MAX_HANDLE_SZ definition from the kernel, when the API was introduced. We use that in place of + * any more currently defined value to future-proof things: if the size is increased in the API headers, and our code + * is recompiled then it would cease working on old kernels, as those refuse any sizes larger than this value with + * EINVAL right-away. Hence, let's disconnect ourselves from any such API changes, and stick to the original definition + * from when it was introduced. We use it as a start value only anyway (see below), and hence should be able to deal + * with large file handles anyway. */ +#define ORIGINAL_MAX_HANDLE_SZ 128 + +int name_to_handle_at_loop( + int fd, + const char *path, + struct file_handle **ret_handle, + int *ret_mnt_id, + int flags) { + + _cleanup_free_ struct file_handle *h = NULL; + size_t n = ORIGINAL_MAX_HANDLE_SZ; + + /* We need to invoke name_to_handle_at() in a loop, given that it might return EOVERFLOW when the specified + * buffer is too small. Note that in contrast to what the docs might suggest, MAX_HANDLE_SZ is only good as a + * start value, it is not an upper bound on the buffer size required. + * + * This improves on raw name_to_handle_at() also in one other regard: ret_handle and ret_mnt_id can be passed + * as NULL if there's no interest in either. */ + + for (;;) { + int mnt_id = -1; + + h = malloc0(offsetof(struct file_handle, f_handle) + n); + if (!h) + return -ENOMEM; + + h->handle_bytes = n; + + if (name_to_handle_at(fd, path, h, &mnt_id, flags) >= 0) { + + if (ret_handle) + *ret_handle = TAKE_PTR(h); + + if (ret_mnt_id) + *ret_mnt_id = mnt_id; + + return 0; + } + if (errno != EOVERFLOW) + return -errno; + + if (!ret_handle && ret_mnt_id && mnt_id >= 0) { + + /* As it appears, name_to_handle_at() fills in mnt_id even when it returns EOVERFLOW when the + * buffer is too small, but that's undocumented. Hence, let's make use of this if it appears to + * be filled in, and the caller was interested in only the mount ID an nothing else. */ + + *ret_mnt_id = mnt_id; + return 0; + } + + /* If name_to_handle_at() didn't increase the byte size, then this EOVERFLOW is caused by something + * else (apparently EOVERFLOW is returned for untriggered nfs4 mounts sometimes), not by the too small + * buffer. In that case propagate EOVERFLOW */ + if (h->handle_bytes <= n) + return -EOVERFLOW; + + /* The buffer was too small. Size the new buffer by what name_to_handle_at() returned. */ + n = h->handle_bytes; + if (offsetof(struct file_handle, f_handle) + n < n) /* check for addition overflow */ + return -EOVERFLOW; + + h = mfree(h); + } +} + +static int fd_fdinfo_mnt_id(int fd, const char *filename, int flags, int *mnt_id) { + char path[STRLEN("/proc/self/fdinfo/") + DECIMAL_STR_MAX(int)]; + _cleanup_free_ char *fdinfo = NULL; + _cleanup_close_ int subfd = -1; + char *p; + int r; + + if ((flags & AT_EMPTY_PATH) && isempty(filename)) + xsprintf(path, "/proc/self/fdinfo/%i", fd); + else { + subfd = openat(fd, filename, O_CLOEXEC|O_PATH|(flags & AT_SYMLINK_FOLLOW ? 0 : O_NOFOLLOW)); + if (subfd < 0) + return -errno; + + xsprintf(path, "/proc/self/fdinfo/%i", subfd); + } + + r = read_full_file(path, &fdinfo, NULL); + if (r == -ENOENT) /* The fdinfo directory is a relatively new addition */ + return -EOPNOTSUPP; + if (r < 0) + return r; + + p = startswith(fdinfo, "mnt_id:"); + if (!p) { + p = strstr(fdinfo, "\nmnt_id:"); + if (!p) /* The mnt_id field is a relatively new addition */ + return -EOPNOTSUPP; + + p += 8; + } + + p += strspn(p, WHITESPACE); + p[strcspn(p, WHITESPACE)] = 0; + + return safe_atoi(p, mnt_id); +} + +int fd_is_mount_point(int fd, const char *filename, int flags) { + _cleanup_free_ struct file_handle *h = NULL, *h_parent = NULL; + int mount_id = -1, mount_id_parent = -1; + bool nosupp = false, check_st_dev = true; + struct stat a, b; + int r; + + assert(fd >= 0); + assert(filename); + + /* First we will try the name_to_handle_at() syscall, which + * tells us the mount id and an opaque file "handle". It is + * not supported everywhere though (kernel compile-time + * option, not all file systems are hooked up). If it works + * the mount id is usually good enough to tell us whether + * something is a mount point. + * + * If that didn't work we will try to read the mount id from + * /proc/self/fdinfo/<fd>. This is almost as good as + * name_to_handle_at(), however, does not return the + * opaque file handle. The opaque file handle is pretty useful + * to detect the root directory, which we should always + * consider a mount point. Hence we use this only as + * fallback. Exporting the mnt_id in fdinfo is a pretty recent + * kernel addition. + * + * As last fallback we do traditional fstat() based st_dev + * comparisons. This is how things were traditionally done, + * but unionfs breaks this since it exposes file + * systems with a variety of st_dev reported. Also, btrfs + * subvolumes have different st_dev, even though they aren't + * real mounts of their own. */ + + r = name_to_handle_at_loop(fd, filename, &h, &mount_id, flags); + if (IN_SET(r, -ENOSYS, -EACCES, -EPERM, -EOVERFLOW, -EINVAL)) + /* This kernel does not support name_to_handle_at() at all (ENOSYS), or the syscall was blocked + * (EACCES/EPERM; maybe through seccomp, because we are running inside of a container?), or the mount + * point is not triggered yet (EOVERFLOW, think nfs4), or some general name_to_handle_at() flakiness + * (EINVAL): fall back to simpler logic. */ + goto fallback_fdinfo; + else if (r == -EOPNOTSUPP) + /* This kernel or file system does not support name_to_handle_at(), hence let's see if the upper fs + * supports it (in which case it is a mount point), otherwise fallback to the traditional stat() + * logic */ + nosupp = true; + else if (r < 0) + return r; + + r = name_to_handle_at_loop(fd, "", &h_parent, &mount_id_parent, AT_EMPTY_PATH); + if (r == -EOPNOTSUPP) { + if (nosupp) + /* Neither parent nor child do name_to_handle_at()? We have no choice but to fall back. */ + goto fallback_fdinfo; + else + /* The parent can't do name_to_handle_at() but the directory we are interested in can? If so, + * it must be a mount point. */ + return 1; + } else if (r < 0) + return r; + + /* The parent can do name_to_handle_at() but the + * directory we are interested in can't? If so, it + * must be a mount point. */ + if (nosupp) + return 1; + + /* If the file handle for the directory we are + * interested in and its parent are identical, we + * assume this is the root directory, which is a mount + * point. */ + + if (h->handle_bytes == h_parent->handle_bytes && + h->handle_type == h_parent->handle_type && + memcmp(h->f_handle, h_parent->f_handle, h->handle_bytes) == 0) + return 1; + + return mount_id != mount_id_parent; + +fallback_fdinfo: + r = fd_fdinfo_mnt_id(fd, filename, flags, &mount_id); + if (IN_SET(r, -EOPNOTSUPP, -EACCES, -EPERM)) + goto fallback_fstat; + if (r < 0) + return r; + + r = fd_fdinfo_mnt_id(fd, "", AT_EMPTY_PATH, &mount_id_parent); + if (r < 0) + return r; + + if (mount_id != mount_id_parent) + return 1; + + /* Hmm, so, the mount ids are the same. This leaves one + * special case though for the root file system. For that, + * let's see if the parent directory has the same inode as we + * are interested in. Hence, let's also do fstat() checks now, + * too, but avoid the st_dev comparisons, since they aren't + * that useful on unionfs mounts. */ + check_st_dev = false; + +fallback_fstat: + /* yay for fstatat() taking a different set of flags than the other + * _at() above */ + if (flags & AT_SYMLINK_FOLLOW) + flags &= ~AT_SYMLINK_FOLLOW; + else + flags |= AT_SYMLINK_NOFOLLOW; + if (fstatat(fd, filename, &a, flags) < 0) + return -errno; + + if (fstatat(fd, "", &b, AT_EMPTY_PATH) < 0) + return -errno; + + /* A directory with same device and inode as its parent? Must + * be the root directory */ + if (a.st_dev == b.st_dev && + a.st_ino == b.st_ino) + return 1; + + return check_st_dev && (a.st_dev != b.st_dev); +} + +/* flags can be AT_SYMLINK_FOLLOW or 0 */ +int path_is_mount_point(const char *t, const char *root, int flags) { + _cleanup_free_ char *canonical = NULL; + _cleanup_close_ int fd = -1; + int r; + + assert(t); + assert((flags & ~AT_SYMLINK_FOLLOW) == 0); + + if (path_equal(t, "/")) + return 1; + + /* we need to resolve symlinks manually, we can't just rely on + * fd_is_mount_point() to do that for us; if we have a structure like + * /bin -> /usr/bin/ and /usr is a mount point, then the parent that we + * look at needs to be /usr, not /. */ + if (flags & AT_SYMLINK_FOLLOW) { + r = chase_symlinks(t, root, CHASE_TRAIL_SLASH, &canonical); + if (r < 0) + return r; + + t = canonical; + } + + fd = open_parent(t, O_PATH|O_CLOEXEC, 0); + if (fd < 0) + return -errno; + + return fd_is_mount_point(fd, last_path_component(t), flags); +} + +int path_get_mnt_id(const char *path, int *ret) { + int r; + + r = name_to_handle_at_loop(AT_FDCWD, path, NULL, ret, 0); + if (IN_SET(r, -EOPNOTSUPP, -ENOSYS, -EACCES, -EPERM, -EOVERFLOW, -EINVAL)) /* kernel/fs don't support this, or seccomp blocks access, or untriggered mount, or name_to_handle_at() is flaky */ + return fd_fdinfo_mnt_id(AT_FDCWD, path, 0, ret); + + return r; +} + +bool fstype_is_network(const char *fstype) { + const char *x; + + x = startswith(fstype, "fuse."); + if (x) + fstype = x; + + return STR_IN_SET(fstype, + "afs", + "cifs", + "smbfs", + "sshfs", + "ncpfs", + "ncp", + "nfs", + "nfs4", + "gfs", + "gfs2", + "glusterfs", + "pvfs2", /* OrangeFS */ + "ocfs2", + "lustre"); +} + +bool fstype_is_api_vfs(const char *fstype) { + return STR_IN_SET(fstype, + "autofs", + "bpf", + "cgroup", + "cgroup2", + "configfs", + "cpuset", + "debugfs", + "devpts", + "devtmpfs", + "efivarfs", + "fusectl", + "hugetlbfs", + "mqueue", + "proc", + "pstore", + "ramfs", + "securityfs", + "sysfs", + "tmpfs", + "tracefs"); +} + +bool fstype_is_ro(const char *fstype) { + /* All Linux file systems that are necessarily read-only */ + return STR_IN_SET(fstype, + "DM_verity_hash", + "iso9660", + "squashfs"); +} + +bool fstype_can_discard(const char *fstype) { + return STR_IN_SET(fstype, + "btrfs", + "ext4", + "vfat", + "xfs"); +} + +bool fstype_can_uid_gid(const char *fstype) { + + /* All file systems that have a uid=/gid= mount option that fixates the owners of all files and directories, + * current and future. */ + + return STR_IN_SET(fstype, + "adfs", + "fat", + "hfs", + "hpfs", + "iso9660", + "msdos", + "ntfs", + "vfat"); +} + +int dev_is_devtmpfs(void) { + _cleanup_fclose_ FILE *proc_self_mountinfo = NULL; + int mount_id, r; + char *e; + + r = path_get_mnt_id("/dev", &mount_id); + if (r < 0) + return r; + + proc_self_mountinfo = fopen("/proc/self/mountinfo", "re"); + if (!proc_self_mountinfo) + return -errno; + + (void) __fsetlocking(proc_self_mountinfo, FSETLOCKING_BYCALLER); + + for (;;) { + _cleanup_free_ char *line = NULL; + int mid; + + r = read_line(proc_self_mountinfo, LONG_LINE_MAX, &line); + if (r < 0) + return r; + if (r == 0) + break; + + if (sscanf(line, "%i", &mid) != 1) + continue; + + if (mid != mount_id) + continue; + + e = strstr(line, " - "); + if (!e) + continue; + + /* accept any name that starts with the currently expected type */ + if (startswith(e + 3, "devtmpfs")) + return true; + } + + return false; +} + +const char *mount_propagation_flags_to_string(unsigned long flags) { + + switch (flags & (MS_SHARED|MS_SLAVE|MS_PRIVATE)) { + case 0: + return ""; + case MS_SHARED: + return "shared"; + case MS_SLAVE: + return "slave"; + case MS_PRIVATE: + return "private"; + } + + return NULL; +} + +int mount_propagation_flags_from_string(const char *name, unsigned long *ret) { + + if (isempty(name)) + *ret = 0; + else if (streq(name, "shared")) + *ret = MS_SHARED; + else if (streq(name, "slave")) + *ret = MS_SLAVE; + else if (streq(name, "private")) + *ret = MS_PRIVATE; + else + return -EINVAL; + return 0; +} diff --git a/src/basic/mountpoint-util.h b/src/basic/mountpoint-util.h new file mode 100644 index 0000000..5398836 --- /dev/null +++ b/src/basic/mountpoint-util.h @@ -0,0 +1,24 @@ +/* SPDX-License-Identifier: LGPL-2.1+ */ +#pragma once + +#include <fcntl.h> +#include <stdbool.h> +#include <sys/types.h> + +int name_to_handle_at_loop(int fd, const char *path, struct file_handle **ret_handle, int *ret_mnt_id, int flags); + +int path_get_mnt_id(const char *path, int *ret); + +int fd_is_mount_point(int fd, const char *filename, int flags); +int path_is_mount_point(const char *path, const char *root, int flags); + +bool fstype_is_network(const char *fstype); +bool fstype_is_api_vfs(const char *fstype); +bool fstype_is_ro(const char *fsype); +bool fstype_can_discard(const char *fstype); +bool fstype_can_uid_gid(const char *fstype); + +int dev_is_devtmpfs(void); + +const char *mount_propagation_flags_to_string(unsigned long flags); +int mount_propagation_flags_from_string(const char *name, unsigned long *ret); diff --git a/src/basic/nss-util.h b/src/basic/nss-util.h new file mode 100644 index 0000000..2045175 --- /dev/null +++ b/src/basic/nss-util.h @@ -0,0 +1,183 @@ +/* SPDX-License-Identifier: LGPL-2.1+ */ +#pragma once + +#include <grp.h> +#include <netdb.h> +#include <nss.h> +#include <pwd.h> +#include <resolv.h> + +#define NSS_SIGNALS_BLOCK SIGALRM,SIGVTALRM,SIGPIPE,SIGCHLD,SIGTSTP,SIGIO,SIGHUP,SIGUSR1,SIGUSR2,SIGPROF,SIGURG,SIGWINCH + +#ifndef DEPRECATED_RES_USE_INET6 +# define DEPRECATED_RES_USE_INET6 0x00002000 +#endif + +#define NSS_GETHOSTBYNAME_PROTOTYPES(module) \ +enum nss_status _nss_##module##_gethostbyname4_r( \ + const char *name, \ + struct gaih_addrtuple **pat, \ + char *buffer, size_t buflen, \ + int *errnop, int *h_errnop, \ + int32_t *ttlp) _public_; \ +enum nss_status _nss_##module##_gethostbyname3_r( \ + const char *name, \ + int af, \ + struct hostent *host, \ + char *buffer, size_t buflen, \ + int *errnop, int *h_errnop, \ + int32_t *ttlp, \ + char **canonp) _public_; \ +enum nss_status _nss_##module##_gethostbyname2_r( \ + const char *name, \ + int af, \ + struct hostent *host, \ + char *buffer, size_t buflen, \ + int *errnop, int *h_errnop) _public_; \ +enum nss_status _nss_##module##_gethostbyname_r( \ + const char *name, \ + struct hostent *host, \ + char *buffer, size_t buflen, \ + int *errnop, int *h_errnop) _public_ + +#define NSS_GETHOSTBYADDR_PROTOTYPES(module) \ +enum nss_status _nss_##module##_gethostbyaddr2_r( \ + const void* addr, socklen_t len, \ + int af, \ + struct hostent *host, \ + char *buffer, size_t buflen, \ + int *errnop, int *h_errnop, \ + int32_t *ttlp) _public_; \ +enum nss_status _nss_##module##_gethostbyaddr_r( \ + const void* addr, socklen_t len, \ + int af, \ + struct hostent *host, \ + char *buffer, size_t buflen, \ + int *errnop, int *h_errnop) _public_ + +#define NSS_GETHOSTBYNAME_FALLBACKS(module) \ +enum nss_status _nss_##module##_gethostbyname2_r( \ + const char *name, \ + int af, \ + struct hostent *host, \ + char *buffer, size_t buflen, \ + int *errnop, int *h_errnop) { \ + return _nss_##module##_gethostbyname3_r( \ + name, \ + af, \ + host, \ + buffer, buflen, \ + errnop, h_errnop, \ + NULL, \ + NULL); \ +} \ +enum nss_status _nss_##module##_gethostbyname_r( \ + const char *name, \ + struct hostent *host, \ + char *buffer, size_t buflen, \ + int *errnop, int *h_errnop) { \ + enum nss_status ret = NSS_STATUS_NOTFOUND; \ + \ + if (_res.options & DEPRECATED_RES_USE_INET6) \ + ret = _nss_##module##_gethostbyname3_r( \ + name, \ + AF_INET6, \ + host, \ + buffer, buflen, \ + errnop, h_errnop, \ + NULL, \ + NULL); \ + if (ret == NSS_STATUS_NOTFOUND) \ + ret = _nss_##module##_gethostbyname3_r( \ + name, \ + AF_INET, \ + host, \ + buffer, buflen, \ + errnop, h_errnop, \ + NULL, \ + NULL); \ + return ret; \ +} + +#define NSS_GETHOSTBYADDR_FALLBACKS(module) \ +enum nss_status _nss_##module##_gethostbyaddr_r( \ + const void* addr, socklen_t len, \ + int af, \ + struct hostent *host, \ + char *buffer, size_t buflen, \ + int *errnop, int *h_errnop) { \ + return _nss_##module##_gethostbyaddr2_r( \ + addr, len, \ + af, \ + host, \ + buffer, buflen, \ + errnop, h_errnop, \ + NULL); \ +} + +#define NSS_GETPW_PROTOTYPES(module) \ +enum nss_status _nss_##module##_getpwnam_r( \ + const char *name, \ + struct passwd *pwd, \ + char *buffer, size_t buflen, \ + int *errnop) _public_; \ +enum nss_status _nss_##module##_getpwuid_r( \ + uid_t uid, \ + struct passwd *pwd, \ + char *buffer, size_t buflen, \ + int *errnop) _public_ + +#define NSS_GETGR_PROTOTYPES(module) \ +enum nss_status _nss_##module##_getgrnam_r( \ + const char *name, \ + struct group *gr, \ + char *buffer, size_t buflen, \ + int *errnop) _public_; \ +enum nss_status _nss_##module##_getgrgid_r( \ + gid_t gid, \ + struct group *gr, \ + char *buffer, size_t buflen, \ + int *errnop) _public_ + +typedef enum nss_status (*_nss_gethostbyname4_r_t)( + const char *name, + struct gaih_addrtuple **pat, + char *buffer, size_t buflen, + int *errnop, int *h_errnop, + int32_t *ttlp); + +typedef enum nss_status (*_nss_gethostbyname3_r_t)( + const char *name, + int af, + struct hostent *result, + char *buffer, size_t buflen, + int *errnop, int *h_errnop, + int32_t *ttlp, + char **canonp); + +typedef enum nss_status (*_nss_gethostbyname2_r_t)( + const char *name, + int af, + struct hostent *result, + char *buffer, size_t buflen, + int *errnop, int *h_errnop); + +typedef enum nss_status (*_nss_gethostbyname_r_t)( + const char *name, + struct hostent *result, + char *buffer, size_t buflen, + int *errnop, int *h_errnop); + +typedef enum nss_status (*_nss_gethostbyaddr2_r_t)( + const void* addr, socklen_t len, + int af, + struct hostent *result, + char *buffer, size_t buflen, + int *errnop, int *h_errnop, + int32_t *ttlp); +typedef enum nss_status (*_nss_gethostbyaddr_r_t)( + const void* addr, socklen_t len, + int af, + struct hostent *host, + char *buffer, size_t buflen, + int *errnop, int *h_errnop); diff --git a/src/basic/ordered-set.c b/src/basic/ordered-set.c new file mode 100644 index 0000000..ed9ba77 --- /dev/null +++ b/src/basic/ordered-set.c @@ -0,0 +1,47 @@ +/* SPDX-License-Identifier: LGPL-2.1+ */ + +#include "ordered-set.h" +#include "strv.h" + +int ordered_set_consume(OrderedSet *s, void *p) { + int r; + + r = ordered_set_put(s, p); + if (r <= 0) + free(p); + + return r; +} + +int ordered_set_put_strdup(OrderedSet *s, const char *p) { + char *c; + int r; + + assert(s); + assert(p); + + c = strdup(p); + if (!c) + return -ENOMEM; + + r = ordered_set_consume(s, c); + if (r == -EEXIST) + return 0; + + return r; +} + +int ordered_set_put_strdupv(OrderedSet *s, char **l) { + int n = 0, r; + char **i; + + STRV_FOREACH(i, l) { + r = ordered_set_put_strdup(s, *i); + if (r < 0) + return r; + + n += r; + } + + return n; +} diff --git a/src/basic/ordered-set.h b/src/basic/ordered-set.h new file mode 100644 index 0000000..7cbb718 --- /dev/null +++ b/src/basic/ordered-set.h @@ -0,0 +1,62 @@ +/* SPDX-License-Identifier: LGPL-2.1+ */ +#pragma once + +#include "hashmap.h" + +typedef struct OrderedSet OrderedSet; + +static inline OrderedSet* ordered_set_new(const struct hash_ops *ops) { + return (OrderedSet*) ordered_hashmap_new(ops); +} + +static inline int ordered_set_ensure_allocated(OrderedSet **s, const struct hash_ops *ops) { + if (*s) + return 0; + + *s = ordered_set_new(ops); + if (!*s) + return -ENOMEM; + + return 0; +} + +static inline OrderedSet* ordered_set_free(OrderedSet *s) { + return (OrderedSet*) ordered_hashmap_free((OrderedHashmap*) s); +} + +static inline OrderedSet* ordered_set_free_free(OrderedSet *s) { + return (OrderedSet*) ordered_hashmap_free_free((OrderedHashmap*) s); +} + +static inline int ordered_set_put(OrderedSet *s, void *p) { + return ordered_hashmap_put((OrderedHashmap*) s, p, p); +} + +static inline bool ordered_set_isempty(OrderedSet *s) { + return ordered_hashmap_isempty((OrderedHashmap*) s); +} + +static inline bool ordered_set_iterate(OrderedSet *s, Iterator *i, void **value) { + return ordered_hashmap_iterate((OrderedHashmap*) s, i, value, NULL); +} + +static inline void* ordered_set_remove(OrderedSet *s, void *p) { + return ordered_hashmap_remove((OrderedHashmap*) s, p); +} + +static inline void* ordered_set_steal_first(OrderedSet *s) { + return ordered_hashmap_steal_first((OrderedHashmap*) s); +} + +int ordered_set_consume(OrderedSet *s, void *p); +int ordered_set_put_strdup(OrderedSet *s, const char *p); +int ordered_set_put_strdupv(OrderedSet *s, char **l); + +#define ORDERED_SET_FOREACH(e, s, i) \ + for ((i) = ITERATOR_FIRST; ordered_set_iterate((s), &(i), (void**)&(e)); ) + +DEFINE_TRIVIAL_CLEANUP_FUNC(OrderedSet*, ordered_set_free); +DEFINE_TRIVIAL_CLEANUP_FUNC(OrderedSet*, ordered_set_free_free); + +#define _cleanup_ordered_set_free_ _cleanup_(ordered_set_freep) +#define _cleanup_ordered_set_free_free_ _cleanup_(ordered_set_free_freep) diff --git a/src/basic/parse-util.c b/src/basic/parse-util.c new file mode 100644 index 0000000..87724af --- /dev/null +++ b/src/basic/parse-util.c @@ -0,0 +1,779 @@ +/* SPDX-License-Identifier: LGPL-2.1+ */ + +#include <errno.h> +#include <inttypes.h> +#include <linux/oom.h> +#include <locale.h> +#include <stdio.h> +#include <stdlib.h> +#include <string.h> +#include <sys/socket.h> + +#include "alloc-util.h" +#include "errno-list.h" +#include "extract-word.h" +#include "locale-util.h" +#include "macro.h" +#include "missing.h" +#include "parse-util.h" +#include "process-util.h" +#include "stat-util.h" +#include "string-util.h" + +int parse_boolean(const char *v) { + if (!v) + return -EINVAL; + + if (streq(v, "1") || strcaseeq(v, "yes") || strcaseeq(v, "y") || strcaseeq(v, "true") || strcaseeq(v, "t") || strcaseeq(v, "on")) + return 1; + else if (streq(v, "0") || strcaseeq(v, "no") || strcaseeq(v, "n") || strcaseeq(v, "false") || strcaseeq(v, "f") || strcaseeq(v, "off")) + return 0; + + return -EINVAL; +} + +int parse_pid(const char *s, pid_t* ret_pid) { + unsigned long ul = 0; + pid_t pid; + int r; + + assert(s); + assert(ret_pid); + + r = safe_atolu(s, &ul); + if (r < 0) + return r; + + pid = (pid_t) ul; + + if ((unsigned long) pid != ul) + return -ERANGE; + + if (!pid_is_valid(pid)) + return -ERANGE; + + *ret_pid = pid; + return 0; +} + +int parse_mode(const char *s, mode_t *ret) { + char *x; + long l; + + assert(s); + assert(ret); + + s += strspn(s, WHITESPACE); + if (s[0] == '-') + return -ERANGE; + + errno = 0; + l = strtol(s, &x, 8); + if (errno > 0) + return -errno; + if (!x || x == s || *x != 0) + return -EINVAL; + if (l < 0 || l > 07777) + return -ERANGE; + + *ret = (mode_t) l; + return 0; +} + +int parse_ifindex(const char *s, int *ret) { + int ifi, r; + + r = safe_atoi(s, &ifi); + if (r < 0) + return r; + if (ifi <= 0) + return -EINVAL; + + *ret = ifi; + return 0; +} + +int parse_mtu(int family, const char *s, uint32_t *ret) { + uint64_t u; + size_t m; + int r; + + r = parse_size(s, 1024, &u); + if (r < 0) + return r; + + if (u > UINT32_MAX) + return -ERANGE; + + if (family == AF_INET6) + m = IPV6_MIN_MTU; /* This is 1280 */ + else + m = IPV4_MIN_MTU; /* For all other protocols, including 'unspecified' we assume the IPv4 minimal MTU */ + + if (u < m) + return -ERANGE; + + *ret = (uint32_t) u; + return 0; +} + +int parse_size(const char *t, uint64_t base, uint64_t *size) { + + /* Soo, sometimes we want to parse IEC binary suffixes, and + * sometimes SI decimal suffixes. This function can parse + * both. Which one is the right way depends on the + * context. Wikipedia suggests that SI is customary for + * hardware metrics and network speeds, while IEC is + * customary for most data sizes used by software and volatile + * (RAM) memory. Hence be careful which one you pick! + * + * In either case we use just K, M, G as suffix, and not Ki, + * Mi, Gi or so (as IEC would suggest). That's because that's + * frickin' ugly. But this means you really need to make sure + * to document which base you are parsing when you use this + * call. */ + + struct table { + const char *suffix; + unsigned long long factor; + }; + + static const struct table iec[] = { + { "E", 1024ULL*1024ULL*1024ULL*1024ULL*1024ULL*1024ULL }, + { "P", 1024ULL*1024ULL*1024ULL*1024ULL*1024ULL }, + { "T", 1024ULL*1024ULL*1024ULL*1024ULL }, + { "G", 1024ULL*1024ULL*1024ULL }, + { "M", 1024ULL*1024ULL }, + { "K", 1024ULL }, + { "B", 1ULL }, + { "", 1ULL }, + }; + + static const struct table si[] = { + { "E", 1000ULL*1000ULL*1000ULL*1000ULL*1000ULL*1000ULL }, + { "P", 1000ULL*1000ULL*1000ULL*1000ULL*1000ULL }, + { "T", 1000ULL*1000ULL*1000ULL*1000ULL }, + { "G", 1000ULL*1000ULL*1000ULL }, + { "M", 1000ULL*1000ULL }, + { "K", 1000ULL }, + { "B", 1ULL }, + { "", 1ULL }, + }; + + const struct table *table; + const char *p; + unsigned long long r = 0; + unsigned n_entries, start_pos = 0; + + assert(t); + assert(IN_SET(base, 1000, 1024)); + assert(size); + + if (base == 1000) { + table = si; + n_entries = ELEMENTSOF(si); + } else { + table = iec; + n_entries = ELEMENTSOF(iec); + } + + p = t; + do { + unsigned long long l, tmp; + double frac = 0; + char *e; + unsigned i; + + p += strspn(p, WHITESPACE); + + errno = 0; + l = strtoull(p, &e, 10); + if (errno > 0) + return -errno; + if (e == p) + return -EINVAL; + if (*p == '-') + return -ERANGE; + + if (*e == '.') { + e++; + + /* strtoull() itself would accept space/+/- */ + if (*e >= '0' && *e <= '9') { + unsigned long long l2; + char *e2; + + l2 = strtoull(e, &e2, 10); + if (errno > 0) + return -errno; + + /* Ignore failure. E.g. 10.M is valid */ + frac = l2; + for (; e < e2; e++) + frac /= 10; + } + } + + e += strspn(e, WHITESPACE); + + for (i = start_pos; i < n_entries; i++) + if (startswith(e, table[i].suffix)) + break; + + if (i >= n_entries) + return -EINVAL; + + if (l + (frac > 0) > ULLONG_MAX / table[i].factor) + return -ERANGE; + + tmp = l * table[i].factor + (unsigned long long) (frac * table[i].factor); + if (tmp > ULLONG_MAX - r) + return -ERANGE; + + r += tmp; + if ((unsigned long long) (uint64_t) r != r) + return -ERANGE; + + p = e + strlen(table[i].suffix); + + start_pos = i + 1; + + } while (*p); + + *size = r; + + return 0; +} + +int parse_range(const char *t, unsigned *lower, unsigned *upper) { + _cleanup_free_ char *word = NULL; + unsigned l, u; + int r; + + assert(lower); + assert(upper); + + /* Extract the lower bound. */ + r = extract_first_word(&t, &word, "-", EXTRACT_DONT_COALESCE_SEPARATORS); + if (r < 0) + return r; + if (r == 0) + return -EINVAL; + + r = safe_atou(word, &l); + if (r < 0) + return r; + + /* Check for the upper bound and extract it if needed */ + if (!t) + /* Single number with no dashes. */ + u = l; + else if (!*t) + /* Trailing dash is an error. */ + return -EINVAL; + else { + r = safe_atou(t, &u); + if (r < 0) + return r; + } + + *lower = l; + *upper = u; + return 0; +} + +int parse_errno(const char *t) { + int r, e; + + assert(t); + + r = errno_from_name(t); + if (r > 0) + return r; + + r = safe_atoi(t, &e); + if (r < 0) + return r; + + /* 0 is also allowed here */ + if (!errno_is_valid(e) && e != 0) + return -ERANGE; + + return e; +} + +int parse_syscall_and_errno(const char *in, char **name, int *error) { + _cleanup_free_ char *n = NULL; + char *p; + int e = -1; + + assert(in); + assert(name); + assert(error); + + /* + * This parse "syscall:errno" like "uname:EILSEQ", "@sync:255". + * If errno is omitted, then error is set to -1. + * Empty syscall name is not allowed. + * Here, we do not check that the syscall name is valid or not. + */ + + p = strchr(in, ':'); + if (p) { + e = parse_errno(p + 1); + if (e < 0) + return e; + + n = strndup(in, p - in); + } else + n = strdup(in); + + if (!n) + return -ENOMEM; + + if (isempty(n)) + return -EINVAL; + + *error = e; + *name = TAKE_PTR(n); + + return 0; +} + +char *format_bytes(char *buf, size_t l, uint64_t t) { + unsigned i; + + /* This only does IEC units so far */ + + static const struct { + const char *suffix; + uint64_t factor; + } table[] = { + { "E", UINT64_C(1024)*UINT64_C(1024)*UINT64_C(1024)*UINT64_C(1024)*UINT64_C(1024)*UINT64_C(1024) }, + { "P", UINT64_C(1024)*UINT64_C(1024)*UINT64_C(1024)*UINT64_C(1024)*UINT64_C(1024) }, + { "T", UINT64_C(1024)*UINT64_C(1024)*UINT64_C(1024)*UINT64_C(1024) }, + { "G", UINT64_C(1024)*UINT64_C(1024)*UINT64_C(1024) }, + { "M", UINT64_C(1024)*UINT64_C(1024) }, + { "K", UINT64_C(1024) }, + }; + + if (t == (uint64_t) -1) + return NULL; + + for (i = 0; i < ELEMENTSOF(table); i++) { + + if (t >= table[i].factor) { + snprintf(buf, l, + "%" PRIu64 ".%" PRIu64 "%s", + t / table[i].factor, + ((t*UINT64_C(10)) / table[i].factor) % UINT64_C(10), + table[i].suffix); + + goto finish; + } + } + + snprintf(buf, l, "%" PRIu64 "B", t); + +finish: + buf[l-1] = 0; + return buf; + +} + +int safe_atou_full(const char *s, unsigned base, unsigned *ret_u) { + char *x = NULL; + unsigned long l; + + assert(s); + assert(ret_u); + assert(base <= 16); + + /* strtoul() is happy to parse negative values, and silently + * converts them to unsigned values without generating an + * error. We want a clean error, hence let's look for the "-" + * prefix on our own, and generate an error. But let's do so + * only after strtoul() validated that the string is clean + * otherwise, so that we return EINVAL preferably over + * ERANGE. */ + + s += strspn(s, WHITESPACE); + + errno = 0; + l = strtoul(s, &x, base); + if (errno > 0) + return -errno; + if (!x || x == s || *x != 0) + return -EINVAL; + if (s[0] == '-') + return -ERANGE; + if ((unsigned long) (unsigned) l != l) + return -ERANGE; + + *ret_u = (unsigned) l; + return 0; +} + +int safe_atoi(const char *s, int *ret_i) { + char *x = NULL; + long l; + + assert(s); + assert(ret_i); + + errno = 0; + l = strtol(s, &x, 0); + if (errno > 0) + return -errno; + if (!x || x == s || *x != 0) + return -EINVAL; + if ((long) (int) l != l) + return -ERANGE; + + *ret_i = (int) l; + return 0; +} + +int safe_atollu(const char *s, long long unsigned *ret_llu) { + char *x = NULL; + unsigned long long l; + + assert(s); + assert(ret_llu); + + s += strspn(s, WHITESPACE); + + errno = 0; + l = strtoull(s, &x, 0); + if (errno > 0) + return -errno; + if (!x || x == s || *x != 0) + return -EINVAL; + if (*s == '-') + return -ERANGE; + + *ret_llu = l; + return 0; +} + +int safe_atolli(const char *s, long long int *ret_lli) { + char *x = NULL; + long long l; + + assert(s); + assert(ret_lli); + + errno = 0; + l = strtoll(s, &x, 0); + if (errno > 0) + return -errno; + if (!x || x == s || *x != 0) + return -EINVAL; + + *ret_lli = l; + return 0; +} + +int safe_atou8(const char *s, uint8_t *ret) { + char *x = NULL; + unsigned long l; + + assert(s); + assert(ret); + + s += strspn(s, WHITESPACE); + + errno = 0; + l = strtoul(s, &x, 0); + if (errno > 0) + return -errno; + if (!x || x == s || *x != 0) + return -EINVAL; + if (s[0] == '-') + return -ERANGE; + if ((unsigned long) (uint8_t) l != l) + return -ERANGE; + + *ret = (uint8_t) l; + return 0; +} + +int safe_atou16_full(const char *s, unsigned base, uint16_t *ret) { + char *x = NULL; + unsigned long l; + + assert(s); + assert(ret); + assert(base <= 16); + + s += strspn(s, WHITESPACE); + + errno = 0; + l = strtoul(s, &x, base); + if (errno > 0) + return -errno; + if (!x || x == s || *x != 0) + return -EINVAL; + if (s[0] == '-') + return -ERANGE; + if ((unsigned long) (uint16_t) l != l) + return -ERANGE; + + *ret = (uint16_t) l; + return 0; +} + +int safe_atoi16(const char *s, int16_t *ret) { + char *x = NULL; + long l; + + assert(s); + assert(ret); + + errno = 0; + l = strtol(s, &x, 0); + if (errno > 0) + return -errno; + if (!x || x == s || *x != 0) + return -EINVAL; + if ((long) (int16_t) l != l) + return -ERANGE; + + *ret = (int16_t) l; + return 0; +} + +int safe_atod(const char *s, double *ret_d) { + _cleanup_(freelocalep) locale_t loc = (locale_t) 0; + char *x = NULL; + double d = 0; + + assert(s); + assert(ret_d); + + loc = newlocale(LC_NUMERIC_MASK, "C", (locale_t) 0); + if (loc == (locale_t) 0) + return -errno; + + errno = 0; + d = strtod_l(s, &x, loc); + if (errno > 0) + return -errno; + if (!x || x == s || *x != 0) + return -EINVAL; + + *ret_d = (double) d; + return 0; +} + +int parse_fractional_part_u(const char **p, size_t digits, unsigned *res) { + size_t i; + unsigned val = 0; + const char *s; + + s = *p; + + /* accept any number of digits, strtoull is limited to 19 */ + for (i=0; i < digits; i++,s++) { + if (*s < '0' || *s > '9') { + if (i == 0) + return -EINVAL; + + /* too few digits, pad with 0 */ + for (; i < digits; i++) + val *= 10; + + break; + } + + val *= 10; + val += *s - '0'; + } + + /* maybe round up */ + if (*s >= '5' && *s <= '9') + val++; + + s += strspn(s, DIGITS); + + *p = s; + *res = val; + + return 0; +} + +int parse_percent_unbounded(const char *p) { + const char *pc, *n; + int r, v; + + pc = endswith(p, "%"); + if (!pc) + return -EINVAL; + + n = strndupa(p, pc - p); + r = safe_atoi(n, &v); + if (r < 0) + return r; + if (v < 0) + return -ERANGE; + + return v; +} + +int parse_percent(const char *p) { + int v; + + v = parse_percent_unbounded(p); + if (v > 100) + return -ERANGE; + + return v; +} + +int parse_permille_unbounded(const char *p) { + const char *pc, *pm, *dot, *n; + int r, q, v; + + pm = endswith(p, "‰"); + if (pm) { + n = strndupa(p, pm - p); + r = safe_atoi(n, &v); + if (r < 0) + return r; + if (v < 0) + return -ERANGE; + } else { + pc = endswith(p, "%"); + if (!pc) + return -EINVAL; + + dot = memchr(p, '.', pc - p); + if (dot) { + if (dot + 2 != pc) + return -EINVAL; + if (dot[1] < '0' || dot[1] > '9') + return -EINVAL; + q = dot[1] - '0'; + n = strndupa(p, dot - p); + } else { + q = 0; + n = strndupa(p, pc - p); + } + r = safe_atoi(n, &v); + if (r < 0) + return r; + if (v < 0) + return -ERANGE; + if (v > (INT_MAX - q) / 10) + return -ERANGE; + + v = v * 10 + q; + } + + return v; +} + +int parse_permille(const char *p) { + int v; + + v = parse_permille_unbounded(p); + if (v > 1000) + return -ERANGE; + + return v; +} + +int parse_nice(const char *p, int *ret) { + int n, r; + + r = safe_atoi(p, &n); + if (r < 0) + return r; + + if (!nice_is_valid(n)) + return -ERANGE; + + *ret = n; + return 0; +} + +int parse_ip_port(const char *s, uint16_t *ret) { + uint16_t l; + int r; + + r = safe_atou16(s, &l); + if (r < 0) + return r; + + if (l == 0) + return -EINVAL; + + *ret = (uint16_t) l; + + return 0; +} + +int parse_ip_port_range(const char *s, uint16_t *low, uint16_t *high) { + unsigned l, h; + int r; + + r = parse_range(s, &l, &h); + if (r < 0) + return r; + + if (l <= 0 || l > 65535 || h <= 0 || h > 65535) + return -EINVAL; + + if (h < l) + return -EINVAL; + + *low = l; + *high = h; + + return 0; +} + +int parse_dev(const char *s, dev_t *ret) { + const char *major; + unsigned x, y; + size_t n; + int r; + + n = strspn(s, DIGITS); + if (n == 0) + return -EINVAL; + if (s[n] != ':') + return -EINVAL; + + major = strndupa(s, n); + r = safe_atou(major, &x); + if (r < 0) + return r; + + r = safe_atou(s + n + 1, &y); + if (r < 0) + return r; + + if (!DEVICE_MAJOR_VALID(x) || !DEVICE_MINOR_VALID(y)) + return -ERANGE; + + *ret = makedev(x, y); + return 0; +} + +int parse_oom_score_adjust(const char *s, int *ret) { + int r, v; + + assert(s); + assert(ret); + + r = safe_atoi(s, &v); + if (r < 0) + return r; + + if (v < OOM_SCORE_ADJ_MIN || v > OOM_SCORE_ADJ_MAX) + return -ERANGE; + + *ret = v; + return 0; +} diff --git a/src/basic/parse-util.h b/src/basic/parse-util.h new file mode 100644 index 0000000..e47641b --- /dev/null +++ b/src/basic/parse-util.h @@ -0,0 +1,120 @@ +/* SPDX-License-Identifier: LGPL-2.1+ */ +#pragma once + +#include <inttypes.h> +#include <limits.h> +#include <stddef.h> +#include <stdint.h> +#include <sys/types.h> + +#include "macro.h" + +#define MODE_INVALID ((mode_t) -1) + +int parse_boolean(const char *v) _pure_; +int parse_dev(const char *s, dev_t *ret); +int parse_pid(const char *s, pid_t* ret_pid); +int parse_mode(const char *s, mode_t *ret); +int parse_ifindex(const char *s, int *ret); +int parse_mtu(int family, const char *s, uint32_t *ret); + +int parse_size(const char *t, uint64_t base, uint64_t *size); +int parse_range(const char *t, unsigned *lower, unsigned *upper); +int parse_errno(const char *t); +int parse_syscall_and_errno(const char *in, char **name, int *error); + +#define FORMAT_BYTES_MAX 8 +char *format_bytes(char *buf, size_t l, uint64_t t); + +int safe_atou_full(const char *s, unsigned base, unsigned *ret_u); + +static inline int safe_atou(const char *s, unsigned *ret_u) { + return safe_atou_full(s, 0, ret_u); +} + +int safe_atoi(const char *s, int *ret_i); +int safe_atollu(const char *s, unsigned long long *ret_u); +int safe_atolli(const char *s, long long int *ret_i); + +int safe_atou8(const char *s, uint8_t *ret); + +int safe_atou16_full(const char *s, unsigned base, uint16_t *ret); + +static inline int safe_atou16(const char *s, uint16_t *ret) { + return safe_atou16_full(s, 0, ret); +} + +static inline int safe_atoux16(const char *s, uint16_t *ret) { + return safe_atou16_full(s, 16, ret); +} + +int safe_atoi16(const char *s, int16_t *ret); + +static inline int safe_atou32(const char *s, uint32_t *ret_u) { + assert_cc(sizeof(uint32_t) == sizeof(unsigned)); + return safe_atou(s, (unsigned*) ret_u); +} + +static inline int safe_atoi32(const char *s, int32_t *ret_i) { + assert_cc(sizeof(int32_t) == sizeof(int)); + return safe_atoi(s, (int*) ret_i); +} + +static inline int safe_atou64(const char *s, uint64_t *ret_u) { + assert_cc(sizeof(uint64_t) == sizeof(unsigned long long)); + return safe_atollu(s, (unsigned long long*) ret_u); +} + +static inline int safe_atoi64(const char *s, int64_t *ret_i) { + assert_cc(sizeof(int64_t) == sizeof(long long int)); + return safe_atolli(s, (long long int*) ret_i); +} + +#if LONG_MAX == INT_MAX +static inline int safe_atolu(const char *s, unsigned long *ret_u) { + assert_cc(sizeof(unsigned long) == sizeof(unsigned)); + return safe_atou(s, (unsigned*) ret_u); +} +static inline int safe_atoli(const char *s, long int *ret_u) { + assert_cc(sizeof(long int) == sizeof(int)); + return safe_atoi(s, (int*) ret_u); +} +#else +static inline int safe_atolu(const char *s, unsigned long *ret_u) { + assert_cc(sizeof(unsigned long) == sizeof(unsigned long long)); + return safe_atollu(s, (unsigned long long*) ret_u); +} +static inline int safe_atoli(const char *s, long int *ret_u) { + assert_cc(sizeof(long int) == sizeof(long long int)); + return safe_atolli(s, (long long int*) ret_u); +} +#endif + +#if SIZE_MAX == UINT_MAX +static inline int safe_atozu(const char *s, size_t *ret_u) { + assert_cc(sizeof(size_t) == sizeof(unsigned)); + return safe_atou(s, (unsigned *) ret_u); +} +#else +static inline int safe_atozu(const char *s, size_t *ret_u) { + assert_cc(sizeof(size_t) == sizeof(long unsigned)); + return safe_atolu(s, ret_u); +} +#endif + +int safe_atod(const char *s, double *ret_d); + +int parse_fractional_part_u(const char **s, size_t digits, unsigned *res); + +int parse_percent_unbounded(const char *p); +int parse_percent(const char *p); + +int parse_permille_unbounded(const char *p); +int parse_permille(const char *p); + +int parse_nice(const char *p, int *ret); + +int parse_ip_port(const char *s, uint16_t *ret); +int parse_ip_port_range(const char *s, uint16_t *low, uint16_t *high); + +int parse_oom_score_adjust(const char *s, int *ret); diff --git a/src/basic/path-util.c b/src/basic/path-util.c new file mode 100644 index 0000000..2215173 --- /dev/null +++ b/src/basic/path-util.c @@ -0,0 +1,1150 @@ +/* SPDX-License-Identifier: LGPL-2.1+ */ + +#include <errno.h> +#include <limits.h> +#include <stdio.h> +#include <stdlib.h> +#include <string.h> +#include <sys/stat.h> +#include <unistd.h> + +/* When we include libgen.h because we need dirname() we immediately + * undefine basename() since libgen.h defines it as a macro to the + * POSIX version which is really broken. We prefer GNU basename(). */ +#include <libgen.h> +#undef basename + +#include "alloc-util.h" +#include "extract-word.h" +#include "fs-util.h" +#include "glob-util.h" +#include "log.h" +#include "macro.h" +#include "missing.h" +#include "parse-util.h" +#include "path-util.h" +#include "stat-util.h" +#include "string-util.h" +#include "strv.h" +#include "time-util.h" +#include "utf8.h" + +bool path_is_absolute(const char *p) { + return p[0] == '/'; +} + +bool is_path(const char *p) { + return !!strchr(p, '/'); +} + +int path_split_and_make_absolute(const char *p, char ***ret) { + char **l; + int r; + + assert(p); + assert(ret); + + l = strv_split(p, ":"); + if (!l) + return -ENOMEM; + + r = path_strv_make_absolute_cwd(l); + if (r < 0) { + strv_free(l); + return r; + } + + *ret = l; + return r; +} + +char *path_make_absolute(const char *p, const char *prefix) { + assert(p); + + /* Makes every item in the list an absolute path by prepending + * the prefix, if specified and necessary */ + + if (path_is_absolute(p) || isempty(prefix)) + return strdup(p); + + if (endswith(prefix, "/")) + return strjoin(prefix, p); + else + return strjoin(prefix, "/", p); +} + +int safe_getcwd(char **ret) { + char *cwd; + + cwd = get_current_dir_name(); + if (!cwd) + return negative_errno(); + + /* Let's make sure the directory is really absolute, to protect us from the logic behind + * CVE-2018-1000001 */ + if (cwd[0] != '/') { + free(cwd); + return -ENOMEDIUM; + } + + *ret = cwd; + return 0; +} + +int path_make_absolute_cwd(const char *p, char **ret) { + char *c; + int r; + + assert(p); + assert(ret); + + /* Similar to path_make_absolute(), but prefixes with the + * current working directory. */ + + if (path_is_absolute(p)) + c = strdup(p); + else { + _cleanup_free_ char *cwd = NULL; + + r = safe_getcwd(&cwd); + if (r < 0) + return r; + + c = path_join(cwd, p); + } + if (!c) + return -ENOMEM; + + *ret = c; + return 0; +} + +int path_make_relative(const char *from_dir, const char *to_path, char **_r) { + char *f, *t, *r, *p; + unsigned n_parents = 0; + + assert(from_dir); + assert(to_path); + assert(_r); + + /* Strips the common part, and adds ".." elements as necessary. */ + + if (!path_is_absolute(from_dir) || !path_is_absolute(to_path)) + return -EINVAL; + + f = strdupa(from_dir); + t = strdupa(to_path); + + path_simplify(f, true); + path_simplify(t, true); + + /* Skip the common part. */ + for (;;) { + size_t a, b; + + f += *f == '/'; + t += *t == '/'; + + if (!*f) { + if (!*t) + /* from_dir equals to_path. */ + r = strdup("."); + else + /* from_dir is a parent directory of to_path. */ + r = strdup(t); + if (!r) + return -ENOMEM; + + *_r = r; + return 0; + } + + if (!*t) + break; + + a = strcspn(f, "/"); + b = strcspn(t, "/"); + + if (a != b || memcmp(f, t, a) != 0) + break; + + f += a; + t += b; + } + + /* If we're here, then "from_dir" has one or more elements that need to + * be replaced with "..". */ + + /* Count the number of necessary ".." elements. */ + for (; *f;) { + size_t w; + + w = strcspn(f, "/"); + + /* If this includes ".." we can't do a simple series of "..", refuse */ + if (w == 2 && f[0] == '.' && f[1] == '.') + return -EINVAL; + + /* Count number of elements */ + n_parents++; + + f += w; + f += *f == '/'; + } + + r = new(char, n_parents * 3 + strlen(t) + 1); + if (!r) + return -ENOMEM; + + for (p = r; n_parents > 0; n_parents--) + p = mempcpy(p, "../", 3); + + if (*t) + strcpy(p, t); + else + /* Remove trailing slash */ + *(--p) = 0; + + *_r = r; + return 0; +} + +int path_strv_make_absolute_cwd(char **l) { + char **s; + int r; + + /* Goes through every item in the string list and makes it + * absolute. This works in place and won't rollback any + * changes on failure. */ + + STRV_FOREACH(s, l) { + char *t; + + r = path_make_absolute_cwd(*s, &t); + if (r < 0) + return r; + + path_simplify(t, false); + free_and_replace(*s, t); + } + + return 0; +} + +char **path_strv_resolve(char **l, const char *root) { + char **s; + unsigned k = 0; + bool enomem = false; + int r; + + if (strv_isempty(l)) + return l; + + /* Goes through every item in the string list and canonicalize + * the path. This works in place and won't rollback any + * changes on failure. */ + + STRV_FOREACH(s, l) { + _cleanup_free_ char *orig = NULL; + char *t, *u; + + if (!path_is_absolute(*s)) { + free(*s); + continue; + } + + if (root) { + orig = *s; + t = prefix_root(root, orig); + if (!t) { + enomem = true; + continue; + } + } else + t = *s; + + r = chase_symlinks(t, root, 0, &u); + if (r == -ENOENT) { + if (root) { + u = TAKE_PTR(orig); + free(t); + } else + u = t; + } else if (r < 0) { + free(t); + + if (r == -ENOMEM) + enomem = true; + + continue; + } else if (root) { + char *x; + + free(t); + x = path_startswith(u, root); + if (x) { + /* restore the slash if it was lost */ + if (!startswith(x, "/")) + *(--x) = '/'; + + t = strdup(x); + free(u); + if (!t) { + enomem = true; + continue; + } + u = t; + } else { + /* canonicalized path goes outside of + * prefix, keep the original path instead */ + free_and_replace(u, orig); + } + } else + free(t); + + l[k++] = u; + } + + l[k] = NULL; + + if (enomem) + return NULL; + + return l; +} + +char **path_strv_resolve_uniq(char **l, const char *root) { + + if (strv_isempty(l)) + return l; + + if (!path_strv_resolve(l, root)) + return NULL; + + return strv_uniq(l); +} + +char *path_simplify(char *path, bool kill_dots) { + char *f, *t; + bool slash = false, ignore_slash = false, absolute; + + assert(path); + + /* Removes redundant inner and trailing slashes. Also removes unnecessary dots + * if kill_dots is true. Modifies the passed string in-place. + * + * ///foo//./bar/. becomes /foo/./bar/. (if kill_dots is false) + * ///foo//./bar/. becomes /foo/bar (if kill_dots is true) + * .//./foo//./bar/. becomes ././foo/./bar/. (if kill_dots is false) + * .//./foo//./bar/. becomes foo/bar (if kill_dots is true) + */ + + if (isempty(path)) + return path; + + absolute = path_is_absolute(path); + + f = path; + if (kill_dots && *f == '.' && IN_SET(f[1], 0, '/')) { + ignore_slash = true; + f++; + } + + for (t = path; *f; f++) { + + if (*f == '/') { + slash = true; + continue; + } + + if (slash) { + if (kill_dots && *f == '.' && IN_SET(f[1], 0, '/')) + continue; + + slash = false; + if (ignore_slash) + ignore_slash = false; + else + *(t++) = '/'; + } + + *(t++) = *f; + } + + /* Special rule, if we stripped everything, we either need a "/" (for the root directory) + * or "." for the current directory */ + if (t == path) { + if (absolute) + *(t++) = '/'; + else + *(t++) = '.'; + } + + *t = 0; + return path; +} + +char* path_startswith(const char *path, const char *prefix) { + assert(path); + assert(prefix); + + /* Returns a pointer to the start of the first component after the parts matched by + * the prefix, iff + * - both paths are absolute or both paths are relative, + * and + * - each component in prefix in turn matches a component in path at the same position. + * An empty string will be returned when the prefix and path are equivalent. + * + * Returns NULL otherwise. + */ + + if ((path[0] == '/') != (prefix[0] == '/')) + return NULL; + + for (;;) { + size_t a, b; + + path += strspn(path, "/"); + prefix += strspn(prefix, "/"); + + if (*prefix == 0) + return (char*) path; + + if (*path == 0) + return NULL; + + a = strcspn(path, "/"); + b = strcspn(prefix, "/"); + + if (a != b) + return NULL; + + if (memcmp(path, prefix, a) != 0) + return NULL; + + path += a; + prefix += b; + } +} + +int path_compare(const char *a, const char *b) { + int d; + + assert(a); + assert(b); + + /* A relative path and an absolute path must not compare as equal. + * Which one is sorted before the other does not really matter. + * Here a relative path is ordered before an absolute path. */ + d = (a[0] == '/') - (b[0] == '/'); + if (d != 0) + return d; + + for (;;) { + size_t j, k; + + a += strspn(a, "/"); + b += strspn(b, "/"); + + if (*a == 0 && *b == 0) + return 0; + + /* Order prefixes first: "/foo" before "/foo/bar" */ + if (*a == 0) + return -1; + if (*b == 0) + return 1; + + j = strcspn(a, "/"); + k = strcspn(b, "/"); + + /* Alphabetical sort: "/foo/aaa" before "/foo/b" */ + d = memcmp(a, b, MIN(j, k)); + if (d != 0) + return (d > 0) - (d < 0); /* sign of d */ + + /* Sort "/foo/a" before "/foo/aaa" */ + d = (j > k) - (j < k); /* sign of (j - k) */ + if (d != 0) + return d; + + a += j; + b += k; + } +} + +bool path_equal(const char *a, const char *b) { + return path_compare(a, b) == 0; +} + +bool path_equal_or_files_same(const char *a, const char *b, int flags) { + return path_equal(a, b) || files_same(a, b, flags) > 0; +} + +char* path_join_internal(const char *first, ...) { + char *joined, *q; + const char *p; + va_list ap; + bool slash; + size_t sz; + + /* Joins all listed strings until the sentinel and places a "/" between them unless the strings end/begin + * already with one so that it is unnecessary. Note that slashes which are already duplicate won't be + * removed. The string returned is hence always equal to or longer than the sum of the lengths of each + * individual string. + * + * Note: any listed empty string is simply skipped. This can be useful for concatenating strings of which some + * are optional. + * + * Examples: + * + * path_join("foo", "bar") → "foo/bar" + * path_join("foo/", "bar") → "foo/bar" + * path_join("", "foo", "", "bar", "") → "foo/bar" */ + + sz = strlen_ptr(first); + va_start(ap, first); + while ((p = va_arg(ap, char*)) != (const char*) -1) + if (!isempty(p)) + sz += 1 + strlen(p); + va_end(ap); + + joined = new(char, sz + 1); + if (!joined) + return NULL; + + if (!isempty(first)) { + q = stpcpy(joined, first); + slash = endswith(first, "/"); + } else { + /* Skip empty items */ + joined[0] = 0; + q = joined; + slash = true; /* no need to generate a slash anymore */ + } + + va_start(ap, first); + while ((p = va_arg(ap, char*)) != (const char*) -1) { + if (isempty(p)) + continue; + + if (!slash && p[0] != '/') + *(q++) = '/'; + + q = stpcpy(q, p); + slash = endswith(p, "/"); + } + va_end(ap); + + return joined; +} + +int find_binary(const char *name, char **ret) { + int last_error, r; + const char *p; + + assert(name); + + if (is_path(name)) { + if (access(name, X_OK) < 0) + return -errno; + + if (ret) { + r = path_make_absolute_cwd(name, ret); + if (r < 0) + return r; + } + + return 0; + } + + /** + * Plain getenv, not secure_getenv, because we want + * to actually allow the user to pick the binary. + */ + p = getenv("PATH"); + if (!p) + p = DEFAULT_PATH; + + last_error = -ENOENT; + + for (;;) { + _cleanup_free_ char *j = NULL, *element = NULL; + + r = extract_first_word(&p, &element, ":", EXTRACT_RELAX|EXTRACT_DONT_COALESCE_SEPARATORS); + if (r < 0) + return r; + if (r == 0) + break; + + if (!path_is_absolute(element)) + continue; + + j = strjoin(element, "/", name); + if (!j) + return -ENOMEM; + + if (access(j, X_OK) >= 0) { + /* Found it! */ + + if (ret) { + *ret = path_simplify(j, false); + j = NULL; + } + + return 0; + } + + last_error = -errno; + } + + return last_error; +} + +bool paths_check_timestamp(const char* const* paths, usec_t *timestamp, bool update) { + bool changed = false; + const char* const* i; + + assert(timestamp); + + if (!paths) + return false; + + STRV_FOREACH(i, paths) { + struct stat stats; + usec_t u; + + if (stat(*i, &stats) < 0) + continue; + + u = timespec_load(&stats.st_mtim); + + /* first check */ + if (*timestamp >= u) + continue; + + log_debug("timestamp of '%s' changed", *i); + + /* update timestamp */ + if (update) { + *timestamp = u; + changed = true; + } else + return true; + } + + return changed; +} + +static int binary_is_good(const char *binary) { + _cleanup_free_ char *p = NULL, *d = NULL; + int r; + + r = find_binary(binary, &p); + if (r == -ENOENT) + return 0; + if (r < 0) + return r; + + /* An fsck that is linked to /bin/true is a non-existent + * fsck */ + + r = readlink_malloc(p, &d); + if (r == -EINVAL) /* not a symlink */ + return 1; + if (r < 0) + return r; + + return !PATH_IN_SET(d, "true" + "/bin/true", + "/usr/bin/true", + "/dev/null"); +} + +int fsck_exists(const char *fstype) { + const char *checker; + + assert(fstype); + + if (streq(fstype, "auto")) + return -EINVAL; + + checker = strjoina("fsck.", fstype); + return binary_is_good(checker); +} + +int mkfs_exists(const char *fstype) { + const char *mkfs; + + assert(fstype); + + if (streq(fstype, "auto")) + return -EINVAL; + + mkfs = strjoina("mkfs.", fstype); + return binary_is_good(mkfs); +} + +char *prefix_root(const char *root, const char *path) { + char *n, *p; + size_t l; + + /* If root is passed, prefixes path with it. Otherwise returns + * it as is. */ + + assert(path); + + /* First, drop duplicate prefixing slashes from the path */ + while (path[0] == '/' && path[1] == '/') + path++; + + if (empty_or_root(root)) + return strdup(path); + + l = strlen(root) + 1 + strlen(path) + 1; + + n = new(char, l); + if (!n) + return NULL; + + p = stpcpy(n, root); + + while (p > n && p[-1] == '/') + p--; + + if (path[0] != '/') + *(p++) = '/'; + + strcpy(p, path); + return n; +} + +int parse_path_argument_and_warn(const char *path, bool suppress_root, char **arg) { + char *p; + int r; + + /* + * This function is intended to be used in command line + * parsers, to handle paths that are passed in. It makes the + * path absolute, and reduces it to NULL if omitted or + * root (the latter optionally). + * + * NOTE THAT THIS WILL FREE THE PREVIOUS ARGUMENT POINTER ON + * SUCCESS! Hence, do not pass in uninitialized pointers. + */ + + if (isempty(path)) { + *arg = mfree(*arg); + return 0; + } + + r = path_make_absolute_cwd(path, &p); + if (r < 0) + return log_error_errno(r, "Failed to parse path \"%s\" and make it absolute: %m", path); + + path_simplify(p, false); + if (suppress_root && empty_or_root(p)) + p = mfree(p); + + free_and_replace(*arg, p); + + return 0; +} + +char* dirname_malloc(const char *path) { + char *d, *dir, *dir2; + + assert(path); + + d = strdup(path); + if (!d) + return NULL; + + dir = dirname(d); + assert(dir); + + if (dir == d) + return d; + + dir2 = strdup(dir); + free(d); + + return dir2; +} + +const char *last_path_component(const char *path) { + + /* Finds the last component of the path, preserving the optional trailing slash that signifies a directory. + * + * a/b/c → c + * a/b/c/ → c/ + * x → x + * x/ → x/ + * /y → y + * /y/ → y/ + * / → / + * // → / + * /foo/a → a + * /foo/a/ → a/ + * + * Also, the empty string is mapped to itself. + * + * This is different than basename(), which returns "" when a trailing slash is present. + */ + + unsigned l, k; + + if (!path) + return NULL; + + l = k = strlen(path); + if (l == 0) /* special case — an empty string */ + return path; + + while (k > 0 && path[k-1] == '/') + k--; + + if (k == 0) /* the root directory */ + return path + l - 1; + + while (k > 0 && path[k-1] != '/') + k--; + + return path + k; +} + +int path_extract_filename(const char *p, char **ret) { + _cleanup_free_ char *a = NULL; + const char *c, *e = NULL, *q; + + /* Extracts the filename part (i.e. right-most component) from a path, i.e. string that passes + * filename_is_valid(). A wrapper around last_path_component(), but eats up trailing slashes. */ + + if (!p) + return -EINVAL; + + c = last_path_component(p); + + for (q = c; *q != 0; q++) + if (*q != '/') + e = q + 1; + + if (!e) /* no valid character? */ + return -EINVAL; + + a = strndup(c, e - c); + if (!a) + return -ENOMEM; + + if (!filename_is_valid(a)) + return -EINVAL; + + *ret = TAKE_PTR(a); + + return 0; +} + +bool filename_is_valid(const char *p) { + const char *e; + + if (isempty(p)) + return false; + + if (dot_or_dot_dot(p)) + return false; + + e = strchrnul(p, '/'); + if (*e != 0) + return false; + + if (e - p > FILENAME_MAX) /* FILENAME_MAX is counted *without* the trailing NUL byte */ + return false; + + return true; +} + +bool path_is_valid(const char *p) { + + if (isempty(p)) + return false; + + if (strlen(p) >= PATH_MAX) /* PATH_MAX is counted *with* the trailing NUL byte */ + return false; + + return true; +} + +bool path_is_normalized(const char *p) { + + if (!path_is_valid(p)) + return false; + + if (dot_or_dot_dot(p)) + return false; + + if (startswith(p, "../") || endswith(p, "/..") || strstr(p, "/../")) + return false; + + if (startswith(p, "./") || endswith(p, "/.") || strstr(p, "/./")) + return false; + + if (strstr(p, "//")) + return false; + + return true; +} + +char *file_in_same_dir(const char *path, const char *filename) { + char *e, *ret; + size_t k; + + assert(path); + assert(filename); + + /* This removes the last component of path and appends + * filename, unless the latter is absolute anyway or the + * former isn't */ + + if (path_is_absolute(filename)) + return strdup(filename); + + e = strrchr(path, '/'); + if (!e) + return strdup(filename); + + k = strlen(filename); + ret = new(char, (e + 1 - path) + k + 1); + if (!ret) + return NULL; + + memcpy(mempcpy(ret, path, e + 1 - path), filename, k + 1); + return ret; +} + +bool hidden_or_backup_file(const char *filename) { + const char *p; + + assert(filename); + + if (filename[0] == '.' || + streq(filename, "lost+found") || + streq(filename, "aquota.user") || + streq(filename, "aquota.group") || + endswith(filename, "~")) + return true; + + p = strrchr(filename, '.'); + if (!p) + return false; + + /* Please, let's not add more entries to the list below. If external projects think it's a good idea to come up + * with always new suffixes and that everybody else should just adjust to that, then it really should be on + * them. Hence, in future, let's not add any more entries. Instead, let's ask those packages to instead adopt + * one of the generic suffixes/prefixes for hidden files or backups, possibly augmented with an additional + * string. Specifically: there's now: + * + * The generic suffixes "~" and ".bak" for backup files + * The generic prefix "." for hidden files + * + * Thus, if a new package manager "foopkg" wants its own set of ".foopkg-new", ".foopkg-old", ".foopkg-dist" + * or so registered, let's refuse that and ask them to use ".foopkg.new", ".foopkg.old" or ".foopkg~" instead. + */ + + return STR_IN_SET(p + 1, + "rpmnew", + "rpmsave", + "rpmorig", + "dpkg-old", + "dpkg-new", + "dpkg-tmp", + "dpkg-dist", + "dpkg-bak", + "dpkg-backup", + "dpkg-remove", + "ucf-new", + "ucf-old", + "ucf-dist", + "swp", + "bak", + "old", + "new"); +} + +bool is_device_path(const char *path) { + + /* Returns true on paths that likely refer to a device, either by path in sysfs or to something in /dev */ + + return PATH_STARTSWITH_SET(path, "/dev/", "/sys/"); +} + +bool valid_device_node_path(const char *path) { + + /* Some superficial checks whether the specified path is a valid device node path, all without looking at the + * actual device node. */ + + if (!PATH_STARTSWITH_SET(path, "/dev/", "/run/systemd/inaccessible/")) + return false; + + if (endswith(path, "/")) /* can't be a device node if it ends in a slash */ + return false; + + return path_is_normalized(path); +} + +bool valid_device_allow_pattern(const char *path) { + assert(path); + + /* Like valid_device_node_path(), but also allows full-subsystem expressions, like DeviceAllow= and DeviceDeny= + * accept it */ + + if (STARTSWITH_SET(path, "block-", "char-")) + return true; + + return valid_device_node_path(path); +} + +int systemd_installation_has_version(const char *root, unsigned minimal_version) { + const char *pattern; + int r; + + /* Try to guess if systemd installation is later than the specified version. This + * is hacky and likely to yield false negatives, particularly if the installation + * is non-standard. False positives should be relatively rare. + */ + + NULSTR_FOREACH(pattern, + /* /lib works for systems without usr-merge, and for systems with a sane + * usr-merge, where /lib is a symlink to /usr/lib. /usr/lib is necessary + * for Gentoo which does a merge without making /lib a symlink. + */ + "lib/systemd/libsystemd-shared-*.so\0" + "lib64/systemd/libsystemd-shared-*.so\0" + "usr/lib/systemd/libsystemd-shared-*.so\0" + "usr/lib64/systemd/libsystemd-shared-*.so\0") { + + _cleanup_strv_free_ char **names = NULL; + _cleanup_free_ char *path = NULL; + char *c, **name; + + path = prefix_root(root, pattern); + if (!path) + return -ENOMEM; + + r = glob_extend(&names, path); + if (r == -ENOENT) + continue; + if (r < 0) + return r; + + assert_se(c = endswith(path, "*.so")); + *c = '\0'; /* truncate the glob part */ + + STRV_FOREACH(name, names) { + /* This is most likely to run only once, hence let's not optimize anything. */ + char *t, *t2; + unsigned version; + + t = startswith(*name, path); + if (!t) + continue; + + t2 = endswith(t, ".so"); + if (!t2) + continue; + + t2[0] = '\0'; /* truncate the suffix */ + + r = safe_atou(t, &version); + if (r < 0) { + log_debug_errno(r, "Found libsystemd shared at \"%s.so\", but failed to parse version: %m", *name); + continue; + } + + log_debug("Found libsystemd shared at \"%s.so\", version %u (%s).", + *name, version, + version >= minimal_version ? "OK" : "too old"); + if (version >= minimal_version) + return true; + } + } + + return false; +} + +bool dot_or_dot_dot(const char *path) { + if (!path) + return false; + if (path[0] != '.') + return false; + if (path[1] == 0) + return true; + if (path[1] != '.') + return false; + + return path[2] == 0; +} + +bool empty_or_root(const char *root) { + + /* For operations relative to some root directory, returns true if the specified root directory is redundant, + * i.e. either / or NULL or the empty string or any equivalent. */ + + if (!root) + return true; + + return root[strspn(root, "/")] == 0; +} + +int path_simplify_and_warn( + char *path, + unsigned flag, + const char *unit, + const char *filename, + unsigned line, + const char *lvalue) { + + bool absolute, fatal = flag & PATH_CHECK_FATAL; + + assert(!FLAGS_SET(flag, PATH_CHECK_ABSOLUTE | PATH_CHECK_RELATIVE)); + + if (!utf8_is_valid(path)) { + log_syntax_invalid_utf8(unit, LOG_ERR, filename, line, path); + return -EINVAL; + } + + if (flag & (PATH_CHECK_ABSOLUTE | PATH_CHECK_RELATIVE)) { + absolute = path_is_absolute(path); + + if (!absolute && (flag & PATH_CHECK_ABSOLUTE)) { + log_syntax(unit, LOG_ERR, filename, line, 0, + "%s= path is not absolute%s: %s", + lvalue, fatal ? "" : ", ignoring", path); + return -EINVAL; + } + + if (absolute && (flag & PATH_CHECK_RELATIVE)) { + log_syntax(unit, LOG_ERR, filename, line, 0, + "%s= path is absolute%s: %s", + lvalue, fatal ? "" : ", ignoring", path); + return -EINVAL; + } + } + + path_simplify(path, true); + + if (!path_is_normalized(path)) { + log_syntax(unit, LOG_ERR, filename, line, 0, + "%s= path is not normalized%s: %s", + lvalue, fatal ? "" : ", ignoring", path); + return -EINVAL; + } + + if (!path_is_valid(path)) { + log_syntax(unit, LOG_ERR, filename, line, 0, + "%s= path has invalid length (%zu bytes)%s.", + lvalue, strlen(path), fatal ? "" : ", ignoring"); + return -EINVAL; + } + + return 0; +} diff --git a/src/basic/path-util.h b/src/basic/path-util.h new file mode 100644 index 0000000..86c5a57 --- /dev/null +++ b/src/basic/path-util.h @@ -0,0 +1,190 @@ +/* SPDX-License-Identifier: LGPL-2.1+ */ +#pragma once + +#include <alloca.h> +#include <stdbool.h> +#include <stddef.h> + +#include "macro.h" +#include "string-util.h" +#include "strv.h" +#include "time-util.h" + +#define PATH_SPLIT_SBIN_BIN(x) x "sbin:" x "bin" +#define PATH_SPLIT_SBIN_BIN_NULSTR(x) x "sbin\0" x "bin\0" + +#define PATH_NORMAL_SBIN_BIN(x) x "bin" +#define PATH_NORMAL_SBIN_BIN_NULSTR(x) x "bin\0" + +#if HAVE_SPLIT_BIN +# define PATH_SBIN_BIN(x) PATH_SPLIT_SBIN_BIN(x) +# define PATH_SBIN_BIN_NULSTR(x) PATH_SPLIT_SBIN_BIN_NULSTR(x) +#else +# define PATH_SBIN_BIN(x) PATH_NORMAL_SBIN_BIN(x) +# define PATH_SBIN_BIN_NULSTR(x) PATH_NORMAL_SBIN_BIN_NULSTR(x) +#endif + +#define DEFAULT_PATH_NORMAL PATH_SBIN_BIN("/usr/local/") ":" PATH_SBIN_BIN("/usr/") +#define DEFAULT_PATH_NORMAL_NULSTR PATH_SBIN_BIN_NULSTR("/usr/local/") PATH_SBIN_BIN_NULSTR("/usr/") +#define DEFAULT_PATH_SPLIT_USR DEFAULT_PATH_NORMAL ":" PATH_SBIN_BIN("/") +#define DEFAULT_PATH_SPLIT_USR_NULSTR DEFAULT_PATH_NORMAL_NULSTR PATH_SBIN_BIN_NULSTR("/") +#define DEFAULT_PATH_COMPAT PATH_SPLIT_SBIN_BIN("/usr/local/") ":" PATH_SPLIT_SBIN_BIN("/usr/") ":" PATH_SPLIT_SBIN_BIN("/") + +#if HAVE_SPLIT_USR +# define DEFAULT_PATH DEFAULT_PATH_SPLIT_USR +# define DEFAULT_PATH_NULSTR DEFAULT_PATH_SPLIT_USR_NULSTR +#else +# define DEFAULT_PATH DEFAULT_PATH_NORMAL +# define DEFAULT_PATH_NULSTR DEFAULT_PATH_NORMAL_NULSTR +#endif + +bool is_path(const char *p) _pure_; +int path_split_and_make_absolute(const char *p, char ***ret); +bool path_is_absolute(const char *p) _pure_; +char* path_make_absolute(const char *p, const char *prefix); +int safe_getcwd(char **ret); +int path_make_absolute_cwd(const char *p, char **ret); +int path_make_relative(const char *from_dir, const char *to_path, char **_r); +char* path_startswith(const char *path, const char *prefix) _pure_; +int path_compare(const char *a, const char *b) _pure_; +bool path_equal(const char *a, const char *b) _pure_; +bool path_equal_or_files_same(const char *a, const char *b, int flags); +char* path_join_internal(const char *first, ...); +#define path_join(x, ...) path_join_internal(x, __VA_ARGS__, (const char*) -1) + +char* path_simplify(char *path, bool kill_dots); + +static inline bool path_equal_ptr(const char *a, const char *b) { + return !!a == !!b && (!a || path_equal(a, b)); +} + +/* Note: the search terminates on the first NULL item. */ +#define PATH_IN_SET(p, ...) \ + ({ \ + char **_s; \ + bool _found = false; \ + STRV_FOREACH(_s, STRV_MAKE(__VA_ARGS__)) \ + if (path_equal(p, *_s)) { \ + _found = true; \ + break; \ + } \ + _found; \ + }) + +#define PATH_STARTSWITH_SET(p, ...) \ + ({ \ + const char *_p = (p); \ + char *_found = NULL, **_i; \ + STRV_FOREACH(_i, STRV_MAKE(__VA_ARGS__)) { \ + _found = path_startswith(_p, *_i); \ + if (_found) \ + break; \ + } \ + _found; \ + }) + +int path_strv_make_absolute_cwd(char **l); +char** path_strv_resolve(char **l, const char *root); +char** path_strv_resolve_uniq(char **l, const char *root); + +int find_binary(const char *name, char **filename); + +bool paths_check_timestamp(const char* const* paths, usec_t *paths_ts_usec, bool update); + +int fsck_exists(const char *fstype); +int mkfs_exists(const char *fstype); + +/* Iterates through the path prefixes of the specified path, going up + * the tree, to root. Also returns "" (and not "/"!) for the root + * directory. Excludes the specified directory itself */ +#define PATH_FOREACH_PREFIX(prefix, path) \ + for (char *_slash = ({ \ + path_simplify(strcpy(prefix, path), false); \ + streq(prefix, "/") ? NULL : strrchr(prefix, '/'); \ + }); \ + _slash && ((*_slash = 0), true); \ + _slash = strrchr((prefix), '/')) + +/* Same as PATH_FOREACH_PREFIX but also includes the specified path itself */ +#define PATH_FOREACH_PREFIX_MORE(prefix, path) \ + for (char *_slash = ({ \ + path_simplify(strcpy(prefix, path), false); \ + if (streq(prefix, "/")) \ + prefix[0] = 0; \ + strrchr(prefix, 0); \ + }); \ + _slash && ((*_slash = 0), true); \ + _slash = strrchr((prefix), '/')) + +char *prefix_root(const char *root, const char *path); + +/* Similar to prefix_root(), but returns an alloca() buffer, or + * possibly a const pointer into the path parameter */ +#define prefix_roota(root, path) \ + ({ \ + const char* _path = (path), *_root = (root), *_ret; \ + char *_p, *_n; \ + size_t _l; \ + while (_path[0] == '/' && _path[1] == '/') \ + _path ++; \ + if (empty_or_root(_root)) \ + _ret = _path; \ + else { \ + _l = strlen(_root) + 1 + strlen(_path) + 1; \ + _n = newa(char, _l); \ + _p = stpcpy(_n, _root); \ + while (_p > _n && _p[-1] == '/') \ + _p--; \ + if (_path[0] != '/') \ + *(_p++) = '/'; \ + strcpy(_p, _path); \ + _ret = _n; \ + } \ + _ret; \ + }) + +int parse_path_argument_and_warn(const char *path, bool suppress_root, char **arg); + +char* dirname_malloc(const char *path); +const char *last_path_component(const char *path); +int path_extract_filename(const char *p, char **ret); + +bool filename_is_valid(const char *p) _pure_; +bool path_is_valid(const char *p) _pure_; +bool path_is_normalized(const char *p) _pure_; + +char *file_in_same_dir(const char *path, const char *filename); + +bool hidden_or_backup_file(const char *filename) _pure_; + +bool is_device_path(const char *path); + +bool valid_device_node_path(const char *path); +bool valid_device_allow_pattern(const char *path); + +int systemd_installation_has_version(const char *root, unsigned minimal_version); + +bool dot_or_dot_dot(const char *path); + +static inline const char *skip_dev_prefix(const char *p) { + const char *e; + + /* Drop any /dev prefix if there is any */ + + e = path_startswith(p, "/dev/"); + + return e ?: p; +} + +bool empty_or_root(const char *root); +static inline const char *empty_to_root(const char *path) { + return isempty(path) ? "/" : path; +} + +enum { + PATH_CHECK_FATAL = 1 << 0, /* If not set, then error message is appended with 'ignoring'. */ + PATH_CHECK_ABSOLUTE = 1 << 1, + PATH_CHECK_RELATIVE = 1 << 2, +}; + +int path_simplify_and_warn(char *path, unsigned flag, const char *unit, const char *filename, unsigned line, const char *lvalue); diff --git a/src/basic/prioq.c b/src/basic/prioq.c new file mode 100644 index 0000000..76b27fa --- /dev/null +++ b/src/basic/prioq.c @@ -0,0 +1,300 @@ +/* SPDX-License-Identifier: LGPL-2.1+ */ + +/* + * Priority Queue + * The prioq object implements a priority queue. That is, it orders objects by + * their priority and allows O(1) access to the object with the highest + * priority. Insertion and removal are Θ(log n). Optionally, the caller can + * provide a pointer to an index which will be kept up-to-date by the prioq. + * + * The underlying algorithm used in this implementation is a Heap. + */ + +#include <errno.h> +#include <stdlib.h> + +#include "alloc-util.h" +#include "hashmap.h" +#include "prioq.h" + +struct prioq_item { + void *data; + unsigned *idx; +}; + +struct Prioq { + compare_func_t compare_func; + unsigned n_items, n_allocated; + + struct prioq_item *items; +}; + +Prioq *prioq_new(compare_func_t compare_func) { + Prioq *q; + + q = new(Prioq, 1); + if (!q) + return q; + + *q = (Prioq) { + .compare_func = compare_func, + }; + + return q; +} + +Prioq* prioq_free(Prioq *q) { + if (!q) + return NULL; + + free(q->items); + return mfree(q); +} + +int prioq_ensure_allocated(Prioq **q, compare_func_t compare_func) { + assert(q); + + if (*q) + return 0; + + *q = prioq_new(compare_func); + if (!*q) + return -ENOMEM; + + return 0; +} + +static void swap(Prioq *q, unsigned j, unsigned k) { + assert(q); + assert(j < q->n_items); + assert(k < q->n_items); + + assert(!q->items[j].idx || *(q->items[j].idx) == j); + assert(!q->items[k].idx || *(q->items[k].idx) == k); + + SWAP_TWO(q->items[j].data, q->items[k].data); + SWAP_TWO(q->items[j].idx, q->items[k].idx); + + if (q->items[j].idx) + *q->items[j].idx = j; + + if (q->items[k].idx) + *q->items[k].idx = k; +} + +static unsigned shuffle_up(Prioq *q, unsigned idx) { + assert(q); + assert(idx < q->n_items); + + while (idx > 0) { + unsigned k; + + k = (idx-1)/2; + + if (q->compare_func(q->items[k].data, q->items[idx].data) <= 0) + break; + + swap(q, idx, k); + idx = k; + } + + return idx; +} + +static unsigned shuffle_down(Prioq *q, unsigned idx) { + assert(q); + + for (;;) { + unsigned j, k, s; + + k = (idx+1)*2; /* right child */ + j = k-1; /* left child */ + + if (j >= q->n_items) + break; + + if (q->compare_func(q->items[j].data, q->items[idx].data) < 0) + + /* So our left child is smaller than we are, let's + * remember this fact */ + s = j; + else + s = idx; + + if (k < q->n_items && + q->compare_func(q->items[k].data, q->items[s].data) < 0) + + /* So our right child is smaller than we are, let's + * remember this fact */ + s = k; + + /* s now points to the smallest of the three items */ + + if (s == idx) + /* No swap necessary, we're done */ + break; + + swap(q, idx, s); + idx = s; + } + + return idx; +} + +int prioq_put(Prioq *q, void *data, unsigned *idx) { + struct prioq_item *i; + unsigned k; + + assert(q); + + if (q->n_items >= q->n_allocated) { + unsigned n; + struct prioq_item *j; + + n = MAX((q->n_items+1) * 2, 16u); + j = reallocarray(q->items, n, sizeof(struct prioq_item)); + if (!j) + return -ENOMEM; + + q->items = j; + q->n_allocated = n; + } + + k = q->n_items++; + i = q->items + k; + i->data = data; + i->idx = idx; + + if (idx) + *idx = k; + + shuffle_up(q, k); + + return 0; +} + +static void remove_item(Prioq *q, struct prioq_item *i) { + struct prioq_item *l; + + assert(q); + assert(i); + + l = q->items + q->n_items - 1; + + if (i == l) + /* Last entry, let's just remove it */ + q->n_items--; + else { + unsigned k; + + /* Not last entry, let's replace the last entry with + * this one, and reshuffle */ + + k = i - q->items; + + i->data = l->data; + i->idx = l->idx; + if (i->idx) + *i->idx = k; + q->n_items--; + + k = shuffle_down(q, k); + shuffle_up(q, k); + } +} + +_pure_ static struct prioq_item* find_item(Prioq *q, void *data, unsigned *idx) { + struct prioq_item *i; + + assert(q); + + if (q->n_items <= 0) + return NULL; + + if (idx) { + if (*idx == PRIOQ_IDX_NULL || + *idx >= q->n_items) + return NULL; + + i = q->items + *idx; + if (i->data != data) + return NULL; + + return i; + } else { + for (i = q->items; i < q->items + q->n_items; i++) + if (i->data == data) + return i; + return NULL; + } +} + +int prioq_remove(Prioq *q, void *data, unsigned *idx) { + struct prioq_item *i; + + if (!q) + return 0; + + i = find_item(q, data, idx); + if (!i) + return 0; + + remove_item(q, i); + return 1; +} + +int prioq_reshuffle(Prioq *q, void *data, unsigned *idx) { + struct prioq_item *i; + unsigned k; + + assert(q); + + i = find_item(q, data, idx); + if (!i) + return 0; + + k = i - q->items; + k = shuffle_down(q, k); + shuffle_up(q, k); + return 1; +} + +void *prioq_peek_by_index(Prioq *q, unsigned idx) { + if (!q) + return NULL; + + if (idx >= q->n_items) + return NULL; + + return q->items[idx].data; +} + +void *prioq_pop(Prioq *q) { + void *data; + + if (!q) + return NULL; + + if (q->n_items <= 0) + return NULL; + + data = q->items[0].data; + remove_item(q, q->items); + return data; +} + +unsigned prioq_size(Prioq *q) { + + if (!q) + return 0; + + return q->n_items; +} + +bool prioq_isempty(Prioq *q) { + + if (!q) + return true; + + return q->n_items <= 0; +} diff --git a/src/basic/prioq.h b/src/basic/prioq.h new file mode 100644 index 0000000..1fb57bf --- /dev/null +++ b/src/basic/prioq.h @@ -0,0 +1,32 @@ +/* SPDX-License-Identifier: LGPL-2.1+ */ +#pragma once + +#include <stdbool.h> + +#include "hashmap.h" +#include "macro.h" + +typedef struct Prioq Prioq; + +#define PRIOQ_IDX_NULL ((unsigned) -1) + +Prioq *prioq_new(compare_func_t compare); +Prioq *prioq_free(Prioq *q); +DEFINE_TRIVIAL_CLEANUP_FUNC(Prioq*, prioq_free); +int prioq_ensure_allocated(Prioq **q, compare_func_t compare_func); + +int prioq_put(Prioq *q, void *data, unsigned *idx); +int prioq_remove(Prioq *q, void *data, unsigned *idx); +int prioq_reshuffle(Prioq *q, void *data, unsigned *idx); + +void *prioq_peek_by_index(Prioq *q, unsigned idx) _pure_; +static inline void *prioq_peek(Prioq *q) { + return prioq_peek_by_index(q, 0); +} +void *prioq_pop(Prioq *q); + +#define PRIOQ_FOREACH_ITEM(q, p) \ + for (unsigned _i = 0; (p = prioq_peek_by_index(q, _i)); _i++) + +unsigned prioq_size(Prioq *q) _pure_; +bool prioq_isempty(Prioq *q) _pure_; diff --git a/src/basic/proc-cmdline.c b/src/basic/proc-cmdline.c new file mode 100644 index 0000000..1670001 --- /dev/null +++ b/src/basic/proc-cmdline.c @@ -0,0 +1,363 @@ +/* SPDX-License-Identifier: LGPL-2.1+ */ + +#include <stdbool.h> +#include <stddef.h> +#include <string.h> + +#include "alloc-util.h" +#include "extract-word.h" +#include "fileio.h" +#include "macro.h" +#include "parse-util.h" +#include "proc-cmdline.h" +#include "process-util.h" +#include "special.h" +#include "string-util.h" +#include "util.h" +#include "virt.h" + +int proc_cmdline(char **ret) { + const char *e; + assert(ret); + + /* For testing purposes it is sometimes useful to be able to override what we consider /proc/cmdline to be */ + e = secure_getenv("SYSTEMD_PROC_CMDLINE"); + if (e) { + char *m; + + m = strdup(e); + if (!m) + return -ENOMEM; + + *ret = m; + return 0; + } + + if (detect_container() > 0) + return get_process_cmdline(1, 0, false, ret); + else + return read_one_line_file("/proc/cmdline", ret); +} + +static int proc_cmdline_extract_first(const char **p, char **ret_word, ProcCmdlineFlags flags) { + const char *q = *p; + int r; + + for (;;) { + _cleanup_free_ char *word = NULL; + const char *c; + + r = extract_first_word(&q, &word, NULL, EXTRACT_QUOTES|EXTRACT_RELAX); + if (r < 0) + return r; + if (r == 0) + break; + + /* Filter out arguments that are intended only for the initrd */ + c = startswith(word, "rd."); + if (c) { + if (!in_initrd()) + continue; + + if (FLAGS_SET(flags, PROC_CMDLINE_STRIP_RD_PREFIX)) { + r = free_and_strdup(&word, c); + if (r < 0) + return r; + } + + } else if (FLAGS_SET(flags, PROC_CMDLINE_RD_STRICT) && in_initrd()) + continue; /* And optionally filter out arguments that are intended only for the host */ + + *p = q; + *ret_word = TAKE_PTR(word); + return 1; + } + + *p = q; + *ret_word = NULL; + return 0; +} + +int proc_cmdline_parse_given(const char *line, proc_cmdline_parse_t parse_item, void *data, ProcCmdlineFlags flags) { + const char *p; + int r; + + assert(parse_item); + + /* The PROC_CMDLINE_VALUE_OPTIONAL flag doesn't really make sense for proc_cmdline_parse(), let's make this + * clear. */ + assert(!FLAGS_SET(flags, PROC_CMDLINE_VALUE_OPTIONAL)); + + p = line; + for (;;) { + _cleanup_free_ char *word = NULL; + char *value; + + r = proc_cmdline_extract_first(&p, &word, flags); + if (r < 0) + return r; + if (r == 0) + break; + + value = strchr(word, '='); + if (value) + *(value++) = 0; + + r = parse_item(word, value, data); + if (r < 0) + return r; + } + + return 0; +} + +int proc_cmdline_parse(proc_cmdline_parse_t parse_item, void *data, ProcCmdlineFlags flags) { + _cleanup_free_ char *line = NULL; + int r; + + assert(parse_item); + + r = proc_cmdline(&line); + if (r < 0) + return r; + + return proc_cmdline_parse_given(line, parse_item, data, flags); +} + +static bool relaxed_equal_char(char a, char b) { + return a == b || + (a == '_' && b == '-') || + (a == '-' && b == '_'); +} + +char *proc_cmdline_key_startswith(const char *s, const char *prefix) { + assert(s); + assert(prefix); + + /* Much like startswith(), but considers "-" and "_" the same */ + + for (; *prefix != 0; s++, prefix++) + if (!relaxed_equal_char(*s, *prefix)) + return NULL; + + return (char*) s; +} + +bool proc_cmdline_key_streq(const char *x, const char *y) { + assert(x); + assert(y); + + /* Much like streq(), but considers "-" and "_" the same */ + + for (; *x != 0 || *y != 0; x++, y++) + if (!relaxed_equal_char(*x, *y)) + return false; + + return true; +} + +int proc_cmdline_get_key(const char *key, ProcCmdlineFlags flags, char **ret_value) { + _cleanup_free_ char *line = NULL, *ret = NULL; + bool found = false; + const char *p; + int r; + + /* Looks for a specific key on the kernel command line. Supports three modes: + * + * a) The "ret_value" parameter is used. In this case a parameter beginning with the "key" string followed by + * "=" is searched for, and the value following it is returned in "ret_value". + * + * b) as above, but the PROC_CMDLINE_VALUE_OPTIONAL flag is set. In this case if the key is found as a separate + * word (i.e. not followed by "=" but instead by whitespace or the end of the command line), then this is + * also accepted, and "value" is returned as NULL. + * + * c) The "ret_value" parameter is NULL. In this case a search for the exact "key" parameter is performed. + * + * In all three cases, > 0 is returned if the key is found, 0 if not. */ + + if (isempty(key)) + return -EINVAL; + + if (FLAGS_SET(flags, PROC_CMDLINE_VALUE_OPTIONAL) && !ret_value) + return -EINVAL; + + r = proc_cmdline(&line); + if (r < 0) + return r; + + p = line; + for (;;) { + _cleanup_free_ char *word = NULL; + + r = proc_cmdline_extract_first(&p, &word, flags); + if (r < 0) + return r; + if (r == 0) + break; + + if (ret_value) { + const char *e; + + e = proc_cmdline_key_startswith(word, key); + if (!e) + continue; + + if (*e == '=') { + r = free_and_strdup(&ret, e+1); + if (r < 0) + return r; + + found = true; + + } else if (*e == 0 && FLAGS_SET(flags, PROC_CMDLINE_VALUE_OPTIONAL)) + found = true; + + } else { + if (streq(word, key)) { + found = true; + break; /* we found what we were looking for */ + } + } + } + + if (ret_value) + *ret_value = TAKE_PTR(ret); + + return found; +} + +int proc_cmdline_get_bool(const char *key, bool *ret) { + _cleanup_free_ char *v = NULL; + int r; + + assert(ret); + + r = proc_cmdline_get_key(key, PROC_CMDLINE_VALUE_OPTIONAL, &v); + if (r < 0) + return r; + if (r == 0) { + *ret = false; + return 0; + } + + if (v) { /* parameter passed */ + r = parse_boolean(v); + if (r < 0) + return r; + *ret = r; + } else /* no parameter passed */ + *ret = true; + + return 1; +} + +int proc_cmdline_get_key_many_internal(ProcCmdlineFlags flags, ...) { + _cleanup_free_ char *line = NULL; + const char *p; + va_list ap; + int r, ret = 0; + + /* The PROC_CMDLINE_VALUE_OPTIONAL flag doesn't really make sense for proc_cmdline_get_key_many(), let's make + * this clear. */ + assert(!FLAGS_SET(flags, PROC_CMDLINE_VALUE_OPTIONAL)); + + /* This call may clobber arguments on failure! */ + + r = proc_cmdline(&line); + if (r < 0) + return r; + + p = line; + for (;;) { + _cleanup_free_ char *word = NULL; + + r = proc_cmdline_extract_first(&p, &word, flags); + if (r < 0) + return r; + if (r == 0) + break; + + va_start(ap, flags); + + for (;;) { + char **v; + const char *k, *e; + + k = va_arg(ap, const char*); + if (!k) + break; + + assert_se(v = va_arg(ap, char**)); + + e = proc_cmdline_key_startswith(word, k); + if (e && *e == '=') { + r = free_and_strdup(v, e + 1); + if (r < 0) { + va_end(ap); + return r; + } + + ret++; + } + } + + va_end(ap); + } + + return ret; +} + +int shall_restore_state(void) { + bool ret; + int r; + + r = proc_cmdline_get_bool("systemd.restore_state", &ret); + if (r < 0) + return r; + + return r > 0 ? ret : true; +} + +static const char * const rlmap[] = { + "emergency", SPECIAL_EMERGENCY_TARGET, + "-b", SPECIAL_EMERGENCY_TARGET, + "rescue", SPECIAL_RESCUE_TARGET, + "single", SPECIAL_RESCUE_TARGET, + "-s", SPECIAL_RESCUE_TARGET, + "s", SPECIAL_RESCUE_TARGET, + "S", SPECIAL_RESCUE_TARGET, + "1", SPECIAL_RESCUE_TARGET, + "2", SPECIAL_MULTI_USER_TARGET, + "3", SPECIAL_MULTI_USER_TARGET, + "4", SPECIAL_MULTI_USER_TARGET, + "5", SPECIAL_GRAPHICAL_TARGET, + NULL +}; + +static const char * const rlmap_initrd[] = { + "emergency", SPECIAL_EMERGENCY_TARGET, + "rescue", SPECIAL_RESCUE_TARGET, + NULL +}; + +const char* runlevel_to_target(const char *word) { + const char * const *rlmap_ptr; + size_t i; + + if (!word) + return NULL; + + if (in_initrd()) { + word = startswith(word, "rd."); + if (!word) + return NULL; + } + + rlmap_ptr = in_initrd() ? rlmap_initrd : rlmap; + + for (i = 0; rlmap_ptr[i]; i += 2) + if (streq(word, rlmap_ptr[i])) + return rlmap_ptr[i+1]; + + return NULL; +} diff --git a/src/basic/proc-cmdline.h b/src/basic/proc-cmdline.h new file mode 100644 index 0000000..ff04379 --- /dev/null +++ b/src/basic/proc-cmdline.h @@ -0,0 +1,41 @@ +/* SPDX-License-Identifier: LGPL-2.1+ */ +#pragma once + +#include <stdbool.h> + +#include "log.h" + +typedef enum ProcCmdlineFlags { + PROC_CMDLINE_STRIP_RD_PREFIX = 1 << 0, + PROC_CMDLINE_VALUE_OPTIONAL = 1 << 1, + PROC_CMDLINE_RD_STRICT = 1 << 2, +} ProcCmdlineFlags; + +typedef int (*proc_cmdline_parse_t)(const char *key, const char *value, void *data); + +int proc_cmdline(char **ret); + +int proc_cmdline_parse_given(const char *line, proc_cmdline_parse_t parse_item, void *data, ProcCmdlineFlags flags); +int proc_cmdline_parse(const proc_cmdline_parse_t parse, void *userdata, ProcCmdlineFlags flags); + +int proc_cmdline_get_key(const char *parameter, ProcCmdlineFlags flags, char **value); +int proc_cmdline_get_bool(const char *key, bool *ret); + +int proc_cmdline_get_key_many_internal(ProcCmdlineFlags flags, ...); +#define proc_cmdline_get_key_many(flags, ...) proc_cmdline_get_key_many_internal(flags, __VA_ARGS__, NULL) + +char *proc_cmdline_key_startswith(const char *s, const char *prefix); +bool proc_cmdline_key_streq(const char *x, const char *y); + +int shall_restore_state(void); +const char* runlevel_to_target(const char *rl); + +/* A little helper call, to be used in proc_cmdline_parse_t callbacks */ +static inline bool proc_cmdline_value_missing(const char *key, const char *value) { + if (!value) { + log_warning("Missing argument for %s= kernel command line switch, ignoring.", key); + return true; + } + + return false; +} diff --git a/src/basic/process-util.c b/src/basic/process-util.c new file mode 100644 index 0000000..78ce43b --- /dev/null +++ b/src/basic/process-util.c @@ -0,0 +1,1565 @@ +/* SPDX-License-Identifier: LGPL-2.1+ */ + +#include <ctype.h> +#include <errno.h> +#include <limits.h> +#include <linux/oom.h> +#include <sched.h> +#include <signal.h> +#include <stdbool.h> +#include <stdio.h> +#include <stdio_ext.h> +#include <stdlib.h> +#include <string.h> +#include <sys/mman.h> +#include <sys/mount.h> +#include <sys/personality.h> +#include <sys/prctl.h> +#include <sys/types.h> +#include <sys/wait.h> +#include <syslog.h> +#include <unistd.h> +#if HAVE_VALGRIND_VALGRIND_H +#include <valgrind/valgrind.h> +#endif + +#include "alloc-util.h" +#include "architecture.h" +#include "escape.h" +#include "fd-util.h" +#include "fileio.h" +#include "fs-util.h" +#include "ioprio.h" +#include "log.h" +#include "macro.h" +#include "missing.h" +#include "process-util.h" +#include "raw-clone.h" +#include "rlimit-util.h" +#include "signal-util.h" +#include "stat-util.h" +#include "string-table.h" +#include "string-util.h" +#include "terminal-util.h" +#include "user-util.h" +#include "util.h" + +int get_process_state(pid_t pid) { + const char *p; + char state; + int r; + _cleanup_free_ char *line = NULL; + + assert(pid >= 0); + + p = procfs_file_alloca(pid, "stat"); + + r = read_one_line_file(p, &line); + if (r == -ENOENT) + return -ESRCH; + if (r < 0) + return r; + + p = strrchr(line, ')'); + if (!p) + return -EIO; + + p++; + + if (sscanf(p, " %c", &state) != 1) + return -EIO; + + return (unsigned char) state; +} + +int get_process_comm(pid_t pid, char **ret) { + _cleanup_free_ char *escaped = NULL, *comm = NULL; + const char *p; + int r; + + assert(ret); + assert(pid >= 0); + + escaped = new(char, TASK_COMM_LEN); + if (!escaped) + return -ENOMEM; + + p = procfs_file_alloca(pid, "comm"); + + r = read_one_line_file(p, &comm); + if (r == -ENOENT) + return -ESRCH; + if (r < 0) + return r; + + /* Escape unprintable characters, just in case, but don't grow the string beyond the underlying size */ + cellescape(escaped, TASK_COMM_LEN, comm); + + *ret = TAKE_PTR(escaped); + return 0; +} + +int get_process_cmdline(pid_t pid, size_t max_length, bool comm_fallback, char **line) { + _cleanup_fclose_ FILE *f = NULL; + bool space = false; + char *k; + _cleanup_free_ char *ans = NULL; + const char *p; + int c; + + assert(line); + assert(pid >= 0); + + /* Retrieves a process' command line. Replaces unprintable characters while doing so by whitespace (coalescing + * multiple sequential ones into one). If max_length is != 0 will return a string of the specified size at most + * (the trailing NUL byte does count towards the length here!), abbreviated with a "..." ellipsis. If + * comm_fallback is true and the process has no command line set (the case for kernel threads), or has a + * command line that resolves to the empty string will return the "comm" name of the process instead. + * + * Returns -ESRCH if the process doesn't exist, and -ENOENT if the process has no command line (and + * comm_fallback is false). Returns 0 and sets *line otherwise. */ + + p = procfs_file_alloca(pid, "cmdline"); + + f = fopen(p, "re"); + if (!f) { + if (errno == ENOENT) + return -ESRCH; + return -errno; + } + + (void) __fsetlocking(f, FSETLOCKING_BYCALLER); + + if (max_length == 0) { + /* This is supposed to be a safety guard against runaway command lines. */ + long l = sysconf(_SC_ARG_MAX); + assert(l > 0); + max_length = l; + } + + if (max_length == 1) { + + /* If there's only room for one byte, return the empty string */ + ans = new0(char, 1); + if (!ans) + return -ENOMEM; + + *line = TAKE_PTR(ans); + return 0; + + } else { + bool dotdotdot = false; + size_t left; + + ans = new(char, max_length); + if (!ans) + return -ENOMEM; + + k = ans; + left = max_length; + while ((c = getc(f)) != EOF) { + + if (isprint(c)) { + + if (space) { + if (left <= 2) { + dotdotdot = true; + break; + } + + *(k++) = ' '; + left--; + space = false; + } + + if (left <= 1) { + dotdotdot = true; + break; + } + + *(k++) = (char) c; + left--; + } else if (k > ans) + space = true; + } + + if (dotdotdot) { + if (max_length <= 4) { + k = ans; + left = max_length; + } else { + k = ans + max_length - 4; + left = 4; + + /* Eat up final spaces */ + while (k > ans && isspace(k[-1])) { + k--; + left++; + } + } + + strncpy(k, "...", left-1); + k[left-1] = 0; + } else + *k = 0; + } + + /* Kernel threads have no argv[] */ + if (isempty(ans)) { + _cleanup_free_ char *t = NULL; + int h; + + ans = mfree(ans); + + if (!comm_fallback) + return -ENOENT; + + h = get_process_comm(pid, &t); + if (h < 0) + return h; + + size_t l = strlen(t); + + if (l + 3 <= max_length) { + ans = strjoin("[", t, "]"); + if (!ans) + return -ENOMEM; + + } else if (max_length <= 6) { + ans = new(char, max_length); + if (!ans) + return -ENOMEM; + + memcpy(ans, "[...]", max_length-1); + ans[max_length-1] = 0; + } else { + t[max_length - 6] = 0; + + /* Chop off final spaces */ + delete_trailing_chars(t, WHITESPACE); + + ans = strjoin("[", t, "...]"); + if (!ans) + return -ENOMEM; + } + + *line = TAKE_PTR(ans); + return 0; + } + + k = realloc(ans, strlen(ans) + 1); + if (!k) + return -ENOMEM; + + ans = NULL; + *line = k; + + return 0; +} + +int rename_process(const char name[]) { + static size_t mm_size = 0; + static char *mm = NULL; + bool truncated = false; + size_t l; + + /* This is a like a poor man's setproctitle(). It changes the comm field, argv[0], and also the glibc's + * internally used name of the process. For the first one a limit of 16 chars applies; to the second one in + * many cases one of 10 (i.e. length of "/sbin/init") — however if we have CAP_SYS_RESOURCES it is unbounded; + * to the third one 7 (i.e. the length of "systemd". If you pass a longer string it will likely be + * truncated. + * + * Returns 0 if a name was set but truncated, > 0 if it was set but not truncated. */ + + if (isempty(name)) + return -EINVAL; /* let's not confuse users unnecessarily with an empty name */ + + if (!is_main_thread()) + return -EPERM; /* Let's not allow setting the process name from other threads than the main one, as we + * cache things without locking, and we make assumptions that PR_SET_NAME sets the + * process name that isn't correct on any other threads */ + + l = strlen(name); + + /* First step, change the comm field. The main thread's comm is identical to the process comm. This means we + * can use PR_SET_NAME, which sets the thread name for the calling thread. */ + if (prctl(PR_SET_NAME, name) < 0) + log_debug_errno(errno, "PR_SET_NAME failed: %m"); + if (l >= TASK_COMM_LEN) /* Linux process names can be 15 chars at max */ + truncated = true; + + /* Second step, change glibc's ID of the process name. */ + if (program_invocation_name) { + size_t k; + + k = strlen(program_invocation_name); + strncpy(program_invocation_name, name, k); + if (l > k) + truncated = true; + } + + /* Third step, completely replace the argv[] array the kernel maintains for us. This requires privileges, but + * has the advantage that the argv[] array is exactly what we want it to be, and not filled up with zeros at + * the end. This is the best option for changing /proc/self/cmdline. */ + + /* Let's not bother with this if we don't have euid == 0. Strictly speaking we should check for the + * CAP_SYS_RESOURCE capability which is independent of the euid. In our own code the capability generally is + * present only for euid == 0, hence let's use this as quick bypass check, to avoid calling mmap() if + * PR_SET_MM_ARG_{START,END} fails with EPERM later on anyway. After all geteuid() is dead cheap to call, but + * mmap() is not. */ + if (geteuid() != 0) + log_debug("Skipping PR_SET_MM, as we don't have privileges."); + else if (mm_size < l+1) { + size_t nn_size; + char *nn; + + nn_size = PAGE_ALIGN(l+1); + nn = mmap(NULL, nn_size, PROT_READ|PROT_WRITE, MAP_PRIVATE|MAP_ANONYMOUS, -1, 0); + if (nn == MAP_FAILED) { + log_debug_errno(errno, "mmap() failed: %m"); + goto use_saved_argv; + } + + strncpy(nn, name, nn_size); + + /* Now, let's tell the kernel about this new memory */ + if (prctl(PR_SET_MM, PR_SET_MM_ARG_START, (unsigned long) nn, 0, 0) < 0) { + /* HACK: prctl() API is kind of dumb on this point. The existing end address may already be + * below the desired start address, in which case the kernel may have kicked this back due + * to a range-check failure (see linux/kernel/sys.c:validate_prctl_map() to see this in + * action). The proper solution would be to have a prctl() API that could set both start+end + * simultaneously, or at least let us query the existing address to anticipate this condition + * and respond accordingly. For now, we can only guess at the cause of this failure and try + * a workaround--which will briefly expand the arg space to something potentially huge before + * resizing it to what we want. */ + log_debug_errno(errno, "PR_SET_MM_ARG_START failed, attempting PR_SET_MM_ARG_END hack: %m"); + + if (prctl(PR_SET_MM, PR_SET_MM_ARG_END, (unsigned long) nn + l + 1, 0, 0) < 0) { + log_debug_errno(errno, "PR_SET_MM_ARG_END hack failed, proceeding without: %m"); + (void) munmap(nn, nn_size); + goto use_saved_argv; + } + + if (prctl(PR_SET_MM, PR_SET_MM_ARG_START, (unsigned long) nn, 0, 0) < 0) { + log_debug_errno(errno, "PR_SET_MM_ARG_START still failed, proceeding without: %m"); + goto use_saved_argv; + } + } else { + /* And update the end pointer to the new end, too. If this fails, we don't really know what + * to do, it's pretty unlikely that we can rollback, hence we'll just accept the failure, + * and continue. */ + if (prctl(PR_SET_MM, PR_SET_MM_ARG_END, (unsigned long) nn + l + 1, 0, 0) < 0) + log_debug_errno(errno, "PR_SET_MM_ARG_END failed, proceeding without: %m"); + } + + if (mm) + (void) munmap(mm, mm_size); + + mm = nn; + mm_size = nn_size; + } else { + strncpy(mm, name, mm_size); + + /* Update the end pointer, continuing regardless of any failure. */ + if (prctl(PR_SET_MM, PR_SET_MM_ARG_END, (unsigned long) mm + l + 1, 0, 0) < 0) + log_debug_errno(errno, "PR_SET_MM_ARG_END failed, proceeding without: %m"); + } + +use_saved_argv: + /* Fourth step: in all cases we'll also update the original argv[], so that our own code gets it right too if + * it still looks here */ + + if (saved_argc > 0) { + int i; + + if (saved_argv[0]) { + size_t k; + + k = strlen(saved_argv[0]); + strncpy(saved_argv[0], name, k); + if (l > k) + truncated = true; + } + + for (i = 1; i < saved_argc; i++) { + if (!saved_argv[i]) + break; + + memzero(saved_argv[i], strlen(saved_argv[i])); + } + } + + return !truncated; +} + +int is_kernel_thread(pid_t pid) { + _cleanup_free_ char *line = NULL; + unsigned long long flags; + size_t l, i; + const char *p; + char *q; + int r; + + if (IN_SET(pid, 0, 1) || pid == getpid_cached()) /* pid 1, and we ourselves certainly aren't a kernel thread */ + return 0; + if (!pid_is_valid(pid)) + return -EINVAL; + + p = procfs_file_alloca(pid, "stat"); + r = read_one_line_file(p, &line); + if (r == -ENOENT) + return -ESRCH; + if (r < 0) + return r; + + /* Skip past the comm field */ + q = strrchr(line, ')'); + if (!q) + return -EINVAL; + q++; + + /* Skip 6 fields to reach the flags field */ + for (i = 0; i < 6; i++) { + l = strspn(q, WHITESPACE); + if (l < 1) + return -EINVAL; + q += l; + + l = strcspn(q, WHITESPACE); + if (l < 1) + return -EINVAL; + q += l; + } + + /* Skip preceding whitespace */ + l = strspn(q, WHITESPACE); + if (l < 1) + return -EINVAL; + q += l; + + /* Truncate the rest */ + l = strcspn(q, WHITESPACE); + if (l < 1) + return -EINVAL; + q[l] = 0; + + r = safe_atollu(q, &flags); + if (r < 0) + return r; + + return !!(flags & PF_KTHREAD); +} + +int get_process_capeff(pid_t pid, char **capeff) { + const char *p; + int r; + + assert(capeff); + assert(pid >= 0); + + p = procfs_file_alloca(pid, "status"); + + r = get_proc_field(p, "CapEff", WHITESPACE, capeff); + if (r == -ENOENT) + return -ESRCH; + + return r; +} + +static int get_process_link_contents(const char *proc_file, char **name) { + int r; + + assert(proc_file); + assert(name); + + r = readlink_malloc(proc_file, name); + if (r == -ENOENT) + return -ESRCH; + if (r < 0) + return r; + + return 0; +} + +int get_process_exe(pid_t pid, char **name) { + const char *p; + char *d; + int r; + + assert(pid >= 0); + + p = procfs_file_alloca(pid, "exe"); + r = get_process_link_contents(p, name); + if (r < 0) + return r; + + d = endswith(*name, " (deleted)"); + if (d) + *d = '\0'; + + return 0; +} + +static int get_process_id(pid_t pid, const char *field, uid_t *uid) { + _cleanup_fclose_ FILE *f = NULL; + const char *p; + int r; + + assert(field); + assert(uid); + + if (pid < 0) + return -EINVAL; + + p = procfs_file_alloca(pid, "status"); + f = fopen(p, "re"); + if (!f) { + if (errno == ENOENT) + return -ESRCH; + return -errno; + } + + (void) __fsetlocking(f, FSETLOCKING_BYCALLER); + + for (;;) { + _cleanup_free_ char *line = NULL; + char *l; + + r = read_line(f, LONG_LINE_MAX, &line); + if (r < 0) + return r; + if (r == 0) + break; + + l = strstrip(line); + + if (startswith(l, field)) { + l += strlen(field); + l += strspn(l, WHITESPACE); + + l[strcspn(l, WHITESPACE)] = 0; + + return parse_uid(l, uid); + } + } + + return -EIO; +} + +int get_process_uid(pid_t pid, uid_t *uid) { + + if (pid == 0 || pid == getpid_cached()) { + *uid = getuid(); + return 0; + } + + return get_process_id(pid, "Uid:", uid); +} + +int get_process_gid(pid_t pid, gid_t *gid) { + + if (pid == 0 || pid == getpid_cached()) { + *gid = getgid(); + return 0; + } + + assert_cc(sizeof(uid_t) == sizeof(gid_t)); + return get_process_id(pid, "Gid:", gid); +} + +int get_process_cwd(pid_t pid, char **cwd) { + const char *p; + + assert(pid >= 0); + + p = procfs_file_alloca(pid, "cwd"); + + return get_process_link_contents(p, cwd); +} + +int get_process_root(pid_t pid, char **root) { + const char *p; + + assert(pid >= 0); + + p = procfs_file_alloca(pid, "root"); + + return get_process_link_contents(p, root); +} + +#define ENVIRONMENT_BLOCK_MAX (5U*1024U*1024U) + +int get_process_environ(pid_t pid, char **env) { + _cleanup_fclose_ FILE *f = NULL; + _cleanup_free_ char *outcome = NULL; + size_t allocated = 0, sz = 0; + const char *p; + int r; + + assert(pid >= 0); + assert(env); + + p = procfs_file_alloca(pid, "environ"); + + f = fopen(p, "re"); + if (!f) { + if (errno == ENOENT) + return -ESRCH; + return -errno; + } + + (void) __fsetlocking(f, FSETLOCKING_BYCALLER); + + for (;;) { + char c; + + if (sz >= ENVIRONMENT_BLOCK_MAX) + return -ENOBUFS; + + if (!GREEDY_REALLOC(outcome, allocated, sz + 5)) + return -ENOMEM; + + r = safe_fgetc(f, &c); + if (r < 0) + return r; + if (r == 0) + break; + + if (c == '\0') + outcome[sz++] = '\n'; + else + sz += cescape_char(c, outcome + sz); + } + + outcome[sz] = '\0'; + *env = TAKE_PTR(outcome); + + return 0; +} + +int get_process_ppid(pid_t pid, pid_t *_ppid) { + int r; + _cleanup_free_ char *line = NULL; + long unsigned ppid; + const char *p; + + assert(pid >= 0); + assert(_ppid); + + if (pid == 0 || pid == getpid_cached()) { + *_ppid = getppid(); + return 0; + } + + p = procfs_file_alloca(pid, "stat"); + r = read_one_line_file(p, &line); + if (r == -ENOENT) + return -ESRCH; + if (r < 0) + return r; + + /* Let's skip the pid and comm fields. The latter is enclosed + * in () but does not escape any () in its value, so let's + * skip over it manually */ + + p = strrchr(line, ')'); + if (!p) + return -EIO; + + p++; + + if (sscanf(p, " " + "%*c " /* state */ + "%lu ", /* ppid */ + &ppid) != 1) + return -EIO; + + if ((long unsigned) (pid_t) ppid != ppid) + return -ERANGE; + + *_ppid = (pid_t) ppid; + + return 0; +} + +int wait_for_terminate(pid_t pid, siginfo_t *status) { + siginfo_t dummy; + + assert(pid >= 1); + + if (!status) + status = &dummy; + + for (;;) { + zero(*status); + + if (waitid(P_PID, pid, status, WEXITED) < 0) { + + if (errno == EINTR) + continue; + + return negative_errno(); + } + + return 0; + } +} + +/* + * Return values: + * < 0 : wait_for_terminate() failed to get the state of the + * process, the process was terminated by a signal, or + * failed for an unknown reason. + * >=0 : The process terminated normally, and its exit code is + * returned. + * + * That is, success is indicated by a return value of zero, and an + * error is indicated by a non-zero value. + * + * A warning is emitted if the process terminates abnormally, + * and also if it returns non-zero unless check_exit_code is true. + */ +int wait_for_terminate_and_check(const char *name, pid_t pid, WaitFlags flags) { + _cleanup_free_ char *buffer = NULL; + siginfo_t status; + int r, prio; + + assert(pid > 1); + + if (!name) { + r = get_process_comm(pid, &buffer); + if (r < 0) + log_debug_errno(r, "Failed to acquire process name of " PID_FMT ", ignoring: %m", pid); + else + name = buffer; + } + + prio = flags & WAIT_LOG_ABNORMAL ? LOG_ERR : LOG_DEBUG; + + r = wait_for_terminate(pid, &status); + if (r < 0) + return log_full_errno(prio, r, "Failed to wait for %s: %m", strna(name)); + + if (status.si_code == CLD_EXITED) { + if (status.si_status != EXIT_SUCCESS) + log_full(flags & WAIT_LOG_NON_ZERO_EXIT_STATUS ? LOG_ERR : LOG_DEBUG, + "%s failed with exit status %i.", strna(name), status.si_status); + else + log_debug("%s succeeded.", name); + + return status.si_status; + + } else if (IN_SET(status.si_code, CLD_KILLED, CLD_DUMPED)) { + + log_full(prio, "%s terminated by signal %s.", strna(name), signal_to_string(status.si_status)); + return -EPROTO; + } + + log_full(prio, "%s failed due to unknown reason.", strna(name)); + return -EPROTO; +} + +/* + * Return values: + * + * < 0 : wait_for_terminate_with_timeout() failed to get the state of the process, the process timed out, the process + * was terminated by a signal, or failed for an unknown reason. + * + * >=0 : The process terminated normally with no failures. + * + * Success is indicated by a return value of zero, a timeout is indicated by ETIMEDOUT, and all other child failure + * states are indicated by error is indicated by a non-zero value. + * + * This call assumes SIGCHLD has been blocked already, in particular before the child to wait for has been forked off + * to remain entirely race-free. + */ +int wait_for_terminate_with_timeout(pid_t pid, usec_t timeout) { + sigset_t mask; + int r; + usec_t until; + + assert_se(sigemptyset(&mask) == 0); + assert_se(sigaddset(&mask, SIGCHLD) == 0); + + /* Drop into a sigtimewait-based timeout. Waiting for the + * pid to exit. */ + until = now(CLOCK_MONOTONIC) + timeout; + for (;;) { + usec_t n; + siginfo_t status = {}; + struct timespec ts; + + n = now(CLOCK_MONOTONIC); + if (n >= until) + break; + + r = sigtimedwait(&mask, NULL, timespec_store(&ts, until - n)) < 0 ? -errno : 0; + /* Assuming we woke due to the child exiting. */ + if (waitid(P_PID, pid, &status, WEXITED|WNOHANG) == 0) { + if (status.si_pid == pid) { + /* This is the correct child.*/ + if (status.si_code == CLD_EXITED) + return (status.si_status == 0) ? 0 : -EPROTO; + else + return -EPROTO; + } + } + /* Not the child, check for errors and proceed appropriately */ + if (r < 0) { + switch (r) { + case -EAGAIN: + /* Timed out, child is likely hung. */ + return -ETIMEDOUT; + case -EINTR: + /* Received a different signal and should retry */ + continue; + default: + /* Return any unexpected errors */ + return r; + } + } + } + + return -EPROTO; +} + +void sigkill_wait(pid_t pid) { + assert(pid > 1); + + if (kill(pid, SIGKILL) >= 0) + (void) wait_for_terminate(pid, NULL); +} + +void sigkill_waitp(pid_t *pid) { + PROTECT_ERRNO; + + if (!pid) + return; + if (*pid <= 1) + return; + + sigkill_wait(*pid); +} + +void sigterm_wait(pid_t pid) { + assert(pid > 1); + + if (kill_and_sigcont(pid, SIGTERM) >= 0) + (void) wait_for_terminate(pid, NULL); +} + +int kill_and_sigcont(pid_t pid, int sig) { + int r; + + r = kill(pid, sig) < 0 ? -errno : 0; + + /* If this worked, also send SIGCONT, unless we already just sent a SIGCONT, or SIGKILL was sent which isn't + * affected by a process being suspended anyway. */ + if (r >= 0 && !IN_SET(sig, SIGCONT, SIGKILL)) + (void) kill(pid, SIGCONT); + + return r; +} + +int getenv_for_pid(pid_t pid, const char *field, char **ret) { + _cleanup_fclose_ FILE *f = NULL; + char *value = NULL; + const char *path; + size_t l, sum = 0; + int r; + + assert(pid >= 0); + assert(field); + assert(ret); + + if (pid == 0 || pid == getpid_cached()) { + const char *e; + + e = getenv(field); + if (!e) { + *ret = NULL; + return 0; + } + + value = strdup(e); + if (!value) + return -ENOMEM; + + *ret = value; + return 1; + } + + if (!pid_is_valid(pid)) + return -EINVAL; + + path = procfs_file_alloca(pid, "environ"); + + f = fopen(path, "re"); + if (!f) { + if (errno == ENOENT) + return -ESRCH; + + return -errno; + } + + (void) __fsetlocking(f, FSETLOCKING_BYCALLER); + + l = strlen(field); + for (;;) { + _cleanup_free_ char *line = NULL; + + if (sum > ENVIRONMENT_BLOCK_MAX) /* Give up searching eventually */ + return -ENOBUFS; + + r = read_nul_string(f, LONG_LINE_MAX, &line); + if (r < 0) + return r; + if (r == 0) /* EOF */ + break; + + sum += r; + + if (strneq(line, field, l) && line[l] == '=') { + value = strdup(line + l + 1); + if (!value) + return -ENOMEM; + + *ret = value; + return 1; + } + } + + *ret = NULL; + return 0; +} + +bool pid_is_unwaited(pid_t pid) { + /* Checks whether a PID is still valid at all, including a zombie */ + + if (pid < 0) + return false; + + if (pid <= 1) /* If we or PID 1 would be dead and have been waited for, this code would not be running */ + return true; + + if (pid == getpid_cached()) + return true; + + if (kill(pid, 0) >= 0) + return true; + + return errno != ESRCH; +} + +bool pid_is_alive(pid_t pid) { + int r; + + /* Checks whether a PID is still valid and not a zombie */ + + if (pid < 0) + return false; + + if (pid <= 1) /* If we or PID 1 would be a zombie, this code would not be running */ + return true; + + if (pid == getpid_cached()) + return true; + + r = get_process_state(pid); + if (IN_SET(r, -ESRCH, 'Z')) + return false; + + return true; +} + +int pid_from_same_root_fs(pid_t pid) { + const char *root; + + if (pid < 0) + return false; + + if (pid == 0 || pid == getpid_cached()) + return true; + + root = procfs_file_alloca(pid, "root"); + + return files_same(root, "/proc/1/root", 0); +} + +bool is_main_thread(void) { + static thread_local int cached = 0; + + if (_unlikely_(cached == 0)) + cached = getpid_cached() == gettid() ? 1 : -1; + + return cached > 0; +} + +_noreturn_ void freeze(void) { + + log_close(); + + /* Make sure nobody waits for us on a socket anymore */ + close_all_fds(NULL, 0); + + sync(); + + /* Let's not freeze right away, but keep reaping zombies. */ + for (;;) { + int r; + siginfo_t si = {}; + + r = waitid(P_ALL, 0, &si, WEXITED); + if (r < 0 && errno != EINTR) + break; + } + + /* waitid() failed with an unexpected error, things are really borked. Freeze now! */ + for (;;) + pause(); +} + +bool oom_score_adjust_is_valid(int oa) { + return oa >= OOM_SCORE_ADJ_MIN && oa <= OOM_SCORE_ADJ_MAX; +} + +unsigned long personality_from_string(const char *p) { + int architecture; + + if (!p) + return PERSONALITY_INVALID; + + /* Parse a personality specifier. We use our own identifiers that indicate specific ABIs, rather than just + * hints regarding the register size, since we want to keep things open for multiple locally supported ABIs for + * the same register size. */ + + architecture = architecture_from_string(p); + if (architecture < 0) + return PERSONALITY_INVALID; + + if (architecture == native_architecture()) + return PER_LINUX; +#ifdef SECONDARY_ARCHITECTURE + if (architecture == SECONDARY_ARCHITECTURE) + return PER_LINUX32; +#endif + + return PERSONALITY_INVALID; +} + +const char* personality_to_string(unsigned long p) { + int architecture = _ARCHITECTURE_INVALID; + + if (p == PER_LINUX) + architecture = native_architecture(); +#ifdef SECONDARY_ARCHITECTURE + else if (p == PER_LINUX32) + architecture = SECONDARY_ARCHITECTURE; +#endif + + if (architecture < 0) + return NULL; + + return architecture_to_string(architecture); +} + +int safe_personality(unsigned long p) { + int ret; + + /* So here's the deal, personality() is weirdly defined by glibc. In some cases it returns a failure via errno, + * and in others as negative return value containing an errno-like value. Let's work around this: this is a + * wrapper that uses errno if it is set, and uses the return value otherwise. And then it sets both errno and + * the return value indicating the same issue, so that we are definitely on the safe side. + * + * See https://github.com/systemd/systemd/issues/6737 */ + + errno = 0; + ret = personality(p); + if (ret < 0) { + if (errno != 0) + return -errno; + + errno = -ret; + } + + return ret; +} + +int opinionated_personality(unsigned long *ret) { + int current; + + /* Returns the current personality, or PERSONALITY_INVALID if we can't determine it. This function is a bit + * opinionated though, and ignores all the finer-grained bits and exotic personalities, only distinguishing the + * two most relevant personalities: PER_LINUX and PER_LINUX32. */ + + current = safe_personality(PERSONALITY_INVALID); + if (current < 0) + return current; + + if (((unsigned long) current & 0xffff) == PER_LINUX32) + *ret = PER_LINUX32; + else + *ret = PER_LINUX; + + return 0; +} + +void valgrind_summary_hack(void) { +#if HAVE_VALGRIND_VALGRIND_H + if (getpid_cached() == 1 && RUNNING_ON_VALGRIND) { + pid_t pid; + pid = raw_clone(SIGCHLD); + if (pid < 0) + log_emergency_errno(errno, "Failed to fork off valgrind helper: %m"); + else if (pid == 0) + exit(EXIT_SUCCESS); + else { + log_info("Spawned valgrind helper as PID "PID_FMT".", pid); + (void) wait_for_terminate(pid, NULL); + } + } +#endif +} + +int pid_compare_func(const pid_t *a, const pid_t *b) { + /* Suitable for usage in qsort() */ + return CMP(*a, *b); +} + +int ioprio_parse_priority(const char *s, int *ret) { + int i, r; + + assert(s); + assert(ret); + + r = safe_atoi(s, &i); + if (r < 0) + return r; + + if (!ioprio_priority_is_valid(i)) + return -EINVAL; + + *ret = i; + return 0; +} + +/* The cached PID, possible values: + * + * == UNSET [0] → cache not initialized yet + * == BUSY [-1] → some thread is initializing it at the moment + * any other → the cached PID + */ + +#define CACHED_PID_UNSET ((pid_t) 0) +#define CACHED_PID_BUSY ((pid_t) -1) + +static pid_t cached_pid = CACHED_PID_UNSET; + +void reset_cached_pid(void) { + /* Invoked in the child after a fork(), i.e. at the first moment the PID changed */ + cached_pid = CACHED_PID_UNSET; +} + +/* We use glibc __register_atfork() + __dso_handle directly here, as they are not included in the glibc + * headers. __register_atfork() is mostly equivalent to pthread_atfork(), but doesn't require us to link against + * libpthread, as it is part of glibc anyway. */ +extern int __register_atfork(void (*prepare) (void), void (*parent) (void), void (*child) (void), void *dso_handle); +extern void* __dso_handle _weak_; + +pid_t getpid_cached(void) { + static bool installed = false; + pid_t current_value; + + /* getpid_cached() is much like getpid(), but caches the value in local memory, to avoid having to invoke a + * system call each time. This restores glibc behaviour from before 2.24, when getpid() was unconditionally + * cached. Starting with 2.24 getpid() started to become prohibitively expensive when used for detecting when + * objects were used across fork()s. With this caching the old behaviour is somewhat restored. + * + * https://bugzilla.redhat.com/show_bug.cgi?id=1443976 + * https://sourceware.org/git/gitweb.cgi?p=glibc.git;h=c579f48edba88380635ab98cb612030e3ed8691e + */ + + current_value = __sync_val_compare_and_swap(&cached_pid, CACHED_PID_UNSET, CACHED_PID_BUSY); + + switch (current_value) { + + case CACHED_PID_UNSET: { /* Not initialized yet, then do so now */ + pid_t new_pid; + + new_pid = raw_getpid(); + + if (!installed) { + /* __register_atfork() either returns 0 or -ENOMEM, in its glibc implementation. Since it's + * only half-documented (glibc doesn't document it but LSB does — though only superficially) + * we'll check for errors only in the most generic fashion possible. */ + + if (__register_atfork(NULL, NULL, reset_cached_pid, __dso_handle) != 0) { + /* OOM? Let's try again later */ + cached_pid = CACHED_PID_UNSET; + return new_pid; + } + + installed = true; + } + + cached_pid = new_pid; + return new_pid; + } + + case CACHED_PID_BUSY: /* Somebody else is currently initializing */ + return raw_getpid(); + + default: /* Properly initialized */ + return current_value; + } +} + +int must_be_root(void) { + + if (geteuid() == 0) + return 0; + + return log_error_errno(SYNTHETIC_ERRNO(EPERM), "Need to be root."); +} + +int safe_fork_full( + const char *name, + const int except_fds[], + size_t n_except_fds, + ForkFlags flags, + pid_t *ret_pid) { + + pid_t original_pid, pid; + sigset_t saved_ss, ss; + bool block_signals = false; + int prio, r; + + /* A wrapper around fork(), that does a couple of important initializations in addition to mere forking. Always + * returns the child's PID in *ret_pid. Returns == 0 in the child, and > 0 in the parent. */ + + prio = flags & FORK_LOG ? LOG_ERR : LOG_DEBUG; + + original_pid = getpid_cached(); + + if (flags & (FORK_RESET_SIGNALS|FORK_DEATHSIG)) { + /* We temporarily block all signals, so that the new child has them blocked initially. This way, we can + * be sure that SIGTERMs are not lost we might send to the child. */ + + assert_se(sigfillset(&ss) >= 0); + block_signals = true; + + } else if (flags & FORK_WAIT) { + /* Let's block SIGCHLD at least, so that we can safely watch for the child process */ + + assert_se(sigemptyset(&ss) >= 0); + assert_se(sigaddset(&ss, SIGCHLD) >= 0); + block_signals = true; + } + + if (block_signals) + if (sigprocmask(SIG_SETMASK, &ss, &saved_ss) < 0) + return log_full_errno(prio, errno, "Failed to set signal mask: %m"); + + if (flags & FORK_NEW_MOUNTNS) + pid = raw_clone(SIGCHLD|CLONE_NEWNS); + else + pid = fork(); + if (pid < 0) { + r = -errno; + + if (block_signals) /* undo what we did above */ + (void) sigprocmask(SIG_SETMASK, &saved_ss, NULL); + + return log_full_errno(prio, r, "Failed to fork: %m"); + } + if (pid > 0) { + /* We are in the parent process */ + + log_debug("Successfully forked off '%s' as PID " PID_FMT ".", strna(name), pid); + + if (flags & FORK_WAIT) { + r = wait_for_terminate_and_check(name, pid, (flags & FORK_LOG ? WAIT_LOG : 0)); + if (r < 0) + return r; + if (r != EXIT_SUCCESS) /* exit status > 0 should be treated as failure, too */ + return -EPROTO; + } + + if (block_signals) /* undo what we did above */ + (void) sigprocmask(SIG_SETMASK, &saved_ss, NULL); + + if (ret_pid) + *ret_pid = pid; + + return 1; + } + + /* We are in the child process */ + + if (flags & FORK_REOPEN_LOG) { + /* Close the logs if requested, before we log anything. And make sure we reopen it if needed. */ + log_close(); + log_set_open_when_needed(true); + } + + if (name) { + r = rename_process(name); + if (r < 0) + log_full_errno(flags & FORK_LOG ? LOG_WARNING : LOG_DEBUG, + r, "Failed to rename process, ignoring: %m"); + } + + if (flags & FORK_DEATHSIG) + if (prctl(PR_SET_PDEATHSIG, SIGTERM) < 0) { + log_full_errno(prio, errno, "Failed to set death signal: %m"); + _exit(EXIT_FAILURE); + } + + if (flags & FORK_RESET_SIGNALS) { + r = reset_all_signal_handlers(); + if (r < 0) { + log_full_errno(prio, r, "Failed to reset signal handlers: %m"); + _exit(EXIT_FAILURE); + } + + /* This implicitly undoes the signal mask stuff we did before the fork()ing above */ + r = reset_signal_mask(); + if (r < 0) { + log_full_errno(prio, r, "Failed to reset signal mask: %m"); + _exit(EXIT_FAILURE); + } + } else if (block_signals) { /* undo what we did above */ + if (sigprocmask(SIG_SETMASK, &saved_ss, NULL) < 0) { + log_full_errno(prio, errno, "Failed to restore signal mask: %m"); + _exit(EXIT_FAILURE); + } + } + + if (flags & FORK_DEATHSIG) { + pid_t ppid; + /* Let's see if the parent PID is still the one we started from? If not, then the parent + * already died by the time we set PR_SET_PDEATHSIG, hence let's emulate the effect */ + + ppid = getppid(); + if (ppid == 0) + /* Parent is in a differn't PID namespace. */; + else if (ppid != original_pid) { + log_debug("Parent died early, raising SIGTERM."); + (void) raise(SIGTERM); + _exit(EXIT_FAILURE); + } + } + + if (FLAGS_SET(flags, FORK_NEW_MOUNTNS | FORK_MOUNTNS_SLAVE)) { + + /* Optionally, make sure we never propagate mounts to the host. */ + + if (mount(NULL, "/", NULL, MS_SLAVE | MS_REC, NULL) < 0) { + log_full_errno(prio, errno, "Failed to remount root directory as MS_SLAVE: %m"); + _exit(EXIT_FAILURE); + } + } + + if (flags & FORK_CLOSE_ALL_FDS) { + /* Close the logs here in case it got reopened above, as close_all_fds() would close them for us */ + log_close(); + + r = close_all_fds(except_fds, n_except_fds); + if (r < 0) { + log_full_errno(prio, r, "Failed to close all file descriptors: %m"); + _exit(EXIT_FAILURE); + } + } + + /* When we were asked to reopen the logs, do so again now */ + if (flags & FORK_REOPEN_LOG) { + log_open(); + log_set_open_when_needed(false); + } + + if (flags & FORK_NULL_STDIO) { + r = make_null_stdio(); + if (r < 0) { + log_full_errno(prio, r, "Failed to connect stdin/stdout to /dev/null: %m"); + _exit(EXIT_FAILURE); + } + } + + if (flags & FORK_RLIMIT_NOFILE_SAFE) { + r = rlimit_nofile_safe(); + if (r < 0) { + log_full_errno(prio, r, "Failed to lower RLIMIT_NOFILE's soft limit to 1K: %m"); + _exit(EXIT_FAILURE); + } + } + + if (ret_pid) + *ret_pid = getpid_cached(); + + return 0; +} + +int namespace_fork( + const char *outer_name, + const char *inner_name, + const int except_fds[], + size_t n_except_fds, + ForkFlags flags, + int pidns_fd, + int mntns_fd, + int netns_fd, + int userns_fd, + int root_fd, + pid_t *ret_pid) { + + int r; + + /* This is much like safe_fork(), but forks twice, and joins the specified namespaces in the middle + * process. This ensures that we are fully a member of the destination namespace, with pidns an all, so that + * /proc/self/fd works correctly. */ + + r = safe_fork_full(outer_name, except_fds, n_except_fds, (flags|FORK_DEATHSIG) & ~(FORK_REOPEN_LOG|FORK_NEW_MOUNTNS|FORK_MOUNTNS_SLAVE), ret_pid); + if (r < 0) + return r; + if (r == 0) { + pid_t pid; + + /* Child */ + + r = namespace_enter(pidns_fd, mntns_fd, netns_fd, userns_fd, root_fd); + if (r < 0) { + log_full_errno(FLAGS_SET(flags, FORK_LOG) ? LOG_ERR : LOG_DEBUG, r, "Failed to join namespace: %m"); + _exit(EXIT_FAILURE); + } + + /* We mask a few flags here that either make no sense for the grandchild, or that we don't have to do again */ + r = safe_fork_full(inner_name, except_fds, n_except_fds, flags & ~(FORK_WAIT|FORK_RESET_SIGNALS|FORK_CLOSE_ALL_FDS|FORK_NULL_STDIO), &pid); + if (r < 0) + _exit(EXIT_FAILURE); + if (r == 0) { + /* Child */ + if (ret_pid) + *ret_pid = pid; + return 0; + } + + r = wait_for_terminate_and_check(inner_name, pid, FLAGS_SET(flags, FORK_LOG) ? WAIT_LOG : 0); + if (r < 0) + _exit(EXIT_FAILURE); + + _exit(r); + } + + return 1; +} + +int fork_agent(const char *name, const int except[], size_t n_except, pid_t *ret_pid, const char *path, ...) { + bool stdout_is_tty, stderr_is_tty; + size_t n, i; + va_list ap; + char **l; + int r; + + assert(path); + + /* Spawns a temporary TTY agent, making sure it goes away when we go away */ + + r = safe_fork_full(name, except, n_except, FORK_RESET_SIGNALS|FORK_DEATHSIG|FORK_CLOSE_ALL_FDS, ret_pid); + if (r < 0) + return r; + if (r > 0) + return 0; + + /* In the child: */ + + stdout_is_tty = isatty(STDOUT_FILENO); + stderr_is_tty = isatty(STDERR_FILENO); + + if (!stdout_is_tty || !stderr_is_tty) { + int fd; + + /* Detach from stdout/stderr. and reopen + * /dev/tty for them. This is important to + * ensure that when systemctl is started via + * popen() or a similar call that expects to + * read EOF we actually do generate EOF and + * not delay this indefinitely by because we + * keep an unused copy of stdin around. */ + fd = open("/dev/tty", O_WRONLY); + if (fd < 0) { + log_error_errno(errno, "Failed to open /dev/tty: %m"); + _exit(EXIT_FAILURE); + } + + if (!stdout_is_tty && dup2(fd, STDOUT_FILENO) < 0) { + log_error_errno(errno, "Failed to dup2 /dev/tty: %m"); + _exit(EXIT_FAILURE); + } + + if (!stderr_is_tty && dup2(fd, STDERR_FILENO) < 0) { + log_error_errno(errno, "Failed to dup2 /dev/tty: %m"); + _exit(EXIT_FAILURE); + } + + safe_close_above_stdio(fd); + } + + (void) rlimit_nofile_safe(); + + /* Count arguments */ + va_start(ap, path); + for (n = 0; va_arg(ap, char*); n++) + ; + va_end(ap); + + /* Allocate strv */ + l = newa(char*, n + 1); + + /* Fill in arguments */ + va_start(ap, path); + for (i = 0; i <= n; i++) + l[i] = va_arg(ap, char*); + va_end(ap); + + execv(path, l); + _exit(EXIT_FAILURE); +} + +int set_oom_score_adjust(int value) { + char t[DECIMAL_STR_MAX(int)]; + + sprintf(t, "%i", value); + + return write_string_file("/proc/self/oom_score_adj", t, + WRITE_STRING_FILE_VERIFY_ON_FAILURE|WRITE_STRING_FILE_DISABLE_BUFFER); +} + +static const char *const ioprio_class_table[] = { + [IOPRIO_CLASS_NONE] = "none", + [IOPRIO_CLASS_RT] = "realtime", + [IOPRIO_CLASS_BE] = "best-effort", + [IOPRIO_CLASS_IDLE] = "idle" +}; + +DEFINE_STRING_TABLE_LOOKUP_WITH_FALLBACK(ioprio_class, int, IOPRIO_N_CLASSES); + +static const char *const sigchld_code_table[] = { + [CLD_EXITED] = "exited", + [CLD_KILLED] = "killed", + [CLD_DUMPED] = "dumped", + [CLD_TRAPPED] = "trapped", + [CLD_STOPPED] = "stopped", + [CLD_CONTINUED] = "continued", +}; + +DEFINE_STRING_TABLE_LOOKUP(sigchld_code, int); + +static const char* const sched_policy_table[] = { + [SCHED_OTHER] = "other", + [SCHED_BATCH] = "batch", + [SCHED_IDLE] = "idle", + [SCHED_FIFO] = "fifo", + [SCHED_RR] = "rr" +}; + +DEFINE_STRING_TABLE_LOOKUP_WITH_FALLBACK(sched_policy, int, INT_MAX); diff --git a/src/basic/process-util.h b/src/basic/process-util.h new file mode 100644 index 0000000..c85ea30 --- /dev/null +++ b/src/basic/process-util.h @@ -0,0 +1,194 @@ +/* SPDX-License-Identifier: LGPL-2.1+ */ +#pragma once + +#include <alloca.h> +#include <errno.h> +#include <sched.h> +#include <signal.h> +#include <stdbool.h> +#include <stddef.h> +#include <stdio.h> +#include <string.h> +#include <sys/resource.h> +#include <sys/types.h> + +#include "format-util.h" +#include "ioprio.h" +#include "macro.h" +#include "time-util.h" + +#define procfs_file_alloca(pid, field) \ + ({ \ + pid_t _pid_ = (pid); \ + const char *_r_; \ + if (_pid_ == 0) { \ + _r_ = ("/proc/self/" field); \ + } else { \ + _r_ = newa(char, STRLEN("/proc/") + DECIMAL_STR_MAX(pid_t) + 1 + sizeof(field)); \ + sprintf((char*) _r_, "/proc/"PID_FMT"/" field, _pid_); \ + } \ + _r_; \ + }) + +int get_process_state(pid_t pid); +int get_process_comm(pid_t pid, char **name); +int get_process_cmdline(pid_t pid, size_t max_length, bool comm_fallback, char **line); +int get_process_exe(pid_t pid, char **name); +int get_process_uid(pid_t pid, uid_t *uid); +int get_process_gid(pid_t pid, gid_t *gid); +int get_process_capeff(pid_t pid, char **capeff); +int get_process_cwd(pid_t pid, char **cwd); +int get_process_root(pid_t pid, char **root); +int get_process_environ(pid_t pid, char **environ); +int get_process_ppid(pid_t pid, pid_t *ppid); + +int wait_for_terminate(pid_t pid, siginfo_t *status); + +typedef enum WaitFlags { + WAIT_LOG_ABNORMAL = 1 << 0, + WAIT_LOG_NON_ZERO_EXIT_STATUS = 1 << 1, + + /* A shortcut for requesting the most complete logging */ + WAIT_LOG = WAIT_LOG_ABNORMAL|WAIT_LOG_NON_ZERO_EXIT_STATUS, +} WaitFlags; + +int wait_for_terminate_and_check(const char *name, pid_t pid, WaitFlags flags); +int wait_for_terminate_with_timeout(pid_t pid, usec_t timeout); + +void sigkill_wait(pid_t pid); +void sigkill_waitp(pid_t *pid); +void sigterm_wait(pid_t pid); + +int kill_and_sigcont(pid_t pid, int sig); + +int rename_process(const char name[]); +int is_kernel_thread(pid_t pid); + +int getenv_for_pid(pid_t pid, const char *field, char **_value); + +bool pid_is_alive(pid_t pid); +bool pid_is_unwaited(pid_t pid); +int pid_from_same_root_fs(pid_t pid); + +bool is_main_thread(void); + +_noreturn_ void freeze(void); + +bool oom_score_adjust_is_valid(int oa); + +#ifndef PERSONALITY_INVALID +/* personality(7) documents that 0xffffffffUL is used for querying the + * current personality, hence let's use that here as error + * indicator. */ +#define PERSONALITY_INVALID 0xffffffffLU +#endif + +unsigned long personality_from_string(const char *p); +const char *personality_to_string(unsigned long); + +int safe_personality(unsigned long p); +int opinionated_personality(unsigned long *ret); + +int ioprio_class_to_string_alloc(int i, char **s); +int ioprio_class_from_string(const char *s); + +const char *sigchld_code_to_string(int i) _const_; +int sigchld_code_from_string(const char *s) _pure_; + +int sched_policy_to_string_alloc(int i, char **s); +int sched_policy_from_string(const char *s); + +static inline pid_t PTR_TO_PID(const void *p) { + return (pid_t) ((uintptr_t) p); +} + +static inline void* PID_TO_PTR(pid_t pid) { + return (void*) ((uintptr_t) pid); +} + +void valgrind_summary_hack(void); + +int pid_compare_func(const pid_t *a, const pid_t *b); + +static inline bool nice_is_valid(int n) { + return n >= PRIO_MIN && n < PRIO_MAX; +} + +static inline bool sched_policy_is_valid(int i) { + return IN_SET(i, SCHED_OTHER, SCHED_BATCH, SCHED_IDLE, SCHED_FIFO, SCHED_RR); +} + +static inline bool sched_priority_is_valid(int i) { + return i >= 0 && i <= sched_get_priority_max(SCHED_RR); +} + +static inline bool ioprio_class_is_valid(int i) { + return IN_SET(i, IOPRIO_CLASS_NONE, IOPRIO_CLASS_RT, IOPRIO_CLASS_BE, IOPRIO_CLASS_IDLE); +} + +static inline bool ioprio_priority_is_valid(int i) { + return i >= 0 && i < IOPRIO_BE_NR; +} + +static inline bool pid_is_valid(pid_t p) { + return p > 0; +} + +int ioprio_parse_priority(const char *s, int *ret); + +pid_t getpid_cached(void); +void reset_cached_pid(void); + +int must_be_root(void); + +typedef enum ForkFlags { + FORK_RESET_SIGNALS = 1 << 0, /* Reset all signal handlers and signal mask */ + FORK_CLOSE_ALL_FDS = 1 << 1, /* Close all open file descriptors in the child, except for 0,1,2 */ + FORK_DEATHSIG = 1 << 2, /* Set PR_DEATHSIG in the child */ + FORK_NULL_STDIO = 1 << 3, /* Connect 0,1,2 to /dev/null */ + FORK_REOPEN_LOG = 1 << 4, /* Reopen log connection */ + FORK_LOG = 1 << 5, /* Log above LOG_DEBUG log level about failures */ + FORK_WAIT = 1 << 6, /* Wait until child exited */ + FORK_NEW_MOUNTNS = 1 << 7, /* Run child in its own mount namespace */ + FORK_MOUNTNS_SLAVE = 1 << 8, /* Make child's mount namespace MS_SLAVE */ + FORK_RLIMIT_NOFILE_SAFE = 1 << 9, /* Set RLIMIT_NOFILE soft limit to 1K for select() compat */ +} ForkFlags; + +int safe_fork_full(const char *name, const int except_fds[], size_t n_except_fds, ForkFlags flags, pid_t *ret_pid); + +static inline int safe_fork(const char *name, ForkFlags flags, pid_t *ret_pid) { + return safe_fork_full(name, NULL, 0, flags, ret_pid); +} + +int namespace_fork(const char *outer_name, const char *inner_name, const int except_fds[], size_t n_except_fds, ForkFlags flags, int pidns_fd, int mntns_fd, int netns_fd, int userns_fd, int root_fd, pid_t *ret_pid); + +int fork_agent(const char *name, const int except[], size_t n_except, pid_t *pid, const char *path, ...) _sentinel_; + +int set_oom_score_adjust(int value); + +#if SIZEOF_PID_T == 4 +/* The highest possibly (theoretic) pid_t value on this architecture. */ +#define PID_T_MAX ((pid_t) INT32_MAX) +/* The maximum number of concurrent processes Linux allows on this architecture, as well as the highest valid PID value + * the kernel will potentially assign. This reflects a value compiled into the kernel (PID_MAX_LIMIT), and sets the + * upper boundary on what may be written to the /proc/sys/kernel/pid_max sysctl (but do note that the sysctl is off by + * 1, since PID 0 can never exist and there can hence only be one process less than the limit would suggest). Since + * these values are documented in proc(5) we feel quite confident that they are stable enough for the near future at + * least to define them here too. */ +#define TASKS_MAX 4194303U +#elif SIZEOF_PID_T == 2 +#define PID_T_MAX ((pid_t) INT16_MAX) +#define TASKS_MAX 32767U +#else +#error "Unknown pid_t size" +#endif + +assert_cc(TASKS_MAX <= (unsigned long) PID_T_MAX) + +/* Like TAKE_PTR() but for child PIDs, resetting them to 0 */ +#define TAKE_PID(pid) \ + ({ \ + pid_t _pid_ = (pid); \ + (pid) = 0; \ + _pid_; \ + }) diff --git a/src/basic/procfs-util.c b/src/basic/procfs-util.c new file mode 100644 index 0000000..7aaf95b --- /dev/null +++ b/src/basic/procfs-util.c @@ -0,0 +1,268 @@ +/* SPDX-License-Identifier: LGPL-2.1+ */ + +#include <errno.h> + +#include "alloc-util.h" +#include "def.h" +#include "fd-util.h" +#include "fileio.h" +#include "parse-util.h" +#include "process-util.h" +#include "procfs-util.h" +#include "stdio-util.h" +#include "string-util.h" + +int procfs_tasks_get_limit(uint64_t *ret) { + _cleanup_free_ char *value = NULL; + uint64_t pid_max, threads_max; + int r; + + assert(ret); + + /* So there are two sysctl files that control the system limit of processes: + * + * 1. kernel.threads-max: this is probably the sysctl that makes more sense, as it directly puts a limit on + * concurrent tasks. + * + * 2. kernel.pid_max: this limits the numeric range PIDs can take, and thus indirectly also limits the number + * of concurrent threads. AFAICS it's primarily a compatibility concept: some crappy old code used a signed + * 16bit type for PIDs, hence the kernel provides a way to ensure the PIDs never go beyond INT16_MAX by + * default. + * + * By default #2 is set to much lower values than #1, hence the limit people come into contact with first, as + * it's the lowest boundary they need to bump when they want higher number of processes. + * + * Also note the weird definition of #2: PIDs assigned will be kept below this value, which means the number of + * tasks that can be created is one lower, as PID 0 is not a valid process ID. */ + + r = read_one_line_file("/proc/sys/kernel/pid_max", &value); + if (r < 0) + return r; + + r = safe_atou64(value, &pid_max); + if (r < 0) + return r; + + value = mfree(value); + r = read_one_line_file("/proc/sys/kernel/threads-max", &value); + if (r < 0) + return r; + + r = safe_atou64(value, &threads_max); + if (r < 0) + return r; + + /* Subtract one from pid_max, since PID 0 is not a valid PID */ + *ret = MIN(pid_max-1, threads_max); + return 0; +} + +int procfs_tasks_set_limit(uint64_t limit) { + char buffer[DECIMAL_STR_MAX(uint64_t)+1]; + _cleanup_free_ char *value = NULL; + uint64_t pid_max; + int r; + + if (limit == 0) /* This makes no sense, we are userspace and hence count as tasks too, and we want to live, + * hence the limit conceptually has to be above 0. Also, most likely if anyone asks for a zero + * limit he/she probably means "no limit", hence let's better refuse this to avoid + * confusion. */ + return -EINVAL; + + /* The Linux kernel doesn't allow this value to go below 20, hence don't allow this either, higher values than + * TASKS_MAX are not accepted by the pid_max sysctl. We'll treat anything this high as "unbounded" and hence + * set it to the maximum. */ + limit = CLAMP(limit, 20U, TASKS_MAX); + + r = read_one_line_file("/proc/sys/kernel/pid_max", &value); + if (r < 0) + return r; + r = safe_atou64(value, &pid_max); + if (r < 0) + return r; + + /* As pid_max is about the numeric pid_t range we'll bump it if necessary, but only ever increase it, never + * decrease it, as threads-max is the much more relevant sysctl. */ + if (limit > pid_max-1) { + sprintf(buffer, "%" PRIu64, limit+1); /* Add one, since PID 0 is not a valid PID */ + r = write_string_file("/proc/sys/kernel/pid_max", buffer, WRITE_STRING_FILE_DISABLE_BUFFER); + if (r < 0) + return r; + } + + sprintf(buffer, "%" PRIu64, limit); + r = write_string_file("/proc/sys/kernel/threads-max", buffer, WRITE_STRING_FILE_DISABLE_BUFFER); + if (r < 0) { + uint64_t threads_max; + + /* Hmm, we couldn't write this? If so, maybe it was already set properly? In that case let's not + * generate an error */ + + value = mfree(value); + if (read_one_line_file("/proc/sys/kernel/threads-max", &value) < 0) + return r; /* return original error */ + + if (safe_atou64(value, &threads_max) < 0) + return r; /* return original error */ + + if (MIN(pid_max-1, threads_max) != limit) + return r; /* return original error */ + + /* Yay! Value set already matches what we were trying to set, hence consider this a success. */ + } + + return 0; +} + +int procfs_tasks_get_current(uint64_t *ret) { + _cleanup_free_ char *value = NULL; + const char *p, *nr; + size_t n; + int r; + + assert(ret); + + r = read_one_line_file("/proc/loadavg", &value); + if (r < 0) + return r; + + /* Look for the second part of the fourth field, which is separated by a slash from the first part. None of the + * earlier fields use a slash, hence let's use this to find the right spot. */ + p = strchr(value, '/'); + if (!p) + return -EINVAL; + + p++; + n = strspn(p, DIGITS); + nr = strndupa(p, n); + + return safe_atou64(nr, ret); +} + +static uint64_t calc_gcd64(uint64_t a, uint64_t b) { + + while (b > 0) { + uint64_t t; + + t = a % b; + + a = b; + b = t; + } + + return a; +} + +int procfs_cpu_get_usage(nsec_t *ret) { + _cleanup_free_ char *first_line = NULL; + unsigned long user_ticks, nice_ticks, system_ticks, irq_ticks, softirq_ticks, + guest_ticks = 0, guest_nice_ticks = 0; + long ticks_per_second; + uint64_t sum, gcd, a, b; + const char *p; + int r; + + assert(ret); + + r = read_one_line_file("/proc/stat", &first_line); + if (r < 0) + return r; + + p = first_word(first_line, "cpu"); + if (!p) + return -EINVAL; + + if (sscanf(p, "%lu %lu %lu %*u %*u %lu %lu %*u %lu %lu", + &user_ticks, + &nice_ticks, + &system_ticks, + &irq_ticks, + &softirq_ticks, + &guest_ticks, + &guest_nice_ticks) < 5) /* we only insist on the first five fields */ + return -EINVAL; + + ticks_per_second = sysconf(_SC_CLK_TCK); + if (ticks_per_second < 0) + return -errno; + assert(ticks_per_second > 0); + + sum = (uint64_t) user_ticks + (uint64_t) nice_ticks + (uint64_t) system_ticks + + (uint64_t) irq_ticks + (uint64_t) softirq_ticks + + (uint64_t) guest_ticks + (uint64_t) guest_nice_ticks; + + /* Let's reduce this fraction before we apply it to avoid overflows when converting this to µsec */ + gcd = calc_gcd64(NSEC_PER_SEC, ticks_per_second); + + a = (uint64_t) NSEC_PER_SEC / gcd; + b = (uint64_t) ticks_per_second / gcd; + + *ret = DIV_ROUND_UP((nsec_t) sum * (nsec_t) a, (nsec_t) b); + return 0; +} + +int procfs_memory_get(uint64_t *ret_total, uint64_t *ret_used) { + uint64_t mem_total = UINT64_MAX, mem_free = UINT64_MAX; + _cleanup_fclose_ FILE *f = NULL; + int r; + + f = fopen("/proc/meminfo", "re"); + if (!f) + return -errno; + + for (;;) { + _cleanup_free_ char *line = NULL; + uint64_t *v; + char *p, *e; + size_t n; + + r = read_line(f, LONG_LINE_MAX, &line); + if (r < 0) + return r; + if (r == 0) + return -EINVAL; /* EOF: Couldn't find one or both fields? */ + + p = first_word(line, "MemTotal:"); + if (p) + v = &mem_total; + else { + p = first_word(line, "MemFree:"); + if (p) + v = &mem_free; + else + continue; + } + + /* Determine length of numeric value */ + n = strspn(p, DIGITS); + if (n == 0) + return -EINVAL; + e = p + n; + + /* Ensure the line ends in " kB" */ + n = strspn(e, WHITESPACE); + if (n == 0) + return -EINVAL; + if (!streq(e + n, "kB")) + return -EINVAL; + + *e = 0; + r = safe_atou64(p, v); + if (r < 0) + return r; + if (*v == UINT64_MAX) + return -EINVAL; + + if (mem_total != UINT64_MAX && mem_free != UINT64_MAX) + break; + } + + if (mem_free > mem_total) + return -EINVAL; + + if (ret_total) + *ret_total = mem_total * 1024U; + if (ret_used) + *ret_used = (mem_total - mem_free) * 1024U; + return 0; +} diff --git a/src/basic/procfs-util.h b/src/basic/procfs-util.h new file mode 100644 index 0000000..5a44e9e --- /dev/null +++ b/src/basic/procfs-util.h @@ -0,0 +1,17 @@ +/* SPDX-License-Identifier: LGPL-2.1+ */ +#pragma once + +#include <inttypes.h> + +#include "time-util.h" + +int procfs_tasks_get_limit(uint64_t *ret); +int procfs_tasks_set_limit(uint64_t limit); +int procfs_tasks_get_current(uint64_t *ret); + +int procfs_cpu_get_usage(nsec_t *ret); + +int procfs_memory_get(uint64_t *ret_total, uint64_t *ret_used); +static inline int procfs_memory_get_used(uint64_t *ret) { + return procfs_memory_get(NULL, ret); +} diff --git a/src/basic/random-util.c b/src/basic/random-util.c new file mode 100644 index 0000000..f7decf6 --- /dev/null +++ b/src/basic/random-util.c @@ -0,0 +1,265 @@ +/* SPDX-License-Identifier: LGPL-2.1+ */ + +#if defined(__i386__) || defined(__x86_64__) +#include <cpuid.h> +#endif + +#include <elf.h> +#include <errno.h> +#include <fcntl.h> +#include <stdbool.h> +#include <stdint.h> +#include <stdlib.h> +#include <string.h> +#include <sys/time.h> + +#if HAVE_SYS_AUXV_H +# include <sys/auxv.h> +#endif + +#if USE_SYS_RANDOM_H +# include <sys/random.h> +#else +# include <linux/random.h> +#endif + +#include "fd-util.h" +#include "io-util.h" +#include "missing.h" +#include "random-util.h" +#include "time-util.h" + +#if HAS_FEATURE_MEMORY_SANITIZER +#include <sanitizer/msan_interface.h> +#endif + +int rdrand(unsigned long *ret) { + +#if defined(__i386__) || defined(__x86_64__) + static int have_rdrand = -1; + unsigned char err; + + if (have_rdrand < 0) { + uint32_t eax, ebx, ecx, edx; + + /* Check if RDRAND is supported by the CPU */ + if (__get_cpuid(1, &eax, &ebx, &ecx, &edx) == 0) { + have_rdrand = false; + return -EOPNOTSUPP; + } + + have_rdrand = !!(ecx & (1U << 30)); + } + + if (have_rdrand == 0) + return -EOPNOTSUPP; + + asm volatile("rdrand %0;" + "setc %1" + : "=r" (*ret), + "=qm" (err)); + +#if HAS_FEATURE_MEMORY_SANITIZER + __msan_unpoison(&err, sizeof(err)); +#endif + + if (!err) + return -EAGAIN; + + return 0; +#else + return -EOPNOTSUPP; +#endif +} + +int genuine_random_bytes(void *p, size_t n, RandomFlags flags) { + static int have_syscall = -1; + _cleanup_close_ int fd = -1; + bool got_some = false; + int r; + + /* Gathers some randomness from the kernel (or the CPU if the RANDOM_ALLOW_RDRAND flag is set). This call won't + * block, unless the RANDOM_BLOCK flag is set. If RANDOM_DONT_DRAIN is set, an error is returned if the random + * pool is not initialized. Otherwise it will always return some data from the kernel, regardless of whether + * the random pool is fully initialized or not. */ + + if (n == 0) + return 0; + + if (FLAGS_SET(flags, RANDOM_ALLOW_RDRAND)) + /* Try x86-64' RDRAND intrinsic if we have it. We only use it if high quality randomness is not + * required, as we don't trust it (who does?). Note that we only do a single iteration of RDRAND here, + * even though the Intel docs suggest calling this in a tight loop of 10 invocations or so. That's + * because we don't really care about the quality here. We generally prefer using RDRAND if the caller + * allows us too, since this way we won't drain the kernel randomness pool if we don't need it, as the + * pool's entropy is scarce. */ + for (;;) { + unsigned long u; + size_t m; + + if (rdrand(&u) < 0) { + if (got_some && FLAGS_SET(flags, RANDOM_EXTEND_WITH_PSEUDO)) { + /* Fill in the remaining bytes using pseudo-random values */ + pseudo_random_bytes(p, n); + return 0; + } + + /* OK, this didn't work, let's go to getrandom() + /dev/urandom instead */ + break; + } + + m = MIN(sizeof(u), n); + memcpy(p, &u, m); + + p = (uint8_t*) p + m; + n -= m; + + if (n == 0) + return 0; /* Yay, success! */ + + got_some = true; + } + + /* Use the getrandom() syscall unless we know we don't have it. */ + if (have_syscall != 0 && !HAS_FEATURE_MEMORY_SANITIZER) { + + for (;;) { + r = getrandom(p, n, FLAGS_SET(flags, RANDOM_BLOCK) ? 0 : GRND_NONBLOCK); + if (r > 0) { + have_syscall = true; + + if ((size_t) r == n) + return 0; /* Yay, success! */ + + assert((size_t) r < n); + p = (uint8_t*) p + r; + n -= r; + + if (FLAGS_SET(flags, RANDOM_EXTEND_WITH_PSEUDO)) { + /* Fill in the remaining bytes using pseudo-random values */ + pseudo_random_bytes(p, n); + return 0; + } + + got_some = true; + + /* Hmm, we didn't get enough good data but the caller insists on good data? Then try again */ + if (FLAGS_SET(flags, RANDOM_BLOCK)) + continue; + + /* Fill in the rest with /dev/urandom */ + break; + + } else if (r == 0) { + have_syscall = true; + return -EIO; + + } else if (errno == ENOSYS) { + /* We lack the syscall, continue with reading from /dev/urandom. */ + have_syscall = false; + break; + + } else if (errno == EAGAIN) { + /* The kernel has no entropy whatsoever. Let's remember to use the syscall the next + * time again though. + * + * If RANDOM_DONT_DRAIN is set, return an error so that random_bytes() can produce some + * pseudo-random bytes instead. Otherwise, fall back to /dev/urandom, which we know is empty, + * but the kernel will produce some bytes for us on a best-effort basis. */ + have_syscall = true; + + if (got_some && FLAGS_SET(flags, RANDOM_EXTEND_WITH_PSEUDO)) { + /* Fill in the remaining bytes using pseudorandom values */ + pseudo_random_bytes(p, n); + return 0; + } + + if (FLAGS_SET(flags, RANDOM_DONT_DRAIN)) + return -ENODATA; + + /* Use /dev/urandom instead */ + break; + } else + return -errno; + } + } + + fd = open("/dev/urandom", O_RDONLY|O_CLOEXEC|O_NOCTTY); + if (fd < 0) + return errno == ENOENT ? -ENOSYS : -errno; + + return loop_read_exact(fd, p, n, true); +} + +void initialize_srand(void) { + static bool srand_called = false; + unsigned x; +#if HAVE_SYS_AUXV_H + const void *auxv; +#endif + unsigned long k; + + if (srand_called) + return; + +#if HAVE_SYS_AUXV_H + /* The kernel provides us with 16 bytes of entropy in auxv, so let's + * try to make use of that to seed the pseudo-random generator. It's + * better than nothing... */ + + auxv = (const void*) getauxval(AT_RANDOM); + if (auxv) { + assert_cc(sizeof(x) <= 16); + memcpy(&x, auxv, sizeof(x)); + } else +#endif + x = 0; + + x ^= (unsigned) now(CLOCK_REALTIME); + x ^= (unsigned) gettid(); + + if (rdrand(&k) >= 0) + x ^= (unsigned) k; + + srand(x); + srand_called = true; +} + +/* INT_MAX gives us only 31 bits, so use 24 out of that. */ +#if RAND_MAX >= INT_MAX +# define RAND_STEP 3 +#else +/* SHORT_INT_MAX or lower gives at most 15 bits, we just just 8 out of that. */ +# define RAND_STEP 1 +#endif + +void pseudo_random_bytes(void *p, size_t n) { + uint8_t *q; + + initialize_srand(); + + for (q = p; q < (uint8_t*) p + n; q += RAND_STEP) { + unsigned rr; + + rr = (unsigned) rand(); + +#if RAND_STEP >= 3 + if ((size_t) (q - (uint8_t*) p + 2) < n) + q[2] = rr >> 16; +#endif +#if RAND_STEP >= 2 + if ((size_t) (q - (uint8_t*) p + 1) < n) + q[1] = rr >> 8; +#endif + q[0] = rr; + } +} + +void random_bytes(void *p, size_t n) { + + if (genuine_random_bytes(p, n, RANDOM_EXTEND_WITH_PSEUDO|RANDOM_DONT_DRAIN|RANDOM_ALLOW_RDRAND) >= 0) + return; + + /* If for some reason some user made /dev/urandom unavailable to us, or the kernel has no entropy, use a PRNG instead. */ + pseudo_random_bytes(p, n); +} diff --git a/src/basic/random-util.h b/src/basic/random-util.h new file mode 100644 index 0000000..3e8c288 --- /dev/null +++ b/src/basic/random-util.h @@ -0,0 +1,33 @@ +/* SPDX-License-Identifier: LGPL-2.1+ */ +#pragma once + +#include <stdbool.h> +#include <stddef.h> +#include <stdint.h> + +typedef enum RandomFlags { + RANDOM_EXTEND_WITH_PSEUDO = 1 << 0, /* If we can't get enough genuine randomness, but some, fill up the rest with pseudo-randomness */ + RANDOM_BLOCK = 1 << 1, /* Rather block than return crap randomness (only if the kernel supports that) */ + RANDOM_DONT_DRAIN = 1 << 2, /* If we can't get any randomness at all, return early with -EAGAIN */ + RANDOM_ALLOW_RDRAND = 1 << 3, /* Allow usage of the CPU RNG */ +} RandomFlags; + +int genuine_random_bytes(void *p, size_t n, RandomFlags flags); /* returns "genuine" randomness, optionally filled upwith pseudo random, if not enough is available */ +void pseudo_random_bytes(void *p, size_t n); /* returns only pseudo-randommess (but possibly seeded from something better) */ +void random_bytes(void *p, size_t n); /* returns genuine randomness if cheaply available, and pseudo randomness if not. */ + +void initialize_srand(void); + +static inline uint64_t random_u64(void) { + uint64_t u; + random_bytes(&u, sizeof(u)); + return u; +} + +static inline uint32_t random_u32(void) { + uint32_t u; + random_bytes(&u, sizeof(u)); + return u; +} + +int rdrand(unsigned long *ret); diff --git a/src/basic/ratelimit.c b/src/basic/ratelimit.c new file mode 100644 index 0000000..4e04e04 --- /dev/null +++ b/src/basic/ratelimit.c @@ -0,0 +1,38 @@ +/* SPDX-License-Identifier: LGPL-2.1+ */ + +#include <sys/time.h> + +#include "macro.h" +#include "ratelimit.h" + +/* Modelled after Linux' lib/ratelimit.c by Dave Young + * <hidave.darkstar@gmail.com>, which is licensed GPLv2. */ + +bool ratelimit_below(RateLimit *r) { + usec_t ts; + + assert(r); + + if (r->interval <= 0 || r->burst <= 0) + return true; + + ts = now(CLOCK_MONOTONIC); + + if (r->begin <= 0 || + r->begin + r->interval < ts) { + r->begin = ts; + + /* Reset counter */ + r->num = 0; + goto good; + } + + if (r->num < r->burst) + goto good; + + return false; + +good: + r->num++; + return true; +} diff --git a/src/basic/ratelimit.h b/src/basic/ratelimit.h new file mode 100644 index 0000000..de91def --- /dev/null +++ b/src/basic/ratelimit.h @@ -0,0 +1,40 @@ +/* SPDX-License-Identifier: LGPL-2.1+ */ +#pragma once + +#include <stdbool.h> + +#include "time-util.h" +#include "util.h" + +typedef struct RateLimit { + usec_t interval; + usec_t begin; + unsigned burst; + unsigned num; +} RateLimit; + +#define RATELIMIT_DEFINE(_name, _interval, _burst) \ + RateLimit _name = { \ + .interval = (_interval), \ + .burst = (_burst), \ + .num = 0, \ + .begin = 0 \ + } + +#define RATELIMIT_INIT(v, _interval, _burst) \ + do { \ + RateLimit *_r = &(v); \ + _r->interval = (_interval); \ + _r->burst = (_burst); \ + _r->num = 0; \ + _r->begin = 0; \ + } while (false) + +#define RATELIMIT_RESET(v) \ + do { \ + RateLimit *_r = &(v); \ + _r->num = 0; \ + _r->begin = 0; \ + } while (false) + +bool ratelimit_below(RateLimit *r); diff --git a/src/basic/raw-clone.h b/src/basic/raw-clone.h new file mode 100644 index 0000000..b8857b0 --- /dev/null +++ b/src/basic/raw-clone.h @@ -0,0 +1,79 @@ +/* SPDX-License-Identifier: LGPL-2.1+ */ +#pragma once + +/*** + Copyright © 2016 Michael Karcher +***/ + +#include <errno.h> +#include <sched.h> +#include <sys/syscall.h> + +#include "log.h" +#include "macro.h" + +/** + * raw_clone() - uses clone to create a new process with clone flags + * @flags: Flags to pass to the clone system call + * + * Uses the clone system call to create a new process with the cloning flags and termination signal passed in the flags + * parameter. Opposed to glibc's clone funtion, using this function does not set up a separate stack for the child, but + * relies on copy-on-write semantics on the one stack at a common virtual address, just as fork does. + * + * To obtain copy-on-write semantics, flags must not contain CLONE_VM, and thus CLONE_THREAD and CLONE_SIGHAND (which + * require CLONE_VM) are not usable. + * + * Additionally, as this function does not pass the ptid, newtls and ctid parameters to the kernel, flags must not + * contain CLONE_PARENT_SETTID, CLONE_CHILD_SETTID, CLONE_CHILD_CLEARTID or CLONE_SETTLS. + * + * Returns: 0 in the child process and the child process id in the parent. + */ +static inline pid_t raw_clone(unsigned long flags) { + pid_t ret; + + assert((flags & (CLONE_VM|CLONE_PARENT_SETTID|CLONE_CHILD_SETTID| + CLONE_CHILD_CLEARTID|CLONE_SETTLS)) == 0); +#if defined(__s390x__) || defined(__s390__) || defined(__CRIS__) + /* On s390/s390x and cris the order of the first and second arguments + * of the raw clone() system call is reversed. */ + ret = (pid_t) syscall(__NR_clone, NULL, flags); +#elif defined(__sparc__) + { + /** + * sparc always returns the other process id in %o0, and + * a boolean flag whether this is the child or the parent in + * %o1. Inline assembly is needed to get the flag returned + * in %o1. + */ + int in_child, child_pid, error; + + asm volatile("mov %3, %%g1\n\t" + "mov %4, %%o0\n\t" + "mov 0 , %%o1\n\t" +#if defined(__arch64__) + "t 0x6d\n\t" +#else + "t 0x10\n\t" +#endif + "addx %%g0, 0, %2\n\t" + "mov %%o1, %0\n\t" + "mov %%o0, %1" : + "=r"(in_child), "=r"(child_pid), "=r"(error) : + "i"(__NR_clone), "r"(flags) : + "%o1", "%o0", "%g1", "cc" ); + + if (error) { + errno = child_pid; + ret = -1; + } else + ret = in_child ? 0 : child_pid; + } +#else + ret = (pid_t) syscall(__NR_clone, flags, NULL); +#endif + + if (ret == 0) + reset_cached_pid(); + + return ret; +} diff --git a/src/basic/raw-reboot.h b/src/basic/raw-reboot.h new file mode 100644 index 0000000..8ecefe9 --- /dev/null +++ b/src/basic/raw-reboot.h @@ -0,0 +1,14 @@ +/* SPDX-License-Identifier: LGPL-2.1+ */ +#pragma once + +#include <linux/reboot.h> +#include <sys/reboot.h> +#include <sys/syscall.h> + +/* glibc defines the reboot() API call, which is a wrapper around the system call of the same name, but without the + * extra "arg" parameter. Since we need that parameter for some calls, let's add a "raw" wrapper that is defined the + * same way, except it takes the additional argument. */ + +static inline int raw_reboot(int cmd, const void *arg) { + return (int) syscall(SYS_reboot, LINUX_REBOOT_MAGIC1, LINUX_REBOOT_MAGIC2, cmd, arg); +} diff --git a/src/basic/refcnt.h b/src/basic/refcnt.h new file mode 100644 index 0000000..40f9a84 --- /dev/null +++ b/src/basic/refcnt.h @@ -0,0 +1,54 @@ +/* SPDX-License-Identifier: LGPL-2.1+ */ +#pragma once + +/* A type-safe atomic refcounter. + * + * DO NOT USE THIS UNLESS YOU ACTUALLY CARE ABOUT THREAD SAFETY! */ + +typedef struct { + volatile unsigned _value; +} RefCount; + +#define REFCNT_GET(r) ((r)._value) +#define REFCNT_INC(r) (__sync_add_and_fetch(&(r)._value, 1)) +#define REFCNT_DEC(r) (__sync_sub_and_fetch(&(r)._value, 1)) + +#define REFCNT_INIT ((RefCount) { ._value = 1 }) + +#define _DEFINE_ATOMIC_REF_FUNC(type, name, scope) \ + scope type *name##_ref(type *p) { \ + if (!p) \ + return NULL; \ + \ + assert_se(REFCNT_INC(p->n_ref) >= 2); \ + return p; \ + } + +#define _DEFINE_ATOMIC_UNREF_FUNC(type, name, free_func, scope) \ + scope type *name##_unref(type *p) { \ + if (!p) \ + return NULL; \ + \ + if (REFCNT_DEC(p->n_ref) > 0) \ + return NULL; \ + \ + return free_func(p); \ + } + +#define DEFINE_ATOMIC_REF_FUNC(type, name) \ + _DEFINE_ATOMIC_REF_FUNC(type, name,) +#define DEFINE_PUBLIC_ATOMIC_REF_FUNC(type, name) \ + _DEFINE_ATOMIC_REF_FUNC(type, name, _public_) + +#define DEFINE_ATOMIC_UNREF_FUNC(type, name, free_func) \ + _DEFINE_ATOMIC_UNREF_FUNC(type, name, free_func,) +#define DEFINE_PUBLIC_ATOMIC_UNREF_FUNC(type, name, free_func) \ + _DEFINE_ATOMIC_UNREF_FUNC(type, name, free_func, _public_) + +#define DEFINE_ATOMIC_REF_UNREF_FUNC(type, name, free_func) \ + DEFINE_ATOMIC_REF_FUNC(type, name); \ + DEFINE_ATOMIC_UNREF_FUNC(type, name, free_func); + +#define DEFINE_PUBLIC_ATOMIC_REF_UNREF_FUNC(type, name, free_func) \ + DEFINE_PUBLIC_ATOMIC_REF_FUNC(type, name); \ + DEFINE_PUBLIC_ATOMIC_UNREF_FUNC(type, name, free_func); diff --git a/src/basic/replace-var.c b/src/basic/replace-var.c new file mode 100644 index 0000000..fd2b5c1 --- /dev/null +++ b/src/basic/replace-var.c @@ -0,0 +1,94 @@ +/* SPDX-License-Identifier: LGPL-2.1+ */ + +#include <errno.h> +#include <stddef.h> +#include <stdlib.h> +#include <string.h> + +#include "alloc-util.h" +#include "macro.h" +#include "replace-var.h" +#include "string-util.h" + +/* + * Generic infrastructure for replacing @FOO@ style variables in + * strings. Will call a callback for each replacement. + */ + +static int get_variable(const char *b, char **r) { + size_t k; + char *t; + + assert(b); + assert(r); + + if (*b != '@') + return 0; + + k = strspn(b + 1, UPPERCASE_LETTERS "_"); + if (k <= 0 || b[k+1] != '@') + return 0; + + t = strndup(b + 1, k); + if (!t) + return -ENOMEM; + + *r = t; + return 1; +} + +char *replace_var(const char *text, char *(*lookup)(const char *variable, void *userdata), void *userdata) { + char *r, *t; + const char *f; + size_t l; + + assert(text); + assert(lookup); + + l = strlen(text); + r = new(char, l+1); + if (!r) + return NULL; + + f = text; + t = r; + while (*f) { + _cleanup_free_ char *v = NULL, *n = NULL; + char *a; + int k; + size_t skip, d, nl; + + k = get_variable(f, &v); + if (k < 0) + goto oom; + if (k == 0) { + *(t++) = *(f++); + continue; + } + + n = lookup(v, userdata); + if (!n) + goto oom; + + skip = strlen(v) + 2; + + d = t - r; + nl = l - skip + strlen(n); + a = realloc(r, nl + 1); + if (!a) + goto oom; + + l = nl; + r = a; + t = r + d; + + t = stpcpy(t, n); + f += skip; + } + + *t = 0; + return r; + +oom: + return mfree(r); +} diff --git a/src/basic/replace-var.h b/src/basic/replace-var.h new file mode 100644 index 0000000..e6a489f --- /dev/null +++ b/src/basic/replace-var.h @@ -0,0 +1,4 @@ +/* SPDX-License-Identifier: LGPL-2.1+ */ +#pragma once + +char *replace_var(const char *text, char *(*lookup)(const char *variable, void *userdata), void *userdata); diff --git a/src/basic/rlimit-util.c b/src/basic/rlimit-util.c new file mode 100644 index 0000000..74b3a02 --- /dev/null +++ b/src/basic/rlimit-util.c @@ -0,0 +1,410 @@ +/* SPDX-License-Identifier: LGPL-2.1+ */ + +#include <errno.h> +#include <sys/resource.h> + +#include "alloc-util.h" +#include "extract-word.h" +#include "fd-util.h" +#include "format-util.h" +#include "macro.h" +#include "missing.h" +#include "rlimit-util.h" +#include "string-table.h" +#include "time-util.h" + +int setrlimit_closest(int resource, const struct rlimit *rlim) { + struct rlimit highest, fixed; + + assert(rlim); + + if (setrlimit(resource, rlim) >= 0) + return 0; + + if (errno != EPERM) + return -errno; + + /* So we failed to set the desired setrlimit, then let's try + * to get as close as we can */ + if (getrlimit(resource, &highest) < 0) + return -errno; + + /* If the hard limit is unbounded anyway, then the EPERM had other reasons, let's propagate the original EPERM + * then */ + if (highest.rlim_max == RLIM_INFINITY) + return -EPERM; + + fixed = (struct rlimit) { + .rlim_cur = MIN(rlim->rlim_cur, highest.rlim_max), + .rlim_max = MIN(rlim->rlim_max, highest.rlim_max), + }; + + /* Shortcut things if we wouldn't change anything. */ + if (fixed.rlim_cur == highest.rlim_cur && + fixed.rlim_max == highest.rlim_max) + return 0; + + if (setrlimit(resource, &fixed) < 0) + return -errno; + + return 0; +} + +int setrlimit_closest_all(const struct rlimit *const *rlim, int *which_failed) { + int i, r; + + assert(rlim); + + /* On failure returns the limit's index that failed in *which_failed, but only if non-NULL */ + + for (i = 0; i < _RLIMIT_MAX; i++) { + if (!rlim[i]) + continue; + + r = setrlimit_closest(i, rlim[i]); + if (r < 0) { + if (which_failed) + *which_failed = i; + + return r; + } + } + + if (which_failed) + *which_failed = -1; + + return 0; +} + +static int rlimit_parse_u64(const char *val, rlim_t *ret) { + uint64_t u; + int r; + + assert(val); + assert(ret); + + if (streq(val, "infinity")) { + *ret = RLIM_INFINITY; + return 0; + } + + /* setrlimit(2) suggests rlim_t is always 64bit on Linux. */ + assert_cc(sizeof(rlim_t) == sizeof(uint64_t)); + + r = safe_atou64(val, &u); + if (r < 0) + return r; + if (u >= (uint64_t) RLIM_INFINITY) + return -ERANGE; + + *ret = (rlim_t) u; + return 0; +} + +static int rlimit_parse_size(const char *val, rlim_t *ret) { + uint64_t u; + int r; + + assert(val); + assert(ret); + + if (streq(val, "infinity")) { + *ret = RLIM_INFINITY; + return 0; + } + + r = parse_size(val, 1024, &u); + if (r < 0) + return r; + if (u >= (uint64_t) RLIM_INFINITY) + return -ERANGE; + + *ret = (rlim_t) u; + return 0; +} + +static int rlimit_parse_sec(const char *val, rlim_t *ret) { + uint64_t u; + usec_t t; + int r; + + assert(val); + assert(ret); + + if (streq(val, "infinity")) { + *ret = RLIM_INFINITY; + return 0; + } + + r = parse_sec(val, &t); + if (r < 0) + return r; + if (t == USEC_INFINITY) { + *ret = RLIM_INFINITY; + return 0; + } + + u = (uint64_t) DIV_ROUND_UP(t, USEC_PER_SEC); + if (u >= (uint64_t) RLIM_INFINITY) + return -ERANGE; + + *ret = (rlim_t) u; + return 0; +} + +static int rlimit_parse_usec(const char *val, rlim_t *ret) { + usec_t t; + int r; + + assert(val); + assert(ret); + + if (streq(val, "infinity")) { + *ret = RLIM_INFINITY; + return 0; + } + + r = parse_time(val, &t, 1); + if (r < 0) + return r; + if (t == USEC_INFINITY) { + *ret = RLIM_INFINITY; + return 0; + } + + *ret = (rlim_t) t; + return 0; +} + +static int rlimit_parse_nice(const char *val, rlim_t *ret) { + uint64_t rl; + int r; + + /* So, Linux is weird. The range for RLIMIT_NICE is 40..1, mapping to the nice levels -20..19. However, the + * RLIMIT_NICE limit defaults to 0 by the kernel, i.e. a value that maps to nice level 20, which of course is + * bogus and does not exist. In order to permit parsing the RLIMIT_NICE of 0 here we hence implement a slight + * asymmetry: when parsing as positive nice level we permit 0..19. When parsing as negative nice level, we + * permit -20..0. But when parsing as raw resource limit value then we also allow the special value 0. + * + * Yeah, Linux is quality engineering sometimes... */ + + if (val[0] == '+') { + + /* Prefixed with "+": Parse as positive user-friendly nice value */ + r = safe_atou64(val + 1, &rl); + if (r < 0) + return r; + + if (rl >= PRIO_MAX) + return -ERANGE; + + rl = 20 - rl; + + } else if (val[0] == '-') { + + /* Prefixed with "-": Parse as negative user-friendly nice value */ + r = safe_atou64(val + 1, &rl); + if (r < 0) + return r; + + if (rl > (uint64_t) (-PRIO_MIN)) + return -ERANGE; + + rl = 20 + rl; + } else { + + /* Not prefixed: parse as raw resource limit value */ + r = safe_atou64(val, &rl); + if (r < 0) + return r; + + if (rl > (uint64_t) (20 - PRIO_MIN)) + return -ERANGE; + } + + *ret = (rlim_t) rl; + return 0; +} + +static int (*const rlimit_parse_table[_RLIMIT_MAX])(const char *val, rlim_t *ret) = { + [RLIMIT_CPU] = rlimit_parse_sec, + [RLIMIT_FSIZE] = rlimit_parse_size, + [RLIMIT_DATA] = rlimit_parse_size, + [RLIMIT_STACK] = rlimit_parse_size, + [RLIMIT_CORE] = rlimit_parse_size, + [RLIMIT_RSS] = rlimit_parse_size, + [RLIMIT_NOFILE] = rlimit_parse_u64, + [RLIMIT_AS] = rlimit_parse_size, + [RLIMIT_NPROC] = rlimit_parse_u64, + [RLIMIT_MEMLOCK] = rlimit_parse_size, + [RLIMIT_LOCKS] = rlimit_parse_u64, + [RLIMIT_SIGPENDING] = rlimit_parse_u64, + [RLIMIT_MSGQUEUE] = rlimit_parse_size, + [RLIMIT_NICE] = rlimit_parse_nice, + [RLIMIT_RTPRIO] = rlimit_parse_u64, + [RLIMIT_RTTIME] = rlimit_parse_usec, +}; + +int rlimit_parse_one(int resource, const char *val, rlim_t *ret) { + assert(val); + assert(ret); + + if (resource < 0) + return -EINVAL; + if (resource >= _RLIMIT_MAX) + return -EINVAL; + + return rlimit_parse_table[resource](val, ret); +} + +int rlimit_parse(int resource, const char *val, struct rlimit *ret) { + _cleanup_free_ char *hard = NULL, *soft = NULL; + rlim_t hl, sl; + int r; + + assert(val); + assert(ret); + + r = extract_first_word(&val, &soft, ":", EXTRACT_DONT_COALESCE_SEPARATORS); + if (r < 0) + return r; + if (r == 0) + return -EINVAL; + + r = rlimit_parse_one(resource, soft, &sl); + if (r < 0) + return r; + + r = extract_first_word(&val, &hard, ":", EXTRACT_DONT_COALESCE_SEPARATORS); + if (r < 0) + return r; + if (!isempty(val)) + return -EINVAL; + if (r == 0) + hl = sl; + else { + r = rlimit_parse_one(resource, hard, &hl); + if (r < 0) + return r; + if (sl > hl) + return -EILSEQ; + } + + *ret = (struct rlimit) { + .rlim_cur = sl, + .rlim_max = hl, + }; + + return 0; +} + +int rlimit_format(const struct rlimit *rl, char **ret) { + char *s = NULL; + + assert(rl); + assert(ret); + + if (rl->rlim_cur >= RLIM_INFINITY && rl->rlim_max >= RLIM_INFINITY) + s = strdup("infinity"); + else if (rl->rlim_cur >= RLIM_INFINITY) + (void) asprintf(&s, "infinity:" RLIM_FMT, rl->rlim_max); + else if (rl->rlim_max >= RLIM_INFINITY) + (void) asprintf(&s, RLIM_FMT ":infinity", rl->rlim_cur); + else if (rl->rlim_cur == rl->rlim_max) + (void) asprintf(&s, RLIM_FMT, rl->rlim_cur); + else + (void) asprintf(&s, RLIM_FMT ":" RLIM_FMT, rl->rlim_cur, rl->rlim_max); + + if (!s) + return -ENOMEM; + + *ret = s; + return 0; +} + +static const char* const rlimit_table[_RLIMIT_MAX] = { + [RLIMIT_AS] = "AS", + [RLIMIT_CORE] = "CORE", + [RLIMIT_CPU] = "CPU", + [RLIMIT_DATA] = "DATA", + [RLIMIT_FSIZE] = "FSIZE", + [RLIMIT_LOCKS] = "LOCKS", + [RLIMIT_MEMLOCK] = "MEMLOCK", + [RLIMIT_MSGQUEUE] = "MSGQUEUE", + [RLIMIT_NICE] = "NICE", + [RLIMIT_NOFILE] = "NOFILE", + [RLIMIT_NPROC] = "NPROC", + [RLIMIT_RSS] = "RSS", + [RLIMIT_RTPRIO] = "RTPRIO", + [RLIMIT_RTTIME] = "RTTIME", + [RLIMIT_SIGPENDING] = "SIGPENDING", + [RLIMIT_STACK] = "STACK", +}; + +DEFINE_STRING_TABLE_LOOKUP(rlimit, int); + +int rlimit_from_string_harder(const char *s) { + const char *suffix; + + /* The official prefix */ + suffix = startswith(s, "RLIMIT_"); + if (suffix) + return rlimit_from_string(suffix); + + /* Our own unit file setting prefix */ + suffix = startswith(s, "Limit"); + if (suffix) + return rlimit_from_string(suffix); + + return rlimit_from_string(s); +} + +void rlimit_free_all(struct rlimit **rl) { + int i; + + if (!rl) + return; + + for (i = 0; i < _RLIMIT_MAX; i++) + rl[i] = mfree(rl[i]); +} + +int rlimit_nofile_bump(int limit) { + int r; + + /* Bumps the (soft) RLIMIT_NOFILE resource limit as close as possible to the specified limit. If a negative + * limit is specified, bumps it to the maximum the kernel and the hard resource limit allows. This call should + * be used by all our programs that might need a lot of fds, and that know how to deal with high fd numbers + * (i.e. do not use select() — which chokes on fds >= 1024) */ + + if (limit < 0) + limit = read_nr_open(); + + if (limit < 3) + limit = 3; + + r = setrlimit_closest(RLIMIT_NOFILE, &RLIMIT_MAKE_CONST(limit)); + if (r < 0) + return log_debug_errno(r, "Failed to set RLIMIT_NOFILE: %m"); + + return 0; +} + +int rlimit_nofile_safe(void) { + struct rlimit rl; + + /* Resets RLIMIT_NOFILE's soft limit FD_SETSIZE (i.e. 1024), for compatibility with software still using + * select() */ + + if (getrlimit(RLIMIT_NOFILE, &rl) < 0) + return log_debug_errno(errno, "Failed to query RLIMIT_NOFILE: %m"); + + if (rl.rlim_cur <= FD_SETSIZE) + return 0; + + rl.rlim_cur = FD_SETSIZE; + if (setrlimit(RLIMIT_NOFILE, &rl) < 0) + return log_debug_errno(errno, "Failed to lower RLIMIT_NOFILE's soft limit to " RLIM_FMT ": %m", rl.rlim_cur); + + return 1; +} diff --git a/src/basic/rlimit-util.h b/src/basic/rlimit-util.h new file mode 100644 index 0000000..d4fca2b --- /dev/null +++ b/src/basic/rlimit-util.h @@ -0,0 +1,25 @@ +/* SPDX-License-Identifier: LGPL-2.1+ */ +#pragma once + +#include <sys/resource.h> + +#include "macro.h" + +const char *rlimit_to_string(int i) _const_; +int rlimit_from_string(const char *s) _pure_; +int rlimit_from_string_harder(const char *s) _pure_; + +int setrlimit_closest(int resource, const struct rlimit *rlim); +int setrlimit_closest_all(const struct rlimit * const *rlim, int *which_failed); + +int rlimit_parse_one(int resource, const char *val, rlim_t *ret); +int rlimit_parse(int resource, const char *val, struct rlimit *ret); + +int rlimit_format(const struct rlimit *rl, char **ret); + +void rlimit_free_all(struct rlimit **rl); + +#define RLIMIT_MAKE_CONST(lim) ((struct rlimit) { lim, lim }) + +int rlimit_nofile_bump(int limit); +int rlimit_nofile_safe(void); diff --git a/src/basic/rm-rf.c b/src/basic/rm-rf.c new file mode 100644 index 0000000..0c957c9 --- /dev/null +++ b/src/basic/rm-rf.c @@ -0,0 +1,220 @@ +/* SPDX-License-Identifier: LGPL-2.1+ */ + +#include <errno.h> +#include <fcntl.h> +#include <stdbool.h> +#include <stddef.h> +#include <sys/stat.h> +#include <sys/statfs.h> +#include <unistd.h> + +#include "alloc-util.h" +#include "btrfs-util.h" +#include "cgroup-util.h" +#include "dirent-util.h" +#include "fd-util.h" +#include "log.h" +#include "macro.h" +#include "mountpoint-util.h" +#include "path-util.h" +#include "rm-rf.h" +#include "stat-util.h" +#include "string-util.h" + +static bool is_physical_fs(const struct statfs *sfs) { + return !is_temporary_fs(sfs) && !is_cgroup_fs(sfs); +} + +int rm_rf_children(int fd, RemoveFlags flags, struct stat *root_dev) { + _cleanup_closedir_ DIR *d = NULL; + struct dirent *de; + int ret = 0, r; + struct statfs sfs; + + assert(fd >= 0); + + /* This returns the first error we run into, but nevertheless + * tries to go on. This closes the passed fd. */ + + if (!(flags & REMOVE_PHYSICAL)) { + + r = fstatfs(fd, &sfs); + if (r < 0) { + safe_close(fd); + return -errno; + } + + if (is_physical_fs(&sfs)) { + /* We refuse to clean physical file systems with this call, + * unless explicitly requested. This is extra paranoia just + * to be sure we never ever remove non-state data. */ + _cleanup_free_ char *path = NULL; + + (void) fd_get_path(fd, &path); + log_error("Attempted to remove disk file system under \"%s\", and we can't allow that.", + strna(path)); + + safe_close(fd); + return -EPERM; + } + } + + d = fdopendir(fd); + if (!d) { + safe_close(fd); + return errno == ENOENT ? 0 : -errno; + } + + FOREACH_DIRENT_ALL(de, d, return -errno) { + bool is_dir; + struct stat st; + + if (dot_or_dot_dot(de->d_name)) + continue; + + if (de->d_type == DT_UNKNOWN || + (de->d_type == DT_DIR && (root_dev || (flags & REMOVE_SUBVOLUME)))) { + if (fstatat(fd, de->d_name, &st, AT_SYMLINK_NOFOLLOW) < 0) { + if (ret == 0 && errno != ENOENT) + ret = -errno; + continue; + } + + is_dir = S_ISDIR(st.st_mode); + } else + is_dir = de->d_type == DT_DIR; + + if (is_dir) { + int subdir_fd; + + /* if root_dev is set, remove subdirectories only if device is same */ + if (root_dev && st.st_dev != root_dev->st_dev) + continue; + + subdir_fd = openat(fd, de->d_name, O_RDONLY|O_NONBLOCK|O_DIRECTORY|O_CLOEXEC|O_NOFOLLOW|O_NOATIME); + if (subdir_fd < 0) { + if (ret == 0 && errno != ENOENT) + ret = -errno; + continue; + } + + /* Stop at mount points */ + r = fd_is_mount_point(fd, de->d_name, 0); + if (r < 0) { + if (ret == 0 && r != -ENOENT) + ret = r; + + safe_close(subdir_fd); + continue; + } + if (r) { + safe_close(subdir_fd); + continue; + } + + if ((flags & REMOVE_SUBVOLUME) && st.st_ino == 256) { + + /* This could be a subvolume, try to remove it */ + + r = btrfs_subvol_remove_fd(fd, de->d_name, BTRFS_REMOVE_RECURSIVE|BTRFS_REMOVE_QUOTA); + if (r < 0) { + if (!IN_SET(r, -ENOTTY, -EINVAL)) { + if (ret == 0) + ret = r; + + safe_close(subdir_fd); + continue; + } + + /* ENOTTY, then it wasn't a + * btrfs subvolume, continue + * below. */ + } else { + /* It was a subvolume, continue. */ + safe_close(subdir_fd); + continue; + } + } + + /* We pass REMOVE_PHYSICAL here, to avoid + * doing the fstatfs() to check the file + * system type again for each directory */ + r = rm_rf_children(subdir_fd, flags | REMOVE_PHYSICAL, root_dev); + if (r < 0 && ret == 0) + ret = r; + + if (unlinkat(fd, de->d_name, AT_REMOVEDIR) < 0) { + if (ret == 0 && errno != ENOENT) + ret = -errno; + } + + } else if (!(flags & REMOVE_ONLY_DIRECTORIES)) { + + if (unlinkat(fd, de->d_name, 0) < 0) { + if (ret == 0 && errno != ENOENT) + ret = -errno; + } + } + } + return ret; +} + +int rm_rf(const char *path, RemoveFlags flags) { + int fd, r; + struct statfs s; + + assert(path); + + /* We refuse to clean the root file system with this + * call. This is extra paranoia to never cause a really + * seriously broken system. */ + if (path_equal_or_files_same(path, "/", AT_SYMLINK_NOFOLLOW)) + return log_error_errno(SYNTHETIC_ERRNO(EPERM), + "Attempted to remove entire root file system (\"%s\"), and we can't allow that.", + path); + + if (FLAGS_SET(flags, REMOVE_SUBVOLUME | REMOVE_ROOT | REMOVE_PHYSICAL)) { + /* Try to remove as subvolume first */ + r = btrfs_subvol_remove(path, BTRFS_REMOVE_RECURSIVE|BTRFS_REMOVE_QUOTA); + if (r >= 0) + return r; + + if (!IN_SET(r, -ENOTTY, -EINVAL, -ENOTDIR)) + return r; + + /* Not btrfs or not a subvolume */ + } + + fd = open(path, O_RDONLY|O_NONBLOCK|O_DIRECTORY|O_CLOEXEC|O_NOFOLLOW|O_NOATIME); + if (fd < 0) { + if (!IN_SET(errno, ENOTDIR, ELOOP)) + return -errno; + + if (!(flags & REMOVE_PHYSICAL)) { + if (statfs(path, &s) < 0) + return -errno; + + if (is_physical_fs(&s)) + return log_error_errno(SYNTHETIC_ERRNO(EPERM), + "Attempted to remove files from a disk file system under \"%s\", refusing.", + path); + } + + if ((flags & REMOVE_ROOT) && !(flags & REMOVE_ONLY_DIRECTORIES)) + if (unlink(path) < 0 && errno != ENOENT) + return -errno; + + return 0; + } + + r = rm_rf_children(fd, flags, NULL); + + if (flags & REMOVE_ROOT) { + if (rmdir(path) < 0) { + if (r == 0 && errno != ENOENT) + r = -errno; + } + } + + return r; +} diff --git a/src/basic/rm-rf.h b/src/basic/rm-rf.h new file mode 100644 index 0000000..3ee2b97 --- /dev/null +++ b/src/basic/rm-rf.h @@ -0,0 +1,32 @@ +/* SPDX-License-Identifier: LGPL-2.1+ */ +#pragma once + +#include <sys/stat.h> + +#include "util.h" + +typedef enum RemoveFlags { + REMOVE_ONLY_DIRECTORIES = 1 << 0, + REMOVE_ROOT = 1 << 1, + REMOVE_PHYSICAL = 1 << 2, /* if not set, only removes files on tmpfs, never physical file systems */ + REMOVE_SUBVOLUME = 1 << 3, +} RemoveFlags; + +int rm_rf_children(int fd, RemoveFlags flags, struct stat *root_dev); +int rm_rf(const char *path, RemoveFlags flags); + +/* Useful for usage with _cleanup_(), destroys a directory and frees the pointer */ +static inline void rm_rf_physical_and_free(char *p) { + PROTECT_ERRNO; + (void) rm_rf(p, REMOVE_ROOT|REMOVE_PHYSICAL); + free(p); +} +DEFINE_TRIVIAL_CLEANUP_FUNC(char*, rm_rf_physical_and_free); + +/* Similar as above, but also has magic btrfs subvolume powers */ +static inline void rm_rf_subvolume_and_free(char *p) { + PROTECT_ERRNO; + (void) rm_rf(p, REMOVE_ROOT|REMOVE_PHYSICAL|REMOVE_SUBVOLUME); + free(p); +} +DEFINE_TRIVIAL_CLEANUP_FUNC(char*, rm_rf_subvolume_and_free); diff --git a/src/basic/selinux-util.c b/src/basic/selinux-util.c new file mode 100644 index 0000000..dc06f3d --- /dev/null +++ b/src/basic/selinux-util.c @@ -0,0 +1,518 @@ +/* SPDX-License-Identifier: LGPL-2.1+ */ + +#include <errno.h> +#include <malloc.h> +#include <stddef.h> +#include <string.h> +#include <sys/stat.h> +#include <sys/time.h> +#include <sys/un.h> +#include <syslog.h> + +#if HAVE_SELINUX +#include <selinux/context.h> +#include <selinux/label.h> +#include <selinux/selinux.h> +#endif + +#include "alloc-util.h" +#include "fd-util.h" +#include "log.h" +#include "macro.h" +#include "path-util.h" +#include "selinux-util.h" +#include "stdio-util.h" +#include "time-util.h" +#include "util.h" + +#if HAVE_SELINUX +DEFINE_TRIVIAL_CLEANUP_FUNC(char*, freecon); +DEFINE_TRIVIAL_CLEANUP_FUNC(context_t, context_free); + +#define _cleanup_freecon_ _cleanup_(freeconp) +#define _cleanup_context_free_ _cleanup_(context_freep) + +static int cached_use = -1; +static struct selabel_handle *label_hnd = NULL; + +#define log_enforcing(...) log_full(security_getenforce() == 1 ? LOG_ERR : LOG_DEBUG, __VA_ARGS__) +#define log_enforcing_errno(r, ...) log_full_errno(security_getenforce() == 1 ? LOG_ERR : LOG_DEBUG, r, __VA_ARGS__) +#endif + +bool mac_selinux_use(void) { +#if HAVE_SELINUX + if (cached_use < 0) + cached_use = is_selinux_enabled() > 0; + + return cached_use; +#else + return false; +#endif +} + +void mac_selinux_retest(void) { +#if HAVE_SELINUX + cached_use = -1; +#endif +} + +int mac_selinux_init(void) { + int r = 0; + +#if HAVE_SELINUX + usec_t before_timestamp, after_timestamp; + struct mallinfo before_mallinfo, after_mallinfo; + + if (label_hnd) + return 0; + + if (!mac_selinux_use()) + return 0; + + before_mallinfo = mallinfo(); + before_timestamp = now(CLOCK_MONOTONIC); + + label_hnd = selabel_open(SELABEL_CTX_FILE, NULL, 0); + if (!label_hnd) { + log_enforcing_errno(errno, "Failed to initialize SELinux context: %m"); + r = security_getenforce() == 1 ? -errno : 0; + } else { + char timespan[FORMAT_TIMESPAN_MAX]; + int l; + + after_timestamp = now(CLOCK_MONOTONIC); + after_mallinfo = mallinfo(); + + l = after_mallinfo.uordblks > before_mallinfo.uordblks ? after_mallinfo.uordblks - before_mallinfo.uordblks : 0; + + log_debug("Successfully loaded SELinux database in %s, size on heap is %iK.", + format_timespan(timespan, sizeof(timespan), after_timestamp - before_timestamp, 0), + (l+1023)/1024); + } +#endif + + return r; +} + +void mac_selinux_finish(void) { + +#if HAVE_SELINUX + if (!label_hnd) + return; + + selabel_close(label_hnd); + label_hnd = NULL; +#endif +} + +int mac_selinux_fix(const char *path, LabelFixFlags flags) { + +#if HAVE_SELINUX + char procfs_path[STRLEN("/proc/self/fd/") + DECIMAL_STR_MAX(int)]; + _cleanup_freecon_ char* fcon = NULL; + _cleanup_close_ int fd = -1; + struct stat st; + int r; + + assert(path); + + /* if mac_selinux_init() wasn't called before we are a NOOP */ + if (!label_hnd) + return 0; + + /* Open the file as O_PATH, to pin it while we determine and adjust the label */ + fd = open(path, O_NOFOLLOW|O_CLOEXEC|O_PATH); + if (fd < 0) { + if ((flags & LABEL_IGNORE_ENOENT) && errno == ENOENT) + return 0; + + return -errno; + } + + if (fstat(fd, &st) < 0) + return -errno; + + if (selabel_lookup_raw(label_hnd, &fcon, path, st.st_mode) < 0) { + r = -errno; + + /* If there's no label to set, then exit without warning */ + if (r == -ENOENT) + return 0; + + goto fail; + } + + xsprintf(procfs_path, "/proc/self/fd/%i", fd); + if (setfilecon_raw(procfs_path, fcon) < 0) { + _cleanup_freecon_ char *oldcon = NULL; + + r = -errno; + + /* If the FS doesn't support labels, then exit without warning */ + if (r == -EOPNOTSUPP) + return 0; + + /* It the FS is read-only and we were told to ignore failures caused by that, suppress error */ + if (r == -EROFS && (flags & LABEL_IGNORE_EROFS)) + return 0; + + /* If the old label is identical to the new one, suppress any kind of error */ + if (getfilecon_raw(procfs_path, &oldcon) >= 0 && streq(fcon, oldcon)) + return 0; + + goto fail; + } + + return 0; + +fail: + log_enforcing_errno(r, "Unable to fix SELinux security context of %s: %m", path); + if (security_getenforce() == 1) + return r; +#endif + + return 0; +} + +int mac_selinux_apply(const char *path, const char *label) { + +#if HAVE_SELINUX + if (!mac_selinux_use()) + return 0; + + assert(path); + assert(label); + + if (setfilecon(path, label) < 0) { + log_enforcing_errno(errno, "Failed to set SELinux security context %s on path %s: %m", label, path); + if (security_getenforce() > 0) + return -errno; + } +#endif + return 0; +} + +int mac_selinux_get_create_label_from_exe(const char *exe, char **label) { + int r = -EOPNOTSUPP; + +#if HAVE_SELINUX + _cleanup_freecon_ char *mycon = NULL, *fcon = NULL; + security_class_t sclass; + + assert(exe); + assert(label); + + if (!mac_selinux_use()) + return -EOPNOTSUPP; + + r = getcon_raw(&mycon); + if (r < 0) + return -errno; + + r = getfilecon_raw(exe, &fcon); + if (r < 0) + return -errno; + + sclass = string_to_security_class("process"); + r = security_compute_create_raw(mycon, fcon, sclass, label); + if (r < 0) + return -errno; +#endif + + return r; +} + +int mac_selinux_get_our_label(char **label) { + int r = -EOPNOTSUPP; + + assert(label); + +#if HAVE_SELINUX + if (!mac_selinux_use()) + return -EOPNOTSUPP; + + r = getcon_raw(label); + if (r < 0) + return -errno; +#endif + + return r; +} + +int mac_selinux_get_child_mls_label(int socket_fd, const char *exe, const char *exec_label, char **label) { + int r = -EOPNOTSUPP; + +#if HAVE_SELINUX + _cleanup_freecon_ char *mycon = NULL, *peercon = NULL, *fcon = NULL; + _cleanup_context_free_ context_t pcon = NULL, bcon = NULL; + security_class_t sclass; + const char *range = NULL; + + assert(socket_fd >= 0); + assert(exe); + assert(label); + + if (!mac_selinux_use()) + return -EOPNOTSUPP; + + r = getcon_raw(&mycon); + if (r < 0) + return -errno; + + r = getpeercon_raw(socket_fd, &peercon); + if (r < 0) + return -errno; + + if (!exec_label) { + /* If there is no context set for next exec let's use context + of target executable */ + r = getfilecon_raw(exe, &fcon); + if (r < 0) + return -errno; + } + + bcon = context_new(mycon); + if (!bcon) + return -ENOMEM; + + pcon = context_new(peercon); + if (!pcon) + return -ENOMEM; + + range = context_range_get(pcon); + if (!range) + return -errno; + + r = context_range_set(bcon, range); + if (r) + return -errno; + + freecon(mycon); + mycon = strdup(context_str(bcon)); + if (!mycon) + return -ENOMEM; + + sclass = string_to_security_class("process"); + r = security_compute_create_raw(mycon, fcon, sclass, label); + if (r < 0) + return -errno; +#endif + + return r; +} + +char* mac_selinux_free(char *label) { + +#if HAVE_SELINUX + if (!label) + return NULL; + + if (!mac_selinux_use()) + return NULL; + + freecon(label); +#endif + + return NULL; +} + +#if HAVE_SELINUX +static int selinux_create_file_prepare_abspath(const char *abspath, mode_t mode) { + _cleanup_freecon_ char *filecon = NULL; + int r; + + assert(abspath); + assert(path_is_absolute(abspath)); + + r = selabel_lookup_raw(label_hnd, &filecon, abspath, mode); + if (r < 0) { + /* No context specified by the policy? Proceed without setting it. */ + if (errno == ENOENT) + return 0; + + log_enforcing_errno(errno, "Failed to determine SELinux security context for %s: %m", abspath); + } else { + if (setfscreatecon_raw(filecon) >= 0) + return 0; /* Success! */ + + log_enforcing_errno(errno, "Failed to set SELinux security context %s for %s: %m", filecon, abspath); + } + + if (security_getenforce() > 0) + return -errno; + + return 0; +} +#endif + +int mac_selinux_create_file_prepare_at(int dirfd, const char *path, mode_t mode) { + int r = 0; + +#if HAVE_SELINUX + _cleanup_free_ char *abspath = NULL; + + assert(path); + + if (!label_hnd) + return 0; + + if (!path_is_absolute(path)) { + _cleanup_free_ char *p = NULL; + + if (dirfd == AT_FDCWD) + r = safe_getcwd(&p); + else + r = fd_get_path(dirfd, &p); + if (r < 0) + return r; + + path = abspath = path_join(p, path); + if (!path) + return -ENOMEM; + } + + r = selinux_create_file_prepare_abspath(path, mode); +#endif + return r; +} + +int mac_selinux_create_file_prepare(const char *path, mode_t mode) { + int r = 0; + +#if HAVE_SELINUX + _cleanup_free_ char *abspath = NULL; + + assert(path); + + if (!label_hnd) + return 0; + + r = path_make_absolute_cwd(path, &abspath); + if (r < 0) + return r; + + r = selinux_create_file_prepare_abspath(abspath, mode); +#endif + return r; +} + +void mac_selinux_create_file_clear(void) { + +#if HAVE_SELINUX + PROTECT_ERRNO; + + if (!mac_selinux_use()) + return; + + setfscreatecon_raw(NULL); +#endif +} + +int mac_selinux_create_socket_prepare(const char *label) { + +#if HAVE_SELINUX + if (!mac_selinux_use()) + return 0; + + assert(label); + + if (setsockcreatecon(label) < 0) { + log_enforcing_errno(errno, "Failed to set SELinux security context %s for sockets: %m", label); + + if (security_getenforce() == 1) + return -errno; + } +#endif + + return 0; +} + +void mac_selinux_create_socket_clear(void) { + +#if HAVE_SELINUX + PROTECT_ERRNO; + + if (!mac_selinux_use()) + return; + + setsockcreatecon_raw(NULL); +#endif +} + +int mac_selinux_bind(int fd, const struct sockaddr *addr, socklen_t addrlen) { + + /* Binds a socket and label its file system object according to the SELinux policy */ + +#if HAVE_SELINUX + _cleanup_freecon_ char *fcon = NULL; + const struct sockaddr_un *un; + bool context_changed = false; + char *path; + int r; + + assert(fd >= 0); + assert(addr); + assert(addrlen >= sizeof(sa_family_t)); + + if (!label_hnd) + goto skipped; + + /* Filter out non-local sockets */ + if (addr->sa_family != AF_UNIX) + goto skipped; + + /* Filter out anonymous sockets */ + if (addrlen < offsetof(struct sockaddr_un, sun_path) + 1) + goto skipped; + + /* Filter out abstract namespace sockets */ + un = (const struct sockaddr_un*) addr; + if (un->sun_path[0] == 0) + goto skipped; + + path = strndupa(un->sun_path, addrlen - offsetof(struct sockaddr_un, sun_path)); + + if (path_is_absolute(path)) + r = selabel_lookup_raw(label_hnd, &fcon, path, S_IFSOCK); + else { + _cleanup_free_ char *newpath = NULL; + + r = path_make_absolute_cwd(path, &newpath); + if (r < 0) + return r; + + r = selabel_lookup_raw(label_hnd, &fcon, newpath, S_IFSOCK); + } + + if (r < 0) { + /* No context specified by the policy? Proceed without setting it */ + if (errno == ENOENT) + goto skipped; + + log_enforcing_errno(errno, "Failed to determine SELinux security context for %s: %m", path); + if (security_getenforce() > 0) + return -errno; + + } else { + if (setfscreatecon_raw(fcon) < 0) { + log_enforcing_errno(errno, "Failed to set SELinux security context %s for %s: %m", fcon, path); + if (security_getenforce() > 0) + return -errno; + } else + context_changed = true; + } + + r = bind(fd, addr, addrlen) < 0 ? -errno : 0; + + if (context_changed) + setfscreatecon_raw(NULL); + + return r; + +skipped: +#endif + if (bind(fd, addr, addrlen) < 0) + return -errno; + + return 0; +} diff --git a/src/basic/selinux-util.h b/src/basic/selinux-util.h new file mode 100644 index 0000000..bd5207c --- /dev/null +++ b/src/basic/selinux-util.h @@ -0,0 +1,34 @@ +/* SPDX-License-Identifier: LGPL-2.1+ */ +#pragma once + +#include <stdbool.h> +#include <sys/socket.h> +#include <sys/types.h> + +#include "macro.h" +#include "label.h" + +bool mac_selinux_use(void); +void mac_selinux_retest(void); + +int mac_selinux_init(void); +void mac_selinux_finish(void); + +int mac_selinux_fix(const char *path, LabelFixFlags flags); +int mac_selinux_apply(const char *path, const char *label); + +int mac_selinux_get_create_label_from_exe(const char *exe, char **label); +int mac_selinux_get_our_label(char **label); +int mac_selinux_get_child_mls_label(int socket_fd, const char *exe, const char *exec_label, char **label); +char* mac_selinux_free(char *label); + +int mac_selinux_create_file_prepare(const char *path, mode_t mode); +int mac_selinux_create_file_prepare_at(int dirfd, const char *path, mode_t mode); +void mac_selinux_create_file_clear(void); + +int mac_selinux_create_socket_prepare(const char *label); +void mac_selinux_create_socket_clear(void); + +int mac_selinux_bind(int fd, const struct sockaddr *addr, socklen_t addrlen); + +DEFINE_TRIVIAL_CLEANUP_FUNC(char*, mac_selinux_free); diff --git a/src/basic/set.h b/src/basic/set.h new file mode 100644 index 0000000..2a80632 --- /dev/null +++ b/src/basic/set.h @@ -0,0 +1,130 @@ +/* SPDX-License-Identifier: LGPL-2.1+ */ +#pragma once + +#include "extract-word.h" +#include "hashmap.h" +#include "macro.h" + +Set *internal_set_new(const struct hash_ops *hash_ops HASHMAP_DEBUG_PARAMS); +#define set_new(ops) internal_set_new(ops HASHMAP_DEBUG_SRC_ARGS) + +static inline Set *set_free(Set *s) { + return (Set*) internal_hashmap_free(HASHMAP_BASE(s), NULL, NULL); +} + +static inline Set *set_free_free(Set *s) { + return (Set*) internal_hashmap_free(HASHMAP_BASE(s), free, NULL); +} + +/* no set_free_free_free */ + +static inline Set *set_copy(Set *s) { + return (Set*) internal_hashmap_copy(HASHMAP_BASE(s)); +} + +int internal_set_ensure_allocated(Set **s, const struct hash_ops *hash_ops HASHMAP_DEBUG_PARAMS); +#define set_ensure_allocated(h, ops) internal_set_ensure_allocated(h, ops HASHMAP_DEBUG_SRC_ARGS) + +int set_put(Set *s, const void *key); +/* no set_update */ +/* no set_replace */ +static inline void *set_get(Set *s, void *key) { + return internal_hashmap_get(HASHMAP_BASE(s), key); +} +/* no set_get2 */ + +static inline bool set_contains(Set *s, const void *key) { + return internal_hashmap_contains(HASHMAP_BASE(s), key); +} + +static inline void *set_remove(Set *s, const void *key) { + return internal_hashmap_remove(HASHMAP_BASE(s), key); +} + +/* no set_remove2 */ +/* no set_remove_value */ +int set_remove_and_put(Set *s, const void *old_key, const void *new_key); +/* no set_remove_and_replace */ +int set_merge(Set *s, Set *other); + +static inline int set_reserve(Set *h, unsigned entries_add) { + return internal_hashmap_reserve(HASHMAP_BASE(h), entries_add); +} + +static inline int set_move(Set *s, Set *other) { + return internal_hashmap_move(HASHMAP_BASE(s), HASHMAP_BASE(other)); +} + +static inline int set_move_one(Set *s, Set *other, const void *key) { + return internal_hashmap_move_one(HASHMAP_BASE(s), HASHMAP_BASE(other), key); +} + +static inline unsigned set_size(Set *s) { + return internal_hashmap_size(HASHMAP_BASE(s)); +} + +static inline bool set_isempty(Set *s) { + return set_size(s) == 0; +} + +static inline unsigned set_buckets(Set *s) { + return internal_hashmap_buckets(HASHMAP_BASE(s)); +} + +bool set_iterate(Set *s, Iterator *i, void **value); + +static inline void set_clear(Set *s) { + internal_hashmap_clear(HASHMAP_BASE(s), NULL, NULL); +} + +static inline void set_clear_free(Set *s) { + internal_hashmap_clear(HASHMAP_BASE(s), free, NULL); +} + +/* no set_clear_free_free */ + +static inline void *set_steal_first(Set *s) { + return internal_hashmap_first_key_and_value(HASHMAP_BASE(s), true, NULL); +} + +#define set_clear_with_destructor(_s, _f) \ + ({ \ + void *_item; \ + while ((_item = set_steal_first(_s))) \ + _f(_item); \ + }) +#define set_free_with_destructor(_s, _f) \ + ({ \ + set_clear_with_destructor(_s, _f); \ + set_free(_s); \ + }) + +/* no set_steal_first_key */ +/* no set_first_key */ + +static inline void *set_first(Set *s) { + return internal_hashmap_first_key_and_value(HASHMAP_BASE(s), false, NULL); +} + +/* no set_next */ + +static inline char **set_get_strv(Set *s) { + return internal_hashmap_get_strv(HASHMAP_BASE(s)); +} + +int set_consume(Set *s, void *value); +int set_put_strdup(Set *s, const char *p); +int set_put_strdupv(Set *s, char **l); +int set_put_strsplit(Set *s, const char *v, const char *separators, ExtractFlags flags); + +#define SET_FOREACH(e, s, i) \ + for ((i) = ITERATOR_FIRST; set_iterate((s), &(i), (void**)&(e)); ) + +#define SET_FOREACH_MOVE(e, d, s) \ + for (; ({ e = set_first(s); assert_se(!e || set_move_one(d, s, e) >= 0); e; }); ) + +DEFINE_TRIVIAL_CLEANUP_FUNC(Set*, set_free); +DEFINE_TRIVIAL_CLEANUP_FUNC(Set*, set_free_free); + +#define _cleanup_set_free_ _cleanup_(set_freep) +#define _cleanup_set_free_free_ _cleanup_(set_free_freep) diff --git a/src/basic/sigbus.c b/src/basic/sigbus.c new file mode 100644 index 0000000..d5254ea --- /dev/null +++ b/src/basic/sigbus.c @@ -0,0 +1,139 @@ +/* SPDX-License-Identifier: LGPL-2.1+ */ + +#include <errno.h> +#include <signal.h> +#include <stddef.h> +#include <sys/mman.h> + +#include "macro.h" +#include "sigbus.h" +#include "util.h" + +#define SIGBUS_QUEUE_MAX 64 + +static struct sigaction old_sigaction; +static unsigned n_installed = 0; + +/* We maintain a fixed size list of page addresses that triggered a + SIGBUS. We access with list with atomic operations, so that we + don't have to deal with locks between signal handler and main + programs in possibly multiple threads. */ + +static void* volatile sigbus_queue[SIGBUS_QUEUE_MAX]; +static volatile sig_atomic_t n_sigbus_queue = 0; + +static void sigbus_push(void *addr) { + unsigned u; + + assert(addr); + + /* Find a free place, increase the number of entries and leave, if we can */ + for (u = 0; u < SIGBUS_QUEUE_MAX; u++) + if (__sync_bool_compare_and_swap(&sigbus_queue[u], NULL, addr)) { + __sync_fetch_and_add(&n_sigbus_queue, 1); + return; + } + + /* If we can't, make sure the queue size is out of bounds, to + * mark it as overflow */ + for (;;) { + unsigned c; + + __sync_synchronize(); + c = n_sigbus_queue; + + if (c > SIGBUS_QUEUE_MAX) /* already overflow */ + return; + + if (__sync_bool_compare_and_swap(&n_sigbus_queue, c, c + SIGBUS_QUEUE_MAX)) + return; + } +} + +int sigbus_pop(void **ret) { + assert(ret); + + for (;;) { + unsigned u, c; + + __sync_synchronize(); + c = n_sigbus_queue; + + if (_likely_(c == 0)) + return 0; + + if (_unlikely_(c >= SIGBUS_QUEUE_MAX)) + return -EOVERFLOW; + + for (u = 0; u < SIGBUS_QUEUE_MAX; u++) { + void *addr; + + addr = sigbus_queue[u]; + if (!addr) + continue; + + if (__sync_bool_compare_and_swap(&sigbus_queue[u], addr, NULL)) { + __sync_fetch_and_sub(&n_sigbus_queue, 1); + *ret = addr; + return 1; + } + } + } +} + +static void sigbus_handler(int sn, siginfo_t *si, void *data) { + unsigned long ul; + void *aligned; + + assert(sn == SIGBUS); + assert(si); + + if (si->si_code != BUS_ADRERR || !si->si_addr) { + assert_se(sigaction(SIGBUS, &old_sigaction, NULL) == 0); + raise(SIGBUS); + return; + } + + ul = (unsigned long) si->si_addr; + ul = ul / page_size(); + ul = ul * page_size(); + aligned = (void*) ul; + + /* Let's remember which address failed */ + sigbus_push(aligned); + + /* Replace mapping with an anonymous page, so that the + * execution can continue, however with a zeroed out page */ + assert_se(mmap(aligned, page_size(), PROT_READ|PROT_WRITE, MAP_PRIVATE|MAP_ANONYMOUS|MAP_FIXED, -1, 0) == aligned); +} + +void sigbus_install(void) { + struct sigaction sa = { + .sa_sigaction = sigbus_handler, + .sa_flags = SA_SIGINFO, + }; + + /* make sure that sysconf() is not called from a signal handler because + * it is not guaranteed to be async-signal-safe since POSIX.1-2008 */ + (void) page_size(); + + n_installed++; + + if (n_installed == 1) + assert_se(sigaction(SIGBUS, &sa, &old_sigaction) == 0); + + return; +} + +void sigbus_reset(void) { + + if (n_installed <= 0) + return; + + n_installed--; + + if (n_installed == 0) + assert_se(sigaction(SIGBUS, &old_sigaction, NULL) == 0); + + return; +} diff --git a/src/basic/sigbus.h b/src/basic/sigbus.h new file mode 100644 index 0000000..459e19f --- /dev/null +++ b/src/basic/sigbus.h @@ -0,0 +1,7 @@ +/* SPDX-License-Identifier: LGPL-2.1+ */ +#pragma once + +void sigbus_install(void); +void sigbus_reset(void); + +int sigbus_pop(void **ret); diff --git a/src/basic/signal-util.c b/src/basic/signal-util.c new file mode 100644 index 0000000..fb8a63f --- /dev/null +++ b/src/basic/signal-util.c @@ -0,0 +1,290 @@ +/* SPDX-License-Identifier: LGPL-2.1+ */ + +#include <errno.h> +#include <stdarg.h> +#include <stdio.h> + +#include "macro.h" +#include "parse-util.h" +#include "signal-util.h" +#include "stdio-util.h" +#include "string-table.h" +#include "string-util.h" + +int reset_all_signal_handlers(void) { + static const struct sigaction sa = { + .sa_handler = SIG_DFL, + .sa_flags = SA_RESTART, + }; + int sig, r = 0; + + for (sig = 1; sig < _NSIG; sig++) { + + /* These two cannot be caught... */ + if (IN_SET(sig, SIGKILL, SIGSTOP)) + continue; + + /* On Linux the first two RT signals are reserved by + * glibc, and sigaction() will return EINVAL for them. */ + if ((sigaction(sig, &sa, NULL) < 0)) + if (errno != EINVAL && r >= 0) + r = -errno; + } + + return r; +} + +int reset_signal_mask(void) { + sigset_t ss; + + if (sigemptyset(&ss) < 0) + return -errno; + + if (sigprocmask(SIG_SETMASK, &ss, NULL) < 0) + return -errno; + + return 0; +} + +static int sigaction_many_ap(const struct sigaction *sa, int sig, va_list ap) { + int r = 0; + + /* negative signal ends the list. 0 signal is skipped. */ + + if (sig < 0) + return 0; + + if (sig > 0) { + if (sigaction(sig, sa, NULL) < 0) + r = -errno; + } + + while ((sig = va_arg(ap, int)) >= 0) { + + if (sig == 0) + continue; + + if (sigaction(sig, sa, NULL) < 0) { + if (r >= 0) + r = -errno; + } + } + + return r; +} + +int sigaction_many(const struct sigaction *sa, ...) { + va_list ap; + int r; + + va_start(ap, sa); + r = sigaction_many_ap(sa, 0, ap); + va_end(ap); + + return r; +} + +int ignore_signals(int sig, ...) { + + static const struct sigaction sa = { + .sa_handler = SIG_IGN, + .sa_flags = SA_RESTART, + }; + + va_list ap; + int r; + + va_start(ap, sig); + r = sigaction_many_ap(&sa, sig, ap); + va_end(ap); + + return r; +} + +int default_signals(int sig, ...) { + + static const struct sigaction sa = { + .sa_handler = SIG_DFL, + .sa_flags = SA_RESTART, + }; + + va_list ap; + int r; + + va_start(ap, sig); + r = sigaction_many_ap(&sa, sig, ap); + va_end(ap); + + return r; +} + +static int sigset_add_many_ap(sigset_t *ss, va_list ap) { + int sig, r = 0; + + assert(ss); + + while ((sig = va_arg(ap, int)) >= 0) { + + if (sig == 0) + continue; + + if (sigaddset(ss, sig) < 0) { + if (r >= 0) + r = -errno; + } + } + + return r; +} + +int sigset_add_many(sigset_t *ss, ...) { + va_list ap; + int r; + + va_start(ap, ss); + r = sigset_add_many_ap(ss, ap); + va_end(ap); + + return r; +} + +int sigprocmask_many(int how, sigset_t *old, ...) { + va_list ap; + sigset_t ss; + int r; + + if (sigemptyset(&ss) < 0) + return -errno; + + va_start(ap, old); + r = sigset_add_many_ap(&ss, ap); + va_end(ap); + + if (r < 0) + return r; + + if (sigprocmask(how, &ss, old) < 0) + return -errno; + + return 0; +} + +static const char *const __signal_table[] = { + [SIGHUP] = "HUP", + [SIGINT] = "INT", + [SIGQUIT] = "QUIT", + [SIGILL] = "ILL", + [SIGTRAP] = "TRAP", + [SIGABRT] = "ABRT", + [SIGBUS] = "BUS", + [SIGFPE] = "FPE", + [SIGKILL] = "KILL", + [SIGUSR1] = "USR1", + [SIGSEGV] = "SEGV", + [SIGUSR2] = "USR2", + [SIGPIPE] = "PIPE", + [SIGALRM] = "ALRM", + [SIGTERM] = "TERM", +#ifdef SIGSTKFLT + [SIGSTKFLT] = "STKFLT", /* Linux on SPARC doesn't know SIGSTKFLT */ +#endif + [SIGCHLD] = "CHLD", + [SIGCONT] = "CONT", + [SIGSTOP] = "STOP", + [SIGTSTP] = "TSTP", + [SIGTTIN] = "TTIN", + [SIGTTOU] = "TTOU", + [SIGURG] = "URG", + [SIGXCPU] = "XCPU", + [SIGXFSZ] = "XFSZ", + [SIGVTALRM] = "VTALRM", + [SIGPROF] = "PROF", + [SIGWINCH] = "WINCH", + [SIGIO] = "IO", + [SIGPWR] = "PWR", + [SIGSYS] = "SYS" +}; + +DEFINE_PRIVATE_STRING_TABLE_LOOKUP(__signal, int); + +const char *signal_to_string(int signo) { + static thread_local char buf[STRLEN("RTMIN+") + DECIMAL_STR_MAX(int) + 1]; + const char *name; + + name = __signal_to_string(signo); + if (name) + return name; + + if (signo >= SIGRTMIN && signo <= SIGRTMAX) + xsprintf(buf, "RTMIN+%d", signo - SIGRTMIN); + else + xsprintf(buf, "%d", signo); + + return buf; +} + +int signal_from_string(const char *s) { + const char *p; + int signo, r; + + /* Check that the input is a signal number. */ + if (safe_atoi(s, &signo) >= 0) { + if (SIGNAL_VALID(signo)) + return signo; + else + return -ERANGE; + } + + /* Drop "SIG" prefix. */ + if (startswith(s, "SIG")) + s += 3; + + /* Check that the input is a signal name. */ + signo = __signal_from_string(s); + if (signo > 0) + return signo; + + /* Check that the input is RTMIN or + * RTMIN+n (0 <= n <= SIGRTMAX-SIGRTMIN). */ + p = startswith(s, "RTMIN"); + if (p) { + if (*p == '\0') + return SIGRTMIN; + if (*p != '+') + return -EINVAL; + + r = safe_atoi(p, &signo); + if (r < 0) + return r; + + if (signo < 0 || signo > SIGRTMAX - SIGRTMIN) + return -ERANGE; + + return signo + SIGRTMIN; + } + + /* Check that the input is RTMAX or + * RTMAX-n (0 <= n <= SIGRTMAX-SIGRTMIN). */ + p = startswith(s, "RTMAX"); + if (p) { + if (*p == '\0') + return SIGRTMAX; + if (*p != '-') + return -EINVAL; + + r = safe_atoi(p, &signo); + if (r < 0) + return r; + + if (signo > 0 || signo < SIGRTMIN - SIGRTMAX) + return -ERANGE; + + return signo + SIGRTMAX; + } + + return -EINVAL; +} + +void nop_signal_handler(int sig) { + /* nothing here */ +} diff --git a/src/basic/signal-util.h b/src/basic/signal-util.h new file mode 100644 index 0000000..92f2804 --- /dev/null +++ b/src/basic/signal-util.h @@ -0,0 +1,43 @@ +/* SPDX-License-Identifier: LGPL-2.1+ */ +#pragma once + +#include <signal.h> + +#include "macro.h" + +int reset_all_signal_handlers(void); +int reset_signal_mask(void); + +int ignore_signals(int sig, ...); +int default_signals(int sig, ...); +int sigaction_many(const struct sigaction *sa, ...); + +int sigset_add_many(sigset_t *ss, ...); +int sigprocmask_many(int how, sigset_t *old, ...); + +const char *signal_to_string(int i) _const_; +int signal_from_string(const char *s) _pure_; + +void nop_signal_handler(int sig); + +static inline void block_signals_reset(sigset_t *ss) { + assert_se(sigprocmask(SIG_SETMASK, ss, NULL) >= 0); +} + +#define BLOCK_SIGNALS(...) \ + _cleanup_(block_signals_reset) _unused_ sigset_t _saved_sigset = ({ \ + sigset_t _t; \ + assert_se(sigprocmask_many(SIG_BLOCK, &_t, __VA_ARGS__, -1) >= 0); \ + _t; \ + }) + +static inline bool SIGNAL_VALID(int signo) { + return signo > 0 && signo < _NSIG; +} + +static inline const char* signal_to_string_with_check(int n) { + if (!SIGNAL_VALID(n)) + return NULL; + + return signal_to_string(n); +} diff --git a/src/basic/siphash24.c b/src/basic/siphash24.c new file mode 100644 index 0000000..6118081 --- /dev/null +++ b/src/basic/siphash24.c @@ -0,0 +1,200 @@ +/* + SipHash reference C implementation + + Written in 2012 by + Jean-Philippe Aumasson <jeanphilippe.aumasson@gmail.com> + Daniel J. Bernstein <djb@cr.yp.to> + + To the extent possible under law, the author(s) have dedicated all copyright + and related and neighboring rights to this software to the public domain + worldwide. This software is distributed without any warranty. + + You should have received a copy of the CC0 Public Domain Dedication along with + this software. If not, see <http://creativecommons.org/publicdomain/zero/1.0/>. + + (Minimal changes made by Lennart Poettering, to make clean for inclusion in systemd) + (Refactored by Tom Gundersen to split up in several functions and follow systemd + coding style) +*/ + +#include <stdio.h> + +#include "macro.h" +#include "siphash24.h" +#include "unaligned.h" + +static uint64_t rotate_left(uint64_t x, uint8_t b) { + assert(b < 64); + + return (x << b) | (x >> (64 - b)); +} + +static void sipround(struct siphash *state) { + assert(state); + + state->v0 += state->v1; + state->v1 = rotate_left(state->v1, 13); + state->v1 ^= state->v0; + state->v0 = rotate_left(state->v0, 32); + state->v2 += state->v3; + state->v3 = rotate_left(state->v3, 16); + state->v3 ^= state->v2; + state->v0 += state->v3; + state->v3 = rotate_left(state->v3, 21); + state->v3 ^= state->v0; + state->v2 += state->v1; + state->v1 = rotate_left(state->v1, 17); + state->v1 ^= state->v2; + state->v2 = rotate_left(state->v2, 32); +} + +void siphash24_init(struct siphash *state, const uint8_t k[static 16]) { + uint64_t k0, k1; + + assert(state); + assert(k); + + k0 = unaligned_read_le64(k); + k1 = unaligned_read_le64(k + 8); + + *state = (struct siphash) { + /* "somepseudorandomlygeneratedbytes" */ + .v0 = 0x736f6d6570736575ULL ^ k0, + .v1 = 0x646f72616e646f6dULL ^ k1, + .v2 = 0x6c7967656e657261ULL ^ k0, + .v3 = 0x7465646279746573ULL ^ k1, + .padding = 0, + .inlen = 0, + }; +} + +void siphash24_compress(const void *_in, size_t inlen, struct siphash *state) { + + const uint8_t *in = _in; + const uint8_t *end = in + inlen; + size_t left = state->inlen & 7; + uint64_t m; + + assert(in); + assert(state); + + /* Update total length */ + state->inlen += inlen; + + /* If padding exists, fill it out */ + if (left > 0) { + for ( ; in < end && left < 8; in ++, left ++) + state->padding |= ((uint64_t) *in) << (left * 8); + + if (in == end && left < 8) + /* We did not have enough input to fill out the padding completely */ + return; + +#if ENABLE_DEBUG_SIPHASH + printf("(%3zu) v0 %08x %08x\n", state->inlen, (uint32_t) (state->v0 >> 32), (uint32_t) state->v0); + printf("(%3zu) v1 %08x %08x\n", state->inlen, (uint32_t) (state->v1 >> 32), (uint32_t) state->v1); + printf("(%3zu) v2 %08x %08x\n", state->inlen, (uint32_t) (state->v2 >> 32), (uint32_t) state->v2); + printf("(%3zu) v3 %08x %08x\n", state->inlen, (uint32_t) (state->v3 >> 32), (uint32_t) state->v3); + printf("(%3zu) compress padding %08x %08x\n", state->inlen, (uint32_t) (state->padding >> 32), (uint32_t)state->padding); +#endif + + state->v3 ^= state->padding; + sipround(state); + sipround(state); + state->v0 ^= state->padding; + + state->padding = 0; + } + + end -= (state->inlen % sizeof(uint64_t)); + + for ( ; in < end; in += 8) { + m = unaligned_read_le64(in); +#if ENABLE_DEBUG_SIPHASH + printf("(%3zu) v0 %08x %08x\n", state->inlen, (uint32_t) (state->v0 >> 32), (uint32_t) state->v0); + printf("(%3zu) v1 %08x %08x\n", state->inlen, (uint32_t) (state->v1 >> 32), (uint32_t) state->v1); + printf("(%3zu) v2 %08x %08x\n", state->inlen, (uint32_t) (state->v2 >> 32), (uint32_t) state->v2); + printf("(%3zu) v3 %08x %08x\n", state->inlen, (uint32_t) (state->v3 >> 32), (uint32_t) state->v3); + printf("(%3zu) compress %08x %08x\n", state->inlen, (uint32_t) (m >> 32), (uint32_t) m); +#endif + state->v3 ^= m; + sipround(state); + sipround(state); + state->v0 ^= m; + } + + left = state->inlen & 7; + switch (left) { + case 7: + state->padding |= ((uint64_t) in[6]) << 48; + _fallthrough_; + case 6: + state->padding |= ((uint64_t) in[5]) << 40; + _fallthrough_; + case 5: + state->padding |= ((uint64_t) in[4]) << 32; + _fallthrough_; + case 4: + state->padding |= ((uint64_t) in[3]) << 24; + _fallthrough_; + case 3: + state->padding |= ((uint64_t) in[2]) << 16; + _fallthrough_; + case 2: + state->padding |= ((uint64_t) in[1]) << 8; + _fallthrough_; + case 1: + state->padding |= ((uint64_t) in[0]); + _fallthrough_; + case 0: + break; + } +} + +uint64_t siphash24_finalize(struct siphash *state) { + uint64_t b; + + assert(state); + + b = state->padding | (((uint64_t) state->inlen) << 56); + +#if ENABLE_DEBUG_SIPHASH + printf("(%3zu) v0 %08x %08x\n", state->inlen, (uint32_t) (state->v0 >> 32), (uint32_t) state->v0); + printf("(%3zu) v1 %08x %08x\n", state->inlen, (uint32_t) (state->v1 >> 32), (uint32_t) state->v1); + printf("(%3zu) v2 %08x %08x\n", state->inlen, (uint32_t) (state->v2 >> 32), (uint32_t) state->v2); + printf("(%3zu) v3 %08x %08x\n", state->inlen, (uint32_t) (state->v3 >> 32), (uint32_t) state->v3); + printf("(%3zu) padding %08x %08x\n", state->inlen, (uint32_t) (state->padding >> 32), (uint32_t) state->padding); +#endif + + state->v3 ^= b; + sipround(state); + sipround(state); + state->v0 ^= b; + +#if ENABLE_DEBUG_SIPHASH + printf("(%3zu) v0 %08x %08x\n", state->inlen, (uint32_t) (state->v0 >> 32), (uint32_t) state->v0); + printf("(%3zu) v1 %08x %08x\n", state->inlen, (uint32_t) (state->v1 >> 32), (uint32_t) state->v1); + printf("(%3zu) v2 %08x %08x\n", state->inlen, (uint32_t) (state->v2 >> 32), (uint32_t) state->v2); + printf("(%3zu) v3 %08x %08x\n", state->inlen, (uint32_t) (state->v3 >> 32), (uint32_t) state->v3); +#endif + state->v2 ^= 0xff; + + sipround(state); + sipround(state); + sipround(state); + sipround(state); + + return state->v0 ^ state->v1 ^ state->v2 ^ state->v3; +} + +uint64_t siphash24(const void *in, size_t inlen, const uint8_t k[static 16]) { + struct siphash state; + + assert(in); + assert(k); + + siphash24_init(&state, k); + siphash24_compress(in, inlen, &state); + + return siphash24_finalize(&state); +} diff --git a/src/basic/siphash24.h b/src/basic/siphash24.h new file mode 100644 index 0000000..67c4f75 --- /dev/null +++ b/src/basic/siphash24.h @@ -0,0 +1,28 @@ +#pragma once + +#include <inttypes.h> +#include <stddef.h> +#include <stdint.h> +#include <string.h> +#include <sys/types.h> + +struct siphash { + uint64_t v0; + uint64_t v1; + uint64_t v2; + uint64_t v3; + uint64_t padding; + size_t inlen; +}; + +void siphash24_init(struct siphash *state, const uint8_t k[static 16]); +void siphash24_compress(const void *in, size_t inlen, struct siphash *state); +#define siphash24_compress_byte(byte, state) siphash24_compress((const uint8_t[]) { (byte) }, 1, (state)) + +uint64_t siphash24_finalize(struct siphash *state); + +uint64_t siphash24(const void *in, size_t inlen, const uint8_t k[static 16]); + +static inline uint64_t siphash24_string(const char *s, const uint8_t k[static 16]) { + return siphash24(s, strlen(s) + 1, k); +} diff --git a/src/basic/smack-util.c b/src/basic/smack-util.c new file mode 100644 index 0000000..123d00e --- /dev/null +++ b/src/basic/smack-util.c @@ -0,0 +1,289 @@ +/* SPDX-License-Identifier: LGPL-2.1+ */ +/*** + Copyright © 2013 Intel Corporation + + Author: Auke Kok <auke-jan.h.kok@intel.com> +***/ + +#include <errno.h> +#include <fcntl.h> +#include <string.h> +#include <sys/stat.h> +#include <sys/xattr.h> +#include <unistd.h> + +#include "alloc-util.h" +#include "fd-util.h" +#include "fileio.h" +#include "log.h" +#include "macro.h" +#include "path-util.h" +#include "process-util.h" +#include "smack-util.h" +#include "stdio-util.h" +#include "string-table.h" +#include "xattr-util.h" + +#if ENABLE_SMACK +bool mac_smack_use(void) { + static int cached_use = -1; + + if (cached_use < 0) + cached_use = access("/sys/fs/smackfs/", F_OK) >= 0; + + return cached_use; +} + +static const char* const smack_attr_table[_SMACK_ATTR_MAX] = { + [SMACK_ATTR_ACCESS] = "security.SMACK64", + [SMACK_ATTR_EXEC] = "security.SMACK64EXEC", + [SMACK_ATTR_MMAP] = "security.SMACK64MMAP", + [SMACK_ATTR_TRANSMUTE] = "security.SMACK64TRANSMUTE", + [SMACK_ATTR_IPIN] = "security.SMACK64IPIN", + [SMACK_ATTR_IPOUT] = "security.SMACK64IPOUT", +}; + +DEFINE_STRING_TABLE_LOOKUP(smack_attr, SmackAttr); + +int mac_smack_read(const char *path, SmackAttr attr, char **label) { + assert(path); + assert(attr >= 0 && attr < _SMACK_ATTR_MAX); + assert(label); + + if (!mac_smack_use()) + return 0; + + return getxattr_malloc(path, smack_attr_to_string(attr), label, true); +} + +int mac_smack_read_fd(int fd, SmackAttr attr, char **label) { + assert(fd >= 0); + assert(attr >= 0 && attr < _SMACK_ATTR_MAX); + assert(label); + + if (!mac_smack_use()) + return 0; + + return fgetxattr_malloc(fd, smack_attr_to_string(attr), label); +} + +int mac_smack_apply(const char *path, SmackAttr attr, const char *label) { + int r; + + assert(path); + assert(attr >= 0 && attr < _SMACK_ATTR_MAX); + + if (!mac_smack_use()) + return 0; + + if (label) + r = lsetxattr(path, smack_attr_to_string(attr), label, strlen(label), 0); + else + r = lremovexattr(path, smack_attr_to_string(attr)); + if (r < 0) + return -errno; + + return 0; +} + +int mac_smack_apply_fd(int fd, SmackAttr attr, const char *label) { + int r; + + assert(fd >= 0); + assert(attr >= 0 && attr < _SMACK_ATTR_MAX); + + if (!mac_smack_use()) + return 0; + + if (label) + r = fsetxattr(fd, smack_attr_to_string(attr), label, strlen(label), 0); + else + r = fremovexattr(fd, smack_attr_to_string(attr)); + if (r < 0) + return -errno; + + return 0; +} + +int mac_smack_apply_pid(pid_t pid, const char *label) { + const char *p; + int r = 0; + + assert(label); + + if (!mac_smack_use()) + return 0; + + p = procfs_file_alloca(pid, "attr/current"); + r = write_string_file(p, label, WRITE_STRING_FILE_DISABLE_BUFFER); + if (r < 0) + return r; + + return r; +} + +static int smack_fix_fd(int fd , const char *abspath, LabelFixFlags flags) { + char procfs_path[STRLEN("/proc/self/fd/") + DECIMAL_STR_MAX(int)]; + const char *label; + struct stat st; + int r; + + /* The caller should have done the sanity checks. */ + assert(abspath); + assert(path_is_absolute(abspath)); + + /* Path must be in /dev. */ + if (!path_startswith(abspath, "/dev")) + return 0; + + if (fstat(fd, &st) < 0) + return -errno; + + /* + * Label directories and character devices "*". + * Label symlinks "_". + * Don't change anything else. + */ + + if (S_ISDIR(st.st_mode)) + label = SMACK_STAR_LABEL; + else if (S_ISLNK(st.st_mode)) + label = SMACK_FLOOR_LABEL; + else if (S_ISCHR(st.st_mode)) + label = SMACK_STAR_LABEL; + else + return 0; + + xsprintf(procfs_path, "/proc/self/fd/%i", fd); + if (setxattr(procfs_path, "security.SMACK64", label, strlen(label), 0) < 0) { + _cleanup_free_ char *old_label = NULL; + + r = -errno; + + /* If the FS doesn't support labels, then exit without warning */ + if (r == -EOPNOTSUPP) + return 0; + + /* It the FS is read-only and we were told to ignore failures caused by that, suppress error */ + if (r == -EROFS && (flags & LABEL_IGNORE_EROFS)) + return 0; + + /* If the old label is identical to the new one, suppress any kind of error */ + if (getxattr_malloc(procfs_path, "security.SMACK64", &old_label, false) >= 0 && + streq(old_label, label)) + return 0; + + return log_debug_errno(r, "Unable to fix SMACK label of %s: %m", abspath); + } + + return 0; +} + +int mac_smack_fix_at(int dirfd, const char *path, LabelFixFlags flags) { + _cleanup_free_ char *p = NULL; + _cleanup_close_ int fd = -1; + int r; + + assert(path); + + if (!mac_smack_use()) + return 0; + + fd = openat(dirfd, path, O_NOFOLLOW|O_CLOEXEC|O_PATH); + if (fd < 0) { + if ((flags & LABEL_IGNORE_ENOENT) && errno == ENOENT) + return 0; + + return -errno; + } + + if (!path_is_absolute(path)) { + r = fd_get_path(fd, &p); + if (r < 0) + return r; + path = p; + } + + return smack_fix_fd(fd, path, flags); +} + +int mac_smack_fix(const char *path, LabelFixFlags flags) { + _cleanup_free_ char *abspath = NULL; + _cleanup_close_ int fd = -1; + int r; + + assert(path); + + if (!mac_smack_use()) + return 0; + + r = path_make_absolute_cwd(path, &abspath); + if (r < 0) + return r; + + fd = open(abspath, O_NOFOLLOW|O_CLOEXEC|O_PATH); + if (fd < 0) { + if ((flags & LABEL_IGNORE_ENOENT) && errno == ENOENT) + return 0; + + return -errno; + } + + return smack_fix_fd(fd, abspath, flags); +} + +int mac_smack_copy(const char *dest, const char *src) { + int r = 0; + _cleanup_free_ char *label = NULL; + + assert(dest); + assert(src); + + r = mac_smack_read(src, SMACK_ATTR_ACCESS, &label); + if (r < 0) + return r; + + r = mac_smack_apply(dest, SMACK_ATTR_ACCESS, label); + if (r < 0) + return r; + + return r; +} + +#else +bool mac_smack_use(void) { + return false; +} + +int mac_smack_read(const char *path, SmackAttr attr, char **label) { + return -EOPNOTSUPP; +} + +int mac_smack_read_fd(int fd, SmackAttr attr, char **label) { + return -EOPNOTSUPP; +} + +int mac_smack_apply(const char *path, SmackAttr attr, const char *label) { + return 0; +} + +int mac_smack_apply_fd(int fd, SmackAttr attr, const char *label) { + return 0; +} + +int mac_smack_apply_pid(pid_t pid, const char *label) { + return 0; +} + +int mac_smack_fix(const char *path, LabelFixFlags flags) { + return 0; +} + +int mac_smack_fix_at(int dirfd, const char *path, LabelFixFlags flags) { + return 0; +} + +int mac_smack_copy(const char *dest, const char *src) { + return 0; +} +#endif diff --git a/src/basic/smack-util.h b/src/basic/smack-util.h new file mode 100644 index 0000000..395ec07 --- /dev/null +++ b/src/basic/smack-util.h @@ -0,0 +1,42 @@ +/* SPDX-License-Identifier: LGPL-2.1+ */ +#pragma once + +/*** + Copyright © 2013 Intel Corporation + + Author: Auke Kok <auke-jan.h.kok@intel.com> +***/ + +#include <stdbool.h> +#include <sys/types.h> + +#include "label.h" +#include "macro.h" + +#define SMACK_FLOOR_LABEL "_" +#define SMACK_STAR_LABEL "*" + +typedef enum SmackAttr { + SMACK_ATTR_ACCESS, + SMACK_ATTR_EXEC, + SMACK_ATTR_MMAP, + SMACK_ATTR_TRANSMUTE, + SMACK_ATTR_IPIN, + SMACK_ATTR_IPOUT, + _SMACK_ATTR_MAX, + _SMACK_ATTR_INVALID = -1, +} SmackAttr; + +bool mac_smack_use(void); + +int mac_smack_fix(const char *path, LabelFixFlags flags); +int mac_smack_fix_at(int dirfd, const char *path, LabelFixFlags flags); + +const char* smack_attr_to_string(SmackAttr i) _const_; +SmackAttr smack_attr_from_string(const char *s) _pure_; +int mac_smack_read(const char *path, SmackAttr attr, char **label); +int mac_smack_read_fd(int fd, SmackAttr attr, char **label); +int mac_smack_apply(const char *path, SmackAttr attr, const char *label); +int mac_smack_apply_fd(int fd, SmackAttr attr, const char *label); +int mac_smack_apply_pid(pid_t pid, const char *label); +int mac_smack_copy(const char *dest, const char *src); diff --git a/src/basic/socket-label.c b/src/basic/socket-label.c new file mode 100644 index 0000000..4ed19cd --- /dev/null +++ b/src/basic/socket-label.c @@ -0,0 +1,163 @@ +/* SPDX-License-Identifier: LGPL-2.1+ */ + +#include <errno.h> +#include <netinet/in.h> +#include <stdbool.h> +#include <stddef.h> +#include <string.h> +#include <sys/socket.h> +#include <sys/un.h> +#include <unistd.h> + +#include "alloc-util.h" +#include "fd-util.h" +#include "fs-util.h" +#include "log.h" +#include "macro.h" +#include "missing.h" +#include "mkdir.h" +#include "selinux-util.h" +#include "socket-util.h" +#include "umask-util.h" + +int socket_address_listen( + const SocketAddress *a, + int flags, + int backlog, + SocketAddressBindIPv6Only only, + const char *bind_to_device, + bool reuse_port, + bool free_bind, + bool transparent, + mode_t directory_mode, + mode_t socket_mode, + const char *label) { + + _cleanup_close_ int fd = -1; + const char *p; + int r; + + assert(a); + + r = socket_address_verify(a, true); + if (r < 0) + return r; + + if (socket_address_family(a) == AF_INET6 && !socket_ipv6_is_supported()) + return -EAFNOSUPPORT; + + if (label) { + r = mac_selinux_create_socket_prepare(label); + if (r < 0) + return r; + } + + fd = socket(socket_address_family(a), a->type | flags, a->protocol); + r = fd < 0 ? -errno : 0; + + if (label) + mac_selinux_create_socket_clear(); + + if (r < 0) + return r; + + if (socket_address_family(a) == AF_INET6 && only != SOCKET_ADDRESS_DEFAULT) { + r = setsockopt_int(fd, IPPROTO_IPV6, IPV6_V6ONLY, only == SOCKET_ADDRESS_IPV6_ONLY); + if (r < 0) + return r; + } + + if (IN_SET(socket_address_family(a), AF_INET, AF_INET6)) { + if (bind_to_device) + if (setsockopt(fd, SOL_SOCKET, SO_BINDTODEVICE, bind_to_device, strlen(bind_to_device)+1) < 0) + return -errno; + + if (reuse_port) { + r = setsockopt_int(fd, SOL_SOCKET, SO_REUSEPORT, true); + if (r < 0) + log_warning_errno(r, "SO_REUSEPORT failed: %m"); + } + + if (free_bind) { + r = setsockopt_int(fd, IPPROTO_IP, IP_FREEBIND, true); + if (r < 0) + log_warning_errno(r, "IP_FREEBIND failed: %m"); + } + + if (transparent) { + r = setsockopt_int(fd, IPPROTO_IP, IP_TRANSPARENT, true); + if (r < 0) + log_warning_errno(r, "IP_TRANSPARENT failed: %m"); + } + } + + r = setsockopt_int(fd, SOL_SOCKET, SO_REUSEADDR, true); + if (r < 0) + return r; + + p = socket_address_get_path(a); + if (p) { + /* Create parents */ + (void) mkdir_parents_label(p, directory_mode); + + /* Enforce the right access mode for the socket */ + RUN_WITH_UMASK(~socket_mode) { + r = mac_selinux_bind(fd, &a->sockaddr.sa, a->size); + if (r == -EADDRINUSE) { + /* Unlink and try again */ + + if (unlink(p) < 0) + return r; /* didn't work, return original error */ + + r = mac_selinux_bind(fd, &a->sockaddr.sa, a->size); + } + if (r < 0) + return r; + } + } else { + if (bind(fd, &a->sockaddr.sa, a->size) < 0) + return -errno; + } + + if (socket_address_can_accept(a)) + if (listen(fd, backlog) < 0) + return -errno; + + /* Let's trigger an inotify event on the socket node, so that anyone waiting for this socket to be connectable + * gets notified */ + if (p) + (void) touch(p); + + r = fd; + fd = -1; + + return r; +} + +int make_socket_fd(int log_level, const char* address, int type, int flags) { + SocketAddress a; + int fd, r; + + r = socket_address_parse(&a, address); + if (r < 0) + return log_error_errno(r, "Failed to parse socket address \"%s\": %m", address); + + a.type = type; + + fd = socket_address_listen(&a, type | flags, SOMAXCONN, SOCKET_ADDRESS_DEFAULT, + NULL, false, false, false, 0755, 0644, NULL); + if (fd < 0 || log_get_max_level() >= log_level) { + _cleanup_free_ char *p = NULL; + + r = socket_address_print(&a, &p); + if (r < 0) + return log_error_errno(r, "socket_address_print(): %m"); + + if (fd < 0) + log_error_errno(fd, "Failed to listen on %s: %m", p); + else + log_full(log_level, "Listening on %s", p); + } + + return fd; +} diff --git a/src/basic/socket-util.c b/src/basic/socket-util.c new file mode 100644 index 0000000..91bf801 --- /dev/null +++ b/src/basic/socket-util.c @@ -0,0 +1,1347 @@ +/* SPDX-License-Identifier: LGPL-2.1+ */ + +#include <arpa/inet.h> +#include <errno.h> +#include <limits.h> +#include <net/if.h> +#include <netdb.h> +#include <netinet/ip.h> +#include <poll.h> +#include <stddef.h> +#include <stdint.h> +#include <stdio.h> +#include <stdlib.h> +#include <string.h> +#include <unistd.h> + +#include "alloc-util.h" +#include "escape.h" +#include "fd-util.h" +#include "fileio.h" +#include "format-util.h" +#include "log.h" +#include "macro.h" +#include "missing.h" +#include "parse-util.h" +#include "path-util.h" +#include "process-util.h" +#include "socket-util.h" +#include "string-table.h" +#include "string-util.h" +#include "strv.h" +#include "user-util.h" +#include "utf8.h" +#include "util.h" + +#if ENABLE_IDN +# define IDN_FLAGS NI_IDN +#else +# define IDN_FLAGS 0 +#endif + +static const char* const socket_address_type_table[] = { + [SOCK_STREAM] = "Stream", + [SOCK_DGRAM] = "Datagram", + [SOCK_RAW] = "Raw", + [SOCK_RDM] = "ReliableDatagram", + [SOCK_SEQPACKET] = "SequentialPacket", + [SOCK_DCCP] = "DatagramCongestionControl", +}; + +DEFINE_STRING_TABLE_LOOKUP(socket_address_type, int); + +int socket_address_parse(SocketAddress *a, const char *s) { + _cleanup_free_ char *n = NULL; + char *e; + int r; + + assert(a); + assert(s); + + *a = (SocketAddress) { + .type = SOCK_STREAM, + }; + + if (*s == '[') { + uint16_t port; + + /* IPv6 in [x:.....:z]:p notation */ + + e = strchr(s+1, ']'); + if (!e) + return -EINVAL; + + n = strndup(s+1, e-s-1); + if (!n) + return -ENOMEM; + + errno = 0; + if (inet_pton(AF_INET6, n, &a->sockaddr.in6.sin6_addr) <= 0) + return errno > 0 ? -errno : -EINVAL; + + e++; + if (*e != ':') + return -EINVAL; + + e++; + r = parse_ip_port(e, &port); + if (r < 0) + return r; + + a->sockaddr.in6.sin6_family = AF_INET6; + a->sockaddr.in6.sin6_port = htobe16(port); + a->size = sizeof(struct sockaddr_in6); + + } else if (*s == '/') { + /* AF_UNIX socket */ + + size_t l; + + l = strlen(s); + if (l >= sizeof(a->sockaddr.un.sun_path)) /* Note that we refuse non-NUL-terminated sockets when + * parsing (the kernel itself is less strict here in what it + * accepts) */ + return -EINVAL; + + a->sockaddr.un.sun_family = AF_UNIX; + memcpy(a->sockaddr.un.sun_path, s, l); + a->size = offsetof(struct sockaddr_un, sun_path) + l + 1; + + } else if (*s == '@') { + /* Abstract AF_UNIX socket */ + size_t l; + + l = strlen(s+1); + if (l >= sizeof(a->sockaddr.un.sun_path) - 1) /* Note that we refuse non-NUL-terminated sockets here + * when parsing, even though abstract namespace sockets + * explicitly allow embedded NUL bytes and don't consider + * them special. But it's simply annoying to debug such + * sockets. */ + return -EINVAL; + + a->sockaddr.un.sun_family = AF_UNIX; + memcpy(a->sockaddr.un.sun_path+1, s+1, l); + a->size = offsetof(struct sockaddr_un, sun_path) + 1 + l; + + } else if (startswith(s, "vsock:")) { + /* AF_VSOCK socket in vsock:cid:port notation */ + const char *cid_start = s + STRLEN("vsock:"); + unsigned port; + + e = strchr(cid_start, ':'); + if (!e) + return -EINVAL; + + r = safe_atou(e+1, &port); + if (r < 0) + return r; + + n = strndup(cid_start, e - cid_start); + if (!n) + return -ENOMEM; + + if (!isempty(n)) { + r = safe_atou(n, &a->sockaddr.vm.svm_cid); + if (r < 0) + return r; + } else + a->sockaddr.vm.svm_cid = VMADDR_CID_ANY; + + a->sockaddr.vm.svm_family = AF_VSOCK; + a->sockaddr.vm.svm_port = port; + a->size = sizeof(struct sockaddr_vm); + + } else { + uint16_t port; + + e = strchr(s, ':'); + if (e) { + r = parse_ip_port(e + 1, &port); + if (r < 0) + return r; + + n = strndup(s, e-s); + if (!n) + return -ENOMEM; + + /* IPv4 in w.x.y.z:p notation? */ + r = inet_pton(AF_INET, n, &a->sockaddr.in.sin_addr); + if (r < 0) + return -errno; + + if (r > 0) { + /* Gotcha, it's a traditional IPv4 address */ + a->sockaddr.in.sin_family = AF_INET; + a->sockaddr.in.sin_port = htobe16(port); + a->size = sizeof(struct sockaddr_in); + } else { + unsigned idx; + + if (strlen(n) > IF_NAMESIZE-1) + return -EINVAL; + + /* Uh, our last resort, an interface name */ + idx = if_nametoindex(n); + if (idx == 0) + return -EINVAL; + + a->sockaddr.in6.sin6_family = AF_INET6; + a->sockaddr.in6.sin6_port = htobe16(port); + a->sockaddr.in6.sin6_scope_id = idx; + a->sockaddr.in6.sin6_addr = in6addr_any; + a->size = sizeof(struct sockaddr_in6); + } + } else { + + /* Just a port */ + r = parse_ip_port(s, &port); + if (r < 0) + return r; + + if (socket_ipv6_is_supported()) { + a->sockaddr.in6.sin6_family = AF_INET6; + a->sockaddr.in6.sin6_port = htobe16(port); + a->sockaddr.in6.sin6_addr = in6addr_any; + a->size = sizeof(struct sockaddr_in6); + } else { + a->sockaddr.in.sin_family = AF_INET; + a->sockaddr.in.sin_port = htobe16(port); + a->sockaddr.in.sin_addr.s_addr = INADDR_ANY; + a->size = sizeof(struct sockaddr_in); + } + } + } + + return 0; +} + +int socket_address_parse_and_warn(SocketAddress *a, const char *s) { + SocketAddress b; + int r; + + /* Similar to socket_address_parse() but warns for IPv6 sockets when we don't support them. */ + + r = socket_address_parse(&b, s); + if (r < 0) + return r; + + if (!socket_ipv6_is_supported() && b.sockaddr.sa.sa_family == AF_INET6) { + log_warning("Binding to IPv6 address not available since kernel does not support IPv6."); + return -EAFNOSUPPORT; + } + + *a = b; + return 0; +} + +int socket_address_parse_netlink(SocketAddress *a, const char *s) { + int family; + unsigned group = 0; + _cleanup_free_ char *sfamily = NULL; + assert(a); + assert(s); + + zero(*a); + a->type = SOCK_RAW; + + errno = 0; + if (sscanf(s, "%ms %u", &sfamily, &group) < 1) + return errno > 0 ? -errno : -EINVAL; + + family = netlink_family_from_string(sfamily); + if (family < 0) + return -EINVAL; + + a->sockaddr.nl.nl_family = AF_NETLINK; + a->sockaddr.nl.nl_groups = group; + + a->type = SOCK_RAW; + a->size = sizeof(struct sockaddr_nl); + a->protocol = family; + + return 0; +} + +int socket_address_verify(const SocketAddress *a, bool strict) { + assert(a); + + /* With 'strict' we enforce additional sanity constraints which are not set by the standard, + * but should only apply to sockets we create ourselves. */ + + switch (socket_address_family(a)) { + + case AF_INET: + if (a->size != sizeof(struct sockaddr_in)) + return -EINVAL; + + if (a->sockaddr.in.sin_port == 0) + return -EINVAL; + + if (!IN_SET(a->type, SOCK_STREAM, SOCK_DGRAM)) + return -EINVAL; + + return 0; + + case AF_INET6: + if (a->size != sizeof(struct sockaddr_in6)) + return -EINVAL; + + if (a->sockaddr.in6.sin6_port == 0) + return -EINVAL; + + if (!IN_SET(a->type, SOCK_STREAM, SOCK_DGRAM)) + return -EINVAL; + + return 0; + + case AF_UNIX: + if (a->size < offsetof(struct sockaddr_un, sun_path)) + return -EINVAL; + if (a->size > sizeof(struct sockaddr_un) + !strict) + /* If !strict, allow one extra byte, since getsockname() on Linux will append + * a NUL byte if we have path sockets that are above sun_path's full size. */ + return -EINVAL; + + if (a->size > offsetof(struct sockaddr_un, sun_path) && + a->sockaddr.un.sun_path[0] != 0 && + strict) { + /* Only validate file system sockets here, and only in strict mode */ + const char *e; + + e = memchr(a->sockaddr.un.sun_path, 0, sizeof(a->sockaddr.un.sun_path)); + if (e) { + /* If there's an embedded NUL byte, make sure the size of the socket address matches it */ + if (a->size != offsetof(struct sockaddr_un, sun_path) + (e - a->sockaddr.un.sun_path) + 1) + return -EINVAL; + } else { + /* If there's no embedded NUL byte, then then the size needs to match the whole + * structure or the structure with one extra NUL byte suffixed. (Yeah, Linux is awful, + * and considers both equivalent: getsockname() even extends sockaddr_un beyond its + * size if the path is non NUL terminated.)*/ + if (!IN_SET(a->size, sizeof(a->sockaddr.un.sun_path), sizeof(a->sockaddr.un.sun_path)+1)) + return -EINVAL; + } + } + + if (!IN_SET(a->type, SOCK_STREAM, SOCK_DGRAM, SOCK_SEQPACKET)) + return -EINVAL; + + return 0; + + case AF_NETLINK: + + if (a->size != sizeof(struct sockaddr_nl)) + return -EINVAL; + + if (!IN_SET(a->type, SOCK_RAW, SOCK_DGRAM)) + return -EINVAL; + + return 0; + + case AF_VSOCK: + if (a->size != sizeof(struct sockaddr_vm)) + return -EINVAL; + + if (!IN_SET(a->type, SOCK_STREAM, SOCK_DGRAM)) + return -EINVAL; + + return 0; + + default: + return -EAFNOSUPPORT; + } +} + +int socket_address_print(const SocketAddress *a, char **ret) { + int r; + + assert(a); + assert(ret); + + r = socket_address_verify(a, false); /* We do non-strict validation, because we want to be + * able to pretty-print any socket the kernel considers + * valid. We still need to do validation to know if we + * can meaningfully print the address. */ + if (r < 0) + return r; + + if (socket_address_family(a) == AF_NETLINK) { + _cleanup_free_ char *sfamily = NULL; + + r = netlink_family_to_string_alloc(a->protocol, &sfamily); + if (r < 0) + return r; + + r = asprintf(ret, "%s %u", sfamily, a->sockaddr.nl.nl_groups); + if (r < 0) + return -ENOMEM; + + return 0; + } + + return sockaddr_pretty(&a->sockaddr.sa, a->size, false, true, ret); +} + +bool socket_address_can_accept(const SocketAddress *a) { + assert(a); + + return + IN_SET(a->type, SOCK_STREAM, SOCK_SEQPACKET); +} + +bool socket_address_equal(const SocketAddress *a, const SocketAddress *b) { + assert(a); + assert(b); + + /* Invalid addresses are unequal to all */ + if (socket_address_verify(a, false) < 0 || + socket_address_verify(b, false) < 0) + return false; + + if (a->type != b->type) + return false; + + if (socket_address_family(a) != socket_address_family(b)) + return false; + + switch (socket_address_family(a)) { + + case AF_INET: + if (a->sockaddr.in.sin_addr.s_addr != b->sockaddr.in.sin_addr.s_addr) + return false; + + if (a->sockaddr.in.sin_port != b->sockaddr.in.sin_port) + return false; + + break; + + case AF_INET6: + if (memcmp(&a->sockaddr.in6.sin6_addr, &b->sockaddr.in6.sin6_addr, sizeof(a->sockaddr.in6.sin6_addr)) != 0) + return false; + + if (a->sockaddr.in6.sin6_port != b->sockaddr.in6.sin6_port) + return false; + + break; + + case AF_UNIX: + if (a->size <= offsetof(struct sockaddr_un, sun_path) || + b->size <= offsetof(struct sockaddr_un, sun_path)) + return false; + + if ((a->sockaddr.un.sun_path[0] == 0) != (b->sockaddr.un.sun_path[0] == 0)) + return false; + + if (a->sockaddr.un.sun_path[0]) { + if (!path_equal_or_files_same(a->sockaddr.un.sun_path, b->sockaddr.un.sun_path, 0)) + return false; + } else { + if (a->size != b->size) + return false; + + if (memcmp(a->sockaddr.un.sun_path, b->sockaddr.un.sun_path, a->size) != 0) + return false; + } + + break; + + case AF_NETLINK: + if (a->protocol != b->protocol) + return false; + + if (a->sockaddr.nl.nl_groups != b->sockaddr.nl.nl_groups) + return false; + + break; + + case AF_VSOCK: + if (a->sockaddr.vm.svm_cid != b->sockaddr.vm.svm_cid) + return false; + + if (a->sockaddr.vm.svm_port != b->sockaddr.vm.svm_port) + return false; + + break; + + default: + /* Cannot compare, so we assume the addresses are different */ + return false; + } + + return true; +} + +bool socket_address_is(const SocketAddress *a, const char *s, int type) { + struct SocketAddress b; + + assert(a); + assert(s); + + if (socket_address_parse(&b, s) < 0) + return false; + + b.type = type; + + return socket_address_equal(a, &b); +} + +bool socket_address_is_netlink(const SocketAddress *a, const char *s) { + struct SocketAddress b; + + assert(a); + assert(s); + + if (socket_address_parse_netlink(&b, s) < 0) + return false; + + return socket_address_equal(a, &b); +} + +const char* socket_address_get_path(const SocketAddress *a) { + assert(a); + + if (socket_address_family(a) != AF_UNIX) + return NULL; + + if (a->sockaddr.un.sun_path[0] == 0) + return NULL; + + /* Note that this is only safe because we know that there's an extra NUL byte after the sockaddr_un + * structure. On Linux AF_UNIX file system socket addresses don't have to be NUL terminated if they take up the + * full sun_path space. */ + assert_cc(sizeof(union sockaddr_union) >= sizeof(struct sockaddr_un)+1); + return a->sockaddr.un.sun_path; +} + +bool socket_ipv6_is_supported(void) { + if (access("/proc/net/if_inet6", F_OK) != 0) + return false; + + return true; +} + +bool socket_address_matches_fd(const SocketAddress *a, int fd) { + SocketAddress b; + socklen_t solen; + + assert(a); + assert(fd >= 0); + + b.size = sizeof(b.sockaddr); + if (getsockname(fd, &b.sockaddr.sa, &b.size) < 0) + return false; + + if (b.sockaddr.sa.sa_family != a->sockaddr.sa.sa_family) + return false; + + solen = sizeof(b.type); + if (getsockopt(fd, SOL_SOCKET, SO_TYPE, &b.type, &solen) < 0) + return false; + + if (b.type != a->type) + return false; + + if (a->protocol != 0) { + solen = sizeof(b.protocol); + if (getsockopt(fd, SOL_SOCKET, SO_PROTOCOL, &b.protocol, &solen) < 0) + return false; + + if (b.protocol != a->protocol) + return false; + } + + return socket_address_equal(a, &b); +} + +int sockaddr_port(const struct sockaddr *_sa, unsigned *ret_port) { + union sockaddr_union *sa = (union sockaddr_union*) _sa; + + /* Note, this returns the port as 'unsigned' rather than 'uint16_t', as AF_VSOCK knows larger ports */ + + assert(sa); + + switch (sa->sa.sa_family) { + + case AF_INET: + *ret_port = be16toh(sa->in.sin_port); + return 0; + + case AF_INET6: + *ret_port = be16toh(sa->in6.sin6_port); + return 0; + + case AF_VSOCK: + *ret_port = sa->vm.svm_port; + return 0; + + default: + return -EAFNOSUPPORT; + } +} + +int sockaddr_pretty( + const struct sockaddr *_sa, + socklen_t salen, + bool translate_ipv6, + bool include_port, + char **ret) { + + union sockaddr_union *sa = (union sockaddr_union*) _sa; + char *p; + int r; + + assert(sa); + assert(salen >= sizeof(sa->sa.sa_family)); + + switch (sa->sa.sa_family) { + + case AF_INET: { + uint32_t a; + + a = be32toh(sa->in.sin_addr.s_addr); + + if (include_port) + r = asprintf(&p, + "%u.%u.%u.%u:%u", + a >> 24, (a >> 16) & 0xFF, (a >> 8) & 0xFF, a & 0xFF, + be16toh(sa->in.sin_port)); + else + r = asprintf(&p, + "%u.%u.%u.%u", + a >> 24, (a >> 16) & 0xFF, (a >> 8) & 0xFF, a & 0xFF); + if (r < 0) + return -ENOMEM; + break; + } + + case AF_INET6: { + static const unsigned char ipv4_prefix[] = { + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0xFF, 0xFF + }; + + if (translate_ipv6 && + memcmp(&sa->in6.sin6_addr, ipv4_prefix, sizeof(ipv4_prefix)) == 0) { + const uint8_t *a = sa->in6.sin6_addr.s6_addr+12; + if (include_port) + r = asprintf(&p, + "%u.%u.%u.%u:%u", + a[0], a[1], a[2], a[3], + be16toh(sa->in6.sin6_port)); + else + r = asprintf(&p, + "%u.%u.%u.%u", + a[0], a[1], a[2], a[3]); + if (r < 0) + return -ENOMEM; + } else { + char a[INET6_ADDRSTRLEN]; + + inet_ntop(AF_INET6, &sa->in6.sin6_addr, a, sizeof(a)); + + if (include_port) { + r = asprintf(&p, + "[%s]:%u", + a, + be16toh(sa->in6.sin6_port)); + if (r < 0) + return -ENOMEM; + } else { + p = strdup(a); + if (!p) + return -ENOMEM; + } + } + + break; + } + + case AF_UNIX: + if (salen <= offsetof(struct sockaddr_un, sun_path) || + (sa->un.sun_path[0] == 0 && salen == offsetof(struct sockaddr_un, sun_path) + 1)) + /* The name must have at least one character (and the leading NUL does not count) */ + p = strdup("<unnamed>"); + else { + /* Note that we calculate the path pointer here through the .un_buffer[] field, in order to + * outtrick bounds checking tools such as ubsan, which are too smart for their own good: on + * Linux the kernel may return sun_path[] data one byte longer than the declared size of the + * field. */ + char *path = (char*) sa->un_buffer + offsetof(struct sockaddr_un, sun_path); + size_t path_len = salen - offsetof(struct sockaddr_un, sun_path); + + if (path[0] == 0) { + /* Abstract socket. When parsing address information from, we + * explicitly reject overly long paths and paths with embedded NULs. + * But we might get such a socket from the outside. Let's return + * something meaningful and printable in this case. */ + + _cleanup_free_ char *e = NULL; + + e = cescape_length(path + 1, path_len - 1); + if (!e) + return -ENOMEM; + + p = strjoin("@", e); + } else { + if (path[path_len - 1] == '\0') + /* We expect a terminating NUL and don't print it */ + path_len --; + + p = cescape_length(path, path_len); + } + } + if (!p) + return -ENOMEM; + + break; + + case AF_VSOCK: + if (include_port) { + if (sa->vm.svm_cid == VMADDR_CID_ANY) + r = asprintf(&p, "vsock::%u", sa->vm.svm_port); + else + r = asprintf(&p, "vsock:%u:%u", sa->vm.svm_cid, sa->vm.svm_port); + } else + r = asprintf(&p, "vsock:%u", sa->vm.svm_cid); + if (r < 0) + return -ENOMEM; + break; + + default: + return -EOPNOTSUPP; + } + + *ret = p; + return 0; +} + +int getpeername_pretty(int fd, bool include_port, char **ret) { + union sockaddr_union sa; + socklen_t salen = sizeof(sa); + int r; + + assert(fd >= 0); + assert(ret); + + if (getpeername(fd, &sa.sa, &salen) < 0) + return -errno; + + if (sa.sa.sa_family == AF_UNIX) { + struct ucred ucred = {}; + + /* UNIX connection sockets are anonymous, so let's use + * PID/UID as pretty credentials instead */ + + r = getpeercred(fd, &ucred); + if (r < 0) + return r; + + if (asprintf(ret, "PID "PID_FMT"/UID "UID_FMT, ucred.pid, ucred.uid) < 0) + return -ENOMEM; + + return 0; + } + + /* For remote sockets we translate IPv6 addresses back to IPv4 + * if applicable, since that's nicer. */ + + return sockaddr_pretty(&sa.sa, salen, true, include_port, ret); +} + +int getsockname_pretty(int fd, char **ret) { + union sockaddr_union sa; + socklen_t salen = sizeof(sa); + + assert(fd >= 0); + assert(ret); + + if (getsockname(fd, &sa.sa, &salen) < 0) + return -errno; + + /* For local sockets we do not translate IPv6 addresses back + * to IPv6 if applicable, since this is usually used for + * listening sockets where the difference between IPv4 and + * IPv6 matters. */ + + return sockaddr_pretty(&sa.sa, salen, false, true, ret); +} + +int socknameinfo_pretty(union sockaddr_union *sa, socklen_t salen, char **_ret) { + int r; + char host[NI_MAXHOST], *ret; + + assert(_ret); + + r = getnameinfo(&sa->sa, salen, host, sizeof(host), NULL, 0, IDN_FLAGS); + if (r != 0) { + int saved_errno = errno; + + r = sockaddr_pretty(&sa->sa, salen, true, true, &ret); + if (r < 0) + return r; + + log_debug_errno(saved_errno, "getnameinfo(%s) failed: %m", ret); + } else { + ret = strdup(host); + if (!ret) + return -ENOMEM; + } + + *_ret = ret; + return 0; +} + +static const char* const netlink_family_table[] = { + [NETLINK_ROUTE] = "route", + [NETLINK_FIREWALL] = "firewall", + [NETLINK_INET_DIAG] = "inet-diag", + [NETLINK_NFLOG] = "nflog", + [NETLINK_XFRM] = "xfrm", + [NETLINK_SELINUX] = "selinux", + [NETLINK_ISCSI] = "iscsi", + [NETLINK_AUDIT] = "audit", + [NETLINK_FIB_LOOKUP] = "fib-lookup", + [NETLINK_CONNECTOR] = "connector", + [NETLINK_NETFILTER] = "netfilter", + [NETLINK_IP6_FW] = "ip6-fw", + [NETLINK_DNRTMSG] = "dnrtmsg", + [NETLINK_KOBJECT_UEVENT] = "kobject-uevent", + [NETLINK_GENERIC] = "generic", + [NETLINK_SCSITRANSPORT] = "scsitransport", + [NETLINK_ECRYPTFS] = "ecryptfs", + [NETLINK_RDMA] = "rdma", +}; + +DEFINE_STRING_TABLE_LOOKUP_WITH_FALLBACK(netlink_family, int, INT_MAX); + +static const char* const socket_address_bind_ipv6_only_table[_SOCKET_ADDRESS_BIND_IPV6_ONLY_MAX] = { + [SOCKET_ADDRESS_DEFAULT] = "default", + [SOCKET_ADDRESS_BOTH] = "both", + [SOCKET_ADDRESS_IPV6_ONLY] = "ipv6-only" +}; + +DEFINE_STRING_TABLE_LOOKUP(socket_address_bind_ipv6_only, SocketAddressBindIPv6Only); + +SocketAddressBindIPv6Only socket_address_bind_ipv6_only_or_bool_from_string(const char *n) { + int r; + + r = parse_boolean(n); + if (r > 0) + return SOCKET_ADDRESS_IPV6_ONLY; + if (r == 0) + return SOCKET_ADDRESS_BOTH; + + return socket_address_bind_ipv6_only_from_string(n); +} + +bool sockaddr_equal(const union sockaddr_union *a, const union sockaddr_union *b) { + assert(a); + assert(b); + + if (a->sa.sa_family != b->sa.sa_family) + return false; + + if (a->sa.sa_family == AF_INET) + return a->in.sin_addr.s_addr == b->in.sin_addr.s_addr; + + if (a->sa.sa_family == AF_INET6) + return memcmp(&a->in6.sin6_addr, &b->in6.sin6_addr, sizeof(a->in6.sin6_addr)) == 0; + + if (a->sa.sa_family == AF_VSOCK) + return a->vm.svm_cid == b->vm.svm_cid; + + return false; +} + +int fd_inc_sndbuf(int fd, size_t n) { + int r, value; + socklen_t l = sizeof(value); + + r = getsockopt(fd, SOL_SOCKET, SO_SNDBUF, &value, &l); + if (r >= 0 && l == sizeof(value) && (size_t) value >= n*2) + return 0; + + /* If we have the privileges we will ignore the kernel limit. */ + + if (setsockopt_int(fd, SOL_SOCKET, SO_SNDBUF, n) < 0) { + r = setsockopt_int(fd, SOL_SOCKET, SO_SNDBUFFORCE, n); + if (r < 0) + return r; + } + + return 1; +} + +int fd_inc_rcvbuf(int fd, size_t n) { + int r, value; + socklen_t l = sizeof(value); + + r = getsockopt(fd, SOL_SOCKET, SO_RCVBUF, &value, &l); + if (r >= 0 && l == sizeof(value) && (size_t) value >= n*2) + return 0; + + /* If we have the privileges we will ignore the kernel limit. */ + + if (setsockopt_int(fd, SOL_SOCKET, SO_RCVBUF, n) < 0) { + r = setsockopt_int(fd, SOL_SOCKET, SO_RCVBUFFORCE, n); + if (r < 0) + return r; + } + + return 1; +} + +static const char* const ip_tos_table[] = { + [IPTOS_LOWDELAY] = "low-delay", + [IPTOS_THROUGHPUT] = "throughput", + [IPTOS_RELIABILITY] = "reliability", + [IPTOS_LOWCOST] = "low-cost", +}; + +DEFINE_STRING_TABLE_LOOKUP_WITH_FALLBACK(ip_tos, int, 0xff); + +bool ifname_valid(const char *p) { + bool numeric = true; + + /* Checks whether a network interface name is valid. This is inspired by dev_valid_name() in the kernel sources + * but slightly stricter, as we only allow non-control, non-space ASCII characters in the interface name. We + * also don't permit names that only container numbers, to avoid confusion with numeric interface indexes. */ + + if (isempty(p)) + return false; + + if (strlen(p) >= IFNAMSIZ) + return false; + + if (dot_or_dot_dot(p)) + return false; + + while (*p) { + if ((unsigned char) *p >= 127U) + return false; + + if ((unsigned char) *p <= 32U) + return false; + + if (IN_SET(*p, ':', '/')) + return false; + + numeric = numeric && (*p >= '0' && *p <= '9'); + p++; + } + + if (numeric) + return false; + + return true; +} + +bool address_label_valid(const char *p) { + + if (isempty(p)) + return false; + + if (strlen(p) >= IFNAMSIZ) + return false; + + while (*p) { + if ((uint8_t) *p >= 127U) + return false; + + if ((uint8_t) *p <= 31U) + return false; + p++; + } + + return true; +} + +int getpeercred(int fd, struct ucred *ucred) { + socklen_t n = sizeof(struct ucred); + struct ucred u; + int r; + + assert(fd >= 0); + assert(ucred); + + r = getsockopt(fd, SOL_SOCKET, SO_PEERCRED, &u, &n); + if (r < 0) + return -errno; + + if (n != sizeof(struct ucred)) + return -EIO; + + /* Check if the data is actually useful and not suppressed due to namespacing issues */ + if (!pid_is_valid(u.pid)) + return -ENODATA; + + /* Note that we don't check UID/GID here, as namespace translation works differently there: instead of + * receiving in "invalid" user/group we get the overflow UID/GID. */ + + *ucred = u; + return 0; +} + +int getpeersec(int fd, char **ret) { + _cleanup_free_ char *s = NULL; + socklen_t n = 64; + + assert(fd >= 0); + assert(ret); + + for (;;) { + s = new0(char, n+1); + if (!s) + return -ENOMEM; + + if (getsockopt(fd, SOL_SOCKET, SO_PEERSEC, s, &n) >= 0) + break; + + if (errno != ERANGE) + return -errno; + + s = mfree(s); + } + + if (isempty(s)) + return -EOPNOTSUPP; + + *ret = TAKE_PTR(s); + + return 0; +} + +int getpeergroups(int fd, gid_t **ret) { + socklen_t n = sizeof(gid_t) * 64; + _cleanup_free_ gid_t *d = NULL; + + assert(fd >= 0); + assert(ret); + + for (;;) { + d = malloc(n); + if (!d) + return -ENOMEM; + + if (getsockopt(fd, SOL_SOCKET, SO_PEERGROUPS, d, &n) >= 0) + break; + + if (errno != ERANGE) + return -errno; + + d = mfree(d); + } + + assert_se(n % sizeof(gid_t) == 0); + n /= sizeof(gid_t); + + if ((socklen_t) (int) n != n) + return -E2BIG; + + *ret = TAKE_PTR(d); + + return (int) n; +} + +ssize_t send_one_fd_iov_sa( + int transport_fd, + int fd, + struct iovec *iov, size_t iovlen, + const struct sockaddr *sa, socklen_t len, + int flags) { + + union { + struct cmsghdr cmsghdr; + uint8_t buf[CMSG_SPACE(sizeof(int))]; + } control = {}; + struct msghdr mh = { + .msg_name = (struct sockaddr*) sa, + .msg_namelen = len, + .msg_iov = iov, + .msg_iovlen = iovlen, + }; + ssize_t k; + + assert(transport_fd >= 0); + + /* + * We need either an FD or data to send. + * If there's nothing, return an error. + */ + if (fd < 0 && !iov) + return -EINVAL; + + if (fd >= 0) { + struct cmsghdr *cmsg; + + mh.msg_control = &control; + mh.msg_controllen = sizeof(control); + + cmsg = CMSG_FIRSTHDR(&mh); + cmsg->cmsg_level = SOL_SOCKET; + cmsg->cmsg_type = SCM_RIGHTS; + cmsg->cmsg_len = CMSG_LEN(sizeof(int)); + memcpy(CMSG_DATA(cmsg), &fd, sizeof(int)); + + mh.msg_controllen = CMSG_SPACE(sizeof(int)); + } + k = sendmsg(transport_fd, &mh, MSG_NOSIGNAL | flags); + if (k < 0) + return (ssize_t) -errno; + + return k; +} + +int send_one_fd_sa( + int transport_fd, + int fd, + const struct sockaddr *sa, socklen_t len, + int flags) { + + assert(fd >= 0); + + return (int) send_one_fd_iov_sa(transport_fd, fd, NULL, 0, sa, len, flags); +} + +ssize_t receive_one_fd_iov( + int transport_fd, + struct iovec *iov, size_t iovlen, + int flags, + int *ret_fd) { + + union { + struct cmsghdr cmsghdr; + uint8_t buf[CMSG_SPACE(sizeof(int))]; + } control = {}; + struct msghdr mh = { + .msg_control = &control, + .msg_controllen = sizeof(control), + .msg_iov = iov, + .msg_iovlen = iovlen, + }; + struct cmsghdr *cmsg, *found = NULL; + ssize_t k; + + assert(transport_fd >= 0); + assert(ret_fd); + + /* + * Receive a single FD via @transport_fd. We don't care for + * the transport-type. We retrieve a single FD at most, so for + * packet-based transports, the caller must ensure to send + * only a single FD per packet. This is best used in + * combination with send_one_fd(). + */ + + k = recvmsg(transport_fd, &mh, MSG_CMSG_CLOEXEC | flags); + if (k < 0) + return (ssize_t) -errno; + + CMSG_FOREACH(cmsg, &mh) { + if (cmsg->cmsg_level == SOL_SOCKET && + cmsg->cmsg_type == SCM_RIGHTS && + cmsg->cmsg_len == CMSG_LEN(sizeof(int))) { + assert(!found); + found = cmsg; + break; + } + } + + if (!found) + cmsg_close_all(&mh); + + /* If didn't receive an FD or any data, return an error. */ + if (k == 0 && !found) + return -EIO; + + if (found) + *ret_fd = *(int*) CMSG_DATA(found); + else + *ret_fd = -1; + + return k; +} + +int receive_one_fd(int transport_fd, int flags) { + int fd; + ssize_t k; + + k = receive_one_fd_iov(transport_fd, NULL, 0, flags, &fd); + if (k == 0) + return fd; + + /* k must be negative, since receive_one_fd_iov() only returns + * a positive value if data was received through the iov. */ + assert(k < 0); + return (int) k; +} + +ssize_t next_datagram_size_fd(int fd) { + ssize_t l; + int k; + + /* This is a bit like FIONREAD/SIOCINQ, however a bit more powerful. The difference being: recv(MSG_PEEK) will + * actually cause the next datagram in the queue to be validated regarding checksums, which FIONREAD doesn't + * do. This difference is actually of major importance as we need to be sure that the size returned here + * actually matches what we will read with recvmsg() next, as otherwise we might end up allocating a buffer of + * the wrong size. */ + + l = recv(fd, NULL, 0, MSG_PEEK|MSG_TRUNC); + if (l < 0) { + if (IN_SET(errno, EOPNOTSUPP, EFAULT)) + goto fallback; + + return -errno; + } + if (l == 0) + goto fallback; + + return l; + +fallback: + k = 0; + + /* Some sockets (AF_PACKET) do not support null-sized recv() with MSG_TRUNC set, let's fall back to FIONREAD + * for them. Checksums don't matter for raw sockets anyway, hence this should be fine. */ + + if (ioctl(fd, FIONREAD, &k) < 0) + return -errno; + + return (ssize_t) k; +} + +int flush_accept(int fd) { + + struct pollfd pollfd = { + .fd = fd, + .events = POLLIN, + }; + int r; + + /* Similar to flush_fd() but flushes all incoming connection by accepting them and immediately closing them. */ + + for (;;) { + int cfd; + + r = poll(&pollfd, 1, 0); + if (r < 0) { + if (errno == EINTR) + continue; + + return -errno; + + } else if (r == 0) + return 0; + + cfd = accept4(fd, NULL, NULL, SOCK_NONBLOCK|SOCK_CLOEXEC); + if (cfd < 0) { + if (errno == EINTR) + continue; + + if (errno == EAGAIN) + return 0; + + return -errno; + } + + close(cfd); + } +} + +struct cmsghdr* cmsg_find(struct msghdr *mh, int level, int type, socklen_t length) { + struct cmsghdr *cmsg; + + assert(mh); + + CMSG_FOREACH(cmsg, mh) + if (cmsg->cmsg_level == level && + cmsg->cmsg_type == type && + (length == (socklen_t) -1 || length == cmsg->cmsg_len)) + return cmsg; + + return NULL; +} + +int socket_ioctl_fd(void) { + int fd; + + /* Create a socket to invoke the various network interface ioctl()s on. Traditionally only AF_INET was good for + * that. Since kernel 4.6 AF_NETLINK works for this too. We first try to use AF_INET hence, but if that's not + * available (for example, because it is made unavailable via SECCOMP or such), we'll fall back to the more + * generic AF_NETLINK. */ + + fd = socket(AF_INET, SOCK_DGRAM|SOCK_CLOEXEC, 0); + if (fd < 0) + fd = socket(AF_NETLINK, SOCK_RAW|SOCK_CLOEXEC, NETLINK_GENERIC); + if (fd < 0) + return -errno; + + return fd; +} + +int sockaddr_un_unlink(const struct sockaddr_un *sa) { + const char *p, * nul; + + assert(sa); + + if (sa->sun_family != AF_UNIX) + return -EPROTOTYPE; + + if (sa->sun_path[0] == 0) /* Nothing to do for abstract sockets */ + return 0; + + /* The path in .sun_path is not necessarily NUL terminated. Let's fix that. */ + nul = memchr(sa->sun_path, 0, sizeof(sa->sun_path)); + if (nul) + p = sa->sun_path; + else + p = memdupa_suffix0(sa->sun_path, sizeof(sa->sun_path)); + + if (unlink(p) < 0) + return -errno; + + return 1; +} + +int sockaddr_un_set_path(struct sockaddr_un *ret, const char *path) { + size_t l; + + assert(ret); + assert(path); + + /* Initialize ret->sun_path from the specified argument. This will interpret paths starting with '@' as + * abstract namespace sockets, and those starting with '/' as regular filesystem sockets. It won't accept + * anything else (i.e. no relative paths), to avoid ambiguities. Note that this function cannot be used to + * reference paths in the abstract namespace that include NUL bytes in the name. */ + + l = strlen(path); + if (l == 0) + return -EINVAL; + if (!IN_SET(path[0], '/', '@')) + return -EINVAL; + if (path[1] == 0) + return -EINVAL; + + /* Don't allow paths larger than the space in sockaddr_un. Note that we are a tiny bit more restrictive than + * the kernel is: we insist on NUL termination (both for abstract namespace and regular file system socket + * addresses!), which the kernel doesn't. We do this to reduce chance of incompatibility with other apps that + * do not expect non-NUL terminated file system path*/ + if (l+1 > sizeof(ret->sun_path)) + return -EINVAL; + + *ret = (struct sockaddr_un) { + .sun_family = AF_UNIX, + }; + + if (path[0] == '@') { + /* Abstract namespace socket */ + memcpy(ret->sun_path + 1, path + 1, l); /* copy *with* trailing NUL byte */ + return (int) (offsetof(struct sockaddr_un, sun_path) + l); /* 🔥 *don't* 🔥 include trailing NUL in size */ + + } else { + assert(path[0] == '/'); + + /* File system socket */ + memcpy(ret->sun_path, path, l + 1); /* copy *with* trailing NUL byte */ + return (int) (offsetof(struct sockaddr_un, sun_path) + l + 1); /* include trailing NUL in size */ + } +} diff --git a/src/basic/socket-util.h b/src/basic/socket-util.h new file mode 100644 index 0000000..574d2b7 --- /dev/null +++ b/src/basic/socket-util.h @@ -0,0 +1,200 @@ +/* SPDX-License-Identifier: LGPL-2.1+ */ +#pragma once + +#include <inttypes.h> +#include <linux/netlink.h> +#include <linux/if_infiniband.h> +#include <linux/if_packet.h> +#include <netinet/ether.h> +#include <netinet/in.h> +#include <stdbool.h> +#include <stddef.h> +#include <sys/socket.h> +#include <sys/types.h> +#include <sys/un.h> + +#include "macro.h" +#include "missing_socket.h" +#include "sparse-endian.h" + +union sockaddr_union { + /* The minimal, abstract version */ + struct sockaddr sa; + + /* The libc provided version that allocates "enough room" for every protocol */ + struct sockaddr_storage storage; + + /* Protoctol-specific implementations */ + struct sockaddr_in in; + struct sockaddr_in6 in6; + struct sockaddr_un un; + struct sockaddr_nl nl; + struct sockaddr_ll ll; + struct sockaddr_vm vm; + + /* Ensure there is enough space to store Infiniband addresses */ + uint8_t ll_buffer[offsetof(struct sockaddr_ll, sll_addr) + CONST_MAX(ETH_ALEN, INFINIBAND_ALEN)]; + + /* Ensure there is enough space after the AF_UNIX sun_path for one more NUL byte, just to be sure that the path + * component is always followed by at least one NUL byte. */ + uint8_t un_buffer[sizeof(struct sockaddr_un) + 1]; +}; + +typedef struct SocketAddress { + union sockaddr_union sockaddr; + + /* We store the size here explicitly due to the weird + * sockaddr_un semantics for abstract sockets */ + socklen_t size; + + /* Socket type, i.e. SOCK_STREAM, SOCK_DGRAM, ... */ + int type; + + /* Socket protocol, IPPROTO_xxx, usually 0, except for netlink */ + int protocol; +} SocketAddress; + +typedef enum SocketAddressBindIPv6Only { + SOCKET_ADDRESS_DEFAULT, + SOCKET_ADDRESS_BOTH, + SOCKET_ADDRESS_IPV6_ONLY, + _SOCKET_ADDRESS_BIND_IPV6_ONLY_MAX, + _SOCKET_ADDRESS_BIND_IPV6_ONLY_INVALID = -1 +} SocketAddressBindIPv6Only; + +#define socket_address_family(a) ((a)->sockaddr.sa.sa_family) + +const char* socket_address_type_to_string(int t) _const_; +int socket_address_type_from_string(const char *s) _pure_; + +int socket_address_parse(SocketAddress *a, const char *s); +int socket_address_parse_and_warn(SocketAddress *a, const char *s); +int socket_address_parse_netlink(SocketAddress *a, const char *s); +int socket_address_print(const SocketAddress *a, char **p); +int socket_address_verify(const SocketAddress *a, bool strict) _pure_; + +int sockaddr_un_unlink(const struct sockaddr_un *sa); + +static inline int socket_address_unlink(const SocketAddress *a) { + return socket_address_family(a) == AF_UNIX ? sockaddr_un_unlink(&a->sockaddr.un) : 0; +} + +bool socket_address_can_accept(const SocketAddress *a) _pure_; + +int socket_address_listen( + const SocketAddress *a, + int flags, + int backlog, + SocketAddressBindIPv6Only only, + const char *bind_to_device, + bool reuse_port, + bool free_bind, + bool transparent, + mode_t directory_mode, + mode_t socket_mode, + const char *label); +int make_socket_fd(int log_level, const char* address, int type, int flags); + +bool socket_address_is(const SocketAddress *a, const char *s, int type); +bool socket_address_is_netlink(const SocketAddress *a, const char *s); + +bool socket_address_matches_fd(const SocketAddress *a, int fd); + +bool socket_address_equal(const SocketAddress *a, const SocketAddress *b) _pure_; + +const char* socket_address_get_path(const SocketAddress *a); + +bool socket_ipv6_is_supported(void); + +int sockaddr_port(const struct sockaddr *_sa, unsigned *port); + +int sockaddr_pretty(const struct sockaddr *_sa, socklen_t salen, bool translate_ipv6, bool include_port, char **ret); +int getpeername_pretty(int fd, bool include_port, char **ret); +int getsockname_pretty(int fd, char **ret); + +int socknameinfo_pretty(union sockaddr_union *sa, socklen_t salen, char **_ret); + +const char* socket_address_bind_ipv6_only_to_string(SocketAddressBindIPv6Only b) _const_; +SocketAddressBindIPv6Only socket_address_bind_ipv6_only_from_string(const char *s) _pure_; +SocketAddressBindIPv6Only socket_address_bind_ipv6_only_or_bool_from_string(const char *s); + +int netlink_family_to_string_alloc(int b, char **s); +int netlink_family_from_string(const char *s) _pure_; + +bool sockaddr_equal(const union sockaddr_union *a, const union sockaddr_union *b); + +int fd_inc_sndbuf(int fd, size_t n); +int fd_inc_rcvbuf(int fd, size_t n); + +int ip_tos_to_string_alloc(int i, char **s); +int ip_tos_from_string(const char *s); + +bool ifname_valid(const char *p); +bool address_label_valid(const char *p); + +int getpeercred(int fd, struct ucred *ucred); +int getpeersec(int fd, char **ret); +int getpeergroups(int fd, gid_t **ret); + +ssize_t send_one_fd_iov_sa( + int transport_fd, + int fd, + struct iovec *iov, size_t iovlen, + const struct sockaddr *sa, socklen_t len, + int flags); +int send_one_fd_sa(int transport_fd, + int fd, + const struct sockaddr *sa, socklen_t len, + int flags); +#define send_one_fd_iov(transport_fd, fd, iov, iovlen, flags) send_one_fd_iov_sa(transport_fd, fd, iov, iovlen, NULL, 0, flags) +#define send_one_fd(transport_fd, fd, flags) send_one_fd_iov_sa(transport_fd, fd, NULL, 0, NULL, 0, flags) +ssize_t receive_one_fd_iov(int transport_fd, struct iovec *iov, size_t iovlen, int flags, int *ret_fd); +int receive_one_fd(int transport_fd, int flags); + +ssize_t next_datagram_size_fd(int fd); + +int flush_accept(int fd); + +#define CMSG_FOREACH(cmsg, mh) \ + for ((cmsg) = CMSG_FIRSTHDR(mh); (cmsg); (cmsg) = CMSG_NXTHDR((mh), (cmsg))) + +struct cmsghdr* cmsg_find(struct msghdr *mh, int level, int type, socklen_t length); + +/* + * Certain hardware address types (e.g Infiniband) do not fit into sll_addr + * (8 bytes) and run over the structure. This macro returns the correct size that + * must be passed to kernel. + */ +#define SOCKADDR_LL_LEN(sa) \ + ({ \ + const struct sockaddr_ll *_sa = &(sa); \ + size_t _mac_len = sizeof(_sa->sll_addr); \ + assert(_sa->sll_family == AF_PACKET); \ + if (be16toh(_sa->sll_hatype) == ARPHRD_ETHER) \ + _mac_len = MAX(_mac_len, (size_t) ETH_ALEN); \ + if (be16toh(_sa->sll_hatype) == ARPHRD_INFINIBAND) \ + _mac_len = MAX(_mac_len, (size_t) INFINIBAND_ALEN); \ + offsetof(struct sockaddr_ll, sll_addr) + _mac_len; \ + }) + +/* Covers only file system and abstract AF_UNIX socket addresses, but not unnamed socket addresses. */ +#define SOCKADDR_UN_LEN(sa) \ + ({ \ + const struct sockaddr_un *_sa = &(sa); \ + assert(_sa->sun_family == AF_UNIX); \ + offsetof(struct sockaddr_un, sun_path) + \ + (_sa->sun_path[0] == 0 ? \ + 1 + strnlen(_sa->sun_path+1, sizeof(_sa->sun_path)-1) : \ + strnlen(_sa->sun_path, sizeof(_sa->sun_path))+1); \ + }) + +int socket_ioctl_fd(void); + +int sockaddr_un_set_path(struct sockaddr_un *ret, const char *path); + +static inline int setsockopt_int(int fd, int level, int optname, int value) { + if (setsockopt(fd, level, optname, &value, sizeof(value)) < 0) + return -errno; + + return 0; +} diff --git a/src/basic/sparse-endian.h b/src/basic/sparse-endian.h new file mode 100644 index 0000000..9583dda --- /dev/null +++ b/src/basic/sparse-endian.h @@ -0,0 +1,90 @@ +/* SPDX-License-Identifier: MIT + * + * Copyright (c) 2012 Josh Triplett <josh@joshtriplett.org> + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to + * deal in the Software without restriction, including without limitation the + * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or + * sell copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in + * all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING + * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS + * IN THE SOFTWARE. + */ +#pragma once + +#include <byteswap.h> +#include <endian.h> +#include <stdint.h> + +#ifdef __CHECKER__ +#define __sd_bitwise __attribute__((__bitwise__)) +#define __sd_force __attribute__((__force__)) +#else +#define __sd_bitwise +#define __sd_force +#endif + +typedef uint16_t __sd_bitwise le16_t; +typedef uint16_t __sd_bitwise be16_t; +typedef uint32_t __sd_bitwise le32_t; +typedef uint32_t __sd_bitwise be32_t; +typedef uint64_t __sd_bitwise le64_t; +typedef uint64_t __sd_bitwise be64_t; + +#undef htobe16 +#undef htole16 +#undef be16toh +#undef le16toh +#undef htobe32 +#undef htole32 +#undef be32toh +#undef le32toh +#undef htobe64 +#undef htole64 +#undef be64toh +#undef le64toh + +#if __BYTE_ORDER == __LITTLE_ENDIAN +#define bswap_16_on_le(x) __bswap_16(x) +#define bswap_32_on_le(x) __bswap_32(x) +#define bswap_64_on_le(x) __bswap_64(x) +#define bswap_16_on_be(x) (x) +#define bswap_32_on_be(x) (x) +#define bswap_64_on_be(x) (x) +#elif __BYTE_ORDER == __BIG_ENDIAN +#define bswap_16_on_le(x) (x) +#define bswap_32_on_le(x) (x) +#define bswap_64_on_le(x) (x) +#define bswap_16_on_be(x) __bswap_16(x) +#define bswap_32_on_be(x) __bswap_32(x) +#define bswap_64_on_be(x) __bswap_64(x) +#endif + +static inline le16_t htole16(uint16_t value) { return (le16_t __sd_force) bswap_16_on_be(value); } +static inline le32_t htole32(uint32_t value) { return (le32_t __sd_force) bswap_32_on_be(value); } +static inline le64_t htole64(uint64_t value) { return (le64_t __sd_force) bswap_64_on_be(value); } + +static inline be16_t htobe16(uint16_t value) { return (be16_t __sd_force) bswap_16_on_le(value); } +static inline be32_t htobe32(uint32_t value) { return (be32_t __sd_force) bswap_32_on_le(value); } +static inline be64_t htobe64(uint64_t value) { return (be64_t __sd_force) bswap_64_on_le(value); } + +static inline uint16_t le16toh(le16_t value) { return bswap_16_on_be((uint16_t __sd_force)value); } +static inline uint32_t le32toh(le32_t value) { return bswap_32_on_be((uint32_t __sd_force)value); } +static inline uint64_t le64toh(le64_t value) { return bswap_64_on_be((uint64_t __sd_force)value); } + +static inline uint16_t be16toh(be16_t value) { return bswap_16_on_le((uint16_t __sd_force)value); } +static inline uint32_t be32toh(be32_t value) { return bswap_32_on_le((uint32_t __sd_force)value); } +static inline uint64_t be64toh(be64_t value) { return bswap_64_on_le((uint64_t __sd_force)value); } + +#undef __sd_bitwise +#undef __sd_force diff --git a/src/basic/special.h b/src/basic/special.h new file mode 100644 index 0000000..379a3d7 --- /dev/null +++ b/src/basic/special.h @@ -0,0 +1,105 @@ +/* SPDX-License-Identifier: LGPL-2.1+ */ +#pragma once + +#define SPECIAL_DEFAULT_TARGET "default.target" + +/* Shutdown targets */ +#define SPECIAL_UMOUNT_TARGET "umount.target" +/* This is not really intended to be started by directly. This is + * mostly so that other targets (reboot/halt/poweroff) can depend on + * it to bring all services down that want to be brought down on + * system shutdown. */ +#define SPECIAL_SHUTDOWN_TARGET "shutdown.target" +#define SPECIAL_HALT_TARGET "halt.target" +#define SPECIAL_POWEROFF_TARGET "poweroff.target" +#define SPECIAL_REBOOT_TARGET "reboot.target" +#define SPECIAL_KEXEC_TARGET "kexec.target" +#define SPECIAL_EXIT_TARGET "exit.target" +#define SPECIAL_SUSPEND_TARGET "suspend.target" +#define SPECIAL_HIBERNATE_TARGET "hibernate.target" +#define SPECIAL_HYBRID_SLEEP_TARGET "hybrid-sleep.target" +#define SPECIAL_SUSPEND_THEN_HIBERNATE_TARGET "suspend-then-hibernate.target" + +/* Special boot targets */ +#define SPECIAL_RESCUE_TARGET "rescue.target" +#define SPECIAL_EMERGENCY_TARGET "emergency.target" +#define SPECIAL_MULTI_USER_TARGET "multi-user.target" +#define SPECIAL_GRAPHICAL_TARGET "graphical.target" + +/* Early boot targets */ +#define SPECIAL_SYSINIT_TARGET "sysinit.target" +#define SPECIAL_SOCKETS_TARGET "sockets.target" +#define SPECIAL_TIMERS_TARGET "timers.target" +#define SPECIAL_PATHS_TARGET "paths.target" +#define SPECIAL_LOCAL_FS_TARGET "local-fs.target" +#define SPECIAL_LOCAL_FS_PRE_TARGET "local-fs-pre.target" +#define SPECIAL_INITRD_FS_TARGET "initrd-fs.target" +#define SPECIAL_INITRD_ROOT_DEVICE_TARGET "initrd-root-device.target" +#define SPECIAL_INITRD_ROOT_FS_TARGET "initrd-root-fs.target" +#define SPECIAL_REMOTE_FS_TARGET "remote-fs.target" /* LSB's $remote_fs */ +#define SPECIAL_REMOTE_FS_PRE_TARGET "remote-fs-pre.target" +#define SPECIAL_SWAP_TARGET "swap.target" +#define SPECIAL_NETWORK_ONLINE_TARGET "network-online.target" +#define SPECIAL_TIME_SYNC_TARGET "time-sync.target" /* LSB's $time */ +#define SPECIAL_BASIC_TARGET "basic.target" + +/* LSB compatibility */ +#define SPECIAL_NETWORK_TARGET "network.target" /* LSB's $network */ +#define SPECIAL_NSS_LOOKUP_TARGET "nss-lookup.target" /* LSB's $named */ +#define SPECIAL_RPCBIND_TARGET "rpcbind.target" /* LSB's $portmap */ + +/* + * Rules regarding adding further high level targets like the above: + * + * - Be conservative, only add more of these when we really need + * them. We need strong usecases for further additions. + * + * - When there can be multiple implementations running side-by-side, + * it needs to be a .target unit which can pull in all + * implementations. + * + * - If something can be implemented with socket activation, and + * without, it needs to be a .target unit, so that it can pull in + * the appropriate unit. + * + * - Otherwise, it should be a .service unit. + * + * - In some cases it is OK to have both a .service and a .target + * unit, i.e. if there can be multiple parallel implementations, but + * only one is the "system" one. Example: syslog. + * + * Or to put this in other words: .service symlinks can be used to + * arbitrate between multiple implementations if there can be only one + * of a kind. .target units can be used to support multiple + * implementations that can run side-by-side. + */ + +/* Magic early boot services */ +#define SPECIAL_FSCK_SERVICE "systemd-fsck@.service" +#define SPECIAL_QUOTACHECK_SERVICE "systemd-quotacheck.service" +#define SPECIAL_QUOTAON_SERVICE "quotaon.service" +#define SPECIAL_REMOUNT_FS_SERVICE "systemd-remount-fs.service" + +/* Services systemd relies on */ +#define SPECIAL_DBUS_SERVICE "dbus.service" +#define SPECIAL_DBUS_SOCKET "dbus.socket" +#define SPECIAL_JOURNALD_SOCKET "systemd-journald.socket" +#define SPECIAL_JOURNALD_SERVICE "systemd-journald.service" +#define SPECIAL_TMPFILES_SETUP_SERVICE "systemd-tmpfiles-setup.service" + +/* Magic init signals */ +#define SPECIAL_KBREQUEST_TARGET "kbrequest.target" +#define SPECIAL_SIGPWR_TARGET "sigpwr.target" +#define SPECIAL_CTRL_ALT_DEL_TARGET "ctrl-alt-del.target" + +/* Where we add all our system units, users and machines by default */ +#define SPECIAL_SYSTEM_SLICE "system.slice" +#define SPECIAL_USER_SLICE "user.slice" +#define SPECIAL_MACHINE_SLICE "machine.slice" +#define SPECIAL_ROOT_SLICE "-.slice" + +/* The scope unit systemd itself lives in. */ +#define SPECIAL_INIT_SCOPE "init.scope" + +/* The root directory. */ +#define SPECIAL_ROOT_MOUNT "-.mount" diff --git a/src/basic/stat-util.c b/src/basic/stat-util.c new file mode 100644 index 0000000..ea2bbc3 --- /dev/null +++ b/src/basic/stat-util.c @@ -0,0 +1,427 @@ +/* SPDX-License-Identifier: LGPL-2.1+ */ + +#include <dirent.h> +#include <errno.h> +#include <fcntl.h> +#include <linux/magic.h> +#include <sched.h> +#include <sys/stat.h> +#include <sys/statvfs.h> +#include <sys/types.h> +#include <unistd.h> + +#include "alloc-util.h" +#include "dirent-util.h" +#include "fd-util.h" +#include "fs-util.h" +#include "macro.h" +#include "missing.h" +#include "parse-util.h" +#include "stat-util.h" +#include "string-util.h" + +int is_symlink(const char *path) { + struct stat info; + + assert(path); + + if (lstat(path, &info) < 0) + return -errno; + + return !!S_ISLNK(info.st_mode); +} + +int is_dir(const char* path, bool follow) { + struct stat st; + int r; + + assert(path); + + if (follow) + r = stat(path, &st); + else + r = lstat(path, &st); + if (r < 0) + return -errno; + + return !!S_ISDIR(st.st_mode); +} + +int is_dir_fd(int fd) { + struct stat st; + + if (fstat(fd, &st) < 0) + return -errno; + + return !!S_ISDIR(st.st_mode); +} + +int is_device_node(const char *path) { + struct stat info; + + assert(path); + + if (lstat(path, &info) < 0) + return -errno; + + return !!(S_ISBLK(info.st_mode) || S_ISCHR(info.st_mode)); +} + +int dir_is_empty_at(int dir_fd, const char *path) { + _cleanup_close_ int fd = -1; + _cleanup_closedir_ DIR *d = NULL; + struct dirent *de; + + if (path) + fd = openat(dir_fd, path, O_RDONLY|O_DIRECTORY|O_CLOEXEC); + else + fd = fcntl(fd, F_DUPFD_CLOEXEC, 3); + if (fd < 0) + return -errno; + + d = fdopendir(fd); + if (!d) + return -errno; + fd = -1; + + FOREACH_DIRENT(de, d, return -errno) + return 0; + + return 1; +} + +bool null_or_empty(struct stat *st) { + assert(st); + + if (S_ISREG(st->st_mode) && st->st_size <= 0) + return true; + + /* We don't want to hardcode the major/minor of /dev/null, + * hence we do a simpler "is this a device node?" check. */ + + if (S_ISCHR(st->st_mode) || S_ISBLK(st->st_mode)) + return true; + + return false; +} + +int null_or_empty_path(const char *fn) { + struct stat st; + + assert(fn); + + if (stat(fn, &st) < 0) + return -errno; + + return null_or_empty(&st); +} + +int null_or_empty_fd(int fd) { + struct stat st; + + assert(fd >= 0); + + if (fstat(fd, &st) < 0) + return -errno; + + return null_or_empty(&st); +} + +int path_is_read_only_fs(const char *path) { + struct statvfs st; + + assert(path); + + if (statvfs(path, &st) < 0) + return -errno; + + if (st.f_flag & ST_RDONLY) + return true; + + /* On NFS, statvfs() might not reflect whether we can actually + * write to the remote share. Let's try again with + * access(W_OK) which is more reliable, at least sometimes. */ + if (access(path, W_OK) < 0 && errno == EROFS) + return true; + + return false; +} + +int files_same(const char *filea, const char *fileb, int flags) { + struct stat a, b; + + assert(filea); + assert(fileb); + + if (fstatat(AT_FDCWD, filea, &a, flags) < 0) + return -errno; + + if (fstatat(AT_FDCWD, fileb, &b, flags) < 0) + return -errno; + + return a.st_dev == b.st_dev && + a.st_ino == b.st_ino; +} + +bool is_fs_type(const struct statfs *s, statfs_f_type_t magic_value) { + assert(s); + assert_cc(sizeof(statfs_f_type_t) >= sizeof(s->f_type)); + + return F_TYPE_EQUAL(s->f_type, magic_value); +} + +int fd_is_fs_type(int fd, statfs_f_type_t magic_value) { + struct statfs s; + + if (fstatfs(fd, &s) < 0) + return -errno; + + return is_fs_type(&s, magic_value); +} + +int path_is_fs_type(const char *path, statfs_f_type_t magic_value) { + _cleanup_close_ int fd = -1; + + fd = open(path, O_RDONLY|O_CLOEXEC|O_NOCTTY|O_PATH); + if (fd < 0) + return -errno; + + return fd_is_fs_type(fd, magic_value); +} + +bool is_temporary_fs(const struct statfs *s) { + return is_fs_type(s, TMPFS_MAGIC) || + is_fs_type(s, RAMFS_MAGIC); +} + +bool is_network_fs(const struct statfs *s) { + return is_fs_type(s, CIFS_MAGIC_NUMBER) || + is_fs_type(s, CODA_SUPER_MAGIC) || + is_fs_type(s, NCP_SUPER_MAGIC) || + is_fs_type(s, NFS_SUPER_MAGIC) || + is_fs_type(s, SMB_SUPER_MAGIC) || + is_fs_type(s, V9FS_MAGIC) || + is_fs_type(s, AFS_SUPER_MAGIC) || + is_fs_type(s, OCFS2_SUPER_MAGIC); +} + +int fd_is_temporary_fs(int fd) { + struct statfs s; + + if (fstatfs(fd, &s) < 0) + return -errno; + + return is_temporary_fs(&s); +} + +int fd_is_network_fs(int fd) { + struct statfs s; + + if (fstatfs(fd, &s) < 0) + return -errno; + + return is_network_fs(&s); +} + +int fd_is_network_ns(int fd) { + struct statfs s; + int r; + + /* Checks whether the specified file descriptor refers to a network namespace. On old kernels there's no nice + * way to detect that, hence on those we'll return a recognizable error (EUCLEAN), so that callers can handle + * this somewhat nicely. + * + * This function returns > 0 if the fd definitely refers to a network namespace, 0 if it definitely does not + * refer to a network namespace, -EUCLEAN if we can't determine, and other negative error codes on error. */ + + if (fstatfs(fd, &s) < 0) + return -errno; + + if (!is_fs_type(&s, NSFS_MAGIC)) { + /* On really old kernels, there was no "nsfs", and network namespace sockets belonged to procfs + * instead. Handle that in a somewhat smart way. */ + + if (is_fs_type(&s, PROC_SUPER_MAGIC)) { + struct statfs t; + + /* OK, so it is procfs. Let's see if our own network namespace is procfs, too. If so, then the + * passed fd might refer to a network namespace, but we can't know for sure. In that case, + * return a recognizable error. */ + + if (statfs("/proc/self/ns/net", &t) < 0) + return -errno; + + if (s.f_type == t.f_type) + return -EUCLEAN; /* It's possible, we simply don't know */ + } + + return 0; /* No! */ + } + + r = ioctl(fd, NS_GET_NSTYPE); + if (r < 0) { + if (errno == ENOTTY) /* Old kernels didn't know this ioctl, let's also return a recognizable error in that case */ + return -EUCLEAN; + + return -errno; + } + + return r == CLONE_NEWNET; +} + +int path_is_temporary_fs(const char *path) { + _cleanup_close_ int fd = -1; + + fd = open(path, O_RDONLY|O_CLOEXEC|O_NOCTTY|O_PATH); + if (fd < 0) + return -errno; + + return fd_is_temporary_fs(fd); +} + +int stat_verify_regular(const struct stat *st) { + assert(st); + + /* Checks whether the specified stat() structure refers to a regular file. If not returns an appropriate error + * code. */ + + if (S_ISDIR(st->st_mode)) + return -EISDIR; + + if (S_ISLNK(st->st_mode)) + return -ELOOP; + + if (!S_ISREG(st->st_mode)) + return -EBADFD; + + return 0; +} + +int fd_verify_regular(int fd) { + struct stat st; + + assert(fd >= 0); + + if (fstat(fd, &st) < 0) + return -errno; + + return stat_verify_regular(&st); +} + +int stat_verify_directory(const struct stat *st) { + assert(st); + + if (S_ISLNK(st->st_mode)) + return -ELOOP; + + if (!S_ISDIR(st->st_mode)) + return -ENOTDIR; + + return 0; +} + +int fd_verify_directory(int fd) { + struct stat st; + + assert(fd >= 0); + + if (fstat(fd, &st) < 0) + return -errno; + + return stat_verify_directory(&st); +} + +int device_path_make_major_minor(mode_t mode, dev_t devno, char **ret) { + const char *t; + + /* Generates the /dev/{char|block}/MAJOR:MINOR path for a dev_t */ + + if (S_ISCHR(mode)) + t = "char"; + else if (S_ISBLK(mode)) + t = "block"; + else + return -ENODEV; + + if (asprintf(ret, "/dev/%s/%u:%u", t, major(devno), minor(devno)) < 0) + return -ENOMEM; + + return 0; +} + +int device_path_make_canonical(mode_t mode, dev_t devno, char **ret) { + _cleanup_free_ char *p = NULL; + int r; + + /* Finds the canonical path for a device, i.e. resolves the /dev/{char|block}/MAJOR:MINOR path to the end. */ + + assert(ret); + + if (major(devno) == 0 && minor(devno) == 0) { + char *s; + + /* A special hack to make sure our 'inaccessible' device nodes work. They won't have symlinks in + * /dev/block/ and /dev/char/, hence we handle them specially here. */ + + if (S_ISCHR(mode)) + s = strdup("/run/systemd/inaccessible/chr"); + else if (S_ISBLK(mode)) + s = strdup("/run/systemd/inaccessible/blk"); + else + return -ENODEV; + + if (!s) + return -ENOMEM; + + *ret = s; + return 0; + } + + r = device_path_make_major_minor(mode, devno, &p); + if (r < 0) + return r; + + return chase_symlinks(p, NULL, 0, ret); +} + +int device_path_parse_major_minor(const char *path, mode_t *ret_mode, dev_t *ret_devno) { + mode_t mode; + dev_t devno; + int r; + + /* Tries to extract the major/minor directly from the device path if we can. Handles /dev/block/ and /dev/char/ + * paths, as well out synthetic inaccessible device nodes. Never goes to disk. Returns -ENODEV if the device + * path cannot be parsed like this. */ + + if (path_equal(path, "/run/systemd/inaccessible/chr")) { + mode = S_IFCHR; + devno = makedev(0, 0); + } else if (path_equal(path, "/run/systemd/inaccessible/blk")) { + mode = S_IFBLK; + devno = makedev(0, 0); + } else { + const char *w; + + w = path_startswith(path, "/dev/block/"); + if (w) + mode = S_IFBLK; + else { + w = path_startswith(path, "/dev/char/"); + if (!w) + return -ENODEV; + + mode = S_IFCHR; + } + + r = parse_dev(w, &devno); + if (r < 0) + return r; + } + + if (ret_mode) + *ret_mode = mode; + if (ret_devno) + *ret_devno = devno; + + return 0; +} diff --git a/src/basic/stat-util.h b/src/basic/stat-util.h new file mode 100644 index 0000000..74fb725 --- /dev/null +++ b/src/basic/stat-util.h @@ -0,0 +1,90 @@ +/* SPDX-License-Identifier: LGPL-2.1+ */ +#pragma once + +#include <stdbool.h> +#include <stddef.h> +#include <sys/stat.h> +#include <sys/statfs.h> +#include <sys/types.h> +#include <sys/vfs.h> + +#include "macro.h" + +int is_symlink(const char *path); +int is_dir(const char *path, bool follow); +int is_dir_fd(int fd); +int is_device_node(const char *path); + +int dir_is_empty_at(int dir_fd, const char *path); +static inline int dir_is_empty(const char *path) { + return dir_is_empty_at(AT_FDCWD, path); +} + +static inline int dir_is_populated(const char *path) { + int r; + r = dir_is_empty(path); + if (r < 0) + return r; + return !r; +} + +bool null_or_empty(struct stat *st) _pure_; +int null_or_empty_path(const char *fn); +int null_or_empty_fd(int fd); + +int path_is_read_only_fs(const char *path); + +int files_same(const char *filea, const char *fileb, int flags); + +/* The .f_type field of struct statfs is really weird defined on + * different archs. Let's give its type a name. */ +typedef typeof(((struct statfs*)NULL)->f_type) statfs_f_type_t; + +bool is_fs_type(const struct statfs *s, statfs_f_type_t magic_value) _pure_; +int fd_is_fs_type(int fd, statfs_f_type_t magic_value); +int path_is_fs_type(const char *path, statfs_f_type_t magic_value); + +bool is_temporary_fs(const struct statfs *s) _pure_; +bool is_network_fs(const struct statfs *s) _pure_; + +int fd_is_temporary_fs(int fd); +int fd_is_network_fs(int fd); + +int fd_is_network_ns(int fd); + +int path_is_temporary_fs(const char *path); + +/* Because statfs.t_type can be int on some architectures, we have to cast + * the const magic to the type, otherwise the compiler warns about + * signed/unsigned comparison, because the magic can be 32 bit unsigned. + */ +#define F_TYPE_EQUAL(a, b) (a == (typeof(a)) b) + +int stat_verify_regular(const struct stat *st); +int fd_verify_regular(int fd); + +int stat_verify_directory(const struct stat *st); +int fd_verify_directory(int fd); + +/* glibc and the Linux kernel have different ideas about the major/minor size. These calls will check whether the + * specified major is valid by the Linux kernel's standards, not by glibc's. Linux has 20bits of minor, and 12 bits of + * major space. See MINORBITS in linux/kdev_t.h in the kernel sources. (If you wonder why we define _y here, instead of + * comparing directly >= 0: it's to trick out -Wtype-limits, which would otherwise complain if the type is unsigned, as + * such a test would be pointless in such a case.) */ + +#define DEVICE_MAJOR_VALID(x) \ + ({ \ + typeof(x) _x = (x), _y = 0; \ + _x >= _y && _x < (UINT32_C(1) << 12); \ + \ + }) + +#define DEVICE_MINOR_VALID(x) \ + ({ \ + typeof(x) _x = (x), _y = 0; \ + _x >= _y && _x < (UINT32_C(1) << 20); \ + }) + +int device_path_make_major_minor(mode_t mode, dev_t devno, char **ret); +int device_path_make_canonical(mode_t mode, dev_t devno, char **ret); +int device_path_parse_major_minor(const char *path, mode_t *ret_mode, dev_t *ret_devno); diff --git a/src/basic/static-destruct.h b/src/basic/static-destruct.h new file mode 100644 index 0000000..443c0e8 --- /dev/null +++ b/src/basic/static-destruct.h @@ -0,0 +1,56 @@ +#pragma once + +#include "alloc-util.h" +#include "macro.h" + +/* A framework for registering static variables that shall be freed on shutdown of a process. It's a bit like gcc's + * destructor attribute, but allows us to precisely schedule when we want to free the variables. This is supposed to + * feel a bit like the gcc cleanup attribute, but for static variables. Note that this does not work for static + * variables declared in .so's, as the list is private to the same linking unit. But maybe that's a good thing. */ + +typedef struct StaticDestructor { + void *data; + free_func_t destroy; +} StaticDestructor; + +#define STATIC_DESTRUCTOR_REGISTER(variable, func) \ + _STATIC_DESTRUCTOR_REGISTER(UNIQ, variable, func) + +#define _STATIC_DESTRUCTOR_REGISTER(uq, variable, func) \ + /* Type-safe destructor */ \ + static void UNIQ_T(static_destructor_wrapper, uq)(void *p) { \ + typeof(variable) *q = p; \ + func(q); \ + } \ + /* The actual destructor structure we place in a special section to find it */ \ + _section_("SYSTEMD_STATIC_DESTRUCT") \ + /* We pick pointer alignment, since that is apparently what gcc does for static variables */ \ + _alignptr_ \ + /* Make sure this is not dropped from the image because not explicitly referenced */ \ + _used_ \ + /* Make sure that AddressSanitizer doesn't pad this variable: we want everything in this section packed next to each other so that we can enumerate it. */ \ + _variable_no_sanitize_address_ \ + static const StaticDestructor UNIQ_T(static_destructor_entry, uq) = { \ + .data = &(variable), \ + .destroy = UNIQ_T(static_destructor_wrapper, uq), \ + } + +/* Beginning and end of our section listing the destructors. We define these as weak as we want this to work even if + * there's not a single destructor is defined in which case the section will be missing. */ +extern const struct StaticDestructor _weak_ __start_SYSTEMD_STATIC_DESTRUCT[]; +extern const struct StaticDestructor _weak_ __stop_SYSTEMD_STATIC_DESTRUCT[]; + +/* The function to destroy everything. (Note that this must be static inline, as it's key that it remains in the same + * linking unit as the variables we want to destroy. */ +static inline void static_destruct(void) { + const StaticDestructor *d; + + if (!__start_SYSTEMD_STATIC_DESTRUCT) + return; + + d = ALIGN_TO_PTR(__start_SYSTEMD_STATIC_DESTRUCT, sizeof(void*)); + while (d < __stop_SYSTEMD_STATIC_DESTRUCT) { + d->destroy(d->data); + d = ALIGN_TO_PTR(d + 1, sizeof(void*)); + } +} diff --git a/src/basic/stdio-util.h b/src/basic/stdio-util.h new file mode 100644 index 0000000..dc67b6e --- /dev/null +++ b/src/basic/stdio-util.h @@ -0,0 +1,64 @@ +/* SPDX-License-Identifier: LGPL-2.1+ */ +#pragma once + +#include <printf.h> +#include <stdarg.h> +#include <stdio.h> +#include <sys/types.h> + +#include "macro.h" +#include "util.h" + +#define snprintf_ok(buf, len, fmt, ...) \ + ((size_t) snprintf(buf, len, fmt, __VA_ARGS__) < (len)) + +#define xsprintf(buf, fmt, ...) \ + assert_message_se(snprintf_ok(buf, ELEMENTSOF(buf), fmt, __VA_ARGS__), "xsprintf: " #buf "[] must be big enough") + +#define VA_FORMAT_ADVANCE(format, ap) \ +do { \ + int _argtypes[128]; \ + size_t _i, _k; \ + /* See https://github.com/google/sanitizers/issues/992 */ \ + if (HAS_FEATURE_MEMORY_SANITIZER) \ + zero(_argtypes); \ + _k = parse_printf_format((format), ELEMENTSOF(_argtypes), _argtypes); \ + assert(_k < ELEMENTSOF(_argtypes)); \ + for (_i = 0; _i < _k; _i++) { \ + if (_argtypes[_i] & PA_FLAG_PTR) { \ + (void) va_arg(ap, void*); \ + continue; \ + } \ + \ + switch (_argtypes[_i]) { \ + case PA_INT: \ + case PA_INT|PA_FLAG_SHORT: \ + case PA_CHAR: \ + (void) va_arg(ap, int); \ + break; \ + case PA_INT|PA_FLAG_LONG: \ + (void) va_arg(ap, long int); \ + break; \ + case PA_INT|PA_FLAG_LONG_LONG: \ + (void) va_arg(ap, long long int); \ + break; \ + case PA_WCHAR: \ + (void) va_arg(ap, wchar_t); \ + break; \ + case PA_WSTRING: \ + case PA_STRING: \ + case PA_POINTER: \ + (void) va_arg(ap, void*); \ + break; \ + case PA_FLOAT: \ + case PA_DOUBLE: \ + (void) va_arg(ap, double); \ + break; \ + case PA_DOUBLE|PA_FLAG_LONG_DOUBLE: \ + (void) va_arg(ap, long double); \ + break; \ + default: \ + assert_not_reached("Unknown format string argument."); \ + } \ + } \ +} while (false) diff --git a/src/basic/strbuf.c b/src/basic/strbuf.c new file mode 100644 index 0000000..81f4f21 --- /dev/null +++ b/src/basic/strbuf.c @@ -0,0 +1,183 @@ +/* SPDX-License-Identifier: LGPL-2.1+ */ + +#include <errno.h> +#include <stdlib.h> +#include <string.h> + +#include "alloc-util.h" +#include "strbuf.h" +#include "util.h" + +/* + * Strbuf stores given strings in a single continuous allocated memory + * area. Identical strings are de-duplicated and return the same offset + * as the first string stored. If the tail of a string already exists + * in the buffer, the tail is returned. + * + * A trie (http://en.wikipedia.org/wiki/Trie) is used to maintain the + * information about the stored strings. + * + * Example of udev rules: + * $ ./udevadm test . + * ... + * read rules file: /usr/lib/udev/rules.d/99-systemd.rules + * rules contain 196608 bytes tokens (16384 * 12 bytes), 39742 bytes strings + * 23939 strings (207859 bytes), 20404 de-duplicated (171653 bytes), 3536 trie nodes used + * ... + */ + +struct strbuf *strbuf_new(void) { + struct strbuf *str; + + str = new(struct strbuf, 1); + if (!str) + return NULL; + *str = (struct strbuf) { + .buf = new0(char, 1), + .root = new0(struct strbuf_node, 1), + .len = 1, + .nodes_count = 1, + }; + if (!str->buf || !str->root) { + free(str->buf); + free(str->root); + return mfree(str); + } + + return str; +} + +static struct strbuf_node* strbuf_node_cleanup(struct strbuf_node *node) { + size_t i; + + for (i = 0; i < node->children_count; i++) + strbuf_node_cleanup(node->children[i].child); + free(node->children); + return mfree(node); +} + +/* clean up trie data, leave only the string buffer */ +void strbuf_complete(struct strbuf *str) { + if (!str) + return; + if (str->root) + str->root = strbuf_node_cleanup(str->root); +} + +/* clean up everything */ +void strbuf_cleanup(struct strbuf *str) { + if (!str) + return; + + strbuf_complete(str); + free(str->buf); + free(str); +} + +static int strbuf_children_cmp(const struct strbuf_child_entry *n1, + const struct strbuf_child_entry *n2) { + return n1->c - n2->c; +} + +static void bubbleinsert(struct strbuf_node *node, + uint8_t c, + struct strbuf_node *node_child) { + + struct strbuf_child_entry new = { + .c = c, + .child = node_child, + }; + int left = 0, right = node->children_count; + + while (right > left) { + int middle = (right + left) / 2 ; + if (strbuf_children_cmp(&node->children[middle], &new) <= 0) + left = middle + 1; + else + right = middle; + } + + memmove(node->children + left + 1, node->children + left, + sizeof(struct strbuf_child_entry) * (node->children_count - left)); + node->children[left] = new; + + node->children_count++; +} + +/* add string, return the index/offset into the buffer */ +ssize_t strbuf_add_string(struct strbuf *str, const char *s, size_t len) { + uint8_t c; + struct strbuf_node *node; + size_t depth; + char *buf_new; + struct strbuf_child_entry *child; + struct strbuf_node *node_child; + ssize_t off; + + if (!str->root) + return -EINVAL; + + /* search string; start from last character to find possibly matching tails */ + + str->in_count++; + if (len == 0) { + str->dedup_count++; + return 0; + } + str->in_len += len; + + node = str->root; + for (depth = 0; depth <= len; depth++) { + struct strbuf_child_entry search; + + /* match against current node */ + off = node->value_off + node->value_len - len; + if (depth == len || (node->value_len >= len && memcmp(str->buf + off, s, len) == 0)) { + str->dedup_len += len; + str->dedup_count++; + return off; + } + + c = s[len - 1 - depth]; + + /* lookup child node */ + search.c = c; + child = typesafe_bsearch(&search, node->children, node->children_count, strbuf_children_cmp); + if (!child) + break; + node = child->child; + } + + /* add new string */ + buf_new = realloc(str->buf, str->len + len+1); + if (!buf_new) + return -ENOMEM; + str->buf = buf_new; + off = str->len; + memcpy(str->buf + off, s, len); + str->len += len; + str->buf[str->len++] = '\0'; + + /* new node */ + node_child = new(struct strbuf_node, 1); + if (!node_child) + return -ENOMEM; + *node_child = (struct strbuf_node) { + .value_off = off, + .value_len = len, + }; + + /* extend array, add new entry, sort for bisection */ + child = reallocarray(node->children, node->children_count + 1, sizeof(struct strbuf_child_entry)); + if (!child) { + free(node_child); + return -ENOMEM; + } + + str->nodes_count++; + + node->children = child; + bubbleinsert(node, c, node_child); + + return off; +} diff --git a/src/basic/strbuf.h b/src/basic/strbuf.h new file mode 100644 index 0000000..a36944a --- /dev/null +++ b/src/basic/strbuf.h @@ -0,0 +1,39 @@ +/* SPDX-License-Identifier: LGPL-2.1+ */ +#pragma once + +#include <stddef.h> +#include <stdint.h> +#include <sys/types.h> + +#include "macro.h" + +struct strbuf { + char *buf; + size_t len; + struct strbuf_node *root; + + size_t nodes_count; + size_t in_count; + size_t in_len; + size_t dedup_len; + size_t dedup_count; +}; + +struct strbuf_node { + size_t value_off; + size_t value_len; + + struct strbuf_child_entry *children; + uint8_t children_count; +}; + +struct strbuf_child_entry { + uint8_t c; + struct strbuf_node *child; +}; + +struct strbuf *strbuf_new(void); +ssize_t strbuf_add_string(struct strbuf *str, const char *s, size_t len); +void strbuf_complete(struct strbuf *str); +void strbuf_cleanup(struct strbuf *str); +DEFINE_TRIVIAL_CLEANUP_FUNC(struct strbuf*, strbuf_cleanup); diff --git a/src/basic/string-table.c b/src/basic/string-table.c new file mode 100644 index 0000000..34931b0 --- /dev/null +++ b/src/basic/string-table.c @@ -0,0 +1,17 @@ +/* SPDX-License-Identifier: LGPL-2.1+ */ + +#include "string-table.h" +#include "string-util.h" + +ssize_t string_table_lookup(const char * const *table, size_t len, const char *key) { + size_t i; + + if (!key) + return -1; + + for (i = 0; i < len; ++i) + if (streq_ptr(table[i], key)) + return (ssize_t) i; + + return -1; +} diff --git a/src/basic/string-table.h b/src/basic/string-table.h new file mode 100644 index 0000000..228c12a --- /dev/null +++ b/src/basic/string-table.h @@ -0,0 +1,112 @@ +/* SPDX-License-Identifier: LGPL-2.1+ */ + +#pragma once + +#include <errno.h> +#include <stddef.h> +#include <stdio.h> +#include <string.h> +#include <sys/types.h> + +#include "macro.h" +#include "parse-util.h" +#include "string-util.h" + +ssize_t string_table_lookup(const char * const *table, size_t len, const char *key); + +/* For basic lookup tables with strictly enumerated entries */ +#define _DEFINE_STRING_TABLE_LOOKUP_TO_STRING(name,type,scope) \ + scope const char *name##_to_string(type i) { \ + if (i < 0 || i >= (type) ELEMENTSOF(name##_table)) \ + return NULL; \ + return name##_table[i]; \ + } + +#define _DEFINE_STRING_TABLE_LOOKUP_FROM_STRING(name,type,scope) \ + scope type name##_from_string(const char *s) { \ + return (type) string_table_lookup(name##_table, ELEMENTSOF(name##_table), s); \ + } + +#define _DEFINE_STRING_TABLE_LOOKUP_FROM_STRING_WITH_BOOLEAN(name,type,yes,scope) \ + scope type name##_from_string(const char *s) { \ + int b; \ + if (!s) \ + return -1; \ + b = parse_boolean(s); \ + if (b == 0) \ + return (type) 0; \ + else if (b > 0) \ + return yes; \ + return (type) string_table_lookup(name##_table, ELEMENTSOF(name##_table), s); \ + } + +#define _DEFINE_STRING_TABLE_LOOKUP_TO_STRING_FALLBACK(name,type,max,scope) \ + scope int name##_to_string_alloc(type i, char **str) { \ + char *s; \ + if (i < 0 || i > max) \ + return -ERANGE; \ + if (i < (type) ELEMENTSOF(name##_table)) { \ + s = strdup(name##_table[i]); \ + if (!s) \ + return -ENOMEM; \ + } else { \ + if (asprintf(&s, "%i", i) < 0) \ + return -ENOMEM; \ + } \ + *str = s; \ + return 0; \ + } + +#define _DEFINE_STRING_TABLE_LOOKUP_FROM_STRING_FALLBACK(name,type,max,scope) \ + scope type name##_from_string(const char *s) { \ + type i; \ + unsigned u = 0; \ + if (!s) \ + return (type) -1; \ + for (i = 0; i < (type) ELEMENTSOF(name##_table); i++) \ + if (streq_ptr(name##_table[i], s)) \ + return i; \ + if (safe_atou(s, &u) >= 0 && u <= max) \ + return (type) u; \ + return (type) -1; \ + } \ + +#define _DEFINE_STRING_TABLE_LOOKUP(name,type,scope) \ + _DEFINE_STRING_TABLE_LOOKUP_TO_STRING(name,type,scope) \ + _DEFINE_STRING_TABLE_LOOKUP_FROM_STRING(name,type,scope) + +#define _DEFINE_STRING_TABLE_LOOKUP_WITH_BOOLEAN(name,type,yes,scope) \ + _DEFINE_STRING_TABLE_LOOKUP_TO_STRING(name,type,scope) \ + _DEFINE_STRING_TABLE_LOOKUP_FROM_STRING_WITH_BOOLEAN(name,type,yes,scope) + +#define DEFINE_STRING_TABLE_LOOKUP(name,type) _DEFINE_STRING_TABLE_LOOKUP(name,type,) +#define DEFINE_PRIVATE_STRING_TABLE_LOOKUP(name,type) _DEFINE_STRING_TABLE_LOOKUP(name,type,static) +#define DEFINE_PRIVATE_STRING_TABLE_LOOKUP_TO_STRING(name,type) _DEFINE_STRING_TABLE_LOOKUP_TO_STRING(name,type,static) +#define DEFINE_PRIVATE_STRING_TABLE_LOOKUP_FROM_STRING(name,type) _DEFINE_STRING_TABLE_LOOKUP_FROM_STRING(name,type,static) + +#define DEFINE_STRING_TABLE_LOOKUP_WITH_BOOLEAN(name,type,yes) _DEFINE_STRING_TABLE_LOOKUP_WITH_BOOLEAN(name,type,yes,) + +/* For string conversions where numbers are also acceptable */ +#define DEFINE_STRING_TABLE_LOOKUP_WITH_FALLBACK(name,type,max) \ + _DEFINE_STRING_TABLE_LOOKUP_TO_STRING_FALLBACK(name,type,max,) \ + _DEFINE_STRING_TABLE_LOOKUP_FROM_STRING_FALLBACK(name,type,max,) + +#define DEFINE_PRIVATE_STRING_TABLE_LOOKUP_TO_STRING_FALLBACK(name,type,max) \ + _DEFINE_STRING_TABLE_LOOKUP_TO_STRING_FALLBACK(name,type,max,static) +#define DEFINE_PRIVATE_STRING_TABLE_LOOKUP_FROM_STRING_FALLBACK(name,type,max) \ + _DEFINE_STRING_TABLE_LOOKUP_FROM_STRING_FALLBACK(name,type,max,static) + +#define DUMP_STRING_TABLE(name,type,max) \ + do { \ + type _k; \ + flockfile(stdout); \ + for (_k = 0; _k < (max); _k++) { \ + const char *_t; \ + _t = name##_to_string(_k); \ + if (!_t) \ + continue; \ + fputs_unlocked(_t, stdout); \ + fputc_unlocked('\n', stdout); \ + } \ + funlockfile(stdout); \ + } while(false) diff --git a/src/basic/string-util.c b/src/basic/string-util.c new file mode 100644 index 0000000..93917bc --- /dev/null +++ b/src/basic/string-util.c @@ -0,0 +1,1099 @@ +/* SPDX-License-Identifier: LGPL-2.1+ */ + +#include <errno.h> +#include <stdarg.h> +#include <stdint.h> +#include <stdio.h> +#include <stdio_ext.h> +#include <stdlib.h> +#include <string.h> + +#include "alloc-util.h" +#include "escape.h" +#include "gunicode.h" +#include "locale-util.h" +#include "macro.h" +#include "string-util.h" +#include "terminal-util.h" +#include "utf8.h" +#include "util.h" +#include "fileio.h" + +int strcmp_ptr(const char *a, const char *b) { + + /* Like strcmp(), but tries to make sense of NULL pointers */ + if (a && b) + return strcmp(a, b); + + if (!a && b) + return -1; + + if (a && !b) + return 1; + + return 0; +} + +char* endswith(const char *s, const char *postfix) { + size_t sl, pl; + + assert(s); + assert(postfix); + + sl = strlen(s); + pl = strlen(postfix); + + if (pl == 0) + return (char*) s + sl; + + if (sl < pl) + return NULL; + + if (memcmp(s + sl - pl, postfix, pl) != 0) + return NULL; + + return (char*) s + sl - pl; +} + +char* endswith_no_case(const char *s, const char *postfix) { + size_t sl, pl; + + assert(s); + assert(postfix); + + sl = strlen(s); + pl = strlen(postfix); + + if (pl == 0) + return (char*) s + sl; + + if (sl < pl) + return NULL; + + if (strcasecmp(s + sl - pl, postfix) != 0) + return NULL; + + return (char*) s + sl - pl; +} + +char* first_word(const char *s, const char *word) { + size_t sl, wl; + const char *p; + + assert(s); + assert(word); + + /* Checks if the string starts with the specified word, either + * followed by NUL or by whitespace. Returns a pointer to the + * NUL or the first character after the whitespace. */ + + sl = strlen(s); + wl = strlen(word); + + if (sl < wl) + return NULL; + + if (wl == 0) + return (char*) s; + + if (memcmp(s, word, wl) != 0) + return NULL; + + p = s + wl; + if (*p == 0) + return (char*) p; + + if (!strchr(WHITESPACE, *p)) + return NULL; + + p += strspn(p, WHITESPACE); + return (char*) p; +} + +static size_t strcspn_escaped(const char *s, const char *reject) { + bool escaped = false; + int n; + + for (n=0; s[n]; n++) { + if (escaped) + escaped = false; + else if (s[n] == '\\') + escaped = true; + else if (strchr(reject, s[n])) + break; + } + + /* if s ends in \, return index of previous char */ + return n - escaped; +} + +/* Split a string into words. */ +const char* split(const char **state, size_t *l, const char *separator, SplitFlags flags) { + const char *current; + + current = *state; + + if (!*current) { + assert(**state == '\0'); + return NULL; + } + + current += strspn(current, separator); + if (!*current) { + *state = current; + return NULL; + } + + if (flags & SPLIT_QUOTES && strchr("\'\"", *current)) { + char quotechars[2] = {*current, '\0'}; + + *l = strcspn_escaped(current + 1, quotechars); + if (current[*l + 1] == '\0' || current[*l + 1] != quotechars[0] || + (current[*l + 2] && !strchr(separator, current[*l + 2]))) { + /* right quote missing or garbage at the end */ + if (flags & SPLIT_RELAX) { + *state = current + *l + 1 + (current[*l + 1] != '\0'); + return current + 1; + } + *state = current; + return NULL; + } + *state = current++ + *l + 2; + } else if (flags & SPLIT_QUOTES) { + *l = strcspn_escaped(current, separator); + if (current[*l] && !strchr(separator, current[*l]) && !(flags & SPLIT_RELAX)) { + /* unfinished escape */ + *state = current; + return NULL; + } + *state = current + *l; + } else { + *l = strcspn(current, separator); + *state = current + *l; + } + + return current; +} + +char *strnappend(const char *s, const char *suffix, size_t b) { + size_t a; + char *r; + + if (!s && !suffix) + return strdup(""); + + if (!s) + return strndup(suffix, b); + + if (!suffix) + return strdup(s); + + assert(s); + assert(suffix); + + a = strlen(s); + if (b > ((size_t) -1) - a) + return NULL; + + r = new(char, a+b+1); + if (!r) + return NULL; + + memcpy(r, s, a); + memcpy(r+a, suffix, b); + r[a+b] = 0; + + return r; +} + +char *strappend(const char *s, const char *suffix) { + return strnappend(s, suffix, strlen_ptr(suffix)); +} + +char *strjoin_real(const char *x, ...) { + va_list ap; + size_t l; + char *r, *p; + + va_start(ap, x); + + if (x) { + l = strlen(x); + + for (;;) { + const char *t; + size_t n; + + t = va_arg(ap, const char *); + if (!t) + break; + + n = strlen(t); + if (n > ((size_t) -1) - l) { + va_end(ap); + return NULL; + } + + l += n; + } + } else + l = 0; + + va_end(ap); + + r = new(char, l+1); + if (!r) + return NULL; + + if (x) { + p = stpcpy(r, x); + + va_start(ap, x); + + for (;;) { + const char *t; + + t = va_arg(ap, const char *); + if (!t) + break; + + p = stpcpy(p, t); + } + + va_end(ap); + } else + r[0] = 0; + + return r; +} + +char *strstrip(char *s) { + if (!s) + return NULL; + + /* Drops trailing whitespace. Modifies the string in place. Returns pointer to first non-space character */ + + return delete_trailing_chars(skip_leading_chars(s, WHITESPACE), WHITESPACE); +} + +char *delete_chars(char *s, const char *bad) { + char *f, *t; + + /* Drops all specified bad characters, regardless where in the string */ + + if (!s) + return NULL; + + if (!bad) + bad = WHITESPACE; + + for (f = s, t = s; *f; f++) { + if (strchr(bad, *f)) + continue; + + *(t++) = *f; + } + + *t = 0; + + return s; +} + +char *delete_trailing_chars(char *s, const char *bad) { + char *p, *c = s; + + /* Drops all specified bad characters, at the end of the string */ + + if (!s) + return NULL; + + if (!bad) + bad = WHITESPACE; + + for (p = s; *p; p++) + if (!strchr(bad, *p)) + c = p + 1; + + *c = 0; + + return s; +} + +char *truncate_nl(char *s) { + assert(s); + + s[strcspn(s, NEWLINE)] = 0; + return s; +} + +char ascii_tolower(char x) { + + if (x >= 'A' && x <= 'Z') + return x - 'A' + 'a'; + + return x; +} + +char ascii_toupper(char x) { + + if (x >= 'a' && x <= 'z') + return x - 'a' + 'A'; + + return x; +} + +char *ascii_strlower(char *t) { + char *p; + + assert(t); + + for (p = t; *p; p++) + *p = ascii_tolower(*p); + + return t; +} + +char *ascii_strupper(char *t) { + char *p; + + assert(t); + + for (p = t; *p; p++) + *p = ascii_toupper(*p); + + return t; +} + +char *ascii_strlower_n(char *t, size_t n) { + size_t i; + + if (n <= 0) + return t; + + for (i = 0; i < n; i++) + t[i] = ascii_tolower(t[i]); + + return t; +} + +int ascii_strcasecmp_n(const char *a, const char *b, size_t n) { + + for (; n > 0; a++, b++, n--) { + int x, y; + + x = (int) (uint8_t) ascii_tolower(*a); + y = (int) (uint8_t) ascii_tolower(*b); + + if (x != y) + return x - y; + } + + return 0; +} + +int ascii_strcasecmp_nn(const char *a, size_t n, const char *b, size_t m) { + int r; + + r = ascii_strcasecmp_n(a, b, MIN(n, m)); + if (r != 0) + return r; + + return CMP(n, m); +} + +bool chars_intersect(const char *a, const char *b) { + const char *p; + + /* Returns true if any of the chars in a are in b. */ + for (p = a; *p; p++) + if (strchr(b, *p)) + return true; + + return false; +} + +bool string_has_cc(const char *p, const char *ok) { + const char *t; + + assert(p); + + /* + * Check if a string contains control characters. If 'ok' is + * non-NULL it may be a string containing additional CCs to be + * considered OK. + */ + + for (t = p; *t; t++) { + if (ok && strchr(ok, *t)) + continue; + + if (*t > 0 && *t < ' ') + return true; + + if (*t == 127) + return true; + } + + return false; +} + +static int write_ellipsis(char *buf, bool unicode) { + if (unicode || is_locale_utf8()) { + buf[0] = 0xe2; /* tri-dot ellipsis: … */ + buf[1] = 0x80; + buf[2] = 0xa6; + } else { + buf[0] = '.'; + buf[1] = '.'; + buf[2] = '.'; + } + + return 3; +} + +static char *ascii_ellipsize_mem(const char *s, size_t old_length, size_t new_length, unsigned percent) { + size_t x, need_space, suffix_len; + char *t; + + assert(s); + assert(percent <= 100); + assert(new_length != (size_t) -1); + + if (old_length <= new_length) + return strndup(s, old_length); + + /* Special case short ellipsations */ + switch (new_length) { + + case 0: + return strdup(""); + + case 1: + if (is_locale_utf8()) + return strdup("…"); + else + return strdup("."); + + case 2: + if (!is_locale_utf8()) + return strdup(".."); + + break; + + default: + break; + } + + /* Calculate how much space the ellipsis will take up. If we are in UTF-8 mode we only need space for one + * character ("…"), otherwise for three characters ("..."). Note that in both cases we need 3 bytes of storage, + * either for the UTF-8 encoded character or for three ASCII characters. */ + need_space = is_locale_utf8() ? 1 : 3; + + t = new(char, new_length+3); + if (!t) + return NULL; + + assert(new_length >= need_space); + + x = ((new_length - need_space) * percent + 50) / 100; + assert(x <= new_length - need_space); + + memcpy(t, s, x); + write_ellipsis(t + x, false); + suffix_len = new_length - x - need_space; + memcpy(t + x + 3, s + old_length - suffix_len, suffix_len); + *(t + x + 3 + suffix_len) = '\0'; + + return t; +} + +char *ellipsize_mem(const char *s, size_t old_length, size_t new_length, unsigned percent) { + size_t x, k, len, len2; + const char *i, *j; + char *e; + int r; + + /* Note that 'old_length' refers to bytes in the string, while 'new_length' refers to character cells taken up + * on screen. This distinction doesn't matter for ASCII strings, but it does matter for non-ASCII UTF-8 + * strings. + * + * Ellipsation is done in a locale-dependent way: + * 1. If the string passed in is fully ASCII and the current locale is not UTF-8, three dots are used ("...") + * 2. Otherwise, a unicode ellipsis is used ("…") + * + * In other words: you'll get a unicode ellipsis as soon as either the string contains non-ASCII characters or + * the current locale is UTF-8. + */ + + assert(s); + assert(percent <= 100); + + if (new_length == (size_t) -1) + return strndup(s, old_length); + + if (new_length == 0) + return strdup(""); + + /* If no multibyte characters use ascii_ellipsize_mem for speed */ + if (ascii_is_valid_n(s, old_length)) + return ascii_ellipsize_mem(s, old_length, new_length, percent); + + x = ((new_length - 1) * percent) / 100; + assert(x <= new_length - 1); + + k = 0; + for (i = s; i < s + old_length; i = utf8_next_char(i)) { + char32_t c; + int w; + + r = utf8_encoded_to_unichar(i, &c); + if (r < 0) + return NULL; + + w = unichar_iswide(c) ? 2 : 1; + if (k + w <= x) + k += w; + else + break; + } + + for (j = s + old_length; j > i; ) { + char32_t c; + int w; + const char *jj; + + jj = utf8_prev_char(j); + r = utf8_encoded_to_unichar(jj, &c); + if (r < 0) + return NULL; + + w = unichar_iswide(c) ? 2 : 1; + if (k + w <= new_length) { + k += w; + j = jj; + } else + break; + } + assert(i <= j); + + /* we don't actually need to ellipsize */ + if (i == j) + return memdup_suffix0(s, old_length); + + /* make space for ellipsis, if possible */ + if (j < s + old_length) + j = utf8_next_char(j); + else if (i > s) + i = utf8_prev_char(i); + + len = i - s; + len2 = s + old_length - j; + e = new(char, len + 3 + len2 + 1); + if (!e) + return NULL; + + /* + printf("old_length=%zu new_length=%zu x=%zu len=%u len2=%u k=%u\n", + old_length, new_length, x, len, len2, k); + */ + + memcpy(e, s, len); + write_ellipsis(e + len, true); + memcpy(e + len + 3, j, len2); + *(e + len + 3 + len2) = '\0'; + + return e; +} + +char *cellescape(char *buf, size_t len, const char *s) { + /* Escape and ellipsize s into buffer buf of size len. Only non-control ASCII + * characters are copied as they are, everything else is escaped. The result + * is different then if escaping and ellipsization was performed in two + * separate steps, because each sequence is either stored in full or skipped. + * + * This function should be used for logging about strings which expected to + * be plain ASCII in a safe way. + * + * An ellipsis will be used if s is too long. It was always placed at the + * very end. + */ + + size_t i = 0, last_char_width[4] = {}, k = 0, j; + + assert(len > 0); /* at least a terminating NUL */ + + for (;;) { + char four[4]; + int w; + + if (*s == 0) /* terminating NUL detected? then we are done! */ + goto done; + + w = cescape_char(*s, four); + if (i + w + 1 > len) /* This character doesn't fit into the buffer anymore? In that case let's + * ellipsize at the previous location */ + break; + + /* OK, there was space, let's add this escaped character to the buffer */ + memcpy(buf + i, four, w); + i += w; + + /* And remember its width in the ring buffer */ + last_char_width[k] = w; + k = (k + 1) % 4; + + s++; + } + + /* Ellipsation is necessary. This means we might need to truncate the string again to make space for 4 + * characters ideally, but the buffer is shorter than that in the first place take what we can get */ + for (j = 0; j < ELEMENTSOF(last_char_width); j++) { + + if (i + 4 <= len) /* nice, we reached our space goal */ + break; + + k = k == 0 ? 3 : k - 1; + if (last_char_width[k] == 0) /* bummer, we reached the beginning of the strings */ + break; + + assert(i >= last_char_width[k]); + i -= last_char_width[k]; + } + + if (i + 4 <= len) /* yay, enough space */ + i += write_ellipsis(buf + i, false); + else if (i + 3 <= len) { /* only space for ".." */ + buf[i++] = '.'; + buf[i++] = '.'; + } else if (i + 2 <= len) /* only space for a single "." */ + buf[i++] = '.'; + else + assert(i + 1 <= len); + + done: + buf[i] = '\0'; + return buf; +} + +bool nulstr_contains(const char *nulstr, const char *needle) { + const char *i; + + if (!nulstr) + return false; + + NULSTR_FOREACH(i, nulstr) + if (streq(i, needle)) + return true; + + return false; +} + +char* strshorten(char *s, size_t l) { + assert(s); + + if (strnlen(s, l+1) > l) + s[l] = 0; + + return s; +} + +char *strreplace(const char *text, const char *old_string, const char *new_string) { + size_t l, old_len, new_len, allocated = 0; + char *t, *ret = NULL; + const char *f; + + assert(old_string); + assert(new_string); + + if (!text) + return NULL; + + old_len = strlen(old_string); + new_len = strlen(new_string); + + l = strlen(text); + if (!GREEDY_REALLOC(ret, allocated, l+1)) + return NULL; + + f = text; + t = ret; + while (*f) { + size_t d, nl; + + if (!startswith(f, old_string)) { + *(t++) = *(f++); + continue; + } + + d = t - ret; + nl = l - old_len + new_len; + + if (!GREEDY_REALLOC(ret, allocated, nl + 1)) + return mfree(ret); + + l = nl; + t = ret + d; + + t = stpcpy(t, new_string); + f += old_len; + } + + *t = 0; + return ret; +} + +static void advance_offsets(ssize_t diff, size_t offsets[static 2], size_t shift[static 2], size_t size) { + if (!offsets) + return; + + if ((size_t) diff < offsets[0]) + shift[0] += size; + if ((size_t) diff < offsets[1]) + shift[1] += size; +} + +char *strip_tab_ansi(char **ibuf, size_t *_isz, size_t highlight[2]) { + const char *i, *begin = NULL; + enum { + STATE_OTHER, + STATE_ESCAPE, + STATE_CSI, + STATE_CSO, + } state = STATE_OTHER; + char *obuf = NULL; + size_t osz = 0, isz, shift[2] = {}; + FILE *f; + + assert(ibuf); + assert(*ibuf); + + /* This does three things: + * + * 1. Replaces TABs by 8 spaces + * 2. Strips ANSI color sequences (a subset of CSI), i.e. ESC '[' … 'm' sequences + * 3. Strips ANSI operating system sequences (CSO), i.e. ESC ']' … BEL sequences + * + * Everything else will be left as it is. In particular other ANSI sequences are left as they are, as are any + * other special characters. Truncated ANSI sequences are left-as is too. This call is supposed to suppress the + * most basic formatting noise, but nothing else. + * + * Why care for CSO sequences? Well, to undo what terminal_urlify() and friends generate. */ + + isz = _isz ? *_isz : strlen(*ibuf); + + f = open_memstream(&obuf, &osz); + if (!f) + return NULL; + + /* Note we turn off internal locking on f for performance reasons. It's safe to do so since we created f here + * and it doesn't leave our scope. */ + + (void) __fsetlocking(f, FSETLOCKING_BYCALLER); + + for (i = *ibuf; i < *ibuf + isz + 1; i++) { + + switch (state) { + + case STATE_OTHER: + if (i >= *ibuf + isz) /* EOT */ + break; + else if (*i == '\x1B') + state = STATE_ESCAPE; + else if (*i == '\t') { + fputs(" ", f); + advance_offsets(i - *ibuf, highlight, shift, 7); + } else + fputc(*i, f); + + break; + + case STATE_ESCAPE: + if (i >= *ibuf + isz) { /* EOT */ + fputc('\x1B', f); + advance_offsets(i - *ibuf, highlight, shift, 1); + break; + } else if (*i == '[') { /* ANSI CSI */ + state = STATE_CSI; + begin = i + 1; + } else if (*i == ']') { /* ANSI CSO */ + state = STATE_CSO; + begin = i + 1; + } else { + fputc('\x1B', f); + fputc(*i, f); + advance_offsets(i - *ibuf, highlight, shift, 1); + state = STATE_OTHER; + } + + break; + + case STATE_CSI: + + if (i >= *ibuf + isz || /* EOT … */ + !strchr("01234567890;m", *i)) { /* … or invalid chars in sequence */ + fputc('\x1B', f); + fputc('[', f); + advance_offsets(i - *ibuf, highlight, shift, 2); + state = STATE_OTHER; + i = begin-1; + } else if (*i == 'm') + state = STATE_OTHER; + + break; + + case STATE_CSO: + + if (i >= *ibuf + isz || /* EOT … */ + (*i != '\a' && (uint8_t) *i < 32U) || (uint8_t) *i > 126U) { /* … or invalid chars in sequence */ + fputc('\x1B', f); + fputc(']', f); + advance_offsets(i - *ibuf, highlight, shift, 2); + state = STATE_OTHER; + i = begin-1; + } else if (*i == '\a') + state = STATE_OTHER; + + break; + } + } + + if (fflush_and_check(f) < 0) { + fclose(f); + return mfree(obuf); + } + + fclose(f); + + free(*ibuf); + *ibuf = obuf; + + if (_isz) + *_isz = osz; + + if (highlight) { + highlight[0] += shift[0]; + highlight[1] += shift[1]; + } + + return obuf; +} + +char *strextend_with_separator(char **x, const char *separator, ...) { + bool need_separator; + size_t f, l, l_separator; + char *r, *p; + va_list ap; + + assert(x); + + l = f = strlen_ptr(*x); + + need_separator = !isempty(*x); + l_separator = strlen_ptr(separator); + + va_start(ap, separator); + for (;;) { + const char *t; + size_t n; + + t = va_arg(ap, const char *); + if (!t) + break; + + n = strlen(t); + + if (need_separator) + n += l_separator; + + if (n > ((size_t) -1) - l) { + va_end(ap); + return NULL; + } + + l += n; + need_separator = true; + } + va_end(ap); + + need_separator = !isempty(*x); + + r = realloc(*x, l+1); + if (!r) + return NULL; + + p = r + f; + + va_start(ap, separator); + for (;;) { + const char *t; + + t = va_arg(ap, const char *); + if (!t) + break; + + if (need_separator && separator) + p = stpcpy(p, separator); + + p = stpcpy(p, t); + + need_separator = true; + } + va_end(ap); + + assert(p == r + l); + + *p = 0; + *x = r; + + return r + l; +} + +char *strrep(const char *s, unsigned n) { + size_t l; + char *r, *p; + unsigned i; + + assert(s); + + l = strlen(s); + p = r = malloc(l * n + 1); + if (!r) + return NULL; + + for (i = 0; i < n; i++) + p = stpcpy(p, s); + + *p = 0; + return r; +} + +int split_pair(const char *s, const char *sep, char **l, char **r) { + char *x, *a, *b; + + assert(s); + assert(sep); + assert(l); + assert(r); + + if (isempty(sep)) + return -EINVAL; + + x = strstr(s, sep); + if (!x) + return -EINVAL; + + a = strndup(s, x - s); + if (!a) + return -ENOMEM; + + b = strdup(x + strlen(sep)); + if (!b) { + free(a); + return -ENOMEM; + } + + *l = a; + *r = b; + + return 0; +} + +int free_and_strdup(char **p, const char *s) { + char *t; + + assert(p); + + /* Replaces a string pointer with a strdup()ed new string, + * possibly freeing the old one. */ + + if (streq_ptr(*p, s)) + return 0; + + if (s) { + t = strdup(s); + if (!t) + return -ENOMEM; + } else + t = NULL; + + free(*p); + *p = t; + + return 1; +} + +int free_and_strndup(char **p, const char *s, size_t l) { + char *t; + + assert(p); + assert(s || l == 0); + + /* Replaces a string pointer with a strndup()ed new string, + * freeing the old one. */ + + if (!*p && !s) + return 0; + + if (*p && s && strneq(*p, s, l) && (l > strlen(*p) || (*p)[l] == '\0')) + return 0; + + if (s) { + t = strndup(s, l); + if (!t) + return -ENOMEM; + } else + t = NULL; + + free_and_replace(*p, t); + return 1; +} + +#if !HAVE_EXPLICIT_BZERO +/* + * Pointer to memset is volatile so that compiler must de-reference + * the pointer and can't assume that it points to any function in + * particular (such as memset, which it then might further "optimize") + * This approach is inspired by openssl's crypto/mem_clr.c. + */ +typedef void *(*memset_t)(void *,int,size_t); + +static volatile memset_t memset_func = memset; + +void* explicit_bzero_safe(void *p, size_t l) { + if (l > 0) + memset_func(p, '\0', l); + + return p; +} +#endif + +char* string_erase(char *x) { + if (!x) + return NULL; + + /* A delicious drop of snake-oil! To be called on memory where + * we stored passphrases or so, after we used them. */ + explicit_bzero_safe(x, strlen(x)); + return x; +} + +char *string_free_erase(char *s) { + return mfree(string_erase(s)); +} + +bool string_is_safe(const char *p) { + const char *t; + + if (!p) + return false; + + for (t = p; *t; t++) { + if (*t > 0 && *t < ' ') /* no control characters */ + return false; + + if (strchr(QUOTES "\\\x7f", *t)) + return false; + } + + return true; +} diff --git a/src/basic/string-util.h b/src/basic/string-util.h new file mode 100644 index 0000000..38070ab --- /dev/null +++ b/src/basic/string-util.h @@ -0,0 +1,266 @@ +/* SPDX-License-Identifier: LGPL-2.1+ */ +#pragma once + +#include <alloca.h> +#include <stdbool.h> +#include <stddef.h> +#include <string.h> + +#include "alloc-util.h" +#include "macro.h" + +/* What is interpreted as whitespace? */ +#define WHITESPACE " \t\n\r" +#define NEWLINE "\n\r" +#define QUOTES "\"\'" +#define COMMENTS "#;" +#define GLOB_CHARS "*?[" +#define DIGITS "0123456789" +#define LOWERCASE_LETTERS "abcdefghijklmnopqrstuvwxyz" +#define UPPERCASE_LETTERS "ABCDEFGHIJKLMNOPQRSTUVWXYZ" +#define LETTERS LOWERCASE_LETTERS UPPERCASE_LETTERS +#define ALPHANUMERICAL LETTERS DIGITS +#define HEXDIGITS DIGITS "abcdefABCDEF" + +#define streq(a,b) (strcmp((a),(b)) == 0) +#define strneq(a, b, n) (strncmp((a), (b), (n)) == 0) +#define strcaseeq(a,b) (strcasecmp((a),(b)) == 0) +#define strncaseeq(a, b, n) (strncasecmp((a), (b), (n)) == 0) + +int strcmp_ptr(const char *a, const char *b) _pure_; + +static inline bool streq_ptr(const char *a, const char *b) { + return strcmp_ptr(a, b) == 0; +} + +static inline const char* strempty(const char *s) { + return s ?: ""; +} + +static inline const char* strnull(const char *s) { + return s ?: "(null)"; +} + +static inline const char *strna(const char *s) { + return s ?: "n/a"; +} + +static inline bool isempty(const char *p) { + return !p || !p[0]; +} + +static inline const char *empty_to_null(const char *p) { + return isempty(p) ? NULL : p; +} + +static inline const char *empty_to_dash(const char *str) { + return isempty(str) ? "-" : str; +} + +static inline char *startswith(const char *s, const char *prefix) { + size_t l; + + l = strlen(prefix); + if (strncmp(s, prefix, l) == 0) + return (char*) s + l; + + return NULL; +} + +static inline char *startswith_no_case(const char *s, const char *prefix) { + size_t l; + + l = strlen(prefix); + if (strncasecmp(s, prefix, l) == 0) + return (char*) s + l; + + return NULL; +} + +char *endswith(const char *s, const char *postfix) _pure_; +char *endswith_no_case(const char *s, const char *postfix) _pure_; + +char *first_word(const char *s, const char *word) _pure_; + +typedef enum SplitFlags { + SPLIT_QUOTES = 0x01 << 0, + SPLIT_RELAX = 0x01 << 1, +} SplitFlags; + +const char* split(const char **state, size_t *l, const char *separator, SplitFlags flags); + +#define FOREACH_WORD(word, length, s, state) \ + _FOREACH_WORD(word, length, s, WHITESPACE, 0, state) + +#define FOREACH_WORD_SEPARATOR(word, length, s, separator, state) \ + _FOREACH_WORD(word, length, s, separator, 0, state) + +#define _FOREACH_WORD(word, length, s, separator, flags, state) \ + for ((state) = (s), (word) = split(&(state), &(length), (separator), (flags)); (word); (word) = split(&(state), &(length), (separator), (flags))) + +char *strappend(const char *s, const char *suffix); +char *strnappend(const char *s, const char *suffix, size_t length); + +char *strjoin_real(const char *x, ...) _sentinel_; +#define strjoin(a, ...) strjoin_real((a), __VA_ARGS__, NULL) + +#define strjoina(a, ...) \ + ({ \ + const char *_appendees_[] = { a, __VA_ARGS__ }; \ + char *_d_, *_p_; \ + size_t _len_ = 0; \ + size_t _i_; \ + for (_i_ = 0; _i_ < ELEMENTSOF(_appendees_) && _appendees_[_i_]; _i_++) \ + _len_ += strlen(_appendees_[_i_]); \ + _p_ = _d_ = newa(char, _len_ + 1); \ + for (_i_ = 0; _i_ < ELEMENTSOF(_appendees_) && _appendees_[_i_]; _i_++) \ + _p_ = stpcpy(_p_, _appendees_[_i_]); \ + *_p_ = 0; \ + _d_; \ + }) + +char *strstrip(char *s); +char *delete_chars(char *s, const char *bad); +char *delete_trailing_chars(char *s, const char *bad); +char *truncate_nl(char *s); + +static inline char *skip_leading_chars(const char *s, const char *bad) { + + if (!s) + return NULL; + + if (!bad) + bad = WHITESPACE; + + return (char*) s + strspn(s, bad); +} + +char ascii_tolower(char x); +char *ascii_strlower(char *s); +char *ascii_strlower_n(char *s, size_t n); + +char ascii_toupper(char x); +char *ascii_strupper(char *s); + +int ascii_strcasecmp_n(const char *a, const char *b, size_t n); +int ascii_strcasecmp_nn(const char *a, size_t n, const char *b, size_t m); + +bool chars_intersect(const char *a, const char *b) _pure_; + +static inline bool _pure_ in_charset(const char *s, const char* charset) { + assert(s); + assert(charset); + return s[strspn(s, charset)] == '\0'; +} + +bool string_has_cc(const char *p, const char *ok) _pure_; + +char *ellipsize_mem(const char *s, size_t old_length_bytes, size_t new_length_columns, unsigned percent); +static inline char *ellipsize(const char *s, size_t length, unsigned percent) { + return ellipsize_mem(s, strlen(s), length, percent); +} + +char *cellescape(char *buf, size_t len, const char *s); + +/* This limit is arbitrary, enough to give some idea what the string contains */ +#define CELLESCAPE_DEFAULT_LENGTH 64 + +bool nulstr_contains(const char *nulstr, const char *needle); + +char* strshorten(char *s, size_t l); + +char *strreplace(const char *text, const char *old_string, const char *new_string); + +char *strip_tab_ansi(char **ibuf, size_t *_isz, size_t highlight[2]); + +char *strextend_with_separator(char **x, const char *separator, ...) _sentinel_; + +#define strextend(x, ...) strextend_with_separator(x, NULL, __VA_ARGS__) + +char *strrep(const char *s, unsigned n); + +int split_pair(const char *s, const char *sep, char **l, char **r); + +int free_and_strdup(char **p, const char *s); +int free_and_strndup(char **p, const char *s, size_t l); + +/* Normal memmem() requires haystack to be nonnull, which is annoying for zero-length buffers */ +static inline void *memmem_safe(const void *haystack, size_t haystacklen, const void *needle, size_t needlelen) { + + if (needlelen <= 0) + return (void*) haystack; + + if (haystacklen < needlelen) + return NULL; + + assert(haystack); + assert(needle); + + return memmem(haystack, haystacklen, needle, needlelen); +} + +#if HAVE_EXPLICIT_BZERO +static inline void* explicit_bzero_safe(void *p, size_t l) { + if (l > 0) + explicit_bzero(p, l); + + return p; +} +#else +void *explicit_bzero_safe(void *p, size_t l); +#endif + +char *string_erase(char *x); + +char *string_free_erase(char *s); +DEFINE_TRIVIAL_CLEANUP_FUNC(char *, string_free_erase); +#define _cleanup_string_free_erase_ _cleanup_(string_free_erasep) + +bool string_is_safe(const char *p) _pure_; + +static inline size_t strlen_ptr(const char *s) { + if (!s) + return 0; + + return strlen(s); +} + +/* Like startswith(), but operates on arbitrary memory blocks */ +static inline void *memory_startswith(const void *p, size_t sz, const char *token) { + size_t n; + + assert(token); + + n = strlen(token); + if (sz < n) + return NULL; + + assert(p); + + if (memcmp(p, token, n) != 0) + return NULL; + + return (uint8_t*) p + n; +} + +/* Like startswith_no_case(), but operates on arbitrary memory blocks. + * It works only for ASCII strings. + */ +static inline void *memory_startswith_no_case(const void *p, size_t sz, const char *token) { + size_t n, i; + + assert(token); + + n = strlen(token); + if (sz < n) + return NULL; + + assert(p); + + for (i = 0; i < n; i++) { + if (ascii_tolower(((char *)p)[i]) != ascii_tolower(token[i])) + return NULL; + } + + return (uint8_t*) p + n; +} diff --git a/src/basic/strv.c b/src/basic/strv.c new file mode 100644 index 0000000..3a62f25 --- /dev/null +++ b/src/basic/strv.c @@ -0,0 +1,889 @@ +/* SPDX-License-Identifier: LGPL-2.1+ */ + +#include <errno.h> +#include <fnmatch.h> +#include <stdarg.h> +#include <stdio.h> +#include <stdlib.h> +#include <string.h> + +#include "alloc-util.h" +#include "escape.h" +#include "extract-word.h" +#include "fileio.h" +#include "string-util.h" +#include "strv.h" +#include "util.h" + +char *strv_find(char **l, const char *name) { + char **i; + + assert(name); + + STRV_FOREACH(i, l) + if (streq(*i, name)) + return *i; + + return NULL; +} + +char *strv_find_prefix(char **l, const char *name) { + char **i; + + assert(name); + + STRV_FOREACH(i, l) + if (startswith(*i, name)) + return *i; + + return NULL; +} + +char *strv_find_startswith(char **l, const char *name) { + char **i, *e; + + assert(name); + + /* Like strv_find_prefix, but actually returns only the + * suffix, not the whole item */ + + STRV_FOREACH(i, l) { + e = startswith(*i, name); + if (e) + return e; + } + + return NULL; +} + +void strv_clear(char **l) { + char **k; + + if (!l) + return; + + for (k = l; *k; k++) + free(*k); + + *l = NULL; +} + +char **strv_free(char **l) { + strv_clear(l); + return mfree(l); +} + +char **strv_free_erase(char **l) { + char **i; + + STRV_FOREACH(i, l) + string_erase(*i); + + return strv_free(l); +} + +char **strv_copy(char * const *l) { + char **r, **k; + + k = r = new(char*, strv_length(l) + 1); + if (!r) + return NULL; + + if (l) + for (; *l; k++, l++) { + *k = strdup(*l); + if (!*k) { + strv_free(r); + return NULL; + } + } + + *k = NULL; + return r; +} + +size_t strv_length(char * const *l) { + size_t n = 0; + + if (!l) + return 0; + + for (; *l; l++) + n++; + + return n; +} + +char **strv_new_ap(const char *x, va_list ap) { + const char *s; + _cleanup_strv_free_ char **a = NULL; + size_t n = 0, i = 0; + va_list aq; + + /* As a special trick we ignore all listed strings that equal + * STRV_IGNORE. This is supposed to be used with the + * STRV_IFNOTNULL() macro to include possibly NULL strings in + * the string list. */ + + if (x) { + n = x == STRV_IGNORE ? 0 : 1; + + va_copy(aq, ap); + while ((s = va_arg(aq, const char*))) { + if (s == STRV_IGNORE) + continue; + + n++; + } + + va_end(aq); + } + + a = new(char*, n+1); + if (!a) + return NULL; + + if (x) { + if (x != STRV_IGNORE) { + a[i] = strdup(x); + if (!a[i]) + return NULL; + i++; + } + + while ((s = va_arg(ap, const char*))) { + + if (s == STRV_IGNORE) + continue; + + a[i] = strdup(s); + if (!a[i]) + return NULL; + + i++; + } + } + + a[i] = NULL; + + return TAKE_PTR(a); +} + +char **strv_new_internal(const char *x, ...) { + char **r; + va_list ap; + + va_start(ap, x); + r = strv_new_ap(x, ap); + va_end(ap); + + return r; +} + +int strv_extend_strv(char ***a, char **b, bool filter_duplicates) { + char **s, **t; + size_t p, q, i = 0, j; + + assert(a); + + if (strv_isempty(b)) + return 0; + + p = strv_length(*a); + q = strv_length(b); + + t = reallocarray(*a, p + q + 1, sizeof(char *)); + if (!t) + return -ENOMEM; + + t[p] = NULL; + *a = t; + + STRV_FOREACH(s, b) { + + if (filter_duplicates && strv_contains(t, *s)) + continue; + + t[p+i] = strdup(*s); + if (!t[p+i]) + goto rollback; + + i++; + t[p+i] = NULL; + } + + assert(i <= q); + + return (int) i; + +rollback: + for (j = 0; j < i; j++) + free(t[p + j]); + + t[p] = NULL; + return -ENOMEM; +} + +int strv_extend_strv_concat(char ***a, char **b, const char *suffix) { + int r; + char **s; + + STRV_FOREACH(s, b) { + char *v; + + v = strappend(*s, suffix); + if (!v) + return -ENOMEM; + + r = strv_push(a, v); + if (r < 0) { + free(v); + return r; + } + } + + return 0; +} + +char **strv_split_full(const char *s, const char *separator, SplitFlags flags) { + const char *word, *state; + size_t l; + size_t n, i; + char **r; + + assert(s); + + if (!separator) + separator = WHITESPACE; + + s += strspn(s, separator); + if (isempty(s)) + return new0(char*, 1); + + n = 0; + _FOREACH_WORD(word, l, s, separator, flags, state) + n++; + + r = new(char*, n+1); + if (!r) + return NULL; + + i = 0; + _FOREACH_WORD(word, l, s, separator, flags, state) { + r[i] = strndup(word, l); + if (!r[i]) { + strv_free(r); + return NULL; + } + + i++; + } + + r[i] = NULL; + return r; +} + +char **strv_split_newlines(const char *s) { + char **l; + size_t n; + + assert(s); + + /* Special version of strv_split() that splits on newlines and + * suppresses an empty string at the end */ + + l = strv_split(s, NEWLINE); + if (!l) + return NULL; + + n = strv_length(l); + if (n <= 0) + return l; + + if (isempty(l[n - 1])) + l[n - 1] = mfree(l[n - 1]); + + return l; +} + +int strv_split_extract(char ***t, const char *s, const char *separators, ExtractFlags flags) { + _cleanup_strv_free_ char **l = NULL; + size_t n = 0, allocated = 0; + int r; + + assert(t); + assert(s); + + for (;;) { + _cleanup_free_ char *word = NULL; + + r = extract_first_word(&s, &word, separators, flags); + if (r < 0) + return r; + if (r == 0) + break; + + if (!GREEDY_REALLOC(l, allocated, n + 2)) + return -ENOMEM; + + l[n++] = TAKE_PTR(word); + + l[n] = NULL; + } + + if (!l) { + l = new0(char*, 1); + if (!l) + return -ENOMEM; + } + + *t = TAKE_PTR(l); + + return (int) n; +} + +char *strv_join_prefix(char **l, const char *separator, const char *prefix) { + char *r, *e; + char **s; + size_t n, k, m; + + if (!separator) + separator = " "; + + k = strlen(separator); + m = strlen_ptr(prefix); + + n = 0; + STRV_FOREACH(s, l) { + if (s != l) + n += k; + n += m + strlen(*s); + } + + r = new(char, n+1); + if (!r) + return NULL; + + e = r; + STRV_FOREACH(s, l) { + if (s != l) + e = stpcpy(e, separator); + + if (prefix) + e = stpcpy(e, prefix); + + e = stpcpy(e, *s); + } + + *e = 0; + + return r; +} + +int strv_push(char ***l, char *value) { + char **c; + size_t n, m; + + if (!value) + return 0; + + n = strv_length(*l); + + /* Increase and check for overflow */ + m = n + 2; + if (m < n) + return -ENOMEM; + + c = reallocarray(*l, m, sizeof(char*)); + if (!c) + return -ENOMEM; + + c[n] = value; + c[n+1] = NULL; + + *l = c; + return 0; +} + +int strv_push_pair(char ***l, char *a, char *b) { + char **c; + size_t n, m; + + if (!a && !b) + return 0; + + n = strv_length(*l); + + /* increase and check for overflow */ + m = n + !!a + !!b + 1; + if (m < n) + return -ENOMEM; + + c = reallocarray(*l, m, sizeof(char*)); + if (!c) + return -ENOMEM; + + if (a) + c[n++] = a; + if (b) + c[n++] = b; + c[n] = NULL; + + *l = c; + return 0; +} + +int strv_insert(char ***l, size_t position, char *value) { + char **c; + size_t n, m, i; + + if (!value) + return 0; + + n = strv_length(*l); + position = MIN(position, n); + + /* increase and check for overflow */ + m = n + 2; + if (m < n) + return -ENOMEM; + + c = new(char*, m); + if (!c) + return -ENOMEM; + + for (i = 0; i < position; i++) + c[i] = (*l)[i]; + c[position] = value; + for (i = position; i < n; i++) + c[i+1] = (*l)[i]; + + c[n+1] = NULL; + + free(*l); + *l = c; + + return 0; +} + +int strv_consume(char ***l, char *value) { + int r; + + r = strv_push(l, value); + if (r < 0) + free(value); + + return r; +} + +int strv_consume_pair(char ***l, char *a, char *b) { + int r; + + r = strv_push_pair(l, a, b); + if (r < 0) { + free(a); + free(b); + } + + return r; +} + +int strv_consume_prepend(char ***l, char *value) { + int r; + + r = strv_push_prepend(l, value); + if (r < 0) + free(value); + + return r; +} + +int strv_extend(char ***l, const char *value) { + char *v; + + if (!value) + return 0; + + v = strdup(value); + if (!v) + return -ENOMEM; + + return strv_consume(l, v); +} + +int strv_extend_front(char ***l, const char *value) { + size_t n, m; + char *v, **c; + + assert(l); + + /* Like strv_extend(), but prepends rather than appends the new entry */ + + if (!value) + return 0; + + n = strv_length(*l); + + /* Increase and overflow check. */ + m = n + 2; + if (m < n) + return -ENOMEM; + + v = strdup(value); + if (!v) + return -ENOMEM; + + c = reallocarray(*l, m, sizeof(char*)); + if (!c) { + free(v); + return -ENOMEM; + } + + memmove(c+1, c, n * sizeof(char*)); + c[0] = v; + c[n+1] = NULL; + + *l = c; + return 0; +} + +char **strv_uniq(char **l) { + char **i; + + /* Drops duplicate entries. The first identical string will be + * kept, the others dropped */ + + STRV_FOREACH(i, l) + strv_remove(i+1, *i); + + return l; +} + +bool strv_is_uniq(char **l) { + char **i; + + STRV_FOREACH(i, l) + if (strv_find(i+1, *i)) + return false; + + return true; +} + +char **strv_remove(char **l, const char *s) { + char **f, **t; + + if (!l) + return NULL; + + assert(s); + + /* Drops every occurrence of s in the string list, edits + * in-place. */ + + for (f = t = l; *f; f++) + if (streq(*f, s)) + free(*f); + else + *(t++) = *f; + + *t = NULL; + return l; +} + +char **strv_parse_nulstr(const char *s, size_t l) { + /* l is the length of the input data, which will be split at NULs into + * elements of the resulting strv. Hence, the number of items in the resulting strv + * will be equal to one plus the number of NUL bytes in the l bytes starting at s, + * unless s[l-1] is NUL, in which case the final empty string is not stored in + * the resulting strv, and length is equal to the number of NUL bytes. + * + * Note that contrary to a normal nulstr which cannot contain empty strings, because + * the input data is terminated by any two consequent NUL bytes, this parser accepts + * empty strings in s. + */ + + const char *p; + size_t c = 0, i = 0; + char **v; + + assert(s || l <= 0); + + if (l <= 0) + return new0(char*, 1); + + for (p = s; p < s + l; p++) + if (*p == 0) + c++; + + if (s[l-1] != 0) + c++; + + v = new0(char*, c+1); + if (!v) + return NULL; + + p = s; + while (p < s + l) { + const char *e; + + e = memchr(p, 0, s + l - p); + + v[i] = strndup(p, e ? e - p : s + l - p); + if (!v[i]) { + strv_free(v); + return NULL; + } + + i++; + + if (!e) + break; + + p = e + 1; + } + + assert(i == c); + + return v; +} + +char **strv_split_nulstr(const char *s) { + const char *i; + char **r = NULL; + + NULSTR_FOREACH(i, s) + if (strv_extend(&r, i) < 0) { + strv_free(r); + return NULL; + } + + if (!r) + return strv_new(NULL); + + return r; +} + +int strv_make_nulstr(char **l, char **p, size_t *q) { + /* A valid nulstr with two NULs at the end will be created, but + * q will be the length without the two trailing NULs. Thus the output + * string is a valid nulstr and can be iterated over using NULSTR_FOREACH, + * and can also be parsed by strv_parse_nulstr as long as the length + * is provided separately. + */ + + size_t n_allocated = 0, n = 0; + _cleanup_free_ char *m = NULL; + char **i; + + assert(p); + assert(q); + + STRV_FOREACH(i, l) { + size_t z; + + z = strlen(*i); + + if (!GREEDY_REALLOC(m, n_allocated, n + z + 2)) + return -ENOMEM; + + memcpy(m + n, *i, z + 1); + n += z + 1; + } + + if (!m) { + m = new0(char, 1); + if (!m) + return -ENOMEM; + n = 1; + } else + /* make sure there is a second extra NUL at the end of resulting nulstr */ + m[n] = '\0'; + + assert(n > 0); + *p = m; + *q = n - 1; + + m = NULL; + + return 0; +} + +bool strv_overlap(char **a, char **b) { + char **i; + + STRV_FOREACH(i, a) + if (strv_contains(b, *i)) + return true; + + return false; +} + +static int str_compare(char * const *a, char * const *b) { + return strcmp(*a, *b); +} + +char **strv_sort(char **l) { + typesafe_qsort(l, strv_length(l), str_compare); + return l; +} + +bool strv_equal(char **a, char **b) { + + if (strv_isempty(a)) + return strv_isempty(b); + + if (strv_isempty(b)) + return false; + + for ( ; *a || *b; ++a, ++b) + if (!streq_ptr(*a, *b)) + return false; + + return true; +} + +void strv_print(char **l) { + char **s; + + STRV_FOREACH(s, l) + puts(*s); +} + +int strv_extendf(char ***l, const char *format, ...) { + va_list ap; + char *x; + int r; + + va_start(ap, format); + r = vasprintf(&x, format, ap); + va_end(ap); + + if (r < 0) + return -ENOMEM; + + return strv_consume(l, x); +} + +char **strv_reverse(char **l) { + size_t n, i; + + n = strv_length(l); + if (n <= 1) + return l; + + for (i = 0; i < n / 2; i++) + SWAP_TWO(l[i], l[n-1-i]); + + return l; +} + +char **strv_shell_escape(char **l, const char *bad) { + char **s; + + /* Escapes every character in every string in l that is in bad, + * edits in-place, does not roll-back on error. */ + + STRV_FOREACH(s, l) { + char *v; + + v = shell_escape(*s, bad); + if (!v) + return NULL; + + free(*s); + *s = v; + } + + return l; +} + +bool strv_fnmatch(char* const* patterns, const char *s, int flags) { + char* const* p; + + STRV_FOREACH(p, patterns) + if (fnmatch(*p, s, flags) == 0) + return true; + + return false; +} + +char ***strv_free_free(char ***l) { + char ***i; + + if (!l) + return NULL; + + for (i = l; *i; i++) + strv_free(*i); + + return mfree(l); +} + +char **strv_skip(char **l, size_t n) { + + while (n > 0) { + if (strv_isempty(l)) + return l; + + l++, n--; + } + + return l; +} + +int strv_extend_n(char ***l, const char *value, size_t n) { + size_t i, j, k; + char **nl; + + assert(l); + + if (!value) + return 0; + if (n == 0) + return 0; + + /* Adds the value n times to l */ + + k = strv_length(*l); + + nl = reallocarray(*l, k + n + 1, sizeof(char *)); + if (!nl) + return -ENOMEM; + + *l = nl; + + for (i = k; i < k + n; i++) { + nl[i] = strdup(value); + if (!nl[i]) + goto rollback; + } + + nl[i] = NULL; + return 0; + +rollback: + for (j = k; j < i; j++) + free(nl[j]); + + nl[k] = NULL; + return -ENOMEM; +} + +int fputstrv(FILE *f, char **l, const char *separator, bool *space) { + bool b = false; + char **s; + int r; + + /* Like fputs(), but for strv, and with a less stupid argument order */ + + if (!space) + space = &b; + + STRV_FOREACH(s, l) { + r = fputs_with_space(f, *s, separator, space); + if (r < 0) + return r; + } + + return 0; +} diff --git a/src/basic/strv.h b/src/basic/strv.h new file mode 100644 index 0000000..392cab6 --- /dev/null +++ b/src/basic/strv.h @@ -0,0 +1,190 @@ +/* SPDX-License-Identifier: LGPL-2.1+ */ +#pragma once + +#include <fnmatch.h> +#include <stdarg.h> +#include <stdbool.h> +#include <stddef.h> + +#include "alloc-util.h" +#include "extract-word.h" +#include "macro.h" +#include "string-util.h" +#include "util.h" + +char *strv_find(char **l, const char *name) _pure_; +char *strv_find_prefix(char **l, const char *name) _pure_; +char *strv_find_startswith(char **l, const char *name) _pure_; + +char **strv_free(char **l); +DEFINE_TRIVIAL_CLEANUP_FUNC(char**, strv_free); +#define _cleanup_strv_free_ _cleanup_(strv_freep) + +char **strv_free_erase(char **l); +DEFINE_TRIVIAL_CLEANUP_FUNC(char**, strv_free_erase); +#define _cleanup_strv_free_erase_ _cleanup_(strv_free_erasep) + +void strv_clear(char **l); + +char **strv_copy(char * const *l); +size_t strv_length(char * const *l) _pure_; + +int strv_extend_strv(char ***a, char **b, bool filter_duplicates); +int strv_extend_strv_concat(char ***a, char **b, const char *suffix); +int strv_extend(char ***l, const char *value); +int strv_extendf(char ***l, const char *format, ...) _printf_(2,0); +int strv_extend_front(char ***l, const char *value); +int strv_push(char ***l, char *value); +int strv_push_pair(char ***l, char *a, char *b); +int strv_insert(char ***l, size_t position, char *value); + +static inline int strv_push_prepend(char ***l, char *value) { + return strv_insert(l, 0, value); +} + +int strv_consume(char ***l, char *value); +int strv_consume_pair(char ***l, char *a, char *b); +int strv_consume_prepend(char ***l, char *value); + +char **strv_remove(char **l, const char *s); +char **strv_uniq(char **l); +bool strv_is_uniq(char **l); + +bool strv_equal(char **a, char **b); + +#define strv_contains(l, s) (!!strv_find((l), (s))) + +char **strv_new_internal(const char *x, ...) _sentinel_; +char **strv_new_ap(const char *x, va_list ap); +#define strv_new(...) strv_new_internal(__VA_ARGS__, NULL) + +#define STRV_IGNORE ((const char *) -1) + +static inline const char* STRV_IFNOTNULL(const char *x) { + return x ? x : STRV_IGNORE; +} + +static inline bool strv_isempty(char * const *l) { + return !l || !*l; +} + +char **strv_split_full(const char *s, const char *separator, SplitFlags flags); +static inline char **strv_split(const char *s, const char *separator) { + return strv_split_full(s, separator, 0); +} +char **strv_split_newlines(const char *s); + +int strv_split_extract(char ***t, const char *s, const char *separators, ExtractFlags flags); + +char *strv_join_prefix(char **l, const char *separator, const char *prefix); +static inline char *strv_join(char **l, const char *separator) { + return strv_join_prefix(l, separator, NULL); +} + +char **strv_parse_nulstr(const char *s, size_t l); +char **strv_split_nulstr(const char *s); +int strv_make_nulstr(char **l, char **p, size_t *n); + +bool strv_overlap(char **a, char **b) _pure_; + +#define STRV_FOREACH(s, l) \ + for ((s) = (l); (s) && *(s); (s)++) + +#define STRV_FOREACH_BACKWARDS(s, l) \ + for (s = ({ \ + char **_l = l; \ + _l ? _l + strv_length(_l) - 1U : NULL; \ + }); \ + (l) && ((s) >= (l)); \ + (s)--) + +#define STRV_FOREACH_PAIR(x, y, l) \ + for ((x) = (l), (y) = (x+1); (x) && *(x) && *(y); (x) += 2, (y) = (x + 1)) + +char **strv_sort(char **l); +void strv_print(char **l); + +#define STRV_MAKE(...) ((char**) ((const char*[]) { __VA_ARGS__, NULL })) + +#define STRV_MAKE_EMPTY ((char*[1]) { NULL }) + +#define strv_from_stdarg_alloca(first) \ + ({ \ + char **_l; \ + \ + if (!first) \ + _l = (char**) &first; \ + else { \ + size_t _n; \ + va_list _ap; \ + \ + _n = 1; \ + va_start(_ap, first); \ + while (va_arg(_ap, char*)) \ + _n++; \ + va_end(_ap); \ + \ + _l = newa(char*, _n+1); \ + _l[_n = 0] = (char*) first; \ + va_start(_ap, first); \ + for (;;) { \ + _l[++_n] = va_arg(_ap, char*); \ + if (!_l[_n]) \ + break; \ + } \ + va_end(_ap); \ + } \ + _l; \ + }) + +#define STR_IN_SET(x, ...) strv_contains(STRV_MAKE(__VA_ARGS__), x) +#define STRPTR_IN_SET(x, ...) \ + ({ \ + const char* _x = (x); \ + _x && strv_contains(STRV_MAKE(__VA_ARGS__), _x); \ + }) + +#define STARTSWITH_SET(p, ...) \ + ({ \ + const char *_p = (p); \ + char *_found = NULL, **_i; \ + STRV_FOREACH(_i, STRV_MAKE(__VA_ARGS__)) { \ + _found = startswith(_p, *_i); \ + if (_found) \ + break; \ + } \ + _found; \ + }) + +#define FOREACH_STRING(x, y, ...) \ + for (char **_l = STRV_MAKE(({ x = y; }), ##__VA_ARGS__); \ + x; \ + x = *(++_l)) + +char **strv_reverse(char **l); +char **strv_shell_escape(char **l, const char *bad); + +bool strv_fnmatch(char* const* patterns, const char *s, int flags); + +static inline bool strv_fnmatch_or_empty(char* const* patterns, const char *s, int flags) { + assert(s); + return strv_isempty(patterns) || + strv_fnmatch(patterns, s, flags); +} + +char ***strv_free_free(char ***l); +DEFINE_TRIVIAL_CLEANUP_FUNC(char***, strv_free_free); + +char **strv_skip(char **l, size_t n); + +int strv_extend_n(char ***l, const char *value, size_t n); + +int fputstrv(FILE *f, char **l, const char *separator, bool *space); + +#define strv_free_and_replace(a, b) \ + ({ \ + strv_free(a); \ + (a) = (b); \ + (b) = NULL; \ + 0; \ + }) diff --git a/src/basic/strxcpyx.c b/src/basic/strxcpyx.c new file mode 100644 index 0000000..9210277 --- /dev/null +++ b/src/basic/strxcpyx.c @@ -0,0 +1,105 @@ +/* SPDX-License-Identifier: LGPL-2.1+ */ + +/* + * Concatenates/copies strings. In any case, terminates in all cases + * with '\0' and moves the @dest pointer forward to the added '\0'. + * Returns the remaining size, and 0 if the string was truncated. + * + * Due to the intended usage, these helpers silently noop invocations + * having zero size. This is technically an exception to the above + * statement "terminates in all cases". It's unexpected for such calls to + * occur outside of a loop where this is the preferred behavior. + */ + +#include <stdarg.h> +#include <stdio.h> +#include <string.h> + +#include "strxcpyx.h" + +size_t strpcpy(char **dest, size_t size, const char *src) { + size_t len; + + assert(dest); + assert(src); + + if (size == 0) + return 0; + + len = strlen(src); + if (len >= size) { + if (size > 1) + *dest = mempcpy(*dest, src, size-1); + size = 0; + } else if (len > 0) { + *dest = mempcpy(*dest, src, len); + size -= len; + } + + *dest[0] = '\0'; + return size; +} + +size_t strpcpyf(char **dest, size_t size, const char *src, ...) { + va_list va; + int i; + + assert(dest); + assert(src); + + if (size == 0) + return 0; + + va_start(va, src); + i = vsnprintf(*dest, size, src, va); + if (i < (int)size) { + *dest += i; + size -= i; + } else + size = 0; + va_end(va); + return size; +} + +size_t strpcpyl(char **dest, size_t size, const char *src, ...) { + va_list va; + + assert(dest); + assert(src); + + va_start(va, src); + do { + size = strpcpy(dest, size, src); + src = va_arg(va, char *); + } while (src); + va_end(va); + return size; +} + +size_t strscpy(char *dest, size_t size, const char *src) { + char *s; + + assert(dest); + assert(src); + + s = dest; + return strpcpy(&s, size, src); +} + +size_t strscpyl(char *dest, size_t size, const char *src, ...) { + va_list va; + char *s; + + assert(dest); + assert(src); + + va_start(va, src); + s = dest; + do { + size = strpcpy(&s, size, src); + src = va_arg(va, char *); + } while (src); + va_end(va); + + return size; +} diff --git a/src/basic/strxcpyx.h b/src/basic/strxcpyx.h new file mode 100644 index 0000000..0f2b749 --- /dev/null +++ b/src/basic/strxcpyx.h @@ -0,0 +1,12 @@ +/* SPDX-License-Identifier: LGPL-2.1+ */ +#pragma once + +#include <stddef.h> + +#include "macro.h" + +size_t strpcpy(char **dest, size_t size, const char *src); +size_t strpcpyf(char **dest, size_t size, const char *src, ...) _printf_(3, 4); +size_t strpcpyl(char **dest, size_t size, const char *src, ...) _sentinel_; +size_t strscpy(char *dest, size_t size, const char *src); +size_t strscpyl(char *dest, size_t size, const char *src, ...) _sentinel_; diff --git a/src/basic/syslog-util.c b/src/basic/syslog-util.c new file mode 100644 index 0000000..fe12948 --- /dev/null +++ b/src/basic/syslog-util.c @@ -0,0 +1,99 @@ +/* SPDX-License-Identifier: LGPL-2.1+ */ + +#include <string.h> +#include <syslog.h> + +#include "hexdecoct.h" +#include "macro.h" +#include "string-table.h" +#include "syslog-util.h" + +int syslog_parse_priority(const char **p, int *priority, bool with_facility) { + int a = 0, b = 0, c = 0; + const char *end; + size_t k; + + assert(p); + assert(*p); + assert(priority); + + if ((*p)[0] != '<') + return 0; + + end = strchr(*p, '>'); + if (!end) + return 0; + + k = end - *p; + assert(k > 0); + + if (k == 2) + c = undecchar((*p)[1]); + else if (k == 3) { + b = undecchar((*p)[1]); + c = undecchar((*p)[2]); + } else if (k == 4) { + a = undecchar((*p)[1]); + b = undecchar((*p)[2]); + c = undecchar((*p)[3]); + } else + return 0; + + if (a < 0 || b < 0 || c < 0 || + (!with_facility && (a || b || c > 7))) + return 0; + + if (with_facility) + *priority = a*100 + b*10 + c; + else + *priority = (*priority & LOG_FACMASK) | c; + + *p += k + 1; + return 1; +} + +static const char *const log_facility_unshifted_table[LOG_NFACILITIES] = { + [LOG_FAC(LOG_KERN)] = "kern", + [LOG_FAC(LOG_USER)] = "user", + [LOG_FAC(LOG_MAIL)] = "mail", + [LOG_FAC(LOG_DAEMON)] = "daemon", + [LOG_FAC(LOG_AUTH)] = "auth", + [LOG_FAC(LOG_SYSLOG)] = "syslog", + [LOG_FAC(LOG_LPR)] = "lpr", + [LOG_FAC(LOG_NEWS)] = "news", + [LOG_FAC(LOG_UUCP)] = "uucp", + [LOG_FAC(LOG_CRON)] = "cron", + [LOG_FAC(LOG_AUTHPRIV)] = "authpriv", + [LOG_FAC(LOG_FTP)] = "ftp", + [LOG_FAC(LOG_LOCAL0)] = "local0", + [LOG_FAC(LOG_LOCAL1)] = "local1", + [LOG_FAC(LOG_LOCAL2)] = "local2", + [LOG_FAC(LOG_LOCAL3)] = "local3", + [LOG_FAC(LOG_LOCAL4)] = "local4", + [LOG_FAC(LOG_LOCAL5)] = "local5", + [LOG_FAC(LOG_LOCAL6)] = "local6", + [LOG_FAC(LOG_LOCAL7)] = "local7" +}; + +DEFINE_STRING_TABLE_LOOKUP_WITH_FALLBACK(log_facility_unshifted, int, LOG_FAC(~0)); + +bool log_facility_unshifted_is_valid(int facility) { + return facility >= 0 && facility <= LOG_FAC(~0); +} + +static const char *const log_level_table[] = { + [LOG_EMERG] = "emerg", + [LOG_ALERT] = "alert", + [LOG_CRIT] = "crit", + [LOG_ERR] = "err", + [LOG_WARNING] = "warning", + [LOG_NOTICE] = "notice", + [LOG_INFO] = "info", + [LOG_DEBUG] = "debug" +}; + +DEFINE_STRING_TABLE_LOOKUP_WITH_FALLBACK(log_level, int, LOG_DEBUG); + +bool log_level_is_valid(int level) { + return level >= 0 && level <= LOG_DEBUG; +} diff --git a/src/basic/syslog-util.h b/src/basic/syslog-util.h new file mode 100644 index 0000000..8f419e8 --- /dev/null +++ b/src/basic/syslog-util.h @@ -0,0 +1,14 @@ +/* SPDX-License-Identifier: LGPL-2.1+ */ +#pragma once + +#include <stdbool.h> + +int log_facility_unshifted_to_string_alloc(int i, char **s); +int log_facility_unshifted_from_string(const char *s); +bool log_facility_unshifted_is_valid(int faciliy); + +int log_level_to_string_alloc(int i, char **s); +int log_level_from_string(const char *s); +bool log_level_is_valid(int level); + +int syslog_parse_priority(const char **p, int *priority, bool with_facility); diff --git a/src/basic/terminal-util.c b/src/basic/terminal-util.c new file mode 100644 index 0000000..0f38120 --- /dev/null +++ b/src/basic/terminal-util.c @@ -0,0 +1,1318 @@ +/* SPDX-License-Identifier: LGPL-2.1+ */ + +#include <errno.h> +#include <fcntl.h> +#include <limits.h> +#include <linux/kd.h> +#include <linux/tiocl.h> +#include <linux/vt.h> +#include <poll.h> +#include <signal.h> +#include <stdarg.h> +#include <stddef.h> +#include <stdlib.h> +#include <string.h> +#include <sys/inotify.h> +#include <sys/ioctl.h> +#include <sys/socket.h> +#include <sys/sysmacros.h> +#include <sys/time.h> +#include <sys/types.h> +#include <sys/utsname.h> +#include <termios.h> +#include <unistd.h> + +#include "alloc-util.h" +#include "copy.h" +#include "def.h" +#include "env-util.h" +#include "fd-util.h" +#include "fileio.h" +#include "fs-util.h" +#include "io-util.h" +#include "log.h" +#include "macro.h" +#include "parse-util.h" +#include "path-util.h" +#include "proc-cmdline.h" +#include "process-util.h" +#include "socket-util.h" +#include "stat-util.h" +#include "string-util.h" +#include "strv.h" +#include "terminal-util.h" +#include "time-util.h" +#include "util.h" + +static volatile unsigned cached_columns = 0; +static volatile unsigned cached_lines = 0; + +static volatile int cached_on_tty = -1; +static volatile int cached_colors_enabled = -1; +static volatile int cached_underline_enabled = -1; + +int chvt(int vt) { + _cleanup_close_ int fd; + + /* Switch to the specified vt number. If the VT is specified <= 0 switch to the VT the kernel log messages go, + * if that's configured. */ + + fd = open_terminal("/dev/tty0", O_RDWR|O_NOCTTY|O_CLOEXEC|O_NONBLOCK); + if (fd < 0) + return -errno; + + if (vt <= 0) { + int tiocl[2] = { + TIOCL_GETKMSGREDIRECT, + 0 + }; + + if (ioctl(fd, TIOCLINUX, tiocl) < 0) + return -errno; + + vt = tiocl[0] <= 0 ? 1 : tiocl[0]; + } + + if (ioctl(fd, VT_ACTIVATE, vt) < 0) + return -errno; + + return 0; +} + +int read_one_char(FILE *f, char *ret, usec_t t, bool *need_nl) { + _cleanup_free_ char *line = NULL; + struct termios old_termios; + int r; + + assert(f); + assert(ret); + + /* If this is a terminal, then switch canonical mode off, so that we can read a single character */ + if (tcgetattr(fileno(f), &old_termios) >= 0) { + struct termios new_termios = old_termios; + + new_termios.c_lflag &= ~ICANON; + new_termios.c_cc[VMIN] = 1; + new_termios.c_cc[VTIME] = 0; + + if (tcsetattr(fileno(f), TCSADRAIN, &new_termios) >= 0) { + char c; + + if (t != USEC_INFINITY) { + if (fd_wait_for_event(fileno(f), POLLIN, t) <= 0) { + (void) tcsetattr(fileno(f), TCSADRAIN, &old_termios); + return -ETIMEDOUT; + } + } + + r = safe_fgetc(f, &c); + (void) tcsetattr(fileno(f), TCSADRAIN, &old_termios); + if (r < 0) + return r; + if (r == 0) + return -EIO; + + if (need_nl) + *need_nl = c != '\n'; + + *ret = c; + return 0; + } + } + + if (t != USEC_INFINITY) { + if (fd_wait_for_event(fileno(f), POLLIN, t) <= 0) + return -ETIMEDOUT; + } + + /* If this is not a terminal, then read a full line instead */ + + r = read_line(f, 16, &line); /* longer than necessary, to eat up UTF-8 chars/vt100 key sequences */ + if (r < 0) + return r; + if (r == 0) + return -EIO; + + if (strlen(line) != 1) + return -EBADMSG; + + if (need_nl) + *need_nl = false; + + *ret = line[0]; + return 0; +} + +#define DEFAULT_ASK_REFRESH_USEC (2*USEC_PER_SEC) + +int ask_char(char *ret, const char *replies, const char *fmt, ...) { + int r; + + assert(ret); + assert(replies); + assert(fmt); + + for (;;) { + va_list ap; + char c; + bool need_nl = true; + + if (colors_enabled()) + fputs(ANSI_HIGHLIGHT, stdout); + + putchar('\r'); + + va_start(ap, fmt); + vprintf(fmt, ap); + va_end(ap); + + if (colors_enabled()) + fputs(ANSI_NORMAL, stdout); + + fflush(stdout); + + r = read_one_char(stdin, &c, DEFAULT_ASK_REFRESH_USEC, &need_nl); + if (r < 0) { + + if (r == -ETIMEDOUT) + continue; + + if (r == -EBADMSG) { + puts("Bad input, please try again."); + continue; + } + + putchar('\n'); + return r; + } + + if (need_nl) + putchar('\n'); + + if (strchr(replies, c)) { + *ret = c; + return 0; + } + + puts("Read unexpected character, please try again."); + } +} + +int ask_string(char **ret, const char *text, ...) { + int r; + + assert(ret); + assert(text); + + for (;;) { + _cleanup_free_ char *line = NULL; + va_list ap; + + if (colors_enabled()) + fputs(ANSI_HIGHLIGHT, stdout); + + va_start(ap, text); + vprintf(text, ap); + va_end(ap); + + if (colors_enabled()) + fputs(ANSI_NORMAL, stdout); + + fflush(stdout); + + r = read_line(stdin, LONG_LINE_MAX, &line); + if (r < 0) + return r; + if (r == 0) + return -EIO; + + if (!isempty(line)) { + *ret = TAKE_PTR(line); + return 0; + } + } +} + +int reset_terminal_fd(int fd, bool switch_to_text) { + struct termios termios; + int r = 0; + + /* Set terminal to some sane defaults */ + + assert(fd >= 0); + + /* We leave locked terminal attributes untouched, so that + * Plymouth may set whatever it wants to set, and we don't + * interfere with that. */ + + /* Disable exclusive mode, just in case */ + (void) ioctl(fd, TIOCNXCL); + + /* Switch to text mode */ + if (switch_to_text) + (void) ioctl(fd, KDSETMODE, KD_TEXT); + + /* Set default keyboard mode */ + (void) vt_reset_keyboard(fd); + + if (tcgetattr(fd, &termios) < 0) { + r = -errno; + goto finish; + } + + /* We only reset the stuff that matters to the software. How + * hardware is set up we don't touch assuming that somebody + * else will do that for us */ + + termios.c_iflag &= ~(IGNBRK | BRKINT | ISTRIP | INLCR | IGNCR | IUCLC); + termios.c_iflag |= ICRNL | IMAXBEL | IUTF8; + termios.c_oflag |= ONLCR; + termios.c_cflag |= CREAD; + termios.c_lflag = ISIG | ICANON | IEXTEN | ECHO | ECHOE | ECHOK | ECHOCTL | ECHOPRT | ECHOKE; + + termios.c_cc[VINTR] = 03; /* ^C */ + termios.c_cc[VQUIT] = 034; /* ^\ */ + termios.c_cc[VERASE] = 0177; + termios.c_cc[VKILL] = 025; /* ^X */ + termios.c_cc[VEOF] = 04; /* ^D */ + termios.c_cc[VSTART] = 021; /* ^Q */ + termios.c_cc[VSTOP] = 023; /* ^S */ + termios.c_cc[VSUSP] = 032; /* ^Z */ + termios.c_cc[VLNEXT] = 026; /* ^V */ + termios.c_cc[VWERASE] = 027; /* ^W */ + termios.c_cc[VREPRINT] = 022; /* ^R */ + termios.c_cc[VEOL] = 0; + termios.c_cc[VEOL2] = 0; + + termios.c_cc[VTIME] = 0; + termios.c_cc[VMIN] = 1; + + if (tcsetattr(fd, TCSANOW, &termios) < 0) + r = -errno; + +finish: + /* Just in case, flush all crap out */ + (void) tcflush(fd, TCIOFLUSH); + + return r; +} + +int reset_terminal(const char *name) { + _cleanup_close_ int fd = -1; + + /* We open the terminal with O_NONBLOCK here, to ensure we + * don't block on carrier if this is a terminal with carrier + * configured. */ + + fd = open_terminal(name, O_RDWR|O_NOCTTY|O_CLOEXEC|O_NONBLOCK); + if (fd < 0) + return fd; + + return reset_terminal_fd(fd, true); +} + +int open_terminal(const char *name, int mode) { + unsigned c = 0; + int fd; + + /* + * If a TTY is in the process of being closed opening it might + * cause EIO. This is horribly awful, but unlikely to be + * changed in the kernel. Hence we work around this problem by + * retrying a couple of times. + * + * https://bugs.launchpad.net/ubuntu/+source/linux/+bug/554172/comments/245 + */ + + if (mode & O_CREAT) + return -EINVAL; + + for (;;) { + fd = open(name, mode, 0); + if (fd >= 0) + break; + + if (errno != EIO) + return -errno; + + /* Max 1s in total */ + if (c >= 20) + return -errno; + + usleep(50 * USEC_PER_MSEC); + c++; + } + + if (isatty(fd) <= 0) { + safe_close(fd); + return -ENOTTY; + } + + return fd; +} + +int acquire_terminal( + const char *name, + AcquireTerminalFlags flags, + usec_t timeout) { + + _cleanup_close_ int notify = -1, fd = -1; + usec_t ts = USEC_INFINITY; + int r, wd = -1; + + assert(name); + assert(IN_SET(flags & ~ACQUIRE_TERMINAL_PERMISSIVE, ACQUIRE_TERMINAL_TRY, ACQUIRE_TERMINAL_FORCE, ACQUIRE_TERMINAL_WAIT)); + + /* We use inotify to be notified when the tty is closed. We create the watch before checking if we can actually + * acquire it, so that we don't lose any event. + * + * Note: strictly speaking this actually watches for the device being closed, it does *not* really watch + * whether a tty loses its controlling process. However, unless some rogue process uses TIOCNOTTY on /dev/tty + * *after* closing its tty otherwise this will not become a problem. As long as the administrator makes sure to + * not configure any service on the same tty as an untrusted user this should not be a problem. (Which they + * probably should not do anyway.) */ + + if ((flags & ~ACQUIRE_TERMINAL_PERMISSIVE) == ACQUIRE_TERMINAL_WAIT) { + notify = inotify_init1(IN_CLOEXEC | (timeout != USEC_INFINITY ? IN_NONBLOCK : 0)); + if (notify < 0) + return -errno; + + wd = inotify_add_watch(notify, name, IN_CLOSE); + if (wd < 0) + return -errno; + + if (timeout != USEC_INFINITY) + ts = now(CLOCK_MONOTONIC); + } + + for (;;) { + struct sigaction sa_old, sa_new = { + .sa_handler = SIG_IGN, + .sa_flags = SA_RESTART, + }; + + if (notify >= 0) { + r = flush_fd(notify); + if (r < 0) + return r; + } + + /* We pass here O_NOCTTY only so that we can check the return value TIOCSCTTY and have a reliable way + * to figure out if we successfully became the controlling process of the tty */ + fd = open_terminal(name, O_RDWR|O_NOCTTY|O_CLOEXEC); + if (fd < 0) + return fd; + + /* Temporarily ignore SIGHUP, so that we don't get SIGHUP'ed if we already own the tty. */ + assert_se(sigaction(SIGHUP, &sa_new, &sa_old) == 0); + + /* First, try to get the tty */ + r = ioctl(fd, TIOCSCTTY, + (flags & ~ACQUIRE_TERMINAL_PERMISSIVE) == ACQUIRE_TERMINAL_FORCE) < 0 ? -errno : 0; + + /* Reset signal handler to old value */ + assert_se(sigaction(SIGHUP, &sa_old, NULL) == 0); + + /* Success? Exit the loop now! */ + if (r >= 0) + break; + + /* Any failure besides -EPERM? Fail, regardless of the mode. */ + if (r != -EPERM) + return r; + + if (flags & ACQUIRE_TERMINAL_PERMISSIVE) /* If we are in permissive mode, then EPERM is fine, turn this + * into a success. Note that EPERM is also returned if we + * already are the owner of the TTY. */ + break; + + if (flags != ACQUIRE_TERMINAL_WAIT) /* If we are in TRY or FORCE mode, then propagate EPERM as EPERM */ + return r; + + assert(notify >= 0); + assert(wd >= 0); + + for (;;) { + union inotify_event_buffer buffer; + struct inotify_event *e; + ssize_t l; + + if (timeout != USEC_INFINITY) { + usec_t n; + + assert(ts != USEC_INFINITY); + + n = now(CLOCK_MONOTONIC); + if (ts + timeout < n) + return -ETIMEDOUT; + + r = fd_wait_for_event(notify, POLLIN, ts + timeout - n); + if (r < 0) + return r; + if (r == 0) + return -ETIMEDOUT; + } + + l = read(notify, &buffer, sizeof(buffer)); + if (l < 0) { + if (IN_SET(errno, EINTR, EAGAIN)) + continue; + + return -errno; + } + + FOREACH_INOTIFY_EVENT(e, buffer, l) { + if (e->mask & IN_Q_OVERFLOW) /* If we hit an inotify queue overflow, simply check if the terminal is up for grabs now. */ + break; + + if (e->wd != wd || !(e->mask & IN_CLOSE)) /* Safety checks */ + return -EIO; + } + + break; + } + + /* We close the tty fd here since if the old session ended our handle will be dead. It's important that + * we do this after sleeping, so that we don't enter an endless loop. */ + fd = safe_close(fd); + } + + return TAKE_FD(fd); +} + +int release_terminal(void) { + static const struct sigaction sa_new = { + .sa_handler = SIG_IGN, + .sa_flags = SA_RESTART, + }; + + _cleanup_close_ int fd = -1; + struct sigaction sa_old; + int r; + + fd = open("/dev/tty", O_RDWR|O_NOCTTY|O_CLOEXEC|O_NONBLOCK); + if (fd < 0) + return -errno; + + /* Temporarily ignore SIGHUP, so that we don't get SIGHUP'ed + * by our own TIOCNOTTY */ + assert_se(sigaction(SIGHUP, &sa_new, &sa_old) == 0); + + r = ioctl(fd, TIOCNOTTY) < 0 ? -errno : 0; + + assert_se(sigaction(SIGHUP, &sa_old, NULL) == 0); + + return r; +} + +int terminal_vhangup_fd(int fd) { + assert(fd >= 0); + + if (ioctl(fd, TIOCVHANGUP) < 0) + return -errno; + + return 0; +} + +int terminal_vhangup(const char *name) { + _cleanup_close_ int fd; + + fd = open_terminal(name, O_RDWR|O_NOCTTY|O_CLOEXEC|O_NONBLOCK); + if (fd < 0) + return fd; + + return terminal_vhangup_fd(fd); +} + +int vt_disallocate(const char *name) { + _cleanup_close_ int fd = -1; + const char *e, *n; + unsigned u; + int r; + + /* Deallocate the VT if possible. If not possible + * (i.e. because it is the active one), at least clear it + * entirely (including the scrollback buffer) */ + + e = path_startswith(name, "/dev/"); + if (!e) + return -EINVAL; + + if (!tty_is_vc(name)) { + /* So this is not a VT. I guess we cannot deallocate + * it then. But let's at least clear the screen */ + + fd = open_terminal(name, O_RDWR|O_NOCTTY|O_CLOEXEC); + if (fd < 0) + return fd; + + loop_write(fd, + "\033[r" /* clear scrolling region */ + "\033[H" /* move home */ + "\033[2J", /* clear screen */ + 10, false); + return 0; + } + + n = startswith(e, "tty"); + if (!n) + return -EINVAL; + + r = safe_atou(n, &u); + if (r < 0) + return r; + + if (u <= 0) + return -EINVAL; + + /* Try to deallocate */ + fd = open_terminal("/dev/tty0", O_RDWR|O_NOCTTY|O_CLOEXEC|O_NONBLOCK); + if (fd < 0) + return fd; + + r = ioctl(fd, VT_DISALLOCATE, u); + fd = safe_close(fd); + + if (r >= 0) + return 0; + + if (errno != EBUSY) + return -errno; + + /* Couldn't deallocate, so let's clear it fully with + * scrollback */ + fd = open_terminal(name, O_RDWR|O_NOCTTY|O_CLOEXEC); + if (fd < 0) + return fd; + + loop_write(fd, + "\033[r" /* clear scrolling region */ + "\033[H" /* move home */ + "\033[3J", /* clear screen including scrollback, requires Linux 2.6.40 */ + 10, false); + return 0; +} + +int make_console_stdio(void) { + int fd, r; + + /* Make /dev/console the controlling terminal and stdin/stdout/stderr */ + + fd = acquire_terminal("/dev/console", ACQUIRE_TERMINAL_FORCE|ACQUIRE_TERMINAL_PERMISSIVE, USEC_INFINITY); + if (fd < 0) + return log_error_errno(fd, "Failed to acquire terminal: %m"); + + r = reset_terminal_fd(fd, true); + if (r < 0) + log_warning_errno(r, "Failed to reset terminal, ignoring: %m"); + + r = rearrange_stdio(fd, fd, fd); /* This invalidates 'fd' both on success and on failure. */ + if (r < 0) + return log_error_errno(r, "Failed to make terminal stdin/stdout/stderr: %m"); + + reset_terminal_feature_caches(); + + return 0; +} + +bool tty_is_vc(const char *tty) { + assert(tty); + + return vtnr_from_tty(tty) >= 0; +} + +bool tty_is_console(const char *tty) { + assert(tty); + + return streq(skip_dev_prefix(tty), "console"); +} + +int vtnr_from_tty(const char *tty) { + int i, r; + + assert(tty); + + tty = skip_dev_prefix(tty); + + if (!startswith(tty, "tty") ) + return -EINVAL; + + if (tty[3] < '0' || tty[3] > '9') + return -EINVAL; + + r = safe_atoi(tty+3, &i); + if (r < 0) + return r; + + if (i < 0 || i > 63) + return -EINVAL; + + return i; +} + + int resolve_dev_console(char **ret) { + _cleanup_free_ char *active = NULL; + char *tty; + int r; + + assert(ret); + + /* Resolve where /dev/console is pointing to, if /sys is actually ours (i.e. not read-only-mounted which is a + * sign for container setups) */ + + if (path_is_read_only_fs("/sys") > 0) + return -ENOMEDIUM; + + r = read_one_line_file("/sys/class/tty/console/active", &active); + if (r < 0) + return r; + + /* If multiple log outputs are configured the last one is what /dev/console points to */ + tty = strrchr(active, ' '); + if (tty) + tty++; + else + tty = active; + + if (streq(tty, "tty0")) { + active = mfree(active); + + /* Get the active VC (e.g. tty1) */ + r = read_one_line_file("/sys/class/tty/tty0/active", &active); + if (r < 0) + return r; + + tty = active; + } + + if (tty == active) + *ret = TAKE_PTR(active); + else { + char *tmp; + + tmp = strdup(tty); + if (!tmp) + return -ENOMEM; + + *ret = tmp; + } + + return 0; +} + +int get_kernel_consoles(char ***ret) { + _cleanup_strv_free_ char **l = NULL; + _cleanup_free_ char *line = NULL; + const char *p; + int r; + + assert(ret); + + /* If /sys is mounted read-only this means we are running in some kind of container environment. In that + * case /sys would reflect the host system, not us, hence ignore the data we can read from it. */ + if (path_is_read_only_fs("/sys") > 0) + goto fallback; + + r = read_one_line_file("/sys/class/tty/console/active", &line); + if (r < 0) + return r; + + p = line; + for (;;) { + _cleanup_free_ char *tty = NULL; + char *path; + + r = extract_first_word(&p, &tty, NULL, 0); + if (r < 0) + return r; + if (r == 0) + break; + + if (streq(tty, "tty0")) { + tty = mfree(tty); + r = read_one_line_file("/sys/class/tty/tty0/active", &tty); + if (r < 0) + return r; + } + + path = strappend("/dev/", tty); + if (!path) + return -ENOMEM; + + if (access(path, F_OK) < 0) { + log_debug_errno(errno, "Console device %s is not accessible, skipping: %m", path); + free(path); + continue; + } + + r = strv_consume(&l, path); + if (r < 0) + return r; + } + + if (strv_isempty(l)) { + log_debug("No devices found for system console"); + goto fallback; + } + + *ret = TAKE_PTR(l); + + return 0; + +fallback: + r = strv_extend(&l, "/dev/console"); + if (r < 0) + return r; + + *ret = TAKE_PTR(l); + + return 0; +} + +bool tty_is_vc_resolve(const char *tty) { + _cleanup_free_ char *resolved = NULL; + + assert(tty); + + tty = skip_dev_prefix(tty); + + if (streq(tty, "console")) { + if (resolve_dev_console(&resolved) < 0) + return false; + + tty = resolved; + } + + return tty_is_vc(tty); +} + +const char *default_term_for_tty(const char *tty) { + return tty && tty_is_vc_resolve(tty) ? "linux" : "vt220"; +} + +int fd_columns(int fd) { + struct winsize ws = {}; + + if (ioctl(fd, TIOCGWINSZ, &ws) < 0) + return -errno; + + if (ws.ws_col <= 0) + return -EIO; + + return ws.ws_col; +} + +unsigned columns(void) { + const char *e; + int c; + + if (cached_columns > 0) + return cached_columns; + + c = 0; + e = getenv("COLUMNS"); + if (e) + (void) safe_atoi(e, &c); + + if (c <= 0 || c > USHRT_MAX) { + c = fd_columns(STDOUT_FILENO); + if (c <= 0) + c = 80; + } + + cached_columns = c; + return cached_columns; +} + +int fd_lines(int fd) { + struct winsize ws = {}; + + if (ioctl(fd, TIOCGWINSZ, &ws) < 0) + return -errno; + + if (ws.ws_row <= 0) + return -EIO; + + return ws.ws_row; +} + +unsigned lines(void) { + const char *e; + int l; + + if (cached_lines > 0) + return cached_lines; + + l = 0; + e = getenv("LINES"); + if (e) + (void) safe_atoi(e, &l); + + if (l <= 0 || l > USHRT_MAX) { + l = fd_lines(STDOUT_FILENO); + if (l <= 0) + l = 24; + } + + cached_lines = l; + return cached_lines; +} + +/* intended to be used as a SIGWINCH sighandler */ +void columns_lines_cache_reset(int signum) { + cached_columns = 0; + cached_lines = 0; +} + +void reset_terminal_feature_caches(void) { + cached_columns = 0; + cached_lines = 0; + + cached_colors_enabled = -1; + cached_underline_enabled = -1; + cached_on_tty = -1; +} + +bool on_tty(void) { + + /* We check both stdout and stderr, so that situations where pipes on the shell are used are reliably + * recognized, regardless if only the output or the errors are piped to some place. Since on_tty() is generally + * used to default to a safer, non-interactive, non-color mode of operation it's probably good to be defensive + * here, and check for both. Note that we don't check for STDIN_FILENO, because it should fine to use fancy + * terminal functionality when outputting stuff, even if the input is piped to us. */ + + if (cached_on_tty < 0) + cached_on_tty = + isatty(STDOUT_FILENO) > 0 && + isatty(STDERR_FILENO) > 0; + + return cached_on_tty; +} + +int getttyname_malloc(int fd, char **ret) { + size_t l = 100; + int r; + + assert(fd >= 0); + assert(ret); + + for (;;) { + char path[l]; + + r = ttyname_r(fd, path, sizeof(path)); + if (r == 0) { + char *c; + + c = strdup(skip_dev_prefix(path)); + if (!c) + return -ENOMEM; + + *ret = c; + return 0; + } + + if (r != ERANGE) + return -r; + + l *= 2; + } + + return 0; +} + +int getttyname_harder(int fd, char **r) { + int k; + char *s = NULL; + + k = getttyname_malloc(fd, &s); + if (k < 0) + return k; + + if (streq(s, "tty")) { + free(s); + return get_ctty(0, NULL, r); + } + + *r = s; + return 0; +} + +int get_ctty_devnr(pid_t pid, dev_t *d) { + int r; + _cleanup_free_ char *line = NULL; + const char *p; + unsigned long ttynr; + + assert(pid >= 0); + + p = procfs_file_alloca(pid, "stat"); + r = read_one_line_file(p, &line); + if (r < 0) + return r; + + p = strrchr(line, ')'); + if (!p) + return -EIO; + + p++; + + if (sscanf(p, " " + "%*c " /* state */ + "%*d " /* ppid */ + "%*d " /* pgrp */ + "%*d " /* session */ + "%lu ", /* ttynr */ + &ttynr) != 1) + return -EIO; + + if (major(ttynr) == 0 && minor(ttynr) == 0) + return -ENXIO; + + if (d) + *d = (dev_t) ttynr; + + return 0; +} + +int get_ctty(pid_t pid, dev_t *ret_devnr, char **ret) { + _cleanup_free_ char *fn = NULL, *b = NULL; + dev_t devnr; + int r; + + r = get_ctty_devnr(pid, &devnr); + if (r < 0) + return r; + + r = device_path_make_canonical(S_IFCHR, devnr, &fn); + if (r < 0) { + if (r != -ENOENT) /* No symlink for this in /dev/char/? */ + return r; + + if (major(devnr) == 136) { + /* This is an ugly hack: PTY devices are not listed in /dev/char/, as they don't follow the + * Linux device model. This means we have no nice way to match them up against their actual + * device node. Let's hence do the check by the fixed, assigned major number. Normally we try + * to avoid such fixed major/minor matches, but there appears to nother nice way to handle + * this. */ + + if (asprintf(&b, "pts/%u", minor(devnr)) < 0) + return -ENOMEM; + } else { + /* Probably something similar to the ptys which have no symlink in /dev/char/. Let's return + * something vaguely useful. */ + + r = device_path_make_major_minor(S_IFCHR, devnr, &fn); + if (r < 0) + return r; + } + } + + if (!b) { + const char *w; + + w = path_startswith(fn, "/dev/"); + if (w) { + b = strdup(w); + if (!b) + return -ENOMEM; + } else + b = TAKE_PTR(fn); + } + + if (ret) + *ret = TAKE_PTR(b); + + if (ret_devnr) + *ret_devnr = devnr; + + return 0; +} + +int ptsname_malloc(int fd, char **ret) { + size_t l = 100; + + assert(fd >= 0); + assert(ret); + + for (;;) { + char *c; + + c = new(char, l); + if (!c) + return -ENOMEM; + + if (ptsname_r(fd, c, l) == 0) { + *ret = c; + return 0; + } + if (errno != ERANGE) { + free(c); + return -errno; + } + + free(c); + l *= 2; + } +} + +int ptsname_namespace(int pty, char **ret) { + int no = -1, r; + + /* Like ptsname(), but doesn't assume that the path is + * accessible in the local namespace. */ + + r = ioctl(pty, TIOCGPTN, &no); + if (r < 0) + return -errno; + + if (no < 0) + return -EIO; + + if (asprintf(ret, "/dev/pts/%i", no) < 0) + return -ENOMEM; + + return 0; +} + +int openpt_in_namespace(pid_t pid, int flags) { + _cleanup_close_ int pidnsfd = -1, mntnsfd = -1, usernsfd = -1, rootfd = -1; + _cleanup_close_pair_ int pair[2] = { -1, -1 }; + pid_t child; + int r; + + assert(pid > 0); + + r = namespace_open(pid, &pidnsfd, &mntnsfd, NULL, &usernsfd, &rootfd); + if (r < 0) + return r; + + if (socketpair(AF_UNIX, SOCK_DGRAM, 0, pair) < 0) + return -errno; + + r = namespace_fork("(sd-openptns)", "(sd-openpt)", NULL, 0, FORK_RESET_SIGNALS|FORK_DEATHSIG, + pidnsfd, mntnsfd, -1, usernsfd, rootfd, &child); + if (r < 0) + return r; + if (r == 0) { + int master; + + pair[0] = safe_close(pair[0]); + + master = posix_openpt(flags|O_NOCTTY|O_CLOEXEC); + if (master < 0) + _exit(EXIT_FAILURE); + + if (unlockpt(master) < 0) + _exit(EXIT_FAILURE); + + if (send_one_fd(pair[1], master, 0) < 0) + _exit(EXIT_FAILURE); + + _exit(EXIT_SUCCESS); + } + + pair[1] = safe_close(pair[1]); + + r = wait_for_terminate_and_check("(sd-openptns)", child, 0); + if (r < 0) + return r; + if (r != EXIT_SUCCESS) + return -EIO; + + return receive_one_fd(pair[0], 0); +} + +int open_terminal_in_namespace(pid_t pid, const char *name, int mode) { + _cleanup_close_ int pidnsfd = -1, mntnsfd = -1, usernsfd = -1, rootfd = -1; + _cleanup_close_pair_ int pair[2] = { -1, -1 }; + pid_t child; + int r; + + r = namespace_open(pid, &pidnsfd, &mntnsfd, NULL, &usernsfd, &rootfd); + if (r < 0) + return r; + + if (socketpair(AF_UNIX, SOCK_DGRAM, 0, pair) < 0) + return -errno; + + r = namespace_fork("(sd-terminalns)", "(sd-terminal)", NULL, 0, FORK_RESET_SIGNALS|FORK_DEATHSIG, + pidnsfd, mntnsfd, -1, usernsfd, rootfd, &child); + if (r < 0) + return r; + if (r == 0) { + int master; + + pair[0] = safe_close(pair[0]); + + master = open_terminal(name, mode|O_NOCTTY|O_CLOEXEC); + if (master < 0) + _exit(EXIT_FAILURE); + + if (send_one_fd(pair[1], master, 0) < 0) + _exit(EXIT_FAILURE); + + _exit(EXIT_SUCCESS); + } + + pair[1] = safe_close(pair[1]); + + r = wait_for_terminate_and_check("(sd-terminalns)", child, 0); + if (r < 0) + return r; + if (r != EXIT_SUCCESS) + return -EIO; + + return receive_one_fd(pair[0], 0); +} + +static bool getenv_terminal_is_dumb(void) { + const char *e; + + e = getenv("TERM"); + if (!e) + return true; + + return streq(e, "dumb"); +} + +bool terminal_is_dumb(void) { + if (!on_tty()) + return true; + + return getenv_terminal_is_dumb(); +} + +bool colors_enabled(void) { + + /* Returns true if colors are considered supported on our stdout. For that we check $SYSTEMD_COLORS first + * (which is the explicit way to turn colors on/off). If that didn't work we turn colors off unless we are on a + * TTY. And if we are on a TTY we turn it off if $TERM is set to "dumb". There's one special tweak though: if + * we are PID 1 then we do not check whether we are connected to a TTY, because we don't keep /dev/console open + * continously due to fear of SAK, and hence things are a bit weird. */ + + if (cached_colors_enabled < 0) { + int val; + + val = getenv_bool("SYSTEMD_COLORS"); + if (val >= 0) + cached_colors_enabled = val; + else if (getpid_cached() == 1) + /* PID1 outputs to the console without holding it open all the time */ + cached_colors_enabled = !getenv_terminal_is_dumb(); + else + cached_colors_enabled = !terminal_is_dumb(); + } + + return cached_colors_enabled; +} + +bool dev_console_colors_enabled(void) { + _cleanup_free_ char *s = NULL; + int b; + + /* Returns true if we assume that color is supported on /dev/console. + * + * For that we first check if we explicitly got told to use colors or not, by checking $SYSTEMD_COLORS. If that + * isn't set we check whether PID 1 has $TERM set, and if not, whether TERM is set on the kernel command + * line. If we find $TERM set we assume color if it's not set to "dumb", similarly to how regular + * colors_enabled() operates. */ + + b = getenv_bool("SYSTEMD_COLORS"); + if (b >= 0) + return b; + + if (getenv_for_pid(1, "TERM", &s) <= 0) + (void) proc_cmdline_get_key("TERM", 0, &s); + + return !streq_ptr(s, "dumb"); +} + +bool underline_enabled(void) { + + if (cached_underline_enabled < 0) { + + /* The Linux console doesn't support underlining, turn it off, but only there. */ + + if (colors_enabled()) + cached_underline_enabled = !streq_ptr(getenv("TERM"), "linux"); + else + cached_underline_enabled = false; + } + + return cached_underline_enabled; +} + +int vt_default_utf8(void) { + _cleanup_free_ char *b = NULL; + int r; + + /* Read the default VT UTF8 setting from the kernel */ + + r = read_one_line_file("/sys/module/vt/parameters/default_utf8", &b); + if (r < 0) + return r; + + return parse_boolean(b); +} + +int vt_reset_keyboard(int fd) { + int kb; + + /* If we can't read the default, then default to unicode. It's 2017 after all. */ + kb = vt_default_utf8() != 0 ? K_UNICODE : K_XLATE; + + if (ioctl(fd, KDSKBMODE, kb) < 0) + return -errno; + + return 0; +} + +int vt_restore(int fd) { + static const struct vt_mode mode = { + .mode = VT_AUTO, + }; + int r, q = 0; + + r = ioctl(fd, KDSETMODE, KD_TEXT); + if (r < 0) + q = log_debug_errno(errno, "Failed to set VT in text mode, ignoring: %m"); + + r = vt_reset_keyboard(fd); + if (r < 0) { + log_debug_errno(r, "Failed to reset keyboard mode, ignoring: %m"); + if (q >= 0) + q = r; + } + + r = ioctl(fd, VT_SETMODE, &mode); + if (r < 0) { + log_debug_errno(errno, "Failed to set VT_AUTO mode, ignoring: %m"); + if (q >= 0) + q = -errno; + } + + r = fchown(fd, 0, (gid_t) -1); + if (r < 0) { + log_debug_errno(errno, "Failed to chown VT, ignoring: %m"); + if (q >= 0) + q = -errno; + } + + return q; +} + +int vt_release(int fd, bool restore) { + assert(fd >= 0); + + /* This function releases the VT by acknowledging the VT-switch signal + * sent by the kernel and optionally reset the VT in text and auto + * VT-switching modes. */ + + if (ioctl(fd, VT_RELDISP, 1) < 0) + return -errno; + + if (restore) + return vt_restore(fd); + + return 0; +} diff --git a/src/basic/terminal-util.h b/src/basic/terminal-util.h new file mode 100644 index 0000000..c885e0a --- /dev/null +++ b/src/basic/terminal-util.h @@ -0,0 +1,160 @@ +/* SPDX-License-Identifier: LGPL-2.1+ */ +#pragma once + +#include <stdarg.h> +#include <stdbool.h> +#include <stdio.h> +#include <sys/types.h> + +#include "macro.h" +#include "time-util.h" + +/* Regular colors */ +#define ANSI_BLACK "\x1B[0;30m" +#define ANSI_RED "\x1B[0;31m" +#define ANSI_GREEN "\x1B[0;32m" +#define ANSI_YELLOW "\x1B[0;33m" +#define ANSI_BLUE "\x1B[0;34m" +#define ANSI_MAGENTA "\x1B[0;35m" +#define ANSI_CYAN "\x1B[0;36m" +#define ANSI_WHITE "\x1B[0;37m" +#define ANSI_GREY "\x1B[0;2;37m" + +/* Bold/highlighted */ +#define ANSI_HIGHLIGHT_BLACK "\x1B[0;1;30m" +#define ANSI_HIGHLIGHT_RED "\x1B[0;1;31m" +#define ANSI_HIGHLIGHT_GREEN "\x1B[0;1;32m" +#define ANSI_HIGHLIGHT_YELLOW "\x1B[0;1;33m" +#define ANSI_HIGHLIGHT_BLUE "\x1B[0;1;34m" +#define ANSI_HIGHLIGHT_MAGENTA "\x1B[0;1;35m" +#define ANSI_HIGHLIGHT_CYAN "\x1B[0;1;36m" +#define ANSI_HIGHLIGHT_WHITE "\x1B[0;1;37m" + +/* Underlined */ +#define ANSI_HIGHLIGHT_BLACK_UNDERLINE "\x1B[0;1;4;30m" +#define ANSI_HIGHLIGHT_RED_UNDERLINE "\x1B[0;1;4;31m" +#define ANSI_HIGHLIGHT_GREEN_UNDERLINE "\x1B[0;1;4;32m" +#define ANSI_HIGHLIGHT_YELLOW_UNDERLINE "\x1B[0;1;4;33m" +#define ANSI_HIGHLIGHT_BLUE_UNDERLINE "\x1B[0;1;4;34m" +#define ANSI_HIGHLIGHT_MAGENTA_UNDERLINE "\x1B[0;1;4;35m" +#define ANSI_HIGHLIGHT_CYAN_UNDERLINE "\x1B[0;1;4;36m" +#define ANSI_HIGHLIGHT_WHITE_UNDERLINE "\x1B[0;1;4;37m" + +/* Other ANSI codes */ +#define ANSI_UNDERLINE "\x1B[0;4m" +#define ANSI_HIGHLIGHT "\x1B[0;1;39m" +#define ANSI_HIGHLIGHT_UNDERLINE "\x1B[0;1;4m" + +/* Reset/clear ANSI styles */ +#define ANSI_NORMAL "\x1B[0m" + +/* Erase characters until the end of the line */ +#define ANSI_ERASE_TO_END_OF_LINE "\x1B[K" + +/* Move cursor up one line */ +#define ANSI_REVERSE_LINEFEED "\x1BM" + +/* Set cursor to top left corner and clear screen */ +#define ANSI_HOME_CLEAR "\x1B[H\x1B[2J" + +int reset_terminal_fd(int fd, bool switch_to_text); +int reset_terminal(const char *name); + +int open_terminal(const char *name, int mode); + +/* Flags for tweaking the way we become the controlling process of a terminal. */ +typedef enum AcquireTerminalFlags { + /* Try to become the controlling process of the TTY. If we can't return -EPERM. */ + ACQUIRE_TERMINAL_TRY = 0, + + /* Tell the kernel to forcibly make us the controlling process of the TTY. Returns -EPERM if the kernel doesn't allow that. */ + ACQUIRE_TERMINAL_FORCE = 1, + + /* If we can't become the controlling process of the TTY right-away, then wait until we can. */ + ACQUIRE_TERMINAL_WAIT = 2, + + /* Pick one of the above, and then OR this flag in, in order to request permissive behaviour, if we can't become controlling process then don't mind */ + ACQUIRE_TERMINAL_PERMISSIVE = 1 << 2, +} AcquireTerminalFlags; + +int acquire_terminal(const char *name, AcquireTerminalFlags flags, usec_t timeout); +int release_terminal(void); + +int terminal_vhangup_fd(int fd); +int terminal_vhangup(const char *name); + +int chvt(int vt); + +int read_one_char(FILE *f, char *ret, usec_t timeout, bool *need_nl); +int ask_char(char *ret, const char *replies, const char *text, ...) _printf_(3, 4); +int ask_string(char **ret, const char *text, ...) _printf_(2, 3); + +int vt_disallocate(const char *name); + +int resolve_dev_console(char **ret); +int get_kernel_consoles(char ***ret); +bool tty_is_vc(const char *tty); +bool tty_is_vc_resolve(const char *tty); +bool tty_is_console(const char *tty) _pure_; +int vtnr_from_tty(const char *tty); +const char *default_term_for_tty(const char *tty); + +int make_console_stdio(void); + +int fd_columns(int fd); +unsigned columns(void); +int fd_lines(int fd); +unsigned lines(void); + +void columns_lines_cache_reset(int _unused_ signum); +void reset_terminal_feature_caches(void); + +bool on_tty(void); +bool terminal_is_dumb(void); +bool colors_enabled(void); +bool underline_enabled(void); +bool dev_console_colors_enabled(void); + +#define DEFINE_ANSI_FUNC(name, NAME) \ + static inline const char *ansi_##name(void) { \ + return colors_enabled() ? ANSI_##NAME : ""; \ + } + +#define DEFINE_ANSI_FUNC_UNDERLINE(name, NAME, REPLACEMENT) \ + static inline const char *ansi_##name(void) { \ + return underline_enabled() ? ANSI_##NAME : \ + colors_enabled() ? ANSI_##REPLACEMENT : ""; \ + } + +DEFINE_ANSI_FUNC(highlight, HIGHLIGHT); +DEFINE_ANSI_FUNC(highlight_red, HIGHLIGHT_RED); +DEFINE_ANSI_FUNC(highlight_green, HIGHLIGHT_GREEN); +DEFINE_ANSI_FUNC(highlight_yellow, HIGHLIGHT_YELLOW); +DEFINE_ANSI_FUNC(highlight_blue, HIGHLIGHT_BLUE); +DEFINE_ANSI_FUNC(highlight_magenta, HIGHLIGHT_MAGENTA); +DEFINE_ANSI_FUNC(normal, NORMAL); +DEFINE_ANSI_FUNC(grey, GREY); + +DEFINE_ANSI_FUNC_UNDERLINE(underline, UNDERLINE, NORMAL); +DEFINE_ANSI_FUNC_UNDERLINE(highlight_underline, HIGHLIGHT_UNDERLINE, HIGHLIGHT); +DEFINE_ANSI_FUNC_UNDERLINE(highlight_red_underline, HIGHLIGHT_RED_UNDERLINE, HIGHLIGHT_RED); +DEFINE_ANSI_FUNC_UNDERLINE(highlight_green_underline, HIGHLIGHT_GREEN_UNDERLINE, HIGHLIGHT_GREEN); +DEFINE_ANSI_FUNC_UNDERLINE(highlight_yellow_underline, HIGHLIGHT_YELLOW_UNDERLINE, HIGHLIGHT_YELLOW); +DEFINE_ANSI_FUNC_UNDERLINE(highlight_blue_underline, HIGHLIGHT_BLUE_UNDERLINE, HIGHLIGHT_BLUE); + +int get_ctty_devnr(pid_t pid, dev_t *d); +int get_ctty(pid_t, dev_t *_devnr, char **r); + +int getttyname_malloc(int fd, char **r); +int getttyname_harder(int fd, char **r); + +int ptsname_malloc(int fd, char **ret); +int ptsname_namespace(int pty, char **ret); + +int openpt_in_namespace(pid_t pid, int flags); +int open_terminal_in_namespace(pid_t pid, const char *name, int mode); + +int vt_default_utf8(void); +int vt_reset_keyboard(int fd); +int vt_restore(int fd); +int vt_release(int fd, bool restore_vt); diff --git a/src/basic/time-util.c b/src/basic/time-util.c new file mode 100644 index 0000000..62cdc30 --- /dev/null +++ b/src/basic/time-util.c @@ -0,0 +1,1471 @@ +/* SPDX-License-Identifier: LGPL-2.1+ */ + +#include <ctype.h> +#include <errno.h> +#include <limits.h> +#include <stdlib.h> +#include <string.h> +#include <sys/mman.h> +#include <sys/stat.h> +#include <sys/time.h> +#include <sys/timerfd.h> +#include <sys/timex.h> +#include <sys/types.h> +#include <unistd.h> + +#include "alloc-util.h" +#include "fd-util.h" +#include "fileio.h" +#include "fs-util.h" +#include "io-util.h" +#include "log.h" +#include "macro.h" +#include "missing_timerfd.h" +#include "parse-util.h" +#include "path-util.h" +#include "process-util.h" +#include "stat-util.h" +#include "string-util.h" +#include "strv.h" +#include "time-util.h" + +static clockid_t map_clock_id(clockid_t c) { + + /* Some more exotic archs (s390, ppc, …) lack the "ALARM" flavour of the clocks. Thus, clock_gettime() will + * fail for them. Since they are essentially the same as their non-ALARM pendants (their only difference is + * when timers are set on them), let's just map them accordingly. This way, we can get the correct time even on + * those archs. */ + + switch (c) { + + case CLOCK_BOOTTIME_ALARM: + return CLOCK_BOOTTIME; + + case CLOCK_REALTIME_ALARM: + return CLOCK_REALTIME; + + default: + return c; + } +} + +usec_t now(clockid_t clock_id) { + struct timespec ts; + + assert_se(clock_gettime(map_clock_id(clock_id), &ts) == 0); + + return timespec_load(&ts); +} + +nsec_t now_nsec(clockid_t clock_id) { + struct timespec ts; + + assert_se(clock_gettime(map_clock_id(clock_id), &ts) == 0); + + return timespec_load_nsec(&ts); +} + +dual_timestamp* dual_timestamp_get(dual_timestamp *ts) { + assert(ts); + + ts->realtime = now(CLOCK_REALTIME); + ts->monotonic = now(CLOCK_MONOTONIC); + + return ts; +} + +triple_timestamp* triple_timestamp_get(triple_timestamp *ts) { + assert(ts); + + ts->realtime = now(CLOCK_REALTIME); + ts->monotonic = now(CLOCK_MONOTONIC); + ts->boottime = clock_boottime_supported() ? now(CLOCK_BOOTTIME) : USEC_INFINITY; + + return ts; +} + +dual_timestamp* dual_timestamp_from_realtime(dual_timestamp *ts, usec_t u) { + int64_t delta; + assert(ts); + + if (u == USEC_INFINITY || u <= 0) { + ts->realtime = ts->monotonic = u; + return ts; + } + + ts->realtime = u; + + delta = (int64_t) now(CLOCK_REALTIME) - (int64_t) u; + ts->monotonic = usec_sub_signed(now(CLOCK_MONOTONIC), delta); + + return ts; +} + +triple_timestamp* triple_timestamp_from_realtime(triple_timestamp *ts, usec_t u) { + int64_t delta; + + assert(ts); + + if (u == USEC_INFINITY || u <= 0) { + ts->realtime = ts->monotonic = ts->boottime = u; + return ts; + } + + ts->realtime = u; + delta = (int64_t) now(CLOCK_REALTIME) - (int64_t) u; + ts->monotonic = usec_sub_signed(now(CLOCK_MONOTONIC), delta); + ts->boottime = clock_boottime_supported() ? usec_sub_signed(now(CLOCK_BOOTTIME), delta) : USEC_INFINITY; + + return ts; +} + +dual_timestamp* dual_timestamp_from_monotonic(dual_timestamp *ts, usec_t u) { + int64_t delta; + assert(ts); + + if (u == USEC_INFINITY) { + ts->realtime = ts->monotonic = USEC_INFINITY; + return ts; + } + + ts->monotonic = u; + delta = (int64_t) now(CLOCK_MONOTONIC) - (int64_t) u; + ts->realtime = usec_sub_signed(now(CLOCK_REALTIME), delta); + + return ts; +} + +dual_timestamp* dual_timestamp_from_boottime_or_monotonic(dual_timestamp *ts, usec_t u) { + int64_t delta; + + if (u == USEC_INFINITY) { + ts->realtime = ts->monotonic = USEC_INFINITY; + return ts; + } + + dual_timestamp_get(ts); + delta = (int64_t) now(clock_boottime_or_monotonic()) - (int64_t) u; + ts->realtime = usec_sub_signed(ts->realtime, delta); + ts->monotonic = usec_sub_signed(ts->monotonic, delta); + + return ts; +} + +usec_t triple_timestamp_by_clock(triple_timestamp *ts, clockid_t clock) { + + switch (clock) { + + case CLOCK_REALTIME: + case CLOCK_REALTIME_ALARM: + return ts->realtime; + + case CLOCK_MONOTONIC: + return ts->monotonic; + + case CLOCK_BOOTTIME: + case CLOCK_BOOTTIME_ALARM: + return ts->boottime; + + default: + return USEC_INFINITY; + } +} + +usec_t timespec_load(const struct timespec *ts) { + assert(ts); + + if (ts->tv_sec < 0 || ts->tv_nsec < 0) + return USEC_INFINITY; + + if ((usec_t) ts->tv_sec > (UINT64_MAX - (ts->tv_nsec / NSEC_PER_USEC)) / USEC_PER_SEC) + return USEC_INFINITY; + + return + (usec_t) ts->tv_sec * USEC_PER_SEC + + (usec_t) ts->tv_nsec / NSEC_PER_USEC; +} + +nsec_t timespec_load_nsec(const struct timespec *ts) { + assert(ts); + + if (ts->tv_sec < 0 || ts->tv_nsec < 0) + return NSEC_INFINITY; + + if ((nsec_t) ts->tv_sec >= (UINT64_MAX - ts->tv_nsec) / NSEC_PER_SEC) + return NSEC_INFINITY; + + return (nsec_t) ts->tv_sec * NSEC_PER_SEC + (nsec_t) ts->tv_nsec; +} + +struct timespec *timespec_store(struct timespec *ts, usec_t u) { + assert(ts); + + if (u == USEC_INFINITY || + u / USEC_PER_SEC >= TIME_T_MAX) { + ts->tv_sec = (time_t) -1; + ts->tv_nsec = (long) -1; + return ts; + } + + ts->tv_sec = (time_t) (u / USEC_PER_SEC); + ts->tv_nsec = (long int) ((u % USEC_PER_SEC) * NSEC_PER_USEC); + + return ts; +} + +usec_t timeval_load(const struct timeval *tv) { + assert(tv); + + if (tv->tv_sec < 0 || tv->tv_usec < 0) + return USEC_INFINITY; + + if ((usec_t) tv->tv_sec > (UINT64_MAX - tv->tv_usec) / USEC_PER_SEC) + return USEC_INFINITY; + + return + (usec_t) tv->tv_sec * USEC_PER_SEC + + (usec_t) tv->tv_usec; +} + +struct timeval *timeval_store(struct timeval *tv, usec_t u) { + assert(tv); + + if (u == USEC_INFINITY || + u / USEC_PER_SEC > TIME_T_MAX) { + tv->tv_sec = (time_t) -1; + tv->tv_usec = (suseconds_t) -1; + } else { + tv->tv_sec = (time_t) (u / USEC_PER_SEC); + tv->tv_usec = (suseconds_t) (u % USEC_PER_SEC); + } + + return tv; +} + +static char *format_timestamp_internal( + char *buf, + size_t l, + usec_t t, + bool utc, + bool us) { + + /* The weekdays in non-localized (English) form. We use this instead of the localized form, so that our + * generated timestamps may be parsed with parse_timestamp(), and always read the same. */ + static const char * const weekdays[] = { + [0] = "Sun", + [1] = "Mon", + [2] = "Tue", + [3] = "Wed", + [4] = "Thu", + [5] = "Fri", + [6] = "Sat", + }; + + struct tm tm; + time_t sec; + size_t n; + + assert(buf); + + if (l < + 3 + /* week day */ + 1 + 10 + /* space and date */ + 1 + 8 + /* space and time */ + (us ? 1 + 6 : 0) + /* "." and microsecond part */ + 1 + 1 + /* space and shortest possible zone */ + 1) + return NULL; /* Not enough space even for the shortest form. */ + if (t <= 0 || t == USEC_INFINITY) + return NULL; /* Timestamp is unset */ + + /* Let's not format times with years > 9999 */ + if (t > USEC_TIMESTAMP_FORMATTABLE_MAX) { + assert(l >= STRLEN("--- XXXX-XX-XX XX:XX:XX") + 1); + strcpy(buf, "--- XXXX-XX-XX XX:XX:XX"); + return buf; + } + + sec = (time_t) (t / USEC_PER_SEC); /* Round down */ + + if (!localtime_or_gmtime_r(&sec, &tm, utc)) + return NULL; + + /* Start with the week day */ + assert((size_t) tm.tm_wday < ELEMENTSOF(weekdays)); + memcpy(buf, weekdays[tm.tm_wday], 4); + + /* Add the main components */ + if (strftime(buf + 3, l - 3, " %Y-%m-%d %H:%M:%S", &tm) <= 0) + return NULL; /* Doesn't fit */ + + /* Append the microseconds part, if that's requested */ + if (us) { + n = strlen(buf); + if (n + 8 > l) + return NULL; /* Microseconds part doesn't fit. */ + + sprintf(buf + n, ".%06"PRI_USEC, t % USEC_PER_SEC); + } + + /* Append the timezone */ + n = strlen(buf); + if (utc) { + /* If this is UTC then let's explicitly use the "UTC" string here, because gmtime_r() normally uses the + * obsolete "GMT" instead. */ + if (n + 5 > l) + return NULL; /* "UTC" doesn't fit. */ + + strcpy(buf + n, " UTC"); + + } else if (!isempty(tm.tm_zone)) { + size_t tn; + + /* An explicit timezone is specified, let's use it, if it fits */ + tn = strlen(tm.tm_zone); + if (n + 1 + tn + 1 > l) { + /* The full time zone does not fit in. Yuck. */ + + if (n + 1 + _POSIX_TZNAME_MAX + 1 > l) + return NULL; /* Not even enough space for the POSIX minimum (of 6)? In that case, complain that it doesn't fit */ + + /* So the time zone doesn't fit in fully, but the caller passed enough space for the POSIX + * minimum time zone length. In this case suppress the timezone entirely, in order not to dump + * an overly long, hard to read string on the user. This should be safe, because the user will + * assume the local timezone anyway if none is shown. And so does parse_timestamp(). */ + } else { + buf[n++] = ' '; + strcpy(buf + n, tm.tm_zone); + } + } + + return buf; +} + +char *format_timestamp(char *buf, size_t l, usec_t t) { + return format_timestamp_internal(buf, l, t, false, false); +} + +char *format_timestamp_utc(char *buf, size_t l, usec_t t) { + return format_timestamp_internal(buf, l, t, true, false); +} + +char *format_timestamp_us(char *buf, size_t l, usec_t t) { + return format_timestamp_internal(buf, l, t, false, true); +} + +char *format_timestamp_us_utc(char *buf, size_t l, usec_t t) { + return format_timestamp_internal(buf, l, t, true, true); +} + +char *format_timestamp_relative(char *buf, size_t l, usec_t t) { + const char *s; + usec_t n, d; + + if (t <= 0 || t == USEC_INFINITY) + return NULL; + + n = now(CLOCK_REALTIME); + if (n > t) { + d = n - t; + s = "ago"; + } else { + d = t - n; + s = "left"; + } + + if (d >= USEC_PER_YEAR) + snprintf(buf, l, USEC_FMT " years " USEC_FMT " months %s", + d / USEC_PER_YEAR, + (d % USEC_PER_YEAR) / USEC_PER_MONTH, s); + else if (d >= USEC_PER_MONTH) + snprintf(buf, l, USEC_FMT " months " USEC_FMT " days %s", + d / USEC_PER_MONTH, + (d % USEC_PER_MONTH) / USEC_PER_DAY, s); + else if (d >= USEC_PER_WEEK) + snprintf(buf, l, USEC_FMT " weeks " USEC_FMT " days %s", + d / USEC_PER_WEEK, + (d % USEC_PER_WEEK) / USEC_PER_DAY, s); + else if (d >= 2*USEC_PER_DAY) + snprintf(buf, l, USEC_FMT " days %s", d / USEC_PER_DAY, s); + else if (d >= 25*USEC_PER_HOUR) + snprintf(buf, l, "1 day " USEC_FMT "h %s", + (d - USEC_PER_DAY) / USEC_PER_HOUR, s); + else if (d >= 6*USEC_PER_HOUR) + snprintf(buf, l, USEC_FMT "h %s", + d / USEC_PER_HOUR, s); + else if (d >= USEC_PER_HOUR) + snprintf(buf, l, USEC_FMT "h " USEC_FMT "min %s", + d / USEC_PER_HOUR, + (d % USEC_PER_HOUR) / USEC_PER_MINUTE, s); + else if (d >= 5*USEC_PER_MINUTE) + snprintf(buf, l, USEC_FMT "min %s", + d / USEC_PER_MINUTE, s); + else if (d >= USEC_PER_MINUTE) + snprintf(buf, l, USEC_FMT "min " USEC_FMT "s %s", + d / USEC_PER_MINUTE, + (d % USEC_PER_MINUTE) / USEC_PER_SEC, s); + else if (d >= USEC_PER_SEC) + snprintf(buf, l, USEC_FMT "s %s", + d / USEC_PER_SEC, s); + else if (d >= USEC_PER_MSEC) + snprintf(buf, l, USEC_FMT "ms %s", + d / USEC_PER_MSEC, s); + else if (d > 0) + snprintf(buf, l, USEC_FMT"us %s", + d, s); + else + snprintf(buf, l, "now"); + + buf[l-1] = 0; + return buf; +} + +char *format_timespan(char *buf, size_t l, usec_t t, usec_t accuracy) { + static const struct { + const char *suffix; + usec_t usec; + } table[] = { + { "y", USEC_PER_YEAR }, + { "month", USEC_PER_MONTH }, + { "w", USEC_PER_WEEK }, + { "d", USEC_PER_DAY }, + { "h", USEC_PER_HOUR }, + { "min", USEC_PER_MINUTE }, + { "s", USEC_PER_SEC }, + { "ms", USEC_PER_MSEC }, + { "us", 1 }, + }; + + size_t i; + char *p = buf; + bool something = false; + + assert(buf); + assert(l > 0); + + if (t == USEC_INFINITY) { + strncpy(p, "infinity", l-1); + p[l-1] = 0; + return p; + } + + if (t <= 0) { + strncpy(p, "0", l-1); + p[l-1] = 0; + return p; + } + + /* The result of this function can be parsed with parse_sec */ + + for (i = 0; i < ELEMENTSOF(table); i++) { + int k = 0; + size_t n; + bool done = false; + usec_t a, b; + + if (t <= 0) + break; + + if (t < accuracy && something) + break; + + if (t < table[i].usec) + continue; + + if (l <= 1) + break; + + a = t / table[i].usec; + b = t % table[i].usec; + + /* Let's see if we should shows this in dot notation */ + if (t < USEC_PER_MINUTE && b > 0) { + usec_t cc; + signed char j; + + j = 0; + for (cc = table[i].usec; cc > 1; cc /= 10) + j++; + + for (cc = accuracy; cc > 1; cc /= 10) { + b /= 10; + j--; + } + + if (j > 0) { + k = snprintf(p, l, + "%s"USEC_FMT".%0*"PRI_USEC"%s", + p > buf ? " " : "", + a, + j, + b, + table[i].suffix); + + t = 0; + done = true; + } + } + + /* No? Then let's show it normally */ + if (!done) { + k = snprintf(p, l, + "%s"USEC_FMT"%s", + p > buf ? " " : "", + a, + table[i].suffix); + + t = b; + } + + n = MIN((size_t) k, l); + + l -= n; + p += n; + + something = true; + } + + *p = 0; + + return buf; +} + +static int parse_timestamp_impl(const char *t, usec_t *usec, bool with_tz) { + static const struct { + const char *name; + const int nr; + } day_nr[] = { + { "Sunday", 0 }, + { "Sun", 0 }, + { "Monday", 1 }, + { "Mon", 1 }, + { "Tuesday", 2 }, + { "Tue", 2 }, + { "Wednesday", 3 }, + { "Wed", 3 }, + { "Thursday", 4 }, + { "Thu", 4 }, + { "Friday", 5 }, + { "Fri", 5 }, + { "Saturday", 6 }, + { "Sat", 6 }, + }; + + const char *k, *utc = NULL, *tzn = NULL; + struct tm tm, copy; + time_t x; + usec_t x_usec, plus = 0, minus = 0, ret; + int r, weekday = -1, dst = -1; + size_t i; + + /* Allowed syntaxes: + * + * 2012-09-22 16:34:22 + * 2012-09-22 16:34 (seconds will be set to 0) + * 2012-09-22 (time will be set to 00:00:00) + * 16:34:22 (date will be set to today) + * 16:34 (date will be set to today, seconds to 0) + * now + * yesterday (time is set to 00:00:00) + * today (time is set to 00:00:00) + * tomorrow (time is set to 00:00:00) + * +5min + * -5days + * @2147483647 (seconds since epoch) + */ + + assert(t); + assert(usec); + + if (t[0] == '@' && !with_tz) + return parse_sec(t + 1, usec); + + ret = now(CLOCK_REALTIME); + + if (!with_tz) { + if (streq(t, "now")) + goto finish; + + else if (t[0] == '+') { + r = parse_sec(t+1, &plus); + if (r < 0) + return r; + + goto finish; + + } else if (t[0] == '-') { + r = parse_sec(t+1, &minus); + if (r < 0) + return r; + + goto finish; + + } else if ((k = endswith(t, " ago"))) { + t = strndupa(t, k - t); + + r = parse_sec(t, &minus); + if (r < 0) + return r; + + goto finish; + + } else if ((k = endswith(t, " left"))) { + t = strndupa(t, k - t); + + r = parse_sec(t, &plus); + if (r < 0) + return r; + + goto finish; + } + + /* See if the timestamp is suffixed with UTC */ + utc = endswith_no_case(t, " UTC"); + if (utc) + t = strndupa(t, utc - t); + else { + const char *e = NULL; + int j; + + tzset(); + + /* See if the timestamp is suffixed by either the DST or non-DST local timezone. Note that we only + * support the local timezones here, nothing else. Not because we wouldn't want to, but simply because + * there are no nice APIs available to cover this. By accepting the local time zone strings, we make + * sure that all timestamps written by format_timestamp() can be parsed correctly, even though we don't + * support arbitrary timezone specifications. */ + + for (j = 0; j <= 1; j++) { + + if (isempty(tzname[j])) + continue; + + e = endswith_no_case(t, tzname[j]); + if (!e) + continue; + if (e == t) + continue; + if (e[-1] != ' ') + continue; + + break; + } + + if (IN_SET(j, 0, 1)) { + /* Found one of the two timezones specified. */ + t = strndupa(t, e - t - 1); + dst = j; + tzn = tzname[j]; + } + } + } + + x = (time_t) (ret / USEC_PER_SEC); + x_usec = 0; + + if (!localtime_or_gmtime_r(&x, &tm, utc)) + return -EINVAL; + + tm.tm_isdst = dst; + if (!with_tz && tzn) + tm.tm_zone = tzn; + + if (streq(t, "today")) { + tm.tm_sec = tm.tm_min = tm.tm_hour = 0; + goto from_tm; + + } else if (streq(t, "yesterday")) { + tm.tm_mday--; + tm.tm_sec = tm.tm_min = tm.tm_hour = 0; + goto from_tm; + + } else if (streq(t, "tomorrow")) { + tm.tm_mday++; + tm.tm_sec = tm.tm_min = tm.tm_hour = 0; + goto from_tm; + } + + for (i = 0; i < ELEMENTSOF(day_nr); i++) { + size_t skip; + + if (!startswith_no_case(t, day_nr[i].name)) + continue; + + skip = strlen(day_nr[i].name); + if (t[skip] != ' ') + continue; + + weekday = day_nr[i].nr; + t += skip + 1; + break; + } + + copy = tm; + k = strptime(t, "%y-%m-%d %H:%M:%S", &tm); + if (k) { + if (*k == '.') + goto parse_usec; + else if (*k == 0) + goto from_tm; + } + + tm = copy; + k = strptime(t, "%Y-%m-%d %H:%M:%S", &tm); + if (k) { + if (*k == '.') + goto parse_usec; + else if (*k == 0) + goto from_tm; + } + + tm = copy; + k = strptime(t, "%y-%m-%d %H:%M", &tm); + if (k && *k == 0) { + tm.tm_sec = 0; + goto from_tm; + } + + tm = copy; + k = strptime(t, "%Y-%m-%d %H:%M", &tm); + if (k && *k == 0) { + tm.tm_sec = 0; + goto from_tm; + } + + tm = copy; + k = strptime(t, "%y-%m-%d", &tm); + if (k && *k == 0) { + tm.tm_sec = tm.tm_min = tm.tm_hour = 0; + goto from_tm; + } + + tm = copy; + k = strptime(t, "%Y-%m-%d", &tm); + if (k && *k == 0) { + tm.tm_sec = tm.tm_min = tm.tm_hour = 0; + goto from_tm; + } + + tm = copy; + k = strptime(t, "%H:%M:%S", &tm); + if (k) { + if (*k == '.') + goto parse_usec; + else if (*k == 0) + goto from_tm; + } + + tm = copy; + k = strptime(t, "%H:%M", &tm); + if (k && *k == 0) { + tm.tm_sec = 0; + goto from_tm; + } + + return -EINVAL; + +parse_usec: + { + unsigned add; + + k++; + r = parse_fractional_part_u(&k, 6, &add); + if (r < 0) + return -EINVAL; + + if (*k) + return -EINVAL; + + x_usec = add; + } + +from_tm: + if (weekday >= 0 && tm.tm_wday != weekday) + return -EINVAL; + + x = mktime_or_timegm(&tm, utc); + if (x < 0) + return -EINVAL; + + ret = (usec_t) x * USEC_PER_SEC + x_usec; + if (ret > USEC_TIMESTAMP_FORMATTABLE_MAX) + return -EINVAL; + +finish: + if (ret + plus < ret) /* overflow? */ + return -EINVAL; + ret += plus; + if (ret > USEC_TIMESTAMP_FORMATTABLE_MAX) + return -EINVAL; + + if (ret >= minus) + ret -= minus; + else + return -EINVAL; + + *usec = ret; + + return 0; +} + +typedef struct ParseTimestampResult { + usec_t usec; + int return_value; +} ParseTimestampResult; + +int parse_timestamp(const char *t, usec_t *usec) { + char *last_space, *tz = NULL; + ParseTimestampResult *shared, tmp; + int r; + + last_space = strrchr(t, ' '); + if (last_space != NULL && timezone_is_valid(last_space + 1, LOG_DEBUG)) + tz = last_space + 1; + + if (!tz || endswith_no_case(t, " UTC")) + return parse_timestamp_impl(t, usec, false); + + shared = mmap(NULL, sizeof *shared, PROT_READ|PROT_WRITE, MAP_SHARED|MAP_ANONYMOUS, -1, 0); + if (shared == MAP_FAILED) + return negative_errno(); + + r = safe_fork("(sd-timestamp)", FORK_RESET_SIGNALS|FORK_CLOSE_ALL_FDS|FORK_DEATHSIG|FORK_WAIT, NULL); + if (r < 0) { + (void) munmap(shared, sizeof *shared); + return r; + } + if (r == 0) { + bool with_tz = true; + + if (setenv("TZ", tz, 1) != 0) { + shared->return_value = negative_errno(); + _exit(EXIT_FAILURE); + } + + tzset(); + + /* If there is a timezone that matches the tzname fields, leave the parsing to the implementation. + * Otherwise just cut it off. */ + with_tz = !STR_IN_SET(tz, tzname[0], tzname[1]); + + /* Cut off the timezone if we don't need it. */ + if (with_tz) + t = strndupa(t, last_space - t); + + shared->return_value = parse_timestamp_impl(t, &shared->usec, with_tz); + + _exit(EXIT_SUCCESS); + } + + tmp = *shared; + if (munmap(shared, sizeof *shared) != 0) + return negative_errno(); + + if (tmp.return_value == 0) + *usec = tmp.usec; + + return tmp.return_value; +} + +static const char* extract_multiplier(const char *p, usec_t *multiplier) { + static const struct { + const char *suffix; + usec_t usec; + } table[] = { + { "seconds", USEC_PER_SEC }, + { "second", USEC_PER_SEC }, + { "sec", USEC_PER_SEC }, + { "s", USEC_PER_SEC }, + { "minutes", USEC_PER_MINUTE }, + { "minute", USEC_PER_MINUTE }, + { "min", USEC_PER_MINUTE }, + { "months", USEC_PER_MONTH }, + { "month", USEC_PER_MONTH }, + { "M", USEC_PER_MONTH }, + { "msec", USEC_PER_MSEC }, + { "ms", USEC_PER_MSEC }, + { "m", USEC_PER_MINUTE }, + { "hours", USEC_PER_HOUR }, + { "hour", USEC_PER_HOUR }, + { "hr", USEC_PER_HOUR }, + { "h", USEC_PER_HOUR }, + { "days", USEC_PER_DAY }, + { "day", USEC_PER_DAY }, + { "d", USEC_PER_DAY }, + { "weeks", USEC_PER_WEEK }, + { "week", USEC_PER_WEEK }, + { "w", USEC_PER_WEEK }, + { "years", USEC_PER_YEAR }, + { "year", USEC_PER_YEAR }, + { "y", USEC_PER_YEAR }, + { "usec", 1ULL }, + { "us", 1ULL }, + { "µs", 1ULL }, + }; + size_t i; + + for (i = 0; i < ELEMENTSOF(table); i++) { + char *e; + + e = startswith(p, table[i].suffix); + if (e) { + *multiplier = table[i].usec; + return e; + } + } + + return p; +} + +int parse_time(const char *t, usec_t *usec, usec_t default_unit) { + const char *p, *s; + usec_t r = 0; + bool something = false; + + assert(t); + assert(usec); + assert(default_unit > 0); + + p = t; + + p += strspn(p, WHITESPACE); + s = startswith(p, "infinity"); + if (s) { + s += strspn(s, WHITESPACE); + if (*s != 0) + return -EINVAL; + + *usec = USEC_INFINITY; + return 0; + } + + for (;;) { + usec_t multiplier = default_unit, k; + long long l; + char *e; + + p += strspn(p, WHITESPACE); + + if (*p == 0) { + if (!something) + return -EINVAL; + + break; + } + + if (*p == '-') /* Don't allow "-0" */ + return -ERANGE; + + errno = 0; + l = strtoll(p, &e, 10); + if (errno > 0) + return -errno; + if (l < 0) + return -ERANGE; + + if (*e == '.') { + p = e + 1; + p += strspn(p, DIGITS); + } else if (e == p) + return -EINVAL; + else + p = e; + + s = extract_multiplier(p + strspn(p, WHITESPACE), &multiplier); + if (s == p && *s != '\0') + /* Don't allow '12.34.56', but accept '12.34 .56' or '12.34s.56'*/ + return -EINVAL; + + p = s; + + if ((usec_t) l >= USEC_INFINITY / multiplier) + return -ERANGE; + + k = (usec_t) l * multiplier; + if (k >= USEC_INFINITY - r) + return -ERANGE; + + r += k; + + something = true; + + if (*e == '.') { + usec_t m = multiplier / 10; + const char *b; + + for (b = e + 1; *b >= '0' && *b <= '9'; b++, m /= 10) { + k = (usec_t) (*b - '0') * m; + if (k >= USEC_INFINITY - r) + return -ERANGE; + + r += k; + } + + /* Don't allow "0.-0", "3.+1", "3. 1", "3.sec" or "3.hoge"*/ + if (b == e + 1) + return -EINVAL; + } + } + + *usec = r; + + return 0; +} + +int parse_sec(const char *t, usec_t *usec) { + return parse_time(t, usec, USEC_PER_SEC); +} + +int parse_sec_fix_0(const char *t, usec_t *ret) { + usec_t k; + int r; + + assert(t); + assert(ret); + + r = parse_sec(t, &k); + if (r < 0) + return r; + + *ret = k == 0 ? USEC_INFINITY : k; + return r; +} + +static const char* extract_nsec_multiplier(const char *p, nsec_t *multiplier) { + static const struct { + const char *suffix; + nsec_t nsec; + } table[] = { + { "seconds", NSEC_PER_SEC }, + { "second", NSEC_PER_SEC }, + { "sec", NSEC_PER_SEC }, + { "s", NSEC_PER_SEC }, + { "minutes", NSEC_PER_MINUTE }, + { "minute", NSEC_PER_MINUTE }, + { "min", NSEC_PER_MINUTE }, + { "months", NSEC_PER_MONTH }, + { "month", NSEC_PER_MONTH }, + { "M", NSEC_PER_MONTH }, + { "msec", NSEC_PER_MSEC }, + { "ms", NSEC_PER_MSEC }, + { "m", NSEC_PER_MINUTE }, + { "hours", NSEC_PER_HOUR }, + { "hour", NSEC_PER_HOUR }, + { "hr", NSEC_PER_HOUR }, + { "h", NSEC_PER_HOUR }, + { "days", NSEC_PER_DAY }, + { "day", NSEC_PER_DAY }, + { "d", NSEC_PER_DAY }, + { "weeks", NSEC_PER_WEEK }, + { "week", NSEC_PER_WEEK }, + { "w", NSEC_PER_WEEK }, + { "years", NSEC_PER_YEAR }, + { "year", NSEC_PER_YEAR }, + { "y", NSEC_PER_YEAR }, + { "usec", NSEC_PER_USEC }, + { "us", NSEC_PER_USEC }, + { "µs", NSEC_PER_USEC }, + { "nsec", 1ULL }, + { "ns", 1ULL }, + { "", 1ULL }, /* default is nsec */ + }; + size_t i; + + for (i = 0; i < ELEMENTSOF(table); i++) { + char *e; + + e = startswith(p, table[i].suffix); + if (e) { + *multiplier = table[i].nsec; + return e; + } + } + + return p; +} + +int parse_nsec(const char *t, nsec_t *nsec) { + const char *p, *s; + nsec_t r = 0; + bool something = false; + + assert(t); + assert(nsec); + + p = t; + + p += strspn(p, WHITESPACE); + s = startswith(p, "infinity"); + if (s) { + s += strspn(s, WHITESPACE); + if (*s != 0) + return -EINVAL; + + *nsec = NSEC_INFINITY; + return 0; + } + + for (;;) { + nsec_t multiplier = 1, k; + long long l; + char *e; + + p += strspn(p, WHITESPACE); + + if (*p == 0) { + if (!something) + return -EINVAL; + + break; + } + + if (*p == '-') /* Don't allow "-0" */ + return -ERANGE; + + errno = 0; + l = strtoll(p, &e, 10); + if (errno > 0) + return -errno; + if (l < 0) + return -ERANGE; + + if (*e == '.') { + p = e + 1; + p += strspn(p, DIGITS); + } else if (e == p) + return -EINVAL; + else + p = e; + + s = extract_nsec_multiplier(p + strspn(p, WHITESPACE), &multiplier); + if (s == p && *s != '\0') + /* Don't allow '12.34.56', but accept '12.34 .56' or '12.34s.56'*/ + return -EINVAL; + + p = s; + + if ((nsec_t) l >= NSEC_INFINITY / multiplier) + return -ERANGE; + + k = (nsec_t) l * multiplier; + if (k >= NSEC_INFINITY - r) + return -ERANGE; + + r += k; + + something = true; + + if (*e == '.') { + nsec_t m = multiplier / 10; + const char *b; + + for (b = e + 1; *b >= '0' && *b <= '9'; b++, m /= 10) { + k = (nsec_t) (*b - '0') * m; + if (k >= NSEC_INFINITY - r) + return -ERANGE; + + r += k; + } + + /* Don't allow "0.-0", "3.+1", "3. 1", "3.sec" or "3.hoge"*/ + if (b == e + 1) + return -EINVAL; + } + } + + *nsec = r; + + return 0; +} + +bool ntp_synced(void) { + struct timex txc = {}; + + if (adjtimex(&txc) < 0) + return false; + + if (txc.status & STA_UNSYNC) + return false; + + return true; +} + +int get_timezones(char ***ret) { + _cleanup_fclose_ FILE *f = NULL; + _cleanup_strv_free_ char **zones = NULL; + size_t n_zones = 0, n_allocated = 0; + int r; + + assert(ret); + + zones = strv_new("UTC"); + if (!zones) + return -ENOMEM; + + n_allocated = 2; + n_zones = 1; + + f = fopen("/usr/share/zoneinfo/zone.tab", "re"); + if (f) { + for (;;) { + _cleanup_free_ char *line = NULL; + char *p, *w; + size_t k; + + r = read_line(f, LONG_LINE_MAX, &line); + if (r < 0) + return r; + if (r == 0) + break; + + p = strstrip(line); + + if (isempty(p) || *p == '#') + continue; + + /* Skip over country code */ + p += strcspn(p, WHITESPACE); + p += strspn(p, WHITESPACE); + + /* Skip over coordinates */ + p += strcspn(p, WHITESPACE); + p += strspn(p, WHITESPACE); + + /* Found timezone name */ + k = strcspn(p, WHITESPACE); + if (k <= 0) + continue; + + w = strndup(p, k); + if (!w) + return -ENOMEM; + + if (!GREEDY_REALLOC(zones, n_allocated, n_zones + 2)) { + free(w); + return -ENOMEM; + } + + zones[n_zones++] = w; + zones[n_zones] = NULL; + } + + strv_sort(zones); + + } else if (errno != ENOENT) + return -errno; + + *ret = TAKE_PTR(zones); + + return 0; +} + +bool timezone_is_valid(const char *name, int log_level) { + bool slash = false; + const char *p, *t; + _cleanup_close_ int fd = -1; + char buf[4]; + int r; + + if (isempty(name)) + return false; + + if (name[0] == '/') + return false; + + for (p = name; *p; p++) { + if (!(*p >= '0' && *p <= '9') && + !(*p >= 'a' && *p <= 'z') && + !(*p >= 'A' && *p <= 'Z') && + !IN_SET(*p, '-', '_', '+', '/')) + return false; + + if (*p == '/') { + + if (slash) + return false; + + slash = true; + } else + slash = false; + } + + if (slash) + return false; + + if (p - name >= PATH_MAX) + return false; + + t = strjoina("/usr/share/zoneinfo/", name); + + fd = open(t, O_RDONLY|O_CLOEXEC); + if (fd < 0) { + log_full_errno(log_level, errno, "Failed to open timezone file '%s': %m", t); + return false; + } + + r = fd_verify_regular(fd); + if (r < 0) { + log_full_errno(log_level, r, "Timezone file '%s' is not a regular file: %m", t); + return false; + } + + r = loop_read_exact(fd, buf, 4, false); + if (r < 0) { + log_full_errno(log_level, r, "Failed to read from timezone file '%s': %m", t); + return false; + } + + /* Magic from tzfile(5) */ + if (memcmp(buf, "TZif", 4) != 0) { + log_full(log_level, "Timezone file '%s' has wrong magic bytes", t); + return false; + } + + return true; +} + +bool clock_boottime_supported(void) { + static int supported = -1; + + /* Note that this checks whether CLOCK_BOOTTIME is available in general as well as available for timerfds()! */ + + if (supported < 0) { + int fd; + + fd = timerfd_create(CLOCK_BOOTTIME, TFD_NONBLOCK|TFD_CLOEXEC); + if (fd < 0) + supported = false; + else { + safe_close(fd); + supported = true; + } + } + + return supported; +} + +clockid_t clock_boottime_or_monotonic(void) { + if (clock_boottime_supported()) + return CLOCK_BOOTTIME; + else + return CLOCK_MONOTONIC; +} + +bool clock_supported(clockid_t clock) { + struct timespec ts; + + switch (clock) { + + case CLOCK_MONOTONIC: + case CLOCK_REALTIME: + return true; + + case CLOCK_BOOTTIME: + return clock_boottime_supported(); + + case CLOCK_BOOTTIME_ALARM: + if (!clock_boottime_supported()) + return false; + + _fallthrough_; + default: + /* For everything else, check properly */ + return clock_gettime(clock, &ts) >= 0; + } +} + +int get_timezone(char **tz) { + _cleanup_free_ char *t = NULL; + const char *e; + char *z; + int r; + + r = readlink_malloc("/etc/localtime", &t); + if (r < 0) + return r; /* returns EINVAL if not a symlink */ + + e = PATH_STARTSWITH_SET(t, "/usr/share/zoneinfo/", "../usr/share/zoneinfo/"); + if (!e) + return -EINVAL; + + if (!timezone_is_valid(e, LOG_DEBUG)) + return -EINVAL; + + z = strdup(e); + if (!z) + return -ENOMEM; + + *tz = z; + return 0; +} + +time_t mktime_or_timegm(struct tm *tm, bool utc) { + return utc ? timegm(tm) : mktime(tm); +} + +struct tm *localtime_or_gmtime_r(const time_t *t, struct tm *tm, bool utc) { + return utc ? gmtime_r(t, tm) : localtime_r(t, tm); +} + +unsigned long usec_to_jiffies(usec_t u) { + static thread_local unsigned long hz = 0; + long r; + + if (hz == 0) { + r = sysconf(_SC_CLK_TCK); + + assert(r > 0); + hz = r; + } + + return DIV_ROUND_UP(u , USEC_PER_SEC / hz); +} + +usec_t usec_shift_clock(usec_t x, clockid_t from, clockid_t to) { + usec_t a, b; + + if (x == USEC_INFINITY) + return USEC_INFINITY; + if (map_clock_id(from) == map_clock_id(to)) + return x; + + a = now(from); + b = now(to); + + if (x > a) + /* x lies in the future */ + return usec_add(b, usec_sub_unsigned(x, a)); + else + /* x lies in the past */ + return usec_sub_unsigned(b, usec_sub_unsigned(a, x)); +} + +bool in_utc_timezone(void) { + tzset(); + + return timezone == 0 && daylight == 0; +} + +int time_change_fd(void) { + + /* We only care for the cancellation event, hence we set the timeout to the latest possible value. */ + static const struct itimerspec its = { + .it_value.tv_sec = TIME_T_MAX, + }; + + _cleanup_close_ int fd; + + assert_cc(sizeof(time_t) == sizeof(TIME_T_MAX)); + + /* Uses TFD_TIMER_CANCEL_ON_SET to get notifications whenever CLOCK_REALTIME makes a jump relative to + * CLOCK_MONOTONIC. */ + + fd = timerfd_create(CLOCK_REALTIME, TFD_NONBLOCK|TFD_CLOEXEC); + if (fd < 0) + return -errno; + + if (timerfd_settime(fd, TFD_TIMER_ABSTIME|TFD_TIMER_CANCEL_ON_SET, &its, NULL) < 0) + return -errno; + + return TAKE_FD(fd); +} diff --git a/src/basic/time-util.h b/src/basic/time-util.h new file mode 100644 index 0000000..5316305 --- /dev/null +++ b/src/basic/time-util.h @@ -0,0 +1,179 @@ +/* SPDX-License-Identifier: LGPL-2.1+ */ +#pragma once + +#include <inttypes.h> +#include <stdbool.h> +#include <stddef.h> +#include <stdint.h> +#include <stdio.h> +#include <time.h> + +typedef uint64_t usec_t; +typedef uint64_t nsec_t; + +#define PRI_NSEC PRIu64 +#define PRI_USEC PRIu64 +#define NSEC_FMT "%" PRI_NSEC +#define USEC_FMT "%" PRI_USEC + +#include "macro.h" + +typedef struct dual_timestamp { + usec_t realtime; + usec_t monotonic; +} dual_timestamp; + +typedef struct triple_timestamp { + usec_t realtime; + usec_t monotonic; + usec_t boottime; +} triple_timestamp; + +#define USEC_INFINITY ((usec_t) -1) +#define NSEC_INFINITY ((nsec_t) -1) + +#define MSEC_PER_SEC 1000ULL +#define USEC_PER_SEC ((usec_t) 1000000ULL) +#define USEC_PER_MSEC ((usec_t) 1000ULL) +#define NSEC_PER_SEC ((nsec_t) 1000000000ULL) +#define NSEC_PER_MSEC ((nsec_t) 1000000ULL) +#define NSEC_PER_USEC ((nsec_t) 1000ULL) + +#define USEC_PER_MINUTE ((usec_t) (60ULL*USEC_PER_SEC)) +#define NSEC_PER_MINUTE ((nsec_t) (60ULL*NSEC_PER_SEC)) +#define USEC_PER_HOUR ((usec_t) (60ULL*USEC_PER_MINUTE)) +#define NSEC_PER_HOUR ((nsec_t) (60ULL*NSEC_PER_MINUTE)) +#define USEC_PER_DAY ((usec_t) (24ULL*USEC_PER_HOUR)) +#define NSEC_PER_DAY ((nsec_t) (24ULL*NSEC_PER_HOUR)) +#define USEC_PER_WEEK ((usec_t) (7ULL*USEC_PER_DAY)) +#define NSEC_PER_WEEK ((nsec_t) (7ULL*NSEC_PER_DAY)) +#define USEC_PER_MONTH ((usec_t) (2629800ULL*USEC_PER_SEC)) +#define NSEC_PER_MONTH ((nsec_t) (2629800ULL*NSEC_PER_SEC)) +#define USEC_PER_YEAR ((usec_t) (31557600ULL*USEC_PER_SEC)) +#define NSEC_PER_YEAR ((nsec_t) (31557600ULL*NSEC_PER_SEC)) + +/* We assume a maximum timezone length of 6. TZNAME_MAX is not defined on Linux, but glibc internally initializes this + * to 6. Let's rely on that. */ +#define FORMAT_TIMESTAMP_MAX (3+1+10+1+8+1+6+1+6+1) +#define FORMAT_TIMESTAMP_WIDTH 28 /* when outputting, assume this width */ +#define FORMAT_TIMESTAMP_RELATIVE_MAX 256 +#define FORMAT_TIMESPAN_MAX 64 + +#define TIME_T_MAX (time_t)((UINTMAX_C(1) << ((sizeof(time_t) << 3) - 1)) - 1) + +#define DUAL_TIMESTAMP_NULL ((struct dual_timestamp) {}) +#define TRIPLE_TIMESTAMP_NULL ((struct triple_timestamp) {}) + +usec_t now(clockid_t clock); +nsec_t now_nsec(clockid_t clock); + +dual_timestamp* dual_timestamp_get(dual_timestamp *ts); +dual_timestamp* dual_timestamp_from_realtime(dual_timestamp *ts, usec_t u); +dual_timestamp* dual_timestamp_from_monotonic(dual_timestamp *ts, usec_t u); +dual_timestamp* dual_timestamp_from_boottime_or_monotonic(dual_timestamp *ts, usec_t u); + +triple_timestamp* triple_timestamp_get(triple_timestamp *ts); +triple_timestamp* triple_timestamp_from_realtime(triple_timestamp *ts, usec_t u); + +#define DUAL_TIMESTAMP_HAS_CLOCK(clock) \ + IN_SET(clock, CLOCK_REALTIME, CLOCK_REALTIME_ALARM, CLOCK_MONOTONIC) + +#define TRIPLE_TIMESTAMP_HAS_CLOCK(clock) \ + IN_SET(clock, CLOCK_REALTIME, CLOCK_REALTIME_ALARM, CLOCK_MONOTONIC, CLOCK_BOOTTIME, CLOCK_BOOTTIME_ALARM) + +static inline bool dual_timestamp_is_set(const dual_timestamp *ts) { + return ((ts->realtime > 0 && ts->realtime != USEC_INFINITY) || + (ts->monotonic > 0 && ts->monotonic != USEC_INFINITY)); +} + +static inline bool triple_timestamp_is_set(const triple_timestamp *ts) { + return ((ts->realtime > 0 && ts->realtime != USEC_INFINITY) || + (ts->monotonic > 0 && ts->monotonic != USEC_INFINITY) || + (ts->boottime > 0 && ts->boottime != USEC_INFINITY)); +} + +usec_t triple_timestamp_by_clock(triple_timestamp *ts, clockid_t clock); + +usec_t timespec_load(const struct timespec *ts) _pure_; +nsec_t timespec_load_nsec(const struct timespec *ts) _pure_; +struct timespec *timespec_store(struct timespec *ts, usec_t u); + +usec_t timeval_load(const struct timeval *tv) _pure_; +struct timeval *timeval_store(struct timeval *tv, usec_t u); + +char *format_timestamp(char *buf, size_t l, usec_t t); +char *format_timestamp_utc(char *buf, size_t l, usec_t t); +char *format_timestamp_us(char *buf, size_t l, usec_t t); +char *format_timestamp_us_utc(char *buf, size_t l, usec_t t); +char *format_timestamp_relative(char *buf, size_t l, usec_t t); +char *format_timespan(char *buf, size_t l, usec_t t, usec_t accuracy); + +int parse_timestamp(const char *t, usec_t *usec); + +int parse_sec(const char *t, usec_t *usec); +int parse_sec_fix_0(const char *t, usec_t *usec); +int parse_time(const char *t, usec_t *usec, usec_t default_unit); +int parse_nsec(const char *t, nsec_t *nsec); + +bool ntp_synced(void); + +int get_timezones(char ***l); +bool timezone_is_valid(const char *name, int log_level); + +bool clock_boottime_supported(void); +bool clock_supported(clockid_t clock); +clockid_t clock_boottime_or_monotonic(void); + +usec_t usec_shift_clock(usec_t, clockid_t from, clockid_t to); + +int get_timezone(char **timezone); + +time_t mktime_or_timegm(struct tm *tm, bool utc); +struct tm *localtime_or_gmtime_r(const time_t *t, struct tm *tm, bool utc); + +unsigned long usec_to_jiffies(usec_t usec); + +bool in_utc_timezone(void); + +static inline usec_t usec_add(usec_t a, usec_t b) { + usec_t c; + + /* Adds two time values, and makes sure USEC_INFINITY as input results as USEC_INFINITY in output, and doesn't + * overflow. */ + + c = a + b; + if (c < a || c < b) /* overflow check */ + return USEC_INFINITY; + + return c; +} + +static inline usec_t usec_sub_unsigned(usec_t timestamp, usec_t delta) { + + if (timestamp == USEC_INFINITY) /* Make sure infinity doesn't degrade */ + return USEC_INFINITY; + if (timestamp < delta) + return 0; + + return timestamp - delta; +} + +static inline usec_t usec_sub_signed(usec_t timestamp, int64_t delta) { + if (delta < 0) + return usec_add(timestamp, (usec_t) (-delta)); + else + return usec_sub_unsigned(timestamp, (usec_t) delta); +} + +#if SIZEOF_TIME_T == 8 +/* The last second we can format is 31. Dec 9999, 1s before midnight, because otherwise we'd enter 5 digit year + * territory. However, since we want to stay away from this in all timezones we take one day off. */ +#define USEC_TIMESTAMP_FORMATTABLE_MAX ((usec_t) 253402214399000000) +#elif SIZEOF_TIME_T == 4 +/* With a 32bit time_t we can't go beyond 2038... */ +#define USEC_TIMESTAMP_FORMATTABLE_MAX ((usec_t) 2147483647000000) +#else +#error "Yuck, time_t is neither 4 nor 8 bytes wide?" +#endif + +int time_change_fd(void); diff --git a/src/basic/tmpfile-util.c b/src/basic/tmpfile-util.c new file mode 100644 index 0000000..bc92d6a --- /dev/null +++ b/src/basic/tmpfile-util.c @@ -0,0 +1,330 @@ +/* SPDX-License-Identifier: LGPL-2.1+ */ + +#include <sys/mman.h> + +#include "alloc-util.h" +#include "fd-util.h" +#include "fs-util.h" +#include "hexdecoct.h" +#include "macro.h" +#include "memfd-util.h" +#include "missing_fcntl.h" +#include "missing_syscall.h" +#include "path-util.h" +#include "process-util.h" +#include "random-util.h" +#include "stdio-util.h" +#include "string-util.h" +#include "tmpfile-util.h" +#include "umask-util.h" + +int fopen_temporary(const char *path, FILE **_f, char **_temp_path) { + FILE *f; + char *t; + int r, fd; + + assert(path); + assert(_f); + assert(_temp_path); + + r = tempfn_xxxxxx(path, NULL, &t); + if (r < 0) + return r; + + fd = mkostemp_safe(t); + if (fd < 0) { + free(t); + return -errno; + } + + f = fdopen(fd, "w"); + if (!f) { + unlink_noerrno(t); + free(t); + safe_close(fd); + return -errno; + } + + *_f = f; + *_temp_path = t; + + return 0; +} + +/* This is much like mkostemp() but is subject to umask(). */ +int mkostemp_safe(char *pattern) { + _cleanup_umask_ mode_t u = 0; + int fd; + + assert(pattern); + + u = umask(077); + + fd = mkostemp(pattern, O_CLOEXEC); + if (fd < 0) + return -errno; + + return fd; +} + +int fmkostemp_safe(char *pattern, const char *mode, FILE **ret_f) { + int fd; + FILE *f; + + fd = mkostemp_safe(pattern); + if (fd < 0) + return fd; + + f = fdopen(fd, mode); + if (!f) { + safe_close(fd); + return -errno; + } + + *ret_f = f; + return 0; +} + +int tempfn_xxxxxx(const char *p, const char *extra, char **ret) { + const char *fn; + char *t; + + assert(ret); + + if (isempty(p)) + return -EINVAL; + if (path_equal(p, "/")) + return -EINVAL; + + /* + * Turns this: + * /foo/bar/waldo + * + * Into this: + * /foo/bar/.#<extra>waldoXXXXXX + */ + + fn = basename(p); + if (!filename_is_valid(fn)) + return -EINVAL; + + extra = strempty(extra); + + t = new(char, strlen(p) + 2 + strlen(extra) + 6 + 1); + if (!t) + return -ENOMEM; + + strcpy(stpcpy(stpcpy(stpcpy(mempcpy(t, p, fn - p), ".#"), extra), fn), "XXXXXX"); + + *ret = path_simplify(t, false); + return 0; +} + +int tempfn_random(const char *p, const char *extra, char **ret) { + const char *fn; + char *t, *x; + uint64_t u; + unsigned i; + + assert(ret); + + if (isempty(p)) + return -EINVAL; + if (path_equal(p, "/")) + return -EINVAL; + + /* + * Turns this: + * /foo/bar/waldo + * + * Into this: + * /foo/bar/.#<extra>waldobaa2a261115984a9 + */ + + fn = basename(p); + if (!filename_is_valid(fn)) + return -EINVAL; + + extra = strempty(extra); + + t = new(char, strlen(p) + 2 + strlen(extra) + 16 + 1); + if (!t) + return -ENOMEM; + + x = stpcpy(stpcpy(stpcpy(mempcpy(t, p, fn - p), ".#"), extra), fn); + + u = random_u64(); + for (i = 0; i < 16; i++) { + *(x++) = hexchar(u & 0xF); + u >>= 4; + } + + *x = 0; + + *ret = path_simplify(t, false); + return 0; +} + +int tempfn_random_child(const char *p, const char *extra, char **ret) { + char *t, *x; + uint64_t u; + unsigned i; + int r; + + assert(ret); + + /* Turns this: + * /foo/bar/waldo + * Into this: + * /foo/bar/waldo/.#<extra>3c2b6219aa75d7d0 + */ + + if (!p) { + r = tmp_dir(&p); + if (r < 0) + return r; + } + + extra = strempty(extra); + + t = new(char, strlen(p) + 3 + strlen(extra) + 16 + 1); + if (!t) + return -ENOMEM; + + if (isempty(p)) + x = stpcpy(stpcpy(t, ".#"), extra); + else + x = stpcpy(stpcpy(stpcpy(t, p), "/.#"), extra); + + u = random_u64(); + for (i = 0; i < 16; i++) { + *(x++) = hexchar(u & 0xF); + u >>= 4; + } + + *x = 0; + + *ret = path_simplify(t, false); + return 0; +} + +int open_tmpfile_unlinkable(const char *directory, int flags) { + char *p; + int fd, r; + + if (!directory) { + r = tmp_dir(&directory); + if (r < 0) + return r; + } else if (isempty(directory)) + return -EINVAL; + + /* Returns an unlinked temporary file that cannot be linked into the file system anymore */ + + /* Try O_TMPFILE first, if it is supported */ + fd = open(directory, flags|O_TMPFILE|O_EXCL, S_IRUSR|S_IWUSR); + if (fd >= 0) + return fd; + + /* Fall back to unguessable name + unlinking */ + p = strjoina(directory, "/systemd-tmp-XXXXXX"); + + fd = mkostemp_safe(p); + if (fd < 0) + return fd; + + (void) unlink(p); + + return fd; +} + +int open_tmpfile_linkable(const char *target, int flags, char **ret_path) { + _cleanup_free_ char *tmp = NULL; + int r, fd; + + assert(target); + assert(ret_path); + + /* Don't allow O_EXCL, as that has a special meaning for O_TMPFILE */ + assert((flags & O_EXCL) == 0); + + /* Creates a temporary file, that shall be renamed to "target" later. If possible, this uses O_TMPFILE – in + * which case "ret_path" will be returned as NULL. If not possible a the tempoary path name used is returned in + * "ret_path". Use link_tmpfile() below to rename the result after writing the file in full. */ + + fd = open_parent(target, O_TMPFILE|flags, 0640); + if (fd >= 0) { + *ret_path = NULL; + return fd; + } + + log_debug_errno(fd, "Failed to use O_TMPFILE for %s: %m", target); + + r = tempfn_random(target, NULL, &tmp); + if (r < 0) + return r; + + fd = open(tmp, O_CREAT|O_EXCL|O_NOFOLLOW|O_NOCTTY|flags, 0640); + if (fd < 0) + return -errno; + + *ret_path = TAKE_PTR(tmp); + + return fd; +} + +int link_tmpfile(int fd, const char *path, const char *target) { + int r; + + assert(fd >= 0); + assert(target); + + /* Moves a temporary file created with open_tmpfile() above into its final place. if "path" is NULL an fd + * created with O_TMPFILE is assumed, and linkat() is used. Otherwise it is assumed O_TMPFILE is not supported + * on the directory, and renameat2() is used instead. + * + * Note that in both cases we will not replace existing files. This is because linkat() does not support this + * operation currently (renameat2() does), and there is no nice way to emulate this. */ + + if (path) { + r = rename_noreplace(AT_FDCWD, path, AT_FDCWD, target); + if (r < 0) + return r; + } else { + char proc_fd_path[STRLEN("/proc/self/fd/") + DECIMAL_STR_MAX(fd) + 1]; + + xsprintf(proc_fd_path, "/proc/self/fd/%i", fd); + + if (linkat(AT_FDCWD, proc_fd_path, AT_FDCWD, target, AT_SYMLINK_FOLLOW) < 0) + return -errno; + } + + return 0; +} + +int mkdtemp_malloc(const char *template, char **ret) { + _cleanup_free_ char *p = NULL; + int r; + + assert(ret); + + if (template) + p = strdup(template); + else { + const char *tmp; + + r = tmp_dir(&tmp); + if (r < 0) + return r; + + p = strjoin(tmp, "/XXXXXX"); + } + if (!p) + return -ENOMEM; + + if (!mkdtemp(p)) + return -errno; + + *ret = TAKE_PTR(p); + return 0; +} diff --git a/src/basic/tmpfile-util.h b/src/basic/tmpfile-util.h new file mode 100644 index 0000000..802c85d --- /dev/null +++ b/src/basic/tmpfile-util.h @@ -0,0 +1,19 @@ +/* SPDX-License-Identifier: LGPL-2.1+ */ +#pragma once + +#include <stdio.h> + +int fopen_temporary(const char *path, FILE **_f, char **_temp_path); +int mkostemp_safe(char *pattern); +int fmkostemp_safe(char *pattern, const char *mode, FILE**_f); + +int tempfn_xxxxxx(const char *p, const char *extra, char **ret); +int tempfn_random(const char *p, const char *extra, char **ret); +int tempfn_random_child(const char *p, const char *extra, char **ret); + +int open_tmpfile_unlinkable(const char *directory, int flags); +int open_tmpfile_linkable(const char *target, int flags, char **ret_path); + +int link_tmpfile(int fd, const char *path, const char *target); + +int mkdtemp_malloc(const char *template, char **ret); diff --git a/src/basic/umask-util.h b/src/basic/umask-util.h new file mode 100644 index 0000000..e964292 --- /dev/null +++ b/src/basic/umask-util.h @@ -0,0 +1,28 @@ +/* SPDX-License-Identifier: LGPL-2.1+ */ +#pragma once + +#include <stdbool.h> +#include <sys/stat.h> +#include <sys/types.h> + +#include "macro.h" + +static inline void umaskp(mode_t *u) { + umask(*u); +} + +#define _cleanup_umask_ _cleanup_(umaskp) + +struct _umask_struct_ { + mode_t mask; + bool quit; +}; + +static inline void _reset_umask_(struct _umask_struct_ *s) { + umask(s->mask); +}; + +#define RUN_WITH_UMASK(mask) \ + for (_cleanup_(_reset_umask_) struct _umask_struct_ _saved_umask_ = { umask(mask), false }; \ + !_saved_umask_.quit ; \ + _saved_umask_.quit = true) diff --git a/src/basic/unaligned.h b/src/basic/unaligned.h new file mode 100644 index 0000000..00c17f8 --- /dev/null +++ b/src/basic/unaligned.h @@ -0,0 +1,99 @@ +/* SPDX-License-Identifier: LGPL-2.1+ */ +#pragma once + +#include <endian.h> +#include <stdint.h> + +/* BE */ + +static inline uint16_t unaligned_read_be16(const void *_u) { + const struct __attribute__((__packed__, __may_alias__)) { uint16_t x; } *u = _u; + + return be16toh(u->x); +} + +static inline uint32_t unaligned_read_be32(const void *_u) { + const struct __attribute__((__packed__, __may_alias__)) { uint32_t x; } *u = _u; + + return be32toh(u->x); +} + +static inline uint64_t unaligned_read_be64(const void *_u) { + const struct __attribute__((__packed__, __may_alias__)) { uint64_t x; } *u = _u; + + return be64toh(u->x); +} + +static inline void unaligned_write_be16(void *_u, uint16_t a) { + struct __attribute__((__packed__, __may_alias__)) { uint16_t x; } *u = _u; + + u->x = be16toh(a); +} + +static inline void unaligned_write_be32(void *_u, uint32_t a) { + struct __attribute__((__packed__, __may_alias__)) { uint32_t x; } *u = _u; + + u->x = be32toh(a); +} + +static inline void unaligned_write_be64(void *_u, uint64_t a) { + struct __attribute__((__packed__, __may_alias__)) { uint64_t x; } *u = _u; + + u->x = be64toh(a); +} + +/* LE */ + +static inline uint16_t unaligned_read_le16(const void *_u) { + const struct __attribute__((__packed__, __may_alias__)) { uint16_t x; } *u = _u; + + return le16toh(u->x); +} + +static inline uint32_t unaligned_read_le32(const void *_u) { + const struct __attribute__((__packed__, __may_alias__)) { uint32_t x; } *u = _u; + + return le32toh(u->x); +} + +static inline uint64_t unaligned_read_le64(const void *_u) { + const struct __attribute__((__packed__, __may_alias__)) { uint64_t x; } *u = _u; + + return le64toh(u->x); +} + +static inline void unaligned_write_le16(void *_u, uint16_t a) { + struct __attribute__((__packed__, __may_alias__)) { uint16_t x; } *u = _u; + + u->x = le16toh(a); +} + +static inline void unaligned_write_le32(void *_u, uint32_t a) { + struct __attribute__((__packed__, __may_alias__)) { uint32_t x; } *u = _u; + + u->x = le32toh(a); +} + +static inline void unaligned_write_le64(void *_u, uint64_t a) { + struct __attribute__((__packed__, __may_alias__)) { uint64_t x; } *u = _u; + + u->x = le64toh(a); +} + +#if __BYTE_ORDER == __BIG_ENDIAN +#define unaligned_read_ne16 unaligned_read_be16 +#define unaligned_read_ne32 unaligned_read_be32 +#define unaligned_read_ne64 unaligned_read_be64 + +#define unaligned_write_ne16 unaligned_write_be16 +#define unaligned_write_ne32 unaligned_write_be32 +#define unaligned_write_ne64 unaligned_write_be64 +#else +#define unaligned_read_ne16 unaligned_read_le16 +#define unaligned_read_ne32 unaligned_read_le32 +#define unaligned_read_ne64 unaligned_read_le64 + +#define unaligned_write_ne16 unaligned_write_le16 +#define unaligned_write_ne32 unaligned_write_le32 +#define unaligned_write_ne64 unaligned_write_le64 +#endif diff --git a/src/basic/unit-def.c b/src/basic/unit-def.c new file mode 100644 index 0000000..245daab --- /dev/null +++ b/src/basic/unit-def.c @@ -0,0 +1,273 @@ +/* SPDX-License-Identifier: LGPL-2.1+ */ + +#include "alloc-util.h" +#include "bus-label.h" +#include "string-table.h" +#include "unit-def.h" +#include "unit-name.h" + +char *unit_dbus_path_from_name(const char *name) { + _cleanup_free_ char *e = NULL; + + assert(name); + + e = bus_label_escape(name); + if (!e) + return NULL; + + return strappend("/org/freedesktop/systemd1/unit/", e); +} + +int unit_name_from_dbus_path(const char *path, char **name) { + const char *e; + char *n; + + e = startswith(path, "/org/freedesktop/systemd1/unit/"); + if (!e) + return -EINVAL; + + n = bus_label_unescape(e); + if (!n) + return -ENOMEM; + + *name = n; + return 0; +} + +const char* unit_dbus_interface_from_type(UnitType t) { + + static const char *const table[_UNIT_TYPE_MAX] = { + [UNIT_SERVICE] = "org.freedesktop.systemd1.Service", + [UNIT_SOCKET] = "org.freedesktop.systemd1.Socket", + [UNIT_TARGET] = "org.freedesktop.systemd1.Target", + [UNIT_DEVICE] = "org.freedesktop.systemd1.Device", + [UNIT_MOUNT] = "org.freedesktop.systemd1.Mount", + [UNIT_AUTOMOUNT] = "org.freedesktop.systemd1.Automount", + [UNIT_SWAP] = "org.freedesktop.systemd1.Swap", + [UNIT_TIMER] = "org.freedesktop.systemd1.Timer", + [UNIT_PATH] = "org.freedesktop.systemd1.Path", + [UNIT_SLICE] = "org.freedesktop.systemd1.Slice", + [UNIT_SCOPE] = "org.freedesktop.systemd1.Scope", + }; + + if (t < 0) + return NULL; + if (t >= _UNIT_TYPE_MAX) + return NULL; + + return table[t]; +} + +const char *unit_dbus_interface_from_name(const char *name) { + UnitType t; + + t = unit_name_to_type(name); + if (t < 0) + return NULL; + + return unit_dbus_interface_from_type(t); +} + +static const char* const unit_type_table[_UNIT_TYPE_MAX] = { + [UNIT_SERVICE] = "service", + [UNIT_SOCKET] = "socket", + [UNIT_TARGET] = "target", + [UNIT_DEVICE] = "device", + [UNIT_MOUNT] = "mount", + [UNIT_AUTOMOUNT] = "automount", + [UNIT_SWAP] = "swap", + [UNIT_TIMER] = "timer", + [UNIT_PATH] = "path", + [UNIT_SLICE] = "slice", + [UNIT_SCOPE] = "scope", +}; + +DEFINE_STRING_TABLE_LOOKUP(unit_type, UnitType); + +static const char* const unit_load_state_table[_UNIT_LOAD_STATE_MAX] = { + [UNIT_STUB] = "stub", + [UNIT_LOADED] = "loaded", + [UNIT_NOT_FOUND] = "not-found", + [UNIT_BAD_SETTING] = "bad-setting", + [UNIT_ERROR] = "error", + [UNIT_MERGED] = "merged", + [UNIT_MASKED] = "masked" +}; + +DEFINE_STRING_TABLE_LOOKUP(unit_load_state, UnitLoadState); + +static const char* const unit_active_state_table[_UNIT_ACTIVE_STATE_MAX] = { + [UNIT_ACTIVE] = "active", + [UNIT_RELOADING] = "reloading", + [UNIT_INACTIVE] = "inactive", + [UNIT_FAILED] = "failed", + [UNIT_ACTIVATING] = "activating", + [UNIT_DEACTIVATING] = "deactivating" +}; + +DEFINE_STRING_TABLE_LOOKUP(unit_active_state, UnitActiveState); + +static const char* const automount_state_table[_AUTOMOUNT_STATE_MAX] = { + [AUTOMOUNT_DEAD] = "dead", + [AUTOMOUNT_WAITING] = "waiting", + [AUTOMOUNT_RUNNING] = "running", + [AUTOMOUNT_FAILED] = "failed" +}; + +DEFINE_STRING_TABLE_LOOKUP(automount_state, AutomountState); + +static const char* const device_state_table[_DEVICE_STATE_MAX] = { + [DEVICE_DEAD] = "dead", + [DEVICE_TENTATIVE] = "tentative", + [DEVICE_PLUGGED] = "plugged", +}; + +DEFINE_STRING_TABLE_LOOKUP(device_state, DeviceState); + +static const char* const mount_state_table[_MOUNT_STATE_MAX] = { + [MOUNT_DEAD] = "dead", + [MOUNT_MOUNTING] = "mounting", + [MOUNT_MOUNTING_DONE] = "mounting-done", + [MOUNT_MOUNTED] = "mounted", + [MOUNT_REMOUNTING] = "remounting", + [MOUNT_UNMOUNTING] = "unmounting", + [MOUNT_REMOUNTING_SIGTERM] = "remounting-sigterm", + [MOUNT_REMOUNTING_SIGKILL] = "remounting-sigkill", + [MOUNT_UNMOUNTING_SIGTERM] = "unmounting-sigterm", + [MOUNT_UNMOUNTING_SIGKILL] = "unmounting-sigkill", + [MOUNT_FAILED] = "failed" +}; + +DEFINE_STRING_TABLE_LOOKUP(mount_state, MountState); + +static const char* const path_state_table[_PATH_STATE_MAX] = { + [PATH_DEAD] = "dead", + [PATH_WAITING] = "waiting", + [PATH_RUNNING] = "running", + [PATH_FAILED] = "failed" +}; + +DEFINE_STRING_TABLE_LOOKUP(path_state, PathState); + +static const char* const scope_state_table[_SCOPE_STATE_MAX] = { + [SCOPE_DEAD] = "dead", + [SCOPE_RUNNING] = "running", + [SCOPE_ABANDONED] = "abandoned", + [SCOPE_STOP_SIGTERM] = "stop-sigterm", + [SCOPE_STOP_SIGKILL] = "stop-sigkill", + [SCOPE_FAILED] = "failed", +}; + +DEFINE_STRING_TABLE_LOOKUP(scope_state, ScopeState); + +static const char* const service_state_table[_SERVICE_STATE_MAX] = { + [SERVICE_DEAD] = "dead", + [SERVICE_START_PRE] = "start-pre", + [SERVICE_START] = "start", + [SERVICE_START_POST] = "start-post", + [SERVICE_RUNNING] = "running", + [SERVICE_EXITED] = "exited", + [SERVICE_RELOAD] = "reload", + [SERVICE_STOP] = "stop", + [SERVICE_STOP_WATCHDOG] = "stop-watchdog", + [SERVICE_STOP_SIGTERM] = "stop-sigterm", + [SERVICE_STOP_SIGKILL] = "stop-sigkill", + [SERVICE_STOP_POST] = "stop-post", + [SERVICE_FINAL_SIGTERM] = "final-sigterm", + [SERVICE_FINAL_SIGKILL] = "final-sigkill", + [SERVICE_FAILED] = "failed", + [SERVICE_AUTO_RESTART] = "auto-restart", +}; + +DEFINE_STRING_TABLE_LOOKUP(service_state, ServiceState); + +static const char* const slice_state_table[_SLICE_STATE_MAX] = { + [SLICE_DEAD] = "dead", + [SLICE_ACTIVE] = "active" +}; + +DEFINE_STRING_TABLE_LOOKUP(slice_state, SliceState); + +static const char* const socket_state_table[_SOCKET_STATE_MAX] = { + [SOCKET_DEAD] = "dead", + [SOCKET_START_PRE] = "start-pre", + [SOCKET_START_CHOWN] = "start-chown", + [SOCKET_START_POST] = "start-post", + [SOCKET_LISTENING] = "listening", + [SOCKET_RUNNING] = "running", + [SOCKET_STOP_PRE] = "stop-pre", + [SOCKET_STOP_PRE_SIGTERM] = "stop-pre-sigterm", + [SOCKET_STOP_PRE_SIGKILL] = "stop-pre-sigkill", + [SOCKET_STOP_POST] = "stop-post", + [SOCKET_FINAL_SIGTERM] = "final-sigterm", + [SOCKET_FINAL_SIGKILL] = "final-sigkill", + [SOCKET_FAILED] = "failed" +}; + +DEFINE_STRING_TABLE_LOOKUP(socket_state, SocketState); + +static const char* const swap_state_table[_SWAP_STATE_MAX] = { + [SWAP_DEAD] = "dead", + [SWAP_ACTIVATING] = "activating", + [SWAP_ACTIVATING_DONE] = "activating-done", + [SWAP_ACTIVE] = "active", + [SWAP_DEACTIVATING] = "deactivating", + [SWAP_DEACTIVATING_SIGTERM] = "deactivating-sigterm", + [SWAP_DEACTIVATING_SIGKILL] = "deactivating-sigkill", + [SWAP_FAILED] = "failed" +}; + +DEFINE_STRING_TABLE_LOOKUP(swap_state, SwapState); + +static const char* const target_state_table[_TARGET_STATE_MAX] = { + [TARGET_DEAD] = "dead", + [TARGET_ACTIVE] = "active" +}; + +DEFINE_STRING_TABLE_LOOKUP(target_state, TargetState); + +static const char* const timer_state_table[_TIMER_STATE_MAX] = { + [TIMER_DEAD] = "dead", + [TIMER_WAITING] = "waiting", + [TIMER_RUNNING] = "running", + [TIMER_ELAPSED] = "elapsed", + [TIMER_FAILED] = "failed" +}; + +DEFINE_STRING_TABLE_LOOKUP(timer_state, TimerState); + +static const char* const unit_dependency_table[_UNIT_DEPENDENCY_MAX] = { + [UNIT_REQUIRES] = "Requires", + [UNIT_REQUISITE] = "Requisite", + [UNIT_WANTS] = "Wants", + [UNIT_BINDS_TO] = "BindsTo", + [UNIT_PART_OF] = "PartOf", + [UNIT_REQUIRED_BY] = "RequiredBy", + [UNIT_REQUISITE_OF] = "RequisiteOf", + [UNIT_WANTED_BY] = "WantedBy", + [UNIT_BOUND_BY] = "BoundBy", + [UNIT_CONSISTS_OF] = "ConsistsOf", + [UNIT_CONFLICTS] = "Conflicts", + [UNIT_CONFLICTED_BY] = "ConflictedBy", + [UNIT_BEFORE] = "Before", + [UNIT_AFTER] = "After", + [UNIT_ON_FAILURE] = "OnFailure", + [UNIT_TRIGGERS] = "Triggers", + [UNIT_TRIGGERED_BY] = "TriggeredBy", + [UNIT_PROPAGATES_RELOAD_TO] = "PropagatesReloadTo", + [UNIT_RELOAD_PROPAGATED_FROM] = "ReloadPropagatedFrom", + [UNIT_JOINS_NAMESPACE_OF] = "JoinsNamespaceOf", + [UNIT_REFERENCES] = "References", + [UNIT_REFERENCED_BY] = "ReferencedBy", +}; + +DEFINE_STRING_TABLE_LOOKUP(unit_dependency, UnitDependency); + +static const char* const notify_access_table[_NOTIFY_ACCESS_MAX] = { + [NOTIFY_NONE] = "none", + [NOTIFY_MAIN] = "main", + [NOTIFY_EXEC] = "exec", + [NOTIFY_ALL] = "all" +}; + +DEFINE_STRING_TABLE_LOOKUP(notify_access, NotifyAccess); diff --git a/src/basic/unit-def.h b/src/basic/unit-def.h new file mode 100644 index 0000000..85f3e42 --- /dev/null +++ b/src/basic/unit-def.h @@ -0,0 +1,284 @@ +/* SPDX-License-Identifier: LGPL-2.1+ */ +#pragma once + +#include <stdbool.h> + +#include "macro.h" + +typedef enum UnitType { + UNIT_SERVICE = 0, + UNIT_SOCKET, + UNIT_TARGET, + UNIT_DEVICE, + UNIT_MOUNT, + UNIT_AUTOMOUNT, + UNIT_SWAP, + UNIT_TIMER, + UNIT_PATH, + UNIT_SLICE, + UNIT_SCOPE, + _UNIT_TYPE_MAX, + _UNIT_TYPE_INVALID = -1 +} UnitType; + +typedef enum UnitLoadState { + UNIT_STUB = 0, + UNIT_LOADED, + UNIT_NOT_FOUND, /* error condition #1: unit file not found */ + UNIT_BAD_SETTING, /* error condition #2: we couldn't parse some essential unit file setting */ + UNIT_ERROR, /* error condition #3: other "system" error, catchall for the rest */ + UNIT_MERGED, + UNIT_MASKED, + _UNIT_LOAD_STATE_MAX, + _UNIT_LOAD_STATE_INVALID = -1 +} UnitLoadState; + +typedef enum UnitActiveState { + UNIT_ACTIVE, + UNIT_RELOADING, + UNIT_INACTIVE, + UNIT_FAILED, + UNIT_ACTIVATING, + UNIT_DEACTIVATING, + _UNIT_ACTIVE_STATE_MAX, + _UNIT_ACTIVE_STATE_INVALID = -1 +} UnitActiveState; + +typedef enum AutomountState { + AUTOMOUNT_DEAD, + AUTOMOUNT_WAITING, + AUTOMOUNT_RUNNING, + AUTOMOUNT_FAILED, + _AUTOMOUNT_STATE_MAX, + _AUTOMOUNT_STATE_INVALID = -1 +} AutomountState; + +/* We simply watch devices, we cannot plug/unplug them. That + * simplifies the state engine greatly */ +typedef enum DeviceState { + DEVICE_DEAD, + DEVICE_TENTATIVE, /* mounted or swapped, but not (yet) announced by udev */ + DEVICE_PLUGGED, /* announced by udev */ + _DEVICE_STATE_MAX, + _DEVICE_STATE_INVALID = -1 +} DeviceState; + +typedef enum MountState { + MOUNT_DEAD, + MOUNT_MOUNTING, /* /usr/bin/mount is running, but the mount is not done yet. */ + MOUNT_MOUNTING_DONE, /* /usr/bin/mount is running, and the mount is done. */ + MOUNT_MOUNTED, + MOUNT_REMOUNTING, + MOUNT_UNMOUNTING, + MOUNT_REMOUNTING_SIGTERM, + MOUNT_REMOUNTING_SIGKILL, + MOUNT_UNMOUNTING_SIGTERM, + MOUNT_UNMOUNTING_SIGKILL, + MOUNT_FAILED, + _MOUNT_STATE_MAX, + _MOUNT_STATE_INVALID = -1 +} MountState; + +typedef enum PathState { + PATH_DEAD, + PATH_WAITING, + PATH_RUNNING, + PATH_FAILED, + _PATH_STATE_MAX, + _PATH_STATE_INVALID = -1 +} PathState; + +typedef enum ScopeState { + SCOPE_DEAD, + SCOPE_RUNNING, + SCOPE_ABANDONED, + SCOPE_STOP_SIGTERM, + SCOPE_STOP_SIGKILL, + SCOPE_FAILED, + _SCOPE_STATE_MAX, + _SCOPE_STATE_INVALID = -1 +} ScopeState; + +typedef enum ServiceState { + SERVICE_DEAD, + SERVICE_START_PRE, + SERVICE_START, + SERVICE_START_POST, + SERVICE_RUNNING, + SERVICE_EXITED, /* Nothing is running anymore, but RemainAfterExit is true hence this is OK */ + SERVICE_RELOAD, + SERVICE_STOP, /* No STOP_PRE state, instead just register multiple STOP executables */ + SERVICE_STOP_WATCHDOG, + SERVICE_STOP_SIGTERM, + SERVICE_STOP_SIGKILL, + SERVICE_STOP_POST, + SERVICE_FINAL_SIGTERM, /* In case the STOP_POST executable hangs, we shoot that down, too */ + SERVICE_FINAL_SIGKILL, + SERVICE_FAILED, + SERVICE_AUTO_RESTART, + _SERVICE_STATE_MAX, + _SERVICE_STATE_INVALID = -1 +} ServiceState; + +typedef enum SliceState { + SLICE_DEAD, + SLICE_ACTIVE, + _SLICE_STATE_MAX, + _SLICE_STATE_INVALID = -1 +} SliceState; + +typedef enum SocketState { + SOCKET_DEAD, + SOCKET_START_PRE, + SOCKET_START_CHOWN, + SOCKET_START_POST, + SOCKET_LISTENING, + SOCKET_RUNNING, + SOCKET_STOP_PRE, + SOCKET_STOP_PRE_SIGTERM, + SOCKET_STOP_PRE_SIGKILL, + SOCKET_STOP_POST, + SOCKET_FINAL_SIGTERM, + SOCKET_FINAL_SIGKILL, + SOCKET_FAILED, + _SOCKET_STATE_MAX, + _SOCKET_STATE_INVALID = -1 +} SocketState; + +typedef enum SwapState { + SWAP_DEAD, + SWAP_ACTIVATING, /* /sbin/swapon is running, but the swap not yet enabled. */ + SWAP_ACTIVATING_DONE, /* /sbin/swapon is running, and the swap is done. */ + SWAP_ACTIVE, + SWAP_DEACTIVATING, + SWAP_DEACTIVATING_SIGTERM, + SWAP_DEACTIVATING_SIGKILL, + SWAP_FAILED, + _SWAP_STATE_MAX, + _SWAP_STATE_INVALID = -1 +} SwapState; + +typedef enum TargetState { + TARGET_DEAD, + TARGET_ACTIVE, + _TARGET_STATE_MAX, + _TARGET_STATE_INVALID = -1 +} TargetState; + +typedef enum TimerState { + TIMER_DEAD, + TIMER_WAITING, + TIMER_RUNNING, + TIMER_ELAPSED, + TIMER_FAILED, + _TIMER_STATE_MAX, + _TIMER_STATE_INVALID = -1 +} TimerState; + +typedef enum UnitDependency { + /* Positive dependencies */ + UNIT_REQUIRES, + UNIT_REQUISITE, + UNIT_WANTS, + UNIT_BINDS_TO, + UNIT_PART_OF, + + /* Inverse of the above */ + UNIT_REQUIRED_BY, /* inverse of 'requires' is 'required_by' */ + UNIT_REQUISITE_OF, /* inverse of 'requisite' is 'requisite_of' */ + UNIT_WANTED_BY, /* inverse of 'wants' */ + UNIT_BOUND_BY, /* inverse of 'binds_to' */ + UNIT_CONSISTS_OF, /* inverse of 'part_of' */ + + /* Negative dependencies */ + UNIT_CONFLICTS, /* inverse of 'conflicts' is 'conflicted_by' */ + UNIT_CONFLICTED_BY, + + /* Order */ + UNIT_BEFORE, /* inverse of 'before' is 'after' and vice versa */ + UNIT_AFTER, + + /* On Failure */ + UNIT_ON_FAILURE, + + /* Triggers (i.e. a socket triggers a service) */ + UNIT_TRIGGERS, + UNIT_TRIGGERED_BY, + + /* Propagate reloads */ + UNIT_PROPAGATES_RELOAD_TO, + UNIT_RELOAD_PROPAGATED_FROM, + + /* Joins namespace of */ + UNIT_JOINS_NAMESPACE_OF, + + /* Reference information for GC logic */ + UNIT_REFERENCES, /* Inverse of 'references' is 'referenced_by' */ + UNIT_REFERENCED_BY, + + _UNIT_DEPENDENCY_MAX, + _UNIT_DEPENDENCY_INVALID = -1 +} UnitDependency; + +typedef enum NotifyAccess { + NOTIFY_NONE, + NOTIFY_ALL, + NOTIFY_MAIN, + NOTIFY_EXEC, + _NOTIFY_ACCESS_MAX, + _NOTIFY_ACCESS_INVALID = -1 +} NotifyAccess; + +char *unit_dbus_path_from_name(const char *name); +int unit_name_from_dbus_path(const char *path, char **name); + +const char* unit_dbus_interface_from_type(UnitType t); +const char *unit_dbus_interface_from_name(const char *name); + +const char *unit_type_to_string(UnitType i) _const_; +UnitType unit_type_from_string(const char *s) _pure_; + +const char *unit_load_state_to_string(UnitLoadState i) _const_; +UnitLoadState unit_load_state_from_string(const char *s) _pure_; + +const char *unit_active_state_to_string(UnitActiveState i) _const_; +UnitActiveState unit_active_state_from_string(const char *s) _pure_; + +const char* automount_state_to_string(AutomountState i) _const_; +AutomountState automount_state_from_string(const char *s) _pure_; + +const char* device_state_to_string(DeviceState i) _const_; +DeviceState device_state_from_string(const char *s) _pure_; + +const char* mount_state_to_string(MountState i) _const_; +MountState mount_state_from_string(const char *s) _pure_; + +const char* path_state_to_string(PathState i) _const_; +PathState path_state_from_string(const char *s) _pure_; + +const char* scope_state_to_string(ScopeState i) _const_; +ScopeState scope_state_from_string(const char *s) _pure_; + +const char* service_state_to_string(ServiceState i) _const_; +ServiceState service_state_from_string(const char *s) _pure_; + +const char* slice_state_to_string(SliceState i) _const_; +SliceState slice_state_from_string(const char *s) _pure_; + +const char* socket_state_to_string(SocketState i) _const_; +SocketState socket_state_from_string(const char *s) _pure_; + +const char* swap_state_to_string(SwapState i) _const_; +SwapState swap_state_from_string(const char *s) _pure_; + +const char* target_state_to_string(TargetState i) _const_; +TargetState target_state_from_string(const char *s) _pure_; + +const char *timer_state_to_string(TimerState i) _const_; +TimerState timer_state_from_string(const char *s) _pure_; + +const char *unit_dependency_to_string(UnitDependency i) _const_; +UnitDependency unit_dependency_from_string(const char *s) _pure_; + +const char* notify_access_to_string(NotifyAccess i) _const_; +NotifyAccess notify_access_from_string(const char *s) _pure_; diff --git a/src/basic/unit-name.c b/src/basic/unit-name.c new file mode 100644 index 0000000..1b81fe2 --- /dev/null +++ b/src/basic/unit-name.c @@ -0,0 +1,775 @@ +/* SPDX-License-Identifier: LGPL-2.1+ */ + +#include <errno.h> +#include <stddef.h> +#include <stdint.h> +#include <stdlib.h> +#include <string.h> + +#include "alloc-util.h" +#include "glob-util.h" +#include "hexdecoct.h" +#include "path-util.h" +#include "special.h" +#include "string-util.h" +#include "strv.h" +#include "unit-name.h" + +/* Characters valid in a unit name. */ +#define VALID_CHARS \ + DIGITS \ + LETTERS \ + ":-_.\\" + +/* The same, but also permits the single @ character that may appear */ +#define VALID_CHARS_WITH_AT \ + "@" \ + VALID_CHARS + +/* All chars valid in a unit name glob */ +#define VALID_CHARS_GLOB \ + VALID_CHARS_WITH_AT \ + "[]!-*?" + +bool unit_name_is_valid(const char *n, UnitNameFlags flags) { + const char *e, *i, *at; + + assert((flags & ~(UNIT_NAME_PLAIN|UNIT_NAME_INSTANCE|UNIT_NAME_TEMPLATE)) == 0); + + if (_unlikely_(flags == 0)) + return false; + + if (isempty(n)) + return false; + + if (strlen(n) >= UNIT_NAME_MAX) + return false; + + e = strrchr(n, '.'); + if (!e || e == n) + return false; + + if (unit_type_from_string(e + 1) < 0) + return false; + + for (i = n, at = NULL; i < e; i++) { + + if (*i == '@' && !at) + at = i; + + if (!strchr("@" VALID_CHARS, *i)) + return false; + } + + if (at == n) + return false; + + if (flags & UNIT_NAME_PLAIN) + if (!at) + return true; + + if (flags & UNIT_NAME_INSTANCE) + if (at && e > at + 1) + return true; + + if (flags & UNIT_NAME_TEMPLATE) + if (at && e == at + 1) + return true; + + return false; +} + +bool unit_prefix_is_valid(const char *p) { + + /* We don't allow additional @ in the prefix string */ + + if (isempty(p)) + return false; + + return in_charset(p, VALID_CHARS); +} + +bool unit_instance_is_valid(const char *i) { + + /* The max length depends on the length of the string, so we + * don't really check this here. */ + + if (isempty(i)) + return false; + + /* We allow additional @ in the instance string, we do not + * allow them in the prefix! */ + + return in_charset(i, "@" VALID_CHARS); +} + +bool unit_suffix_is_valid(const char *s) { + if (isempty(s)) + return false; + + if (s[0] != '.') + return false; + + if (unit_type_from_string(s + 1) < 0) + return false; + + return true; +} + +int unit_name_to_prefix(const char *n, char **ret) { + const char *p; + char *s; + + assert(n); + assert(ret); + + if (!unit_name_is_valid(n, UNIT_NAME_ANY)) + return -EINVAL; + + p = strchr(n, '@'); + if (!p) + p = strrchr(n, '.'); + + assert_se(p); + + s = strndup(n, p - n); + if (!s) + return -ENOMEM; + + *ret = s; + return 0; +} + +int unit_name_to_instance(const char *n, char **instance) { + const char *p, *d; + char *i; + + assert(n); + assert(instance); + + if (!unit_name_is_valid(n, UNIT_NAME_ANY)) + return -EINVAL; + + /* Everything past the first @ and before the last . is the instance */ + p = strchr(n, '@'); + if (!p) { + *instance = NULL; + return 0; + } + + p++; + + d = strrchr(p, '.'); + if (!d) + return -EINVAL; + + i = strndup(p, d-p); + if (!i) + return -ENOMEM; + + *instance = i; + return 1; +} + +int unit_name_to_prefix_and_instance(const char *n, char **ret) { + const char *d; + char *s; + + assert(n); + assert(ret); + + if (!unit_name_is_valid(n, UNIT_NAME_ANY)) + return -EINVAL; + + d = strrchr(n, '.'); + if (!d) + return -EINVAL; + + s = strndup(n, d - n); + if (!s) + return -ENOMEM; + + *ret = s; + return 0; +} + +UnitType unit_name_to_type(const char *n) { + const char *e; + + assert(n); + + if (!unit_name_is_valid(n, UNIT_NAME_ANY)) + return _UNIT_TYPE_INVALID; + + assert_se(e = strrchr(n, '.')); + + return unit_type_from_string(e + 1); +} + +int unit_name_change_suffix(const char *n, const char *suffix, char **ret) { + char *e, *s; + size_t a, b; + + assert(n); + assert(suffix); + assert(ret); + + if (!unit_name_is_valid(n, UNIT_NAME_ANY)) + return -EINVAL; + + if (!unit_suffix_is_valid(suffix)) + return -EINVAL; + + assert_se(e = strrchr(n, '.')); + + a = e - n; + b = strlen(suffix); + + s = new(char, a + b + 1); + if (!s) + return -ENOMEM; + + strcpy(mempcpy(s, n, a), suffix); + *ret = s; + + return 0; +} + +int unit_name_build(const char *prefix, const char *instance, const char *suffix, char **ret) { + UnitType type; + + assert(prefix); + assert(suffix); + assert(ret); + + if (suffix[0] != '.') + return -EINVAL; + + type = unit_type_from_string(suffix + 1); + if (type < 0) + return -EINVAL; + + return unit_name_build_from_type(prefix, instance, type, ret); +} + +int unit_name_build_from_type(const char *prefix, const char *instance, UnitType type, char **ret) { + const char *ut; + char *s; + + assert(prefix); + assert(type >= 0); + assert(type < _UNIT_TYPE_MAX); + assert(ret); + + if (!unit_prefix_is_valid(prefix)) + return -EINVAL; + + if (instance && !unit_instance_is_valid(instance)) + return -EINVAL; + + ut = unit_type_to_string(type); + + if (!instance) + s = strjoin(prefix, ".", ut); + else + s = strjoin(prefix, "@", instance, ".", ut); + if (!s) + return -ENOMEM; + + *ret = s; + return 0; +} + +static char *do_escape_char(char c, char *t) { + assert(t); + + *(t++) = '\\'; + *(t++) = 'x'; + *(t++) = hexchar(c >> 4); + *(t++) = hexchar(c); + + return t; +} + +static char *do_escape(const char *f, char *t) { + assert(f); + assert(t); + + /* do not create units with a leading '.', like for "/.dotdir" mount points */ + if (*f == '.') { + t = do_escape_char(*f, t); + f++; + } + + for (; *f; f++) { + if (*f == '/') + *(t++) = '-'; + else if (IN_SET(*f, '-', '\\') || !strchr(VALID_CHARS, *f)) + t = do_escape_char(*f, t); + else + *(t++) = *f; + } + + return t; +} + +char *unit_name_escape(const char *f) { + char *r, *t; + + assert(f); + + r = new(char, strlen(f)*4+1); + if (!r) + return NULL; + + t = do_escape(f, r); + *t = 0; + + return r; +} + +int unit_name_unescape(const char *f, char **ret) { + _cleanup_free_ char *r = NULL; + char *t; + + assert(f); + + r = strdup(f); + if (!r) + return -ENOMEM; + + for (t = r; *f; f++) { + if (*f == '-') + *(t++) = '/'; + else if (*f == '\\') { + int a, b; + + if (f[1] != 'x') + return -EINVAL; + + a = unhexchar(f[2]); + if (a < 0) + return -EINVAL; + + b = unhexchar(f[3]); + if (b < 0) + return -EINVAL; + + *(t++) = (char) (((uint8_t) a << 4U) | (uint8_t) b); + f += 3; + } else + *(t++) = *f; + } + + *t = 0; + + *ret = TAKE_PTR(r); + + return 0; +} + +int unit_name_path_escape(const char *f, char **ret) { + char *p, *s; + + assert(f); + assert(ret); + + p = strdupa(f); + if (!p) + return -ENOMEM; + + path_simplify(p, false); + + if (empty_or_root(p)) + s = strdup("-"); + else { + if (!path_is_normalized(p)) + return -EINVAL; + + /* Truncate trailing slashes */ + delete_trailing_chars(p, "/"); + + /* Truncate leading slashes */ + p = skip_leading_chars(p, "/"); + + s = unit_name_escape(p); + } + if (!s) + return -ENOMEM; + + *ret = s; + return 0; +} + +int unit_name_path_unescape(const char *f, char **ret) { + char *s; + int r; + + assert(f); + + if (isempty(f)) + return -EINVAL; + + if (streq(f, "-")) { + s = strdup("/"); + if (!s) + return -ENOMEM; + } else { + char *w; + + r = unit_name_unescape(f, &w); + if (r < 0) + return r; + + /* Don't accept trailing or leading slashes */ + if (startswith(w, "/") || endswith(w, "/")) { + free(w); + return -EINVAL; + } + + /* Prefix a slash again */ + s = strappend("/", w); + free(w); + if (!s) + return -ENOMEM; + + if (!path_is_normalized(s)) { + free(s); + return -EINVAL; + } + } + + if (ret) + *ret = s; + else + free(s); + + return 0; +} + +int unit_name_replace_instance(const char *f, const char *i, char **ret) { + const char *p, *e; + char *s; + size_t a, b; + + assert(f); + assert(i); + assert(ret); + + if (!unit_name_is_valid(f, UNIT_NAME_INSTANCE|UNIT_NAME_TEMPLATE)) + return -EINVAL; + if (!unit_instance_is_valid(i)) + return -EINVAL; + + assert_se(p = strchr(f, '@')); + assert_se(e = strrchr(f, '.')); + + a = p - f; + b = strlen(i); + + s = new(char, a + 1 + b + strlen(e) + 1); + if (!s) + return -ENOMEM; + + strcpy(mempcpy(mempcpy(s, f, a + 1), i, b), e); + + *ret = s; + return 0; +} + +int unit_name_template(const char *f, char **ret) { + const char *p, *e; + char *s; + size_t a; + + assert(f); + assert(ret); + + if (!unit_name_is_valid(f, UNIT_NAME_INSTANCE|UNIT_NAME_TEMPLATE)) + return -EINVAL; + + assert_se(p = strchr(f, '@')); + assert_se(e = strrchr(f, '.')); + + a = p - f; + + s = new(char, a + 1 + strlen(e) + 1); + if (!s) + return -ENOMEM; + + strcpy(mempcpy(s, f, a + 1), e); + + *ret = s; + return 0; +} + +int unit_name_from_path(const char *path, const char *suffix, char **ret) { + _cleanup_free_ char *p = NULL; + char *s = NULL; + int r; + + assert(path); + assert(suffix); + assert(ret); + + if (!unit_suffix_is_valid(suffix)) + return -EINVAL; + + r = unit_name_path_escape(path, &p); + if (r < 0) + return r; + + s = strappend(p, suffix); + if (!s) + return -ENOMEM; + + *ret = s; + return 0; +} + +int unit_name_from_path_instance(const char *prefix, const char *path, const char *suffix, char **ret) { + _cleanup_free_ char *p = NULL; + char *s; + int r; + + assert(prefix); + assert(path); + assert(suffix); + assert(ret); + + if (!unit_prefix_is_valid(prefix)) + return -EINVAL; + + if (!unit_suffix_is_valid(suffix)) + return -EINVAL; + + r = unit_name_path_escape(path, &p); + if (r < 0) + return r; + + s = strjoin(prefix, "@", p, suffix); + if (!s) + return -ENOMEM; + + *ret = s; + return 0; +} + +int unit_name_to_path(const char *name, char **ret) { + _cleanup_free_ char *prefix = NULL; + int r; + + assert(name); + + r = unit_name_to_prefix(name, &prefix); + if (r < 0) + return r; + + return unit_name_path_unescape(prefix, ret); +} + +static bool do_escape_mangle(const char *f, bool allow_globs, char *t) { + const char *valid_chars; + bool mangled = false; + + assert(f); + assert(t); + + /* We'll only escape the obvious characters here, to play safe. + * + * Returns true if any characters were mangled, false otherwise. + */ + + valid_chars = allow_globs ? VALID_CHARS_GLOB : VALID_CHARS_WITH_AT; + + for (; *f; f++) + if (*f == '/') { + *(t++) = '-'; + mangled = true; + } else if (!strchr(valid_chars, *f)) { + t = do_escape_char(*f, t); + mangled = true; + } else + *(t++) = *f; + *t = 0; + + return mangled; +} + +/** + * Convert a string to a unit name. /dev/blah is converted to dev-blah.device, + * /blah/blah is converted to blah-blah.mount, anything else is left alone, + * except that @suffix is appended if a valid unit suffix is not present. + * + * If @allow_globs, globs characters are preserved. Otherwise, they are escaped. + */ +int unit_name_mangle_with_suffix(const char *name, UnitNameMangle flags, const char *suffix, char **ret) { + char *s; + int r; + bool mangled; + + assert(name); + assert(suffix); + assert(ret); + + if (isempty(name)) /* We cannot mangle empty unit names to become valid, sorry. */ + return -EINVAL; + + if (!unit_suffix_is_valid(suffix)) + return -EINVAL; + + /* Already a fully valid unit name? If so, no mangling is necessary... */ + if (unit_name_is_valid(name, UNIT_NAME_ANY)) + goto good; + + /* Already a fully valid globbing expression? If so, no mangling is necessary either... */ + if ((flags & UNIT_NAME_MANGLE_GLOB) && + string_is_glob(name) && + in_charset(name, VALID_CHARS_GLOB)) + goto good; + + if (is_device_path(name)) { + r = unit_name_from_path(name, ".device", ret); + if (r >= 0) + return 1; + if (r != -EINVAL) + return r; + } + + if (path_is_absolute(name)) { + r = unit_name_from_path(name, ".mount", ret); + if (r >= 0) + return 1; + if (r != -EINVAL) + return r; + } + + s = new(char, strlen(name) * 4 + strlen(suffix) + 1); + if (!s) + return -ENOMEM; + + mangled = do_escape_mangle(name, flags & UNIT_NAME_MANGLE_GLOB, s); + if (mangled) + log_full(flags & UNIT_NAME_MANGLE_WARN ? LOG_NOTICE : LOG_DEBUG, + "Invalid unit name \"%s\" was escaped as \"%s\" (maybe you should use systemd-escape?)", + name, s); + + /* Append a suffix if it doesn't have any, but only if this is not a glob, so that we can allow "foo.*" as a + * valid glob. */ + if ((!(flags & UNIT_NAME_MANGLE_GLOB) || !string_is_glob(s)) && unit_name_to_type(s) < 0) + strcat(s, suffix); + + *ret = s; + return 1; + +good: + s = strdup(name); + if (!s) + return -ENOMEM; + + *ret = s; + return 0; +} + +int slice_build_parent_slice(const char *slice, char **ret) { + char *s, *dash; + int r; + + assert(slice); + assert(ret); + + if (!slice_name_is_valid(slice)) + return -EINVAL; + + if (streq(slice, SPECIAL_ROOT_SLICE)) { + *ret = NULL; + return 0; + } + + s = strdup(slice); + if (!s) + return -ENOMEM; + + dash = strrchr(s, '-'); + if (dash) + strcpy(dash, ".slice"); + else { + r = free_and_strdup(&s, SPECIAL_ROOT_SLICE); + if (r < 0) { + free(s); + return r; + } + } + + *ret = s; + return 1; +} + +int slice_build_subslice(const char *slice, const char *name, char **ret) { + char *subslice; + + assert(slice); + assert(name); + assert(ret); + + if (!slice_name_is_valid(slice)) + return -EINVAL; + + if (!unit_prefix_is_valid(name)) + return -EINVAL; + + if (streq(slice, SPECIAL_ROOT_SLICE)) + subslice = strappend(name, ".slice"); + else { + char *e; + + assert_se(e = endswith(slice, ".slice")); + + subslice = new(char, (e - slice) + 1 + strlen(name) + 6 + 1); + if (!subslice) + return -ENOMEM; + + stpcpy(stpcpy(stpcpy(mempcpy(subslice, slice, e - slice), "-"), name), ".slice"); + } + + *ret = subslice; + return 0; +} + +bool slice_name_is_valid(const char *name) { + const char *p, *e; + bool dash = false; + + if (!unit_name_is_valid(name, UNIT_NAME_PLAIN)) + return false; + + if (streq(name, SPECIAL_ROOT_SLICE)) + return true; + + e = endswith(name, ".slice"); + if (!e) + return false; + + for (p = name; p < e; p++) { + + if (*p == '-') { + + /* Don't allow initial dash */ + if (p == name) + return false; + + /* Don't allow multiple dashes */ + if (dash) + return false; + + dash = true; + } else + dash = false; + } + + /* Don't allow trailing hash */ + if (dash) + return false; + + return true; +} diff --git a/src/basic/unit-name.h b/src/basic/unit-name.h new file mode 100644 index 0000000..0629db3 --- /dev/null +++ b/src/basic/unit-name.h @@ -0,0 +1,65 @@ +/* SPDX-License-Identifier: LGPL-2.1+ */ +#pragma once + +#include <stdbool.h> + +#include "macro.h" +#include "unit-def.h" + +#define UNIT_NAME_MAX 256 + +typedef enum UnitNameFlags { + UNIT_NAME_PLAIN = 1 << 0, /* Allow foo.service */ + UNIT_NAME_INSTANCE = 1 << 1, /* Allow foo@bar.service */ + UNIT_NAME_TEMPLATE = 1 << 2, /* Allow foo@.service */ + UNIT_NAME_ANY = UNIT_NAME_PLAIN|UNIT_NAME_INSTANCE|UNIT_NAME_TEMPLATE, +} UnitNameFlags; + +bool unit_name_is_valid(const char *n, UnitNameFlags flags) _pure_; +bool unit_prefix_is_valid(const char *p) _pure_; +bool unit_instance_is_valid(const char *i) _pure_; +bool unit_suffix_is_valid(const char *s) _pure_; + +static inline int unit_prefix_and_instance_is_valid(const char *p) { + /* For prefix+instance and instance the same rules apply */ + return unit_instance_is_valid(p); +} + +int unit_name_to_prefix(const char *n, char **prefix); +int unit_name_to_instance(const char *n, char **instance); +int unit_name_to_prefix_and_instance(const char *n, char **ret); + +UnitType unit_name_to_type(const char *n) _pure_; + +int unit_name_change_suffix(const char *n, const char *suffix, char **ret); + +int unit_name_build(const char *prefix, const char *instance, const char *suffix, char **ret); +int unit_name_build_from_type(const char *prefix, const char *instance, UnitType, char **ret); + +char *unit_name_escape(const char *f); +int unit_name_unescape(const char *f, char **ret); +int unit_name_path_escape(const char *f, char **ret); +int unit_name_path_unescape(const char *f, char **ret); + +int unit_name_replace_instance(const char *f, const char *i, char **ret); + +int unit_name_template(const char *f, char **ret); + +int unit_name_from_path(const char *path, const char *suffix, char **ret); +int unit_name_from_path_instance(const char *prefix, const char *path, const char *suffix, char **ret); +int unit_name_to_path(const char *name, char **ret); + +typedef enum UnitNameMangle { + UNIT_NAME_MANGLE_GLOB = 1 << 0, + UNIT_NAME_MANGLE_WARN = 1 << 1, +} UnitNameMangle; + +int unit_name_mangle_with_suffix(const char *name, UnitNameMangle flags, const char *suffix, char **ret); + +static inline int unit_name_mangle(const char *name, UnitNameMangle flags, char **ret) { + return unit_name_mangle_with_suffix(name, flags, ".service", ret); +} + +int slice_build_parent_slice(const char *slice, char **ret); +int slice_build_subslice(const char *slice, const char *name, char **subslice); +bool slice_name_is_valid(const char *name); diff --git a/src/basic/user-util.c b/src/basic/user-util.c new file mode 100644 index 0000000..260f3d2 --- /dev/null +++ b/src/basic/user-util.c @@ -0,0 +1,857 @@ +/* SPDX-License-Identifier: LGPL-2.1+ */ + +#include <alloca.h> +#include <errno.h> +#include <fcntl.h> +#include <grp.h> +#include <pwd.h> +#include <stddef.h> +#include <stdint.h> +#include <stdio.h> +#include <stdlib.h> +#include <string.h> +#include <sys/stat.h> +#include <unistd.h> +#include <utmp.h> + +#include "alloc-util.h" +#include "fd-util.h" +#include "fileio.h" +#include "format-util.h" +#include "macro.h" +#include "missing.h" +#include "parse-util.h" +#include "path-util.h" +#include "string-util.h" +#include "strv.h" +#include "user-util.h" +#include "utf8.h" + +bool uid_is_valid(uid_t uid) { + + /* Also see POSIX IEEE Std 1003.1-2008, 2016 Edition, 3.436. */ + + /* Some libc APIs use UID_INVALID as special placeholder */ + if (uid == (uid_t) UINT32_C(0xFFFFFFFF)) + return false; + + /* A long time ago UIDs where 16bit, hence explicitly avoid the 16bit -1 too */ + if (uid == (uid_t) UINT32_C(0xFFFF)) + return false; + + return true; +} + +int parse_uid(const char *s, uid_t *ret) { + uint32_t uid = 0; + int r; + + assert(s); + + assert_cc(sizeof(uid_t) == sizeof(uint32_t)); + r = safe_atou32(s, &uid); + if (r < 0) + return r; + + if (!uid_is_valid(uid)) + return -ENXIO; /* we return ENXIO instead of EINVAL + * here, to make it easy to distuingish + * invalid numeric uids from invalid + * strings. */ + + if (ret) + *ret = uid; + + return 0; +} + +char* getlogname_malloc(void) { + uid_t uid; + struct stat st; + + if (isatty(STDIN_FILENO) && fstat(STDIN_FILENO, &st) >= 0) + uid = st.st_uid; + else + uid = getuid(); + + return uid_to_name(uid); +} + +char *getusername_malloc(void) { + const char *e; + + e = getenv("USER"); + if (e) + return strdup(e); + + return uid_to_name(getuid()); +} + +static bool is_nologin_shell(const char *shell) { + + return PATH_IN_SET(shell, + /* 'nologin' is the friendliest way to disable logins for a user account. It prints a nice + * message and exits. Different distributions place the binary at different places though, + * hence let's list them all. */ + "/bin/nologin", + "/sbin/nologin", + "/usr/bin/nologin", + "/usr/sbin/nologin", + /* 'true' and 'false' work too for the same purpose, but are less friendly as they don't do + * any message printing. Different distributions place the binary at various places but at + * least not in the 'sbin' directory. */ + "/bin/false", + "/usr/bin/false", + "/bin/true", + "/usr/bin/true"); +} + +static int synthesize_user_creds( + const char **username, + uid_t *uid, gid_t *gid, + const char **home, + const char **shell, + UserCredsFlags flags) { + + /* We enforce some special rules for uid=0 and uid=65534: in order to avoid NSS lookups for root we hardcode + * their user record data. */ + + if (STR_IN_SET(*username, "root", "0")) { + *username = "root"; + + if (uid) + *uid = 0; + if (gid) + *gid = 0; + + if (home) + *home = "/root"; + + if (shell) + *shell = "/bin/sh"; + + return 0; + } + + if (synthesize_nobody() && + STR_IN_SET(*username, NOBODY_USER_NAME, "65534")) { + *username = NOBODY_USER_NAME; + + if (uid) + *uid = UID_NOBODY; + if (gid) + *gid = GID_NOBODY; + + if (home) + *home = FLAGS_SET(flags, USER_CREDS_CLEAN) ? NULL : "/"; + + if (shell) + *shell = FLAGS_SET(flags, USER_CREDS_CLEAN) ? NULL : "/sbin/nologin"; + + return 0; + } + + return -ENOMEDIUM; +} + +int get_user_creds( + const char **username, + uid_t *uid, gid_t *gid, + const char **home, + const char **shell, + UserCredsFlags flags) { + + uid_t u = UID_INVALID; + struct passwd *p; + int r; + + assert(username); + assert(*username); + + if (!FLAGS_SET(flags, USER_CREDS_PREFER_NSS) || + (!home && !shell)) { + + /* So here's the deal: normally, we'll try to synthesize all records we can synthesize, and override + * the user database with that. However, if the user specifies USER_CREDS_PREFER_NSS then the + * user database will override the synthetic records instead — except if the user is only interested in + * the UID and/or GID (but not the home directory, or the shell), in which case we'll always override + * the user database (i.e. the USER_CREDS_PREFER_NSS flag has no effect in this case). Why? + * Simply because there are valid usecase where the user might change the home directory or the shell + * of the relevant users, but changing the UID/GID mappings for them is something we explicitly don't + * support. */ + + r = synthesize_user_creds(username, uid, gid, home, shell, flags); + if (r >= 0) + return 0; + if (r != -ENOMEDIUM) /* not a username we can synthesize */ + return r; + } + + if (parse_uid(*username, &u) >= 0) { + errno = 0; + p = getpwuid(u); + + /* If there are multiple users with the same id, make sure to leave $USER to the configured value + * instead of the first occurrence in the database. However if the uid was configured by a numeric uid, + * then let's pick the real username from /etc/passwd. */ + if (p) + *username = p->pw_name; + else if (FLAGS_SET(flags, USER_CREDS_ALLOW_MISSING) && !gid && !home && !shell) { + + /* If the specified user is a numeric UID and it isn't in the user database, and the caller + * passed USER_CREDS_ALLOW_MISSING and was only interested in the UID, then juts return that + * and don't complain. */ + + if (uid) + *uid = u; + + return 0; + } + } else { + errno = 0; + p = getpwnam(*username); + } + if (!p) { + r = errno > 0 ? -errno : -ESRCH; + + /* If the user requested that we only synthesize as fallback, do so now */ + if (FLAGS_SET(flags, USER_CREDS_PREFER_NSS)) { + if (synthesize_user_creds(username, uid, gid, home, shell, flags) >= 0) + return 0; + } + + return r; + } + + if (uid) { + if (!uid_is_valid(p->pw_uid)) + return -EBADMSG; + + *uid = p->pw_uid; + } + + if (gid) { + if (!gid_is_valid(p->pw_gid)) + return -EBADMSG; + + *gid = p->pw_gid; + } + + if (home) { + if (FLAGS_SET(flags, USER_CREDS_CLEAN) && empty_or_root(p->pw_dir)) + *home = NULL; + else + *home = p->pw_dir; + } + + if (shell) { + if (FLAGS_SET(flags, USER_CREDS_CLEAN) && (isempty(p->pw_shell) || is_nologin_shell(p->pw_shell))) + *shell = NULL; + else + *shell = p->pw_shell; + } + + return 0; +} + +int get_group_creds(const char **groupname, gid_t *gid, UserCredsFlags flags) { + struct group *g; + gid_t id; + + assert(groupname); + + /* We enforce some special rules for gid=0: in order to avoid NSS lookups for root we hardcode its data. */ + + if (STR_IN_SET(*groupname, "root", "0")) { + *groupname = "root"; + + if (gid) + *gid = 0; + + return 0; + } + + if (synthesize_nobody() && + STR_IN_SET(*groupname, NOBODY_GROUP_NAME, "65534")) { + *groupname = NOBODY_GROUP_NAME; + + if (gid) + *gid = GID_NOBODY; + + return 0; + } + + if (parse_gid(*groupname, &id) >= 0) { + errno = 0; + g = getgrgid(id); + + if (g) + *groupname = g->gr_name; + else if (FLAGS_SET(flags, USER_CREDS_ALLOW_MISSING)) { + if (gid) + *gid = id; + + return 0; + } + } else { + errno = 0; + g = getgrnam(*groupname); + } + + if (!g) + return errno > 0 ? -errno : -ESRCH; + + if (gid) { + if (!gid_is_valid(g->gr_gid)) + return -EBADMSG; + + *gid = g->gr_gid; + } + + return 0; +} + +char* uid_to_name(uid_t uid) { + char *ret; + int r; + + /* Shortcut things to avoid NSS lookups */ + if (uid == 0) + return strdup("root"); + if (synthesize_nobody() && + uid == UID_NOBODY) + return strdup(NOBODY_USER_NAME); + + if (uid_is_valid(uid)) { + long bufsize; + + bufsize = sysconf(_SC_GETPW_R_SIZE_MAX); + if (bufsize <= 0) + bufsize = 4096; + + for (;;) { + struct passwd pwbuf, *pw = NULL; + _cleanup_free_ char *buf = NULL; + + buf = malloc(bufsize); + if (!buf) + return NULL; + + r = getpwuid_r(uid, &pwbuf, buf, (size_t) bufsize, &pw); + if (r == 0 && pw) + return strdup(pw->pw_name); + if (r != ERANGE) + break; + + bufsize *= 2; + } + } + + if (asprintf(&ret, UID_FMT, uid) < 0) + return NULL; + + return ret; +} + +char* gid_to_name(gid_t gid) { + char *ret; + int r; + + if (gid == 0) + return strdup("root"); + if (synthesize_nobody() && + gid == GID_NOBODY) + return strdup(NOBODY_GROUP_NAME); + + if (gid_is_valid(gid)) { + long bufsize; + + bufsize = sysconf(_SC_GETGR_R_SIZE_MAX); + if (bufsize <= 0) + bufsize = 4096; + + for (;;) { + struct group grbuf, *gr = NULL; + _cleanup_free_ char *buf = NULL; + + buf = malloc(bufsize); + if (!buf) + return NULL; + + r = getgrgid_r(gid, &grbuf, buf, (size_t) bufsize, &gr); + if (r == 0 && gr) + return strdup(gr->gr_name); + if (r != ERANGE) + break; + + bufsize *= 2; + } + } + + if (asprintf(&ret, GID_FMT, gid) < 0) + return NULL; + + return ret; +} + +int in_gid(gid_t gid) { + long ngroups_max; + gid_t *gids; + int r, i; + + if (getgid() == gid) + return 1; + + if (getegid() == gid) + return 1; + + if (!gid_is_valid(gid)) + return -EINVAL; + + ngroups_max = sysconf(_SC_NGROUPS_MAX); + assert(ngroups_max > 0); + + gids = newa(gid_t, ngroups_max); + + r = getgroups(ngroups_max, gids); + if (r < 0) + return -errno; + + for (i = 0; i < r; i++) + if (gids[i] == gid) + return 1; + + return 0; +} + +int in_group(const char *name) { + int r; + gid_t gid; + + r = get_group_creds(&name, &gid, 0); + if (r < 0) + return r; + + return in_gid(gid); +} + +int get_home_dir(char **_h) { + struct passwd *p; + const char *e; + char *h; + uid_t u; + + assert(_h); + + /* Take the user specified one */ + e = secure_getenv("HOME"); + if (e && path_is_absolute(e)) { + h = strdup(e); + if (!h) + return -ENOMEM; + + *_h = h; + return 0; + } + + /* Hardcode home directory for root and nobody to avoid NSS */ + u = getuid(); + if (u == 0) { + h = strdup("/root"); + if (!h) + return -ENOMEM; + + *_h = h; + return 0; + } + if (synthesize_nobody() && + u == UID_NOBODY) { + h = strdup("/"); + if (!h) + return -ENOMEM; + + *_h = h; + return 0; + } + + /* Check the database... */ + errno = 0; + p = getpwuid(u); + if (!p) + return errno > 0 ? -errno : -ESRCH; + + if (!path_is_absolute(p->pw_dir)) + return -EINVAL; + + h = strdup(p->pw_dir); + if (!h) + return -ENOMEM; + + *_h = h; + return 0; +} + +int get_shell(char **_s) { + struct passwd *p; + const char *e; + char *s; + uid_t u; + + assert(_s); + + /* Take the user specified one */ + e = getenv("SHELL"); + if (e) { + s = strdup(e); + if (!s) + return -ENOMEM; + + *_s = s; + return 0; + } + + /* Hardcode shell for root and nobody to avoid NSS */ + u = getuid(); + if (u == 0) { + s = strdup("/bin/sh"); + if (!s) + return -ENOMEM; + + *_s = s; + return 0; + } + if (synthesize_nobody() && + u == UID_NOBODY) { + s = strdup("/sbin/nologin"); + if (!s) + return -ENOMEM; + + *_s = s; + return 0; + } + + /* Check the database... */ + errno = 0; + p = getpwuid(u); + if (!p) + return errno > 0 ? -errno : -ESRCH; + + if (!path_is_absolute(p->pw_shell)) + return -EINVAL; + + s = strdup(p->pw_shell); + if (!s) + return -ENOMEM; + + *_s = s; + return 0; +} + +int reset_uid_gid(void) { + int r; + + r = maybe_setgroups(0, NULL); + if (r < 0) + return r; + + if (setresgid(0, 0, 0) < 0) + return -errno; + + if (setresuid(0, 0, 0) < 0) + return -errno; + + return 0; +} + +int take_etc_passwd_lock(const char *root) { + + struct flock flock = { + .l_type = F_WRLCK, + .l_whence = SEEK_SET, + .l_start = 0, + .l_len = 0, + }; + + const char *path; + int fd, r; + + /* This is roughly the same as lckpwdf(), but not as awful. We + * don't want to use alarm() and signals, hence we implement + * our own trivial version of this. + * + * Note that shadow-utils also takes per-database locks in + * addition to lckpwdf(). However, we don't given that they + * are redundant as they invoke lckpwdf() first and keep + * it during everything they do. The per-database locks are + * awfully racy, and thus we just won't do them. */ + + if (root) + path = prefix_roota(root, ETC_PASSWD_LOCK_PATH); + else + path = ETC_PASSWD_LOCK_PATH; + + fd = open(path, O_WRONLY|O_CREAT|O_CLOEXEC|O_NOCTTY|O_NOFOLLOW, 0600); + if (fd < 0) + return log_debug_errno(errno, "Cannot open %s: %m", path); + + r = fcntl(fd, F_SETLKW, &flock); + if (r < 0) { + safe_close(fd); + return log_debug_errno(errno, "Locking %s failed: %m", path); + } + + return fd; +} + +bool valid_user_group_name(const char *u) { + const char *i; + long sz; + + /* Checks if the specified name is a valid user/group name. Also see POSIX IEEE Std 1003.1-2008, 2016 Edition, + * 3.437. We are a bit stricter here however. Specifically we deviate from POSIX rules: + * + * - We don't allow any dots (this would break chown syntax which permits dots as user/group name separator) + * - We require that names fit into the appropriate utmp field + * - We don't allow empty user names + * + * Note that other systems are even more restrictive, and don't permit underscores or uppercase characters. + */ + + if (isempty(u)) + return false; + + if (!(u[0] >= 'a' && u[0] <= 'z') && + !(u[0] >= 'A' && u[0] <= 'Z') && + u[0] != '_') + return false; + + for (i = u+1; *i; i++) { + if (!(*i >= 'a' && *i <= 'z') && + !(*i >= 'A' && *i <= 'Z') && + !(*i >= '0' && *i <= '9') && + !IN_SET(*i, '_', '-')) + return false; + } + + sz = sysconf(_SC_LOGIN_NAME_MAX); + assert_se(sz > 0); + + if ((size_t) (i-u) > (size_t) sz) + return false; + + if ((size_t) (i-u) > UT_NAMESIZE - 1) + return false; + + return true; +} + +bool valid_user_group_name_or_id(const char *u) { + + /* Similar as above, but is also fine with numeric UID/GID specifications, as long as they are in the right + * range, and not the invalid user ids. */ + + if (isempty(u)) + return false; + + if (valid_user_group_name(u)) + return true; + + return parse_uid(u, NULL) >= 0; +} + +bool valid_gecos(const char *d) { + + if (!d) + return false; + + if (!utf8_is_valid(d)) + return false; + + if (string_has_cc(d, NULL)) + return false; + + /* Colons are used as field separators, and hence not OK */ + if (strchr(d, ':')) + return false; + + return true; +} + +bool valid_home(const char *p) { + /* Note that this function is also called by valid_shell(), any + * changes must account for that. */ + + if (isempty(p)) + return false; + + if (!utf8_is_valid(p)) + return false; + + if (string_has_cc(p, NULL)) + return false; + + if (!path_is_absolute(p)) + return false; + + if (!path_is_normalized(p)) + return false; + + /* Colons are used as field separators, and hence not OK */ + if (strchr(p, ':')) + return false; + + return true; +} + +int maybe_setgroups(size_t size, const gid_t *list) { + int r; + + /* Check if setgroups is allowed before we try to drop all the auxiliary groups */ + if (size == 0) { /* Dropping all aux groups? */ + _cleanup_free_ char *setgroups_content = NULL; + bool can_setgroups; + + r = read_one_line_file("/proc/self/setgroups", &setgroups_content); + if (r == -ENOENT) + /* Old kernels don't have /proc/self/setgroups, so assume we can use setgroups */ + can_setgroups = true; + else if (r < 0) + return r; + else + can_setgroups = streq(setgroups_content, "allow"); + + if (!can_setgroups) { + log_debug("Skipping setgroups(), /proc/self/setgroups is set to 'deny'"); + return 0; + } + } + + if (setgroups(size, list) < 0) + return -errno; + + return 0; +} + +bool synthesize_nobody(void) { + /* Returns true when we shall synthesize the "nobody" user (which we do by default). This can be turned off by + * touching /etc/systemd/dont-synthesize-nobody in order to provide upgrade compatibility with legacy systems + * that used the "nobody" user name and group name for other UIDs/GIDs than 65534. + * + * Note that we do not employ any kind of synchronization on the following caching variable. If the variable is + * accessed in multi-threaded programs in the worst case it might happen that we initialize twice, but that + * shouldn't matter as each initialization should come to the same result. */ + static int cache = -1; + + if (cache < 0) + cache = access("/etc/systemd/dont-synthesize-nobody", F_OK) < 0; + + return cache; +} + +int putpwent_sane(const struct passwd *pw, FILE *stream) { + assert(pw); + assert(stream); + + errno = 0; + if (putpwent(pw, stream) != 0) + return errno > 0 ? -errno : -EIO; + + return 0; +} + +int putspent_sane(const struct spwd *sp, FILE *stream) { + assert(sp); + assert(stream); + + errno = 0; + if (putspent(sp, stream) != 0) + return errno > 0 ? -errno : -EIO; + + return 0; +} + +int putgrent_sane(const struct group *gr, FILE *stream) { + assert(gr); + assert(stream); + + errno = 0; + if (putgrent(gr, stream) != 0) + return errno > 0 ? -errno : -EIO; + + return 0; +} + +#if ENABLE_GSHADOW +int putsgent_sane(const struct sgrp *sg, FILE *stream) { + assert(sg); + assert(stream); + + errno = 0; + if (putsgent(sg, stream) != 0) + return errno > 0 ? -errno : -EIO; + + return 0; +} +#endif + +int fgetpwent_sane(FILE *stream, struct passwd **pw) { + struct passwd *p; + + assert(pw); + assert(stream); + + errno = 0; + p = fgetpwent(stream); + if (!p && errno != ENOENT) + return errno > 0 ? -errno : -EIO; + + *pw = p; + return !!p; +} + +int fgetspent_sane(FILE *stream, struct spwd **sp) { + struct spwd *s; + + assert(sp); + assert(stream); + + errno = 0; + s = fgetspent(stream); + if (!s && errno != ENOENT) + return errno > 0 ? -errno : -EIO; + + *sp = s; + return !!s; +} + +int fgetgrent_sane(FILE *stream, struct group **gr) { + struct group *g; + + assert(gr); + assert(stream); + + errno = 0; + g = fgetgrent(stream); + if (!g && errno != ENOENT) + return errno > 0 ? -errno : -EIO; + + *gr = g; + return !!g; +} + +#if ENABLE_GSHADOW +int fgetsgent_sane(FILE *stream, struct sgrp **sg) { + struct sgrp *s; + + assert(sg); + assert(stream); + + errno = 0; + s = fgetsgent(stream); + if (!s && errno != ENOENT) + return errno > 0 ? -errno : -EIO; + + *sg = s; + return !!s; +} +#endif diff --git a/src/basic/user-util.h b/src/basic/user-util.h new file mode 100644 index 0000000..cc899ee --- /dev/null +++ b/src/basic/user-util.h @@ -0,0 +1,115 @@ +/* SPDX-License-Identifier: LGPL-2.1+ */ +#pragma once + +#include <grp.h> +#if ENABLE_GSHADOW +#include <gshadow.h> +#endif +#include <pwd.h> +#include <shadow.h> +#include <stdbool.h> +#include <stdint.h> +#include <sys/types.h> +#include <unistd.h> + +bool uid_is_valid(uid_t uid); + +static inline bool gid_is_valid(gid_t gid) { + return uid_is_valid((uid_t) gid); +} + +int parse_uid(const char *s, uid_t* ret_uid); + +static inline int parse_gid(const char *s, gid_t *ret_gid) { + return parse_uid(s, (uid_t*) ret_gid); +} + +char* getlogname_malloc(void); +char* getusername_malloc(void); + +typedef enum UserCredsFlags { + USER_CREDS_PREFER_NSS = 1 << 0, /* if set, only synthesize user records if database lacks them. Normally we bypass the userdb entirely for the records we can synthesize */ + USER_CREDS_ALLOW_MISSING = 1 << 1, /* if a numeric UID string is resolved, be OK if there's no record for it */ + USER_CREDS_CLEAN = 1 << 2, /* try to clean up shell and home fields with invalid data */ +} UserCredsFlags; + +int get_user_creds(const char **username, uid_t *uid, gid_t *gid, const char **home, const char **shell, UserCredsFlags flags); +int get_group_creds(const char **groupname, gid_t *gid, UserCredsFlags flags); + +char* uid_to_name(uid_t uid); +char* gid_to_name(gid_t gid); + +int in_gid(gid_t gid); +int in_group(const char *name); + +int get_home_dir(char **ret); +int get_shell(char **_ret); + +int reset_uid_gid(void); + +int take_etc_passwd_lock(const char *root); + +#define UID_INVALID ((uid_t) -1) +#define GID_INVALID ((gid_t) -1) + +#define UID_NOBODY ((uid_t) 65534U) +#define GID_NOBODY ((gid_t) 65534U) + +#define ETC_PASSWD_LOCK_PATH "/etc/.pwd.lock" + +static inline bool uid_is_dynamic(uid_t uid) { + return DYNAMIC_UID_MIN <= uid && uid <= DYNAMIC_UID_MAX; +} + +static inline bool gid_is_dynamic(gid_t gid) { + return uid_is_dynamic((uid_t) gid); +} + +static inline bool uid_is_system(uid_t uid) { + return uid <= SYSTEM_UID_MAX; +} + +static inline bool gid_is_system(gid_t gid) { + return gid <= SYSTEM_GID_MAX; +} + +/* The following macros add 1 when converting things, since UID 0 is a valid UID, while the pointer + * NULL is special */ +#define PTR_TO_UID(p) ((uid_t) (((uintptr_t) (p))-1)) +#define UID_TO_PTR(u) ((void*) (((uintptr_t) (u))+1)) + +#define PTR_TO_GID(p) ((gid_t) (((uintptr_t) (p))-1)) +#define GID_TO_PTR(u) ((void*) (((uintptr_t) (u))+1)) + +static inline bool userns_supported(void) { + return access("/proc/self/uid_map", F_OK) >= 0; +} + +bool valid_user_group_name(const char *u); +bool valid_user_group_name_or_id(const char *u); +bool valid_gecos(const char *d); +bool valid_home(const char *p); + +static inline bool valid_shell(const char *p) { + /* We have the same requirements, so just piggy-back on the home check. + * + * Let's ignore /etc/shells because this is only applicable to real and + * not system users. It is also incompatible with the idea of empty /etc. + */ + return valid_home(p); +} + +int maybe_setgroups(size_t size, const gid_t *list); + +bool synthesize_nobody(void); + +int fgetpwent_sane(FILE *stream, struct passwd **pw); +int fgetspent_sane(FILE *stream, struct spwd **sp); +int fgetgrent_sane(FILE *stream, struct group **gr); +int putpwent_sane(const struct passwd *pw, FILE *stream); +int putspent_sane(const struct spwd *sp, FILE *stream); +int putgrent_sane(const struct group *gr, FILE *stream); +#if ENABLE_GSHADOW +int fgetsgent_sane(FILE *stream, struct sgrp **sg); +int putsgent_sane(const struct sgrp *sg, FILE *stream); +#endif diff --git a/src/basic/utf8.c b/src/basic/utf8.c new file mode 100644 index 0000000..e0d1949 --- /dev/null +++ b/src/basic/utf8.c @@ -0,0 +1,532 @@ +/* SPDX-License-Identifier: LGPL-2.1+ */ + +/* Parts of this file are based on the GLIB utf8 validation functions. The + * original license text follows. */ + +/* gutf8.c - Operations on UTF-8 strings. + * + * Copyright (C) 1999 Tom Tromey + * Copyright (C) 2000 Red Hat, Inc. + * + * This library is free software; you can redistribute it and/or + * modify it under the terms of the GNU Library General Public + * License as published by the Free Software Foundation; either + * version 2 of the License, or (at your option) any later version. + * + * This library is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + * Library General Public License for more details. + * + * You should have received a copy of the GNU Library General Public + * License along with this library; if not, write to the Free Software + * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA + */ + +#include <errno.h> +#include <stdbool.h> +#include <stdlib.h> +#include <string.h> + +#include "alloc-util.h" +#include "gunicode.h" +#include "hexdecoct.h" +#include "macro.h" +#include "utf8.h" + +bool unichar_is_valid(char32_t ch) { + + if (ch >= 0x110000) /* End of unicode space */ + return false; + if ((ch & 0xFFFFF800) == 0xD800) /* Reserved area for UTF-16 */ + return false; + if ((ch >= 0xFDD0) && (ch <= 0xFDEF)) /* Reserved */ + return false; + if ((ch & 0xFFFE) == 0xFFFE) /* BOM (Byte Order Mark) */ + return false; + + return true; +} + +static bool unichar_is_control(char32_t ch) { + + /* + 0 to ' '-1 is the C0 range. + DEL=0x7F, and DEL+1 to 0x9F is C1 range. + '\t' is in C0 range, but more or less harmless and commonly used. + */ + + return (ch < ' ' && !IN_SET(ch, '\t', '\n')) || + (0x7F <= ch && ch <= 0x9F); +} + +/* count of characters used to encode one unicode char */ +static size_t utf8_encoded_expected_len(const char *str) { + uint8_t c; + + assert(str); + + c = (uint8_t) str[0]; + if (c < 0x80) + return 1; + if ((c & 0xe0) == 0xc0) + return 2; + if ((c & 0xf0) == 0xe0) + return 3; + if ((c & 0xf8) == 0xf0) + return 4; + if ((c & 0xfc) == 0xf8) + return 5; + if ((c & 0xfe) == 0xfc) + return 6; + + return 0; +} + +/* decode one unicode char */ +int utf8_encoded_to_unichar(const char *str, char32_t *ret_unichar) { + char32_t unichar; + size_t len, i; + + assert(str); + + len = utf8_encoded_expected_len(str); + + switch (len) { + case 1: + *ret_unichar = (char32_t)str[0]; + return 0; + case 2: + unichar = str[0] & 0x1f; + break; + case 3: + unichar = (char32_t)str[0] & 0x0f; + break; + case 4: + unichar = (char32_t)str[0] & 0x07; + break; + case 5: + unichar = (char32_t)str[0] & 0x03; + break; + case 6: + unichar = (char32_t)str[0] & 0x01; + break; + default: + return -EINVAL; + } + + for (i = 1; i < len; i++) { + if (((char32_t)str[i] & 0xc0) != 0x80) + return -EINVAL; + + unichar <<= 6; + unichar |= (char32_t)str[i] & 0x3f; + } + + *ret_unichar = unichar; + + return 0; +} + +bool utf8_is_printable_newline(const char* str, size_t length, bool newline) { + const char *p; + + assert(str); + + for (p = str; length;) { + int encoded_len, r; + char32_t val; + + encoded_len = utf8_encoded_valid_unichar(p); + if (encoded_len < 0 || + (size_t) encoded_len > length) + return false; + + r = utf8_encoded_to_unichar(p, &val); + if (r < 0 || + unichar_is_control(val) || + (!newline && val == '\n')) + return false; + + length -= encoded_len; + p += encoded_len; + } + + return true; +} + +char *utf8_is_valid(const char *str) { + const char *p; + + assert(str); + + p = str; + while (*p) { + int len; + + len = utf8_encoded_valid_unichar(p); + if (len < 0) + return NULL; + + p += len; + } + + return (char*) str; +} + +char *utf8_escape_invalid(const char *str) { + char *p, *s; + + assert(str); + + p = s = malloc(strlen(str) * 4 + 1); + if (!p) + return NULL; + + while (*str) { + int len; + + len = utf8_encoded_valid_unichar(str); + if (len > 0) { + s = mempcpy(s, str, len); + str += len; + } else { + s = stpcpy(s, UTF8_REPLACEMENT_CHARACTER); + str += 1; + } + } + + *s = '\0'; + + return p; +} + +char *utf8_escape_non_printable(const char *str) { + char *p, *s; + + assert(str); + + p = s = malloc(strlen(str) * 4 + 1); + if (!p) + return NULL; + + while (*str) { + int len; + + len = utf8_encoded_valid_unichar(str); + if (len > 0) { + if (utf8_is_printable(str, len)) { + s = mempcpy(s, str, len); + str += len; + } else { + while (len > 0) { + *(s++) = '\\'; + *(s++) = 'x'; + *(s++) = hexchar((int) *str >> 4); + *(s++) = hexchar((int) *str); + + str += 1; + len--; + } + } + } else { + s = stpcpy(s, UTF8_REPLACEMENT_CHARACTER); + str += 1; + } + } + + *s = '\0'; + + return p; +} + +char *ascii_is_valid(const char *str) { + const char *p; + + /* Check whether the string consists of valid ASCII bytes, + * i.e values between 0 and 127, inclusive. */ + + assert(str); + + for (p = str; *p; p++) + if ((unsigned char) *p >= 128) + return NULL; + + return (char*) str; +} + +char *ascii_is_valid_n(const char *str, size_t len) { + size_t i; + + /* Very similar to ascii_is_valid(), but checks exactly len + * bytes and rejects any NULs in that range. */ + + assert(str); + + for (i = 0; i < len; i++) + if ((unsigned char) str[i] >= 128 || str[i] == 0) + return NULL; + + return (char*) str; +} + +/** + * utf8_encode_unichar() - Encode single UCS-4 character as UTF-8 + * @out_utf8: output buffer of at least 4 bytes or NULL + * @g: UCS-4 character to encode + * + * This encodes a single UCS-4 character as UTF-8 and writes it into @out_utf8. + * The length of the character is returned. It is not zero-terminated! If the + * output buffer is NULL, only the length is returned. + * + * Returns: The length in bytes that the UTF-8 representation does or would + * occupy. + */ +size_t utf8_encode_unichar(char *out_utf8, char32_t g) { + + if (g < (1 << 7)) { + if (out_utf8) + out_utf8[0] = g & 0x7f; + return 1; + } else if (g < (1 << 11)) { + if (out_utf8) { + out_utf8[0] = 0xc0 | ((g >> 6) & 0x1f); + out_utf8[1] = 0x80 | (g & 0x3f); + } + return 2; + } else if (g < (1 << 16)) { + if (out_utf8) { + out_utf8[0] = 0xe0 | ((g >> 12) & 0x0f); + out_utf8[1] = 0x80 | ((g >> 6) & 0x3f); + out_utf8[2] = 0x80 | (g & 0x3f); + } + return 3; + } else if (g < (1 << 21)) { + if (out_utf8) { + out_utf8[0] = 0xf0 | ((g >> 18) & 0x07); + out_utf8[1] = 0x80 | ((g >> 12) & 0x3f); + out_utf8[2] = 0x80 | ((g >> 6) & 0x3f); + out_utf8[3] = 0x80 | (g & 0x3f); + } + return 4; + } + + return 0; +} + +char *utf16_to_utf8(const char16_t *s, size_t length /* bytes! */) { + const uint8_t *f; + char *r, *t; + + assert(s); + + /* Input length is in bytes, i.e. the shortest possible character takes 2 bytes. Each unicode character may + * take up to 4 bytes in UTF-8. Let's also account for a trailing NUL byte. */ + if (length * 2 < length) + return NULL; /* overflow */ + + r = new(char, length * 2 + 1); + if (!r) + return NULL; + + f = (const uint8_t*) s; + t = r; + + while (f + 1 < (const uint8_t*) s + length) { + char16_t w1, w2; + + /* see RFC 2781 section 2.2 */ + + w1 = f[1] << 8 | f[0]; + f += 2; + + if (!utf16_is_surrogate(w1)) { + t += utf8_encode_unichar(t, w1); + continue; + } + + if (utf16_is_trailing_surrogate(w1)) + continue; /* spurious trailing surrogate, ignore */ + + if (f + 1 >= (const uint8_t*) s + length) + break; + + w2 = f[1] << 8 | f[0]; + f += 2; + + if (!utf16_is_trailing_surrogate(w2)) { + f -= 2; + continue; /* surrogate missing its trailing surrogate, ignore */ + } + + t += utf8_encode_unichar(t, utf16_surrogate_pair_to_unichar(w1, w2)); + } + + *t = 0; + return r; +} + +size_t utf16_encode_unichar(char16_t *out, char32_t c) { + + /* Note that this encodes as little-endian. */ + + switch (c) { + + case 0 ... 0xd7ffU: + case 0xe000U ... 0xffffU: + out[0] = htole16(c); + return 1; + + case 0x10000U ... 0x10ffffU: + c -= 0x10000U; + out[0] = htole16((c >> 10) + 0xd800U); + out[1] = htole16((c & 0x3ffU) + 0xdc00U); + return 2; + + default: /* A surrogate (invalid) */ + return 0; + } +} + +char16_t *utf8_to_utf16(const char *s, size_t length) { + char16_t *n, *p; + size_t i; + int r; + + assert(s); + + n = new(char16_t, length + 1); + if (!n) + return NULL; + + p = n; + + for (i = 0; i < length;) { + char32_t unichar; + size_t e; + + e = utf8_encoded_expected_len(s + i); + if (e <= 1) /* Invalid and single byte characters are copied as they are */ + goto copy; + + if (i + e > length) /* sequence longer than input buffer, then copy as-is */ + goto copy; + + r = utf8_encoded_to_unichar(s + i, &unichar); + if (r < 0) /* sequence invalid, then copy as-is */ + goto copy; + + p += utf16_encode_unichar(p, unichar); + i += e; + continue; + + copy: + *(p++) = htole16(s[i++]); + } + + *p = 0; + return n; +} + +size_t char16_strlen(const char16_t *s) { + size_t n = 0; + + assert(s); + + while (*s != 0) + n++, s++; + + return n; +} + +/* expected size used to encode one unicode char */ +static int utf8_unichar_to_encoded_len(char32_t unichar) { + + if (unichar < 0x80) + return 1; + if (unichar < 0x800) + return 2; + if (unichar < 0x10000) + return 3; + if (unichar < 0x200000) + return 4; + if (unichar < 0x4000000) + return 5; + + return 6; +} + +/* validate one encoded unicode char and return its length */ +int utf8_encoded_valid_unichar(const char *str) { + char32_t unichar; + size_t len, i; + int r; + + assert(str); + + len = utf8_encoded_expected_len(str); + if (len == 0) + return -EINVAL; + + /* ascii is valid */ + if (len == 1) + return 1; + + /* check if expected encoded chars are available */ + for (i = 0; i < len; i++) + if ((str[i] & 0x80) != 0x80) + return -EINVAL; + + r = utf8_encoded_to_unichar(str, &unichar); + if (r < 0) + return r; + + /* check if encoded length matches encoded value */ + if (utf8_unichar_to_encoded_len(unichar) != (int) len) + return -EINVAL; + + /* check if value has valid range */ + if (!unichar_is_valid(unichar)) + return -EINVAL; + + return (int) len; +} + +size_t utf8_n_codepoints(const char *str) { + size_t n = 0; + + /* Returns the number of UTF-8 codepoints in this string, or (size_t) -1 if the string is not valid UTF-8. */ + + while (*str != 0) { + int k; + + k = utf8_encoded_valid_unichar(str); + if (k < 0) + return (size_t) -1; + + str += k; + n++; + } + + return n; +} + +size_t utf8_console_width(const char *str) { + size_t n = 0; + + /* Returns the approximate width a string will take on screen when printed on a character cell + * terminal/console. */ + + while (*str != 0) { + char32_t c; + + if (utf8_encoded_to_unichar(str, &c) < 0) + return (size_t) -1; + + str = utf8_next_char(str); + + n += unichar_iswide(c) ? 2 : 1; + } + + return n; +} diff --git a/src/basic/utf8.h b/src/basic/utf8.h new file mode 100644 index 0000000..6284569 --- /dev/null +++ b/src/basic/utf8.h @@ -0,0 +1,51 @@ +/* SPDX-License-Identifier: LGPL-2.1+ */ +#pragma once + +#include <stdbool.h> +#include <stddef.h> +#include <stdint.h> +#include <uchar.h> + +#include "macro.h" +#include "missing_type.h" + +#define UTF8_REPLACEMENT_CHARACTER "\xef\xbf\xbd" +#define UTF8_BYTE_ORDER_MARK "\xef\xbb\xbf" + +bool unichar_is_valid(char32_t c); + +char *utf8_is_valid(const char *s) _pure_; +char *ascii_is_valid(const char *s) _pure_; +char *ascii_is_valid_n(const char *str, size_t len); + +bool utf8_is_printable_newline(const char* str, size_t length, bool newline) _pure_; +#define utf8_is_printable(str, length) utf8_is_printable_newline(str, length, true) + +char *utf8_escape_invalid(const char *s); +char *utf8_escape_non_printable(const char *str); + +size_t utf8_encode_unichar(char *out_utf8, char32_t g); +size_t utf16_encode_unichar(char16_t *out, char32_t c); + +char *utf16_to_utf8(const char16_t *s, size_t length /* bytes! */); +char16_t *utf8_to_utf16(const char *s, size_t length); + +size_t char16_strlen(const char16_t *s); /* returns the number of 16bit words in the string (not bytes!) */ + +int utf8_encoded_valid_unichar(const char *str); +int utf8_encoded_to_unichar(const char *str, char32_t *ret_unichar); + +static inline bool utf16_is_surrogate(char16_t c) { + return c >= 0xd800U && c <= 0xdfffU; +} + +static inline bool utf16_is_trailing_surrogate(char16_t c) { + return c >= 0xdc00U && c <= 0xdfffU; +} + +static inline char32_t utf16_surrogate_pair_to_unichar(char16_t lead, char16_t trail) { + return ((((char32_t) lead - 0xd800U) << 10) + ((char32_t) trail - 0xdc00U) + 0x10000U); +} + +size_t utf8_n_codepoints(const char *str); +size_t utf8_console_width(const char *str); diff --git a/src/basic/util.c b/src/basic/util.c new file mode 100644 index 0000000..e577c93 --- /dev/null +++ b/src/basic/util.c @@ -0,0 +1,637 @@ +/* SPDX-License-Identifier: LGPL-2.1+ */ + +#include <alloca.h> +#include <errno.h> +#include <fcntl.h> +#include <sched.h> +#include <signal.h> +#include <stdarg.h> +#include <stdio.h> +#include <stdlib.h> +#include <string.h> +#include <sys/mman.h> +#include <sys/prctl.h> +#include <sys/statfs.h> +#include <sys/sysmacros.h> +#include <sys/types.h> +#include <unistd.h> + +#include "alloc-util.h" +#include "btrfs-util.h" +#include "build.h" +#include "cgroup-util.h" +#include "def.h" +#include "device-nodes.h" +#include "dirent-util.h" +#include "env-file.h" +#include "env-util.h" +#include "fd-util.h" +#include "fileio.h" +#include "format-util.h" +#include "hashmap.h" +#include "hostname-util.h" +#include "log.h" +#include "macro.h" +#include "missing.h" +#include "parse-util.h" +#include "path-util.h" +#include "process-util.h" +#include "procfs-util.h" +#include "set.h" +#include "signal-util.h" +#include "stat-util.h" +#include "string-util.h" +#include "strv.h" +#include "time-util.h" +#include "umask-util.h" +#include "user-util.h" +#include "util.h" +#include "virt.h" + +int saved_argc = 0; +char **saved_argv = NULL; +static int saved_in_initrd = -1; + +size_t page_size(void) { + static thread_local size_t pgsz = 0; + long r; + + if (_likely_(pgsz > 0)) + return pgsz; + + r = sysconf(_SC_PAGESIZE); + assert(r > 0); + + pgsz = (size_t) r; + return pgsz; +} + +bool plymouth_running(void) { + return access("/run/plymouth/pid", F_OK) >= 0; +} + +bool display_is_local(const char *display) { + assert(display); + + return + display[0] == ':' && + display[1] >= '0' && + display[1] <= '9'; +} + +bool kexec_loaded(void) { + _cleanup_free_ char *s = NULL; + + if (read_one_line_file("/sys/kernel/kexec_loaded", &s) < 0) + return false; + + return s[0] == '1'; +} + +int prot_from_flags(int flags) { + + switch (flags & O_ACCMODE) { + + case O_RDONLY: + return PROT_READ; + + case O_WRONLY: + return PROT_WRITE; + + case O_RDWR: + return PROT_READ|PROT_WRITE; + + default: + return -EINVAL; + } +} + +bool in_initrd(void) { + struct statfs s; + int r; + + if (saved_in_initrd >= 0) + return saved_in_initrd; + + /* We make two checks here: + * + * 1. the flag file /etc/initrd-release must exist + * 2. the root file system must be a memory file system + * + * The second check is extra paranoia, since misdetecting an + * initrd can have bad consequences due the initrd + * emptying when transititioning to the main systemd. + */ + + r = getenv_bool_secure("SYSTEMD_IN_INITRD"); + if (r < 0 && r != -ENXIO) + log_debug_errno(r, "Failed to parse $SYSTEMD_IN_INITRD, ignoring: %m"); + + if (r >= 0) + saved_in_initrd = r > 0; + else + saved_in_initrd = access("/etc/initrd-release", F_OK) >= 0 && + statfs("/", &s) >= 0 && + is_temporary_fs(&s); + + return saved_in_initrd; +} + +void in_initrd_force(bool value) { + saved_in_initrd = value; +} + +/* hey glibc, APIs with callbacks without a user pointer are so useless */ +void *xbsearch_r(const void *key, const void *base, size_t nmemb, size_t size, + __compar_d_fn_t compar, void *arg) { + size_t l, u, idx; + const void *p; + int comparison; + + assert(!size_multiply_overflow(nmemb, size)); + + l = 0; + u = nmemb; + while (l < u) { + idx = (l + u) / 2; + p = (const uint8_t*) base + idx * size; + comparison = compar(key, p, arg); + if (comparison < 0) + u = idx; + else if (comparison > 0) + l = idx + 1; + else + return (void *)p; + } + return NULL; +} + +bool memeqzero(const void *data, size_t length) { + /* Does the buffer consist entirely of NULs? + * Copied from https://github.com/systemd/casync/, copied in turn from + * https://github.com/rustyrussell/ccan/blob/master/ccan/mem/mem.c#L92, + * which is licensed CC-0. + */ + + const uint8_t *p = data; + size_t i; + + /* Check first 16 bytes manually */ + for (i = 0; i < 16; i++, length--) { + if (length == 0) + return true; + if (p[i]) + return false; + } + + /* Now we know first 16 bytes are NUL, memcmp with self. */ + return memcmp(data, p + i, length) == 0; +} + +int on_ac_power(void) { + bool found_offline = false, found_online = false; + _cleanup_closedir_ DIR *d = NULL; + struct dirent *de; + + d = opendir("/sys/class/power_supply"); + if (!d) + return errno == ENOENT ? true : -errno; + + FOREACH_DIRENT(de, d, return -errno) { + _cleanup_close_ int fd = -1, device = -1; + char contents[6]; + ssize_t n; + + device = openat(dirfd(d), de->d_name, O_DIRECTORY|O_RDONLY|O_CLOEXEC|O_NOCTTY); + if (device < 0) { + if (IN_SET(errno, ENOENT, ENOTDIR)) + continue; + + return -errno; + } + + fd = openat(device, "type", O_RDONLY|O_CLOEXEC|O_NOCTTY); + if (fd < 0) { + if (errno == ENOENT) + continue; + + return -errno; + } + + n = read(fd, contents, sizeof(contents)); + if (n < 0) + return -errno; + + if (n != 6 || memcmp(contents, "Mains\n", 6)) + continue; + + safe_close(fd); + fd = openat(device, "online", O_RDONLY|O_CLOEXEC|O_NOCTTY); + if (fd < 0) { + if (errno == ENOENT) + continue; + + return -errno; + } + + n = read(fd, contents, sizeof(contents)); + if (n < 0) + return -errno; + + if (n != 2 || contents[1] != '\n') + return -EIO; + + if (contents[0] == '1') { + found_online = true; + break; + } else if (contents[0] == '0') + found_offline = true; + else + return -EIO; + } + + return found_online || !found_offline; +} + +int container_get_leader(const char *machine, pid_t *pid) { + _cleanup_free_ char *s = NULL, *class = NULL; + const char *p; + pid_t leader; + int r; + + assert(machine); + assert(pid); + + if (streq(machine, ".host")) { + *pid = 1; + return 0; + } + + if (!machine_name_is_valid(machine)) + return -EINVAL; + + p = strjoina("/run/systemd/machines/", machine); + r = parse_env_file(NULL, p, + "LEADER", &s, + "CLASS", &class); + if (r == -ENOENT) + return -EHOSTDOWN; + if (r < 0) + return r; + if (!s) + return -EIO; + + if (!streq_ptr(class, "container")) + return -EIO; + + r = parse_pid(s, &leader); + if (r < 0) + return r; + if (leader <= 1) + return -EIO; + + *pid = leader; + return 0; +} + +int namespace_open(pid_t pid, int *pidns_fd, int *mntns_fd, int *netns_fd, int *userns_fd, int *root_fd) { + _cleanup_close_ int pidnsfd = -1, mntnsfd = -1, netnsfd = -1, usernsfd = -1; + int rfd = -1; + + assert(pid >= 0); + + if (mntns_fd) { + const char *mntns; + + mntns = procfs_file_alloca(pid, "ns/mnt"); + mntnsfd = open(mntns, O_RDONLY|O_NOCTTY|O_CLOEXEC); + if (mntnsfd < 0) + return -errno; + } + + if (pidns_fd) { + const char *pidns; + + pidns = procfs_file_alloca(pid, "ns/pid"); + pidnsfd = open(pidns, O_RDONLY|O_NOCTTY|O_CLOEXEC); + if (pidnsfd < 0) + return -errno; + } + + if (netns_fd) { + const char *netns; + + netns = procfs_file_alloca(pid, "ns/net"); + netnsfd = open(netns, O_RDONLY|O_NOCTTY|O_CLOEXEC); + if (netnsfd < 0) + return -errno; + } + + if (userns_fd) { + const char *userns; + + userns = procfs_file_alloca(pid, "ns/user"); + usernsfd = open(userns, O_RDONLY|O_NOCTTY|O_CLOEXEC); + if (usernsfd < 0 && errno != ENOENT) + return -errno; + } + + if (root_fd) { + const char *root; + + root = procfs_file_alloca(pid, "root"); + rfd = open(root, O_RDONLY|O_NOCTTY|O_CLOEXEC|O_DIRECTORY); + if (rfd < 0) + return -errno; + } + + if (pidns_fd) + *pidns_fd = pidnsfd; + + if (mntns_fd) + *mntns_fd = mntnsfd; + + if (netns_fd) + *netns_fd = netnsfd; + + if (userns_fd) + *userns_fd = usernsfd; + + if (root_fd) + *root_fd = rfd; + + pidnsfd = mntnsfd = netnsfd = usernsfd = -1; + + return 0; +} + +int namespace_enter(int pidns_fd, int mntns_fd, int netns_fd, int userns_fd, int root_fd) { + if (userns_fd >= 0) { + /* Can't setns to your own userns, since then you could + * escalate from non-root to root in your own namespace, so + * check if namespaces equal before attempting to enter. */ + _cleanup_free_ char *userns_fd_path = NULL; + int r; + if (asprintf(&userns_fd_path, "/proc/self/fd/%d", userns_fd) < 0) + return -ENOMEM; + + r = files_same(userns_fd_path, "/proc/self/ns/user", 0); + if (r < 0) + return r; + if (r) + userns_fd = -1; + } + + if (pidns_fd >= 0) + if (setns(pidns_fd, CLONE_NEWPID) < 0) + return -errno; + + if (mntns_fd >= 0) + if (setns(mntns_fd, CLONE_NEWNS) < 0) + return -errno; + + if (netns_fd >= 0) + if (setns(netns_fd, CLONE_NEWNET) < 0) + return -errno; + + if (userns_fd >= 0) + if (setns(userns_fd, CLONE_NEWUSER) < 0) + return -errno; + + if (root_fd >= 0) { + if (fchdir(root_fd) < 0) + return -errno; + + if (chroot(".") < 0) + return -errno; + } + + return reset_uid_gid(); +} + +uint64_t physical_memory(void) { + _cleanup_free_ char *root = NULL, *value = NULL; + uint64_t mem, lim; + size_t ps; + long sc; + int r; + + /* We return this as uint64_t in case we are running as 32bit process on a 64bit kernel with huge amounts of + * memory. + * + * In order to support containers nicely that have a configured memory limit we'll take the minimum of the + * physically reported amount of memory and the limit configured for the root cgroup, if there is any. */ + + sc = sysconf(_SC_PHYS_PAGES); + assert(sc > 0); + + ps = page_size(); + mem = (uint64_t) sc * (uint64_t) ps; + + r = cg_get_root_path(&root); + if (r < 0) { + log_debug_errno(r, "Failed to determine root cgroup, ignoring cgroup memory limit: %m"); + return mem; + } + + r = cg_all_unified(); + if (r < 0) { + log_debug_errno(r, "Failed to determine root unified mode, ignoring cgroup memory limit: %m"); + return mem; + } + if (r > 0) { + r = cg_get_attribute("memory", root, "memory.max", &value); + if (r < 0) { + log_debug_errno(r, "Failed to read memory.max cgroup attribute, ignoring cgroup memory limit: %m"); + return mem; + } + + if (streq(value, "max")) + return mem; + } else { + r = cg_get_attribute("memory", root, "memory.limit_in_bytes", &value); + if (r < 0) { + log_debug_errno(r, "Failed to read memory.limit_in_bytes cgroup attribute, ignoring cgroup memory limit: %m"); + return mem; + } + } + + r = safe_atou64(value, &lim); + if (r < 0) { + log_debug_errno(r, "Failed to parse cgroup memory limit '%s', ignoring: %m", value); + return mem; + } + if (lim == UINT64_MAX) + return mem; + + /* Make sure the limit is a multiple of our own page size */ + lim /= ps; + lim *= ps; + + return MIN(mem, lim); +} + +uint64_t physical_memory_scale(uint64_t v, uint64_t max) { + uint64_t p, m, ps, r; + + assert(max > 0); + + /* Returns the physical memory size, multiplied by v divided by max. Returns UINT64_MAX on overflow. On success + * the result is a multiple of the page size (rounds down). */ + + ps = page_size(); + assert(ps > 0); + + p = physical_memory() / ps; + assert(p > 0); + + m = p * v; + if (m / p != v) + return UINT64_MAX; + + m /= max; + + r = m * ps; + if (r / ps != m) + return UINT64_MAX; + + return r; +} + +uint64_t system_tasks_max(void) { + + uint64_t a = TASKS_MAX, b = TASKS_MAX; + _cleanup_free_ char *root = NULL; + int r; + + /* Determine the maximum number of tasks that may run on this system. We check three sources to determine this + * limit: + * + * a) the maximum tasks value the kernel allows on this architecture + * b) the cgroups pids_max attribute for the system + * c) the kernel's configured maximum PID value + * + * And then pick the smallest of the three */ + + r = procfs_tasks_get_limit(&a); + if (r < 0) + log_debug_errno(r, "Failed to read maximum number of tasks from /proc, ignoring: %m"); + + r = cg_get_root_path(&root); + if (r < 0) + log_debug_errno(r, "Failed to determine cgroup root path, ignoring: %m"); + else { + _cleanup_free_ char *value = NULL; + + r = cg_get_attribute("pids", root, "pids.max", &value); + if (r < 0) + log_debug_errno(r, "Failed to read pids.max attribute of cgroup root, ignoring: %m"); + else if (!streq(value, "max")) { + r = safe_atou64(value, &b); + if (r < 0) + log_debug_errno(r, "Failed to parse pids.max attribute of cgroup root, ignoring: %m"); + } + } + + return MIN3(TASKS_MAX, + a <= 0 ? TASKS_MAX : a, + b <= 0 ? TASKS_MAX : b); +} + +uint64_t system_tasks_max_scale(uint64_t v, uint64_t max) { + uint64_t t, m; + + assert(max > 0); + + /* Multiply the system's task value by the fraction v/max. Hence, if max==100 this calculates percentages + * relative to the system's maximum number of tasks. Returns UINT64_MAX on overflow. */ + + t = system_tasks_max(); + assert(t > 0); + + m = t * v; + if (m / t != v) /* overflow? */ + return UINT64_MAX; + + return m / max; +} + +int version(void) { + puts("systemd " STRINGIFY(PROJECT_VERSION) " (" GIT_VERSION ")\n" + SYSTEMD_FEATURES); + return 0; +} + +/* This is a direct translation of str_verscmp from boot.c */ +static bool is_digit(int c) { + return c >= '0' && c <= '9'; +} + +static int c_order(int c) { + if (c == 0 || is_digit(c)) + return 0; + + if ((c >= 'a') && (c <= 'z')) + return c; + + return c + 0x10000; +} + +int str_verscmp(const char *s1, const char *s2) { + const char *os1, *os2; + + assert(s1); + assert(s2); + + os1 = s1; + os2 = s2; + + while (*s1 || *s2) { + int first; + + while ((*s1 && !is_digit(*s1)) || (*s2 && !is_digit(*s2))) { + int order; + + order = c_order(*s1) - c_order(*s2); + if (order != 0) + return order; + s1++; + s2++; + } + + while (*s1 == '0') + s1++; + while (*s2 == '0') + s2++; + + first = 0; + while (is_digit(*s1) && is_digit(*s2)) { + if (first == 0) + first = *s1 - *s2; + s1++; + s2++; + } + + if (is_digit(*s1)) + return 1; + if (is_digit(*s2)) + return -1; + + if (first != 0) + return first; + } + + return strcmp(os1, os2); +} + +/* Turn off core dumps but only if we're running outside of a container. */ +void disable_coredumps(void) { + int r; + + if (detect_container() > 0) + return; + + r = write_string_file("/proc/sys/kernel/core_pattern", "|/bin/false", WRITE_STRING_FILE_DISABLE_BUFFER); + if (r < 0) + log_debug_errno(r, "Failed to turn off coredumps, ignoring: %m"); +} diff --git a/src/basic/util.h b/src/basic/util.h new file mode 100644 index 0000000..dc33d66 --- /dev/null +++ b/src/basic/util.h @@ -0,0 +1,253 @@ +/* SPDX-License-Identifier: LGPL-2.1+ */ +#pragma once + +#include <alloca.h> +#include <errno.h> +#include <fcntl.h> +#include <inttypes.h> +#include <limits.h> +#include <locale.h> +#include <stdarg.h> +#include <stdbool.h> +#include <stddef.h> +#include <stdint.h> +#include <stdio.h> +#include <stdlib.h> +#include <string.h> +#include <sys/inotify.h> +#include <sys/socket.h> +#include <sys/stat.h> +#include <sys/statfs.h> +#include <sys/sysmacros.h> +#include <sys/types.h> +#include <time.h> +#include <unistd.h> + +#include "format-util.h" +#include "macro.h" +#include "time-util.h" + +size_t page_size(void) _pure_; +#define PAGE_ALIGN(l) ALIGN_TO((l), page_size()) + +static inline const char* yes_no(bool b) { + return b ? "yes" : "no"; +} + +static inline const char* true_false(bool b) { + return b ? "true" : "false"; +} + +static inline const char* one_zero(bool b) { + return b ? "1" : "0"; +} + +static inline const char* enable_disable(bool b) { + return b ? "enable" : "disable"; +} + +bool plymouth_running(void); + +bool display_is_local(const char *display) _pure_; + +#define NULSTR_FOREACH(i, l) \ + for ((i) = (l); (i) && *(i); (i) = strchr((i), 0)+1) + +#define NULSTR_FOREACH_PAIR(i, j, l) \ + for ((i) = (l), (j) = strchr((i), 0)+1; (i) && *(i); (i) = strchr((j), 0)+1, (j) = *(i) ? strchr((i), 0)+1 : (i)) + +extern int saved_argc; +extern char **saved_argv; + +bool kexec_loaded(void); + +int prot_from_flags(int flags) _const_; + +bool in_initrd(void); +void in_initrd_force(bool value); + +void *xbsearch_r(const void *key, const void *base, size_t nmemb, size_t size, + __compar_d_fn_t compar, void *arg); + +#define typesafe_bsearch_r(k, b, n, func, userdata) \ + ({ \ + const typeof(b[0]) *_k = k; \ + int (*_func_)(const typeof(b[0])*, const typeof(b[0])*, typeof(userdata)) = func; \ + xbsearch_r((const void*) _k, (b), (n), sizeof((b)[0]), (__compar_d_fn_t) _func_, userdata); \ + }) + +/** + * Normal bsearch requires base to be nonnull. Here were require + * that only if nmemb > 0. + */ +static inline void* bsearch_safe(const void *key, const void *base, + size_t nmemb, size_t size, __compar_fn_t compar) { + if (nmemb <= 0) + return NULL; + + assert(base); + return bsearch(key, base, nmemb, size, compar); +} + +#define typesafe_bsearch(k, b, n, func) \ + ({ \ + const typeof(b[0]) *_k = k; \ + int (*_func_)(const typeof(b[0])*, const typeof(b[0])*) = func; \ + bsearch_safe((const void*) _k, (b), (n), sizeof((b)[0]), (__compar_fn_t) _func_); \ + }) + +/** + * Normal qsort requires base to be nonnull. Here were require + * that only if nmemb > 0. + */ +static inline void qsort_safe(void *base, size_t nmemb, size_t size, __compar_fn_t compar) { + if (nmemb <= 1) + return; + + assert(base); + qsort(base, nmemb, size, compar); +} + +/* A wrapper around the above, but that adds typesafety: the element size is automatically derived from the type and so + * is the prototype for the comparison function */ +#define typesafe_qsort(p, n, func) \ + ({ \ + int (*_func_)(const typeof(p[0])*, const typeof(p[0])*) = func; \ + qsort_safe((p), (n), sizeof((p)[0]), (__compar_fn_t) _func_); \ + }) + +static inline void qsort_r_safe(void *base, size_t nmemb, size_t size, __compar_d_fn_t compar, void *userdata) { + if (nmemb <= 1) + return; + + assert(base); + qsort_r(base, nmemb, size, compar, userdata); +} + +#define typesafe_qsort_r(p, n, func, userdata) \ + ({ \ + int (*_func_)(const typeof(p[0])*, const typeof(p[0])*, typeof(userdata)) = func; \ + qsort_r_safe((p), (n), sizeof((p)[0]), (__compar_d_fn_t) _func_, userdata); \ + }) + +/* Normal memcpy requires src to be nonnull. We do nothing if n is 0. */ +static inline void memcpy_safe(void *dst, const void *src, size_t n) { + if (n == 0) + return; + assert(src); + memcpy(dst, src, n); +} + +/* Normal memcmp requires s1 and s2 to be nonnull. We do nothing if n is 0. */ +static inline int memcmp_safe(const void *s1, const void *s2, size_t n) { + if (n == 0) + return 0; + assert(s1); + assert(s2); + return memcmp(s1, s2, n); +} + +/* Compare s1 (length n1) with s2 (length n2) in lexicographic order. */ +static inline int memcmp_nn(const void *s1, size_t n1, const void *s2, size_t n2) { + return memcmp_safe(s1, s2, MIN(n1, n2)) + ?: CMP(n1, n2); +} + +int on_ac_power(void); + +#define memzero(x,l) \ + ({ \ + size_t _l_ = (l); \ + void *_x_ = (x); \ + _l_ == 0 ? _x_ : memset(_x_, 0, _l_); \ + }) + +#define zero(x) (memzero(&(x), sizeof(x))) + +bool memeqzero(const void *data, size_t length); + +#define eqzero(x) memeqzero(x, sizeof(x)) + +static inline void *mempset(void *s, int c, size_t n) { + memset(s, c, n); + return (uint8_t*)s + n; +} + +static inline void _reset_errno_(int *saved_errno) { + if (*saved_errno < 0) /* Invalidated by UNPROTECT_ERRNO? */ + return; + + errno = *saved_errno; +} + +#define PROTECT_ERRNO \ + _cleanup_(_reset_errno_) _unused_ int _saved_errno_ = errno + +#define UNPROTECT_ERRNO \ + do { \ + errno = _saved_errno_; \ + _saved_errno_ = -1; \ + } while (false) + +static inline int negative_errno(void) { + /* This helper should be used to shut up gcc if you know 'errno' is + * negative. Instead of "return -errno;", use "return negative_errno();" + * It will suppress bogus gcc warnings in case it assumes 'errno' might + * be 0 and thus the caller's error-handling might not be triggered. */ + assert_return(errno > 0, -EINVAL); + return -errno; +} + +static inline unsigned u64log2(uint64_t n) { +#if __SIZEOF_LONG_LONG__ == 8 + return (n > 1) ? (unsigned) __builtin_clzll(n) ^ 63U : 0; +#else +#error "Wut?" +#endif +} + +static inline unsigned u32ctz(uint32_t n) { +#if __SIZEOF_INT__ == 4 + return n != 0 ? __builtin_ctz(n) : 32; +#else +#error "Wut?" +#endif +} + +static inline unsigned log2i(int x) { + assert(x > 0); + + return __SIZEOF_INT__ * 8 - __builtin_clz(x) - 1; +} + +static inline unsigned log2u(unsigned x) { + assert(x > 0); + + return sizeof(unsigned) * 8 - __builtin_clz(x) - 1; +} + +static inline unsigned log2u_round_up(unsigned x) { + assert(x > 0); + + if (x == 1) + return 0; + + return log2u(x - 1) + 1; +} + +int container_get_leader(const char *machine, pid_t *pid); + +int namespace_open(pid_t pid, int *pidns_fd, int *mntns_fd, int *netns_fd, int *userns_fd, int *root_fd); +int namespace_enter(int pidns_fd, int mntns_fd, int netns_fd, int userns_fd, int root_fd); + +uint64_t physical_memory(void); +uint64_t physical_memory_scale(uint64_t v, uint64_t max); + +uint64_t system_tasks_max(void); +uint64_t system_tasks_max_scale(uint64_t v, uint64_t max); + +int version(void); + +int str_verscmp(const char *s1, const char *s2); + +void disable_coredumps(void); diff --git a/src/basic/virt.c b/src/basic/virt.c new file mode 100644 index 0000000..f63f15f --- /dev/null +++ b/src/basic/virt.c @@ -0,0 +1,642 @@ +/* SPDX-License-Identifier: LGPL-2.1+ */ + +#if defined(__i386__) || defined(__x86_64__) +#include <cpuid.h> +#endif +#include <errno.h> +#include <stdint.h> +#include <stdlib.h> +#include <string.h> +#include <unistd.h> + +#include "alloc-util.h" +#include "dirent-util.h" +#include "env-util.h" +#include "fd-util.h" +#include "fileio.h" +#include "macro.h" +#include "process-util.h" +#include "stat-util.h" +#include "string-table.h" +#include "string-util.h" +#include "virt.h" + +static int detect_vm_cpuid(void) { + + /* CPUID is an x86 specific interface. */ +#if defined(__i386__) || defined(__x86_64__) + + static const struct { + const char *cpuid; + int id; + } cpuid_vendor_table[] = { + { "XenVMMXenVMM", VIRTUALIZATION_XEN }, + { "KVMKVMKVM", VIRTUALIZATION_KVM }, + { "TCGTCGTCGTCG", VIRTUALIZATION_QEMU }, + /* http://kb.vmware.com/selfservice/microsites/search.do?language=en_US&cmd=displayKC&externalId=1009458 */ + { "VMwareVMware", VIRTUALIZATION_VMWARE }, + /* https://docs.microsoft.com/en-us/virtualization/hyper-v-on-windows/reference/tlfs */ + { "Microsoft Hv", VIRTUALIZATION_MICROSOFT }, + /* https://wiki.freebsd.org/bhyve */ + { "bhyve bhyve ", VIRTUALIZATION_BHYVE }, + { "QNXQVMBSQG", VIRTUALIZATION_QNX }, + }; + + uint32_t eax, ebx, ecx, edx; + bool hypervisor; + + /* http://lwn.net/Articles/301888/ */ + + /* First detect whether there is a hypervisor */ + if (__get_cpuid(1, &eax, &ebx, &ecx, &edx) == 0) + return VIRTUALIZATION_NONE; + + hypervisor = ecx & 0x80000000U; + + if (hypervisor) { + union { + uint32_t sig32[3]; + char text[13]; + } sig = {}; + unsigned j; + + /* There is a hypervisor, see what it is */ + __cpuid(0x40000000U, eax, ebx, ecx, edx); + + sig.sig32[0] = ebx; + sig.sig32[1] = ecx; + sig.sig32[2] = edx; + + log_debug("Virtualization found, CPUID=%s", sig.text); + + for (j = 0; j < ELEMENTSOF(cpuid_vendor_table); j ++) + if (streq(sig.text, cpuid_vendor_table[j].cpuid)) + return cpuid_vendor_table[j].id; + + return VIRTUALIZATION_VM_OTHER; + } +#endif + log_debug("No virtualization found in CPUID"); + + return VIRTUALIZATION_NONE; +} + +static int detect_vm_device_tree(void) { +#if defined(__arm__) || defined(__aarch64__) || defined(__powerpc__) || defined(__powerpc64__) + _cleanup_free_ char *hvtype = NULL; + int r; + + r = read_one_line_file("/proc/device-tree/hypervisor/compatible", &hvtype); + if (r == -ENOENT) { + _cleanup_closedir_ DIR *dir = NULL; + struct dirent *dent; + + dir = opendir("/proc/device-tree"); + if (!dir) { + if (errno == ENOENT) { + log_debug_errno(errno, "/proc/device-tree: %m"); + return VIRTUALIZATION_NONE; + } + return -errno; + } + + FOREACH_DIRENT(dent, dir, return -errno) + if (strstr(dent->d_name, "fw-cfg")) { + log_debug("Virtualization QEMU: \"fw-cfg\" present in /proc/device-tree/%s", dent->d_name); + return VIRTUALIZATION_QEMU; + } + + log_debug("No virtualization found in /proc/device-tree/*"); + return VIRTUALIZATION_NONE; + } else if (r < 0) + return r; + + log_debug("Virtualization %s found in /proc/device-tree/hypervisor/compatible", hvtype); + if (streq(hvtype, "linux,kvm")) + return VIRTUALIZATION_KVM; + else if (strstr(hvtype, "xen")) + return VIRTUALIZATION_XEN; + else + return VIRTUALIZATION_VM_OTHER; +#else + log_debug("This platform does not support /proc/device-tree"); + return VIRTUALIZATION_NONE; +#endif +} + +static int detect_vm_dmi(void) { +#if defined(__i386__) || defined(__x86_64__) || defined(__arm__) || defined(__aarch64__) + + static const char *const dmi_vendors[] = { + "/sys/class/dmi/id/product_name", /* Test this before sys_vendor to detect KVM over QEMU */ + "/sys/class/dmi/id/sys_vendor", + "/sys/class/dmi/id/board_vendor", + "/sys/class/dmi/id/bios_vendor" + }; + + static const struct { + const char *vendor; + int id; + } dmi_vendor_table[] = { + { "KVM", VIRTUALIZATION_KVM }, + { "QEMU", VIRTUALIZATION_QEMU }, + /* http://kb.vmware.com/selfservice/microsites/search.do?language=en_US&cmd=displayKC&externalId=1009458 */ + { "VMware", VIRTUALIZATION_VMWARE }, + { "VMW", VIRTUALIZATION_VMWARE }, + { "innotek GmbH", VIRTUALIZATION_ORACLE }, + { "Xen", VIRTUALIZATION_XEN }, + { "Bochs", VIRTUALIZATION_BOCHS }, + { "Parallels", VIRTUALIZATION_PARALLELS }, + /* https://wiki.freebsd.org/bhyve */ + { "BHYVE", VIRTUALIZATION_BHYVE }, + }; + unsigned i; + int r; + + for (i = 0; i < ELEMENTSOF(dmi_vendors); i++) { + _cleanup_free_ char *s = NULL; + unsigned j; + + r = read_one_line_file(dmi_vendors[i], &s); + if (r < 0) { + if (r == -ENOENT) + continue; + + return r; + } + + for (j = 0; j < ELEMENTSOF(dmi_vendor_table); j++) + if (startswith(s, dmi_vendor_table[j].vendor)) { + log_debug("Virtualization %s found in DMI (%s)", s, dmi_vendors[i]); + return dmi_vendor_table[j].id; + } + } +#endif + + log_debug("No virtualization found in DMI"); + + return VIRTUALIZATION_NONE; +} + +static int detect_vm_xen(void) { + + /* Check for Dom0 will be executed later in detect_vm_xen_dom0 + The presence of /proc/xen indicates some form of a Xen domain */ + if (access("/proc/xen", F_OK) < 0) { + log_debug("Virtualization XEN not found, /proc/xen does not exist"); + return VIRTUALIZATION_NONE; + } + + log_debug("Virtualization XEN found (/proc/xen exists)"); + return VIRTUALIZATION_XEN; +} + +#define XENFEAT_dom0 11 /* xen/include/public/features.h */ +#define PATH_FEATURES "/sys/hypervisor/properties/features" +/* Returns -errno, or 0 for domU, or 1 for dom0 */ +static int detect_vm_xen_dom0(void) { + _cleanup_free_ char *domcap = NULL; + char *cap, *i; + int r; + + r = read_one_line_file(PATH_FEATURES, &domcap); + if (r < 0 && r != -ENOENT) + return r; + if (r == 0) { + unsigned long features; + + /* Here, we need to use sscanf() instead of safe_atoul() + * as the string lacks the leading "0x". */ + r = sscanf(domcap, "%lx", &features); + if (r == 1) { + r = !!(features & (1U << XENFEAT_dom0)); + log_debug("Virtualization XEN, found %s with value %08lx, " + "XENFEAT_dom0 (indicating the 'hardware domain') is%s set.", + PATH_FEATURES, features, r ? "" : " not"); + return r; + } + log_debug("Virtualization XEN, found %s, unhandled content '%s'", + PATH_FEATURES, domcap); + } + + r = read_one_line_file("/proc/xen/capabilities", &domcap); + if (r == -ENOENT) { + log_debug("Virtualization XEN because /proc/xen/capabilities does not exist"); + return 0; + } + if (r < 0) + return r; + + i = domcap; + while ((cap = strsep(&i, ","))) + if (streq(cap, "control_d")) + break; + if (!cap) { + log_debug("Virtualization XEN DomU found (/proc/xen/capabilites)"); + return 0; + } + + log_debug("Virtualization XEN Dom0 ignored (/proc/xen/capabilities)"); + return 1; +} + +static int detect_vm_hypervisor(void) { + _cleanup_free_ char *hvtype = NULL; + int r; + + r = read_one_line_file("/sys/hypervisor/type", &hvtype); + if (r == -ENOENT) + return VIRTUALIZATION_NONE; + if (r < 0) + return r; + + log_debug("Virtualization %s found in /sys/hypervisor/type", hvtype); + + if (streq(hvtype, "xen")) + return VIRTUALIZATION_XEN; + else + return VIRTUALIZATION_VM_OTHER; +} + +static int detect_vm_uml(void) { + _cleanup_fclose_ FILE *f = NULL; + int r; + + /* Detect User-Mode Linux by reading /proc/cpuinfo */ + f = fopen("/proc/cpuinfo", "re"); + if (!f) { + if (errno == ENOENT) { + log_debug("/proc/cpuinfo not found, assuming no UML virtualization."); + return VIRTUALIZATION_NONE; + } + return -errno; + } + + for (;;) { + _cleanup_free_ char *line = NULL; + const char *t; + + r = read_line(f, LONG_LINE_MAX, &line); + if (r < 0) + return r; + if (r == 0) + break; + + t = startswith(line, "vendor_id\t: "); + if (t) { + if (startswith(t, "User Mode Linux")) { + log_debug("UML virtualization found in /proc/cpuinfo"); + return VIRTUALIZATION_UML; + } + + break; + } + } + + log_debug("UML virtualization not found in /proc/cpuinfo."); + return VIRTUALIZATION_NONE; +} + +static int detect_vm_zvm(void) { + +#if defined(__s390__) + _cleanup_free_ char *t = NULL; + int r; + + r = get_proc_field("/proc/sysinfo", "VM00 Control Program", WHITESPACE, &t); + if (r == -ENOENT) + return VIRTUALIZATION_NONE; + if (r < 0) + return r; + + log_debug("Virtualization %s found in /proc/sysinfo", t); + if (streq(t, "z/VM")) + return VIRTUALIZATION_ZVM; + else + return VIRTUALIZATION_KVM; +#else + log_debug("This platform does not support /proc/sysinfo"); + return VIRTUALIZATION_NONE; +#endif +} + +/* Returns a short identifier for the various VM implementations */ +int detect_vm(void) { + static thread_local int cached_found = _VIRTUALIZATION_INVALID; + bool other = false; + int r, dmi; + + if (cached_found >= 0) + return cached_found; + + /* We have to use the correct order here: + * + * → First, try to detect Oracle Virtualbox, even if it uses KVM, as well as Xen even if it cloaks as Microsoft + * Hyper-V. + * + * → Second, try to detect from CPUID, this will report KVM for whatever software is used even if info in DMI is + * overwritten. + * + * → Third, try to detect from DMI. */ + + dmi = detect_vm_dmi(); + if (IN_SET(dmi, VIRTUALIZATION_ORACLE, VIRTUALIZATION_XEN)) { + r = dmi; + goto finish; + } + + r = detect_vm_cpuid(); + if (r < 0) + return r; + if (r == VIRTUALIZATION_VM_OTHER) + other = true; + else if (r != VIRTUALIZATION_NONE) + goto finish; + + /* Now, let's get back to DMI */ + if (dmi < 0) + return dmi; + if (dmi == VIRTUALIZATION_VM_OTHER) + other = true; + else if (dmi != VIRTUALIZATION_NONE) { + r = dmi; + goto finish; + } + + /* x86 xen will most likely be detected by cpuid. If not (most likely + * because we're not an x86 guest), then we should try the /proc/xen + * directory next. If that's not found, then we check for the high-level + * hypervisor sysfs file. + */ + + r = detect_vm_xen(); + if (r < 0) + return r; + if (r == VIRTUALIZATION_VM_OTHER) + other = true; + else if (r != VIRTUALIZATION_NONE) + goto finish; + + r = detect_vm_hypervisor(); + if (r < 0) + return r; + if (r == VIRTUALIZATION_VM_OTHER) + other = true; + else if (r != VIRTUALIZATION_NONE) + goto finish; + + r = detect_vm_device_tree(); + if (r < 0) + return r; + if (r == VIRTUALIZATION_VM_OTHER) + other = true; + else if (r != VIRTUALIZATION_NONE) + goto finish; + + r = detect_vm_uml(); + if (r < 0) + return r; + if (r == VIRTUALIZATION_VM_OTHER) + other = true; + else if (r != VIRTUALIZATION_NONE) + goto finish; + + r = detect_vm_zvm(); + if (r < 0) + return r; + +finish: + /* x86 xen Dom0 is detected as XEN in hypervisor and maybe others. + * In order to detect the Dom0 as not virtualization we need to + * double-check it */ + if (r == VIRTUALIZATION_XEN) { + int dom0; + + dom0 = detect_vm_xen_dom0(); + if (dom0 < 0) + return dom0; + if (dom0 > 0) + r = VIRTUALIZATION_NONE; + } else if (r == VIRTUALIZATION_NONE && other) + r = VIRTUALIZATION_VM_OTHER; + + cached_found = r; + log_debug("Found VM virtualization %s", virtualization_to_string(r)); + return r; +} + +int detect_container(void) { + + static const struct { + const char *value; + int id; + } value_table[] = { + { "lxc", VIRTUALIZATION_LXC }, + { "lxc-libvirt", VIRTUALIZATION_LXC_LIBVIRT }, + { "systemd-nspawn", VIRTUALIZATION_SYSTEMD_NSPAWN }, + { "docker", VIRTUALIZATION_DOCKER }, + { "rkt", VIRTUALIZATION_RKT }, + }; + + static thread_local int cached_found = _VIRTUALIZATION_INVALID; + _cleanup_free_ char *m = NULL; + const char *e = NULL; + unsigned j; + int r; + + if (cached_found >= 0) + return cached_found; + + /* /proc/vz exists in container and outside of the container, /proc/bc only outside of the container. */ + if (access("/proc/vz", F_OK) >= 0 && + access("/proc/bc", F_OK) < 0) { + r = VIRTUALIZATION_OPENVZ; + goto finish; + } + + if (getpid_cached() == 1) { + /* If we are PID 1 we can just check our own environment variable, and that's authoritative. */ + + e = getenv("container"); + if (isempty(e)) { + r = VIRTUALIZATION_NONE; + goto finish; + } + + goto translate_name; + } + + /* Otherwise, PID 1 might have dropped this information into a file in /run. This is better than accessing + * /proc/1/environ, since we don't need CAP_SYS_PTRACE for that. */ + r = read_one_line_file("/run/systemd/container", &m); + if (r >= 0) { + e = m; + goto translate_name; + } + if (r != -ENOENT) + return log_debug_errno(r, "Failed to read /run/systemd/container: %m"); + + /* Fallback for cases where PID 1 was not systemd (for example, cases where init=/bin/sh is used. */ + r = getenv_for_pid(1, "container", &m); + if (r > 0) { + e = m; + goto translate_name; + } + if (r < 0) /* This only works if we have CAP_SYS_PTRACE, hence let's better ignore failures here */ + log_debug_errno(r, "Failed to read $container of PID 1, ignoring: %m"); + + /* Interestingly /proc/1/sched actually shows the host's PID for what we see as PID 1. Hence, if the PID shown + * there is not 1, we know we are in a PID namespace. and hence a container. */ + r = read_one_line_file("/proc/1/sched", &m); + if (r >= 0) { + const char *t; + + t = strrchr(m, '('); + if (!t) + return -EIO; + + if (!startswith(t, "(1,")) { + r = VIRTUALIZATION_CONTAINER_OTHER; + goto finish; + } + } else if (r != -ENOENT) + return r; + + /* If that didn't work, give up, assume no container manager. */ + r = VIRTUALIZATION_NONE; + goto finish; + +translate_name: + for (j = 0; j < ELEMENTSOF(value_table); j++) + if (streq(e, value_table[j].value)) { + r = value_table[j].id; + goto finish; + } + + r = VIRTUALIZATION_CONTAINER_OTHER; + +finish: + log_debug("Found container virtualization %s.", virtualization_to_string(r)); + cached_found = r; + return r; +} + +int detect_virtualization(void) { + int r; + + r = detect_container(); + if (r == 0) + r = detect_vm(); + + return r; +} + +static int userns_has_mapping(const char *name) { + _cleanup_fclose_ FILE *f = NULL; + _cleanup_free_ char *buf = NULL; + size_t n_allocated = 0; + ssize_t n; + uint32_t a, b, c; + int r; + + f = fopen(name, "re"); + if (!f) { + log_debug_errno(errno, "Failed to open %s: %m", name); + return errno == ENOENT ? false : -errno; + } + + n = getline(&buf, &n_allocated, f); + if (n < 0) { + if (feof(f)) { + log_debug("%s is empty, we're in an uninitialized user namespace", name); + return true; + } + + return log_debug_errno(errno, "Failed to read %s: %m", name); + } + + r = sscanf(buf, "%"PRIu32" %"PRIu32" %"PRIu32, &a, &b, &c); + if (r < 3) + return log_debug_errno(errno, "Failed to parse %s: %m", name); + + if (a == 0 && b == 0 && c == UINT32_MAX) { + /* The kernel calls mappings_overlap() and does not allow overlaps */ + log_debug("%s has a full 1:1 mapping", name); + return false; + } + + /* Anything else implies that we are in a user namespace */ + log_debug("Mapping found in %s, we're in a user namespace", name); + return true; +} + +int running_in_userns(void) { + _cleanup_free_ char *line = NULL; + int r; + + r = userns_has_mapping("/proc/self/uid_map"); + if (r != 0) + return r; + + r = userns_has_mapping("/proc/self/gid_map"); + if (r != 0) + return r; + + /* "setgroups" file was added in kernel v3.18-rc6-15-g9cc46516dd. It is also + * possible to compile a kernel without CONFIG_USER_NS, in which case "setgroups" + * also does not exist. We cannot distinguish those two cases, so assume that + * we're running on a stripped-down recent kernel, rather than on an old one, + * and if the file is not found, return false. + */ + r = read_one_line_file("/proc/self/setgroups", &line); + if (r < 0) { + log_debug_errno(r, "/proc/self/setgroups: %m"); + return r == -ENOENT ? false : r; + } + + truncate_nl(line); + r = streq(line, "deny"); + /* See user_namespaces(7) for a description of this "setgroups" contents. */ + log_debug("/proc/self/setgroups contains \"%s\", %s user namespace", line, r ? "in" : "not in"); + return r; +} + +int running_in_chroot(void) { + int r; + + if (getenv_bool("SYSTEMD_IGNORE_CHROOT") > 0) + return 0; + + r = files_same("/proc/1/root", "/", 0); + if (r < 0) + return r; + + return r == 0; +} + +static const char *const virtualization_table[_VIRTUALIZATION_MAX] = { + [VIRTUALIZATION_NONE] = "none", + [VIRTUALIZATION_KVM] = "kvm", + [VIRTUALIZATION_QEMU] = "qemu", + [VIRTUALIZATION_BOCHS] = "bochs", + [VIRTUALIZATION_XEN] = "xen", + [VIRTUALIZATION_UML] = "uml", + [VIRTUALIZATION_VMWARE] = "vmware", + [VIRTUALIZATION_ORACLE] = "oracle", + [VIRTUALIZATION_MICROSOFT] = "microsoft", + [VIRTUALIZATION_ZVM] = "zvm", + [VIRTUALIZATION_PARALLELS] = "parallels", + [VIRTUALIZATION_BHYVE] = "bhyve", + [VIRTUALIZATION_QNX] = "qnx", + [VIRTUALIZATION_VM_OTHER] = "vm-other", + + [VIRTUALIZATION_SYSTEMD_NSPAWN] = "systemd-nspawn", + [VIRTUALIZATION_LXC_LIBVIRT] = "lxc-libvirt", + [VIRTUALIZATION_LXC] = "lxc", + [VIRTUALIZATION_OPENVZ] = "openvz", + [VIRTUALIZATION_DOCKER] = "docker", + [VIRTUALIZATION_RKT] = "rkt", + [VIRTUALIZATION_CONTAINER_OTHER] = "container-other", +}; + +DEFINE_STRING_TABLE_LOOKUP(virtualization, int); diff --git a/src/basic/virt.h b/src/basic/virt.h new file mode 100644 index 0000000..c4cf4bf --- /dev/null +++ b/src/basic/virt.h @@ -0,0 +1,57 @@ +/* SPDX-License-Identifier: LGPL-2.1+ */ +#pragma once + +#include <stdbool.h> + +#include "macro.h" + +enum { + VIRTUALIZATION_NONE = 0, + + VIRTUALIZATION_VM_FIRST, + VIRTUALIZATION_KVM = VIRTUALIZATION_VM_FIRST, + VIRTUALIZATION_QEMU, + VIRTUALIZATION_BOCHS, + VIRTUALIZATION_XEN, + VIRTUALIZATION_UML, + VIRTUALIZATION_VMWARE, + VIRTUALIZATION_ORACLE, + VIRTUALIZATION_MICROSOFT, + VIRTUALIZATION_ZVM, + VIRTUALIZATION_PARALLELS, + VIRTUALIZATION_BHYVE, + VIRTUALIZATION_QNX, + VIRTUALIZATION_VM_OTHER, + VIRTUALIZATION_VM_LAST = VIRTUALIZATION_VM_OTHER, + + VIRTUALIZATION_CONTAINER_FIRST, + VIRTUALIZATION_SYSTEMD_NSPAWN = VIRTUALIZATION_CONTAINER_FIRST, + VIRTUALIZATION_LXC_LIBVIRT, + VIRTUALIZATION_LXC, + VIRTUALIZATION_OPENVZ, + VIRTUALIZATION_DOCKER, + VIRTUALIZATION_RKT, + VIRTUALIZATION_CONTAINER_OTHER, + VIRTUALIZATION_CONTAINER_LAST = VIRTUALIZATION_CONTAINER_OTHER, + + _VIRTUALIZATION_MAX, + _VIRTUALIZATION_INVALID = -1 +}; + +static inline bool VIRTUALIZATION_IS_VM(int x) { + return x >= VIRTUALIZATION_VM_FIRST && x <= VIRTUALIZATION_VM_LAST; +} + +static inline bool VIRTUALIZATION_IS_CONTAINER(int x) { + return x >= VIRTUALIZATION_CONTAINER_FIRST && x <= VIRTUALIZATION_CONTAINER_LAST; +} + +int detect_vm(void); +int detect_container(void); +int detect_virtualization(void); + +int running_in_userns(void); +int running_in_chroot(void); + +const char *virtualization_to_string(int v) _const_; +int virtualization_from_string(const char *s) _pure_; diff --git a/src/basic/xattr-util.c b/src/basic/xattr-util.c new file mode 100644 index 0000000..0ee0979 --- /dev/null +++ b/src/basic/xattr-util.c @@ -0,0 +1,217 @@ +/* SPDX-License-Identifier: LGPL-2.1+ */ + +#include <errno.h> +#include <fcntl.h> +#include <stdint.h> +#include <stdlib.h> +#include <string.h> +#include <sys/time.h> +#include <sys/xattr.h> + +#include "alloc-util.h" +#include "fd-util.h" +#include "macro.h" +#include "missing.h" +#include "sparse-endian.h" +#include "stdio-util.h" +#include "string-util.h" +#include "time-util.h" +#include "xattr-util.h" + +int getxattr_malloc(const char *path, const char *name, char **value, bool allow_symlink) { + char *v; + size_t l; + ssize_t n; + + assert(path); + assert(name); + assert(value); + + for (l = 100; ; l = (size_t) n + 1) { + v = new0(char, l); + if (!v) + return -ENOMEM; + + if (allow_symlink) + n = lgetxattr(path, name, v, l); + else + n = getxattr(path, name, v, l); + + if (n >= 0 && (size_t) n < l) { + *value = v; + return n; + } + + free(v); + + if (n < 0 && errno != ERANGE) + return -errno; + + if (allow_symlink) + n = lgetxattr(path, name, NULL, 0); + else + n = getxattr(path, name, NULL, 0); + if (n < 0) + return -errno; + } +} + +int fgetxattr_malloc(int fd, const char *name, char **value) { + char *v; + size_t l; + ssize_t n; + + assert(fd >= 0); + assert(name); + assert(value); + + for (l = 100; ; l = (size_t) n + 1) { + v = new0(char, l); + if (!v) + return -ENOMEM; + + n = fgetxattr(fd, name, v, l); + + if (n >= 0 && (size_t) n < l) { + *value = v; + return n; + } + + free(v); + + if (n < 0 && errno != ERANGE) + return -errno; + + n = fgetxattr(fd, name, NULL, 0); + if (n < 0) + return -errno; + } +} + +int fgetxattrat_fake( + int dirfd, + const char *filename, + const char *attribute, + void *value, size_t size, + int flags, + size_t *ret_size) { + + char fn[STRLEN("/proc/self/fd/") + DECIMAL_STR_MAX(int) + 1]; + _cleanup_close_ int fd = -1; + ssize_t l; + + /* The kernel doesn't have a fgetxattrat() command, hence let's emulate one */ + + if (flags & ~(AT_SYMLINK_NOFOLLOW|AT_EMPTY_PATH)) + return -EINVAL; + + if (isempty(filename)) { + if (!(flags & AT_EMPTY_PATH)) + return -EINVAL; + + xsprintf(fn, "/proc/self/fd/%i", dirfd); + } else { + fd = openat(dirfd, filename, O_CLOEXEC|O_PATH|(flags & AT_SYMLINK_NOFOLLOW ? O_NOFOLLOW : 0)); + if (fd < 0) + return -errno; + + xsprintf(fn, "/proc/self/fd/%i", fd); + } + + l = getxattr(fn, attribute, value, size); + if (l < 0) + return -errno; + + *ret_size = l; + return 0; +} + +static int parse_crtime(le64_t le, usec_t *usec) { + uint64_t u; + + assert(usec); + + u = le64toh(le); + if (IN_SET(u, 0, (uint64_t) -1)) + return -EIO; + + *usec = (usec_t) u; + return 0; +} + +int fd_getcrtime_at(int dirfd, const char *name, usec_t *ret, int flags) { + struct_statx sx; + usec_t a, b; + le64_t le; + size_t n; + int r; + + assert(ret); + + if (flags & ~(AT_EMPTY_PATH|AT_SYMLINK_NOFOLLOW)) + return -EINVAL; + + /* So here's the deal: the creation/birth time (crtime/btime) of a file is a relatively newly supported concept + * on Linux (or more strictly speaking: a concept that only recently got supported in the API, it was + * implemented on various file systems on the lower level since a while, but never was accessible). However, we + * needed a concept like that for vaccuuming algorithms and such, hence we emulated it via a user xattr for a + * long time. Starting with Linux 4.11 there's statx() which exposes the timestamp to userspace for the first + * time, where it is available. Thius function will read it, but it tries to keep some compatibility with older + * systems: we try to read both the crtime/btime and the xattr, and then use whatever is older. After all the + * concept is useful for determining how "old" a file really is, and hence using the older of the two makes + * most sense. */ + + if (statx(dirfd, strempty(name), flags|AT_STATX_DONT_SYNC, STATX_BTIME, &sx) >= 0 && + (sx.stx_mask & STATX_BTIME) && + sx.stx_btime.tv_sec != 0) + a = (usec_t) sx.stx_btime.tv_sec * USEC_PER_SEC + + (usec_t) sx.stx_btime.tv_nsec / NSEC_PER_USEC; + else + a = USEC_INFINITY; + + r = fgetxattrat_fake(dirfd, name, "user.crtime_usec", &le, sizeof(le), flags, &n); + if (r >= 0) { + if (n != sizeof(le)) + r = -EIO; + else + r = parse_crtime(le, &b); + } + if (r < 0) { + if (a != USEC_INFINITY) { + *ret = a; + return 0; + } + + return r; + } + + if (a != USEC_INFINITY) + *ret = MIN(a, b); + else + *ret = b; + + return 0; +} + +int fd_getcrtime(int fd, usec_t *ret) { + return fd_getcrtime_at(fd, NULL, ret, AT_EMPTY_PATH); +} + +int path_getcrtime(const char *p, usec_t *ret) { + return fd_getcrtime_at(AT_FDCWD, p, ret, 0); +} + +int fd_setcrtime(int fd, usec_t usec) { + le64_t le; + + assert(fd >= 0); + + if (IN_SET(usec, 0, USEC_INFINITY)) + usec = now(CLOCK_REALTIME); + + le = htole64((uint64_t) usec); + if (fsetxattr(fd, "user.crtime_usec", &le, sizeof(le), 0) < 0) + return -errno; + + return 0; +} diff --git a/src/basic/xattr-util.h b/src/basic/xattr-util.h new file mode 100644 index 0000000..9fa85d7 --- /dev/null +++ b/src/basic/xattr-util.h @@ -0,0 +1,25 @@ +/* SPDX-License-Identifier: LGPL-2.1+ */ +#pragma once + +#include <stdbool.h> +#include <stddef.h> +#include <sys/types.h> + +#include "time-util.h" + +int getxattr_malloc(const char *path, const char *name, char **value, bool allow_symlink); +int fgetxattr_malloc(int fd, const char *name, char **value); + +int fgetxattrat_fake( + int dirfd, + const char *filename, + const char *attribute, + void *value, size_t size, + int flags, + size_t *ret_size); + +int fd_setcrtime(int fd, usec_t usec); + +int fd_getcrtime(int fd, usec_t *usec); +int path_getcrtime(const char *p, usec_t *usec); +int fd_getcrtime_at(int dirfd, const char *name, usec_t *usec, int flags); |