diff options
Diffstat (limited to '')
-rw-r--r-- | lib/vdso/Kconfig | 33 | ||||
-rw-r--r-- | lib/vdso/Makefile | 22 | ||||
-rw-r--r-- | lib/vdso/gettimeofday.c | 441 |
3 files changed, 496 insertions, 0 deletions
diff --git a/lib/vdso/Kconfig b/lib/vdso/Kconfig new file mode 100644 index 000000000..d883ac299 --- /dev/null +++ b/lib/vdso/Kconfig @@ -0,0 +1,33 @@ +# SPDX-License-Identifier: GPL-2.0 + +config HAVE_GENERIC_VDSO + bool + +if HAVE_GENERIC_VDSO + +config GENERIC_GETTIMEOFDAY + bool + help + This is a generic implementation of gettimeofday vdso. + Each architecture that enables this feature has to + provide the fallback implementation. + +config GENERIC_VDSO_32 + bool + depends on GENERIC_GETTIMEOFDAY && !64BIT + help + This config option helps to avoid possible performance issues + in 32 bit only architectures. + +config GENERIC_COMPAT_VDSO + bool + help + This config option enables the compat VDSO layer. + +config GENERIC_VDSO_TIME_NS + bool + help + Selected by architectures which support time namespaces in the + VDSO + +endif diff --git a/lib/vdso/Makefile b/lib/vdso/Makefile new file mode 100644 index 000000000..e814061d6 --- /dev/null +++ b/lib/vdso/Makefile @@ -0,0 +1,22 @@ +# SPDX-License-Identifier: GPL-2.0 + +GENERIC_VDSO_MK_PATH := $(abspath $(lastword $(MAKEFILE_LIST))) +GENERIC_VDSO_DIR := $(dir $(GENERIC_VDSO_MK_PATH)) + +c-gettimeofday-$(CONFIG_GENERIC_GETTIMEOFDAY) := $(addprefix $(GENERIC_VDSO_DIR), gettimeofday.c) + +# This cmd checks that the vdso library does not contain absolute relocation +# It has to be called after the linking of the vdso library and requires it +# as a parameter. +# +# $(ARCH_REL_TYPE_ABS) is defined in the arch specific makefile and corresponds +# to the absolute relocation types printed by "objdump -R" and accepted by the +# dynamic linker. +ifndef ARCH_REL_TYPE_ABS +$(error ARCH_REL_TYPE_ABS is not set) +endif + +quiet_cmd_vdso_check = VDSOCHK $@ + cmd_vdso_check = if $(OBJDUMP) -R $@ | grep -E -h "$(ARCH_REL_TYPE_ABS)"; \ + then (echo >&2 "$@: dynamic relocations are not supported"; \ + rm -f $@; /bin/false); fi diff --git a/lib/vdso/gettimeofday.c b/lib/vdso/gettimeofday.c new file mode 100644 index 000000000..ce2f69552 --- /dev/null +++ b/lib/vdso/gettimeofday.c @@ -0,0 +1,441 @@ +// SPDX-License-Identifier: GPL-2.0 +/* + * Generic userspace implementations of gettimeofday() and similar. + */ +#include <vdso/datapage.h> +#include <vdso/helpers.h> + +#ifndef vdso_calc_delta +/* + * Default implementation which works for all sane clocksources. That + * obviously excludes x86/TSC. + */ +static __always_inline +u64 vdso_calc_delta(u64 cycles, u64 last, u64 mask, u32 mult) +{ + return ((cycles - last) & mask) * mult; +} +#endif + +#ifndef vdso_shift_ns +static __always_inline u64 vdso_shift_ns(u64 ns, u32 shift) +{ + return ns >> shift; +} +#endif + +#ifndef __arch_vdso_hres_capable +static inline bool __arch_vdso_hres_capable(void) +{ + return true; +} +#endif + +#ifndef vdso_clocksource_ok +static inline bool vdso_clocksource_ok(const struct vdso_data *vd) +{ + return vd->clock_mode != VDSO_CLOCKMODE_NONE; +} +#endif + +#ifndef vdso_cycles_ok +static inline bool vdso_cycles_ok(u64 cycles) +{ + return true; +} +#endif + +#ifdef CONFIG_TIME_NS +static __always_inline int do_hres_timens(const struct vdso_data *vdns, clockid_t clk, + struct __kernel_timespec *ts) +{ + const struct vdso_data *vd; + const struct timens_offset *offs = &vdns->offset[clk]; + const struct vdso_timestamp *vdso_ts; + u64 cycles, last, ns; + u32 seq; + s64 sec; + + vd = vdns - (clk == CLOCK_MONOTONIC_RAW ? CS_RAW : CS_HRES_COARSE); + vd = __arch_get_timens_vdso_data(vd); + if (clk != CLOCK_MONOTONIC_RAW) + vd = &vd[CS_HRES_COARSE]; + else + vd = &vd[CS_RAW]; + vdso_ts = &vd->basetime[clk]; + + do { + seq = vdso_read_begin(vd); + + if (unlikely(!vdso_clocksource_ok(vd))) + return -1; + + cycles = __arch_get_hw_counter(vd->clock_mode, vd); + if (unlikely(!vdso_cycles_ok(cycles))) + return -1; + ns = vdso_ts->nsec; + last = vd->cycle_last; + ns += vdso_calc_delta(cycles, last, vd->mask, vd->mult); + ns = vdso_shift_ns(ns, vd->shift); + sec = vdso_ts->sec; + } while (unlikely(vdso_read_retry(vd, seq))); + + /* Add the namespace offset */ + sec += offs->sec; + ns += offs->nsec; + + /* + * Do this outside the loop: a race inside the loop could result + * in __iter_div_u64_rem() being extremely slow. + */ + ts->tv_sec = sec + __iter_div_u64_rem(ns, NSEC_PER_SEC, &ns); + ts->tv_nsec = ns; + + return 0; +} +#else +static __always_inline +const struct vdso_data *__arch_get_timens_vdso_data(const struct vdso_data *vd) +{ + return NULL; +} + +static __always_inline int do_hres_timens(const struct vdso_data *vdns, clockid_t clk, + struct __kernel_timespec *ts) +{ + return -EINVAL; +} +#endif + +static __always_inline int do_hres(const struct vdso_data *vd, clockid_t clk, + struct __kernel_timespec *ts) +{ + const struct vdso_timestamp *vdso_ts = &vd->basetime[clk]; + u64 cycles, last, sec, ns; + u32 seq; + + /* Allows to compile the high resolution parts out */ + if (!__arch_vdso_hres_capable()) + return -1; + + do { + /* + * Open coded to handle VDSO_CLOCKMODE_TIMENS. Time namespace + * enabled tasks have a special VVAR page installed which + * has vd->seq set to 1 and vd->clock_mode set to + * VDSO_CLOCKMODE_TIMENS. For non time namespace affected tasks + * this does not affect performance because if vd->seq is + * odd, i.e. a concurrent update is in progress the extra + * check for vd->clock_mode is just a few extra + * instructions while spin waiting for vd->seq to become + * even again. + */ + while (unlikely((seq = READ_ONCE(vd->seq)) & 1)) { + if (IS_ENABLED(CONFIG_TIME_NS) && + vd->clock_mode == VDSO_CLOCKMODE_TIMENS) + return do_hres_timens(vd, clk, ts); + cpu_relax(); + } + smp_rmb(); + + if (unlikely(!vdso_clocksource_ok(vd))) + return -1; + + cycles = __arch_get_hw_counter(vd->clock_mode, vd); + if (unlikely(!vdso_cycles_ok(cycles))) + return -1; + ns = vdso_ts->nsec; + last = vd->cycle_last; + ns += vdso_calc_delta(cycles, last, vd->mask, vd->mult); + ns = vdso_shift_ns(ns, vd->shift); + sec = vdso_ts->sec; + } while (unlikely(vdso_read_retry(vd, seq))); + + /* + * Do this outside the loop: a race inside the loop could result + * in __iter_div_u64_rem() being extremely slow. + */ + ts->tv_sec = sec + __iter_div_u64_rem(ns, NSEC_PER_SEC, &ns); + ts->tv_nsec = ns; + + return 0; +} + +#ifdef CONFIG_TIME_NS +static __always_inline int do_coarse_timens(const struct vdso_data *vdns, clockid_t clk, + struct __kernel_timespec *ts) +{ + const struct vdso_data *vd = __arch_get_timens_vdso_data(vdns); + const struct vdso_timestamp *vdso_ts = &vd->basetime[clk]; + const struct timens_offset *offs = &vdns->offset[clk]; + u64 nsec; + s64 sec; + s32 seq; + + do { + seq = vdso_read_begin(vd); + sec = vdso_ts->sec; + nsec = vdso_ts->nsec; + } while (unlikely(vdso_read_retry(vd, seq))); + + /* Add the namespace offset */ + sec += offs->sec; + nsec += offs->nsec; + + /* + * Do this outside the loop: a race inside the loop could result + * in __iter_div_u64_rem() being extremely slow. + */ + ts->tv_sec = sec + __iter_div_u64_rem(nsec, NSEC_PER_SEC, &nsec); + ts->tv_nsec = nsec; + return 0; +} +#else +static __always_inline int do_coarse_timens(const struct vdso_data *vdns, clockid_t clk, + struct __kernel_timespec *ts) +{ + return -1; +} +#endif + +static __always_inline int do_coarse(const struct vdso_data *vd, clockid_t clk, + struct __kernel_timespec *ts) +{ + const struct vdso_timestamp *vdso_ts = &vd->basetime[clk]; + u32 seq; + + do { + /* + * Open coded to handle VDSO_CLOCK_TIMENS. See comment in + * do_hres(). + */ + while ((seq = READ_ONCE(vd->seq)) & 1) { + if (IS_ENABLED(CONFIG_TIME_NS) && + vd->clock_mode == VDSO_CLOCKMODE_TIMENS) + return do_coarse_timens(vd, clk, ts); + cpu_relax(); + } + smp_rmb(); + + ts->tv_sec = vdso_ts->sec; + ts->tv_nsec = vdso_ts->nsec; + } while (unlikely(vdso_read_retry(vd, seq))); + + return 0; +} + +static __always_inline int +__cvdso_clock_gettime_common(const struct vdso_data *vd, clockid_t clock, + struct __kernel_timespec *ts) +{ + u32 msk; + + /* Check for negative values or invalid clocks */ + if (unlikely((u32) clock >= MAX_CLOCKS)) + return -1; + + /* + * Convert the clockid to a bitmask and use it to check which + * clocks are handled in the VDSO directly. + */ + msk = 1U << clock; + if (likely(msk & VDSO_HRES)) + vd = &vd[CS_HRES_COARSE]; + else if (msk & VDSO_COARSE) + return do_coarse(&vd[CS_HRES_COARSE], clock, ts); + else if (msk & VDSO_RAW) + vd = &vd[CS_RAW]; + else + return -1; + + return do_hres(vd, clock, ts); +} + +static __maybe_unused int +__cvdso_clock_gettime_data(const struct vdso_data *vd, clockid_t clock, + struct __kernel_timespec *ts) +{ + int ret = __cvdso_clock_gettime_common(vd, clock, ts); + + if (unlikely(ret)) + return clock_gettime_fallback(clock, ts); + return 0; +} + +static __maybe_unused int +__cvdso_clock_gettime(clockid_t clock, struct __kernel_timespec *ts) +{ + return __cvdso_clock_gettime_data(__arch_get_vdso_data(), clock, ts); +} + +#ifdef BUILD_VDSO32 +static __maybe_unused int +__cvdso_clock_gettime32_data(const struct vdso_data *vd, clockid_t clock, + struct old_timespec32 *res) +{ + struct __kernel_timespec ts; + int ret; + + ret = __cvdso_clock_gettime_common(vd, clock, &ts); + + if (unlikely(ret)) + return clock_gettime32_fallback(clock, res); + + /* For ret == 0 */ + res->tv_sec = ts.tv_sec; + res->tv_nsec = ts.tv_nsec; + + return ret; +} + +static __maybe_unused int +__cvdso_clock_gettime32(clockid_t clock, struct old_timespec32 *res) +{ + return __cvdso_clock_gettime32_data(__arch_get_vdso_data(), clock, res); +} +#endif /* BUILD_VDSO32 */ + +static __maybe_unused int +__cvdso_gettimeofday_data(const struct vdso_data *vd, + struct __kernel_old_timeval *tv, struct timezone *tz) +{ + + if (likely(tv != NULL)) { + struct __kernel_timespec ts; + + if (do_hres(&vd[CS_HRES_COARSE], CLOCK_REALTIME, &ts)) + return gettimeofday_fallback(tv, tz); + + tv->tv_sec = ts.tv_sec; + tv->tv_usec = (u32)ts.tv_nsec / NSEC_PER_USEC; + } + + if (unlikely(tz != NULL)) { + if (IS_ENABLED(CONFIG_TIME_NS) && + vd->clock_mode == VDSO_CLOCKMODE_TIMENS) + vd = __arch_get_timens_vdso_data(vd); + + tz->tz_minuteswest = vd[CS_HRES_COARSE].tz_minuteswest; + tz->tz_dsttime = vd[CS_HRES_COARSE].tz_dsttime; + } + + return 0; +} + +static __maybe_unused int +__cvdso_gettimeofday(struct __kernel_old_timeval *tv, struct timezone *tz) +{ + return __cvdso_gettimeofday_data(__arch_get_vdso_data(), tv, tz); +} + +#ifdef VDSO_HAS_TIME +static __maybe_unused __kernel_old_time_t +__cvdso_time_data(const struct vdso_data *vd, __kernel_old_time_t *time) +{ + __kernel_old_time_t t; + + if (IS_ENABLED(CONFIG_TIME_NS) && + vd->clock_mode == VDSO_CLOCKMODE_TIMENS) + vd = __arch_get_timens_vdso_data(vd); + + t = READ_ONCE(vd[CS_HRES_COARSE].basetime[CLOCK_REALTIME].sec); + + if (time) + *time = t; + + return t; +} + +static __maybe_unused __kernel_old_time_t __cvdso_time(__kernel_old_time_t *time) +{ + return __cvdso_time_data(__arch_get_vdso_data(), time); +} +#endif /* VDSO_HAS_TIME */ + +#ifdef VDSO_HAS_CLOCK_GETRES +static __maybe_unused +int __cvdso_clock_getres_common(const struct vdso_data *vd, clockid_t clock, + struct __kernel_timespec *res) +{ + u32 msk; + u64 ns; + + /* Check for negative values or invalid clocks */ + if (unlikely((u32) clock >= MAX_CLOCKS)) + return -1; + + if (IS_ENABLED(CONFIG_TIME_NS) && + vd->clock_mode == VDSO_CLOCKMODE_TIMENS) + vd = __arch_get_timens_vdso_data(vd); + + /* + * Convert the clockid to a bitmask and use it to check which + * clocks are handled in the VDSO directly. + */ + msk = 1U << clock; + if (msk & (VDSO_HRES | VDSO_RAW)) { + /* + * Preserves the behaviour of posix_get_hrtimer_res(). + */ + ns = READ_ONCE(vd[CS_HRES_COARSE].hrtimer_res); + } else if (msk & VDSO_COARSE) { + /* + * Preserves the behaviour of posix_get_coarse_res(). + */ + ns = LOW_RES_NSEC; + } else { + return -1; + } + + if (likely(res)) { + res->tv_sec = 0; + res->tv_nsec = ns; + } + return 0; +} + +static __maybe_unused +int __cvdso_clock_getres_data(const struct vdso_data *vd, clockid_t clock, + struct __kernel_timespec *res) +{ + int ret = __cvdso_clock_getres_common(vd, clock, res); + + if (unlikely(ret)) + return clock_getres_fallback(clock, res); + return 0; +} + +static __maybe_unused +int __cvdso_clock_getres(clockid_t clock, struct __kernel_timespec *res) +{ + return __cvdso_clock_getres_data(__arch_get_vdso_data(), clock, res); +} + +#ifdef BUILD_VDSO32 +static __maybe_unused int +__cvdso_clock_getres_time32_data(const struct vdso_data *vd, clockid_t clock, + struct old_timespec32 *res) +{ + struct __kernel_timespec ts; + int ret; + + ret = __cvdso_clock_getres_common(vd, clock, &ts); + + if (unlikely(ret)) + return clock_getres32_fallback(clock, res); + + if (likely(res)) { + res->tv_sec = ts.tv_sec; + res->tv_nsec = ts.tv_nsec; + } + return ret; +} + +static __maybe_unused int +__cvdso_clock_getres_time32(clockid_t clock, struct old_timespec32 *res) +{ + return __cvdso_clock_getres_time32_data(__arch_get_vdso_data(), + clock, res); +} +#endif /* BUILD_VDSO32 */ +#endif /* VDSO_HAS_CLOCK_GETRES */ |