From 5d1646d90e1f2cceb9f0828f4b28318cd0ec7744 Mon Sep 17 00:00:00 2001 From: Daniel Baumann Date: Sat, 27 Apr 2024 12:05:51 +0200 Subject: Adding upstream version 5.10.209. Signed-off-by: Daniel Baumann --- arch/sparc/vdso/.gitignore | 4 + arch/sparc/vdso/Makefile | 144 ++++++++++ arch/sparc/vdso/checkundef.sh | 10 + arch/sparc/vdso/vclock_gettime.c | 373 ++++++++++++++++++++++++++ arch/sparc/vdso/vdso-layout.lds.S | 98 +++++++ arch/sparc/vdso/vdso-note.S | 12 + arch/sparc/vdso/vdso.lds.S | 27 ++ arch/sparc/vdso/vdso2c.c | 228 ++++++++++++++++ arch/sparc/vdso/vdso2c.h | 142 ++++++++++ arch/sparc/vdso/vdso32/.gitignore | 2 + arch/sparc/vdso/vdso32/vclock_gettime.c | 22 ++ arch/sparc/vdso/vdso32/vdso-note.S | 12 + arch/sparc/vdso/vdso32/vdso32.lds.S | 26 ++ arch/sparc/vdso/vma.c | 457 ++++++++++++++++++++++++++++++++ 14 files changed, 1557 insertions(+) create mode 100644 arch/sparc/vdso/.gitignore create mode 100644 arch/sparc/vdso/Makefile create mode 100644 arch/sparc/vdso/checkundef.sh create mode 100644 arch/sparc/vdso/vclock_gettime.c create mode 100644 arch/sparc/vdso/vdso-layout.lds.S create mode 100644 arch/sparc/vdso/vdso-note.S create mode 100644 arch/sparc/vdso/vdso.lds.S create mode 100644 arch/sparc/vdso/vdso2c.c create mode 100644 arch/sparc/vdso/vdso2c.h create mode 100644 arch/sparc/vdso/vdso32/.gitignore create mode 100644 arch/sparc/vdso/vdso32/vclock_gettime.c create mode 100644 arch/sparc/vdso/vdso32/vdso-note.S create mode 100644 arch/sparc/vdso/vdso32/vdso32.lds.S create mode 100644 arch/sparc/vdso/vma.c (limited to 'arch/sparc/vdso') diff --git a/arch/sparc/vdso/.gitignore b/arch/sparc/vdso/.gitignore new file mode 100644 index 000000000..8d4ebc990 --- /dev/null +++ b/arch/sparc/vdso/.gitignore @@ -0,0 +1,4 @@ +# SPDX-License-Identifier: GPL-2.0-only +vdso.lds +vdso-image-*.c +vdso2c diff --git a/arch/sparc/vdso/Makefile b/arch/sparc/vdso/Makefile new file mode 100644 index 000000000..c5e1545bc --- /dev/null +++ b/arch/sparc/vdso/Makefile @@ 
-0,0 +1,144 @@ +# SPDX-License-Identifier: GPL-2.0-only +# +# Building vDSO images for sparc. +# + +VDSO64-$(CONFIG_SPARC64) := y +VDSOCOMPAT-$(CONFIG_COMPAT) := y + +# files to link into the vdso +vobjs-y := vdso-note.o vclock_gettime.o + +# files to link into kernel +obj-y += vma.o + +# vDSO images to build +vdso_img-$(VDSO64-y) += 64 +vdso_img-$(VDSOCOMPAT-y) += 32 + +vobjs := $(foreach F,$(vobjs-y),$(obj)/$F) + +$(obj)/vdso.o: $(obj)/vdso.so + +targets += vdso.lds $(vobjs-y) + +# Build the vDSO image C files and link them in. +vdso_img_objs := $(vdso_img-y:%=vdso-image-%.o) +vdso_img_cfiles := $(vdso_img-y:%=vdso-image-%.c) +vdso_img_sodbg := $(vdso_img-y:%=vdso%.so.dbg) +obj-y += $(vdso_img_objs) +targets += $(vdso_img_cfiles) +targets += $(vdso_img_sodbg) $(vdso_img-y:%=vdso%.so) + +CPPFLAGS_vdso.lds += -P -C + +VDSO_LDFLAGS_vdso.lds = -m elf64_sparc -soname linux-vdso.so.1 --no-undefined \ + -z max-page-size=8192 + +$(obj)/vdso64.so.dbg: $(obj)/vdso.lds $(vobjs) FORCE + $(call if_changed,vdso) + +HOST_EXTRACFLAGS += -I$(srctree)/tools/include +hostprogs += vdso2c + +quiet_cmd_vdso2c = VDSO2C $@ + cmd_vdso2c = $(obj)/vdso2c $< $(<:%.dbg=%) $@ + +$(obj)/vdso-image-%.c: $(obj)/vdso%.so.dbg $(obj)/vdso%.so $(obj)/vdso2c FORCE + $(call if_changed,vdso2c) + +# +# Don't omit frame pointers for ease of userspace debugging, but do +# optimize sibling calls. +# +CFL := $(PROFILING) -mcmodel=medlow -fPIC -O2 -fasynchronous-unwind-tables -m64 \ + $(filter -g%,$(KBUILD_CFLAGS)) -fno-stack-protector \ + -fno-omit-frame-pointer -foptimize-sibling-calls \ + -DDISABLE_BRANCH_PROFILING -DBUILD_VDSO + +SPARC_REG_CFLAGS = -ffixed-g4 -ffixed-g5 -fcall-used-g5 -fcall-used-g7 + +$(vobjs): KBUILD_CFLAGS := $(filter-out $(GCC_PLUGINS_CFLAGS) $(SPARC_REG_CFLAGS),$(KBUILD_CFLAGS)) $(CFL) + +# +# vDSO code runs in userspace and -pg doesn't help with profiling anyway. 
+# +CFLAGS_REMOVE_vclock_gettime.o = -pg +CFLAGS_REMOVE_vdso32/vclock_gettime.o = -pg + +$(obj)/%.so: OBJCOPYFLAGS := -S +$(obj)/%.so: $(obj)/%.so.dbg FORCE + $(call if_changed,objcopy) + +CPPFLAGS_vdso32/vdso32.lds = $(CPPFLAGS_vdso.lds) +VDSO_LDFLAGS_vdso32.lds = -m elf32_sparc -soname linux-gate.so.1 + +#This makes sure the $(obj) subdirectory exists even though vdso32/ +#is not a kbuild sub-make subdirectory +override obj-dirs = $(dir $(obj)) $(obj)/vdso32/ + +targets += vdso32/vdso32.lds +targets += vdso32/vdso-note.o +targets += vdso32/vclock_gettime.o + +KBUILD_AFLAGS_32 := $(filter-out -m64,$(KBUILD_AFLAGS)) -DBUILD_VDSO +$(obj)/vdso32.so.dbg: KBUILD_AFLAGS = $(KBUILD_AFLAGS_32) +$(obj)/vdso32.so.dbg: asflags-$(CONFIG_SPARC64) += -m32 + +KBUILD_CFLAGS_32 := $(filter-out -m64,$(KBUILD_CFLAGS)) +KBUILD_CFLAGS_32 := $(filter-out -mcmodel=medlow,$(KBUILD_CFLAGS_32)) +KBUILD_CFLAGS_32 := $(filter-out -fno-pic,$(KBUILD_CFLAGS_32)) +KBUILD_CFLAGS_32 := $(filter-out $(GCC_PLUGINS_CFLAGS),$(KBUILD_CFLAGS_32)) +KBUILD_CFLAGS_32 := $(filter-out $(SPARC_REG_CFLAGS),$(KBUILD_CFLAGS_32)) +KBUILD_CFLAGS_32 += -m32 -msoft-float -fpic +KBUILD_CFLAGS_32 += -fno-stack-protector +KBUILD_CFLAGS_32 += $(call cc-option, -foptimize-sibling-calls) +KBUILD_CFLAGS_32 += -fno-omit-frame-pointer +KBUILD_CFLAGS_32 += -DDISABLE_BRANCH_PROFILING +KBUILD_CFLAGS_32 += -mv8plus +$(obj)/vdso32.so.dbg: KBUILD_CFLAGS = $(KBUILD_CFLAGS_32) + +$(obj)/vdso32.so.dbg: FORCE \ + $(obj)/vdso32/vdso32.lds \ + $(obj)/vdso32/vclock_gettime.o \ + $(obj)/vdso32/vdso-note.o + $(call if_changed,vdso) + +# +# The DSO images are built using a special linker script. 
+# +quiet_cmd_vdso = VDSO $@ + cmd_vdso = $(LD) -nostdlib -o $@ \ + $(VDSO_LDFLAGS) $(VDSO_LDFLAGS_$(filter %.lds,$(^F))) \ + -T $(filter %.lds,$^) $(filter %.o,$^) && \ + sh $(srctree)/$(src)/checkundef.sh '$(OBJDUMP)' '$@' + +VDSO_LDFLAGS = -shared --hash-style=both --build-id=sha1 -Bsymbolic +GCOV_PROFILE := n + +# +# Install the unstripped copies of vdso*.so. If our toolchain supports +# build-id, install .build-id links as well. +# +quiet_cmd_vdso_install = INSTALL $(@:install_%=%) +define cmd_vdso_install + cp $< "$(MODLIB)/vdso/$(@:install_%=%)"; \ + if readelf -n $< |grep -q 'Build ID'; then \ + buildid=`readelf -n $< |grep 'Build ID' |sed -e 's/^.*Build ID: \(.*\)$$/\1/'`; \ + first=`echo $$buildid | cut -b-2`; \ + last=`echo $$buildid | cut -b3-`; \ + mkdir -p "$(MODLIB)/vdso/.build-id/$$first"; \ + ln -sf "../../$(@:install_%=%)" "$(MODLIB)/vdso/.build-id/$$first/$$last.debug"; \ + fi +endef + +vdso_img_insttargets := $(vdso_img_sodbg:%.dbg=install_%) + +$(MODLIB)/vdso: FORCE + @mkdir -p $(MODLIB)/vdso + +$(vdso_img_insttargets): install_%: $(obj)/%.dbg $(MODLIB)/vdso FORCE + $(call cmd,vdso_install) + +PHONY += vdso_install $(vdso_img_insttargets) +vdso_install: $(vdso_img_insttargets) FORCE diff --git a/arch/sparc/vdso/checkundef.sh b/arch/sparc/vdso/checkundef.sh new file mode 100644 index 000000000..2d85876ff --- /dev/null +++ b/arch/sparc/vdso/checkundef.sh @@ -0,0 +1,10 @@ +#!/bin/sh +objdump="$1" +file="$2" +$objdump -t "$file" | grep '*UUND*' | grep -v '#scratch' > /dev/null 2>&1 +if [ $? -eq 1 ]; then + exit 0 +else + echo "$file: undefined symbols found" >&2 + exit 1 +fi diff --git a/arch/sparc/vdso/vclock_gettime.c b/arch/sparc/vdso/vclock_gettime.c new file mode 100644 index 000000000..e794edde6 --- /dev/null +++ b/arch/sparc/vdso/vclock_gettime.c @@ -0,0 +1,373 @@ +// SPDX-License-Identifier: GPL-2.0-only +/* + * Copyright 2006 Andi Kleen, SUSE Labs. + * + * Fast user context implementation of clock_gettime, gettimeofday, and time. 
+ * + * The code should have no internal unresolved relocations. + * Check with readelf after changing. + * Also alternative() doesn't work. + */ +/* + * Copyright (c) 2017 Oracle and/or its affiliates. All rights reserved. + */ + +#include +#include +#include +#include +#include +#include +#include +#include + +#ifdef CONFIG_SPARC64 +#define SYSCALL_STRING \ + "ta 0x6d;" \ + "bcs,a 1f;" \ + " sub %%g0, %%o0, %%o0;" \ + "1:" +#else +#define SYSCALL_STRING \ + "ta 0x10;" \ + "bcs,a 1f;" \ + " sub %%g0, %%o0, %%o0;" \ + "1:" +#endif + +#define SYSCALL_CLOBBERS \ + "f0", "f1", "f2", "f3", "f4", "f5", "f6", "f7", \ + "f8", "f9", "f10", "f11", "f12", "f13", "f14", "f15", \ + "f16", "f17", "f18", "f19", "f20", "f21", "f22", "f23", \ + "f24", "f25", "f26", "f27", "f28", "f29", "f30", "f31", \ + "f32", "f34", "f36", "f38", "f40", "f42", "f44", "f46", \ + "f48", "f50", "f52", "f54", "f56", "f58", "f60", "f62", \ + "cc", "memory" + +/* + * Compute the vvar page's address in the process address space, and return it + * as a pointer to the vvar_data. + */ +notrace static __always_inline struct vvar_data *get_vvar_data(void) +{ + unsigned long ret; + + /* + * vdso data page is the first vDSO page so grab the PC + * and move up a page to get to the data page. 
+ */ + __asm__("rd %%pc, %0" : "=r" (ret)); + ret &= ~(8192 - 1); + ret -= 8192; + + return (struct vvar_data *) ret; +} + +notrace static long vdso_fallback_gettime(long clock, struct __kernel_old_timespec *ts) +{ + register long num __asm__("g1") = __NR_clock_gettime; + register long o0 __asm__("o0") = clock; + register long o1 __asm__("o1") = (long) ts; + + __asm__ __volatile__(SYSCALL_STRING : "=r" (o0) : "r" (num), + "0" (o0), "r" (o1) : SYSCALL_CLOBBERS); + return o0; +} + +notrace static long vdso_fallback_gettimeofday(struct __kernel_old_timeval *tv, struct timezone *tz) +{ + register long num __asm__("g1") = __NR_gettimeofday; + register long o0 __asm__("o0") = (long) tv; + register long o1 __asm__("o1") = (long) tz; + + __asm__ __volatile__(SYSCALL_STRING : "=r" (o0) : "r" (num), + "0" (o0), "r" (o1) : SYSCALL_CLOBBERS); + return o0; +} + +#ifdef CONFIG_SPARC64 +notrace static __always_inline u64 vread_tick(void) +{ + u64 ret; + + __asm__ __volatile__("rd %%tick, %0" : "=r" (ret)); + return ret; +} + +notrace static __always_inline u64 vread_tick_stick(void) +{ + u64 ret; + + __asm__ __volatile__("rd %%asr24, %0" : "=r" (ret)); + return ret; +} +#else +notrace static __always_inline u64 vread_tick(void) +{ + register unsigned long long ret asm("o4"); + + __asm__ __volatile__("rd %%tick, %L0\n\t" + "srlx %L0, 32, %H0" + : "=r" (ret)); + return ret; +} + +notrace static __always_inline u64 vread_tick_stick(void) +{ + register unsigned long long ret asm("o4"); + + __asm__ __volatile__("rd %%asr24, %L0\n\t" + "srlx %L0, 32, %H0" + : "=r" (ret)); + return ret; +} +#endif + +notrace static __always_inline u64 vgetsns(struct vvar_data *vvar) +{ + u64 v; + u64 cycles; + + cycles = vread_tick(); + v = (cycles - vvar->clock.cycle_last) & vvar->clock.mask; + return v * vvar->clock.mult; +} + +notrace static __always_inline u64 vgetsns_stick(struct vvar_data *vvar) +{ + u64 v; + u64 cycles; + + cycles = vread_tick_stick(); + v = (cycles - vvar->clock.cycle_last) & 
vvar->clock.mask; + return v * vvar->clock.mult; +} + +notrace static __always_inline int do_realtime(struct vvar_data *vvar, + struct __kernel_old_timespec *ts) +{ + unsigned long seq; + u64 ns; + + do { + seq = vvar_read_begin(vvar); + ts->tv_sec = vvar->wall_time_sec; + ns = vvar->wall_time_snsec; + ns += vgetsns(vvar); + ns >>= vvar->clock.shift; + } while (unlikely(vvar_read_retry(vvar, seq))); + + ts->tv_sec += __iter_div_u64_rem(ns, NSEC_PER_SEC, &ns); + ts->tv_nsec = ns; + + return 0; +} + +notrace static __always_inline int do_realtime_stick(struct vvar_data *vvar, + struct __kernel_old_timespec *ts) +{ + unsigned long seq; + u64 ns; + + do { + seq = vvar_read_begin(vvar); + ts->tv_sec = vvar->wall_time_sec; + ns = vvar->wall_time_snsec; + ns += vgetsns_stick(vvar); + ns >>= vvar->clock.shift; + } while (unlikely(vvar_read_retry(vvar, seq))); + + ts->tv_sec += __iter_div_u64_rem(ns, NSEC_PER_SEC, &ns); + ts->tv_nsec = ns; + + return 0; +} + +notrace static __always_inline int do_monotonic(struct vvar_data *vvar, + struct __kernel_old_timespec *ts) +{ + unsigned long seq; + u64 ns; + + do { + seq = vvar_read_begin(vvar); + ts->tv_sec = vvar->monotonic_time_sec; + ns = vvar->monotonic_time_snsec; + ns += vgetsns(vvar); + ns >>= vvar->clock.shift; + } while (unlikely(vvar_read_retry(vvar, seq))); + + ts->tv_sec += __iter_div_u64_rem(ns, NSEC_PER_SEC, &ns); + ts->tv_nsec = ns; + + return 0; +} + +notrace static __always_inline int do_monotonic_stick(struct vvar_data *vvar, + struct __kernel_old_timespec *ts) +{ + unsigned long seq; + u64 ns; + + do { + seq = vvar_read_begin(vvar); + ts->tv_sec = vvar->monotonic_time_sec; + ns = vvar->monotonic_time_snsec; + ns += vgetsns_stick(vvar); + ns >>= vvar->clock.shift; + } while (unlikely(vvar_read_retry(vvar, seq))); + + ts->tv_sec += __iter_div_u64_rem(ns, NSEC_PER_SEC, &ns); + ts->tv_nsec = ns; + + return 0; +} + +notrace static int do_realtime_coarse(struct vvar_data *vvar, + struct __kernel_old_timespec *ts) +{ + 
unsigned long seq; + + do { + seq = vvar_read_begin(vvar); + ts->tv_sec = vvar->wall_time_coarse_sec; + ts->tv_nsec = vvar->wall_time_coarse_nsec; + } while (unlikely(vvar_read_retry(vvar, seq))); + return 0; +} + +notrace static int do_monotonic_coarse(struct vvar_data *vvar, + struct __kernel_old_timespec *ts) +{ + unsigned long seq; + + do { + seq = vvar_read_begin(vvar); + ts->tv_sec = vvar->monotonic_time_coarse_sec; + ts->tv_nsec = vvar->monotonic_time_coarse_nsec; + } while (unlikely(vvar_read_retry(vvar, seq))); + + return 0; +} + +notrace int +__vdso_clock_gettime(clockid_t clock, struct __kernel_old_timespec *ts) +{ + struct vvar_data *vvd = get_vvar_data(); + + switch (clock) { + case CLOCK_REALTIME: + if (unlikely(vvd->vclock_mode == VCLOCK_NONE)) + break; + return do_realtime(vvd, ts); + case CLOCK_MONOTONIC: + if (unlikely(vvd->vclock_mode == VCLOCK_NONE)) + break; + return do_monotonic(vvd, ts); + case CLOCK_REALTIME_COARSE: + return do_realtime_coarse(vvd, ts); + case CLOCK_MONOTONIC_COARSE: + return do_monotonic_coarse(vvd, ts); + } + /* + * Unknown clock ID ? Fall back to the syscall. + */ + return vdso_fallback_gettime(clock, ts); +} +int +clock_gettime(clockid_t, struct __kernel_old_timespec *) + __attribute__((weak, alias("__vdso_clock_gettime"))); + +notrace int +__vdso_clock_gettime_stick(clockid_t clock, struct __kernel_old_timespec *ts) +{ + struct vvar_data *vvd = get_vvar_data(); + + switch (clock) { + case CLOCK_REALTIME: + if (unlikely(vvd->vclock_mode == VCLOCK_NONE)) + break; + return do_realtime_stick(vvd, ts); + case CLOCK_MONOTONIC: + if (unlikely(vvd->vclock_mode == VCLOCK_NONE)) + break; + return do_monotonic_stick(vvd, ts); + case CLOCK_REALTIME_COARSE: + return do_realtime_coarse(vvd, ts); + case CLOCK_MONOTONIC_COARSE: + return do_monotonic_coarse(vvd, ts); + } + /* + * Unknown clock ID ? Fall back to the syscall. 
+ */ + return vdso_fallback_gettime(clock, ts); +} + +notrace int +__vdso_gettimeofday(struct __kernel_old_timeval *tv, struct timezone *tz) +{ + struct vvar_data *vvd = get_vvar_data(); + + if (likely(vvd->vclock_mode != VCLOCK_NONE)) { + if (likely(tv != NULL)) { + union tstv_t { + struct __kernel_old_timespec ts; + struct __kernel_old_timeval tv; + } *tstv = (union tstv_t *) tv; + do_realtime(vvd, &tstv->ts); + /* + * Assign before dividing to ensure that the division is + * done in the type of tv_usec, not tv_nsec. + * + * There cannot be > 1 billion usec in a second: + * do_realtime() has already distributed such overflow + * into tv_sec. So we can assign it to an int safely. + */ + tstv->tv.tv_usec = tstv->ts.tv_nsec; + tstv->tv.tv_usec /= 1000; + } + if (unlikely(tz != NULL)) { + /* Avoid memcpy. Some old compilers fail to inline it */ + tz->tz_minuteswest = vvd->tz_minuteswest; + tz->tz_dsttime = vvd->tz_dsttime; + } + return 0; + } + return vdso_fallback_gettimeofday(tv, tz); +} +int +gettimeofday(struct __kernel_old_timeval *, struct timezone *) + __attribute__((weak, alias("__vdso_gettimeofday"))); + +notrace int +__vdso_gettimeofday_stick(struct __kernel_old_timeval *tv, struct timezone *tz) +{ + struct vvar_data *vvd = get_vvar_data(); + + if (likely(vvd->vclock_mode != VCLOCK_NONE)) { + if (likely(tv != NULL)) { + union tstv_t { + struct __kernel_old_timespec ts; + struct __kernel_old_timeval tv; + } *tstv = (union tstv_t *) tv; + do_realtime_stick(vvd, &tstv->ts); + /* + * Assign before dividing to ensure that the division is + * done in the type of tv_usec, not tv_nsec. + * + * There cannot be > 1 billion usec in a second: + * do_realtime() has already distributed such overflow + * into tv_sec. So we can assign it to an int safely. + */ + tstv->tv.tv_usec = tstv->ts.tv_nsec; + tstv->tv.tv_usec /= 1000; + } + if (unlikely(tz != NULL)) { + /* Avoid memcpy. 
Some old compilers fail to inline it */ + tz->tz_minuteswest = vvd->tz_minuteswest; + tz->tz_dsttime = vvd->tz_dsttime; + } + return 0; + } + return vdso_fallback_gettimeofday(tv, tz); +} diff --git a/arch/sparc/vdso/vdso-layout.lds.S b/arch/sparc/vdso/vdso-layout.lds.S new file mode 100644 index 000000000..d31e57e8a --- /dev/null +++ b/arch/sparc/vdso/vdso-layout.lds.S @@ -0,0 +1,98 @@ +/* + * Linker script for vDSO. This is an ELF shared object prelinked to + * its virtual address, and with only one read-only segment. + * This script controls its layout. + */ + +#if defined(BUILD_VDSO64) +# define SHDR_SIZE 64 +#elif defined(BUILD_VDSO32) +# define SHDR_SIZE 40 +#else +# error unknown VDSO target +#endif + +#define NUM_FAKE_SHDRS 7 + +SECTIONS +{ + /* + * User/kernel shared data is before the vDSO. This may be a little + * uglier than putting it after the vDSO, but it avoids issues with + * non-allocatable things that dangle past the end of the PT_LOAD + * segment. Page size is 8192 for both 64-bit and 32-bit vdso binaries + */ + + vvar_start = . -8192; + vvar_data = vvar_start; + + . = SIZEOF_HEADERS; + + .hash : { *(.hash) } :text + .gnu.hash : { *(.gnu.hash) } + .dynsym : { *(.dynsym) } + .dynstr : { *(.dynstr) } + .gnu.version : { *(.gnu.version) } + .gnu.version_d : { *(.gnu.version_d) } + .gnu.version_r : { *(.gnu.version_r) } + + .dynamic : { *(.dynamic) } :text :dynamic + + .rodata : { + *(.rodata*) + *(.data*) + *(.sdata*) + *(.got.plt) *(.got) + *(.gnu.linkonce.d.*) + *(.bss*) + *(.dynbss*) + *(.gnu.linkonce.b.*) + + /* + * Ideally this would live in a C file: kept in here for + * compatibility with x86-64. + */ + VDSO_FAKE_SECTION_TABLE_START = .; + . = . 
+ NUM_FAKE_SHDRS * SHDR_SIZE; + VDSO_FAKE_SECTION_TABLE_END = .; + } :text + + .fake_shstrtab : { *(.fake_shstrtab) } :text + + + .note : { *(.note.*) } :text :note + + .eh_frame_hdr : { *(.eh_frame_hdr) } :text :eh_frame_hdr + .eh_frame : { KEEP (*(.eh_frame)) } :text + + + /* + * Text is well-separated from actual data: there's plenty of + * stuff that isn't used at runtime in between. + */ + + .text : { *(.text*) } :text =0x90909090, + + /DISCARD/ : { + *(.discard) + *(.discard.*) + *(__bug_table) + } +} + +/* + * Very old versions of ld do not recognize this name token; use the constant. + */ +#define PT_GNU_EH_FRAME 0x6474e550 + +/* + * We must supply the ELF program headers explicitly to get just one + * PT_LOAD segment, and set the flags explicitly to make segments read-only. + */ +PHDRS +{ + text PT_LOAD FLAGS(5) FILEHDR PHDRS; /* PF_R|PF_X */ + dynamic PT_DYNAMIC FLAGS(4); /* PF_R */ + note PT_NOTE FLAGS(4); /* PF_R */ + eh_frame_hdr PT_GNU_EH_FRAME; +} diff --git a/arch/sparc/vdso/vdso-note.S b/arch/sparc/vdso/vdso-note.S new file mode 100644 index 000000000..79a071e43 --- /dev/null +++ b/arch/sparc/vdso/vdso-note.S @@ -0,0 +1,12 @@ +/* + * This supplies .note.* sections to go into the PT_NOTE inside the vDSO text. + * Here we can supply some information useful to userland. + */ + +#include +#include +#include + +ELFNOTE_START(Linux, 0, "a") + .long LINUX_VERSION_CODE +ELFNOTE_END diff --git a/arch/sparc/vdso/vdso.lds.S b/arch/sparc/vdso/vdso.lds.S new file mode 100644 index 000000000..629ab6900 --- /dev/null +++ b/arch/sparc/vdso/vdso.lds.S @@ -0,0 +1,27 @@ +/* + * Linker script for 64-bit vDSO. + * We #include the file to define the layout details. + * + * This file defines the version script giving the user-exported symbols in + * the DSO. + */ + +#define BUILD_VDSO64 + +#include "vdso-layout.lds.S" + +/* + * This controls what userland symbols we export from the vDSO. 
+ */ +VERSION { + LINUX_2.6 { + global: + clock_gettime; + __vdso_clock_gettime; + __vdso_clock_gettime_stick; + gettimeofday; + __vdso_gettimeofday; + __vdso_gettimeofday_stick; + local: *; + }; +} diff --git a/arch/sparc/vdso/vdso2c.c b/arch/sparc/vdso/vdso2c.c new file mode 100644 index 000000000..ab7504176 --- /dev/null +++ b/arch/sparc/vdso/vdso2c.c @@ -0,0 +1,228 @@ +/* + * vdso2c - A vdso image preparation tool + * Copyright (c) 2014 Andy Lutomirski and others + * Licensed under the GPL v2 + * + * vdso2c requires stripped and unstripped input. It would be trivial + * to fully strip the input in here, but, for reasons described below, + * we need to write a section table. Doing this is more or less + * equivalent to dropping all non-allocatable sections, but it's + * easier to let objcopy handle that instead of doing it ourselves. + * If we ever need to do something fancier than what objcopy provides, + * it would be straightforward to add here. + * + * We keep a section table for a few reasons: + * + * Binutils has issues debugging the vDSO: it reads the section table to + * find SHT_NOTE; it won't look at PT_NOTE for the in-memory vDSO, which + * would break build-id if we removed the section table. Binutils + * also requires that shstrndx != 0. See: + * https://sourceware.org/bugzilla/show_bug.cgi?id=17064 + * + * elfutils might not look for PT_NOTE if there is a section table at + * all. I don't know whether this matters for any practical purpose. + * + * For simplicity, rather than hacking up a partial section table, we + * just write a mostly complete one. We omit non-dynamic symbols, + * though, since they're rather large. + * + * Once binutils gets fixed, we might be able to drop this for all but + * the 64-bit vdso, since build-id only works in kernel RPMs, and + * systems that update to new enough kernel RPMs will likely update + * binutils in sync. 
build-id has never worked for home-built kernel + * RPMs without manual symlinking, and I suspect that no one ever does + * that. + */ + +/* + * Copyright (c) 2017 Oracle and/or its affiliates. All rights reserved. + */ + +#include +#include +#include +#include +#include +#include +#include +#include +#include + +#include +#include +#include + +#include +#include +#include + +const char *outfilename; + +/* Symbols that we need in vdso2c. */ +enum { + sym_vvar_start, + sym_VDSO_FAKE_SECTION_TABLE_START, + sym_VDSO_FAKE_SECTION_TABLE_END, +}; + +struct vdso_sym { + const char *name; + int export; +}; + +struct vdso_sym required_syms[] = { + [sym_vvar_start] = {"vvar_start", 1}, + [sym_VDSO_FAKE_SECTION_TABLE_START] = { + "VDSO_FAKE_SECTION_TABLE_START", 0 + }, + [sym_VDSO_FAKE_SECTION_TABLE_END] = { + "VDSO_FAKE_SECTION_TABLE_END", 0 + }, +}; + +__attribute__((format(printf, 1, 2))) __attribute__((noreturn)) +static void fail(const char *format, ...) +{ + va_list ap; + + va_start(ap, format); + fprintf(stderr, "Error: "); + vfprintf(stderr, format, ap); + if (outfilename) + unlink(outfilename); + exit(1); + va_end(ap); +} + +/* + * Evil macros for big-endian reads and writes + */ +#define GBE(x, bits, ifnot) \ + __builtin_choose_expr( \ + (sizeof(*(x)) == bits/8), \ + (__typeof__(*(x)))get_unaligned_be##bits(x), ifnot) + +#define LAST_GBE(x) \ + __builtin_choose_expr(sizeof(*(x)) == 1, *(x), (void)(0)) + +#define GET_BE(x) \ + GBE(x, 64, GBE(x, 32, GBE(x, 16, LAST_GBE(x)))) + +#define PBE(x, val, bits, ifnot) \ + __builtin_choose_expr( \ + (sizeof(*(x)) == bits/8), \ + put_unaligned_be##bits((val), (x)), ifnot) + +#define LAST_PBE(x, val) \ + __builtin_choose_expr(sizeof(*(x)) == 1, *(x) = (val), (void)(0)) + +#define PUT_BE(x, val) \ + PBE(x, val, 64, PBE(x, val, 32, PBE(x, val, 16, LAST_PBE(x, val)))) + +#define NSYMS ARRAY_SIZE(required_syms) + +#define BITSFUNC3(name, bits, suffix) name##bits##suffix +#define BITSFUNC2(name, bits, suffix) BITSFUNC3(name, bits, 
suffix) +#define BITSFUNC(name) BITSFUNC2(name, ELF_BITS, ) + +#define INT_BITS BITSFUNC2(int, ELF_BITS, _t) + +#define ELF_BITS_XFORM2(bits, x) Elf##bits##_##x +#define ELF_BITS_XFORM(bits, x) ELF_BITS_XFORM2(bits, x) +#define ELF(x) ELF_BITS_XFORM(ELF_BITS, x) + +#define ELF_BITS 64 +#include "vdso2c.h" +#undef ELF_BITS + +#define ELF_BITS 32 +#include "vdso2c.h" +#undef ELF_BITS + +static void go(void *raw_addr, size_t raw_len, + void *stripped_addr, size_t stripped_len, + FILE *outfile, const char *name) +{ + Elf64_Ehdr *hdr = (Elf64_Ehdr *)raw_addr; + + if (hdr->e_ident[EI_CLASS] == ELFCLASS64) { + go64(raw_addr, raw_len, stripped_addr, stripped_len, + outfile, name); + } else if (hdr->e_ident[EI_CLASS] == ELFCLASS32) { + go32(raw_addr, raw_len, stripped_addr, stripped_len, + outfile, name); + } else { + fail("unknown ELF class\n"); + } +} + +static void map_input(const char *name, void **addr, size_t *len, int prot) +{ + off_t tmp_len; + + int fd = open(name, O_RDONLY); + + if (fd == -1) + err(1, "%s", name); + + tmp_len = lseek(fd, 0, SEEK_END); + if (tmp_len == (off_t)-1) + err(1, "lseek"); + *len = (size_t)tmp_len; + + *addr = mmap(NULL, tmp_len, prot, MAP_PRIVATE, fd, 0); + if (*addr == MAP_FAILED) + err(1, "mmap"); + + close(fd); +} + +int main(int argc, char **argv) +{ + size_t raw_len, stripped_len; + void *raw_addr, *stripped_addr; + FILE *outfile; + char *name, *tmp; + int namelen; + + if (argc != 4) { + printf("Usage: vdso2c RAW_INPUT STRIPPED_INPUT OUTPUT\n"); + return 1; + } + + /* + * Figure out the struct name. If we're writing to a .so file, + * generate raw output insted. 
+ */ + name = strdup(argv[3]); + namelen = strlen(name); + if (namelen >= 3 && !strcmp(name + namelen - 3, ".so")) { + name = NULL; + } else { + tmp = strrchr(name, '/'); + if (tmp) + name = tmp + 1; + tmp = strchr(name, '.'); + if (tmp) + *tmp = '\0'; + for (tmp = name; *tmp; tmp++) + if (*tmp == '-') + *tmp = '_'; + } + + map_input(argv[1], &raw_addr, &raw_len, PROT_READ); + map_input(argv[2], &stripped_addr, &stripped_len, PROT_READ); + + outfilename = argv[3]; + outfile = fopen(outfilename, "w"); + if (!outfile) + err(1, "%s", argv[2]); + + go(raw_addr, raw_len, stripped_addr, stripped_len, outfile, name); + + munmap(raw_addr, raw_len); + munmap(stripped_addr, stripped_len); + fclose(outfile); + + return 0; +} diff --git a/arch/sparc/vdso/vdso2c.h b/arch/sparc/vdso/vdso2c.h new file mode 100644 index 000000000..60d69acc7 --- /dev/null +++ b/arch/sparc/vdso/vdso2c.h @@ -0,0 +1,142 @@ +/* + * Copyright (c) 2017 Oracle and/or its affiliates. All rights reserved. + */ + +/* + * This file is included up to twice from vdso2c.c. It generates code for + * 32-bit and 64-bit vDSOs. We will eventually need both for 64-bit builds, + * since 32-bit vDSOs will then be built for 32-bit userspace. + */ + +static void BITSFUNC(go)(void *raw_addr, size_t raw_len, + void *stripped_addr, size_t stripped_len, + FILE *outfile, const char *name) +{ + int found_load = 0; + unsigned long load_size = -1; /* Work around bogus warning */ + unsigned long mapping_size; + int i; + unsigned long j; + ELF(Shdr) *symtab_hdr = NULL, *strtab_hdr; + ELF(Ehdr) *hdr = (ELF(Ehdr) *)raw_addr; + ELF(Dyn) *dyn = 0, *dyn_end = 0; + INT_BITS syms[NSYMS] = {}; + + ELF(Phdr) *pt = (ELF(Phdr) *)(raw_addr + GET_BE(&hdr->e_phoff)); + + /* Walk the segment table. 
*/ + for (i = 0; i < GET_BE(&hdr->e_phnum); i++) { + if (GET_BE(&pt[i].p_type) == PT_LOAD) { + if (found_load) + fail("multiple PT_LOAD segs\n"); + + if (GET_BE(&pt[i].p_offset) != 0 || + GET_BE(&pt[i].p_vaddr) != 0) + fail("PT_LOAD in wrong place\n"); + + if (GET_BE(&pt[i].p_memsz) != GET_BE(&pt[i].p_filesz)) + fail("cannot handle memsz != filesz\n"); + + load_size = GET_BE(&pt[i].p_memsz); + found_load = 1; + } else if (GET_BE(&pt[i].p_type) == PT_DYNAMIC) { + dyn = raw_addr + GET_BE(&pt[i].p_offset); + dyn_end = raw_addr + GET_BE(&pt[i].p_offset) + + GET_BE(&pt[i].p_memsz); + } + } + if (!found_load) + fail("no PT_LOAD seg\n"); + + if (stripped_len < load_size) + fail("stripped input is too short\n"); + + /* Walk the dynamic table */ + for (i = 0; dyn + i < dyn_end && + GET_BE(&dyn[i].d_tag) != DT_NULL; i++) { + typeof(dyn[i].d_tag) tag = GET_BE(&dyn[i].d_tag); + typeof(dyn[i].d_un.d_val) val = GET_BE(&dyn[i].d_un.d_val); + + if ((tag == DT_RELSZ || tag == DT_RELASZ) && (val != 0)) + fail("vdso image contains dynamic relocations\n"); + } + + /* Walk the section table */ + for (i = 0; i < GET_BE(&hdr->e_shnum); i++) { + ELF(Shdr) *sh = raw_addr + GET_BE(&hdr->e_shoff) + + GET_BE(&hdr->e_shentsize) * i; + if (GET_BE(&sh->sh_type) == SHT_SYMTAB) + symtab_hdr = sh; + } + + if (!symtab_hdr) + fail("no symbol table\n"); + + strtab_hdr = raw_addr + GET_BE(&hdr->e_shoff) + + GET_BE(&hdr->e_shentsize) * GET_BE(&symtab_hdr->sh_link); + + /* Walk the symbol table */ + for (i = 0; + i < GET_BE(&symtab_hdr->sh_size) / GET_BE(&symtab_hdr->sh_entsize); + i++) { + int k; + + ELF(Sym) *sym = raw_addr + GET_BE(&symtab_hdr->sh_offset) + + GET_BE(&symtab_hdr->sh_entsize) * i; + const char *name = raw_addr + GET_BE(&strtab_hdr->sh_offset) + + GET_BE(&sym->st_name); + + for (k = 0; k < NSYMS; k++) { + if (!strcmp(name, required_syms[k].name)) { + if (syms[k]) { + fail("duplicate symbol %s\n", + required_syms[k].name); + } + + /* + * Careful: we use negative addresses, but + * 
st_value is unsigned, so we rely + * on syms[k] being a signed type of the + * correct width. + */ + syms[k] = GET_BE(&sym->st_value); + } + } + } + + /* Validate mapping addresses. */ + if (syms[sym_vvar_start] % 8192) + fail("vvar_start must be a multiple of 8192\n"); /* message names the symbol actually checked */ + + if (!name) { + fwrite(stripped_addr, stripped_len, 1, outfile); + return; + } + + mapping_size = (stripped_len + 8191) / 8192 * 8192; + + fprintf(outfile, "/* AUTOMATICALLY GENERATED -- DO NOT EDIT */\n\n"); + fprintf(outfile, "#include \n"); + fprintf(outfile, "#include \n"); + fprintf(outfile, "\n"); + fprintf(outfile, + "static unsigned char raw_data[%lu] __ro_after_init __aligned(8192)= {", + mapping_size); + for (j = 0; j < stripped_len; j++) { + if (j % 10 == 0) + fprintf(outfile, "\n\t"); + fprintf(outfile, "0x%02X, ", + (int)((unsigned char *)stripped_addr)[j]); + } + fprintf(outfile, "\n};\n\n"); + + fprintf(outfile, "const struct vdso_image %s_builtin = {\n", name); + fprintf(outfile, "\t.data = raw_data,\n"); + fprintf(outfile, "\t.size = %lu,\n", mapping_size); + for (i = 0; i < NSYMS; i++) { + if (required_syms[i].export && syms[i]) + fprintf(outfile, "\t.sym_%s = %" PRIi64 ",\n", + required_syms[i].name, (int64_t)syms[i]); + } + fprintf(outfile, "};\n"); +} diff --git a/arch/sparc/vdso/vdso32/.gitignore b/arch/sparc/vdso/vdso32/.gitignore new file mode 100644 index 000000000..516738484 --- /dev/null +++ b/arch/sparc/vdso/vdso32/.gitignore @@ -0,0 +1,2 @@ +# SPDX-License-Identifier: GPL-2.0-only +vdso32.lds diff --git a/arch/sparc/vdso/vdso32/vclock_gettime.c b/arch/sparc/vdso/vdso32/vclock_gettime.c new file mode 100644 index 000000000..d7f99e674 --- /dev/null +++ b/arch/sparc/vdso/vdso32/vclock_gettime.c @@ -0,0 +1,22 @@ +/* + * Copyright (c) 2017 Oracle and/or its affiliates. All rights reserved.
+ */ + +#define BUILD_VDSO32 + +#ifdef CONFIG_SPARC64 + +/* + * in case of a 32 bit VDSO for a 64 bit kernel fake a 32 bit kernel + * configuration + */ +#undef CONFIG_64BIT +#undef CONFIG_SPARC64 +#define BUILD_VDSO32_64 +#define CONFIG_32BIT +#undef CONFIG_QUEUED_RWLOCKS +#undef CONFIG_QUEUED_SPINLOCKS + +#endif + +#include "../vclock_gettime.c" diff --git a/arch/sparc/vdso/vdso32/vdso-note.S b/arch/sparc/vdso/vdso32/vdso-note.S new file mode 100644 index 000000000..e234983cf --- /dev/null +++ b/arch/sparc/vdso/vdso32/vdso-note.S @@ -0,0 +1,12 @@ +/* + * This supplies .note.* sections to go into the PT_NOTE inside the vDSO + * text. Here we can supply some information useful to userland. + */ + +#include +#include +#include + +ELFNOTE_START(Linux, 0, "a") + .long LINUX_VERSION_CODE +ELFNOTE_END diff --git a/arch/sparc/vdso/vdso32/vdso32.lds.S b/arch/sparc/vdso/vdso32/vdso32.lds.S new file mode 100644 index 000000000..218930fdf --- /dev/null +++ b/arch/sparc/vdso/vdso32/vdso32.lds.S @@ -0,0 +1,26 @@ +/* + * Linker script for sparc32 vDSO + * We #include the file to define the layout details. + * + * This file defines the version script giving the user-exported symbols in + * the DSO. + */ + +#define BUILD_VDSO32 +#include "../vdso-layout.lds.S" + +/* + * This controls what userland symbols we export from the vDSO. + */ +VERSION { + LINUX_2.6 { + global: + clock_gettime; + __vdso_clock_gettime; + __vdso_clock_gettime_stick; + gettimeofday; + __vdso_gettimeofday; + __vdso_gettimeofday_stick; + local: *; + }; +} diff --git a/arch/sparc/vdso/vma.c b/arch/sparc/vdso/vma.c new file mode 100644 index 000000000..cc19e09b0 --- /dev/null +++ b/arch/sparc/vdso/vma.c @@ -0,0 +1,457 @@ +// SPDX-License-Identifier: GPL-2.0-only +/* + * Set up the VMAs to tell the VM about the vDSO. + * Copyright 2007 Andi Kleen, SUSE Labs. + */ + +/* + * Copyright (c) 2017 Oracle and/or its affiliates. All rights reserved. 
+ */ + +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + +unsigned int __read_mostly vdso_enabled = 1; + +static struct vm_special_mapping vvar_mapping = { + .name = "[vvar]" +}; + +#ifdef CONFIG_SPARC64 +static struct vm_special_mapping vdso_mapping64 = { + .name = "[vdso]" +}; +#endif + +#ifdef CONFIG_COMPAT +static struct vm_special_mapping vdso_mapping32 = { + .name = "[vdso]" +}; +#endif + +struct vvar_data *vvar_data; + +struct vdso_elfinfo32 { + Elf32_Ehdr *hdr; + Elf32_Sym *dynsym; + unsigned long dynsymsize; + const char *dynstr; + unsigned long text; +}; + +struct vdso_elfinfo64 { + Elf64_Ehdr *hdr; + Elf64_Sym *dynsym; + unsigned long dynsymsize; + const char *dynstr; + unsigned long text; +}; + +struct vdso_elfinfo { + union { + struct vdso_elfinfo32 elf32; + struct vdso_elfinfo64 elf64; + } u; +}; + +static void *one_section64(struct vdso_elfinfo64 *e, const char *name, + unsigned long *size) +{ + const char *snames; + Elf64_Shdr *shdrs; + unsigned int i; + + shdrs = (void *)e->hdr + e->hdr->e_shoff; + snames = (void *)e->hdr + shdrs[e->hdr->e_shstrndx].sh_offset; + for (i = 1; i < e->hdr->e_shnum; i++) { + if (!strcmp(snames+shdrs[i].sh_name, name)) { + if (size) + *size = shdrs[i].sh_size; + return (void *)e->hdr + shdrs[i].sh_offset; + } + } + return NULL; +} + +static int find_sections64(const struct vdso_image *image, struct vdso_elfinfo *_e) +{ + struct vdso_elfinfo64 *e = &_e->u.elf64; + + e->hdr = image->data; + e->dynsym = one_section64(e, ".dynsym", &e->dynsymsize); + e->dynstr = one_section64(e, ".dynstr", NULL); + + if (!e->dynsym || !e->dynstr) { + pr_err("VDSO64: Missing symbol sections.\n"); + return -ENODEV; + } + return 0; +} + +static Elf64_Sym *find_sym64(const struct vdso_elfinfo64 *e, const char *name) +{ + unsigned int i; + + for (i = 0; i < (e->dynsymsize / sizeof(Elf64_Sym)); i++) { + Elf64_Sym *s = &e->dynsym[i]; + if (s->st_name == 0) + continue; 
+ if (!strcmp(e->dynstr + s->st_name, name)) + return s; + } + return NULL; +} + +static int patchsym64(struct vdso_elfinfo *_e, const char *orig, + const char *new) +{ + struct vdso_elfinfo64 *e = &_e->u.elf64; + Elf64_Sym *osym = find_sym64(e, orig); + Elf64_Sym *nsym = find_sym64(e, new); + + if (!nsym || !osym) { + pr_err("VDSO64: Missing symbols.\n"); + return -ENODEV; + } + osym->st_value = nsym->st_value; + osym->st_size = nsym->st_size; + osym->st_info = nsym->st_info; + osym->st_other = nsym->st_other; + osym->st_shndx = nsym->st_shndx; + + return 0; +} + +static void *one_section32(struct vdso_elfinfo32 *e, const char *name, + unsigned long *size) +{ + const char *snames; + Elf32_Shdr *shdrs; + unsigned int i; + + shdrs = (void *)e->hdr + e->hdr->e_shoff; + snames = (void *)e->hdr + shdrs[e->hdr->e_shstrndx].sh_offset; + for (i = 1; i < e->hdr->e_shnum; i++) { + if (!strcmp(snames+shdrs[i].sh_name, name)) { + if (size) + *size = shdrs[i].sh_size; + return (void *)e->hdr + shdrs[i].sh_offset; + } + } + return NULL; +} + +static int find_sections32(const struct vdso_image *image, struct vdso_elfinfo *_e) +{ + struct vdso_elfinfo32 *e = &_e->u.elf32; + + e->hdr = image->data; + e->dynsym = one_section32(e, ".dynsym", &e->dynsymsize); + e->dynstr = one_section32(e, ".dynstr", NULL); + + if (!e->dynsym || !e->dynstr) { + pr_err("VDSO32: Missing symbol sections.\n"); + return -ENODEV; + } + return 0; +} + +static Elf32_Sym *find_sym32(const struct vdso_elfinfo32 *e, const char *name) +{ + unsigned int i; + + for (i = 0; i < (e->dynsymsize / sizeof(Elf32_Sym)); i++) { + Elf32_Sym *s = &e->dynsym[i]; + if (s->st_name == 0) + continue; + if (!strcmp(e->dynstr + s->st_name, name)) + return s; + } + return NULL; +} + +static int patchsym32(struct vdso_elfinfo *_e, const char *orig, + const char *new) +{ + struct vdso_elfinfo32 *e = &_e->u.elf32; + Elf32_Sym *osym = find_sym32(e, orig); + Elf32_Sym *nsym = find_sym32(e, new); + + if (!nsym || !osym) { + 
pr_err("VDSO32: Missing symbols.\n"); + return -ENODEV; + } + osym->st_value = nsym->st_value; + osym->st_size = nsym->st_size; + osym->st_info = nsym->st_info; + osym->st_other = nsym->st_other; + osym->st_shndx = nsym->st_shndx; + + return 0; +} + +static int find_sections(const struct vdso_image *image, struct vdso_elfinfo *e, + bool elf64) +{ + if (elf64) + return find_sections64(image, e); + else + return find_sections32(image, e); +} + +static int patch_one_symbol(struct vdso_elfinfo *e, const char *orig, + const char *new_target, bool elf64) +{ + if (elf64) + return patchsym64(e, orig, new_target); + else + return patchsym32(e, orig, new_target); +} + +static int stick_patch(const struct vdso_image *image, struct vdso_elfinfo *e, bool elf64) +{ + int err; + + err = find_sections(image, e, elf64); + if (err) + return err; + + err = patch_one_symbol(e, + "__vdso_gettimeofday", + "__vdso_gettimeofday_stick", elf64); + if (err) + return err; + + return patch_one_symbol(e, + "__vdso_clock_gettime", + "__vdso_clock_gettime_stick", elf64); + return 0; +} + +/* + * Allocate pages for the vdso and vvar, and copy in the vdso text from the + * kernel image. + */ +int __init init_vdso_image(const struct vdso_image *image, + struct vm_special_mapping *vdso_mapping, bool elf64) +{ + int cnpages = (image->size) / PAGE_SIZE; + struct page *dp, **dpp = NULL; + struct page *cp, **cpp = NULL; + struct vdso_elfinfo ei; + int i, dnpages = 0; + + if (tlb_type != spitfire) { + int err = stick_patch(image, &ei, elf64); + if (err) + return err; + } + + /* + * First, the vdso text. This is initialied data, an integral number of + * pages long. 
+ */ + if (WARN_ON(image->size % PAGE_SIZE != 0)) + goto oom; + + cpp = kcalloc(cnpages, sizeof(struct page *), GFP_KERNEL); + vdso_mapping->pages = cpp; + + if (!cpp) + goto oom; + + for (i = 0; i < cnpages; i++) { + cp = alloc_page(GFP_KERNEL); + if (!cp) + goto oom; + cpp[i] = cp; + copy_page(page_address(cp), image->data + i * PAGE_SIZE); + } + + /* + * Now the vvar page. This is uninitialized data. + */ + + if (vvar_data == NULL) { + dnpages = (sizeof(struct vvar_data) / PAGE_SIZE) + 1; + if (WARN_ON(dnpages != 1)) + goto oom; + dpp = kcalloc(dnpages, sizeof(struct page *), GFP_KERNEL); + vvar_mapping.pages = dpp; + + if (!dpp) + goto oom; + + dp = alloc_page(GFP_KERNEL); + if (!dp) + goto oom; + + dpp[0] = dp; + vvar_data = page_address(dp); + memset(vvar_data, 0, PAGE_SIZE); + + vvar_data->seq = 0; + } + + return 0; + oom: + if (cpp != NULL) { + for (i = 0; i < cnpages; i++) { + if (cpp[i] != NULL) + __free_page(cpp[i]); + } + kfree(cpp); + vdso_mapping->pages = NULL; + } + + if (dpp != NULL) { + for (i = 0; i < dnpages; i++) { + if (dpp[i] != NULL) + __free_page(dpp[i]); + } + kfree(dpp); + vvar_mapping.pages = NULL; + } + + pr_warn("Cannot allocate vdso\n"); + vdso_enabled = 0; + return -ENOMEM; +} + +static int __init init_vdso(void) +{ + int err = 0; +#ifdef CONFIG_SPARC64 + err = init_vdso_image(&vdso_image_64_builtin, &vdso_mapping64, true); + if (err) + return err; +#endif + +#ifdef CONFIG_COMPAT + err = init_vdso_image(&vdso_image_32_builtin, &vdso_mapping32, false); +#endif + return err; + +} +subsys_initcall(init_vdso); + +struct linux_binprm; + +/* Shuffle the vdso up a bit, randomly. 
*/ +static unsigned long vdso_addr(unsigned long start, unsigned int len) +{ + unsigned int offset; + + /* This loses some more bits than a modulo, but is cheaper */ + offset = get_random_int() & (PTRS_PER_PTE - 1); + return start + (offset << PAGE_SHIFT); +} + +static int map_vdso(const struct vdso_image *image, + struct vm_special_mapping *vdso_mapping) +{ + struct mm_struct *mm = current->mm; + struct vm_area_struct *vma; + unsigned long text_start, addr = 0; + int ret = 0; + + mmap_write_lock(mm); + + /* + * First, get an unmapped region: then randomize it, and make sure that + * region is free. + */ + if (current->flags & PF_RANDOMIZE) { + addr = get_unmapped_area(NULL, 0, + image->size - image->sym_vvar_start, + 0, 0); + if (IS_ERR_VALUE(addr)) { + ret = addr; + goto up_fail; + } + addr = vdso_addr(addr, image->size - image->sym_vvar_start); + } + addr = get_unmapped_area(NULL, addr, + image->size - image->sym_vvar_start, 0, 0); + if (IS_ERR_VALUE(addr)) { + ret = addr; + goto up_fail; + } + + text_start = addr - image->sym_vvar_start; + current->mm->context.vdso = (void __user *)text_start; + + /* + * MAYWRITE to allow gdb to COW and set breakpoints + */ + vma = _install_special_mapping(mm, + text_start, + image->size, + VM_READ|VM_EXEC| + VM_MAYREAD|VM_MAYWRITE|VM_MAYEXEC, + vdso_mapping); + + if (IS_ERR(vma)) { + ret = PTR_ERR(vma); + goto up_fail; + } + + vma = _install_special_mapping(mm, + addr, + -image->sym_vvar_start, + VM_READ|VM_MAYREAD, + &vvar_mapping); + + if (IS_ERR(vma)) { + ret = PTR_ERR(vma); + do_munmap(mm, text_start, image->size, NULL); + } + +up_fail: + if (ret) + current->mm->context.vdso = NULL; + + mmap_write_unlock(mm); + return ret; +} + +int arch_setup_additional_pages(struct linux_binprm *bprm, int uses_interp) +{ + + if (!vdso_enabled) + return 0; + +#if defined CONFIG_COMPAT + if (!(is_32bit_task())) + return map_vdso(&vdso_image_64_builtin, &vdso_mapping64); + else + return map_vdso(&vdso_image_32_builtin, &vdso_mapping32); 
+#else + return map_vdso(&vdso_image_64_builtin, &vdso_mapping64); +#endif + +} + +static __init int vdso_setup(char *s) +{ + int err; + unsigned long val; + + err = kstrtoul(s, 10, &val); + if (err) + return err; + vdso_enabled = val; + return 0; +} +__setup("vdso=", vdso_setup); -- cgit v1.2.3