From cca122ce9a0e5c8ebe745b6ae785b5b5c80f4fc6 Mon Sep 17 00:00:00 2001 From: Daniel Baumann Date: Fri, 23 Jul 2021 13:28:52 +0200 Subject: Adding upstream version 0.7.1. Signed-off-by: Daniel Baumann --- include/gcc/aarch64/ck_pr.h | 12 +- include/gcc/aarch64/ck_pr_llsc.h | 106 +++++------ include/gcc/aarch64/ck_pr_lse.h | 37 ++-- include/gcc/ck_cc.h | 28 +-- include/gcc/ck_pr.h | 4 +- include/gcc/ppc/ck_pr.h | 32 ++-- include/gcc/s390x/ck_f_pr.h | 97 ++++++++++ include/gcc/s390x/ck_pr.h | 373 +++++++++++++++++++++++++++++++++++++++ include/gcc/sparcv9/ck_pr.h | 32 +++- include/gcc/x86/ck_pr.h | 157 +++++++++------- include/gcc/x86_64/ck_pr.h | 132 ++++++++------ 11 files changed, 787 insertions(+), 223 deletions(-) create mode 100644 include/gcc/s390x/ck_f_pr.h create mode 100644 include/gcc/s390x/ck_pr.h (limited to 'include/gcc') diff --git a/include/gcc/aarch64/ck_pr.h b/include/gcc/aarch64/ck_pr.h index e739c4d..0a47307 100644 --- a/include/gcc/aarch64/ck_pr.h +++ b/include/gcc/aarch64/ck_pr.h @@ -92,7 +92,7 @@ CK_PR_FENCE(unlock, CK_DMB_SY) ck_pr_md_load_##S(const M *target) \ { \ long r = 0; \ - __asm__ __volatile__(I " %w0, [%1];" \ + __asm__ __volatile__(I " %w0, [%1]\n" \ : "=r" (r) \ : "r" (target) \ : "memory"); \ @@ -103,7 +103,7 @@ CK_PR_FENCE(unlock, CK_DMB_SY) ck_pr_md_load_##S(const M *target) \ { \ long r = 0; \ - __asm__ __volatile__(I " %0, [%1];" \ + __asm__ __volatile__(I " %0, [%1]\n" \ : "=r" (r) \ : "r" (target) \ : "memory"); \ @@ -195,10 +195,10 @@ CK_PR_STORE_S_64(double, double, "str") T previous = 0; \ T tmp = 0; \ __asm__ __volatile__("1:" \ - "ldxr" W " %" R "0, [%2];" \ - "neg %" R "0, %" R "0;" \ - "stxr" W " %w1, %" R "0, [%2];" \ - "cbnz %w1, 1b;" \ + "ldxr" W " %" R "0, [%2]\n"\ + "neg %" R "0, %" R "0\n" \ + "stxr" W " %w1, %" R "0, [%2]\n" \ + "cbnz %w1, 1b\n" \ : "=&r" (previous), \ "=&r" (tmp) \ : "r" (target) \ diff --git a/include/gcc/aarch64/ck_pr_llsc.h b/include/gcc/aarch64/ck_pr_llsc.h index aa4e309..6500d96 100644 --- a/include/gcc/aarch64/ck_pr_llsc.h +++ b/include/gcc/aarch64/ck_pr_llsc.h @@ -38,17 +38,17 @@ ck_pr_cas_64_2_value(uint64_t target[2], uint64_t compare[2], uint64_t set[2], u uint64_t tmp1, tmp2; __asm__ __volatile__("1:" - "ldxp %0, %1, [%4];" - "mov %2, %0;" - "mov %3, %1;" - "eor %0, %0, %5;" - "eor %1, %1, %6;" - "orr %1, %0, %1;" - "mov %w0, #0;" - "cbnz %1, 2f;" - "stxp %w0, %7, %8, [%4];" - "cbnz %w0, 1b;" - "mov %w0, #1;" + "ldxp %0, %1, [%4]\n" + "mov %2, %0\n" + "mov %3, %1\n" + "eor %0, %0, %5\n" + "eor %1, %1, %6\n" + "orr %1, %0, %1\n" + "mov %w0, #0\n" + "cbnz %1, 2f\n" + "stxp %w0, %7, %8, [%4]\n" + "cbnz %w0, 1b\n" + "mov %w0, #1\n" "2:" : "=&r" (tmp1), "=&r" (tmp2), "=&r" (value[0]), "=&r" (value[1]) : "r" (target), "r" (compare[0]), "r" (compare[1]), "r" (set[0]), "r" (set[1]) @@ -72,15 +72,15 @@ ck_pr_cas_64_2(uint64_t target[2], uint64_t compare[2], uint64_t set[2]) uint64_t tmp1, tmp2; __asm__ __volatile__("1:" - "ldxp %0, %1, [%2];" - "eor %0, %0, %3;" - "eor %1, %1, %4;" - "orr %1, %0, %1;" - "mov %w0, #0;" - "cbnz %1, 2f;" - "stxp %w0, %5, %6, [%2];" - "cbnz %w0, 1b;" - "mov %w0, #1;" + "ldxp %0, %1, [%2]\n" + "eor %0, %0, %3\n" + "eor %1, %1, %4\n" + "orr %1, %0, %1\n" + "mov %w0, #0\n" + "cbnz %1, 2f\n" + "stxp %w0, %5, %6, [%2]\n" + "cbnz %w0, 1b\n" + "mov %w0, #1\n" "2:" : "=&r" (tmp1), "=&r" (tmp2) : "r" (target), "r" (compare[0]), "r" (compare[1]), "r" (set[0]), "r" (set[1]) @@ -103,12 +103,12 @@ ck_pr_cas_ptr_2(void *target, void *compare, void *set) { \ T previous; \ T tmp; \ - __asm__ __volatile__("1:" \ - "ldxr" W " %" R "0, [%2];" \ - "cmp %" R "0, %" R "4;" \ - "b.ne 2f;" \ - "stxr" W " %w1, %" R "3, [%2];" \ - "cbnz %w1, 1b;" \ + __asm__ __volatile__("1:\n" \ + "ldxr" W " %" R "0, [%2]\n" \ + "cmp %" R "0, %" R "4\n" \ + "b.ne 2f\n" \ + "stxr" W " %w1, %" R "3, [%2]\n" \ + "cbnz %w1, 1b\n" \ "2:" \ : "=&r" (previous), \ "=&r" (tmp) \ @@ -126,11 +126,11 @@ ck_pr_cas_ptr_2(void *target, void *compare, void *set) T tmp; \ __asm__ __volatile__( \ "1:" \ - "ldxr" W " %" R "0, [%2];" \ - "cmp %" R "0, %" R "4;" \ - "b.ne 2f;" \ - "stxr" W " %w1, %" R "3, [%2];" \ - "cbnz %w1, 1b;" \ + "ldxr" W " %" R "0, [%2]\n" \ + "cmp %" R "0, %" R "4\n" \ + "b.ne 2f\n" \ + "stxr" W " %w1, %" R "3, [%2]\n" \ + "cbnz %w1, 1b\n" \ "2:" \ : "=&r" (previous), \ "=&r" (tmp) \ @@ -167,9 +167,9 @@ CK_PR_CAS_S(char, char, "b", "w") T previous; \ T tmp; \ __asm__ __volatile__("1:" \ - "ldxr" W " %" R "0, [%2];" \ - "stxr" W " %w1, %" R "3, [%2];"\ - "cbnz %w1, 1b;" \ + "ldxr" W " %" R "0, [%2]\n"\ + "stxr" W " %w1, %" R "3, [%2]\n"\ + "cbnz %w1, 1b\n" \ : "=&r" (previous), \ "=&r" (tmp) \ : "r" (target), \ @@ -198,10 +198,10 @@ CK_PR_FAS(char, char, char, "b", "w") T previous = 0; \ T tmp = 0; \ __asm__ __volatile__("1:" \ - "ldxr" W " %" R "0, [%2];" \ - I ";" \ - "stxr" W " %w1, %" R "0, [%2];" \ - "cbnz %w1, 1b;" \ + "ldxr" W " %" R "0, [%2]\n"\ + I "\n" \ + "stxr" W " %w1, %" R "0, [%2]\n" \ + "cbnz %w1, 1b\n" \ : "=&r" (previous), \ "=&r" (tmp) \ : "r" (target) \ @@ -239,10 +239,10 @@ CK_PR_UNARY_S(char, char, "b") T previous; \ T tmp; \ __asm__ __volatile__("1:" \ - "ldxr" W " %" R "0, [%2];"\ - I " %" R "0, %" R "0, %" R "3;" \ - "stxr" W " %w1, %" R "0, [%2];" \ - "cbnz %w1, 1b;" \ + "ldxr" W " %" R "0, [%2]\n"\ + I " %" R "0, %" R "0, %" R "3\n" \ + "stxr" W " %w1, %" R "0, [%2]\n" \ + "cbnz %w1, 1b\n" \ : "=&r" (previous), \ "=&r" (tmp) \ : "r" (target), \ @@ -286,10 +286,10 @@ ck_pr_faa_ptr(void *target, uintptr_t delta) uintptr_t previous, r, tmp; __asm__ __volatile__("1:" - "ldxr %0, [%3];" - "add %1, %4, %0;" - "stxr %w2, %1, [%3];" - "cbnz %w2, 1b;" + "ldxr %0, [%3]\n" + "add %1, %4, %0\n" + "stxr %w2, %1, [%3]\n" + "cbnz %w2, 1b\n" : "=&r" (previous), "=&r" (r), "=&r" (tmp) @@ -306,9 +306,9 @@ ck_pr_faa_64(uint64_t *target, uint64_t delta) uint64_t previous, r, tmp; __asm__ __volatile__("1:" - "ldxr %0, [%3];" - "add %1, %4, %0;" - "stxr %w2, %1, [%3];" + "ldxr %0, [%3]\n" + "add %1, %4, %0\n" + "stxr %w2, %1, [%3]\n" "cbnz %w2, 1b;" : "=&r" (previous), "=&r" (r), @@ -326,10 +326,10 @@ ck_pr_faa_64(uint64_t *target, uint64_t delta) { \ T previous, r, tmp; \ __asm__ __volatile__("1:" \ - "ldxr" W " %w0, [%3];" \ - "add %w1, %w4, %w0;" \ - "stxr" W " %w2, %w1, [%3];" \ - "cbnz %w2, 1b;" \ + "ldxr" W " %w0, [%3]\n" \ + "add %w1, %w4, %w0\n" \ + "stxr" W " %w2, %w1, [%3]\n" \ + "cbnz %w2, 1b\n" \ : "=&r" (previous), \ "=&r" (r), \ "=&r" (tmp) \ diff --git a/include/gcc/aarch64/ck_pr_lse.h b/include/gcc/aarch64/ck_pr_lse.h index e2c9554..e450e72 100644 --- a/include/gcc/aarch64/ck_pr_lse.h +++ b/include/gcc/aarch64/ck_pr_lse.h @@ -29,6 +29,7 @@ #ifndef CK_PR_AARCH64_LSE_H #define CK_PR_AARCH64_LSE_H +#error bite #ifndef CK_PR_H #error Do not include this file directly, use ck_pr.h #endif @@ -43,10 +44,10 @@ ck_pr_cas_64_2_value(uint64_t target[2], uint64_t compare[2], uint64_t set[2], u register uint64_t x2 __asm__ ("x2") = set[0]; register uint64_t x3 __asm__ ("x3") = set[1]; - __asm__ __volatile__("casp %0, %1, %4, %5, [%6];" - "eor %2, %0, %7;" - "eor %3, %1, %8;" - "orr %2, %2, %3;" + __asm__ __volatile__("casp %0, %1, %4, %5, [%6]\n" + "eor %2, %0, %7\n" + "eor %3, %1, %8\n" + "orr %2, %2, %3\n" : "+&r" (x0), "+&r" (x1), "=&r" (tmp1), "=&r" (tmp2) : "r" (x2), "r" (x3), "r" (target), "r" (compare[0]), "r" (compare[1]) : "memory"); @@ -74,10 +75,10 @@ ck_pr_cas_64_2(uint64_t target[2], uint64_t compare[2], uint64_t set[2]) register uint64_t x2 __asm__ ("x2") = set[0]; register uint64_t x3 __asm__ ("x3") = set[1]; - __asm__ __volatile__("casp %0, %1, %2, %3, [%4];" - "eor %0, %0, %5;" - "eor %1, %1, %6;" - "orr %0, %0, %1;" + __asm__ __volatile__("casp %0, %1, %2, %3, [%4]\n" + "eor %0, %0, %5\n" + "eor %1, %1, %6\n" + "orr %0, %0, %1\n" : "+&r" (x0), "+&r" (x1) : "r" (x2), "r" (x3), "r" (target), "r" (compare[0]), "r" (compare[1]) : "memory"); @@ -99,7 +100,7 @@ ck_pr_cas_ptr_2(void *target, void *compare, void *set) { \ *(T *)value = compare; \ __asm__ __volatile__( \ - "cas" W " %" R "0, %" R "2, [%1];" \ + "cas" W " %" R "0, %" R "2, [%1]\n"\ : "+&r" (*(T *)value) \ : "r" (target), \ "r" (set) \ @@ -111,7 +112,7 @@ ck_pr_cas_ptr_2(void *target, void *compare, void *set) { \ T previous = compare; \ __asm__ __volatile__( \ - "cas" W " %" R "0, %" R "2, [%1];" \ + "cas" W " %" R "0, %" R "2, [%1]\n"\ : "+&r" (previous) \ : "r" (target), \ "r" (set) \ @@ -144,7 +145,7 @@ CK_PR_CAS_S(char, char, "b", "w") { \ T previous; \ __asm__ __volatile__( \ - "swp" W " %" R "2, %" R "0, [%1];" \ + "swp" W " %" R "2, %" R "0, [%1]\n"\ : "=&r" (previous) \ : "r" (target), \ "r" (v) \ @@ -169,8 +170,8 @@ CK_PR_FAS(char, char, char, "b", "w") CK_CC_INLINE static void \ ck_pr_##O##_##N(M *target) \ { \ - __asm__ __volatile__(I ";" \ - "st" S W " " R "0, [%0];" \ + __asm__ __volatile__(I "\n" \ + "st" S W " " R "0, [%0]\n" \ : \ : "r" (target) \ : "x0", "memory"); \ @@ -204,8 +205,8 @@ CK_PR_UNARY_S(char, char, "b") CK_CC_INLINE static void \ ck_pr_##O##_##N(M *target, T delta) \ { \ - __asm__ __volatile__(I ";" \ - "st" S W " %" R "0, [%1];" \ + __asm__ __volatile__(I "\n" \ + "st" S W " %" R "0, [%1]\n"\ : "+&r" (delta) \ : "r" (target) \ : "memory"); \ @@ -247,7 +248,7 @@ ck_pr_faa_ptr(void *target, uintptr_t delta) uintptr_t previous; __asm__ __volatile__( - "ldadd %2, %0, [%1];" + "ldadd %2, %0, [%1]\n" : "=r" (previous) : "r" (target), "r" (delta) @@ -262,7 +263,7 @@ ck_pr_faa_64(uint64_t *target, uint64_t delta) uint64_t previous; __asm__ __volatile__( - "ldadd %2, %0, [%1];" + "ldadd %2, %0, [%1]\n" : "=r" (previous) : "r" (target), "r" (delta) @@ -277,7 +278,7 @@ ck_pr_faa_64(uint64_t *target, uint64_t delta) { \ T previous; \ __asm__ __volatile__( \ - "ldadd" W " %w2, %w0, [%1];" \ + "ldadd" W " %w2, %w0, [%1]\n" \ : "=r" (previous) \ : "r" (target), \ "r" (delta) \ diff --git a/include/gcc/ck_cc.h b/include/gcc/ck_cc.h index a14a4b5..0a6d17b 100644 --- a/include/gcc/ck_cc.h +++ b/include/gcc/ck_cc.h @@ -39,6 +39,15 @@ #define CK_CC_UNUSED __attribute__((unused)) #define CK_CC_USED __attribute__((used)) #define CK_CC_IMM "i" + +#define CK_CC_CONTAINER(F, T, M, N) \ + CK_CC_INLINE static T * \ + N(F *p) \ + { \ + \ + return (T *)(void *)((char *)p - __builtin_offsetof(T, M)); \ + } + #if defined(__x86_64__) || defined(__x86__) #define CK_CC_IMM_U32 "Z" #define CK_CC_IMM_S32 "e" @@ -103,28 +112,26 @@ #define CK_CC_TYPEOF(X, DEFAULT) __typeof__(X) /* - * Portability wrappers for bitwise ops. + * Portability wrappers for bitwise operations. */ - +#ifndef CK_MD_CC_BUILTIN_DISABLE #define CK_F_CC_FFS -#define CK_F_CC_CLZ -#define CK_F_CC_CTZ -#define CK_F_CC_POPCOUNT - CK_CC_INLINE static int ck_cc_ffs(unsigned int x) { - return __builtin_ffs(x); + return __builtin_ffsl(x); } +#define CK_F_CC_FFSL CK_CC_INLINE static int -ck_cc_clz(unsigned int x) +ck_cc_ffsl(unsigned long x) { - return __builtin_clz(x); + return __builtin_ffsll(x); } +#define CK_F_CC_CTZ CK_CC_INLINE static int ck_cc_ctz(unsigned int x) { @@ -132,11 +139,12 @@ ck_cc_ctz(unsigned int x) return __builtin_ctz(x); } +#define CK_F_CC_POPCOUNT CK_CC_INLINE static int ck_cc_popcount(unsigned int x) { return __builtin_popcount(x); } - +#endif /* CK_MD_CC_BUILTIN_DISABLE */ #endif /* CK_GCC_CC_H */ diff --git a/include/gcc/ck_pr.h b/include/gcc/ck_pr.h index 084d423..108e983 100644 --- a/include/gcc/ck_pr.h +++ b/include/gcc/ck_pr.h @@ -80,7 +80,7 @@ ck_pr_md_load_ptr(const void *target) void *r; ck_pr_barrier(); - r = CK_CC_DECONST_PTR(CK_PR_ACCESS(target)); + r = CK_CC_DECONST_PTR(*(volatile void *const*)(target)); ck_pr_barrier(); return r; @@ -91,7 +91,7 @@ ck_pr_md_store_ptr(void *target, const void *v) { ck_pr_barrier(); - CK_PR_ACCESS(target) = CK_CC_DECONST_PTR(v); + *(volatile void **)target = CK_CC_DECONST_PTR(v); ck_pr_barrier(); return; } diff --git a/include/gcc/ppc/ck_pr.h b/include/gcc/ppc/ck_pr.h index cd7935d..73f0cb7 100644 --- a/include/gcc/ppc/ck_pr.h +++ b/include/gcc/ppc/ck_pr.h @@ -67,21 +67,29 @@ ck_pr_stall(void) __asm__ __volatile__(I ::: "memory"); \ } -CK_PR_FENCE(atomic, "lwsync") -CK_PR_FENCE(atomic_store, "lwsync") +#ifdef CK_MD_PPC32_LWSYNC +#define CK_PR_LWSYNCOP "lwsync" +#else /* CK_MD_PPC32_LWSYNC_DISABLE */ +#define CK_PR_LWSYNCOP "sync" +#endif + +CK_PR_FENCE(atomic, CK_PR_LWSYNCOP) +CK_PR_FENCE(atomic_store, CK_PR_LWSYNCOP) CK_PR_FENCE(atomic_load, "sync") -CK_PR_FENCE(store_atomic, "lwsync") -CK_PR_FENCE(load_atomic, "lwsync") -CK_PR_FENCE(store, "lwsync") +CK_PR_FENCE(store_atomic, CK_PR_LWSYNCOP) +CK_PR_FENCE(load_atomic, CK_PR_LWSYNCOP) +CK_PR_FENCE(store, CK_PR_LWSYNCOP) CK_PR_FENCE(store_load, "sync") -CK_PR_FENCE(load, "lwsync") -CK_PR_FENCE(load_store, "lwsync") +CK_PR_FENCE(load, CK_PR_LWSYNCOP) +CK_PR_FENCE(load_store, CK_PR_LWSYNCOP) CK_PR_FENCE(memory, "sync") -CK_PR_FENCE(acquire, "lwsync") -CK_PR_FENCE(release, "lwsync") -CK_PR_FENCE(acqrel, "lwsync") -CK_PR_FENCE(lock, "lwsync") -CK_PR_FENCE(unlock, "lwsync") +CK_PR_FENCE(acquire, CK_PR_LWSYNCOP) +CK_PR_FENCE(release, CK_PR_LWSYNCOP) +CK_PR_FENCE(acqrel, CK_PR_LWSYNCOP) +CK_PR_FENCE(lock, CK_PR_LWSYNCOP) +CK_PR_FENCE(unlock, CK_PR_LWSYNCOP) + +#undef CK_PR_LWSYNCOP #undef CK_PR_FENCE diff --git a/include/gcc/s390x/ck_f_pr.h b/include/gcc/s390x/ck_f_pr.h new file mode 100644 index 0000000..cd54a28 --- /dev/null +++ b/include/gcc/s390x/ck_f_pr.h @@ -0,0 +1,97 @@ +/* DO NOT EDIT. This is auto-generated from feature.sh */ +#define CK_F_PR_ADD_32 +#define CK_F_PR_ADD_64 +#define CK_F_PR_ADD_INT +#define CK_F_PR_ADD_PTR +#define CK_F_PR_ADD_UINT +#define CK_F_PR_AND_32 +#define CK_F_PR_AND_64 +#define CK_F_PR_AND_INT +#define CK_F_PR_AND_PTR +#define CK_F_PR_AND_UINT +#define CK_F_PR_CAS_32 +#define CK_F_PR_CAS_32_VALUE +#define CK_F_PR_CAS_64 +#define CK_F_PR_CAS_64_VALUE +#define CK_F_PR_CAS_INT +#define CK_F_PR_CAS_INT_VALUE +#define CK_F_PR_CAS_PTR +#define CK_F_PR_CAS_PTR_VALUE +#define CK_F_PR_CAS_UINT +#define CK_F_PR_CAS_UINT_VALUE +#define CK_F_PR_DEC_32 +#define CK_F_PR_DEC_64 +#define CK_F_PR_DEC_INT +#define CK_F_PR_DEC_PTR +#define CK_F_PR_DEC_UINT +#define CK_F_PR_FAA_32 +#define CK_F_PR_FAA_64 +#define CK_F_PR_FAA_INT +#define CK_F_PR_FAA_PTR +#define CK_F_PR_FAA_UINT +#define CK_F_PR_FAS_32 +#define CK_F_PR_FAS_64 +#define CK_F_PR_FAS_INT +#define CK_F_PR_FAS_PTR +#define CK_F_PR_FAS_UINT +#define CK_F_PR_FAS_DOUBLE +#define CK_F_PR_FENCE_LOAD +#define CK_F_PR_FENCE_LOAD_DEPENDS +#define CK_F_PR_FENCE_MEMORY +#define CK_F_PR_FENCE_STORE +#define CK_F_PR_FENCE_STRICT_LOAD +#define CK_F_PR_FENCE_STRICT_LOAD_DEPENDS +#define CK_F_PR_FENCE_STRICT_MEMORY +#define CK_F_PR_FENCE_STRICT_STORE +#define CK_F_PR_INC_32 +#define CK_F_PR_INC_64 +#define CK_F_PR_INC_INT +#define CK_F_PR_INC_PTR +#define CK_F_PR_INC_UINT +#define CK_F_PR_LOAD_16 +#define CK_F_PR_LOAD_32 +#define CK_F_PR_LOAD_64 +#define CK_F_PR_LOAD_8 +#define CK_F_PR_LOAD_CHAR +#define CK_F_PR_LOAD_DOUBLE +#define CK_F_PR_LOAD_INT +#define CK_F_PR_LOAD_PTR +#define CK_F_PR_LOAD_SHORT +#define CK_F_PR_LOAD_UINT +#define CK_F_PR_NEG_32 +#define CK_F_PR_NEG_64 +#define CK_F_PR_NEG_INT +#define CK_F_PR_NEG_PTR +#define CK_F_PR_NEG_UINT +#define CK_F_PR_NOT_32 +#define CK_F_PR_NOT_64 +#define CK_F_PR_NOT_INT +#define CK_F_PR_NOT_PTR +#define CK_F_PR_NOT_UINT +#define CK_F_PR_OR_32 +#define CK_F_PR_OR_64 +#define CK_F_PR_OR_INT +#define CK_F_PR_OR_PTR +#define CK_F_PR_OR_UINT +#define CK_F_PR_STALL +#define CK_F_PR_STORE_16 +#define CK_F_PR_STORE_32 +#define CK_F_PR_STORE_64 +#define CK_F_PR_STORE_8 +#define CK_F_PR_STORE_CHAR +#define CK_F_PR_STORE_DOUBLE +#define CK_F_PR_STORE_INT +#define CK_F_PR_STORE_PTR +#define CK_F_PR_STORE_SHORT +#define CK_F_PR_STORE_UINT +#define CK_F_PR_SUB_32 +#define CK_F_PR_SUB_64 +#define CK_F_PR_SUB_INT +#define CK_F_PR_SUB_PTR +#define CK_F_PR_SUB_UINT +#define CK_F_PR_XOR_32 +#define CK_F_PR_XOR_64 +#define CK_F_PR_XOR_INT +#define CK_F_PR_XOR_PTR +#define CK_F_PR_XOR_UINT + diff --git a/include/gcc/s390x/ck_pr.h b/include/gcc/s390x/ck_pr.h new file mode 100644 index 0000000..8ad22b2 --- /dev/null +++ b/include/gcc/s390x/ck_pr.h @@ -0,0 +1,373 @@ +/* + * Copyright 2009-2015 Samy Al Bahra. + * Copyright 2017 Neale Ferguson + * All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * 2. Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * + * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND + * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE + * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE + * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL + * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS + * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) + * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT + * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY + * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF + * SUCH DAMAGE. + */ + +#ifndef CK_PR_S390X_H +#define CK_PR_S390X_H + +#ifndef CK_PR_H +#error Do not include this file directly, use ck_pr.h +#endif + +#include +#include + +/* + * The following represent supported atomic operations. + * These operations may be emulated. + */ +#include "ck_f_pr.h" + +/* + * Minimum interface requirement met. + */ +#define CK_F_PR + +/* + * This bounces the hardware thread from low to medium + * priority. I am unsure of the benefits of this approach + * but it is used by the Linux kernel. + */ +CK_CC_INLINE static void +ck_pr_stall(void) +{ + __sync_synchronize(); + return; +} + +#define CK_PR_FENCE(T) \ + CK_CC_INLINE static void \ + ck_pr_fence_strict_##T(void) \ + { \ + __sync_synchronize(); \ + } + +/* + * These are derived from: + * http://www.ibm.com/developerworks/systems/articles/powerpc.html + */ +CK_PR_FENCE(atomic) +CK_PR_FENCE(atomic_store) +CK_PR_FENCE(atomic_load) +CK_PR_FENCE(store_atomic) +CK_PR_FENCE(load_atomic) +CK_PR_FENCE(store) +CK_PR_FENCE(store_load) +CK_PR_FENCE(load) +CK_PR_FENCE(load_store) +CK_PR_FENCE(memory) +CK_PR_FENCE(acquire) +CK_PR_FENCE(release) +CK_PR_FENCE(acqrel) +CK_PR_FENCE(lock) +CK_PR_FENCE(unlock) + +#undef CK_PR_FENCE + +#define CK_PR_LOAD(S, M, T, C, I) \ + CK_CC_INLINE static T \ + ck_pr_md_load_##S(const M *target) \ + { \ + T r; \ + __asm__ __volatile__(I "\t%0, %1\n" \ + : "=r" (r) \ + : "Q" (*(const C *)target) \ + : "memory"); \ + return (r); \ + } + +CK_PR_LOAD(ptr, void, void *, uint64_t, "lg") + +#define CK_PR_LOAD_S(S, T, I) CK_PR_LOAD(S, T, T, T, I) + +CK_PR_LOAD_S(64, uint64_t, "lg") +CK_PR_LOAD_S(32, uint32_t, "llgf") +CK_PR_LOAD_S(16, uint16_t, "llgh") +CK_PR_LOAD_S(8, uint8_t, "llgc") +CK_PR_LOAD_S(uint, unsigned int, "llgf") +CK_PR_LOAD_S(int, int, "llgf") +CK_PR_LOAD_S(short, short, "lgh") +CK_PR_LOAD_S(char, char, "lgb") +#ifndef CK_PR_DISABLE_DOUBLE +CK_CC_INLINE static double +ck_pr_md_load_double(const double *target) +{ + double r; + __asm__ __volatile__("ld %0, %1\n" + : "=f" (r) + : "Q" (*(const double *)target) + : "memory"); + return (r); +} +#endif + +#undef CK_PR_LOAD_S +#undef CK_PR_LOAD + +#define CK_PR_STORE(S, M, T, C, I) \ + CK_CC_INLINE static void \ + ck_pr_md_store_##S(M *target, T v) \ + { \ + __asm__ __volatile__(I "\t%1, %0\n" \ + : "=Q" (*(C *)target) \ + : "r" (v) \ + : "memory"); \ + return; \ + } + +CK_PR_STORE(ptr, void, const void *, uint64_t, "stg") + +#define CK_PR_STORE_S(S, T, I) CK_PR_STORE(S, T, T, T, I) + +CK_PR_STORE_S(64, uint64_t, "stg") +CK_PR_STORE_S(32, uint32_t, "st") +CK_PR_STORE_S(16, uint16_t, "sth") +CK_PR_STORE_S(8, uint8_t, "stc") +CK_PR_STORE_S(uint, unsigned int, "st") +CK_PR_STORE_S(int, int, "st") +CK_PR_STORE_S(short, short, "sth") +CK_PR_STORE_S(char, char, "stc") +#ifndef CK_PR_DISABLE_DOUBLE +CK_CC_INLINE static void +ck_pr_md_store_double(double *target, double v) +{ + __asm__ __volatile__(" std %1, %0\n" + : "=Q" (*(double *)target) + : "f" (v) + : "0", "memory"); +} +#endif + +#undef CK_PR_STORE_S +#undef CK_PR_STORE + +CK_CC_INLINE static bool +ck_pr_cas_64_value(uint64_t *target, uint64_t compare, uint64_t set, uint64_t *value) +{ + *value = __sync_val_compare_and_swap(target,compare,set); + return (*value == compare); +} + +CK_CC_INLINE static bool +ck_pr_cas_ptr_value(void *target, void *compare, void *set, void *value) +{ + uintptr_t previous; + + previous = __sync_val_compare_and_swap((uintptr_t *) target, + (uintptr_t) compare, + (uintptr_t) set); + *((uintptr_t *) value) = previous; + return (previous == (uintptr_t) compare); +} + +CK_CC_INLINE static bool +ck_pr_cas_64(uint64_t *target, uint64_t compare, uint64_t set) +{ + return(__sync_bool_compare_and_swap(target,compare,set)); +} + +CK_CC_INLINE static bool +ck_pr_cas_ptr(void *target, void *compare, void *set) +{ + return(__sync_bool_compare_and_swap((uintptr_t *) target, + (uintptr_t) compare, + (uintptr_t) set)); +} + +#define CK_PR_CAS(N, T) \ + CK_CC_INLINE static bool \ + ck_pr_cas_##N##_value(T *target, T compare, T set, T *value) \ + { \ + *value = __sync_val_compare_and_swap(target, \ + compare, \ + set); \ + return(*value == compare); \ + } \ + CK_CC_INLINE static bool \ + ck_pr_cas_##N(T *target, T compare, T set) \ + { \ + return(__sync_bool_compare_and_swap(target, \ + compare, \ + set)); \ + } + +CK_PR_CAS(32, uint32_t) +CK_PR_CAS(uint, unsigned int) +CK_PR_CAS(int, int) + +#undef CK_PR_CAS + +CK_CC_INLINE static void * +ck_pr_fas_ptr(void *target, void *v) +{ + return((void *)__atomic_exchange_n((uintptr_t *) target, (uintptr_t) v, __ATOMIC_ACQUIRE)); +} + +#define CK_PR_FAS(N, M, T) \ + CK_CC_INLINE static T \ + ck_pr_fas_##N(M *target, T v) \ + { \ + return(__atomic_exchange_n(target, v, __ATOMIC_ACQUIRE)); \ + } + +CK_PR_FAS(64, uint64_t, uint64_t) +CK_PR_FAS(32, uint32_t, uint32_t) +CK_PR_FAS(int, int, int) +CK_PR_FAS(uint, unsigned int, unsigned int) + +#ifndef CK_PR_DISABLE_DOUBLE +CK_CC_INLINE static double +ck_pr_fas_double(double *target, double *v) +{ + double previous; + + __asm__ __volatile__ (" lg 1,%2\n" + "0: lg 0,%1\n" + " csg 0,1,%1\n" + " jnz 0b\n" + " ldgr %0,0\n" + : "=f" (previous) + : "Q" (target), "Q" (v) + : "0", "1", "cc", "memory"); + return (previous); +} +#endif + +#undef CK_PR_FAS + +/* + * Atomic store-only binary operations. + */ +#define CK_PR_BINARY(K, S, M, T) \ + CK_CC_INLINE static void \ + ck_pr_##K##_##S(M *target, T d) \ + { \ + d = __sync_fetch_and_##K((T *)target, d); \ + return; \ + } + +#define CK_PR_BINARY_S(K, S, T) CK_PR_BINARY(K, S, T, T) + +#define CK_PR_GENERATE(K) \ + CK_PR_BINARY(K, ptr, void, void *) \ + CK_PR_BINARY_S(K, char, char) \ + CK_PR_BINARY_S(K, int, int) \ + CK_PR_BINARY_S(K, uint, unsigned int) \ + CK_PR_BINARY_S(K, 64, uint64_t) \ + CK_PR_BINARY_S(K, 32, uint32_t) \ + CK_PR_BINARY_S(K, 16, uint16_t) \ + CK_PR_BINARY_S(K, 8, uint8_t) + +CK_PR_GENERATE(add) +CK_PR_GENERATE(sub) +CK_PR_GENERATE(and) +CK_PR_GENERATE(or) +CK_PR_GENERATE(xor) + +#undef CK_PR_GENERATE +#undef CK_PR_BINARY_S +#undef CK_PR_BINARY + +#define CK_PR_UNARY(S, M, T) \ + CK_CC_INLINE static void \ + ck_pr_inc_##S(M *target) \ + { \ + ck_pr_add_##S(target, (T)1); \ + return; \ + } \ + CK_CC_INLINE static void \ + ck_pr_dec_##S(M *target) \ + { \ + ck_pr_sub_##S(target, (T)1); \ + return; \ + } + +#define CK_PR_UNARY_X(S, M) \ + CK_CC_INLINE static void \ + ck_pr_not_##S(M *target) \ + { \ + M newval; \ + do { \ + newval = ~(*target); \ + } while (!__sync_bool_compare_and_swap(target, \ + *target, \ + newval)); \ + } \ + CK_CC_INLINE static void \ + ck_pr_neg_##S(M *target) \ + { \ + M newval; \ + do { \ + newval = -(*target); \ + } while (!__sync_bool_compare_and_swap(target, \ + *target, \ + newval)); \ + } + +#define CK_PR_UNARY_S(S, M) CK_PR_UNARY(S, M, M) \ + CK_PR_UNARY_X(S, M) + +CK_PR_UNARY(ptr, void, void *) +CK_PR_UNARY_S(char, char) +CK_PR_UNARY_S(int, int) +CK_PR_UNARY_S(uint, unsigned int) +CK_PR_UNARY_S(64, uint64_t) +CK_PR_UNARY_S(32, uint32_t) +CK_PR_UNARY_S(16, uint16_t) +CK_PR_UNARY_S(8, uint8_t) + +#undef CK_PR_UNARY_S +#undef CK_PR_UNARY + +CK_CC_INLINE static void * +ck_pr_faa_ptr(void *target, uintptr_t delta) +{ + uintptr_t previous; + + previous = __sync_fetch_and_add((uintptr_t *) target, delta); + + return (void *)(previous); +} + +#define CK_PR_FAA(S, T) \ + CK_CC_INLINE static T \ + ck_pr_faa_##S(T *target, T delta) \ + { \ + T previous; \ + \ + previous = __sync_fetch_and_add(target, delta); \ + \ + return (previous); \ + } + +CK_PR_FAA(64, uint64_t) +CK_PR_FAA(32, uint32_t) +CK_PR_FAA(uint, unsigned int) +CK_PR_FAA(int, int) + +#undef CK_PR_FAA + +#endif /* CK_PR_S390X_H */ diff --git a/include/gcc/sparcv9/ck_pr.h b/include/gcc/sparcv9/ck_pr.h index 767af6a..b60e199 100644 --- a/include/gcc/sparcv9/ck_pr.h +++ b/include/gcc/sparcv9/ck_pr.h @@ -76,7 +76,7 @@ CK_PR_FENCE(store, "membar #StoreStore") CK_PR_FENCE(store_load, "membar #StoreLoad") CK_PR_FENCE(load, "membar #LoadLoad") CK_PR_FENCE(load_store, "membar #LoadStore") -CK_PR_FENCE(memory, "membar #LoadLoad | #LoadStore | #StoreStore | #StoreLoad") +CK_PR_FENCE(memory, "membar #MemIssue") CK_PR_FENCE(acquire, "membar #LoadLoad | #LoadStore") CK_PR_FENCE(release, "membar #LoadStore | #StoreStore") CK_PR_FENCE(acqrel, "membar #LoadLoad | #LoadStore | #StoreStore") @@ -136,11 +136,26 @@ CK_PR_STORE_S(int, int, "stsw") #undef CK_PR_STORE_S #undef CK_PR_STORE +/* Use the appropriate address space for atomics within the FreeBSD kernel. */ +#if defined(__FreeBSD__) && defined(_KERNEL) +#include +#include +#define CK_PR_INS_CAS "casa" +#define CK_PR_INS_CASX "casxa" +#define CK_PR_INS_SWAP "swapa" +#define CK_PR_ASI_ATOMIC __XSTRING(__ASI_ATOMIC) +#else +#define CK_PR_INS_CAS "cas" +#define CK_PR_INS_CASX "casx" +#define CK_PR_INS_SWAP "swap" +#define CK_PR_ASI_ATOMIC "" +#endif + CK_CC_INLINE static bool ck_pr_cas_64_value(uint64_t *target, uint64_t compare, uint64_t set, uint64_t *value) { - __asm__ __volatile__("casx [%1], %2, %0" + __asm__ __volatile__(CK_PR_INS_CASX " [%1] " CK_PR_ASI_ATOMIC ", %2, %0" : "+&r" (set) : "r" (target), "r" (compare) @@ -154,7 +169,7 @@ CK_CC_INLINE static bool ck_pr_cas_64(uint64_t *target, uint64_t compare, uint64_t set) { - __asm__ __volatile__("casx [%1], %2, %0" + __asm__ __volatile__(CK_PR_INS_CASX " [%1] " CK_PR_ASI_ATOMIC ", %2, %0" : "+&r" (set) : "r" (target), "r" (compare) @@ -181,7 +196,7 @@ ck_pr_cas_ptr_value(void *target, void *compare, void *set, void *previous) CK_CC_INLINE static bool \ ck_pr_cas_##N##_value(T *target, T compare, T set, T *value) \ { \ - __asm__ __volatile__("cas [%1], %2, %0" \ + __asm__ __volatile__(CK_PR_INS_CAS " [%1] " CK_PR_ASI_ATOMIC ", %2, %0" \ : "+&r" (set) \ : "r" (target), \ "r" (compare) \ @@ -192,7 +207,7 @@ ck_pr_cas_ptr_value(void *target, void *compare, void *set, void *previous) CK_CC_INLINE static bool \ ck_pr_cas_##N(T *target, T compare, T set) \ { \ - __asm__ __volatile__("cas [%1], %2, %0" \ + __asm__ __volatile__(CK_PR_INS_CAS " [%1] " CK_PR_ASI_ATOMIC ", %2, %0" \ : "+&r" (set) \ : "r" (target), \ "r" (compare) \ @@ -211,7 +226,7 @@ CK_PR_CAS(int, int) ck_pr_fas_##N(T *target, T update) \ { \ \ - __asm__ __volatile__("swap [%1], %0" \ + __asm__ __volatile__(CK_PR_INS_SWAP " [%1] " CK_PR_ASI_ATOMIC ", %0" \ : "+&r" (update) \ : "r" (target) \ : "memory"); \ @@ -224,5 +239,10 @@ CK_PR_FAS(32, uint32_t) #undef CK_PR_FAS +#undef CK_PR_INS_CAS +#undef CK_PR_INS_CASX +#undef CK_PR_INS_SWAP +#undef CK_PR_ASI_ATOMIC + #endif /* CK_PR_SPARCV9_H */ diff --git a/include/gcc/x86/ck_pr.h b/include/gcc/x86/ck_pr.h index a04cebf..5194dee 100644 --- a/include/gcc/x86/ck_pr.h +++ b/include/gcc/x86/ck_pr.h @@ -45,15 +45,9 @@ /* Minimum requirements for the CK_PR interface are met. */ #define CK_F_PR -#ifdef CK_MD_UMP -#define CK_PR_LOCK_PREFIX -#else -#define CK_PR_LOCK_PREFIX "lock " -#endif - /* - * Prevent speculative execution in busy-wait loops (P4 <=) - * or "predefined delay". + * Prevent speculative execution in busy-wait loops (P4 <=) or "predefined + * delay". */ CK_CC_INLINE static void ck_pr_stall(void) @@ -62,28 +56,52 @@ ck_pr_stall(void) return; } +#ifdef CK_MD_UMP +#define CK_PR_LOCK_PREFIX +#define CK_PR_FENCE(T, I) \ + CK_CC_INLINE static void \ + ck_pr_fence_strict_##T(void) \ + { \ + __asm__ __volatile__("" ::: "memory"); \ + return; \ + } +#else +#define CK_PR_LOCK_PREFIX "lock " #define CK_PR_FENCE(T, I) \ CK_CC_INLINE static void \ ck_pr_fence_strict_##T(void) \ { \ __asm__ __volatile__(I ::: "memory"); \ + return; \ } +#endif /* CK_MD_UMP */ -CK_PR_FENCE(atomic, "sfence") -CK_PR_FENCE(atomic_store, "sfence") -CK_PR_FENCE(atomic_load, "mfence") -CK_PR_FENCE(store_atomic, "sfence") -CK_PR_FENCE(load_atomic, "mfence") -CK_PR_FENCE(load, "lfence") -CK_PR_FENCE(load_store, "mfence") -CK_PR_FENCE(store, "sfence") -CK_PR_FENCE(store_load, "mfence") -CK_PR_FENCE(memory, "mfence") -CK_PR_FENCE(release, "mfence") -CK_PR_FENCE(acquire, "mfence") -CK_PR_FENCE(acqrel, "mfence") -CK_PR_FENCE(lock, "mfence") -CK_PR_FENCE(unlock, "mfence") +#if defined(CK_MD_SSE_DISABLE) +/* If SSE is disabled, then use atomic operations for serialization. */ +#define CK_MD_X86_MFENCE "lock addl $0, (%%esp)" +#define CK_MD_X86_SFENCE CK_MD_X86_MFENCE +#define CK_MD_X86_LFENCE CK_MD_X86_MFENCE +#else +#define CK_MD_X86_SFENCE "sfence" +#define CK_MD_X86_LFENCE "lfence" +#define CK_MD_X86_MFENCE "mfence" +#endif /* !CK_MD_SSE_DISABLE */ + +CK_PR_FENCE(atomic, "") +CK_PR_FENCE(atomic_store, "") +CK_PR_FENCE(atomic_load, "") +CK_PR_FENCE(store_atomic, "") +CK_PR_FENCE(load_atomic, "") +CK_PR_FENCE(load, CK_MD_X86_LFENCE) +CK_PR_FENCE(load_store, CK_MD_X86_MFENCE) +CK_PR_FENCE(store, CK_MD_X86_SFENCE) +CK_PR_FENCE(store_load, CK_MD_X86_MFENCE) +CK_PR_FENCE(memory, CK_MD_X86_MFENCE) +CK_PR_FENCE(release, CK_MD_X86_MFENCE) +CK_PR_FENCE(acquire, CK_MD_X86_MFENCE) +CK_PR_FENCE(acqrel, CK_MD_X86_MFENCE) +CK_PR_FENCE(lock, CK_MD_X86_MFENCE) +CK_PR_FENCE(unlock, CK_MD_X86_MFENCE) #undef CK_PR_FENCE @@ -215,18 +233,18 @@ CK_PR_FAA_S(8, uint8_t, "xaddb") } #define CK_PR_UNARY_V(K, S, T, C, I) \ - CK_CC_INLINE static void \ - ck_pr_##K##_##S##_zero(T *target, bool *r) \ + CK_CC_INLINE static bool \ + ck_pr_##K##_##S##_is_zero(T *target) \ { \ + bool ret; \ __asm__ __volatile__(CK_PR_LOCK_PREFIX I " %0; setz %1" \ : "+m" (*(C *)target), \ - "=m" (*r) \ + "=qm" (ret) \ : \ : "memory", "cc"); \ - return; \ + return ret; \ } - #define CK_PR_UNARY_S(K, S, T, I) CK_PR_UNARY(K, S, T, T, I) #define CK_PR_GENERATE(K) \ @@ -289,8 +307,38 @@ CK_PR_GENERATE(xor) #undef CK_PR_BINARY /* - * Atomic compare and swap. + * Atomic compare and swap, with a variant that sets *v to the old value of target. */ +#ifdef __GCC_ASM_FLAG_OUTPUTS__ +#define CK_PR_CAS(S, M, T, C, I) \ + CK_CC_INLINE static bool \ + ck_pr_cas_##S(M *target, T compare, T set) \ + { \ + bool z; \ + __asm__ __volatile__(CK_PR_LOCK_PREFIX I " %3, %0" \ + : "+m" (*(C *)target), \ + "=@ccz" (z), \ + /* RAX is clobbered by cmpxchg. */ \ + "+a" (compare) \ + : "q" (set) \ + : "memory", "cc"); \ + return z; \ + } \ + \ + CK_CC_INLINE static bool \ + ck_pr_cas_##S##_value(M *target, T compare, T set, M *v) \ + { \ + bool z; \ + __asm__ __volatile__(CK_PR_LOCK_PREFIX I " %3, %0;" \ + : "+m" (*(C *)target), \ + "=@ccz" (z), \ + "+a" (compare) \ + : "q" (set) \ + : "memory", "cc"); \ + *(T *)v = compare; \ + return z; \ + } +#else #define CK_PR_CAS(S, M, T, C, I) \ CK_CC_INLINE static bool \ ck_pr_cas_##S(M *target, T compare, T set) \ @@ -303,7 +351,23 @@ CK_PR_GENERATE(xor) "a" (compare) \ : "memory", "cc"); \ return z; \ + } \ + \ + CK_CC_INLINE static bool \ + ck_pr_cas_##S##_value(M *target, T compare, T set, M *v) \ + { \ + bool z; \ + __asm__ __volatile__(CK_PR_LOCK_PREFIX I " %3, %0;" \ + "setz %1;" \ + : "+m" (*(C *)target), \ + "=q" (z), \ + "+a" (compare) \ + : "q" (set) \ + : "memory", "cc"); \ + *(T *)v = compare; \ + return z; \ } +#endif CK_PR_CAS(ptr, void, void *, char, "cmpxchgl") @@ -319,41 +383,6 @@ CK_PR_CAS_S(8, uint8_t, "cmpxchgb") #undef CK_PR_CAS_S #undef CK_PR_CAS -/* - * Compare and swap, set *v to old value of target. - */ -#define CK_PR_CAS_O(S, M, T, C, I, R) \ - CK_CC_INLINE static bool \ - ck_pr_cas_##S##_value(M *target, T compare, T set, M *v) \ - { \ - bool z; \ - __asm__ __volatile__(CK_PR_LOCK_PREFIX "cmpxchg" I " %3, %0;" \ - "mov %% " R ", %2;" \ - "setz %1;" \ - : "+m" (*(C *)target), \ - "=a" (z), \ - "=m" (*(C *)v) \ - : "q" (set), \ - "a" (compare) \ - : "memory", "cc"); \ - return (bool)z; \ - } - -CK_PR_CAS_O(ptr, void, void *, char, "l", "eax") - -#define CK_PR_CAS_O_S(S, T, I, R) \ - CK_PR_CAS_O(S, T, T, T, I, R) - -CK_PR_CAS_O_S(char, char, "b", "al") -CK_PR_CAS_O_S(int, int, "l", "eax") -CK_PR_CAS_O_S(uint, unsigned int, "l", "eax") -CK_PR_CAS_O_S(32, uint32_t, "l", "eax") -CK_PR_CAS_O_S(16, uint16_t, "w", "ax") -CK_PR_CAS_O_S(8, uint8_t, "b", "al") - -#undef CK_PR_CAS_O_S -#undef CK_PR_CAS_O - /* * Atomic bit test operations. */ diff --git a/include/gcc/x86_64/ck_pr.h b/include/gcc/x86_64/ck_pr.h index 532d593..4222729 100644 --- a/include/gcc/x86_64/ck_pr.h +++ b/include/gcc/x86_64/ck_pr.h @@ -58,8 +58,8 @@ #endif /* - * Prevent speculative execution in busy-wait loops (P4 <=) - * or "predefined delay". + * Prevent speculative execution in busy-wait loops (P4 <=) or "predefined + * delay". */ CK_CC_INLINE static void ck_pr_stall(void) @@ -75,18 +75,39 @@ ck_pr_stall(void) __asm__ __volatile__(I ::: "memory"); \ } -CK_PR_FENCE(atomic, "sfence") -CK_PR_FENCE(atomic_store, "sfence") -CK_PR_FENCE(atomic_load, "mfence") -CK_PR_FENCE(store_atomic, "sfence") -CK_PR_FENCE(load_atomic, "mfence") +/* Atomic operations are always serializing. */ +CK_PR_FENCE(atomic, "") +CK_PR_FENCE(atomic_store, "") +CK_PR_FENCE(atomic_load, "") +CK_PR_FENCE(store_atomic, "") +CK_PR_FENCE(load_atomic, "") + +/* Traditional fence interface. */ CK_PR_FENCE(load, "lfence") CK_PR_FENCE(load_store, "mfence") CK_PR_FENCE(store, "sfence") CK_PR_FENCE(store_load, "mfence") CK_PR_FENCE(memory, "mfence") + +/* Below are stdatomic-style fences. */ + +/* + * Provides load-store and store-store ordering. However, Intel specifies that + * the WC memory model is relaxed. It is likely an sfence *is* sufficient (in + * particular, stores are not re-ordered with respect to prior loads and it is + * really just the stores that are subject to re-ordering). However, we take + * the conservative route as the manuals are too ambiguous for my taste. + */ CK_PR_FENCE(release, "mfence") + +/* + * Provides load-load and load-store ordering. The lfence instruction ensures + * all prior load operations are complete before any subsequent instructions + * actually begin execution. However, the manual also ends up going to describe + * WC memory as a relaxed model. + */ CK_PR_FENCE(acquire, "mfence") + CK_PR_FENCE(acqrel, "mfence") CK_PR_FENCE(lock, "mfence") CK_PR_FENCE(unlock, "mfence") @@ -311,18 +332,18 @@ CK_PR_FAA_S(8, uint8_t, "xaddb") } #define CK_PR_UNARY_V(K, S, T, C, I) \ - CK_CC_INLINE static void \ - ck_pr_##K##_##S##_zero(T *target, bool *r) \ + CK_CC_INLINE static bool \ + ck_pr_##K##_##S##_is_zero(T *target) \ { \ + bool ret; \ __asm__ __volatile__(CK_PR_LOCK_PREFIX I " %0; setz %1" \ : "+m" (*(C *)target), \ - "=m" (*r) \ + "=rm" (ret) \ : \ : "memory", "cc"); \ - return; \ + return ret; \ } - #define CK_PR_UNARY_S(K, S, T, I) CK_PR_UNARY(K, S, T, T, I) #define CK_PR_GENERATE(K) \ @@ -387,8 +408,38 @@ CK_PR_GENERATE(xor) #undef CK_PR_BINARY /* - * Atomic compare and swap. + * Atomic compare and swap, with a variant that sets *v to the old value of target. */ +#ifdef __GCC_ASM_FLAG_OUTPUTS__ +#define CK_PR_CAS(S, M, T, C, I) \ + CK_CC_INLINE static bool \ + ck_pr_cas_##S(M *target, T compare, T set) \ + { \ + bool z; \ + __asm__ __volatile__(CK_PR_LOCK_PREFIX I " %3, %0" \ + : "+m" (*(C *)target), \ + "=@ccz" (z), \ + /* RAX is clobbered by cmpxchg. */ \ + "+a" (compare) \ + : "q" (set) \ + : "memory", "cc"); \ + return z; \ + } \ + \ + CK_CC_INLINE static bool \ + ck_pr_cas_##S##_value(M *target, T compare, T set, M *v) \ + { \ + bool z; \ + __asm__ __volatile__(CK_PR_LOCK_PREFIX I " %3, %0;" \ + : "+m" (*(C *)target), \ + "=@ccz" (z), \ + "+a" (compare) \ + : "q" (set) \ + : "memory", "cc"); \ + *(T *)v = compare; \ + return z; \ + } +#else #define CK_PR_CAS(S, M, T, C, I) \ CK_CC_INLINE static bool \ ck_pr_cas_##S(M *target, T compare, T set) \ @@ -401,7 +452,23 @@ CK_PR_GENERATE(xor) "a" (compare) \ : "memory", "cc"); \ return z; \ + } \ + \ + CK_CC_INLINE static bool \ + ck_pr_cas_##S##_value(M *target, T compare, T set, M *v) \ + { \ + bool z; \ + __asm__ __volatile__(CK_PR_LOCK_PREFIX I " %3, %0;" \ + "setz %1;" \ + : "+m" (*(C *)target), \ + "=q" (z), \ + "+a" (compare) \ + : "q" (set) \ + : "memory", "cc"); \ + *(T *)v = compare; \ + return z; \ } +#endif CK_PR_CAS(ptr, void, void *, char, "cmpxchgq") @@ -421,45 +488,6 @@ CK_PR_CAS_S(8, uint8_t, "cmpxchgb") #undef CK_PR_CAS_S #undef CK_PR_CAS -/* - * Compare and swap, set *v to old value of target. - */ -#define CK_PR_CAS_O(S, M, T, C, I, R) \ - CK_CC_INLINE static bool \ - ck_pr_cas_##S##_value(M *target, T compare, T set, M *v) \ - { \ - bool z; \ - __asm__ __volatile__(CK_PR_LOCK_PREFIX "cmpxchg" I " %3, %0;" \ - "mov %% " R ", %2;" \ - "setz %1;" \ - : "+m" (*(C *)target), \ - "=a" (z), \ - "=m" (*(C *)v) \ - : "q" (set), \ - "a" (compare) \ - : "memory", "cc"); \ - return z; \ - } - -CK_PR_CAS_O(ptr, void, void *, char, "q", "rax") - -#define CK_PR_CAS_O_S(S, T, I, R) \ - CK_PR_CAS_O(S, T, T, T, I, R) - -CK_PR_CAS_O_S(char, char, "b", "al") -CK_PR_CAS_O_S(int, int, "l", "eax") -CK_PR_CAS_O_S(uint, unsigned int, "l", "eax") -#ifndef CK_PR_DISABLE_DOUBLE -CK_PR_CAS_O_S(double, double, "q", "rax") -#endif -CK_PR_CAS_O_S(64, uint64_t, "q", "rax") -CK_PR_CAS_O_S(32, uint32_t, "l", "eax") -CK_PR_CAS_O_S(16, uint16_t, "w", "ax") -CK_PR_CAS_O_S(8, uint8_t, "b", "al") - -#undef CK_PR_CAS_O_S -#undef CK_PR_CAS_O - /* * Contrary to C-interface, alignment requirements are that of uint64_t[2]. */ -- cgit v1.2.3