From cca122ce9a0e5c8ebe745b6ae785b5b5c80f4fc6 Mon Sep 17 00:00:00 2001 From: Daniel Baumann Date: Fri, 23 Jul 2021 13:28:52 +0200 Subject: Adding upstream version 0.7.1. Signed-off-by: Daniel Baumann --- include/gcc/x86_64/ck_pr.h | 132 +++++++++++++++++++++++++++------------------ 1 file changed, 80 insertions(+), 52 deletions(-) (limited to 'include/gcc/x86_64/ck_pr.h') diff --git a/include/gcc/x86_64/ck_pr.h b/include/gcc/x86_64/ck_pr.h index 532d593..4222729 100644 --- a/include/gcc/x86_64/ck_pr.h +++ b/include/gcc/x86_64/ck_pr.h @@ -58,8 +58,8 @@ #endif /* - * Prevent speculative execution in busy-wait loops (P4 <=) - * or "predefined delay". + * Prevent speculative execution in busy-wait loops (P4 <=) or "predefined + * delay". */ CK_CC_INLINE static void ck_pr_stall(void) @@ -75,18 +75,39 @@ ck_pr_stall(void) __asm__ __volatile__(I ::: "memory"); \ } -CK_PR_FENCE(atomic, "sfence") -CK_PR_FENCE(atomic_store, "sfence") -CK_PR_FENCE(atomic_load, "mfence") -CK_PR_FENCE(store_atomic, "sfence") -CK_PR_FENCE(load_atomic, "mfence") +/* Atomic operations are always serializing. */ +CK_PR_FENCE(atomic, "") +CK_PR_FENCE(atomic_store, "") +CK_PR_FENCE(atomic_load, "") +CK_PR_FENCE(store_atomic, "") +CK_PR_FENCE(load_atomic, "") + +/* Traditional fence interface. */ CK_PR_FENCE(load, "lfence") CK_PR_FENCE(load_store, "mfence") CK_PR_FENCE(store, "sfence") CK_PR_FENCE(store_load, "mfence") CK_PR_FENCE(memory, "mfence") + +/* Below are stdatomic-style fences. */ + +/* + * Provides load-store and store-store ordering. However, Intel specifies that + * the WC memory model is relaxed. It is likely an sfence *is* sufficient (in + * particular, stores are not re-ordered with respect to prior loads and it is + * really just the stores that are subject to re-ordering). However, we take + * the conservative route as the manuals are too ambiguous for my taste. + */ CK_PR_FENCE(release, "mfence") + +/* + * Provides load-load and load-store ordering. The lfence instruction ensures + * all prior load operations are complete before any subsequent instructions + * actually begin execution. However, the manual also ends up going to describe + * WC memory as a relaxed model. + */ CK_PR_FENCE(acquire, "mfence") + CK_PR_FENCE(acqrel, "mfence") CK_PR_FENCE(lock, "mfence") CK_PR_FENCE(unlock, "mfence") @@ -311,18 +332,18 @@ CK_PR_FAA_S(8, uint8_t, "xaddb") } #define CK_PR_UNARY_V(K, S, T, C, I) \ - CK_CC_INLINE static void \ - ck_pr_##K##_##S##_zero(T *target, bool *r) \ + CK_CC_INLINE static bool \ + ck_pr_##K##_##S##_is_zero(T *target) \ { \ + bool ret; \ __asm__ __volatile__(CK_PR_LOCK_PREFIX I " %0; setz %1" \ : "+m" (*(C *)target), \ - "=m" (*r) \ + "=rm" (ret) \ : \ : "memory", "cc"); \ - return; \ + return ret; \ } - #define CK_PR_UNARY_S(K, S, T, I) CK_PR_UNARY(K, S, T, T, I) #define CK_PR_GENERATE(K) \ @@ -387,8 +408,38 @@ CK_PR_GENERATE(xor) #undef CK_PR_BINARY /* - * Atomic compare and swap. + * Atomic compare and swap, with a variant that sets *v to the old value of target. */ +#ifdef __GCC_ASM_FLAG_OUTPUTS__ +#define CK_PR_CAS(S, M, T, C, I) \ + CK_CC_INLINE static bool \ + ck_pr_cas_##S(M *target, T compare, T set) \ + { \ + bool z; \ + __asm__ __volatile__(CK_PR_LOCK_PREFIX I " %3, %0" \ + : "+m" (*(C *)target), \ + "=@ccz" (z), \ + /* RAX is clobbered by cmpxchg. */ \ + "+a" (compare) \ + : "q" (set) \ + : "memory", "cc"); \ + return z; \ + } \ + \ + CK_CC_INLINE static bool \ + ck_pr_cas_##S##_value(M *target, T compare, T set, M *v) \ + { \ + bool z; \ + __asm__ __volatile__(CK_PR_LOCK_PREFIX I " %3, %0;" \ + : "+m" (*(C *)target), \ + "=@ccz" (z), \ + "+a" (compare) \ + : "q" (set) \ + : "memory", "cc"); \ + *(T *)v = compare; \ + return z; \ + } +#else #define CK_PR_CAS(S, M, T, C, I) \ CK_CC_INLINE static bool \ ck_pr_cas_##S(M *target, T compare, T set) \ @@ -401,7 +452,23 @@ CK_PR_GENERATE(xor) "a" (compare) \ : "memory", "cc"); \ return z; \ + } \ + \ + CK_CC_INLINE static bool \ + ck_pr_cas_##S##_value(M *target, T compare, T set, M *v) \ + { \ + bool z; \ + __asm__ __volatile__(CK_PR_LOCK_PREFIX I " %3, %0;" \ + "setz %1;" \ + : "+m" (*(C *)target), \ + "=q" (z), \ + "+a" (compare) \ + : "q" (set) \ + : "memory", "cc"); \ + *(T *)v = compare; \ + return z; \ } +#endif CK_PR_CAS(ptr, void, void *, char, "cmpxchgq") @@ -421,45 +488,6 @@ CK_PR_CAS_S(8, uint8_t, "cmpxchgb") #undef CK_PR_CAS_S #undef CK_PR_CAS -/* - * Compare and swap, set *v to old value of target. - */ -#define CK_PR_CAS_O(S, M, T, C, I, R) \ - CK_CC_INLINE static bool \ - ck_pr_cas_##S##_value(M *target, T compare, T set, M *v) \ - { \ - bool z; \ - __asm__ __volatile__(CK_PR_LOCK_PREFIX "cmpxchg" I " %3, %0;" \ - "mov %% " R ", %2;" \ - "setz %1;" \ - : "+m" (*(C *)target), \ - "=a" (z), \ - "=m" (*(C *)v) \ - : "q" (set), \ - "a" (compare) \ - : "memory", "cc"); \ - return z; \ - } - -CK_PR_CAS_O(ptr, void, void *, char, "q", "rax") - -#define CK_PR_CAS_O_S(S, T, I, R) \ - CK_PR_CAS_O(S, T, T, T, I, R) - -CK_PR_CAS_O_S(char, char, "b", "al") -CK_PR_CAS_O_S(int, int, "l", "eax") -CK_PR_CAS_O_S(uint, unsigned int, "l", "eax") -#ifndef CK_PR_DISABLE_DOUBLE -CK_PR_CAS_O_S(double, double, "q", "rax") -#endif -CK_PR_CAS_O_S(64, uint64_t, "q", "rax") -CK_PR_CAS_O_S(32, uint32_t, "l", "eax") -CK_PR_CAS_O_S(16, uint16_t, "w", "ax") -CK_PR_CAS_O_S(8, uint8_t, "b", "al") - -#undef CK_PR_CAS_O_S -#undef CK_PR_CAS_O - /* * Contrary to C-interface, alignment requirements are that of uint64_t[2]. */ -- cgit v1.2.3