Diffstat (limited to 'include/gcc/x86_64/ck_pr.h')
-rw-r--r--  include/gcc/x86_64/ck_pr.h  132
1 files changed, 80 insertions, 52 deletions
diff --git a/include/gcc/x86_64/ck_pr.h b/include/gcc/x86_64/ck_pr.h
index 532d593..4222729 100644
--- a/include/gcc/x86_64/ck_pr.h
+++ b/include/gcc/x86_64/ck_pr.h
@@ -58,8 +58,8 @@
#endif
/*
- * Prevent speculative execution in busy-wait loops (P4 <=)
- * or "predefined delay".
+ * Prevent speculative execution in busy-wait loops (P4 <=) or "predefined
+ * delay".
*/
CK_CC_INLINE static void
ck_pr_stall(void)
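
For context (not part of the patch): ck_pr_stall() is meant for the body of busy-wait loops, where the pause hint keeps the processor from speculating past the loop. A minimal usage sketch, assuming the ck_pr_load_uint() and ck_pr_fence_acquire() wrappers from the top-level ck_pr.h:

#include <ck_pr.h>

/* Spin until *flag becomes non-zero, hinting the pipeline on each iteration. */
static void
wait_for_flag(unsigned int *flag)
{

	while (ck_pr_load_uint(flag) == 0)
		ck_pr_stall();

	/* Order the spin against subsequent reads of published data. */
	ck_pr_fence_acquire();
	return;
}
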
@@ -75,18 +75,39 @@ ck_pr_stall(void)
__asm__ __volatile__(I ::: "memory"); \
}
-CK_PR_FENCE(atomic, "sfence")
-CK_PR_FENCE(atomic_store, "sfence")
-CK_PR_FENCE(atomic_load, "mfence")
-CK_PR_FENCE(store_atomic, "sfence")
-CK_PR_FENCE(load_atomic, "mfence")
+/* Atomic operations are always serializing. */
+CK_PR_FENCE(atomic, "")
+CK_PR_FENCE(atomic_store, "")
+CK_PR_FENCE(atomic_load, "")
+CK_PR_FENCE(store_atomic, "")
+CK_PR_FENCE(load_atomic, "")
+
+/* Traditional fence interface. */
CK_PR_FENCE(load, "lfence")
CK_PR_FENCE(load_store, "mfence")
CK_PR_FENCE(store, "sfence")
CK_PR_FENCE(store_load, "mfence")
CK_PR_FENCE(memory, "mfence")
+
+/* Below are stdatomic-style fences. */
+
+/*
+ * Provides load-store and store-store ordering. However, Intel specifies that
+ * the WC memory model is relaxed. It is likely an sfence *is* sufficient (in
+ * particular, stores are not re-ordered with respect to prior loads and it is
+ * really just the stores that are subject to re-ordering). However, we take
+ * the conservative route as the manuals are too ambiguous for my taste.
+ */
CK_PR_FENCE(release, "mfence")
+
+/*
+ * Provides load-load and load-store ordering. The lfence instruction ensures
+ * all prior load operations are complete before any subsequent instructions
+ * actually begin execution. However, the manual also goes on to describe WC
+ * memory as a relaxed model.
+ */
CK_PR_FENCE(acquire, "mfence")
+
CK_PR_FENCE(acqrel, "mfence")
CK_PR_FENCE(lock, "mfence")
CK_PR_FENCE(unlock, "mfence")
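
Illustration (not part of the patch): with atomic read-modify-write operations treated as serializing, explicit fences are only needed around plain loads and stores. A sketch of the usual acquire/release pairing in a toy spinlock, assuming the ck_pr_fence_acquire()/ck_pr_fence_release() wrappers in ck_pr.h resolve to the strict fences above and that ck_pr_fas_uint() and ck_pr_store_uint() are available:

#include <ck_pr.h>

/* Toy spinlock built on fetch-and-store plus the acquire/release fences. */
static void
toy_lock(unsigned int *lock)
{

	while (ck_pr_fas_uint(lock, 1) == 1)
		ck_pr_stall();

	/* Critical-section loads/stores may not be hoisted above this point. */
	ck_pr_fence_acquire();
	return;
}

static void
toy_unlock(unsigned int *lock)
{

	/* Critical-section loads/stores must complete before the releasing store. */
	ck_pr_fence_release();
	ck_pr_store_uint(lock, 0);
	return;
}
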
@@ -311,18 +332,18 @@ CK_PR_FAA_S(8, uint8_t, "xaddb")
}
#define CK_PR_UNARY_V(K, S, T, C, I) \
- CK_CC_INLINE static void \
- ck_pr_##K##_##S##_zero(T *target, bool *r) \
+ CK_CC_INLINE static bool \
+ ck_pr_##K##_##S##_is_zero(T *target) \
{ \
+ bool ret; \
__asm__ __volatile__(CK_PR_LOCK_PREFIX I " %0; setz %1" \
: "+m" (*(C *)target), \
- "=m" (*r) \
+ "=rm" (ret) \
: \
: "memory", "cc"); \
- return; \
+ return ret; \
}
-
#define CK_PR_UNARY_S(K, S, T, I) CK_PR_UNARY(K, S, T, T, I)
#define CK_PR_GENERATE(K) \
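
Illustration (not part of the patch): the unary interface change replaces the out-parameter form ck_pr_<op>_<type>_zero(target, &r) with ck_pr_<op>_<type>_is_zero(target), which returns the zero flag directly. A sketch of a typical consumer, assuming the generators expand to a name such as ck_pr_dec_uint_is_zero():

#include <stdbool.h>
#include <ck_pr.h>

struct object {
	unsigned int ref;
	/* ... payload ... */
};

/* Drop a reference; report whether the caller should free the object. */
static bool
object_release(struct object *o)
{

	return ck_pr_dec_uint_is_zero(&o->ref);
}
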
@@ -387,8 +408,38 @@ CK_PR_GENERATE(xor)
#undef CK_PR_BINARY
/*
- * Atomic compare and swap.
+ * Atomic compare and swap, with a variant that sets *v to the old value of target.
*/
+#ifdef __GCC_ASM_FLAG_OUTPUTS__
+#define CK_PR_CAS(S, M, T, C, I) \
+ CK_CC_INLINE static bool \
+ ck_pr_cas_##S(M *target, T compare, T set) \
+ { \
+ bool z; \
+ __asm__ __volatile__(CK_PR_LOCK_PREFIX I " %3, %0" \
+ : "+m" (*(C *)target), \
+ "=@ccz" (z), \
+ /* RAX is clobbered by cmpxchg. */ \
+ "+a" (compare) \
+ : "q" (set) \
+ : "memory", "cc"); \
+ return z; \
+ } \
+ \
+ CK_CC_INLINE static bool \
+ ck_pr_cas_##S##_value(M *target, T compare, T set, M *v) \
+ { \
+ bool z; \
+ __asm__ __volatile__(CK_PR_LOCK_PREFIX I " %3, %0;" \
+ : "+m" (*(C *)target), \
+ "=@ccz" (z), \
+ "+a" (compare) \
+ : "q" (set) \
+ : "memory", "cc"); \
+ *(T *)v = compare; \
+ return z; \
+ }
+#else
#define CK_PR_CAS(S, M, T, C, I) \
CK_CC_INLINE static bool \
ck_pr_cas_##S(M *target, T compare, T set) \
@@ -401,7 +452,23 @@ CK_PR_GENERATE(xor)
"a" (compare) \
: "memory", "cc"); \
return z; \
+ } \
+ \
+ CK_CC_INLINE static bool \
+ ck_pr_cas_##S##_value(M *target, T compare, T set, M *v) \
+ { \
+ bool z; \
+ __asm__ __volatile__(CK_PR_LOCK_PREFIX I " %3, %0;" \
+ "setz %1;" \
+ : "+m" (*(C *)target), \
+ "=q" (z), \
+ "+a" (compare) \
+ : "q" (set) \
+ : "memory", "cc"); \
+ *(T *)v = compare; \
+ return z; \
}
+#endif
CK_PR_CAS(ptr, void, void *, char, "cmpxchgq")
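
Illustration (not part of the patch): the _value variant hands back the witnessed value through *v, so a retry loop does not need a separate reload after a failed CAS. A sketch, assuming ck_pr_cas_uint_value() and ck_pr_load_uint() as generated above:

#include <ck_pr.h>

/* Hypothetical bounded increment: retries with the value the CAS observed. */
static unsigned int
counter_increment(unsigned int *counter, unsigned int limit)
{
	unsigned int snapshot, desired;

	snapshot = ck_pr_load_uint(counter);
	do {
		desired = snapshot < limit ? snapshot + 1 : snapshot;
		/* On failure, snapshot is overwritten with the value found in memory. */
	} while (ck_pr_cas_uint_value(counter, snapshot, desired, &snapshot) == false);

	return desired;
}
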
@@ -422,45 +489,6 @@ CK_PR_CAS_S(8, uint8_t, "cmpxchgb")
#undef CK_PR_CAS
/*
- * Compare and swap, set *v to old value of target.
- */
-#define CK_PR_CAS_O(S, M, T, C, I, R) \
- CK_CC_INLINE static bool \
- ck_pr_cas_##S##_value(M *target, T compare, T set, M *v) \
- { \
- bool z; \
- __asm__ __volatile__(CK_PR_LOCK_PREFIX "cmpxchg" I " %3, %0;" \
- "mov %% " R ", %2;" \
- "setz %1;" \
- : "+m" (*(C *)target), \
- "=a" (z), \
- "=m" (*(C *)v) \
- : "q" (set), \
- "a" (compare) \
- : "memory", "cc"); \
- return z; \
- }
-
-CK_PR_CAS_O(ptr, void, void *, char, "q", "rax")
-
-#define CK_PR_CAS_O_S(S, T, I, R) \
- CK_PR_CAS_O(S, T, T, T, I, R)
-
-CK_PR_CAS_O_S(char, char, "b", "al")
-CK_PR_CAS_O_S(int, int, "l", "eax")
-CK_PR_CAS_O_S(uint, unsigned int, "l", "eax")
-#ifndef CK_PR_DISABLE_DOUBLE
-CK_PR_CAS_O_S(double, double, "q", "rax")
-#endif
-CK_PR_CAS_O_S(64, uint64_t, "q", "rax")
-CK_PR_CAS_O_S(32, uint32_t, "l", "eax")
-CK_PR_CAS_O_S(16, uint16_t, "w", "ax")
-CK_PR_CAS_O_S(8, uint8_t, "b", "al")
-
-#undef CK_PR_CAS_O_S
-#undef CK_PR_CAS_O
-
-/*
* Contrary to C-interface, alignment requirements are that of uint64_t[2].
*/
CK_CC_INLINE static bool