From 607b673266aaa5adb6e54cbebd50bfad237ba3a6 Mon Sep 17 00:00:00 2001 From: Daniel Baumann Date: Mon, 8 Apr 2024 09:50:17 +0200 Subject: Merging upstream version 0.7.2. Signed-off-by: Daniel Baumann --- include/ck_pr.h | 2 + include/ck_queue.h | 15 ++ include/ck_ring.h | 2 +- include/gcc/aarch64/ck_pr.h | 8 +- include/gcc/arm/ck_f_pr.h | 8 +- include/gcc/arm/ck_pr.h | 6 +- include/gcc/riscv64/ck_f_pr.h | 134 +++++++++++ include/gcc/riscv64/ck_pr.h | 548 ++++++++++++++++++++++++++++++++++++++++++ include/gcc/x86/ck_pr.h | 24 +- include/gcc/x86_64/ck_pr.h | 28 +-- 10 files changed, 737 insertions(+), 38 deletions(-) create mode 100644 include/gcc/riscv64/ck_f_pr.h create mode 100644 include/gcc/riscv64/ck_pr.h (limited to 'include') diff --git a/include/ck_pr.h b/include/ck_pr.h index 8ebf855..cd2a180 100644 --- a/include/ck_pr.h +++ b/include/ck_pr.h @@ -64,6 +64,8 @@ #include "gcc/arm/ck_pr.h" #elif defined(__aarch64__) #include "gcc/aarch64/ck_pr.h" +#elif defined(__riscv) && __riscv_xlen == 64 +#include "gcc/riscv64/ck_pr.h" #elif !defined(__GNUC__) #error Your platform is unsupported #endif diff --git a/include/ck_queue.h b/include/ck_queue.h index fd38d8a..3472b0e 100644 --- a/include/ck_queue.h +++ b/include/ck_queue.h @@ -153,6 +153,11 @@ struct { \ (var); \ (var) = CK_SLIST_NEXT((var), field)) +#define CK_SLIST_FOREACH_FROM(var, head, field) \ + for ((var) = ((var) != NULL ? (var) : CK_SLIST_FIRST((head))); \ + (var); \ + (var) = CK_SLIST_NEXT((var), field)) + #define CK_SLIST_FOREACH_SAFE(var, head, field, tvar) \ for ((var) = CK_SLIST_FIRST(head); \ (var) && ((tvar) = CK_SLIST_NEXT(var, field), 1); \ @@ -262,6 +267,11 @@ struct { \ (var); \ (var) = CK_STAILQ_NEXT((var), field)) +#define CK_STAILQ_FOREACH_FROM(var, head, field) \ + for ((var) = ((var) != NULL ? (var) : CK_STAILQ_FIRST((head))); \ + (var); \ + (var) = CK_STAILQ_NEXT((var), field)) + #define CK_STAILQ_FOREACH_SAFE(var, head, field, tvar) \ for ((var) = CK_STAILQ_FIRST((head)); \ (var) && ((tvar) = \ @@ -374,6 +384,11 @@ struct { \ (var); \ (var) = CK_LIST_NEXT((var), field)) +#define CK_LIST_FOREACH_FROM(var, head, field) \ + for ((var) = ((var) != NULL ? 
(var) : CK_LIST_FIRST((head))); \ + (var); \ + (var) = CK_LIST_NEXT((var), field)) + #define CK_LIST_FOREACH_SAFE(var, head, field, tvar) \ for ((var) = CK_LIST_FIRST((head)); \ (var) && ((tvar) = CK_LIST_NEXT((var), field), 1); \ diff --git a/include/ck_ring.h b/include/ck_ring.h index 9f6754e..3a52276 100644 --- a/include/ck_ring.h +++ b/include/ck_ring.h @@ -282,7 +282,7 @@ _ck_ring_enqueue_reserve_mp(struct ck_ring *ring, if (size != NULL) *size = (producer - consumer) & mask; - return false; + return NULL; } producer = new_producer; diff --git a/include/gcc/aarch64/ck_pr.h b/include/gcc/aarch64/ck_pr.h index 0a47307..3d269a5 100644 --- a/include/gcc/aarch64/ck_pr.h +++ b/include/gcc/aarch64/ck_pr.h @@ -137,8 +137,8 @@ CK_PR_LOAD_S_64(double, double, "ldr") CK_CC_INLINE static void \ ck_pr_md_store_##S(M *target, T v) \ { \ - __asm__ __volatile__(I " %w1, [%0]" \ - : \ + __asm__ __volatile__(I " %w2, [%1]" \ + : "=m" (*(T *)target) \ : "r" (target), \ "r" (v) \ : "memory"); \ @@ -148,8 +148,8 @@ CK_PR_LOAD_S_64(double, double, "ldr") CK_CC_INLINE static void \ ck_pr_md_store_##S(M *target, T v) \ { \ - __asm__ __volatile__(I " %1, [%0]" \ - : \ + __asm__ __volatile__(I " %2, [%1]" \ + : "=m" (*(T *)target) \ : "r" (target), \ "r" (v) \ : "memory"); \ diff --git a/include/gcc/arm/ck_f_pr.h b/include/gcc/arm/ck_f_pr.h index c508f85..95770e0 100644 --- a/include/gcc/arm/ck_f_pr.h +++ b/include/gcc/arm/ck_f_pr.h @@ -20,7 +20,7 @@ #define CK_F_PR_CAS_16_VALUE #define CK_F_PR_CAS_32 #define CK_F_PR_CAS_32_VALUE -#if defined(__ARM_ARCH_7__) || defined(__ARM_ARCH_7A__) +#if defined(__ARM_ARCH_7__) || defined(__ARM_ARCH_7A__) || defined(__ARM_ARCH_7VE__) #define CK_F_PR_CAS_64 #define CK_F_PR_CAS_64_VALUE #define CK_F_PR_CAS_DOUBLE @@ -33,7 +33,7 @@ #define CK_F_PR_CAS_INT #define CK_F_PR_CAS_INT_VALUE #define CK_F_PR_CAS_PTR -#if defined(__ARM_ARCH_7__) || defined(__ARM_ARCH_7A__) +#if defined(__ARM_ARCH_7__) || defined(__ARM_ARCH_7A__) || defined(__ARM_ARCH_7VE__) #define CK_F_PR_CAS_PTR_2 #define CK_F_PR_CAS_PTR_2_VALUE #endif @@ -97,7 +97,7 @@ #define CK_F_PR_INC_UINT #define CK_F_PR_LOAD_16 #define CK_F_PR_LOAD_32 -#if defined(__ARM_ARCH_7__) || defined(__ARM_ARCH_7A__) +#if defined(__ARM_ARCH_7__) || defined(__ARM_ARCH_7A__) || defined(__ARM_ARCH_7VE__) #define CK_F_PR_LOAD_64 #define CK_F_PR_LOAD_DOUBLE #endif @@ -134,7 +134,7 @@ #define CK_F_PR_STALL #define CK_F_PR_STORE_16 #define CK_F_PR_STORE_32 -#if defined(__ARM_ARCH_7__) || defined(__ARM_ARCH_7A__) +#if defined(__ARM_ARCH_7__) || defined(__ARM_ARCH_7A__) || defined(__ARM_ARCH_7VE__) #define CK_F_PR_STORE_64 #define CK_F_PR_STORE_DOUBLE #endif diff --git a/include/gcc/arm/ck_pr.h b/include/gcc/arm/ck_pr.h index 841ca21..b69d6c4 100644 --- a/include/gcc/arm/ck_pr.h +++ b/include/gcc/arm/ck_pr.h @@ -54,7 +54,7 @@ ck_pr_stall(void) return; } -#if defined(__ARM_ARCH_7__) || defined(__ARM_ARCH_7A__) +#if defined(__ARM_ARCH_7__) || defined(__ARM_ARCH_7A__) || defined(__ARM_ARCH_7VE__) #define CK_ISB __asm __volatile("isb" : : "r" (0) : "memory") #define CK_DMB __asm __volatile("dmb" : : "r" (0) : "memory") #define CK_DSB __asm __volatile("dsb" : : "r" (0) : "memory") @@ -132,7 +132,7 @@ CK_PR_LOAD_S(char, char, "ldrb") #undef CK_PR_LOAD_S #undef CK_PR_LOAD -#if defined(__ARM_ARCH_7__) || defined(__ARM_ARCH_7A__) +#if defined(__ARM_ARCH_7__) || defined(__ARM_ARCH_7A__) || defined(__ARM_ARCH_7VE__) #define CK_PR_DOUBLE_LOAD(T, N) \ CK_CC_INLINE static T \ @@ -181,7 +181,7 @@ CK_PR_STORE_S(char, char, "strb") #undef CK_PR_STORE_S #undef 
CK_PR_STORE -#if defined(__ARM_ARCH_7__) || defined(__ARM_ARCH_7A__) +#if defined(__ARM_ARCH_7__) || defined(__ARM_ARCH_7A__) || defined(__ARM_ARCH_7VE__) #define CK_PR_DOUBLE_STORE(T, N) \ CK_CC_INLINE static void \ diff --git a/include/gcc/riscv64/ck_f_pr.h b/include/gcc/riscv64/ck_f_pr.h new file mode 100644 index 0000000..72b8373 --- /dev/null +++ b/include/gcc/riscv64/ck_f_pr.h @@ -0,0 +1,134 @@ +/* DO NOT EDIT. This is auto-generated from feature.sh */ +#define CK_F_PR_ADD_32 +#define CK_F_PR_ADD_64 +#define CK_F_PR_ADD_INT +#define CK_F_PR_ADD_PTR +#define CK_F_PR_ADD_UINT +#define CK_F_PR_AND_32 +#define CK_F_PR_AND_64 +#define CK_F_PR_AND_INT +#define CK_F_PR_AND_PTR +#define CK_F_PR_AND_UINT +#define CK_F_PR_BTC_32 +#define CK_F_PR_BTC_64 +#define CK_F_PR_BTC_INT +#define CK_F_PR_BTC_PTR +#define CK_F_PR_BTC_UINT +#define CK_F_PR_BTR_32 +#define CK_F_PR_BTR_64 +#define CK_F_PR_BTR_INT +#define CK_F_PR_BTR_PTR +#define CK_F_PR_BTR_UINT +#define CK_F_PR_BTS_32 +#define CK_F_PR_BTS_64 +#define CK_F_PR_BTS_INT +#define CK_F_PR_BTS_PTR +#define CK_F_PR_BTS_UINT +#define CK_F_PR_CAS_32 +#define CK_F_PR_CAS_32_VALUE +#define CK_F_PR_CAS_64 +#define CK_F_PR_CAS_64_VALUE +#define CK_F_PR_CAS_DOUBLE +#define CK_F_PR_CAS_DOUBLE_VALUE +#define CK_F_PR_CAS_INT +#define CK_F_PR_CAS_INT_VALUE +#define CK_F_PR_CAS_PTR +#define CK_F_PR_CAS_PTR_VALUE +#define CK_F_PR_CAS_UINT +#define CK_F_PR_CAS_UINT_VALUE +#define CK_F_PR_DEC_32 +#define CK_F_PR_DEC_32_ZERO +#define CK_F_PR_DEC_64 +#define CK_F_PR_DEC_64_ZERO +#define CK_F_PR_DEC_INT +#define CK_F_PR_DEC_INT_ZERO +#define CK_F_PR_DEC_PTR +#define CK_F_PR_DEC_PTR_ZERO +#define CK_F_PR_DEC_UINT +#define CK_F_PR_DEC_UINT_ZERO +#define CK_F_PR_FAA_32 +#define CK_F_PR_FAA_64 +#define CK_F_PR_FAA_INT +#define CK_F_PR_FAA_PTR +#define CK_F_PR_FAA_UINT +#define CK_F_PR_FAS_32 +#define CK_F_PR_FAS_64 +#define CK_F_PR_FAS_INT +#define CK_F_PR_FAS_PTR +#define CK_F_PR_FAS_UINT +#define CK_F_PR_FENCE_STRICT_ACQREL +#define CK_F_PR_FENCE_STRICT_ACQUIRE +#define CK_F_PR_FENCE_STRICT_ATOMIC +#define CK_F_PR_FENCE_STRICT_ATOMIC_LOAD +#define CK_F_PR_FENCE_STRICT_ATOMIC_STORE +#define CK_F_PR_FENCE_STRICT_LOAD +#define CK_F_PR_FENCE_STRICT_LOAD_ATOMIC +#define CK_F_PR_FENCE_STRICT_LOAD_STORE +#define CK_F_PR_FENCE_STRICT_LOCK +#define CK_F_PR_FENCE_STRICT_MEMORY +#define CK_F_PR_FENCE_STRICT_RELEASE +#define CK_F_PR_FENCE_STRICT_STORE +#define CK_F_PR_FENCE_STRICT_STORE_ATOMIC +#define CK_F_PR_FENCE_STRICT_STORE_LOAD +#define CK_F_PR_FENCE_STRICT_UNLOCK +#define CK_F_PR_INC_32 +#define CK_F_PR_INC_32_ZERO +#define CK_F_PR_INC_64 +#define CK_F_PR_INC_64_ZERO +#define CK_F_PR_INC_INT +#define CK_F_PR_INC_INT_ZERO +#define CK_F_PR_INC_PTR +#define CK_F_PR_INC_PTR_ZERO +#define CK_F_PR_INC_UINT +#define CK_F_PR_INC_UINT_ZERO +#define CK_F_PR_LOAD_16 +#define CK_F_PR_LOAD_32 +#define CK_F_PR_LOAD_64 +#define CK_F_PR_LOAD_8 +#define CK_F_PR_LOAD_CHAR +#define CK_F_PR_LOAD_DOUBLE +#define CK_F_PR_LOAD_INT +#define CK_F_PR_LOAD_PTR +#define CK_F_PR_LOAD_SHORT +#define CK_F_PR_LOAD_UINT +#define CK_F_PR_NEG_32 +#define CK_F_PR_NEG_32_ZERO +#define CK_F_PR_NEG_64 +#define CK_F_PR_NEG_64_ZERO +#define CK_F_PR_NEG_INT +#define CK_F_PR_NEG_INT_ZERO +#define CK_F_PR_NEG_PTR +#define CK_F_PR_NEG_PTR_ZERO +#define CK_F_PR_NEG_UINT +#define CK_F_PR_NEG_UINT_ZERO +#define CK_F_PR_NOT_32 +#define CK_F_PR_NOT_64 +#define CK_F_PR_NOT_INT +#define CK_F_PR_NOT_PTR +#define CK_F_PR_NOT_UINT +#define CK_F_PR_OR_32 +#define CK_F_PR_OR_64 +#define CK_F_PR_OR_INT +#define CK_F_PR_OR_PTR 
+#define CK_F_PR_OR_UINT +#define CK_F_PR_STALL +#define CK_F_PR_STORE_16 +#define CK_F_PR_STORE_32 +#define CK_F_PR_STORE_64 +#define CK_F_PR_STORE_8 +#define CK_F_PR_STORE_CHAR +#define CK_F_PR_STORE_DOUBLE +#define CK_F_PR_STORE_INT +#define CK_F_PR_STORE_PTR +#define CK_F_PR_STORE_SHORT +#define CK_F_PR_STORE_UINT +#define CK_F_PR_SUB_32 +#define CK_F_PR_SUB_64 +#define CK_F_PR_SUB_INT +#define CK_F_PR_SUB_PTR +#define CK_F_PR_SUB_UINT +#define CK_F_PR_XOR_32 +#define CK_F_PR_XOR_64 +#define CK_F_PR_XOR_INT +#define CK_F_PR_XOR_PTR +#define CK_F_PR_XOR_UINT diff --git a/include/gcc/riscv64/ck_pr.h b/include/gcc/riscv64/ck_pr.h new file mode 100644 index 0000000..9193ee8 --- /dev/null +++ b/include/gcc/riscv64/ck_pr.h @@ -0,0 +1,548 @@ +/* + * Copyright 2009-2016 Samy Al Bahra. + * Copyright 2013-2016 Olivier Houchard. + * All rights reserved. + * Copyright 2022 The FreeBSD Foundation. + * + * Portions of this software were developed by Mitchell Horne + * under sponsorship from the FreeBSD Foundation. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * 2. Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * + * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND + * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE + * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE + * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL + * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS + * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) + * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT + * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY + * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF + * SUCH DAMAGE. + */ + +#ifndef CK_PR_RISCV64_H +#define CK_PR_RISCV64_H + +#ifndef CK_PR_H +#error Do not include this file directly, use ck_pr.h +#endif + +#include +#include + +#if !defined(__riscv_xlen) || __riscv_xlen != 64 +#error "only for riscv64!" +#endif + +/* + * The following represent supported atomic operations. + * These operations may be emulated. + */ +#include "ck_f_pr.h" + +/* + * Minimum interface requirement met. + */ +#define CK_F_PR + +CK_CC_INLINE static void +ck_pr_stall(void) +{ + + __asm__ __volatile__("" ::: "memory"); + return; +} + +/* + * The FENCE instruction is defined in terms of predecessor and successor bits. + * This allows for greater granularity in specifying whether reads (loads) or + * writes (stores) may pass over either side of the fence. + * + * e.g. "fence r,rw" creates a barrier with acquire semantics. + * + * Note that atomic memory operations (AMOs) are defined by the RISC-V spec to + * act as both a load and store memory operation (read-modify-write, in other + * words). Thus, any of r, w, or rw will enforce ordering on an AMO. 
+ */ +#define CK_FENCE(p, s) __asm __volatile("fence " #p "," #s ::: "memory"); +#define CK_FENCE_RW_RW CK_FENCE(rw,rw) + +#define CK_PR_FENCE(T, I) \ + CK_CC_INLINE static void \ + ck_pr_fence_strict_##T(void) \ + { \ + I; \ + } + +CK_PR_FENCE(atomic, CK_FENCE_RW_RW) +CK_PR_FENCE(atomic_store, CK_FENCE(rw,w)) +CK_PR_FENCE(atomic_load, CK_FENCE(rw,r)) +CK_PR_FENCE(store_atomic, CK_FENCE(w,rw)) +CK_PR_FENCE(load_atomic, CK_FENCE(r,rw)) +CK_PR_FENCE(store, CK_FENCE(w,w)) +CK_PR_FENCE(store_load, CK_FENCE(w,r)) +CK_PR_FENCE(load, CK_FENCE(r,r)) +CK_PR_FENCE(load_store, CK_FENCE(r,w)) +CK_PR_FENCE(memory, CK_FENCE_RW_RW) +CK_PR_FENCE(acquire, CK_FENCE(r,rw)) +CK_PR_FENCE(release, CK_FENCE(rw,w)) +CK_PR_FENCE(acqrel, CK_FENCE_RW_RW) +CK_PR_FENCE(lock, CK_FENCE_RW_RW) +CK_PR_FENCE(unlock, CK_FENCE_RW_RW) + +#undef CK_PR_FENCE + +#undef CK_FENCE_RW_RW +#undef CK_FENCE + +/* + * ck_pr_load(3) + */ +#define CK_PR_LOAD(S, M, T, I) \ + CK_CC_INLINE static T \ + ck_pr_md_load_##S(const M *target) \ + { \ + long r = 0; \ + __asm__ __volatile__(I " %0, 0(%1)\n" \ + : "=r" (r) \ + : "r" (target) \ + : "memory"); \ + return ((T)r); \ + } +#define CK_PR_LOAD_S(S, T, I) CK_PR_LOAD(S, T, T, I) + +CK_PR_LOAD(ptr, void, void *, "ld") +CK_PR_LOAD_S(64, uint64_t, "ld") +CK_PR_LOAD_S(32, uint32_t, "lwu") +CK_PR_LOAD_S(16, uint16_t, "lhu") +CK_PR_LOAD_S(8, uint8_t, "lbu") +CK_PR_LOAD_S(uint, unsigned int, "lwu") +CK_PR_LOAD_S(int, int, "lw") +CK_PR_LOAD_S(short, short, "lh") +CK_PR_LOAD_S(char, char, "lb") +#ifndef CK_PR_DISABLE_DOUBLE +CK_PR_LOAD_S(double, double, "ld") +#endif + +#undef CK_PR_LOAD_S +#undef CK_PR_LOAD + +/* + * ck_pr_store(3) + */ +#define CK_PR_STORE(S, M, T, I) \ + CK_CC_INLINE static void \ + ck_pr_md_store_##S(M *target, T val) \ + { \ + __asm__ __volatile__(I " %1, 0(%0)" \ + : \ + : "r" (target), \ + "r" (val) \ + : "memory"); \ + } +#define CK_PR_STORE_S(S, T, I) CK_PR_STORE(S, T, T, I) + +CK_PR_STORE(ptr, void, const void *, "sd") +CK_PR_STORE_S(64, uint64_t, "sd") +CK_PR_STORE_S(32, uint32_t, "sw") +CK_PR_STORE_S(16, uint16_t, "sh") +CK_PR_STORE_S(8, uint8_t, "sb") +CK_PR_STORE_S(uint, unsigned int, "sw") +CK_PR_STORE_S(int, int, "sw") +CK_PR_STORE_S(short, short, "sh") +CK_PR_STORE_S(char, char, "sb") +#ifndef CK_PR_DISABLE_DOUBLE +CK_PR_STORE_S(double, double, "sd") +#endif + +#undef CK_PR_STORE_S +#undef CK_PR_STORE + +/* + * ck_pr_cas(3) + * + * NB: 'S' is to cast compare to a signed 32-bit integer, so the value will be + * sign-extended when passed to inline asm. GCC does this sign extension + * implicitly, while clang does not. It is necessary because lr.w sign-extends + * the value read from memory, so compare must match that to avoid looping + * unconditionally. + */ +#define CK_PR_CAS(N, M, T, C, S, W) \ + CK_CC_INLINE static bool \ + ck_pr_cas_##N##_value(M *target, T compare, T set, M *value) \ + { \ + T previous; \ + int tmp; \ + __asm__ __volatile__("1:" \ + "li %[tmp], 1\n" \ + "lr." W " %[p], %[t]\n" \ + "bne %[p], %[c], 2f\n" \ + "sc." W " %[tmp], %[s], %[t]\n" \ + "bnez %[tmp], 1b\n" \ + "2:" \ + : [p]"=&r" (previous), \ + [tmp]"=&r" (tmp), \ + [t]"+A" (*(C *)target) \ + : [s]"r" (set), \ + [c]"r" ((long)(S)compare) \ + : "memory"); \ + *(T *)value = previous; \ + return (tmp == 0); \ + } \ + CK_CC_INLINE static bool \ + ck_pr_cas_##N(M *target, T compare, T set) \ + { \ + T previous; \ + int tmp; \ + __asm__ __volatile__("1:" \ + "li %[tmp], 1\n" \ + "lr." W " %[p], %[t]\n" \ + "bne %[p], %[c], 2f\n" \ + "sc." 
W " %[tmp], %[s], %[t]\n" \ + "bnez %[tmp], 1b\n" \ + "2:" \ + : [p]"=&r" (previous), \ + [tmp]"=&r" (tmp), \ + [t]"+A" (*(C *)target) \ + : [s]"r" (set), \ + [c]"r" ((long)(S)compare) \ + : "memory"); \ + return (tmp == 0); \ + } +#define CK_PR_CAS_S(N, T, W) CK_PR_CAS(N, T, T, T, T, W) +#define CK_PR_CAS_32_S(N, T, W) CK_PR_CAS(N, T, T, T, int32_t, W) + +CK_PR_CAS(ptr, void, void *, uint64_t, uint64_t, "d") +CK_PR_CAS_S(64, uint64_t, "d") +CK_PR_CAS_32_S(32, uint32_t, "w") +CK_PR_CAS_32_S(uint, unsigned int, "w") +CK_PR_CAS_32_S(int, int, "w") +#ifndef CK_PR_DISABLE_DOUBLE +CK_PR_CAS_S(double, double, "d") +#endif + +#undef CK_PR_CAS_S +#undef CK_PR_CAS + +/* + * ck_pr_faa(3) + */ +#define CK_PR_FAA(N, M, T, C, W) \ + CK_CC_INLINE static T \ + ck_pr_faa_##N(M *target, T delta) \ + { \ + T previous; \ + __asm__ __volatile__("amoadd." W " %0, %2, %1\n" \ + : "=&r" (previous), \ + "+A" (*(C *)target) \ + : "r" (delta) \ + : "memory"); \ + return (previous); \ + } +#define CK_PR_FAA_S(N, T, W) CK_PR_FAA(N, T, T, T, W) + +CK_PR_FAA(ptr, void, void *, uint64_t, "d") +CK_PR_FAA_S(64, uint64_t, "d") +CK_PR_FAA_S(32, uint32_t, "w") +CK_PR_FAA_S(uint, unsigned int, "w") +CK_PR_FAA_S(int, int, "w") + +#undef CK_PR_FAA_S +#undef CK_PR_FAA + +/* + * ck_pr_fas(3) + */ +#define CK_PR_FAS(N, M, T, C, W) \ + CK_CC_INLINE static T \ + ck_pr_fas_##N(M *target, T val) \ + { \ + T previous; \ + __asm__ __volatile__("amoswap." W " %0, %2, %1\n" \ + : "=&r" (previous), \ + "+A" (*(C *)target) \ + : "r" (val) \ + : "memory"); \ + return (previous); \ + } +#define CK_PR_FAS_S(N, T, W) CK_PR_FAS(N, T, T, T, W) + +CK_PR_FAS(ptr, void, void *, uint64_t, "d") +CK_PR_FAS_S(64, uint64_t, "d") +CK_PR_FAS_S(32, uint32_t, "w") +CK_PR_FAS_S(int, int, "w") +CK_PR_FAS_S(uint, unsigned int, "w") + +#undef CK_PR_FAS_S +#undef CK_PR_FAS + +/* + * ck_pr_add(3) + */ +#define CK_PR_ADD(N, M, T, C, W) \ + CK_CC_INLINE static void \ + ck_pr_add_##N(M *target, T val) \ + { \ + __asm__ __volatile__("amoadd." W " zero, %1, %0\n" \ + : "+A" (*(C *)target) \ + : "r" (val) \ + : "memory"); \ + } \ + CK_CC_INLINE static bool \ + ck_pr_add_##N##_is_zero(M *target, T val) \ + { \ + T previous; \ + __asm__ __volatile__("amoadd." W " %0, %2, %1\n" \ + : "=&r" (previous), \ + "+A" (*(C *)target) \ + : "r" (val) \ + : "memory"); \ + return (((C)previous + (C)val) == 0); \ + } +#define CK_PR_ADD_S(N, T, W) CK_PR_ADD(N, T, T, T, W) + +CK_PR_ADD(ptr, void, void *, uint64_t, "d") +CK_PR_ADD_S(64, uint64_t, "d") +CK_PR_ADD_S(32, uint32_t, "w") +CK_PR_ADD_S(uint, unsigned int, "w") +CK_PR_ADD_S(int, int, "w") + +#undef CK_PR_ADD_S +#undef CK_PR_ADD + +/* + * ck_pr_inc(3) + * + * Implemented in terms of ck_pr_add(3); RISC-V has no atomic inc or dec + * instructions. + */ +#define CK_PR_INC(N, M, T, W) \ + CK_CC_INLINE static void \ + ck_pr_inc_##N(M *target) \ + { \ + ck_pr_add_##N(target, (T)1); \ + } \ + CK_CC_INLINE static bool \ + ck_pr_inc_##N##_is_zero(M *target) \ + { \ + return (ck_pr_add_##N##_is_zero(target, (T)1)); \ + } +#define CK_PR_INC_S(N, T, W) CK_PR_INC(N, T, T, W) + +CK_PR_INC(ptr, void, void *, "d") +CK_PR_INC_S(64, uint64_t, "d") +CK_PR_INC_S(32, uint32_t, "w") +CK_PR_INC_S(uint, unsigned int, "w") +CK_PR_INC_S(int, int, "w") + +#undef CK_PR_INC_S +#undef CK_PR_INC + +/* + * ck_pr_sub(3) + */ +#define CK_PR_SUB(N, M, T, C, W) \ + CK_CC_INLINE static void \ + ck_pr_sub_##N(M *target, T val) \ + { \ + __asm__ __volatile__("amoadd." 
W " zero, %1, %0\n" \ + : "+A" (*(C *)target) \ + : "r" (-(C)val) \ + : "memory"); \ + } \ + CK_CC_INLINE static bool \ + ck_pr_sub_##N##_is_zero(M *target, T val) \ + { \ + T previous; \ + __asm__ __volatile__("amoadd." W " %0, %2, %1\n" \ + : "=&r" (previous), \ + "+A" (*(C *)target) \ + : "r" (-(C)val) \ + : "memory"); \ + return (((C)previous - (C)val) == 0); \ + } +#define CK_PR_SUB_S(N, T, W) CK_PR_SUB(N, T, T, T, W) + +CK_PR_SUB(ptr, void, void *, uint64_t, "d") +CK_PR_SUB_S(64, uint64_t, "d") +CK_PR_SUB_S(32, uint32_t, "w") +CK_PR_SUB_S(uint, unsigned int, "w") +CK_PR_SUB_S(int, int, "w") + +#undef CK_PR_SUB_S +#undef CK_PR_SUB + +/* + * ck_pr_dec(3) + */ +#define CK_PR_DEC(N, M, T, W) \ + CK_CC_INLINE static void \ + ck_pr_dec_##N(M *target) \ + { \ + ck_pr_sub_##N(target, (T)1); \ + } \ + CK_CC_INLINE static bool \ + ck_pr_dec_##N##_is_zero(M *target) \ + { \ + return (ck_pr_sub_##N##_is_zero(target, (T)1)); \ + } +#define CK_PR_DEC_S(N, T, W) CK_PR_DEC(N, T, T, W) + +CK_PR_DEC(ptr, void, void *, "d") +CK_PR_DEC_S(64, uint64_t, "d") +CK_PR_DEC_S(32, uint32_t, "w") +CK_PR_DEC_S(uint, unsigned int, "w") +CK_PR_DEC_S(int, int, "w") + +#undef CK_PR_DEC_S +#undef CK_PR_DEC + +/* + * ck_pr_neg(3) + */ +#define CK_PR_NEG(N, M, T, C, W) \ + CK_CC_INLINE static void \ + ck_pr_neg_##N(M *target) \ + { \ + __asm__ __volatile__("1:" \ + "lr." W " t0, %0\n" \ + "sub t0, zero, t0\n" \ + "sc." W " t1, t0, %0\n" \ + "bnez t1, 1b\n" \ + : "+A" (*(C *)target) \ + : \ + : "t0", "t1", "memory"); \ + } +#define CK_PR_NEG_S(N, T, W) CK_PR_NEG(N, T, T, T, W) + +CK_PR_NEG(ptr, void, void *, uint64_t, "d") +CK_PR_NEG_S(64, uint64_t, "d") +CK_PR_NEG_S(32, uint32_t, "w") +CK_PR_NEG_S(uint, unsigned int, "w") +CK_PR_NEG_S(int, int, "w") + +#undef CK_PR_NEG_S +#undef CK_PR_NEG + +/* + * ck_pr_not(3) + */ +#define CK_PR_NOT(N, M, T, C, W) \ + CK_CC_INLINE static void \ + ck_pr_not_##N(M *target) \ + { \ + __asm__ __volatile__("1:" \ + "lr." W " t0, %0\n" \ + "not t0, t0\n" \ + "sc." W " t1, t0, %0\n" \ + "bnez t1, 1b\n" \ + : "+A" (*(C *)target) \ + : \ + : "t0", "t1", "memory"); \ + } +#define CK_PR_NOT_S(N, T, W) CK_PR_NOT(N, T, T, T, W) + +CK_PR_NOT(ptr, void, void *, uint64_t, "d") +CK_PR_NOT_S(64, uint64_t, "d") +CK_PR_NOT_S(32, uint32_t, "w") +CK_PR_NOT_S(uint, unsigned int, "w") +CK_PR_NOT_S(int, int, "w") + +#undef CK_PR_NOT_S +#undef CK_PR_NOT + +/* + * ck_pr_and(3), ck_pr_or(3), and ck_pr_xor(3) + */ +#define CK_PR_BINARY(O, N, M, T, C, I, W) \ + CK_CC_INLINE static void \ + ck_pr_##O##_##N(M *target, T delta) \ + { \ + __asm__ __volatile__(I "." W " zero, %1, %0\n" \ + : "+A" (*(C *)target) \ + : "r" (delta) \ + : "memory"); \ + } + +CK_PR_BINARY(and, ptr, void, void *, uint64_t, "amoand", "d") +CK_PR_BINARY(or, ptr, void, void *, uint64_t, "amoor", "d") +CK_PR_BINARY(xor, ptr, void, void *, uint64_t, "amoxor", "d") + +#define CK_PR_BINARY_S(S, T, W) \ + CK_PR_BINARY(and, S, T, T, T, "amoand", W) \ + CK_PR_BINARY(or, S, T, T, T, "amoor", W) \ + CK_PR_BINARY(xor, S, T, T, T, "amoxor", W) \ + +CK_PR_BINARY_S(64, uint64_t, "d") +CK_PR_BINARY_S(32, uint32_t, "w") +CK_PR_BINARY_S(uint, unsigned int, "w") +CK_PR_BINARY_S(int, int, "w") + +#undef CK_PR_BINARY_S +#undef CK_PR_BINARY + +/* + * ck_pr_btc(3), ck_pr_btr(3), and ck_pr_bts(3) + */ +#define CK_PR_BTX(K, S, I, W, M, C, O) \ + CK_CC_INLINE static bool \ + ck_pr_##K##_##S(M *target, unsigned int idx) \ + { \ + C ret; \ + C mask = (C)0x1 << idx; \ + __asm__ __volatile__(I "." 
W " %1, %2, %0\n" \ + : "+A" (*(C *)target), \ + "=r" (ret) \ + : "r" (O(mask)) \ + : "memory", "cc"); \ + return ((ret & mask) != 0); \ + } + +#define CK_PR_BTC(S, W, M, C) CK_PR_BTX(btc, S, "amoxor", W, M, C, 0+) +#define CK_PR_BTC_S(S, W, T) CK_PR_BTC(S, W, T, T) + +CK_PR_BTC(ptr, "d", void, uint64_t) +CK_PR_BTC_S(64, "d", uint64_t) +CK_PR_BTC_S(32, "w", uint32_t) +CK_PR_BTC_S(uint, "w", unsigned int) +CK_PR_BTC_S(int, "w", int) + +#undef CK_PR_BTC_S +#undef CK_PR_BTC + +#define CK_PR_BTR(S, W, M, C) CK_PR_BTX(btr, S, "amoand", W, M, C, ~) +#define CK_PR_BTR_S(S, W, T) CK_PR_BTR(S, W, T, T) + +CK_PR_BTR(ptr, "d", void, uint64_t) +CK_PR_BTR_S(64, "d", uint64_t) +CK_PR_BTR_S(32, "w", uint32_t) +CK_PR_BTR_S(uint, "w", unsigned int) +CK_PR_BTR_S(int, "w", int) + +#undef CK_PR_BTR_S +#undef CK_PR_BTR + +#define CK_PR_BTS(S, W, M, C) CK_PR_BTX(bts, S, "amoor", W, M, C, 0+) +#define CK_PR_BTS_S(S, W, T) CK_PR_BTS(S, W, T, T) + +CK_PR_BTS(ptr, "d", void, uint64_t) +CK_PR_BTS_S(64, "d", uint64_t) +CK_PR_BTS_S(32, "w", uint32_t) +CK_PR_BTS_S(uint, "w", unsigned int) +CK_PR_BTS_S(int, "w", int) + +#undef CK_PR_BTS_S +#undef CK_PR_BTS + +#undef CK_PR_BTX + +#endif /* CK_PR_RISCV64_H */ diff --git a/include/gcc/x86/ck_pr.h b/include/gcc/x86/ck_pr.h index 5194dee..12291c8 100644 --- a/include/gcc/x86/ck_pr.h +++ b/include/gcc/x86/ck_pr.h @@ -120,7 +120,7 @@ CK_PR_FENCE(unlock, CK_MD_X86_MFENCE) return v; \ } -CK_PR_FAS(ptr, void, void *, char, "xchgl") +CK_PR_FAS(ptr, void, void *, uint32_t, "xchgl") #define CK_PR_FAS_S(S, T, I) CK_PR_FAS(S, T, T, T, I) @@ -146,7 +146,7 @@ CK_PR_FAS_S(8, uint8_t, "xchgb") return (r); \ } -CK_PR_LOAD(ptr, void, void *, char, "movl") +CK_PR_LOAD(ptr, void, void *, uint32_t, "movl") #define CK_PR_LOAD_S(S, T, I) CK_PR_LOAD(S, T, T, T, I) @@ -171,7 +171,7 @@ CK_PR_LOAD_S(8, uint8_t, "movb") return; \ } -CK_PR_STORE(ptr, void, const void *, char, "movl") +CK_PR_STORE(ptr, void, const void *, uint32_t, "movl") #define CK_PR_STORE_S(S, T, I) CK_PR_STORE(S, T, T, T, I) @@ -200,7 +200,7 @@ CK_PR_STORE_S(8, uint8_t, "movb") return (d); \ } -CK_PR_FAA(ptr, void, uintptr_t, char, "xaddl") +CK_PR_FAA(ptr, void, uintptr_t, uint32_t, "xaddl") #define CK_PR_FAA_S(S, T, I) CK_PR_FAA(S, T, T, T, I) @@ -248,7 +248,7 @@ CK_PR_FAA_S(8, uint8_t, "xaddb") #define CK_PR_UNARY_S(K, S, T, I) CK_PR_UNARY(K, S, T, T, I) #define CK_PR_GENERATE(K) \ - CK_PR_UNARY(K, ptr, void, char, #K "l") \ + CK_PR_UNARY(K, ptr, void, uint32_t, #K "l") \ CK_PR_UNARY_S(K, char, char, #K "b") \ CK_PR_UNARY_S(K, int, int, #K "l") \ CK_PR_UNARY_S(K, uint, unsigned int, #K "l") \ @@ -288,7 +288,7 @@ CK_PR_GENERATE(not) #define CK_PR_BINARY_S(K, S, T, I) CK_PR_BINARY(K, S, T, T, T, I) #define CK_PR_GENERATE(K) \ - CK_PR_BINARY(K, ptr, void, uintptr_t, char, #K "l") \ + CK_PR_BINARY(K, ptr, void, uintptr_t, uint32_t, #K "l") \ CK_PR_BINARY_S(K, char, char, #K "b") \ CK_PR_BINARY_S(K, int, int, #K "l") \ CK_PR_BINARY_S(K, uint, unsigned int, #K "l") \ @@ -369,7 +369,7 @@ CK_PR_GENERATE(xor) } #endif -CK_PR_CAS(ptr, void, void *, char, "cmpxchgl") +CK_PR_CAS(ptr, void, void *, uint32_t, "cmpxchgl") #define CK_PR_CAS_S(S, T, I) CK_PR_CAS(S, T, T, T, I) @@ -401,11 +401,11 @@ CK_PR_CAS_S(8, uint8_t, "cmpxchgb") #define CK_PR_BT_S(K, S, T, I) CK_PR_BT(K, S, T, T, T, I) -#define CK_PR_GENERATE(K) \ - CK_PR_BT(K, ptr, void, uint32_t, char, #K "l %2, %0") \ - CK_PR_BT_S(K, uint, unsigned int, #K "l %2, %0") \ - CK_PR_BT_S(K, int, int, #K "l %2, %0") \ - CK_PR_BT_S(K, 32, uint32_t, #K "l %2, %0") \ +#define CK_PR_GENERATE(K) \ 
+ CK_PR_BT(K, ptr, void, uint32_t, uint32_t, #K "l %2, %0") \ + CK_PR_BT_S(K, uint, unsigned int, #K "l %2, %0") \ + CK_PR_BT_S(K, int, int, #K "l %2, %0") \ + CK_PR_BT_S(K, 32, uint32_t, #K "l %2, %0") \ CK_PR_BT_S(K, 16, uint16_t, #K "w %w2, %0") CK_PR_GENERATE(btc) diff --git a/include/gcc/x86_64/ck_pr.h b/include/gcc/x86_64/ck_pr.h index 4222729..b737c3a 100644 --- a/include/gcc/x86_64/ck_pr.h +++ b/include/gcc/x86_64/ck_pr.h @@ -149,7 +149,7 @@ ck_pr_rfo(const void *m) return v; \ } -CK_PR_FAS(ptr, void, void *, char, "xchgq") +CK_PR_FAS(ptr, void, void *, uint64_t, "xchgq") #define CK_PR_FAS_S(S, T, I) CK_PR_FAS(S, T, T, T, I) @@ -182,7 +182,7 @@ CK_PR_FAS_S(8, uint8_t, "xchgb") return (r); \ } -CK_PR_LOAD(ptr, void, void *, char, "movq") +CK_PR_LOAD(ptr, void, void *, uint64_t, "movq") #define CK_PR_LOAD_S(S, T, I) CK_PR_LOAD(S, T, T, T, I) @@ -223,7 +223,7 @@ ck_pr_load_ptr_2(const void *t, void *v) #define CK_PR_LOAD_2(S, W, T) \ CK_CC_INLINE static void \ - ck_pr_md_load_##S##_##W(const T t[2], T v[2]) \ + ck_pr_md_load_##S##_##W(const T t[W], T v[W]) \ { \ ck_pr_load_64_2((const uint64_t *)(const void *)t, \ (uint64_t *)(void *)v); \ @@ -264,7 +264,7 @@ CK_PR_LOAD_2(8, 16, uint8_t) return; \ } -CK_PR_STORE_IMM(ptr, void, const void *, char, "movq", CK_CC_IMM_U32) +CK_PR_STORE_IMM(ptr, void, const void *, uint64_t, "movq", CK_CC_IMM_U32) #ifndef CK_PR_DISABLE_DOUBLE CK_PR_STORE(double, double, double, double, "movq") #endif @@ -298,7 +298,7 @@ CK_PR_STORE_S(8, uint8_t, "movb", CK_CC_IMM_U32) return (d); \ } -CK_PR_FAA(ptr, void, uintptr_t, char, "xaddq") +CK_PR_FAA(ptr, void, uintptr_t, uint64_t, "xaddq") #define CK_PR_FAA_S(S, T, I) CK_PR_FAA(S, T, T, T, I) @@ -347,7 +347,7 @@ CK_PR_FAA_S(8, uint8_t, "xaddb") #define CK_PR_UNARY_S(K, S, T, I) CK_PR_UNARY(K, S, T, T, I) #define CK_PR_GENERATE(K) \ - CK_PR_UNARY(K, ptr, void, char, #K "q") \ + CK_PR_UNARY(K, ptr, void, uint64_t, #K "q") \ CK_PR_UNARY_S(K, char, char, #K "b") \ CK_PR_UNARY_S(K, int, int, #K "l") \ CK_PR_UNARY_S(K, uint, unsigned int, #K "l") \ @@ -388,7 +388,7 @@ CK_PR_GENERATE(not) #define CK_PR_BINARY_S(K, S, T, I, O) CK_PR_BINARY(K, S, T, T, T, I, O) #define CK_PR_GENERATE(K) \ - CK_PR_BINARY(K, ptr, void, uintptr_t, char, #K "q", CK_CC_IMM_U32) \ + CK_PR_BINARY(K, ptr, void, uintptr_t, uint64_t, #K "q", CK_CC_IMM_U32) \ CK_PR_BINARY_S(K, char, char, #K "b", CK_CC_IMM_S32) \ CK_PR_BINARY_S(K, int, int, #K "l", CK_CC_IMM_S32) \ CK_PR_BINARY_S(K, uint, unsigned int, #K "l", CK_CC_IMM_U32) \ @@ -470,7 +470,7 @@ CK_PR_GENERATE(xor) } #endif -CK_PR_CAS(ptr, void, void *, char, "cmpxchgq") +CK_PR_CAS(ptr, void, void *, uint64_t, "cmpxchgq") #define CK_PR_CAS_S(S, T, I) CK_PR_CAS(S, T, T, T, I) @@ -594,12 +594,12 @@ CK_PR_CAS_V(8, 16, uint8_t) #define CK_PR_BT_S(K, S, T, I) CK_PR_BT(K, S, T, T, T, I) -#define CK_PR_GENERATE(K) \ - CK_PR_BT(K, ptr, void, uint64_t, char, #K "q %2, %0") \ - CK_PR_BT_S(K, uint, unsigned int, #K "l %2, %0") \ - CK_PR_BT_S(K, int, int, #K "l %2, %0") \ - CK_PR_BT_S(K, 64, uint64_t, #K "q %2, %0") \ - CK_PR_BT_S(K, 32, uint32_t, #K "l %2, %0") \ +#define CK_PR_GENERATE(K) \ + CK_PR_BT(K, ptr, void, uint64_t, uint64_t, #K "q %2, %0") \ + CK_PR_BT_S(K, uint, unsigned int, #K "l %2, %0") \ + CK_PR_BT_S(K, int, int, #K "l %2, %0") \ + CK_PR_BT_S(K, 64, uint64_t, #K "q %2, %0") \ + CK_PR_BT_S(K, 32, uint32_t, #K "l %2, %0") \ CK_PR_BT_S(K, 16, uint16_t, #K "w %w2, %0") CK_PR_GENERATE(btc) -- cgit v1.2.3
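
As context for the ck_queue.h hunks above: the new CK_SLIST_FOREACH_FROM(), CK_STAILQ_FOREACH_FROM() and CK_LIST_FOREACH_FROM() macros mirror their FreeBSD sys/queue.h counterparts — when the iteration variable is non-NULL the walk resumes from that element, otherwise it starts at the head. Below is a minimal sketch of the SLIST variant; the struct and function names are illustrative only, and it assumes the usual ck_queue.h helpers (CK_SLIST_HEAD, CK_SLIST_ENTRY, CK_SLIST_INIT, CK_SLIST_INSERT_HEAD), none of which are added by this patch.

#include <ck_queue.h>
#include <stddef.h>
#include <stdio.h>

struct node {
	int value;
	CK_SLIST_ENTRY(node) entry;
};

CK_SLIST_HEAD(node_list, node);

/*
 * Print every element starting at cursor; with cursor == NULL the new
 * CK_SLIST_FOREACH_FROM() macro falls back to CK_SLIST_FIRST(), so the same
 * call serves both a fresh walk and a resumed one.
 */
static void
print_from(struct node_list *list, struct node *cursor)
{

	CK_SLIST_FOREACH_FROM(cursor, list, entry)
		printf("%d\n", cursor->value);
}

int
main(void)
{
	static struct node nodes[3] = {
		{ .value = 1 }, { .value = 2 }, { .value = 3 }
	};
	struct node_list list;

	CK_SLIST_INIT(&list);
	for (int i = 0; i < 3; i++)
		CK_SLIST_INSERT_HEAD(&list, &nodes[i], entry);

	print_from(&list, NULL);	/* full walk: 3 2 1 */
	print_from(&list,
	    CK_SLIST_NEXT(CK_SLIST_FIRST(&list), entry));	/* resumed: 2 1 */
	return 0;
}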
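
For the new gcc/riscv64 port, the header only has to supply the primitives — ld/sd loads and stores, lr/sc compare-and-swap, the amoadd/amoswap/amoand/amoor/amoxor read-modify-writes, and the fence_* barriers built from FENCE — and the higher-level ck data structures build on them unchanged. The toy test-and-set lock below is a hedged sketch exercising a few of those primitives: ck_pr_fas_uint() (amoswap.w), ck_pr_load_uint()/ck_pr_store_uint(), and the lock/unlock fences, whose strict variants map to "fence rw,rw" in this port. It is for illustration only; the names toy_lock, toy_lock_acquire and toy_lock_release are invented here, and real code should use the locks in ck_spinlock.h.

#include <ck_pr.h>
#include <stdio.h>

struct toy_lock {
	unsigned int busy;	/* 0 = free, 1 = held */
};

static void
toy_lock_acquire(struct toy_lock *lock)
{

	/* The atomic swap returns the previous value; 0 means we took the lock. */
	while (ck_pr_fas_uint(&lock->busy, 1) != 0) {
		/* Spin on plain loads until the lock looks free, then retry. */
		do {
			ck_pr_stall();
		} while (ck_pr_load_uint(&lock->busy) != 0);
	}

	/* Keep critical-section accesses from being hoisted above the swap. */
	ck_pr_fence_lock();
}

static void
toy_lock_release(struct toy_lock *lock)
{

	/* Order critical-section accesses before the releasing store. */
	ck_pr_fence_unlock();
	ck_pr_store_uint(&lock->busy, 0);
}

int
main(void)
{
	static struct toy_lock lock = { .busy = 0 };
	static unsigned int counter = 0;

	toy_lock_acquire(&lock);
	counter++;
	toy_lock_release(&lock);

	printf("counter = %u\n", counter);
	return 0;
}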