path: root/deps/jemalloc/include/jemalloc/internal/atomic_gcc_sync.h
#ifndef JEMALLOC_INTERNAL_ATOMIC_GCC_SYNC_H
#define JEMALLOC_INTERNAL_ATOMIC_GCC_SYNC_H

#define ATOMIC_INIT(...) {__VA_ARGS__}

typedef enum {
	atomic_memory_order_relaxed,
	atomic_memory_order_acquire,
	atomic_memory_order_release,
	atomic_memory_order_acq_rel,
	atomic_memory_order_seq_cst
} atomic_memory_order_t;

ATOMIC_INLINE void
atomic_fence(atomic_memory_order_t mo) {
	/* Easy cases first: compiler-only barrier, and full barrier. */
	if (mo == atomic_memory_order_relaxed) {
		asm volatile("" ::: "memory");
		return;
	}
	if (mo == atomic_memory_order_seq_cst) {
		asm volatile("" ::: "memory");
		__sync_synchronize();
		asm volatile("" ::: "memory");
		return;
	}
	asm volatile("" ::: "memory");
#  if defined(__i386__) || defined(__x86_64__)
	/* This is implicit on x86: TSO already provides acquire/release ordering. */
#  elif defined(__ppc64__)
	asm volatile("lwsync");
#  elif defined(__ppc__)
	asm volatile("sync");
#  elif defined(__sparc__) && defined(__arch64__)
	if (mo == atomic_memory_order_acquire) {
		asm volatile("membar #LoadLoad | #LoadStore");
	} else if (mo == atomic_memory_order_release) {
		asm volatile("membar #LoadStore | #StoreStore");
	} else {
		asm volatile("membar #LoadLoad | #LoadStore | #StoreStore");
	}
#  else
	__sync_synchronize();
#  endif
	asm volatile("" ::: "memory");
}
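
/*
 * For example, on ppc64 the branches above reduce to (schematically):
 *
 *	relaxed:  compiler barrier only
 *	acquire:  lwsync
 *	release:  lwsync
 *	acq_rel:  lwsync
 *	seq_cst:  __sync_synchronize() (a full hardware barrier)
 */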

/*
 * A correct implementation of seq_cst loads and stores on weakly ordered
 * architectures could do either of the following:
 *   1. store() is weak-fence -> store -> strong-fence, load() is load ->
 *      strong-fence.
 *   2. store() is strong-fence -> store, load() is strong-fence -> load ->
 *      weak-fence.
 * The tricky thing is, load() and store() above can be the load or store
 * portions of a gcc __sync builtin, so we have to follow GCC's lead, which
 * means going with strategy 2.
 * On strongly ordered architectures, the natural strategy is to stick a strong
 * fence after seq_cst stores, and have naked loads.  So we want the strong
 * fences in different places on different architectures.
 * atomic_pre_sc_load_fence and atomic_post_sc_store_fence allow us to
 * accomplish this.
 */
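
/*
 * A sketch of what the generated seq_cst operations below do, given the
 * helpers that follow.  On a weakly ordered machine:
 *
 *	load:  full fence; result = a->repr; acquire fence;
 *	store: release fence; a->repr = val;
 *
 * On x86 / SPARC (TSO), the strong fence instead follows the store and the
 * load is naked at the hardware level:
 *
 *	load:  result = a->repr;
 *	store: a->repr = val; full fence;
 */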

ATOMIC_INLINE void
atomic_pre_sc_load_fence(void) {
#  if defined(__i386__) || defined(__x86_64__) ||			\
    (defined(__sparc__) && defined(__arch64__))
	atomic_fence(atomic_memory_order_relaxed);
#  else
	atomic_fence(atomic_memory_order_seq_cst);
#  endif
}

ATOMIC_INLINE void
atomic_post_sc_store_fence(void) {
#  if defined(__i386__) || defined(__x86_64__) ||			\
    (defined(__sparc__) && defined(__arch64__))
	atomic_fence(atomic_memory_order_seq_cst);
#  else
	atomic_fence(atomic_memory_order_relaxed);
#  endif
}

#define JEMALLOC_GENERATE_ATOMICS(type, short_type,			\
    /* unused */ lg_size)						\
typedef struct {							\
	type volatile repr;						\
} atomic_##short_type##_t;						\
									\
ATOMIC_INLINE type							\
atomic_load_##short_type(const atomic_##short_type##_t *a,		\
    atomic_memory_order_t mo) {						\
	if (mo == atomic_memory_order_seq_cst) {			\
		atomic_pre_sc_load_fence();				\
	}								\
	type result = a->repr;						\
	if (mo != atomic_memory_order_relaxed) {			\
		atomic_fence(atomic_memory_order_acquire);		\
	}								\
	return result;							\
}									\
									\
ATOMIC_INLINE void							\
atomic_store_##short_type(atomic_##short_type##_t *a,			\
    type val, atomic_memory_order_t mo) {				\
	if (mo != atomic_memory_order_relaxed) {			\
		atomic_fence(atomic_memory_order_release);		\
	}								\
	a->repr = val;							\
	if (mo == atomic_memory_order_seq_cst) {			\
		atomic_post_sc_store_fence();				\
	}								\
}									\
									\
ATOMIC_INLINE type							\
atomic_exchange_##short_type(atomic_##short_type##_t *a, type val,	\
    atomic_memory_order_t mo) {						\
	/*								\
	 * Because of FreeBSD, we care about gcc 4.2, which doesn't	\
	 * have an atomic exchange builtin.  We fake it with a CAS	\
	 * loop; since the builtin CAS is a full barrier, this is at	\
	 * least as strong as any requested mo.				\
	 */								\
	while (true) {							\
		type old = a->repr;					\
		if (__sync_bool_compare_and_swap(&a->repr, old, val)) {	\
			return old;					\
		}							\
	}								\
}									\
									\
ATOMIC_INLINE bool							\
atomic_compare_exchange_weak_##short_type(atomic_##short_type##_t *a,	\
    type *expected, type desired, atomic_memory_order_t success_mo,	\
    atomic_memory_order_t failure_mo) {					\
	type prev = __sync_val_compare_and_swap(&a->repr, *expected,	\
	    desired);							\
	if (prev == *expected) {					\
		return true;						\
	} else {							\
		*expected = prev;					\
		return false;						\
	}								\
}									\
									\
ATOMIC_INLINE bool							\
atomic_compare_exchange_strong_##short_type(atomic_##short_type##_t *a,	\
    type *expected, type desired, atomic_memory_order_t success_mo,	\
    atomic_memory_order_t failure_mo) {					\
	type prev = __sync_val_compare_and_swap(&a->repr, *expected,	\
	    desired);							\
	if (prev == *expected) {					\
		return true;						\
	} else {							\
		*expected = prev;					\
		return false;						\
	}								\
}
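
/*
 * Usage sketch.  The real instantiations live in jemalloc's atomic.h; the
 * names below are illustrative:
 *
 *	JEMALLOC_GENERATE_ATOMICS(void *, p, LG_SIZEOF_PTR)
 *
 *	static atomic_p_t slot = ATOMIC_INIT(NULL);
 *	void *p = atomic_load_p(&slot, atomic_memory_order_acquire);
 *	atomic_store_p(&slot, p, atomic_memory_order_release);
 */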

#define JEMALLOC_GENERATE_INT_ATOMICS(type, short_type,			\
    /* unused */ lg_size)						\
JEMALLOC_GENERATE_ATOMICS(type, short_type, /* unused */ lg_size)	\
									\
ATOMIC_INLINE type							\
atomic_fetch_add_##short_type(atomic_##short_type##_t *a, type val,	\
    atomic_memory_order_t mo) {						\
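	/*								\
	 * The __sync builtins are full barriers, so the requested mo	\
	 * is ignored here and in the fetch ops below.			\
	 */								\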
	return __sync_fetch_and_add(&a->repr, val);			\
}									\
									\
ATOMIC_INLINE type							\
atomic_fetch_sub_##short_type(atomic_##short_type##_t *a, type val,	\
    atomic_memory_order_t mo) {						\
	return __sync_fetch_and_sub(&a->repr, val);			\
}									\
									\
ATOMIC_INLINE type							\
atomic_fetch_and_##short_type(atomic_##short_type##_t *a, type val,	\
    atomic_memory_order_t mo) {						\
	return __sync_fetch_and_and(&a->repr, val);			\
}									\
									\
ATOMIC_INLINE type							\
atomic_fetch_or_##short_type(atomic_##short_type##_t *a, type val,	\
    atomic_memory_order_t mo) {						\
	return __sync_fetch_and_or(&a->repr, val);			\
}									\
									\
ATOMIC_INLINE type							\
atomic_fetch_xor_##short_type(atomic_##short_type##_t *a, type val,	\
    atomic_memory_order_t mo) {						\
	return __sync_fetch_and_xor(&a->repr, val);			\
}
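
/*
 * Usage sketch for the integer variants (the counter name is hypothetical;
 * instantiation mirrors jemalloc's atomic.h):
 *
 *	JEMALLOC_GENERATE_INT_ATOMICS(uint32_t, u32, 2)
 *
 *	static atomic_u32_t nrequests = ATOMIC_INIT(0);
 *	uint32_t prev = atomic_fetch_add_u32(&nrequests, 1,
 *	    atomic_memory_order_relaxed);
 */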

#endif /* JEMALLOC_INTERNAL_ATOMIC_GCC_SYNC_H */