60 //
61 // - release orders Store|Store, (maps to lwsync)
62 // Load|Store
63 // - acquire orders Load|Store, (maps to lwsync)
64 // Load|Load
65 // - fence orders Store|Store, (maps to sync)
66 // Load|Store,
67 // Load|Load,
68 // Store|Load
69 //
70
// Assembly text for the individual PPC barrier instructions. Each is a
// string literal, so it can be concatenated into an inline-asm template.
71 #define strasm_sync "\n sync \n"
72 #define strasm_lwsync "\n lwsync \n"
73 #define strasm_isync "\n isync \n"
// Ordering flavors expressed in terms of the instructions above.
74 #define strasm_release strasm_lwsync
75 #define strasm_acquire strasm_lwsync
76 #define strasm_fence strasm_sync
// Empty templates: no barrier instruction is emitted.
77 #define strasm_nobarrier ""
78 #define strasm_nobarrier_clobber_memory ""
79
// Platform-specific atomic add, parameterized by operand size in bytes.
// Inherits from Atomic::AddAndFetch, passing itself as the template
// argument, and declares the add_and_fetch primitive that the size-specific
// specializations define.
80 template<size_t byte_size>
81 struct Atomic::PlatformAdd
82 : Atomic::AddAndFetch<Atomic::PlatformAdd<byte_size> >
83 {
// Adds add_value to *dest; the specializations return the updated value.
84 template<typename I, typename D>
85 D add_and_fetch(I add_value, D volatile* dest) const;
86 };
87
// 4-byte specialization: atomically adds add_value to *dest and returns the
// updated value (the sum that was stored).
88 template<>
89 template<typename I, typename D>
90 inline D Atomic::PlatformAdd<4>::add_and_fetch(I add_value, D volatile* dest) const {
// Both the addend and the destination must be exactly 4 bytes wide.
91 STATIC_ASSERT(4 == sizeof(I));
92 STATIC_ASSERT(4 == sizeof(D));
93
94 D result;
95
// lwsync ahead of the loop and isync after it. The loop loads with
// reservation (lwarx), adds, and retries from label 1 until the
// conditional store (stwcx.) succeeds.
96 __asm__ __volatile__ (
97 strasm_lwsync
98 "1: lwarx %0, 0, %2 \n"
99 " add %0, %0, %1 \n"
100 " stwcx. %0, 0, %2 \n"
101 " bne- 1b \n"
102 strasm_isync
103 : /*%0*/"=&r" (result)
104 : /*%1*/"r" (add_value), /*%2*/"r" (dest)
105 : "cc", "memory" );
106
107 return result;
108 }
109
110
// 8-byte specialization: atomically adds add_value to *dest and returns the
// updated value (the sum that was stored).
111 template<>
112 template<typename I, typename D>
113 inline D Atomic::PlatformAdd<8>::add_and_fetch(I add_value, D volatile* dest) const {
// Both the addend and the destination must be exactly 8 bytes wide.
114 STATIC_ASSERT(8 == sizeof(I));
115 STATIC_ASSERT(8 == sizeof(D));
116
117 D result;
118
// lwsync ahead of the loop and isync after it. The loop loads with
// reservation (ldarx), adds, and retries from label 1 until the
// conditional store (stdcx.) succeeds.
119 __asm__ __volatile__ (
120 strasm_lwsync
121 "1: ldarx %0, 0, %2 \n"
122 " add %0, %0, %1 \n"
123 " stdcx. %0, 0, %2 \n"
124 " bne- 1b \n"
125 strasm_isync
126 : /*%0*/"=&r" (result)
127 : /*%1*/"r" (add_value), /*%2*/"r" (dest)
128 : "cc", "memory" );
129
130 return result;
131 }
132
133 template<>
134 template<typename T>
135 inline T Atomic::PlatformXchg<4>::operator()(T exchange_value,
136 T volatile* dest) const {
137 // Note that xchg doesn't necessarily do an acquire
138 // (see synchronizer.cpp).
139
140 T old_value;
141 const uint64_t zero = 0;
142
143 __asm__ __volatile__ (
144 /* lwsync */
145 strasm_lwsync
146 /* atomic loop */
147 "1: \n"
148 " lwarx %[old_value], %[dest], %[zero] \n"
149 " stwcx. %[exchange_value], %[dest], %[zero] \n"
190 /* isync */
191 strasm_sync
192 /* exit */
193 "2: \n"
194 /* out */
195 : [old_value] "=&r" (old_value),
196 "=m" (*dest)
197 /* in */
198 : [dest] "b" (dest),
199 [zero] "r" (zero),
200 [exchange_value] "r" (exchange_value),
201 "m" (*dest)
202 /* clobber */
203 : "cc",
204 "memory"
205 );
206
207 return old_value;
208 }
209
210 inline void cmpxchg_pre_membar(cmpxchg_memory_order order) {
211 if (order != memory_order_relaxed) {
212 __asm__ __volatile__ (
213 /* fence */
214 strasm_sync
215 );
216 }
217 }
218
219 inline void cmpxchg_post_membar(cmpxchg_memory_order order) {
220 if (order != memory_order_relaxed) {
221 __asm__ __volatile__ (
222 /* fence */
223 strasm_sync
224 );
225 }
226 }
227
// 1-byte compare-and-exchange: stores exchange_value into *dest only if
// *dest equals compare_value, and returns the byte value that was observed.
// Implemented on the aligned 32-bit word that contains the byte, using
// lwarx/stwcx.; the barriers before and after are selected by order.
228 template<>
229 template<typename T>
230 inline T Atomic::PlatformCmpxchg<1>::operator()(T exchange_value,
231 T volatile* dest,
232 T compare_value,
233 cmpxchg_memory_order order) const {
234 STATIC_ASSERT(1 == sizeof(T));
235
236 // Note that cmpxchg guarantees a two-way memory barrier across
237 // the cmpxchg, so it's really a 'fence_cmpxchg_fence' if not
238 // specified otherwise (see atomic.hpp).
239
240 // Using 32 bit internally.
// dest_base is dest rounded down to a 4-byte boundary.
241 volatile int *dest_base = (volatile int*)((uintptr_t)dest & ~3);
242
// Bit offset of the target byte inside that word; depends on byte order.
243 #ifdef VM_LITTLE_ENDIAN
244 const unsigned int shift_amount = ((uintptr_t)dest & 3) * 8;
245 #else
246 const unsigned int shift_amount = ((~(uintptr_t)dest) & 3) * 8;
247 #endif
// xor_value, XORed into the word once the byte has compared equal to
// compare_value, turns that byte into exchange_value and leaves the other
// three bytes untouched.
248 const unsigned int masked_compare_val = ((unsigned int)(unsigned char)compare_value),
249 masked_exchange_val = ((unsigned int)(unsigned char)exchange_value),
250 xor_value = (masked_compare_val ^ masked_exchange_val) << shift_amount;
251
252 unsigned int old_value, value32;
253
254 cmpxchg_pre_membar(order);
255
// The guard is a plain byte load that exits early on a mismatch, before
// any reservation is taken. The loop then reloads the whole word with
// reservation, re-checks the byte and retries if the store fails.
256 __asm__ __volatile__ (
257 /* simple guard */
258 " lbz %[old_value], 0(%[dest]) \n"
259 " cmpw %[masked_compare_val], %[old_value] \n"
260 " bne- 2f \n"
261 /* atomic loop */
262 "1: \n"
263 " lwarx %[value32], 0, %[dest_base] \n"
264 /* extract byte and compare */
265 " srd %[old_value], %[value32], %[shift_amount] \n"
266 " clrldi %[old_value], %[old_value], 56 \n"
267 " cmpw %[masked_compare_val], %[old_value] \n"
268 " bne- 2f \n"
269 /* replace byte and try to store */
270 " xor %[value32], %[xor_value], %[value32] \n"
271 " stwcx. %[value32], 0, %[dest_base] \n"
272 " bne- 1b \n"
273 /* exit */
274 "2: \n"
275 /* out */
276 : [old_value] "=&r" (old_value),
277 [value32] "=&r" (value32),
278 "=m" (*dest),
279 "=m" (*dest_base)
280 /* in */
281 : [dest] "b" (dest),
282 [dest_base] "b" (dest_base),
283 [shift_amount] "r" (shift_amount),
284 [masked_compare_val] "r" (masked_compare_val),
285 [xor_value] "r" (xor_value),
286 "m" (*dest),
287 "m" (*dest_base)
288 /* clobber */
289 : "cc",
290 "memory"
291 );
292
293 cmpxchg_post_membar(order);
294
// Only the low byte of old_value is meaningful; convert it back to T.
295 return PrimitiveConversions::cast<T>((unsigned char)old_value);
296 }
297
// 4-byte compare-and-exchange: stores exchange_value into *dest only if
// *dest equals compare_value, and returns the value that was observed.
// The barriers before and after are selected by order.
298 template<>
299 template<typename T>
300 inline T Atomic::PlatformCmpxchg<4>::operator()(T exchange_value,
301 T volatile* dest,
302 T compare_value,
303 cmpxchg_memory_order order) const {
304 STATIC_ASSERT(4 == sizeof(T));
305
306 // Note that cmpxchg guarantees a two-way memory barrier across
307 // the cmpxchg, so it's really a 'fence_cmpxchg_fence' if not
308 // specified otherwise (see atomic.hpp).
309
310 T old_value;
311 const uint64_t zero = 0;
312
313 cmpxchg_pre_membar(order);
314
// The guard is a plain load that exits early on a mismatch, before any
// reservation is taken. The loop then reloads with reservation, re-checks
// and retries from label 1 if the conditional store fails.
315 __asm__ __volatile__ (
316 /* simple guard */
317 " lwz %[old_value], 0(%[dest]) \n"
318 " cmpw %[compare_value], %[old_value] \n"
319 " bne- 2f \n"
320 /* atomic loop */
321 "1: \n"
322 " lwarx %[old_value], %[dest], %[zero] \n"
323 " cmpw %[compare_value], %[old_value] \n"
324 " bne- 2f \n"
325 " stwcx. %[exchange_value], %[dest], %[zero] \n"
326 " bne- 1b \n"
327 /* exit */
328 "2: \n"
329 /* out */
330 : [old_value] "=&r" (old_value),
331 "=m" (*dest)
332 /* in */
333 : [dest] "b" (dest),
334 [zero] "r" (zero),
335 [compare_value] "r" (compare_value),
336 [exchange_value] "r" (exchange_value),
337 "m" (*dest)
338 /* clobber */
339 : "cc",
340 "memory"
341 );
342
343 cmpxchg_post_membar(order);
344
345 return old_value;
346 }
347
// 8-byte compare-and-exchange: stores exchange_value into *dest only if
// *dest equals compare_value, and returns the value that was observed.
// The barriers before and after are selected by order.
348 template<>
349 template<typename T>
350 inline T Atomic::PlatformCmpxchg<8>::operator()(T exchange_value,
351 T volatile* dest,
352 T compare_value,
353 cmpxchg_memory_order order) const {
354 STATIC_ASSERT(8 == sizeof(T));
355
356 // Note that cmpxchg guarantees a two-way memory barrier across
357 // the cmpxchg, so it's really a 'fence_cmpxchg_fence' if not
358 // specified otherwise (see atomic.hpp).
359
360 T old_value;
361 const uint64_t zero = 0;
362
363 cmpxchg_pre_membar(order);
364
// The guard is a plain load that exits early on a mismatch, before any
// reservation is taken. The loop then reloads with reservation, re-checks
// and retries from label 1 if the conditional store fails.
365 __asm__ __volatile__ (
366 /* simple guard */
367 " ld %[old_value], 0(%[dest]) \n"
368 " cmpd %[compare_value], %[old_value] \n"
369 " bne- 2f \n"
370 /* atomic loop */
371 "1: \n"
372 " ldarx %[old_value], %[dest], %[zero] \n"
373 " cmpd %[compare_value], %[old_value] \n"
374 " bne- 2f \n"
375 " stdcx. %[exchange_value], %[dest], %[zero] \n"
376 " bne- 1b \n"
377 /* exit */
378 "2: \n"
379 /* out */
380 : [old_value] "=&r" (old_value),
381 "=m" (*dest)
382 /* in */
383 : [dest] "b" (dest),
384 [zero] "r" (zero),
385 [compare_value] "r" (compare_value),
386 [exchange_value] "r" (exchange_value),
387 "m" (*dest)
388 /* clobber */
389 : "cc",
390 "memory"
391 );
392
393 cmpxchg_post_membar(order);
394
395 return old_value;
396 }
397
// Remove the strasm_* helper macros so they are not visible past this point.
398 #undef strasm_sync
399 #undef strasm_lwsync
400 #undef strasm_isync
401 #undef strasm_release
402 #undef strasm_acquire
403 #undef strasm_fence
404 #undef strasm_nobarrier
405 #undef strasm_nobarrier_clobber_memory
406
407 #endif // OS_CPU_LINUX_PPC_VM_ATOMIC_LINUX_PPC_HPP
|
60 //
61 // - release orders Store|Store, (maps to lwsync)
62 // Load|Store
63 // - acquire orders Load|Store, (maps to lwsync)
64 // Load|Load
65 // - fence orders Store|Store, (maps to sync)
66 // Load|Store,
67 // Load|Load,
68 // Store|Load
69 //
70
// Assembly text for the individual PPC barrier instructions. Each is a
// string literal, so it can be concatenated into an inline-asm template.
71 #define strasm_sync "\n sync \n"
72 #define strasm_lwsync "\n lwsync \n"
73 #define strasm_isync "\n isync \n"
// Ordering flavors expressed in terms of the instructions above.
74 #define strasm_release strasm_lwsync
75 #define strasm_acquire strasm_lwsync
76 #define strasm_fence strasm_sync
// Empty templates: no barrier instruction is emitted.
77 #define strasm_nobarrier ""
78 #define strasm_nobarrier_clobber_memory ""
79
80 inline void pre_membar(cmpxchg_memory_order order) {
81 switch (order) {
82 case memory_order_relaxed:
83 case memory_order_acquire: break;
84 case memory_order_release:
85 case memory_order_acq_rel: __asm__ __volatile__ (strasm_lwsync); break;
86 default : __asm__ __volatile__ (strasm_sync); break;
87 }
88 }
89
90 inline void post_membar(cmpxchg_memory_order order) {
91 switch (order) {
92 case memory_order_relaxed:
93 case memory_order_release: break;
94 case memory_order_acquire:
95 case memory_order_acq_rel: __asm__ __volatile__ (strasm_isync); break;
96 default : __asm__ __volatile__ (strasm_sync); break;
97 }
98 }
99
100
// Platform-specific atomic add, parameterized by operand size in bytes.
// Inherits from Atomic::AddAndFetch, passing itself as the template
// argument, and declares the add_and_fetch primitive that the size-specific
// specializations define.
101 template<size_t byte_size>
102 struct Atomic::PlatformAdd
103 : Atomic::AddAndFetch<Atomic::PlatformAdd<byte_size> >
104 {
// Adds add_value to *dest; the specializations return the updated value.
// order is passed to the barrier helpers that bracket the operation.
105 template<typename I, typename D>
106 D add_and_fetch(I add_value, D volatile* dest, cmpxchg_memory_order order) const;
107 };
108
// 4-byte specialization: atomically adds add_value to *dest and returns the
// updated value (the sum that was stored). The barriers before and after
// the update are selected by order.
109 template<>
110 template<typename I, typename D>
111 inline D Atomic::PlatformAdd<4>::add_and_fetch(I add_value, D volatile* dest,
112 cmpxchg_memory_order order) const {
// Both the addend and the destination must be exactly 4 bytes wide.
113 STATIC_ASSERT(4 == sizeof(I));
114 STATIC_ASSERT(4 == sizeof(D));
115
116 D result;
117
118 pre_membar(order);
119
// Load with reservation (lwarx), add, and retry from label 1 until the
// conditional store (stwcx.) succeeds.
120 __asm__ __volatile__ (
121 "1: lwarx %0, 0, %2 \n"
122 " add %0, %0, %1 \n"
123 " stwcx. %0, 0, %2 \n"
124 " bne- 1b \n"
125 : /*%0*/"=&r" (result)
126 : /*%1*/"r" (add_value), /*%2*/"r" (dest)
127 : "cc", "memory" );
128
129 post_membar(order);
130
131 return result;
132 }
133
134
// 8-byte specialization: atomically adds add_value to *dest and returns the
// updated value (the sum that was stored). The barriers before and after
// the update are selected by order.
135 template<>
136 template<typename I, typename D>
137 inline D Atomic::PlatformAdd<8>::add_and_fetch(I add_value, D volatile* dest,
138 cmpxchg_memory_order order) const {
// Both the addend and the destination must be exactly 8 bytes wide.
139 STATIC_ASSERT(8 == sizeof(I));
140 STATIC_ASSERT(8 == sizeof(D));
141
142 D result;
143
144 pre_membar(order);
145
// Load with reservation (ldarx), add, and retry from label 1 until the
// conditional store (stdcx.) succeeds.
146 __asm__ __volatile__ (
147 "1: ldarx %0, 0, %2 \n"
148 " add %0, %0, %1 \n"
149 " stdcx. %0, 0, %2 \n"
150 " bne- 1b \n"
151 : /*%0*/"=&r" (result)
152 : /*%1*/"r" (add_value), /*%2*/"r" (dest)
153 : "cc", "memory" );
154
155 post_membar(order);
156
157 return result;
158 }
159
160 template<>
161 template<typename T>
162 inline T Atomic::PlatformXchg<4>::operator()(T exchange_value,
163 T volatile* dest) const {
164 // Note that xchg doesn't necessarily do an acquire
165 // (see synchronizer.cpp).
166
167 T old_value;
168 const uint64_t zero = 0;
169
170 __asm__ __volatile__ (
171 /* lwsync */
172 strasm_lwsync
173 /* atomic loop */
174 "1: \n"
175 " lwarx %[old_value], %[dest], %[zero] \n"
176 " stwcx. %[exchange_value], %[dest], %[zero] \n"
217 /* isync */
218 strasm_sync
219 /* exit */
220 "2: \n"
221 /* out */
222 : [old_value] "=&r" (old_value),
223 "=m" (*dest)
224 /* in */
225 : [dest] "b" (dest),
226 [zero] "r" (zero),
227 [exchange_value] "r" (exchange_value),
228 "m" (*dest)
229 /* clobber */
230 : "cc",
231 "memory"
232 );
233
234 return old_value;
235 }
236
// 1-byte compare-and-exchange: stores exchange_value into *dest only if
// *dest equals compare_value, and returns the byte value that was observed.
// Implemented on the aligned 32-bit word that contains the byte, using
// lwarx/stwcx.; the barriers before and after are selected by order.
237 template<>
238 template<typename T>
239 inline T Atomic::PlatformCmpxchg<1>::operator()(T exchange_value,
240 T volatile* dest,
241 T compare_value,
242 cmpxchg_memory_order order) const {
243 STATIC_ASSERT(1 == sizeof(T));
244
245 // Note that cmpxchg guarantees a two-way memory barrier across
246 // the cmpxchg, so it's really a 'fence_cmpxchg_fence' if not
247 // specified otherwise (see atomic.hpp).
248
249 // Using 32 bit internally.
// dest_base is dest rounded down to a 4-byte boundary.
250 volatile int *dest_base = (volatile int*)((uintptr_t)dest & ~3);
251
// Bit offset of the target byte inside that word; depends on byte order.
252 #ifdef VM_LITTLE_ENDIAN
253 const unsigned int shift_amount = ((uintptr_t)dest & 3) * 8;
254 #else
255 const unsigned int shift_amount = ((~(uintptr_t)dest) & 3) * 8;
256 #endif
// xor_value, XORed into the word once the byte has compared equal to
// compare_value, turns that byte into exchange_value and leaves the other
// three bytes untouched.
257 const unsigned int masked_compare_val = ((unsigned int)(unsigned char)compare_value),
258 masked_exchange_val = ((unsigned int)(unsigned char)exchange_value),
259 xor_value = (masked_compare_val ^ masked_exchange_val) << shift_amount;
260
261 unsigned int old_value, value32;
262
263 pre_membar(order);
264
// The guard is a plain byte load that exits early on a mismatch, before
// any reservation is taken. The loop then reloads the whole word with
// reservation, re-checks the byte and retries if the store fails.
265 __asm__ __volatile__ (
266 /* simple guard */
267 " lbz %[old_value], 0(%[dest]) \n"
268 " cmpw %[masked_compare_val], %[old_value] \n"
269 " bne- 2f \n"
270 /* atomic loop */
271 "1: \n"
272 " lwarx %[value32], 0, %[dest_base] \n"
273 /* extract byte and compare */
274 " srd %[old_value], %[value32], %[shift_amount] \n"
275 " clrldi %[old_value], %[old_value], 56 \n"
276 " cmpw %[masked_compare_val], %[old_value] \n"
277 " bne- 2f \n"
278 /* replace byte and try to store */
279 " xor %[value32], %[xor_value], %[value32] \n"
280 " stwcx. %[value32], 0, %[dest_base] \n"
281 " bne- 1b \n"
282 /* exit */
283 "2: \n"
284 /* out */
285 : [old_value] "=&r" (old_value),
286 [value32] "=&r" (value32),
287 "=m" (*dest),
288 "=m" (*dest_base)
289 /* in */
290 : [dest] "b" (dest),
291 [dest_base] "b" (dest_base),
292 [shift_amount] "r" (shift_amount),
293 [masked_compare_val] "r" (masked_compare_val),
294 [xor_value] "r" (xor_value),
295 "m" (*dest),
296 "m" (*dest_base)
297 /* clobber */
298 : "cc",
299 "memory"
300 );
301
302 post_membar(order);
303
// Only the low byte of old_value is meaningful; convert it back to T.
304 return PrimitiveConversions::cast<T>((unsigned char)old_value);
305 }
306
// 4-byte compare-and-exchange: stores exchange_value into *dest only if
// *dest equals compare_value, and returns the value that was observed.
// The barriers before and after are selected by order.
307 template<>
308 template<typename T>
309 inline T Atomic::PlatformCmpxchg<4>::operator()(T exchange_value,
310 T volatile* dest,
311 T compare_value,
312 cmpxchg_memory_order order) const {
313 STATIC_ASSERT(4 == sizeof(T));
314
315 // Note that cmpxchg guarantees a two-way memory barrier across
316 // the cmpxchg, so it's really a 'fence_cmpxchg_fence' if not
317 // specified otherwise (see atomic.hpp).
318
319 T old_value;
320 const uint64_t zero = 0;
321
322 pre_membar(order);
323
// The guard is a plain load that exits early on a mismatch, before any
// reservation is taken. The loop then reloads with reservation, re-checks
// and retries from label 1 if the conditional store fails.
324 __asm__ __volatile__ (
325 /* simple guard */
326 " lwz %[old_value], 0(%[dest]) \n"
327 " cmpw %[compare_value], %[old_value] \n"
328 " bne- 2f \n"
329 /* atomic loop */
330 "1: \n"
331 " lwarx %[old_value], %[dest], %[zero] \n"
332 " cmpw %[compare_value], %[old_value] \n"
333 " bne- 2f \n"
334 " stwcx. %[exchange_value], %[dest], %[zero] \n"
335 " bne- 1b \n"
336 /* exit */
337 "2: \n"
338 /* out */
339 : [old_value] "=&r" (old_value),
340 "=m" (*dest)
341 /* in */
342 : [dest] "b" (dest),
343 [zero] "r" (zero),
344 [compare_value] "r" (compare_value),
345 [exchange_value] "r" (exchange_value),
346 "m" (*dest)
347 /* clobber */
348 : "cc",
349 "memory"
350 );
351
352 post_membar(order);
353
354 return old_value;
355 }
356
// 8-byte compare-and-exchange: stores exchange_value into *dest only if
// *dest equals compare_value, and returns the value that was observed.
// The barriers before and after are selected by order.
357 template<>
358 template<typename T>
359 inline T Atomic::PlatformCmpxchg<8>::operator()(T exchange_value,
360 T volatile* dest,
361 T compare_value,
362 cmpxchg_memory_order order) const {
363 STATIC_ASSERT(8 == sizeof(T));
364
365 // Note that cmpxchg guarantees a two-way memory barrier across
366 // the cmpxchg, so it's really a 'fence_cmpxchg_fence' if not
367 // specified otherwise (see atomic.hpp).
368
369 T old_value;
370 const uint64_t zero = 0;
371
372 pre_membar(order);
373
// The guard is a plain load that exits early on a mismatch, before any
// reservation is taken. The loop then reloads with reservation, re-checks
// and retries from label 1 if the conditional store fails.
374 __asm__ __volatile__ (
375 /* simple guard */
376 " ld %[old_value], 0(%[dest]) \n"
377 " cmpd %[compare_value], %[old_value] \n"
378 " bne- 2f \n"
379 /* atomic loop */
380 "1: \n"
381 " ldarx %[old_value], %[dest], %[zero] \n"
382 " cmpd %[compare_value], %[old_value] \n"
383 " bne- 2f \n"
384 " stdcx. %[exchange_value], %[dest], %[zero] \n"
385 " bne- 1b \n"
386 /* exit */
387 "2: \n"
388 /* out */
389 : [old_value] "=&r" (old_value),
390 "=m" (*dest)
391 /* in */
392 : [dest] "b" (dest),
393 [zero] "r" (zero),
394 [compare_value] "r" (compare_value),
395 [exchange_value] "r" (exchange_value),
396 "m" (*dest)
397 /* clobber */
398 : "cc",
399 "memory"
400 );
401
402 post_membar(order);
403
404 return old_value;
405 }
406
// Remove the strasm_* helper macros so they are not visible past this point.
407 #undef strasm_sync
408 #undef strasm_lwsync
409 #undef strasm_isync
410 #undef strasm_release
411 #undef strasm_acquire
412 #undef strasm_fence
413 #undef strasm_nobarrier
414 #undef strasm_nobarrier_clobber_memory
415
416 #endif // OS_CPU_LINUX_PPC_VM_ATOMIC_LINUX_PPC_HPP
|