pyatomic.h

#ifndef Py_ATOMIC_H
#define Py_ATOMIC_H
#ifdef Py_BUILD_CORE

#include "dynamic_annotations.h"

#include "pyconfig.h"

#if defined(HAVE_STD_ATOMIC)
#include <stdatomic.h>
#endif

#if defined(_MSC_VER)
#include <intrin.h>
#include <immintrin.h>
#endif

/* This is modeled after the atomics interface from C1x, according to
 * the draft at
 * http://www.open-std.org/JTC1/SC22/wg14/www/docs/n1425.pdf.
 * Operations and types are named the same except with a _Py_ prefix
 * and have the same semantics.
 *
 * Beware, the implementations here are deep magic.
 */
#if defined(HAVE_STD_ATOMIC)

typedef enum _Py_memory_order {
    _Py_memory_order_relaxed = memory_order_relaxed,
    _Py_memory_order_acquire = memory_order_acquire,
    _Py_memory_order_release = memory_order_release,
    _Py_memory_order_acq_rel = memory_order_acq_rel,
    _Py_memory_order_seq_cst = memory_order_seq_cst
} _Py_memory_order;

typedef struct _Py_atomic_address {
    atomic_uintptr_t _value;
} _Py_atomic_address;

typedef struct _Py_atomic_int {
    atomic_int _value;
} _Py_atomic_int;

#define _Py_atomic_signal_fence(/*memory_order*/ ORDER) \
    atomic_signal_fence(ORDER)

#define _Py_atomic_thread_fence(/*memory_order*/ ORDER) \
    atomic_thread_fence(ORDER)

#define _Py_atomic_store_explicit(ATOMIC_VAL, NEW_VAL, ORDER) \
    atomic_store_explicit(&(ATOMIC_VAL)->_value, NEW_VAL, ORDER)

#define _Py_atomic_load_explicit(ATOMIC_VAL, ORDER) \
    atomic_load_explicit(&(ATOMIC_VAL)->_value, ORDER)
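
/* Illustrative sketch (not part of this header; "addr" is a hypothetical
 * variable): in this branch the wrappers forward directly to the C11
 * <stdatomic.h> operations, so
 *
 *     static _Py_atomic_address addr;
 *     _Py_atomic_store_explicit(&addr, (uintptr_t)0, _Py_memory_order_release);
 *
 * is equivalent to
 *
 *     atomic_store_explicit(&addr._value, (uintptr_t)0, memory_order_release);
 */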
/* Use builtin atomic operations in GCC >= 4.7 */
#elif defined(HAVE_BUILTIN_ATOMIC)

typedef enum _Py_memory_order {
    _Py_memory_order_relaxed = __ATOMIC_RELAXED,
    _Py_memory_order_acquire = __ATOMIC_ACQUIRE,
    _Py_memory_order_release = __ATOMIC_RELEASE,
    _Py_memory_order_acq_rel = __ATOMIC_ACQ_REL,
    _Py_memory_order_seq_cst = __ATOMIC_SEQ_CST
} _Py_memory_order;

typedef struct _Py_atomic_address {
    uintptr_t _value;
} _Py_atomic_address;

typedef struct _Py_atomic_int {
    int _value;
} _Py_atomic_int;

#define _Py_atomic_signal_fence(/*memory_order*/ ORDER) \
    __atomic_signal_fence(ORDER)

#define _Py_atomic_thread_fence(/*memory_order*/ ORDER) \
    __atomic_thread_fence(ORDER)

#define _Py_atomic_store_explicit(ATOMIC_VAL, NEW_VAL, ORDER) \
    (assert((ORDER) == __ATOMIC_RELAXED                       \
            || (ORDER) == __ATOMIC_SEQ_CST                    \
            || (ORDER) == __ATOMIC_RELEASE),                  \
     __atomic_store_n(&(ATOMIC_VAL)->_value, NEW_VAL, ORDER))

#define _Py_atomic_load_explicit(ATOMIC_VAL, ORDER)           \
    (assert((ORDER) == __ATOMIC_RELAXED                       \
            || (ORDER) == __ATOMIC_SEQ_CST                    \
            || (ORDER) == __ATOMIC_ACQUIRE                    \
            || (ORDER) == __ATOMIC_CONSUME),                  \
     __atomic_load_n(&(ATOMIC_VAL)->_value, ORDER))
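
/* The asserts mirror the C11 rules: an atomic store may not use acquire,
 * consume or acq_rel ordering, and an atomic load may not use release or
 * acq_rel ordering.  Invalid orders are caught in debug builds before
 * dispatching to the GCC builtins.
 */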
/* Only support GCC (for expression statements) and x86 (for simple
 * atomic semantics), plus MSVC on x86/x64/ARM. */
#elif defined(__GNUC__) && (defined(__i386__) || defined(__amd64))

typedef enum _Py_memory_order {
    _Py_memory_order_relaxed,
    _Py_memory_order_acquire,
    _Py_memory_order_release,
    _Py_memory_order_acq_rel,
    _Py_memory_order_seq_cst
} _Py_memory_order;

typedef struct _Py_atomic_address {
    uintptr_t _value;
} _Py_atomic_address;

typedef struct _Py_atomic_int {
    int _value;
} _Py_atomic_int;

static __inline__ void
_Py_atomic_signal_fence(_Py_memory_order order)
{
    if (order != _Py_memory_order_relaxed)
        __asm__ volatile("":::"memory");
}

static __inline__ void
_Py_atomic_thread_fence(_Py_memory_order order)
{
    if (order != _Py_memory_order_relaxed)
        __asm__ volatile("mfence":::"memory");
}

/* Tell the race checker about this operation's effects. */
static __inline__ void
_Py_ANNOTATE_MEMORY_ORDER(const volatile void *address, _Py_memory_order order)
{
    (void)address;              /* shut up -Wunused-parameter */
    switch(order) {
    case _Py_memory_order_release:
    case _Py_memory_order_acq_rel:
    case _Py_memory_order_seq_cst:
        _Py_ANNOTATE_HAPPENS_BEFORE(address);
        break;
    case _Py_memory_order_relaxed:
    case _Py_memory_order_acquire:
        break;
    }
    switch(order) {
    case _Py_memory_order_acquire:
    case _Py_memory_order_acq_rel:
    case _Py_memory_order_seq_cst:
        _Py_ANNOTATE_HAPPENS_AFTER(address);
        break;
    case _Py_memory_order_relaxed:
    case _Py_memory_order_release:
        break;
    }
}
#define _Py_atomic_store_explicit(ATOMIC_VAL, NEW_VAL, ORDER) \
    __extension__ ({ \
        __typeof__(ATOMIC_VAL) atomic_val = ATOMIC_VAL; \
        __typeof__(atomic_val->_value) new_val = NEW_VAL; \
        volatile __typeof__(new_val) *volatile_data = &atomic_val->_value; \
        _Py_memory_order order = ORDER; \
        _Py_ANNOTATE_MEMORY_ORDER(atomic_val, order); \
        \
        /* Perform the operation. */ \
        _Py_ANNOTATE_IGNORE_WRITES_BEGIN(); \
        switch(order) { \
        case _Py_memory_order_release: \
            _Py_atomic_signal_fence(_Py_memory_order_release); \
            /* fallthrough */ \
        case _Py_memory_order_relaxed: \
            *volatile_data = new_val; \
            break; \
        \
        case _Py_memory_order_acquire: \
        case _Py_memory_order_acq_rel: \
        case _Py_memory_order_seq_cst: \
            __asm__ volatile("xchg %0, %1" \
                             : "+r"(new_val) \
                             : "m"(atomic_val->_value) \
                             : "memory"); \
            break; \
        } \
        _Py_ANNOTATE_IGNORE_WRITES_END(); \
    })
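
/* The acquire/acq_rel/seq_cst store above relies on xchg: on x86 an xchg
 * with a memory operand carries an implicit lock prefix, so it acts as a
 * full memory barrier and no separate mfence is needed for the store.
 */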
#define _Py_atomic_load_explicit(ATOMIC_VAL, ORDER) \
    __extension__ ({ \
        __typeof__(ATOMIC_VAL) atomic_val = ATOMIC_VAL; \
        __typeof__(atomic_val->_value) result; \
        volatile __typeof__(result) *volatile_data = &atomic_val->_value; \
        _Py_memory_order order = ORDER; \
        _Py_ANNOTATE_MEMORY_ORDER(atomic_val, order); \
        \
        /* Perform the operation. */ \
        _Py_ANNOTATE_IGNORE_READS_BEGIN(); \
        switch(order) { \
        case _Py_memory_order_release: \
        case _Py_memory_order_acq_rel: \
        case _Py_memory_order_seq_cst: \
            /* Loads on x86 are not releases by default, so need a */ \
            /* thread fence. */ \
            _Py_atomic_thread_fence(_Py_memory_order_release); \
            break; \
        default: \
            /* No fence */ \
            break; \
        } \
        result = *volatile_data; \
        switch(order) { \
        case _Py_memory_order_acquire: \
        case _Py_memory_order_acq_rel: \
        case _Py_memory_order_seq_cst: \
            /* Loads on x86 are automatically acquire operations so */ \
            /* can get by with just a compiler fence. */ \
            _Py_atomic_signal_fence(_Py_memory_order_acquire); \
            break; \
        default: \
            /* No fence */ \
            break; \
        } \
        _Py_ANNOTATE_IGNORE_READS_END(); \
        result; \
    })
#elif defined(_MSC_VER)
/*  _Interlocked* functions provide a full memory barrier and are therefore
    enough for acq_rel and seq_cst.  If the HLE (Hardware Lock Elision)
    variants aren't available in hardware, they fall back to a full memory
    barrier as well.

    This might affect performance, but likely only in some very specific and
    hard-to-measure scenarios.
*/
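
/* The HLE variants used below (_InterlockedExchange*_HLEAcquire/_HLERelease
 * and the matching compare-exchange intrinsics) emit XACQUIRE/XRELEASE
 * prefixes.  Processors without TSX simply ignore those prefixes, so the
 * fallback mentioned above is just the plain, full-barrier interlocked
 * operation.
 */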
#if defined(_M_IX86) || defined(_M_X64)

typedef enum _Py_memory_order {
    _Py_memory_order_relaxed,
    _Py_memory_order_acquire,
    _Py_memory_order_release,
    _Py_memory_order_acq_rel,
    _Py_memory_order_seq_cst
} _Py_memory_order;

typedef struct _Py_atomic_address {
    volatile uintptr_t _value;
} _Py_atomic_address;

typedef struct _Py_atomic_int {
    volatile int _value;
} _Py_atomic_int;

#if defined(_M_X64)
#define _Py_atomic_store_64bit(ATOMIC_VAL, NEW_VAL, ORDER) \
    switch (ORDER) { \
    case _Py_memory_order_acquire: \
        _InterlockedExchange64_HLEAcquire((__int64 volatile*)ATOMIC_VAL, (__int64)NEW_VAL); \
        break; \
    case _Py_memory_order_release: \
        _InterlockedExchange64_HLERelease((__int64 volatile*)ATOMIC_VAL, (__int64)NEW_VAL); \
        break; \
    default: \
        _InterlockedExchange64((__int64 volatile*)ATOMIC_VAL, (__int64)NEW_VAL); \
        break; \
    }
#else
#define _Py_atomic_store_64bit(ATOMIC_VAL, NEW_VAL, ORDER) ((void)0);
#endif

#define _Py_atomic_store_32bit(ATOMIC_VAL, NEW_VAL, ORDER) \
    switch (ORDER) { \
    case _Py_memory_order_acquire: \
        _InterlockedExchange_HLEAcquire((volatile long*)ATOMIC_VAL, (int)NEW_VAL); \
        break; \
    case _Py_memory_order_release: \
        _InterlockedExchange_HLERelease((volatile long*)ATOMIC_VAL, (int)NEW_VAL); \
        break; \
    default: \
        _InterlockedExchange((volatile long*)ATOMIC_VAL, (int)NEW_VAL); \
        break; \
    }
#if defined(_M_X64)
/* This has to be an intptr_t for now.
   gil_created() uses -1 as a sentinel value; if this returned a uintptr_t,
   it would do an unsigned compare and crash.
*/
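/* Concretely: the GIL code stores -1 to mean "not created yet" and tests the
 * loaded value with a signed comparison such as (load(...) >= 0).  If the
 * load returned uintptr_t, -1 would come back as UINTPTR_MAX and the >= 0
 * test would always succeed, defeating the sentinel check.
 */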
inline intptr_t _Py_atomic_load_64bit(volatile uintptr_t* value, int order) {
    __int64 old;
    switch (order) {
    case _Py_memory_order_acquire:
    {
        do {
            old = *value;
        } while(_InterlockedCompareExchange64_HLEAcquire((volatile __int64*)value, old, old) != old);
        break;
    }
    case _Py_memory_order_release:
    {
        do {
            old = *value;
        } while(_InterlockedCompareExchange64_HLERelease((volatile __int64*)value, old, old) != old);
        break;
    }
    case _Py_memory_order_relaxed:
        old = *value;
        break;
    default:
    {
        do {
            old = *value;
        } while(_InterlockedCompareExchange64((volatile __int64*)value, old, old) != old);
        break;
    }
    }
    return old;
}

#else
#define _Py_atomic_load_64bit(ATOMIC_VAL, ORDER) *ATOMIC_VAL
#endif

inline int _Py_atomic_load_32bit(volatile int* value, int order) {
    long old;
    switch (order) {
    case _Py_memory_order_acquire:
    {
        do {
            old = *value;
        } while(_InterlockedCompareExchange_HLEAcquire((volatile long*)value, old, old) != old);
        break;
    }
    case _Py_memory_order_release:
    {
        do {
            old = *value;
        } while(_InterlockedCompareExchange_HLERelease((volatile long*)value, old, old) != old);
        break;
    }
    case _Py_memory_order_relaxed:
        old = *value;
        break;
    default:
    {
        do {
            old = *value;
        } while(_InterlockedCompareExchange((volatile long*)value, old, old) != old);
        break;
    }
    }
    return old;
}
#define _Py_atomic_store_explicit(ATOMIC_VAL, NEW_VAL, ORDER) \
  if (sizeof((ATOMIC_VAL)->_value) == 8) { \
    _Py_atomic_store_64bit((volatile long long*)&((ATOMIC_VAL)->_value), NEW_VAL, ORDER) } else { \
    _Py_atomic_store_32bit((volatile long*)&((ATOMIC_VAL)->_value), NEW_VAL, ORDER) }

#define _Py_atomic_load_explicit(ATOMIC_VAL, ORDER) \
  ( \
    sizeof((ATOMIC_VAL)->_value) == 8 ? \
    _Py_atomic_load_64bit((volatile uintptr_t*)&((ATOMIC_VAL)->_value), ORDER) : \
    _Py_atomic_load_32bit((volatile int*)&((ATOMIC_VAL)->_value), ORDER) \
  )
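
/* Note on the loads above: the non-relaxed orderings are implemented as a
 * compare-exchange that swaps the current value with itself.  The CAS never
 * changes memory, but it does provide the full barrier the stronger
 * orderings require; relaxed loads are a plain volatile read.
 */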
#elif defined(_M_ARM) || defined(_M_ARM64)

typedef enum _Py_memory_order {
    _Py_memory_order_relaxed,
    _Py_memory_order_acquire,
    _Py_memory_order_release,
    _Py_memory_order_acq_rel,
    _Py_memory_order_seq_cst
} _Py_memory_order;

typedef struct _Py_atomic_address {
    volatile uintptr_t _value;
} _Py_atomic_address;

typedef struct _Py_atomic_int {
    volatile int _value;
} _Py_atomic_int;

#if defined(_M_ARM64)
#define _Py_atomic_store_64bit(ATOMIC_VAL, NEW_VAL, ORDER) \
    switch (ORDER) { \
    case _Py_memory_order_acquire: \
        _InterlockedExchange64_acq((__int64 volatile*)ATOMIC_VAL, (__int64)NEW_VAL); \
        break; \
    case _Py_memory_order_release: \
        _InterlockedExchange64_rel((__int64 volatile*)ATOMIC_VAL, (__int64)NEW_VAL); \
        break; \
    default: \
        _InterlockedExchange64((__int64 volatile*)ATOMIC_VAL, (__int64)NEW_VAL); \
        break; \
    }
#else
#define _Py_atomic_store_64bit(ATOMIC_VAL, NEW_VAL, ORDER) ((void)0);
#endif

#define _Py_atomic_store_32bit(ATOMIC_VAL, NEW_VAL, ORDER) \
    switch (ORDER) { \
    case _Py_memory_order_acquire: \
        _InterlockedExchange_acq((volatile long*)ATOMIC_VAL, (int)NEW_VAL); \
        break; \
    case _Py_memory_order_release: \
        _InterlockedExchange_rel((volatile long*)ATOMIC_VAL, (int)NEW_VAL); \
        break; \
    default: \
        _InterlockedExchange((volatile long*)ATOMIC_VAL, (int)NEW_VAL); \
        break; \
    }

#if defined(_M_ARM64)
/* This has to be an intptr_t for now.
   gil_created() uses -1 as a sentinel value; if this returned a uintptr_t,
   it would do an unsigned compare and crash.
*/
inline intptr_t _Py_atomic_load_64bit(volatile uintptr_t* value, int order) {
    uintptr_t old;
    switch (order) {
    case _Py_memory_order_acquire:
    {
        do {
            old = *value;
        } while(_InterlockedCompareExchange64_acq((volatile __int64*)value, old, old) != old);
        break;
    }
    case _Py_memory_order_release:
    {
        do {
            old = *value;
        } while(_InterlockedCompareExchange64_rel((volatile __int64*)value, old, old) != old);
        break;
    }
    case _Py_memory_order_relaxed:
        old = *value;
        break;
    default:
    {
        do {
            old = *value;
        } while(_InterlockedCompareExchange64((volatile __int64*)value, old, old) != old);
        break;
    }
    }
    return old;
}

#else
#define _Py_atomic_load_64bit(ATOMIC_VAL, ORDER) *ATOMIC_VAL
#endif

inline int _Py_atomic_load_32bit(volatile int* value, int order) {
    int old;
    switch (order) {
    case _Py_memory_order_acquire:
    {
        do {
            old = *value;
        } while(_InterlockedCompareExchange_acq((volatile long*)value, old, old) != old);
        break;
    }
    case _Py_memory_order_release:
    {
        do {
            old = *value;
        } while(_InterlockedCompareExchange_rel((volatile long*)value, old, old) != old);
        break;
    }
    case _Py_memory_order_relaxed:
        old = *value;
        break;
    default:
    {
        do {
            old = *value;
        } while(_InterlockedCompareExchange((volatile long*)value, old, old) != old);
        break;
    }
    }
    return old;
}
#define _Py_atomic_store_explicit(ATOMIC_VAL, NEW_VAL, ORDER) \
  if (sizeof((ATOMIC_VAL)->_value) == 8) { \
    _Py_atomic_store_64bit(&((ATOMIC_VAL)->_value), NEW_VAL, ORDER) } else { \
    _Py_atomic_store_32bit(&((ATOMIC_VAL)->_value), NEW_VAL, ORDER) }

#define _Py_atomic_load_explicit(ATOMIC_VAL, ORDER) \
  ( \
    sizeof((ATOMIC_VAL)->_value) == 8 ? \
    _Py_atomic_load_64bit((volatile uintptr_t*)&((ATOMIC_VAL)->_value), ORDER) : \
    _Py_atomic_load_32bit((volatile int*)&((ATOMIC_VAL)->_value), ORDER) \
  )
#endif
#else  /* !gcc x86  !_msc_ver */

typedef enum _Py_memory_order {
    _Py_memory_order_relaxed,
    _Py_memory_order_acquire,
    _Py_memory_order_release,
    _Py_memory_order_acq_rel,
    _Py_memory_order_seq_cst
} _Py_memory_order;

typedef struct _Py_atomic_address {
    uintptr_t _value;
} _Py_atomic_address;

typedef struct _Py_atomic_int {
    int _value;
} _Py_atomic_int;

/* Fall back to other compilers and processors by assuming that simple
   volatile accesses are atomic.  This is false, so people should port
   this. */
#define _Py_atomic_signal_fence(/*memory_order*/ ORDER) ((void)0)
#define _Py_atomic_thread_fence(/*memory_order*/ ORDER) ((void)0)
#define _Py_atomic_store_explicit(ATOMIC_VAL, NEW_VAL, ORDER) \
    ((ATOMIC_VAL)->_value = NEW_VAL)
#define _Py_atomic_load_explicit(ATOMIC_VAL, ORDER) \
    ((ATOMIC_VAL)->_value)

#endif
/* Standardized shortcuts. */
#define _Py_atomic_store(ATOMIC_VAL, NEW_VAL) \
    _Py_atomic_store_explicit(ATOMIC_VAL, NEW_VAL, _Py_memory_order_seq_cst)
#define _Py_atomic_load(ATOMIC_VAL) \
    _Py_atomic_load_explicit(ATOMIC_VAL, _Py_memory_order_seq_cst)

/* Python-local extensions */
#define _Py_atomic_store_relaxed(ATOMIC_VAL, NEW_VAL) \
    _Py_atomic_store_explicit(ATOMIC_VAL, NEW_VAL, _Py_memory_order_relaxed)
#define _Py_atomic_load_relaxed(ATOMIC_VAL) \
    _Py_atomic_load_explicit(ATOMIC_VAL, _Py_memory_order_relaxed)
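
/* Minimal usage sketch (illustrative only; "pending_flag" is a hypothetical
 * variable, not something defined by this header):
 *
 *     static _Py_atomic_int pending_flag;
 *
 *     void set_side(void)  { _Py_atomic_store(&pending_flag, 1); }
 *     int  poll_side(void) { return _Py_atomic_load_relaxed(&pending_flag); }
 *
 * _Py_atomic_store/_Py_atomic_load are seq_cst; the *_relaxed variants only
 * guarantee atomicity of the access itself, with no ordering constraints.
 */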
#endif  /* Py_BUILD_CORE */
#endif  /* Py_ATOMIC_H */