// simd_detail.h
  1. // Internal macros for the simd implementation -*- C++ -*-
  2. // Copyright (C) 2020-2021 Free Software Foundation, Inc.
  3. //
  4. // This file is part of the GNU ISO C++ Library. This library is free
  5. // software; you can redistribute it and/or modify it under the
  6. // terms of the GNU General Public License as published by the
  7. // Free Software Foundation; either version 3, or (at your option)
  8. // any later version.
  9. // This library is distributed in the hope that it will be useful,
  10. // but WITHOUT ANY WARRANTY; without even the implied warranty of
  11. // MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
  12. // GNU General Public License for more details.
  13. // Under Section 7 of GPL version 3, you are granted additional
  14. // permissions described in the GCC Runtime Library Exception, version
  15. // 3.1, as published by the Free Software Foundation.
  16. // You should have received a copy of the GNU General Public License and
  17. // a copy of the GCC Runtime Library Exception along with this program;
  18. // see the files COPYING3 and COPYING.RUNTIME respectively. If not, see
  19. // <http://www.gnu.org/licenses/>.
  20. #ifndef _GLIBCXX_EXPERIMENTAL_SIMD_DETAIL_H_
  21. #define _GLIBCXX_EXPERIMENTAL_SIMD_DETAIL_H_
  22. #if __cplusplus >= 201703L
  23. #include <cstddef>
  24. #include <cstdint>
  // Opens namespace std (with default visibility), the library's versioning
  // namespace, namespace experimental and the inline namespace
  // parallelism_v2.  It leaves three braces open, which
  // _GLIBCXX_SIMD_END_NAMESPACE closes again.
  #define _GLIBCXX_SIMD_BEGIN_NAMESPACE \
    namespace std _GLIBCXX_VISIBILITY(default) \
    { \
      _GLIBCXX_BEGIN_NAMESPACE_VERSION \
      namespace experimental { \
      inline namespace parallelism_v2 {
  // Closes parallelism_v2 and experimental, ends the versioning namespace and
  // finally closes std, i.e. the reverse of _GLIBCXX_SIMD_BEGIN_NAMESPACE.
  #define _GLIBCXX_SIMD_END_NAMESPACE \
    } \
    } \
    _GLIBCXX_END_NAMESPACE_VERSION \
    }
  // ISA extension detection. The following defines all the _GLIBCXX_SIMD_HAVE_XXX
  // macros ARM{{{
  // Every macro in this group is always defined, as either 1 or 0, so it can
  // be tested with a plain #if.
  // NEON at all: the compiler predefines __ARM_NEON.
  #if defined __ARM_NEON
  #define _GLIBCXX_SIMD_HAVE_NEON 1
  #else
  #define _GLIBCXX_SIMD_HAVE_NEON 0
  #endif
  // NEON_A32: NEON together with __ARM_ARCH >= 8 or an AArch64 target.
  #if defined __ARM_NEON && (__ARM_ARCH >= 8 || defined __aarch64__)
  #define _GLIBCXX_SIMD_HAVE_NEON_A32 1
  #else
  #define _GLIBCXX_SIMD_HAVE_NEON_A32 0
  #endif
  // NEON_A64: NEON on an AArch64 target (__aarch64__ predefined).
  #if defined __ARM_NEON && defined __aarch64__
  #define _GLIBCXX_SIMD_HAVE_NEON_A64 1
  #else
  #define _GLIBCXX_SIMD_HAVE_NEON_A64 0
  #endif
  //}}}
  // x86{{{
  // One _GLIBCXX_SIMD_HAVE_<ISA> macro per instruction-set extension.  Each is
  // always defined, as 1 when the corresponding compiler-predefined macro is
  // present and 0 otherwise, so it can be tested with a plain #if.
  #ifdef __MMX__
  #define _GLIBCXX_SIMD_HAVE_MMX 1
  #else
  #define _GLIBCXX_SIMD_HAVE_MMX 0
  #endif
  // SSE and SSE2 are additionally taken as present whenever __x86_64__ is
  // defined, even if __SSE__/__SSE2__ are not.
  #if defined __SSE__ || defined __x86_64__
  #define _GLIBCXX_SIMD_HAVE_SSE 1
  #else
  #define _GLIBCXX_SIMD_HAVE_SSE 0
  #endif
  #if defined __SSE2__ || defined __x86_64__
  #define _GLIBCXX_SIMD_HAVE_SSE2 1
  #else
  #define _GLIBCXX_SIMD_HAVE_SSE2 0
  #endif
  #ifdef __SSE3__
  #define _GLIBCXX_SIMD_HAVE_SSE3 1
  #else
  #define _GLIBCXX_SIMD_HAVE_SSE3 0
  #endif
  #ifdef __SSSE3__
  #define _GLIBCXX_SIMD_HAVE_SSSE3 1
  #else
  #define _GLIBCXX_SIMD_HAVE_SSSE3 0
  #endif
  #ifdef __SSE4_1__
  #define _GLIBCXX_SIMD_HAVE_SSE4_1 1
  #else
  #define _GLIBCXX_SIMD_HAVE_SSE4_1 0
  #endif
  #ifdef __SSE4_2__
  #define _GLIBCXX_SIMD_HAVE_SSE4_2 1
  #else
  #define _GLIBCXX_SIMD_HAVE_SSE4_2 0
  #endif
  #ifdef __XOP__
  #define _GLIBCXX_SIMD_HAVE_XOP 1
  #else
  #define _GLIBCXX_SIMD_HAVE_XOP 0
  #endif
  #ifdef __AVX__
  #define _GLIBCXX_SIMD_HAVE_AVX 1
  #else
  #define _GLIBCXX_SIMD_HAVE_AVX 0
  #endif
  #ifdef __AVX2__
  #define _GLIBCXX_SIMD_HAVE_AVX2 1
  #else
  #define _GLIBCXX_SIMD_HAVE_AVX2 0
  #endif
  // Note the naming difference: the library macro is ..._BMI1 while the
  // compiler macro it tests is __BMI__.
  #ifdef __BMI__
  #define _GLIBCXX_SIMD_HAVE_BMI1 1
  #else
  #define _GLIBCXX_SIMD_HAVE_BMI1 0
  #endif
  #ifdef __BMI2__
  #define _GLIBCXX_SIMD_HAVE_BMI2 1
  #else
  #define _GLIBCXX_SIMD_HAVE_BMI2 0
  #endif
  #ifdef __LZCNT__
  #define _GLIBCXX_SIMD_HAVE_LZCNT 1
  #else
  #define _GLIBCXX_SIMD_HAVE_LZCNT 0
  #endif
  #ifdef __SSE4A__
  #define _GLIBCXX_SIMD_HAVE_SSE4A 1
  #else
  #define _GLIBCXX_SIMD_HAVE_SSE4A 0
  #endif
  #ifdef __FMA__
  #define _GLIBCXX_SIMD_HAVE_FMA 1
  #else
  #define _GLIBCXX_SIMD_HAVE_FMA 0
  #endif
  #ifdef __FMA4__
  #define _GLIBCXX_SIMD_HAVE_FMA4 1
  #else
  #define _GLIBCXX_SIMD_HAVE_FMA4 0
  #endif
  #ifdef __F16C__
  #define _GLIBCXX_SIMD_HAVE_F16C 1
  #else
  #define _GLIBCXX_SIMD_HAVE_F16C 0
  #endif
  #ifdef __POPCNT__
  #define _GLIBCXX_SIMD_HAVE_POPCNT 1
  #else
  #define _GLIBCXX_SIMD_HAVE_POPCNT 0
  #endif
  #ifdef __AVX512F__
  #define _GLIBCXX_SIMD_HAVE_AVX512F 1
  #else
  #define _GLIBCXX_SIMD_HAVE_AVX512F 0
  #endif
  #ifdef __AVX512DQ__
  #define _GLIBCXX_SIMD_HAVE_AVX512DQ 1
  #else
  #define _GLIBCXX_SIMD_HAVE_AVX512DQ 0
  #endif
  #ifdef __AVX512VL__
  #define _GLIBCXX_SIMD_HAVE_AVX512VL 1
  #else
  #define _GLIBCXX_SIMD_HAVE_AVX512VL 0
  #endif
  #ifdef __AVX512BW__
  #define _GLIBCXX_SIMD_HAVE_AVX512BW 1
  #else
  #define _GLIBCXX_SIMD_HAVE_AVX512BW 0
  #endif
  // ABI availability, derived from the ISA macros above:
  //   SSE_ABI    <- SSE        FULL_SSE_ABI    <- SSE2
  //   AVX_ABI    <- AVX        FULL_AVX_ABI    <- AVX2
  //   AVX512_ABI <- AVX512F    FULL_AVX512_ABI <- AVX512BW
  #if _GLIBCXX_SIMD_HAVE_SSE
  #define _GLIBCXX_SIMD_HAVE_SSE_ABI 1
  #else
  #define _GLIBCXX_SIMD_HAVE_SSE_ABI 0
  #endif
  #if _GLIBCXX_SIMD_HAVE_SSE2
  #define _GLIBCXX_SIMD_HAVE_FULL_SSE_ABI 1
  #else
  #define _GLIBCXX_SIMD_HAVE_FULL_SSE_ABI 0
  #endif
  #if _GLIBCXX_SIMD_HAVE_AVX
  #define _GLIBCXX_SIMD_HAVE_AVX_ABI 1
  #else
  #define _GLIBCXX_SIMD_HAVE_AVX_ABI 0
  #endif
  #if _GLIBCXX_SIMD_HAVE_AVX2
  #define _GLIBCXX_SIMD_HAVE_FULL_AVX_ABI 1
  #else
  #define _GLIBCXX_SIMD_HAVE_FULL_AVX_ABI 0
  #endif
  #if _GLIBCXX_SIMD_HAVE_AVX512F
  #define _GLIBCXX_SIMD_HAVE_AVX512_ABI 1
  #else
  #define _GLIBCXX_SIMD_HAVE_AVX512_ABI 0
  #endif
  #if _GLIBCXX_SIMD_HAVE_AVX512BW
  #define _GLIBCXX_SIMD_HAVE_FULL_AVX512_ABI 1
  #else
  #define _GLIBCXX_SIMD_HAVE_FULL_AVX512_ABI 0
  #endif
  // NOTE(review): _GLIBCXX_SIMD_HAVE_SSE2 is defined to 1 above whenever
  // __x86_64__ is defined, so this condition cannot become true as written.
  // Left unchanged on purpose; confirm the intent before tightening it.
  #if defined __x86_64__ && !_GLIBCXX_SIMD_HAVE_SSE2
  #error "Use of SSE2 is required on AMD64"
  #endif
  //}}}
  // _GLIBCXX_SIMD_NORMAL_MATH: expands to nothing under clang; otherwise to the
  // GNU optimize attribute with "finite-math-only,no-signed-zeros".
  #ifdef __clang__
  #define _GLIBCXX_SIMD_NORMAL_MATH
  #else
  #define _GLIBCXX_SIMD_NORMAL_MATH \
    [[__gnu__::__optimize__("finite-math-only,no-signed-zeros")]]
  #endif
  // Inlining control, spelled as GNU attributes.  The INTRINSIC and
  // ALWAYS_INLINE forms already contain the `inline` keyword.
  #define _GLIBCXX_SIMD_NEVER_INLINE [[__gnu__::__noinline__]]
  #define _GLIBCXX_SIMD_INTRINSIC \
    [[__gnu__::__always_inline__, __gnu__::__artificial__]] inline
  #define _GLIBCXX_SIMD_ALWAYS_INLINE [[__gnu__::__always_inline__]] inline
  // Branch-prediction hints: both evaluate to __x itself; only the expected
  // value passed to __builtin_expect differs (0 vs. 1).
  #define _GLIBCXX_SIMD_IS_UNLIKELY(__x) __builtin_expect(__x, 0)
  #define _GLIBCXX_SIMD_IS_LIKELY(__x) __builtin_expect(__x, 1)
  // With __STRICT_ANSI__ defined and non-zero, _GLIBCXX_SIMD_CONSTEXPR is empty
  // and the API-level variant falls back to `const`; otherwise both are
  // `constexpr`.
  #if defined __STRICT_ANSI__ && __STRICT_ANSI__
  #define _GLIBCXX_SIMD_CONSTEXPR
  #define _GLIBCXX_SIMD_USE_CONSTEXPR_API const
  #else
  #define _GLIBCXX_SIMD_CONSTEXPR constexpr
  #define _GLIBCXX_SIMD_USE_CONSTEXPR_API constexpr
  #endif
  // _GLIBCXX_SIMD_USE_CONSTEXPR: `const` under clang, `constexpr` elsewhere.
  #if defined __clang__
  #define _GLIBCXX_SIMD_USE_CONSTEXPR const
  #else
  #define _GLIBCXX_SIMD_USE_CONSTEXPR constexpr
  #endif
  // Operator lists: each applies the macro passed as __macro once to every
  // operator token of its group, in the order written.
  #define _GLIBCXX_SIMD_LIST_BINARY(__macro) __macro(|) __macro(&) __macro(^)
  #define _GLIBCXX_SIMD_LIST_SHIFTS(__macro) __macro(<<) __macro(>>)
  #define _GLIBCXX_SIMD_LIST_ARITHMETICS(__macro) \
    __macro(+) __macro(-) __macro(*) __macro(/) __macro(%)
  // The _ALL_ forms expand the matching list and then append
  // `static_assert(true)` without a terminating semicolon; the semicolon is
  // left to the place of use.
  #define _GLIBCXX_SIMD_ALL_BINARY(__macro) \
    _GLIBCXX_SIMD_LIST_BINARY(__macro) static_assert(true)
  #define _GLIBCXX_SIMD_ALL_SHIFTS(__macro) \
    _GLIBCXX_SIMD_LIST_SHIFTS(__macro) static_assert(true)
  #define _GLIBCXX_SIMD_ALL_ARITHMETICS(__macro) \
    _GLIBCXX_SIMD_LIST_ARITHMETICS(__macro) static_assert(true)
  // Opt-out switch: when _GLIBCXX_SIMD_NO_ALWAYS_INLINE is defined, both
  // _GLIBCXX_SIMD_ALWAYS_INLINE and _GLIBCXX_SIMD_INTRINSIC are redefined to a
  // plain `inline`, dropping their attributes.
  #ifdef _GLIBCXX_SIMD_NO_ALWAYS_INLINE
  #undef _GLIBCXX_SIMD_ALWAYS_INLINE
  #define _GLIBCXX_SIMD_ALWAYS_INLINE inline
  #undef _GLIBCXX_SIMD_INTRINSIC
  #define _GLIBCXX_SIMD_INTRINSIC inline
  #endif
  // _GLIBCXX_SIMD_X86INTRIN is 1 when SSE or MMX was detected, 0 otherwise.
  #if _GLIBCXX_SIMD_HAVE_SSE || _GLIBCXX_SIMD_HAVE_MMX
  #define _GLIBCXX_SIMD_X86INTRIN 1
  #else
  #define _GLIBCXX_SIMD_X86INTRIN 0
  #endif
  // workaround macros {{{
  // The macros in this group are either defined to 1 or not defined at all
  // (there is no 0 fallback); the x86-specific ones exist only when
  // _GLIBCXX_SIMD_X86INTRIN is non-zero.
  // use aliasing loads to help GCC understand the data accesses better
  // This also seems to hide a miscompilation on swap(x[i], x[i + 1]) with
  // fixed_size_simd<float, 16> x.
  #define _GLIBCXX_SIMD_USE_ALIASING_LOADS 1
  // vector conversions on x86 not optimized:
  #if _GLIBCXX_SIMD_X86INTRIN
  #define _GLIBCXX_SIMD_WORKAROUND_PR85048 1
  #endif
  // integer division not optimized
  #define _GLIBCXX_SIMD_WORKAROUND_PR90993 1
  // very bad codegen for extraction and concatenation of 128/256 "subregisters"
  // with sizeof(element type) < 8: https://godbolt.org/g/mqUsgM
  #if _GLIBCXX_SIMD_X86INTRIN
  #define _GLIBCXX_SIMD_WORKAROUND_XXX_1 1
  #endif
  // bad codegen for 8 Byte memcpy to __vector_type_t<char, 16>
  #define _GLIBCXX_SIMD_WORKAROUND_PR90424 1
  // bad codegen for zero-extend using simple concat(__x, 0)
  #if _GLIBCXX_SIMD_X86INTRIN
  #define _GLIBCXX_SIMD_WORKAROUND_XXX_3 1
  #endif
  // https://github.com/cplusplus/parallelism-ts/issues/65 (incorrect return type
  // of static_simd_cast)
  #define _GLIBCXX_SIMD_FIX_P2TS_ISSUE65 1
  // https://github.com/cplusplus/parallelism-ts/issues/66 (incorrect SFINAE
  // constraint on (static)_simd_cast)
  #define _GLIBCXX_SIMD_FIX_P2TS_ISSUE66 1
  // }}}
  273. #endif // __cplusplus >= 201703L
  274. #endif // _GLIBCXX_EXPERIMENTAL_SIMD_DETAIL_H_
  275. // vim: foldmethod=marker