| 123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306 |
- // Internal macros for the simd implementation -*- C++ -*-
- // Copyright (C) 2020-2021 Free Software Foundation, Inc.
- //
- // This file is part of the GNU ISO C++ Library. This library is free
- // software; you can redistribute it and/or modify it under the
- // terms of the GNU General Public License as published by the
- // Free Software Foundation; either version 3, or (at your option)
- // any later version.
- // This library is distributed in the hope that it will be useful,
- // but WITHOUT ANY WARRANTY; without even the implied warranty of
- // MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
- // GNU General Public License for more details.
- // Under Section 7 of GPL version 3, you are granted additional
- // permissions described in the GCC Runtime Library Exception, version
- // 3.1, as published by the Free Software Foundation.
- // You should have received a copy of the GNU General Public License and
- // a copy of the GCC Runtime Library Exception along with this program;
- // see the files COPYING3 and COPYING.RUNTIME respectively. If not, see
- // <http://www.gnu.org/licenses/>.
- #ifndef _GLIBCXX_EXPERIMENTAL_SIMD_DETAIL_H_
- #define _GLIBCXX_EXPERIMENTAL_SIMD_DETAIL_H_
- #if __cplusplus >= 201703L
- #include <cstddef>
- #include <cstdint>
- #define _GLIBCXX_SIMD_BEGIN_NAMESPACE \
- namespace std _GLIBCXX_VISIBILITY(default) \
- { \
- _GLIBCXX_BEGIN_NAMESPACE_VERSION \
- namespace experimental { \
- inline namespace parallelism_v2 { // opens std (versioned) > experimental > inline parallelism_v2; leaves three namespace braces open
- #define _GLIBCXX_SIMD_END_NAMESPACE \
- } \
- } \
- _GLIBCXX_END_NAMESPACE_VERSION \
- } // closes, in reverse order, everything _GLIBCXX_SIMD_BEGIN_NAMESPACE opened
- // ISA extension detection. The following defines all the _GLIBCXX_SIMD_HAVE_XXX
- // macros.
- // ARM{{{
- #if defined __ARM_NEON // ARM feature flags: each macro below is always defined, to 1 or 0
- #define _GLIBCXX_SIMD_HAVE_NEON 1
- #else
- #define _GLIBCXX_SIMD_HAVE_NEON 0
- #endif // _GLIBCXX_SIMD_HAVE_NEON: 1 iff the compiler defines __ARM_NEON
- #if defined __ARM_NEON && (__ARM_ARCH >= 8 || defined __aarch64__)
- #define _GLIBCXX_SIMD_HAVE_NEON_A32 1
- #else
- #define _GLIBCXX_SIMD_HAVE_NEON_A32 0
- #endif // _GLIBCXX_SIMD_HAVE_NEON_A32: NEON and (__ARM_ARCH >= 8 or __aarch64__ defined)
- #if defined __ARM_NEON && defined __aarch64__
- #define _GLIBCXX_SIMD_HAVE_NEON_A64 1
- #else
- #define _GLIBCXX_SIMD_HAVE_NEON_A64 0
- #endif // _GLIBCXX_SIMD_HAVE_NEON_A64: NEON and __aarch64__ defined (so A64 implies A32)
- //}}}
- // x86{{{ Each _GLIBCXX_SIMD_HAVE_* macro below is always defined: 1 when the tested compiler macro(s) are defined, else 0.
- #ifdef __MMX__
- #define _GLIBCXX_SIMD_HAVE_MMX 1
- #else
- #define _GLIBCXX_SIMD_HAVE_MMX 0
- #endif
- #if defined __SSE__ || defined __x86_64__ // __x86_64__ alone is sufficient, even without __SSE__
- #define _GLIBCXX_SIMD_HAVE_SSE 1
- #else
- #define _GLIBCXX_SIMD_HAVE_SSE 0
- #endif
- #if defined __SSE2__ || defined __x86_64__ // __x86_64__ alone is sufficient, even without __SSE2__
- #define _GLIBCXX_SIMD_HAVE_SSE2 1
- #else
- #define _GLIBCXX_SIMD_HAVE_SSE2 0
- #endif
- #ifdef __SSE3__
- #define _GLIBCXX_SIMD_HAVE_SSE3 1
- #else
- #define _GLIBCXX_SIMD_HAVE_SSE3 0
- #endif
- #ifdef __SSSE3__
- #define _GLIBCXX_SIMD_HAVE_SSSE3 1
- #else
- #define _GLIBCXX_SIMD_HAVE_SSSE3 0
- #endif
- #ifdef __SSE4_1__
- #define _GLIBCXX_SIMD_HAVE_SSE4_1 1
- #else
- #define _GLIBCXX_SIMD_HAVE_SSE4_1 0
- #endif
- #ifdef __SSE4_2__
- #define _GLIBCXX_SIMD_HAVE_SSE4_2 1
- #else
- #define _GLIBCXX_SIMD_HAVE_SSE4_2 0
- #endif
- #ifdef __XOP__
- #define _GLIBCXX_SIMD_HAVE_XOP 1
- #else
- #define _GLIBCXX_SIMD_HAVE_XOP 0
- #endif
- #ifdef __AVX__
- #define _GLIBCXX_SIMD_HAVE_AVX 1
- #else
- #define _GLIBCXX_SIMD_HAVE_AVX 0
- #endif
- #ifdef __AVX2__
- #define _GLIBCXX_SIMD_HAVE_AVX2 1
- #else
- #define _GLIBCXX_SIMD_HAVE_AVX2 0
- #endif
- #ifdef __BMI__ // naming differs: the compiler macro is __BMI__, the simd macro is spelled BMI1
- #define _GLIBCXX_SIMD_HAVE_BMI1 1
- #else
- #define _GLIBCXX_SIMD_HAVE_BMI1 0
- #endif
- #ifdef __BMI2__
- #define _GLIBCXX_SIMD_HAVE_BMI2 1
- #else
- #define _GLIBCXX_SIMD_HAVE_BMI2 0
- #endif
- #ifdef __LZCNT__
- #define _GLIBCXX_SIMD_HAVE_LZCNT 1
- #else
- #define _GLIBCXX_SIMD_HAVE_LZCNT 0
- #endif
- #ifdef __SSE4A__
- #define _GLIBCXX_SIMD_HAVE_SSE4A 1
- #else
- #define _GLIBCXX_SIMD_HAVE_SSE4A 0
- #endif
- #ifdef __FMA__
- #define _GLIBCXX_SIMD_HAVE_FMA 1
- #else
- #define _GLIBCXX_SIMD_HAVE_FMA 0
- #endif
- #ifdef __FMA4__
- #define _GLIBCXX_SIMD_HAVE_FMA4 1
- #else
- #define _GLIBCXX_SIMD_HAVE_FMA4 0
- #endif
- #ifdef __F16C__
- #define _GLIBCXX_SIMD_HAVE_F16C 1
- #else
- #define _GLIBCXX_SIMD_HAVE_F16C 0
- #endif
- #ifdef __POPCNT__
- #define _GLIBCXX_SIMD_HAVE_POPCNT 1
- #else
- #define _GLIBCXX_SIMD_HAVE_POPCNT 0
- #endif
- #ifdef __AVX512F__
- #define _GLIBCXX_SIMD_HAVE_AVX512F 1
- #else
- #define _GLIBCXX_SIMD_HAVE_AVX512F 0
- #endif
- #ifdef __AVX512DQ__
- #define _GLIBCXX_SIMD_HAVE_AVX512DQ 1
- #else
- #define _GLIBCXX_SIMD_HAVE_AVX512DQ 0
- #endif
- #ifdef __AVX512VL__
- #define _GLIBCXX_SIMD_HAVE_AVX512VL 1
- #else
- #define _GLIBCXX_SIMD_HAVE_AVX512VL 0
- #endif
- #ifdef __AVX512BW__
- #define _GLIBCXX_SIMD_HAVE_AVX512BW 1
- #else
- #define _GLIBCXX_SIMD_HAVE_AVX512BW 0
- #endif
- #if _GLIBCXX_SIMD_HAVE_SSE // *_ABI macros: each is 1 or 0, derived solely from one _GLIBCXX_SIMD_HAVE_<ISA> macro
- #define _GLIBCXX_SIMD_HAVE_SSE_ABI 1
- #else
- #define _GLIBCXX_SIMD_HAVE_SSE_ABI 0
- #endif
- #if _GLIBCXX_SIMD_HAVE_SSE2 // FULL_SSE_ABI is keyed on SSE2 (NOTE(review): what FULL covers is not stated here; confirm at the use sites)
- #define _GLIBCXX_SIMD_HAVE_FULL_SSE_ABI 1
- #else
- #define _GLIBCXX_SIMD_HAVE_FULL_SSE_ABI 0
- #endif
- #if _GLIBCXX_SIMD_HAVE_AVX
- #define _GLIBCXX_SIMD_HAVE_AVX_ABI 1
- #else
- #define _GLIBCXX_SIMD_HAVE_AVX_ABI 0
- #endif
- #if _GLIBCXX_SIMD_HAVE_AVX2 // FULL_AVX_ABI is keyed on AVX2
- #define _GLIBCXX_SIMD_HAVE_FULL_AVX_ABI 1
- #else
- #define _GLIBCXX_SIMD_HAVE_FULL_AVX_ABI 0
- #endif
- #if _GLIBCXX_SIMD_HAVE_AVX512F
- #define _GLIBCXX_SIMD_HAVE_AVX512_ABI 1
- #else
- #define _GLIBCXX_SIMD_HAVE_AVX512_ABI 0
- #endif
- #if _GLIBCXX_SIMD_HAVE_AVX512BW // FULL_AVX512_ABI is keyed on AVX512BW
- #define _GLIBCXX_SIMD_HAVE_FULL_AVX512_ABI 1
- #else
- #define _GLIBCXX_SIMD_HAVE_FULL_AVX512_ABI 0
- #endif
- #if defined __x86_64__ && !_GLIBCXX_SIMD_HAVE_SSE2 // sanity check: reject x86-64 builds that lack SSE2
- #error "Use of SSE2 is required on AMD64"
- #endif // NOTE(review): cannot fire as written, because this header sets _GLIBCXX_SIMD_HAVE_SSE2 to 1 whenever __x86_64__ is defined
- //}}}
- #ifdef __clang__ // _GLIBCXX_SIMD_NORMAL_MATH: per-function optimize attribute, only emitted for non-clang compilers
- #define _GLIBCXX_SIMD_NORMAL_MATH // clang: expands to nothing
- #else
- #define _GLIBCXX_SIMD_NORMAL_MATH \
- [[__gnu__::__optimize__("finite-math-only,no-signed-zeros")]] // otherwise: requests finite-math-only and no-signed-zeros for the annotated function
- #endif
- #define _GLIBCXX_SIMD_NEVER_INLINE [[__gnu__::__noinline__]] // function attribute: noinline
- #define _GLIBCXX_SIMD_INTRINSIC \
- [[__gnu__::__always_inline__, __gnu__::__artificial__]] inline // always_inline + artificial + inline; redefined later in this header if _GLIBCXX_SIMD_NO_ALWAYS_INLINE is set
- #define _GLIBCXX_SIMD_ALWAYS_INLINE [[__gnu__::__always_inline__]] inline // always_inline + inline; likewise redefined under _GLIBCXX_SIMD_NO_ALWAYS_INLINE
- #define _GLIBCXX_SIMD_IS_UNLIKELY(__x) __builtin_expect(__x, 0) // passes __x through __builtin_expect with expected value 0
- #define _GLIBCXX_SIMD_IS_LIKELY(__x) __builtin_expect(__x, 1) // passes __x through __builtin_expect with expected value 1
- #if defined __STRICT_ANSI__ && __STRICT_ANSI__ // strict-ANSI mode: the API-level constexpr macros fall back to nothing / const
- #define _GLIBCXX_SIMD_CONSTEXPR // expands to nothing
- #define _GLIBCXX_SIMD_USE_CONSTEXPR_API const
- #else
- #define _GLIBCXX_SIMD_CONSTEXPR constexpr
- #define _GLIBCXX_SIMD_USE_CONSTEXPR_API constexpr
- #endif
- #if defined __clang__ // _GLIBCXX_SIMD_USE_CONSTEXPR is chosen by compiler, independently of __STRICT_ANSI__
- #define _GLIBCXX_SIMD_USE_CONSTEXPR const
- #else
- #define _GLIBCXX_SIMD_USE_CONSTEXPR constexpr
- #endif
- #define _GLIBCXX_SIMD_LIST_BINARY(__macro) __macro(|) __macro(&) __macro(^) // invokes __macro once per bitwise operator token
- #define _GLIBCXX_SIMD_LIST_SHIFTS(__macro) __macro(<<) __macro(>>) // invokes __macro once per shift operator token
- #define _GLIBCXX_SIMD_LIST_ARITHMETICS(__macro) \
- __macro(+) __macro(-) __macro(*) __macro(/) __macro(%) // invokes __macro once per arithmetic operator token
- #define _GLIBCXX_SIMD_ALL_BINARY(__macro) \
- _GLIBCXX_SIMD_LIST_BINARY(__macro) static_assert(true) // ALL_* = LIST_* plus static_assert(true) with no semicolon, so the caller must end the invocation with one
- #define _GLIBCXX_SIMD_ALL_SHIFTS(__macro) \
- _GLIBCXX_SIMD_LIST_SHIFTS(__macro) static_assert(true) // same pattern for the shift operators
- #define _GLIBCXX_SIMD_ALL_ARITHMETICS(__macro) \
- _GLIBCXX_SIMD_LIST_ARITHMETICS(__macro) static_assert(true) // same pattern for the arithmetic operators
- #ifdef _GLIBCXX_SIMD_NO_ALWAYS_INLINE // opt-out switch: when defined, both inlining macros are downgraded to plain inline
- #undef _GLIBCXX_SIMD_ALWAYS_INLINE
- #define _GLIBCXX_SIMD_ALWAYS_INLINE inline
- #undef _GLIBCXX_SIMD_INTRINSIC
- #define _GLIBCXX_SIMD_INTRINSIC inline // also drops the artificial attribute of the default definition
- #endif
- #if _GLIBCXX_SIMD_HAVE_SSE || _GLIBCXX_SIMD_HAVE_MMX // _GLIBCXX_SIMD_X86INTRIN: 1 when SSE or MMX is available, else 0
- #define _GLIBCXX_SIMD_X86INTRIN 1
- #else
- #define _GLIBCXX_SIMD_X86INTRIN 0
- #endif
- // workaround macros {{{ Each macro is defined to 1 when its workaround/fix is enabled; the x86-only ones have no #else and stay undefined otherwise.
- // Use aliasing loads to help GCC understand the data accesses better.
- // This also seems to hide a miscompilation on swap(x[i], x[i + 1]) with
- // fixed_size_simd<float, 16> x.
- #define _GLIBCXX_SIMD_USE_ALIASING_LOADS 1
- // vector conversions on x86 not optimized:
- #if _GLIBCXX_SIMD_X86INTRIN
- #define _GLIBCXX_SIMD_WORKAROUND_PR85048 1
- #endif // no #else: left undefined (not 0) when _GLIBCXX_SIMD_X86INTRIN is 0
- // integer division not optimized (enabled unconditionally)
- #define _GLIBCXX_SIMD_WORKAROUND_PR90993 1
- // very bad codegen for extraction and concatenation of 128/256 "subregisters"
- // with sizeof(element type) < 8: https://godbolt.org/g/mqUsgM
- #if _GLIBCXX_SIMD_X86INTRIN
- #define _GLIBCXX_SIMD_WORKAROUND_XXX_1 1
- #endif // no #else: left undefined (not 0) when _GLIBCXX_SIMD_X86INTRIN is 0
- // bad codegen for 8 Byte memcpy to __vector_type_t<char, 16> (enabled unconditionally)
- #define _GLIBCXX_SIMD_WORKAROUND_PR90424 1
- // bad codegen for zero-extend using simple concat(__x, 0)
- #if _GLIBCXX_SIMD_X86INTRIN
- #define _GLIBCXX_SIMD_WORKAROUND_XXX_3 1
- #endif // no #else: left undefined (not 0) when _GLIBCXX_SIMD_X86INTRIN is 0
- // https://github.com/cplusplus/parallelism-ts/issues/65 (incorrect return type
- // of static_simd_cast)
- #define _GLIBCXX_SIMD_FIX_P2TS_ISSUE65 1
- // https://github.com/cplusplus/parallelism-ts/issues/66 (incorrect SFINAE
- // constraint on (static)_simd_cast)
- #define _GLIBCXX_SIMD_FIX_P2TS_ISSUE66 1
- // }}}
- #endif // __cplusplus >= 201703L
- #endif // _GLIBCXX_EXPERIMENTAL_SIMD_DETAIL_H_
- // vim: foldmethod=marker
|