simd_ppc.h 4.7 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156
  1. // Simd PowerPC specific implementations -*- C++ -*-
  2. // Copyright (C) 2020-2021 Free Software Foundation, Inc.
  3. //
  4. // This file is part of the GNU ISO C++ Library. This library is free
  5. // software; you can redistribute it and/or modify it under the
  6. // terms of the GNU General Public License as published by the
  7. // Free Software Foundation; either version 3, or (at your option)
  8. // any later version.
  9. // This library is distributed in the hope that it will be useful,
  10. // but WITHOUT ANY WARRANTY; without even the implied warranty of
  11. // MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
  12. // GNU General Public License for more details.
  13. // Under Section 7 of GPL version 3, you are granted additional
  14. // permissions described in the GCC Runtime Library Exception, version
  15. // 3.1, as published by the Free Software Foundation.
  16. // You should have received a copy of the GNU General Public License and
  17. // a copy of the GCC Runtime Library Exception along with this program;
  18. // see the files COPYING3 and COPYING.RUNTIME respectively. If not, see
  19. // <http://www.gnu.org/licenses/>.
  20. #ifndef _GLIBCXX_EXPERIMENTAL_SIMD_PPC_H_
  21. #define _GLIBCXX_EXPERIMENTAL_SIMD_PPC_H_
  22. #if __cplusplus >= 201703L
  23. #ifndef __ALTIVEC__
  24. #error "simd_ppc.h may only be included when AltiVec/VMX is available"
  25. #endif
  26. #include <altivec.h>
  27. _GLIBCXX_SIMD_BEGIN_NAMESPACE
  28. // _SimdImplPpc {{{
  29. template <typename _Abi>
  30. struct _SimdImplPpc : _SimdImplBuiltin<_Abi>
  31. {
  32. using _Base = _SimdImplBuiltin<_Abi>;
  33. // Byte and halfword shift instructions on PPC only consider the low 3 or 4
  34. // bits of the RHS. Consequently, shifting by sizeof(_Tp)*CHAR_BIT (or more)
  35. // is UB without extra measures. To match scalar behavior, byte and halfword
  36. // shifts need an extra fixup step.
  37. // _S_bit_shift_left {{{
  38. template <typename _Tp, size_t _Np>
  39. _GLIBCXX_SIMD_INTRINSIC static constexpr _SimdWrapper<_Tp, _Np>
  40. _S_bit_shift_left(_SimdWrapper<_Tp, _Np> __x, _SimdWrapper<_Tp, _Np> __y)
  41. {
  42. __x = _Base::_S_bit_shift_left(__x, __y);
  43. if constexpr (sizeof(_Tp) < sizeof(int))
  44. __x._M_data
  45. = (__y._M_data < sizeof(_Tp) * __CHAR_BIT__) & __x._M_data;
  46. return __x;
  47. }
  48. template <typename _Tp, size_t _Np>
  49. _GLIBCXX_SIMD_INTRINSIC static constexpr _SimdWrapper<_Tp, _Np>
  50. _S_bit_shift_left(_SimdWrapper<_Tp, _Np> __x, int __y)
  51. {
  52. __x = _Base::_S_bit_shift_left(__x, __y);
  53. if constexpr (sizeof(_Tp) < sizeof(int))
  54. {
  55. if (__y >= sizeof(_Tp) * __CHAR_BIT__)
  56. return {};
  57. }
  58. return __x;
  59. }
  60. // }}}
  61. // _S_bit_shift_right {{{
  62. template <typename _Tp, size_t _Np>
  63. _GLIBCXX_SIMD_INTRINSIC static constexpr _SimdWrapper<_Tp, _Np>
  64. _S_bit_shift_right(_SimdWrapper<_Tp, _Np> __x, _SimdWrapper<_Tp, _Np> __y)
  65. {
  66. if constexpr (sizeof(_Tp) < sizeof(int))
  67. {
  68. constexpr int __nbits = sizeof(_Tp) * __CHAR_BIT__;
  69. if constexpr (is_unsigned_v<_Tp>)
  70. return (__y._M_data < __nbits)
  71. & _Base::_S_bit_shift_right(__x, __y)._M_data;
  72. else
  73. {
  74. _Base::_S_masked_assign(_SimdWrapper<_Tp, _Np>(__y._M_data
  75. >= __nbits),
  76. __y, __nbits - 1);
  77. return _Base::_S_bit_shift_right(__x, __y);
  78. }
  79. }
  80. else
  81. return _Base::_S_bit_shift_right(__x, __y);
  82. }
  83. template <typename _Tp, size_t _Np>
  84. _GLIBCXX_SIMD_INTRINSIC static constexpr _SimdWrapper<_Tp, _Np>
  85. _S_bit_shift_right(_SimdWrapper<_Tp, _Np> __x, int __y)
  86. {
  87. if constexpr (sizeof(_Tp) < sizeof(int))
  88. {
  89. constexpr int __nbits = sizeof(_Tp) * __CHAR_BIT__;
  90. if (__y >= __nbits)
  91. {
  92. if constexpr (is_unsigned_v<_Tp>)
  93. return {};
  94. else
  95. return _Base::_S_bit_shift_right(__x, __nbits - 1);
  96. }
  97. }
  98. return _Base::_S_bit_shift_right(__x, __y);
  99. }
  100. // }}}
  101. };
  102. // }}}
  103. // _MaskImplPpc {{{
  104. template <typename _Abi>
  105. struct _MaskImplPpc : _MaskImplBuiltin<_Abi>
  106. {
  107. using _Base = _MaskImplBuiltin<_Abi>;
  108. // _S_popcount {{{
  109. template <typename _Tp>
  110. _GLIBCXX_SIMD_INTRINSIC static int _S_popcount(simd_mask<_Tp, _Abi> __k)
  111. {
  112. const auto __kv = __as_vector(__k);
  113. if constexpr (__have_power10vec)
  114. {
  115. return vec_cntm(__to_intrin(__kv), 1);
  116. }
  117. else if constexpr (sizeof(_Tp) >= sizeof(int))
  118. {
  119. using _Intrin = __intrinsic_type16_t<int>;
  120. const int __sum = -vec_sums(__intrin_bitcast<_Intrin>(__kv), _Intrin())[3];
  121. return __sum / (sizeof(_Tp) / sizeof(int));
  122. }
  123. else
  124. {
  125. const auto __summed_to_int = vec_sum4s(__to_intrin(__kv), __intrinsic_type16_t<int>());
  126. return -vec_sums(__summed_to_int, __intrinsic_type16_t<int>())[3];
  127. }
  128. }
  129. // }}}
  130. };
  131. // }}}
  132. _GLIBCXX_SIMD_END_NAMESPACE
  133. #endif // __cplusplus >= 201703L
  134. #endif // _GLIBCXX_EXPERIMENTAL_SIMD_PPC_H_
  135. // vim: foldmethod=marker foldmarker={{{,}}} sw=2 noet ts=8 sts=2 tw=100