simd_math.h
// Math overloads for simd -*- C++ -*-

// Copyright (C) 2020-2021 Free Software Foundation, Inc.
//
// This file is part of the GNU ISO C++ Library.  This library is free
// software; you can redistribute it and/or modify it under the
// terms of the GNU General Public License as published by the
// Free Software Foundation; either version 3, or (at your option)
// any later version.
//
// This library is distributed in the hope that it will be useful,
// but WITHOUT ANY WARRANTY; without even the implied warranty of
// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
// GNU General Public License for more details.
//
// Under Section 7 of GPL version 3, you are granted additional
// permissions described in the GCC Runtime Library Exception, version
// 3.1, as published by the Free Software Foundation.
//
// You should have received a copy of the GNU General Public License and
// a copy of the GCC Runtime Library Exception along with this program;
// see the files COPYING3 and COPYING.RUNTIME respectively.  If not, see
// <http://www.gnu.org/licenses/>.

#ifndef _GLIBCXX_EXPERIMENTAL_SIMD_MATH_H_
#define _GLIBCXX_EXPERIMENTAL_SIMD_MATH_H_

#if __cplusplus >= 201703L

#include <utility>
#include <iomanip>

_GLIBCXX_SIMD_BEGIN_NAMESPACE
template <typename _Tp, typename _V>
  using _Samesize = fixed_size_simd<_Tp, _V::size()>;

// _Math_return_type {{{
template <typename _DoubleR, typename _Tp, typename _Abi>
  struct _Math_return_type;

template <typename _DoubleR, typename _Tp, typename _Abi>
  using _Math_return_type_t
    = typename _Math_return_type<_DoubleR, _Tp, _Abi>::type;

template <typename _Tp, typename _Abi>
  struct _Math_return_type<double, _Tp, _Abi>
  { using type = simd<_Tp, _Abi>; };

template <typename _Tp, typename _Abi>
  struct _Math_return_type<bool, _Tp, _Abi>
  { using type = simd_mask<_Tp, _Abi>; };

template <typename _DoubleR, typename _Tp, typename _Abi>
  struct _Math_return_type
  { using type = fixed_size_simd<_DoubleR, simd_size_v<_Tp, _Abi>>; };

//}}}
// _GLIBCXX_SIMD_MATH_CALL_ {{{
#define _GLIBCXX_SIMD_MATH_CALL_(__name)                                \
template <typename _Tp, typename _Abi, typename...,                    \
          typename _R = _Math_return_type_t<                            \
            decltype(std::__name(declval<double>())), _Tp, _Abi>>       \
  enable_if_t<is_floating_point_v<_Tp>, _R>                             \
  __name(simd<_Tp, _Abi> __x)                                           \
  { return {__private_init, _Abi::_SimdImpl::_S_##__name(__data(__x))}; }

// }}}
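// For illustration only (not part of the header): instantiating the macro
// above as _GLIBCXX_SIMD_MATH_CALL_(acos) produces exactly this overload,
// with the return type deduced from the scalar std::acos(double):
//
//   template <typename _Tp, typename _Abi, typename...,
//             typename _R = _Math_return_type_t<
//               decltype(std::acos(declval<double>())), _Tp, _Abi>>
//     enable_if_t<is_floating_point_v<_Tp>, _R>
//     acos(simd<_Tp, _Abi> __x)
//     { return {__private_init, _Abi::_SimdImpl::_S_acos(__data(__x))}; }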
// _Extra_argument_type {{{
template <typename _Up, typename _Tp, typename _Abi>
  struct _Extra_argument_type;

template <typename _Tp, typename _Abi>
  struct _Extra_argument_type<_Tp*, _Tp, _Abi>
  {
    using type = simd<_Tp, _Abi>*;
    static constexpr double* declval();
    static constexpr bool __needs_temporary_scalar = true;

    _GLIBCXX_SIMD_INTRINSIC static constexpr auto _S_data(type __x)
    { return &__data(*__x); }
  };

template <typename _Up, typename _Tp, typename _Abi>
  struct _Extra_argument_type<_Up*, _Tp, _Abi>
  {
    static_assert(is_integral_v<_Up>);
    using type = fixed_size_simd<_Up, simd_size_v<_Tp, _Abi>>*;
    static constexpr _Up* declval();
    static constexpr bool __needs_temporary_scalar = true;

    _GLIBCXX_SIMD_INTRINSIC static constexpr auto _S_data(type __x)
    { return &__data(*__x); }
  };

template <typename _Tp, typename _Abi>
  struct _Extra_argument_type<_Tp, _Tp, _Abi>
  {
    using type = simd<_Tp, _Abi>;
    static constexpr double declval();
    static constexpr bool __needs_temporary_scalar = false;

    _GLIBCXX_SIMD_INTRINSIC static constexpr decltype(auto)
    _S_data(const type& __x)
    { return __data(__x); }
  };

template <typename _Up, typename _Tp, typename _Abi>
  struct _Extra_argument_type
  {
    static_assert(is_integral_v<_Up>);
    using type = fixed_size_simd<_Up, simd_size_v<_Tp, _Abi>>;
    static constexpr _Up declval();
    static constexpr bool __needs_temporary_scalar = false;

    _GLIBCXX_SIMD_INTRINSIC static constexpr decltype(auto)
    _S_data(const type& __x)
    { return __data(__x); }
  };

//}}}
// _GLIBCXX_SIMD_MATH_CALL2_ {{{
#define _GLIBCXX_SIMD_MATH_CALL2_(__name, arg2_)                        \
template <                                                              \
  typename _Tp, typename _Abi, typename...,                             \
  typename _Arg2 = _Extra_argument_type<arg2_, _Tp, _Abi>,              \
  typename _R = _Math_return_type_t<                                    \
    decltype(std::__name(declval<double>(), _Arg2::declval())), _Tp, _Abi>> \
  enable_if_t<is_floating_point_v<_Tp>, _R>                             \
  __name(const simd<_Tp, _Abi>& __x, const typename _Arg2::type& __y)   \
  {                                                                     \
    return {__private_init,                                             \
            _Abi::_SimdImpl::_S_##__name(__data(__x), _Arg2::_S_data(__y))}; \
  }                                                                     \
template <typename _Up, typename _Tp, typename _Abi>                    \
  _GLIBCXX_SIMD_INTRINSIC _Math_return_type_t<                          \
    decltype(std::__name(                                               \
      declval<double>(),                                                \
      declval<enable_if_t<                                              \
        conjunction_v<                                                  \
          is_same<arg2_, _Tp>,                                          \
          negation<is_same<__remove_cvref_t<_Up>, simd<_Tp, _Abi>>>,    \
          is_convertible<_Up, simd<_Tp, _Abi>>, is_floating_point<_Tp>>, \
        double>>())),                                                   \
    _Tp, _Abi>                                                          \
  __name(_Up&& __xx, const simd<_Tp, _Abi>& __yy)                       \
  { return __name(simd<_Tp, _Abi>(static_cast<_Up&&>(__xx)), __yy); }

// }}}
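// Illustration (not part of the header): _GLIBCXX_SIMD_MATH_CALL2_(ldexp, int),
// used further below, yields after macro substitution roughly
//
//   template <typename _Tp, typename _Abi, ...>
//     enable_if_t<is_floating_point_v<_Tp>, simd<_Tp, _Abi>>
//     ldexp(const simd<_Tp, _Abi>& __x,
//           const fixed_size_simd<int, simd_size_v<_Tp, _Abi>>& __y);
//
// i.e. the extra `int` argument becomes a fixed_size_simd<int, N> of matching
// width, as selected by _Extra_argument_type above.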
// _GLIBCXX_SIMD_MATH_CALL3_ {{{
#define _GLIBCXX_SIMD_MATH_CALL3_(__name, arg2_, arg3_)                 \
template <typename _Tp, typename _Abi, typename...,                    \
          typename _Arg2 = _Extra_argument_type<arg2_, _Tp, _Abi>,      \
          typename _Arg3 = _Extra_argument_type<arg3_, _Tp, _Abi>,      \
          typename _R = _Math_return_type_t<                            \
            decltype(std::__name(declval<double>(), _Arg2::declval(),   \
                                 _Arg3::declval())),                    \
            _Tp, _Abi>>                                                 \
  enable_if_t<is_floating_point_v<_Tp>, _R>                             \
  __name(const simd<_Tp, _Abi>& __x, const typename _Arg2::type& __y,   \
         const typename _Arg3::type& __z)                               \
  {                                                                     \
    return {__private_init,                                             \
            _Abi::_SimdImpl::_S_##__name(__data(__x), _Arg2::_S_data(__y), \
                                         _Arg3::_S_data(__z))};         \
  }                                                                     \
template <                                                              \
  typename _T0, typename _T1, typename _T2, typename...,                \
  typename _U0 = __remove_cvref_t<_T0>,                                 \
  typename _U1 = __remove_cvref_t<_T1>,                                 \
  typename _U2 = __remove_cvref_t<_T2>,                                 \
  typename _Simd = conditional_t<is_simd_v<_U1>, _U1, _U2>,             \
  typename = enable_if_t<conjunction_v<                                 \
    is_simd<_Simd>, is_convertible<_T0&&, _Simd>,                       \
    is_convertible<_T1&&, _Simd>, is_convertible<_T2&&, _Simd>,         \
    negation<conjunction<                                               \
      is_simd<_U0>, is_floating_point<__value_type_or_identity_t<_U0>>>>>>> \
  _GLIBCXX_SIMD_INTRINSIC decltype(__name(declval<const _Simd&>(),      \
                                          declval<const _Simd&>(),      \
                                          declval<const _Simd&>()))     \
  __name(_T0&& __xx, _T1&& __yy, _T2&& __zz)                            \
  {                                                                     \
    return __name(_Simd(static_cast<_T0&&>(__xx)),                      \
                  _Simd(static_cast<_T1&&>(__yy)),                      \
                  _Simd(static_cast<_T2&&>(__zz)));                     \
  }

// }}}
// __cosSeries {{{
template <typename _Abi>
  _GLIBCXX_SIMD_ALWAYS_INLINE static simd<float, _Abi>
  __cosSeries(const simd<float, _Abi>& __x)
  {
    const simd<float, _Abi> __x2 = __x * __x;
    simd<float, _Abi> __y;
    __y = 0x1.ap-16f;                  //  1/8!
    __y = __y * __x2 - 0x1.6c1p-10f;   // -1/6!
    __y = __y * __x2 + 0x1.555556p-5f; //  1/4!
    return __y * (__x2 * __x2) - .5f * __x2 + 1.f;
  }

template <typename _Abi>
  _GLIBCXX_SIMD_ALWAYS_INLINE static simd<double, _Abi>
  __cosSeries(const simd<double, _Abi>& __x)
  {
    const simd<double, _Abi> __x2 = __x * __x;
    simd<double, _Abi> __y;
    __y = 0x1.AC00000000000p-45;              //  1/16!
    __y = __y * __x2 - 0x1.9394000000000p-37; // -1/14!
    __y = __y * __x2 + 0x1.1EED8C0000000p-29; //  1/12!
    __y = __y * __x2 - 0x1.27E4FB7400000p-22; // -1/10!
    __y = __y * __x2 + 0x1.A01A01A018000p-16; //  1/8!
    __y = __y * __x2 - 0x1.6C16C16C16C00p-10; // -1/6!
    __y = __y * __x2 + 0x1.5555555555554p-5;  //  1/4!
    return (__y * __x2 - .5f) * __x2 + 1.f;
  }

// }}}
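// Both overloads evaluate the even Taylor polynomial of cos around 0 in
// Horner form. For the float version, with |__x| ≤ π/4 after folding, the
// computed value is
//
//   cos(x) ≈ ((c8·x² + c6)·x² + c4)·x⁴ − x²/2 + 1
//
// where c4 ≈ 1/4!, c6 ≈ −1/6!, c8 ≈ 1/8! are the tuned hex-float constants
// above; the double version extends the same scheme up to the 1/16! term.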
// __sinSeries {{{
template <typename _Abi>
  _GLIBCXX_SIMD_ALWAYS_INLINE static simd<float, _Abi>
  __sinSeries(const simd<float, _Abi>& __x)
  {
    const simd<float, _Abi> __x2 = __x * __x;
    simd<float, _Abi> __y;
    __y = -0x1.9CC000p-13f;            // -1/7!
    __y = __y * __x2 + 0x1.111100p-7f; //  1/5!
    __y = __y * __x2 - 0x1.555556p-3f; // -1/3!
    return __y * (__x2 * __x) + __x;
  }

template <typename _Abi>
  _GLIBCXX_SIMD_ALWAYS_INLINE static simd<double, _Abi>
  __sinSeries(const simd<double, _Abi>& __x)
  {
    // __x  = [0, 0.7854 = pi/4]
    // __x² = [0, 0.6169 = pi²/8]
    const simd<double, _Abi> __x2 = __x * __x;
    simd<double, _Abi> __y;
    __y = -0x1.ACF0000000000p-41;             // -1/15!
    __y = __y * __x2 + 0x1.6124400000000p-33; //  1/13!
    __y = __y * __x2 - 0x1.AE64567000000p-26; // -1/11!
    __y = __y * __x2 + 0x1.71DE3A5540000p-19; //  1/9!
    __y = __y * __x2 - 0x1.A01A01A01A000p-13; // -1/7!
    __y = __y * __x2 + 0x1.1111111111110p-7;  //  1/5!
    __y = __y * __x2 - 0x1.5555555555555p-3;  // -1/3!
    return __y * (__x2 * __x) + __x;
  }

// }}}
// __zero_low_bits {{{
template <int _Bits, typename _Tp, typename _Abi>
  _GLIBCXX_SIMD_INTRINSIC simd<_Tp, _Abi>
  __zero_low_bits(simd<_Tp, _Abi> __x)
  {
    const simd<_Tp, _Abi> __bitmask
      = __bit_cast<_Tp>(~make_unsigned_t<__int_for_sizeof_t<_Tp>>() << _Bits);
    return {__private_init,
            _Abi::_SimdImpl::_S_bit_and(__data(__x), __data(__bitmask))};
  }

// }}}
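// Typical use (as in __fold_input below): splitting a double __y into a high
// part whose 26 low mantissa bits are zero plus a remainder keeps products
// such as __y_hi * __pi_2_1 exact in double precision:
//
//   const _V __y_hi = __zero_low_bits<26>(__y);
//   const _V __y_lo = __y - __y_hi;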
// __fold_input {{{
/**@internal
 * Fold @p x into [-¼π, ¼π] and remember the quadrant it came from:
 * quadrant 0: [-¼π,  ¼π]
 * quadrant 1: [ ¼π,  ¾π]
 * quadrant 2: [ ¾π, 1¼π]
 * quadrant 3: [1¼π, 1¾π]
 *
 * The algorithm determines `y` as the multiple such that `x - y * ¼π ∈
 * [-¼π, ¼π]`. Using a bitmask, `y` is reduced to `quadrant`. `y` can be
 * calculated as
 * ```
 * y = trunc(x / ¼π);
 * y += fmod(y, 2);
 * ```
 * This can be simplified by moving the (implicit) division by 2 into the
 * truncation expression. The `+= fmod` effect can then be achieved by using
 * rounding instead of truncation: `y = round(x / ½π) * 2`. If precision
 * allows, `2/π * x` is better (faster).
 */
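// Worked example: for x = 2.0, x · 2/π ≈ 1.273, which rounds to y = 1, so
// quadrant = 1 & 3 = 1 and the folded value is x − y · π/2 ≈ 0.4292, which
// lies in [-π/4, π/4] as required.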
template <typename _Tp, typename _Abi>
  struct _Folded
  {
    simd<_Tp, _Abi> _M_x;
    rebind_simd_t<int, simd<_Tp, _Abi>> _M_quadrant;
  };

namespace __math_float {
  inline constexpr float __pi_over_4 = 0x1.921FB6p-1f; // π/4
  inline constexpr float __2_over_pi = 0x1.45F306p-1f; // 2/π
  inline constexpr float __pi_2_5bits0
    = 0x1.921fc0p0f; // π/2, 5 0-bits (least significant)
  inline constexpr float __pi_2_5bits0_rem
    = -0x1.5777a6p-21f; // π/2 - __pi_2_5bits0
} // namespace __math_float

namespace __math_double {
  inline constexpr double __pi_over_4 = 0x1.921fb54442d18p-1; // π/4
  inline constexpr double __2_over_pi = 0x1.45F306DC9C883p-1; // 2/π
  inline constexpr double __pi_2 = 0x1.921fb54442d18p0;       // π/2
} // namespace __math_double
template <typename _Abi>
  _GLIBCXX_SIMD_ALWAYS_INLINE _Folded<float, _Abi>
  __fold_input(const simd<float, _Abi>& __x)
  {
    using _V = simd<float, _Abi>;
    using _IV = rebind_simd_t<int, _V>;
    using namespace __math_float;
    _Folded<float, _Abi> __r;
    __r._M_x = abs(__x);
#if 0
    // zero most mantissa bits:
    constexpr float __1_over_pi = 0x1.45F306p-2f; // 1/π
    const auto __y = (__r._M_x * __1_over_pi + 0x1.8p23f) - 0x1.8p23f;
    // split π into 4 parts, the first three with 13 trailing zeros (to make
    // the following multiplications precise):
    constexpr float __pi0 = 0x1.920000p1f;
    constexpr float __pi1 = 0x1.fb4000p-11f;
    constexpr float __pi2 = 0x1.444000p-23f;
    constexpr float __pi3 = 0x1.68c234p-38f;
    __r._M_x - __y*__pi0 - __y*__pi1 - __y*__pi2 - __y*__pi3
#else
    if (_GLIBCXX_SIMD_IS_UNLIKELY(all_of(__r._M_x < __pi_over_4)))
      __r._M_quadrant = 0;
    else if (_GLIBCXX_SIMD_IS_LIKELY(all_of(__r._M_x < 6 * __pi_over_4)))
      {
        const _V __y = nearbyint(__r._M_x * __2_over_pi);
        __r._M_quadrant = static_simd_cast<_IV>(__y) & 3; // __y mod 4
        __r._M_x -= __y * __pi_2_5bits0;
        __r._M_x -= __y * __pi_2_5bits0_rem;
      }
    else
      {
        using __math_double::__2_over_pi;
        using __math_double::__pi_2;
        using _VD = rebind_simd_t<double, _V>;
        _VD __xd = static_simd_cast<_VD>(__r._M_x);
        _VD __y = nearbyint(__xd * __2_over_pi);
        __r._M_quadrant = static_simd_cast<_IV>(__y) & 3; // = __y mod 4
        __r._M_x = static_simd_cast<_V>(__xd - __y * __pi_2);
      }
#endif
    return __r;
  }
template <typename _Abi>
  _GLIBCXX_SIMD_ALWAYS_INLINE _Folded<double, _Abi>
  __fold_input(const simd<double, _Abi>& __x)
  {
    using _V = simd<double, _Abi>;
    using _IV = rebind_simd_t<int, _V>;
    using namespace __math_double;
    _Folded<double, _Abi> __r;
    __r._M_x = abs(__x);
    if (_GLIBCXX_SIMD_IS_UNLIKELY(all_of(__r._M_x < __pi_over_4)))
      {
        __r._M_quadrant = 0;
        return __r;
      }
    const _V __y = nearbyint(__r._M_x / (2 * __pi_over_4));
    __r._M_quadrant = static_simd_cast<_IV>(__y) & 3;
    if (_GLIBCXX_SIMD_IS_LIKELY(all_of(__r._M_x < 1025 * __pi_over_4)))
      {
        // x - y * pi/2, y uses no more than 11 mantissa bits
        __r._M_x -= __y * 0x1.921FB54443000p0;
        __r._M_x -= __y * -0x1.73DCB3B39A000p-43;
        __r._M_x -= __y * 0x1.45C06E0E68948p-86;
      }
    else if (_GLIBCXX_SIMD_IS_LIKELY(all_of(__y <= 0x1.0p30)))
      {
        // x - y * pi/2, y uses no more than 29 mantissa bits
        __r._M_x -= __y * 0x1.921FB40000000p0;
        __r._M_x -= __y * 0x1.4442D00000000p-24;
        __r._M_x -= __y * 0x1.8469898CC5170p-48;
      }
    else
      {
        // x - y * pi/2, y may require all mantissa bits
        const _V __y_hi = __zero_low_bits<26>(__y);
        const _V __y_lo = __y - __y_hi;
        const auto __pi_2_1 = 0x1.921FB50000000p0;
        const auto __pi_2_2 = 0x1.110B460000000p-26;
        const auto __pi_2_3 = 0x1.1A62630000000p-54;
        const auto __pi_2_4 = 0x1.8A2E03707344Ap-81;
        __r._M_x = __r._M_x - __y_hi * __pi_2_1
                     - max(__y_hi * __pi_2_2, __y_lo * __pi_2_1)
                     - min(__y_hi * __pi_2_2, __y_lo * __pi_2_1)
                     - max(__y_hi * __pi_2_3, __y_lo * __pi_2_2)
                     - min(__y_hi * __pi_2_3, __y_lo * __pi_2_2)
                     - max(__y * __pi_2_4, __y_lo * __pi_2_3)
                     - min(__y * __pi_2_4, __y_lo * __pi_2_3);
      }
    return __r;
  }
// }}}
// __extract_exponent_as_int {{{
template <typename _Tp, typename _Abi>
  rebind_simd_t<int, simd<_Tp, _Abi>>
  __extract_exponent_as_int(const simd<_Tp, _Abi>& __v)
  {
    using _Vp = simd<_Tp, _Abi>;
    using _Up = make_unsigned_t<__int_for_sizeof_t<_Tp>>;
    using namespace std::experimental::__float_bitwise_operators;
    const _Vp __exponent_mask
      = __infinity_v<_Tp>; // 0x7f800000 or 0x7ff0000000000000
    return static_simd_cast<rebind_simd_t<int, _Vp>>(
      __bit_cast<rebind_simd_t<_Up, _Vp>>(__v & __exponent_mask)
        >> (__digits_v<_Tp> - 1));
  }

// }}}
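// E.g. for float, __digits_v<float> - 1 == 23, so this masks the IEEE-754
// binary32 exponent field (bits 30..23) and shifts it down: for __v = 8.0f
// (bit pattern 0x41000000) each lane yields 0x82 = 130, i.e. the biased
// exponent; the bias is not removed here.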
// __impl_or_fallback {{{
template <typename ImplFun, typename FallbackFun, typename... _Args>
  _GLIBCXX_SIMD_INTRINSIC auto
  __impl_or_fallback_dispatch(int, ImplFun&& __impl_fun, FallbackFun&&,
                              _Args&&... __args)
    -> decltype(__impl_fun(static_cast<_Args&&>(__args)...))
  { return __impl_fun(static_cast<_Args&&>(__args)...); }

template <typename ImplFun, typename FallbackFun, typename... _Args>
  inline auto
  __impl_or_fallback_dispatch(float, ImplFun&&, FallbackFun&& __fallback_fun,
                              _Args&&... __args)
    -> decltype(__fallback_fun(static_cast<_Args&&>(__args)...))
  { return __fallback_fun(static_cast<_Args&&>(__args)...); }

template <typename... _Args>
  _GLIBCXX_SIMD_INTRINSIC auto
  __impl_or_fallback(_Args&&... __args)
  {
    return __impl_or_fallback_dispatch(int(), static_cast<_Args&&>(__args)...);
  }

//}}}
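// This is the usual int/float tag-dispatch idiom: the int() argument prefers
// the first overload, which SFINAEs away unless __impl_fun is callable with
// the given arguments; otherwise the implicit int -> float conversion selects
// the fallback. Sketch of a call site (__fast_path and __generic_path are
// illustrative names only):
//
//   __impl_or_fallback(
//     [](auto __x) -> decltype(__fast_path(__x)) { return __fast_path(__x); },
//     [](auto __x) { return __generic_path(__x); },
//     __v);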
// trigonometric functions {{{
_GLIBCXX_SIMD_MATH_CALL_(acos)
_GLIBCXX_SIMD_MATH_CALL_(asin)
_GLIBCXX_SIMD_MATH_CALL_(atan)
_GLIBCXX_SIMD_MATH_CALL2_(atan2, _Tp)

/*
 * algorithm for sine and cosine:
 *
 * The result can be calculated with sine or cosine depending on the π/4
 * section the input is in.
 * sine   ≈ __x + __x³
 * cosine ≈ 1 - __x²
 *
 * sine:
 * Map -__x to __x and invert the output
 * Extend precision of __x - n * π/4 by calculating
 * ((__x - n * p1) - n * p2) - n * p3 (p1 + p2 + p3 = π/4)
 *
 * Calculate Taylor series with tuned coefficients.
 * Fix sign.
 */
// cos {{{
template <typename _Tp, typename _Abi>
  enable_if_t<is_floating_point_v<_Tp>, simd<_Tp, _Abi>>
  cos(const simd<_Tp, _Abi>& __x)
  {
    using _V = simd<_Tp, _Abi>;
    if constexpr (__is_scalar_abi<_Abi>() || __is_fixed_size_abi_v<_Abi>)
      return {__private_init, _Abi::_SimdImpl::_S_cos(__data(__x))};
    else
      {
        if constexpr (is_same_v<_Tp, float>)
          if (_GLIBCXX_SIMD_IS_UNLIKELY(any_of(abs(__x) >= 393382)))
            return static_simd_cast<_V>(
              cos(static_simd_cast<rebind_simd_t<double, _V>>(__x)));

        const auto __f = __fold_input(__x);
        // quadrant | effect
        //        0 | cosSeries, +
        //        1 | sinSeries, -
        //        2 | cosSeries, -
        //        3 | sinSeries, +
        using namespace std::experimental::__float_bitwise_operators;
        const _V __sign_flip
          = _V(-0.f) & static_simd_cast<_V>((1 + __f._M_quadrant) << 30);

        const auto __need_cos = (__f._M_quadrant & 1) == 0;
        if (_GLIBCXX_SIMD_IS_UNLIKELY(all_of(__need_cos)))
          return __sign_flip ^ __cosSeries(__f._M_x);
        else if (_GLIBCXX_SIMD_IS_UNLIKELY(none_of(__need_cos)))
          return __sign_flip ^ __sinSeries(__f._M_x);
        else // some_of(__need_cos)
          {
            _V __r = __sinSeries(__f._M_x);
            where(__need_cos.__cvt(), __r) = __cosSeries(__f._M_x);
            return __r ^ __sign_flip;
          }
      }
  }

template <typename _Tp>
  _GLIBCXX_SIMD_ALWAYS_INLINE
  enable_if_t<is_floating_point<_Tp>::value, simd<_Tp, simd_abi::scalar>>
  cos(simd<_Tp, simd_abi::scalar> __x)
  { return std::cos(__data(__x)); }

//}}}
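// Usage sketch (stdx is an illustrative namespace alias, not defined here):
//
//   namespace stdx = std::experimental;
//   stdx::native_simd<float> __v([](int __i) { return 0.1f * __i; });
//   stdx::native_simd<float> __c = stdx::cos(__v); // element-wise cosine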
// sin {{{
template <typename _Tp, typename _Abi>
  enable_if_t<is_floating_point_v<_Tp>, simd<_Tp, _Abi>>
  sin(const simd<_Tp, _Abi>& __x)
  {
    using _V = simd<_Tp, _Abi>;
    if constexpr (__is_scalar_abi<_Abi>() || __is_fixed_size_abi_v<_Abi>)
      return {__private_init, _Abi::_SimdImpl::_S_sin(__data(__x))};
    else
      {
        if constexpr (is_same_v<_Tp, float>)
          if (_GLIBCXX_SIMD_IS_UNLIKELY(any_of(abs(__x) >= 527449)))
            return static_simd_cast<_V>(
              sin(static_simd_cast<rebind_simd_t<double, _V>>(__x)));

        const auto __f = __fold_input(__x);
        // quadrant | effect
        //        0 | sinSeries
        //        1 | cosSeries
        //        2 | sinSeries, sign flip
        //        3 | cosSeries, sign flip
        using namespace std::experimental::__float_bitwise_operators;
        const auto __sign_flip
          = (__x ^ static_simd_cast<_V>(1 - __f._M_quadrant)) & _V(_Tp(-0.));

        const auto __need_sin = (__f._M_quadrant & 1) == 0;
        if (_GLIBCXX_SIMD_IS_UNLIKELY(all_of(__need_sin)))
          return __sign_flip ^ __sinSeries(__f._M_x);
        else if (_GLIBCXX_SIMD_IS_UNLIKELY(none_of(__need_sin)))
          return __sign_flip ^ __cosSeries(__f._M_x);
        else // some_of(__need_sin)
          {
            _V __r = __cosSeries(__f._M_x);
            where(__need_sin.__cvt(), __r) = __sinSeries(__f._M_x);
            return __sign_flip ^ __r;
          }
      }
  }

template <typename _Tp>
  _GLIBCXX_SIMD_ALWAYS_INLINE
  enable_if_t<is_floating_point<_Tp>::value, simd<_Tp, simd_abi::scalar>>
  sin(simd<_Tp, simd_abi::scalar> __x)
  { return std::sin(__data(__x)); }

//}}}
_GLIBCXX_SIMD_MATH_CALL_(tan)
_GLIBCXX_SIMD_MATH_CALL_(acosh)
_GLIBCXX_SIMD_MATH_CALL_(asinh)
_GLIBCXX_SIMD_MATH_CALL_(atanh)
_GLIBCXX_SIMD_MATH_CALL_(cosh)
_GLIBCXX_SIMD_MATH_CALL_(sinh)
_GLIBCXX_SIMD_MATH_CALL_(tanh)
// }}}

// exponential functions {{{
_GLIBCXX_SIMD_MATH_CALL_(exp)
_GLIBCXX_SIMD_MATH_CALL_(exp2)
_GLIBCXX_SIMD_MATH_CALL_(expm1)

// }}}
// frexp {{{
#if _GLIBCXX_SIMD_X86INTRIN
template <typename _Tp, size_t _Np>
  _SimdWrapper<_Tp, _Np>
  __getexp(_SimdWrapper<_Tp, _Np> __x)
  {
    if constexpr (__have_avx512vl && __is_sse_ps<_Tp, _Np>())
      return __auto_bitcast(_mm_getexp_ps(__to_intrin(__x)));
    else if constexpr (__have_avx512f && __is_sse_ps<_Tp, _Np>())
      return __auto_bitcast(_mm512_getexp_ps(__auto_bitcast(__to_intrin(__x))));
    else if constexpr (__have_avx512vl && __is_sse_pd<_Tp, _Np>())
      return _mm_getexp_pd(__x);
    else if constexpr (__have_avx512f && __is_sse_pd<_Tp, _Np>())
      return __lo128(_mm512_getexp_pd(__auto_bitcast(__x)));
    else if constexpr (__have_avx512vl && __is_avx_ps<_Tp, _Np>())
      return _mm256_getexp_ps(__x);
    else if constexpr (__have_avx512f && __is_avx_ps<_Tp, _Np>())
      return __lo256(_mm512_getexp_ps(__auto_bitcast(__x)));
    else if constexpr (__have_avx512vl && __is_avx_pd<_Tp, _Np>())
      return _mm256_getexp_pd(__x);
    else if constexpr (__have_avx512f && __is_avx_pd<_Tp, _Np>())
      return __lo256(_mm512_getexp_pd(__auto_bitcast(__x)));
    else if constexpr (__is_avx512_ps<_Tp, _Np>())
      return _mm512_getexp_ps(__x);
    else if constexpr (__is_avx512_pd<_Tp, _Np>())
      return _mm512_getexp_pd(__x);
    else
      __assert_unreachable<_Tp>();
  }

template <typename _Tp, size_t _Np>
  _SimdWrapper<_Tp, _Np>
  __getmant_avx512(_SimdWrapper<_Tp, _Np> __x)
  {
    if constexpr (__have_avx512vl && __is_sse_ps<_Tp, _Np>())
      return __auto_bitcast(_mm_getmant_ps(__to_intrin(__x),
                                           _MM_MANT_NORM_p5_1,
                                           _MM_MANT_SIGN_src));
    else if constexpr (__have_avx512f && __is_sse_ps<_Tp, _Np>())
      return __auto_bitcast(_mm512_getmant_ps(__auto_bitcast(__to_intrin(__x)),
                                              _MM_MANT_NORM_p5_1,
                                              _MM_MANT_SIGN_src));
    else if constexpr (__have_avx512vl && __is_sse_pd<_Tp, _Np>())
      return _mm_getmant_pd(__x, _MM_MANT_NORM_p5_1, _MM_MANT_SIGN_src);
    else if constexpr (__have_avx512f && __is_sse_pd<_Tp, _Np>())
      return __lo128(_mm512_getmant_pd(__auto_bitcast(__x), _MM_MANT_NORM_p5_1,
                                       _MM_MANT_SIGN_src));
    else if constexpr (__have_avx512vl && __is_avx_ps<_Tp, _Np>())
      return _mm256_getmant_ps(__x, _MM_MANT_NORM_p5_1, _MM_MANT_SIGN_src);
    else if constexpr (__have_avx512f && __is_avx_ps<_Tp, _Np>())
      return __lo256(_mm512_getmant_ps(__auto_bitcast(__x), _MM_MANT_NORM_p5_1,
                                       _MM_MANT_SIGN_src));
    else if constexpr (__have_avx512vl && __is_avx_pd<_Tp, _Np>())
      return _mm256_getmant_pd(__x, _MM_MANT_NORM_p5_1, _MM_MANT_SIGN_src);
    else if constexpr (__have_avx512f && __is_avx_pd<_Tp, _Np>())
      return __lo256(_mm512_getmant_pd(__auto_bitcast(__x), _MM_MANT_NORM_p5_1,
                                       _MM_MANT_SIGN_src));
    else if constexpr (__is_avx512_ps<_Tp, _Np>())
      return _mm512_getmant_ps(__x, _MM_MANT_NORM_p5_1, _MM_MANT_SIGN_src);
    else if constexpr (__is_avx512_pd<_Tp, _Np>())
      return _mm512_getmant_pd(__x, _MM_MANT_NORM_p5_1, _MM_MANT_SIGN_src);
    else
      __assert_unreachable<_Tp>();
  }
#endif // _GLIBCXX_SIMD_X86INTRIN
/**
 * Splits @p __x into exponent and mantissa; the sign is kept with the
 * mantissa.
 *
 * The return value will be in the range [0.5, 1.0).
 * The @p __exp value will be an integer defining the power-of-two exponent.
 */
template <typename _Tp, typename _Abi>
  enable_if_t<is_floating_point_v<_Tp>, simd<_Tp, _Abi>>
  frexp(const simd<_Tp, _Abi>& __x, _Samesize<int, simd<_Tp, _Abi>>* __exp)
  {
    if constexpr (simd_size_v<_Tp, _Abi> == 1)
      {
        int __tmp;
        const auto __r = std::frexp(__x[0], &__tmp);
        (*__exp)[0] = __tmp;
        return __r;
      }
    else if constexpr (__is_fixed_size_abi_v<_Abi>)
      {
        return {__private_init,
                _Abi::_SimdImpl::_S_frexp(__data(__x), __data(*__exp))};
#if _GLIBCXX_SIMD_X86INTRIN
      }
    else if constexpr (__have_avx512f)
      {
        constexpr size_t _Np = simd_size_v<_Tp, _Abi>;
        constexpr size_t _NI = _Np < 4 ? 4 : _Np;
        const auto __v = __data(__x);
        const auto __isnonzero
          = _Abi::_SimdImpl::_S_isnonzerovalue_mask(__v._M_data);
        const _SimdWrapper<int, _NI> __exp_plus1
          = 1 + __convert<_SimdWrapper<int, _NI>>(__getexp(__v))._M_data;
        const _SimdWrapper<int, _Np> __e = __wrapper_bitcast<int, _Np>(
          _Abi::_CommonImpl::_S_blend(_SimdWrapper<bool, _NI>(__isnonzero),
                                      _SimdWrapper<int, _NI>(), __exp_plus1));
        simd_abi::deduce_t<int, _Np>::_CommonImpl::_S_store(__e, __exp);
        return {__private_init,
                _Abi::_CommonImpl::_S_blend(_SimdWrapper<bool, _Np>(
                                              __isnonzero),
                                            __v, __getmant_avx512(__v))};
#endif // _GLIBCXX_SIMD_X86INTRIN
      }
    else
      {
        // fallback implementation
        static_assert(sizeof(_Tp) == 4 || sizeof(_Tp) == 8);
        using _V = simd<_Tp, _Abi>;
        using _IV = rebind_simd_t<int, _V>;
        using namespace std::experimental::__proposed;
        using namespace std::experimental::__float_bitwise_operators;
        constexpr int __exp_adjust = sizeof(_Tp) == 4 ? 0x7e : 0x3fe;
        constexpr int __exp_offset = sizeof(_Tp) == 4 ? 0x70 : 0x200;
        constexpr _Tp __subnorm_scale = sizeof(_Tp) == 4 ? 0x1p112 : 0x1p512;
        _GLIBCXX_SIMD_USE_CONSTEXPR_API _V __exponent_mask
          = __infinity_v<_Tp>; // 0x7f800000 or 0x7ff0000000000000
        _GLIBCXX_SIMD_USE_CONSTEXPR_API _V __p5_1_exponent
          = -(2 - __epsilon_v<_Tp>) / 2; // 0xbf7fffff or 0xbfefffffffffffff
        _V __mant = __p5_1_exponent & (__exponent_mask | __x); // +/-[.5, 1)
        const _IV __exponent_bits = __extract_exponent_as_int(__x);
        if (_GLIBCXX_SIMD_IS_LIKELY(all_of(isnormal(__x))))
          {
            *__exp
              = simd_cast<_Samesize<int, _V>>(__exponent_bits - __exp_adjust);
            return __mant;
          }
#if __FINITE_MATH_ONLY__
        // at least one element of __x is 0 or subnormal, the rest is normal
        // (inf and NaN are excluded by -ffinite-math-only)
        const auto __iszero_inf_nan = __x == 0;
#else
        const auto __as_int
          = __bit_cast<rebind_simd_t<__int_for_sizeof_t<_Tp>, _V>>(abs(__x));
        const auto __inf
          = __bit_cast<rebind_simd_t<__int_for_sizeof_t<_Tp>, _V>>(
              _V(__infinity_v<_Tp>));
        const auto __iszero_inf_nan = static_simd_cast<typename _V::mask_type>(
          __as_int == 0 || __as_int >= __inf);
#endif
        const _V __scaled_subnormal = __x * __subnorm_scale;
        const _V __mant_subnormal
          = __p5_1_exponent & (__exponent_mask | __scaled_subnormal);
        where(!isnormal(__x), __mant) = __mant_subnormal;
        where(__iszero_inf_nan, __mant) = __x;
        _IV __e = __extract_exponent_as_int(__scaled_subnormal);
        using _MaskType = typename conditional_t<
          sizeof(typename _V::value_type) == sizeof(int), _V, _IV>::mask_type;
        const _MaskType __value_isnormal = isnormal(__x).__cvt();
        where(__value_isnormal.__cvt(), __e) = __exponent_bits;
        static_assert(sizeof(_IV) == sizeof(__value_isnormal));
        const _IV __offset
          = (__bit_cast<_IV>(__value_isnormal) & _IV(__exp_adjust))
              | (__bit_cast<_IV>(static_simd_cast<_MaskType>(__exponent_bits == 0)
                                   & static_simd_cast<_MaskType>(__x != 0))
                   & _IV(__exp_adjust + __exp_offset));
        *__exp = simd_cast<_Samesize<int, _V>>(__e - __offset);
        return __mant;
      }
  }

// }}}
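// Usage sketch (stdx as an illustrative alias for std::experimental):
//
//   stdx::native_simd<float> __v = 12.f;
//   stdx::fixed_size_simd<int, stdx::native_simd<float>::size()> __e;
//   auto __m = stdx::frexp(__v, &__e); // __m[i] == 0.75f, __e[i] == 4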
_GLIBCXX_SIMD_MATH_CALL2_(ldexp, int)
_GLIBCXX_SIMD_MATH_CALL_(ilogb)

// logarithms {{{
_GLIBCXX_SIMD_MATH_CALL_(log)
_GLIBCXX_SIMD_MATH_CALL_(log10)
_GLIBCXX_SIMD_MATH_CALL_(log1p)
_GLIBCXX_SIMD_MATH_CALL_(log2)

//}}}
// logb {{{
template <typename _Tp, typename _Abi>
  enable_if_t<is_floating_point<_Tp>::value, simd<_Tp, _Abi>>
  logb(const simd<_Tp, _Abi>& __x)
  {
    constexpr size_t _Np = simd_size_v<_Tp, _Abi>;
    if constexpr (_Np == 1)
      return std::logb(__x[0]);
    else if constexpr (__is_fixed_size_abi_v<_Abi>)
      {
        return {__private_init,
                __data(__x)._M_apply_per_chunk([](auto __impl, auto __xx) {
                  using _V = typename decltype(__impl)::simd_type;
                  return __data(
                    std::experimental::logb(_V(__private_init, __xx)));
                })};
      }
#if _GLIBCXX_SIMD_X86INTRIN // {{{
    else if constexpr (__have_avx512vl && __is_sse_ps<_Tp, _Np>())
      return {__private_init,
              __auto_bitcast(_mm_getexp_ps(__to_intrin(__as_vector(__x))))};
    else if constexpr (__have_avx512vl && __is_sse_pd<_Tp, _Np>())
      return {__private_init, _mm_getexp_pd(__data(__x))};
    else if constexpr (__have_avx512vl && __is_avx_ps<_Tp, _Np>())
      return {__private_init, _mm256_getexp_ps(__data(__x))};
    else if constexpr (__have_avx512vl && __is_avx_pd<_Tp, _Np>())
      return {__private_init, _mm256_getexp_pd(__data(__x))};
    else if constexpr (__have_avx512f && __is_avx_ps<_Tp, _Np>())
      return {__private_init,
              __lo256(_mm512_getexp_ps(__auto_bitcast(__data(__x))))};
    else if constexpr (__have_avx512f && __is_avx_pd<_Tp, _Np>())
      return {__private_init,
              __lo256(_mm512_getexp_pd(__auto_bitcast(__data(__x))))};
    else if constexpr (__is_avx512_ps<_Tp, _Np>())
      return {__private_init, _mm512_getexp_ps(__data(__x))};
    else if constexpr (__is_avx512_pd<_Tp, _Np>())
      return {__private_init, _mm512_getexp_pd(__data(__x))};
#endif // _GLIBCXX_SIMD_X86INTRIN }}}
    else
      {
        using _V = simd<_Tp, _Abi>;
        using namespace std::experimental::__proposed;
        auto __is_normal = isnormal(__x);

        // work on abs(__x) to reflect the return value on Linux for negative
        // inputs (domain-error => implementation-defined value is returned)
        const _V abs_x = abs(__x);

        // __exponent(__x) returns the exponent value (bias removed) as
        // simd<_Up> with integral _Up
        auto&& __exponent = [](const _V& __v) {
          using namespace std::experimental::__proposed;
          using _IV = rebind_simd_t<
            conditional_t<sizeof(_Tp) == sizeof(_LLong), _LLong, int>, _V>;
          return (__bit_cast<_IV>(__v) >> (__digits_v<_Tp> - 1))
                   - (__max_exponent_v<_Tp> - 1);
        };
        _V __r = static_simd_cast<_V>(__exponent(abs_x));
        if (_GLIBCXX_SIMD_IS_LIKELY(all_of(__is_normal)))
          // without corner cases (nan, inf, subnormal, zero) we have our
          // answer:
          return __r;
        const auto __is_zero = __x == 0;
        const auto __is_nan = isnan(__x);
        const auto __is_inf = isinf(__x);
        where(__is_zero, __r) = -__infinity_v<_Tp>;
        where(__is_nan, __r) = __x;
        where(__is_inf, __r) = __infinity_v<_Tp>;
        __is_normal |= __is_zero || __is_nan || __is_inf;
        if (all_of(__is_normal))
          // at this point everything but subnormals is handled
          return __r;
        // subnormals repeat the exponent extraction after multiplication of
        // the input with a floating point value that has 112 (0x70) in its
        // exponent (not too big for sp and large enough for dp)
        const _V __scaled = abs_x * _Tp(0x1p112);
        _V __scaled_exp = static_simd_cast<_V>(__exponent(__scaled) - 112);
        where(__is_normal, __scaled_exp) = __r;
        return __scaled_exp;
      }
  }

//}}}
template <typename _Tp, typename _Abi>
  enable_if_t<is_floating_point_v<_Tp>, simd<_Tp, _Abi>>
  modf(const simd<_Tp, _Abi>& __x, simd<_Tp, _Abi>* __iptr)
  {
    if constexpr (__is_scalar_abi<_Abi>()
                    || (__is_fixed_size_abi_v<_Abi>
                          && simd_size_v<_Tp, _Abi> == 1))
      {
        _Tp __tmp;
        _Tp __r = std::modf(__x[0], &__tmp);
        __iptr[0] = __tmp;
        return __r;
      }
    else
      {
        const auto __integral = trunc(__x);
        *__iptr = __integral;
        auto __r = __x - __integral;
#if !__FINITE_MATH_ONLY__
        where(isinf(__x), __r) = _Tp();
#endif
        return copysign(__r, __x);
      }
  }
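// Usage sketch: modf splits each element into integral and fractional parts,
// both carrying the sign of the input:
//
//   stdx::native_simd<double> __v = -3.25;
//   stdx::native_simd<double> __i;
//   auto __f = stdx::modf(__v, &__i); // __f[j] == -0.25, __i[j] == -3.0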
_GLIBCXX_SIMD_MATH_CALL2_(scalbn, int)
_GLIBCXX_SIMD_MATH_CALL2_(scalbln, long)

_GLIBCXX_SIMD_MATH_CALL_(cbrt)

_GLIBCXX_SIMD_MATH_CALL_(abs)
_GLIBCXX_SIMD_MATH_CALL_(fabs)

// [parallel.simd.math] only asks for is_floating_point_v<_Tp> and forgot to
// allow signed integral _Tp
template <typename _Tp, typename _Abi>
  enable_if_t<!is_floating_point_v<_Tp> && is_signed_v<_Tp>, simd<_Tp, _Abi>>
  abs(const simd<_Tp, _Abi>& __x)
  { return {__private_init, _Abi::_SimdImpl::_S_abs(__data(__x))}; }

template <typename _Tp, typename _Abi>
  enable_if_t<!is_floating_point_v<_Tp> && is_signed_v<_Tp>, simd<_Tp, _Abi>>
  fabs(const simd<_Tp, _Abi>& __x)
  { return {__private_init, _Abi::_SimdImpl::_S_abs(__data(__x))}; }

// the following are overloads for functions in <cstdlib> and not covered by
// [parallel.simd.math]. I don't see much value in making them work, though
/*
template <typename _Abi>
  simd<long, _Abi>
  labs(const simd<long, _Abi>& __x)
  { return {__private_init, _Abi::_SimdImpl::abs(__data(__x))}; }

template <typename _Abi>
  simd<long long, _Abi>
  llabs(const simd<long long, _Abi>& __x)
  { return {__private_init, _Abi::_SimdImpl::abs(__data(__x))}; }
*/
#define _GLIBCXX_SIMD_CVTING2(_NAME)                                    \
template <typename _Tp, typename _Abi>                                  \
  _GLIBCXX_SIMD_INTRINSIC simd<_Tp, _Abi> _NAME(                        \
    const simd<_Tp, _Abi>& __x,                                         \
    const __type_identity_t<simd<_Tp, _Abi>>& __y)                      \
  { return _NAME(__x, __y); }                                           \
                                                                        \
template <typename _Tp, typename _Abi>                                  \
  _GLIBCXX_SIMD_INTRINSIC simd<_Tp, _Abi> _NAME(                        \
    const __type_identity_t<simd<_Tp, _Abi>>& __x,                      \
    const simd<_Tp, _Abi>& __y)                                         \
  { return _NAME(__x, __y); }
#define _GLIBCXX_SIMD_CVTING3(_NAME)                                    \
template <typename _Tp, typename _Abi>                                  \
  _GLIBCXX_SIMD_INTRINSIC simd<_Tp, _Abi> _NAME(                        \
    const __type_identity_t<simd<_Tp, _Abi>>& __x,                      \
    const simd<_Tp, _Abi>& __y, const simd<_Tp, _Abi>& __z)             \
  { return _NAME(__x, __y, __z); }                                      \
                                                                        \
template <typename _Tp, typename _Abi>                                  \
  _GLIBCXX_SIMD_INTRINSIC simd<_Tp, _Abi> _NAME(                        \
    const simd<_Tp, _Abi>& __x,                                         \
    const __type_identity_t<simd<_Tp, _Abi>>& __y,                      \
    const simd<_Tp, _Abi>& __z)                                         \
  { return _NAME(__x, __y, __z); }                                      \
                                                                        \
template <typename _Tp, typename _Abi>                                  \
  _GLIBCXX_SIMD_INTRINSIC simd<_Tp, _Abi> _NAME(                        \
    const simd<_Tp, _Abi>& __x, const simd<_Tp, _Abi>& __y,             \
    const __type_identity_t<simd<_Tp, _Abi>>& __z)                      \
  { return _NAME(__x, __y, __z); }                                      \
                                                                        \
template <typename _Tp, typename _Abi>                                  \
  _GLIBCXX_SIMD_INTRINSIC simd<_Tp, _Abi> _NAME(                        \
    const simd<_Tp, _Abi>& __x,                                         \
    const __type_identity_t<simd<_Tp, _Abi>>& __y,                      \
    const __type_identity_t<simd<_Tp, _Abi>>& __z)                      \
  { return _NAME(__x, __y, __z); }                                      \
                                                                        \
template <typename _Tp, typename _Abi>                                  \
  _GLIBCXX_SIMD_INTRINSIC simd<_Tp, _Abi> _NAME(                        \
    const __type_identity_t<simd<_Tp, _Abi>>& __x,                      \
    const simd<_Tp, _Abi>& __y,                                         \
    const __type_identity_t<simd<_Tp, _Abi>>& __z)                      \
  { return _NAME(__x, __y, __z); }                                      \
                                                                        \
template <typename _Tp, typename _Abi>                                  \
  _GLIBCXX_SIMD_INTRINSIC simd<_Tp, _Abi> _NAME(                        \
    const __type_identity_t<simd<_Tp, _Abi>>& __x,                      \
    const __type_identity_t<simd<_Tp, _Abi>>& __y,                      \
    const simd<_Tp, _Abi>& __z)                                         \
  { return _NAME(__x, __y, __z); }
template <typename _R, typename _ToApply, typename _Tp, typename... _Tps>
  _GLIBCXX_SIMD_INTRINSIC _R
  __fixed_size_apply(_ToApply&& __apply, const _Tp& __arg0,
                     const _Tps&... __args)
  {
    return {__private_init,
            __data(__arg0)._M_apply_per_chunk(
              [&](auto __impl, const auto&... __inner) {
                using _V = typename decltype(__impl)::simd_type;
                return __data(__apply(_V(__private_init, __inner)...));
              },
              __data(__args)...)};
  }
template <typename _VV>
  __remove_cvref_t<_VV>
  __hypot(_VV __x, _VV __y)
  {
    using _V = __remove_cvref_t<_VV>;
    using _Tp = typename _V::value_type;
    if constexpr (_V::size() == 1)
      return std::hypot(_Tp(__x[0]), _Tp(__y[0]));
    else if constexpr (__is_fixed_size_abi_v<typename _V::abi_type>)
      {
        return __fixed_size_apply<_V>([](auto __a, auto __b)
                                      { return hypot(__a, __b); },
                                      __x, __y);
      }
    else
      {
        // A simple solution for _Tp == float would be to cast to double and
        // simply calculate sqrt(x²+y²) as it can't over-/underflow anymore
        // with dp. It still needs the Annex F fixups though and isn't faster
        // on Skylake-AVX512 (not even for SSE and AVX vectors, and really
        // bad for AVX-512).
        using namespace __float_bitwise_operators;
        _V __absx = abs(__x);          // no error
        _V __absy = abs(__y);          // no error
        _V __hi = max(__absx, __absy); // no error
        _V __lo = min(__absy, __absx); // no error

        // round __hi down to the next power-of-2:
        _GLIBCXX_SIMD_USE_CONSTEXPR_API _V __inf(__infinity_v<_Tp>);

#ifndef __FAST_MATH__
        if constexpr (__have_neon && !__have_neon_a32)
          { // With ARMv7 NEON, we have no subnormals and must use a slightly
            // different strategy
            const _V __hi_exp = __hi & __inf;
            _V __scale_back = __hi_exp;
            // For large exponents (max & max/2) the inversion comes too
            // close to subnormals. Subtract 3 from the exponent:
            where(__hi_exp > 1, __scale_back) = __hi_exp * _Tp(0.125);
            // Invert and adjust for the off-by-one error of inversion via
            // xor:
            const _V __scale = (__scale_back ^ __inf) * _Tp(.5);
            const _V __h1 = __hi * __scale;
            const _V __l1 = __lo * __scale;
            _V __r = __scale_back * sqrt(__h1 * __h1 + __l1 * __l1);
            // Fix up hypot(0, 0) to not be NaN:
            where(__hi == 0, __r) = 0;
            return __r;
          }
#endif

#ifdef __FAST_MATH__
        // With fast-math, ignore precision of subnormals and inputs from
        // __finite_max_v/2 to __finite_max_v. This removes all
        // branching/masking.
        if constexpr (true)
#else
        if (_GLIBCXX_SIMD_IS_LIKELY(all_of(isnormal(__x))
                                      && all_of(isnormal(__y))))
#endif
          {
            const _V __hi_exp = __hi & __inf;
            // ((__hi + __hi) & __inf) ^ __inf almost works for computing
            // __scale, except when (__hi + __hi) & __inf == __inf, in which
            // case __scale becomes 0 (should be min/2 instead) and thus
            // loses the information from __lo.
#ifdef __FAST_MATH__
            using _Ip = __int_for_sizeof_t<_Tp>;
            using _IV = rebind_simd_t<_Ip, _V>;
            const auto __as_int = __bit_cast<_IV>(__hi_exp);
            const _V __scale
              = __bit_cast<_V>(2 * __bit_cast<_Ip>(_Tp(1)) - __as_int);
#else
            const _V __scale = (__hi_exp ^ __inf) * _Tp(.5);
#endif
            _GLIBCXX_SIMD_USE_CONSTEXPR_API _V __mant_mask
              = __norm_min_v<_Tp> - __denorm_min_v<_Tp>;
            const _V __h1 = (__hi & __mant_mask) | _V(1);
            const _V __l1 = __lo * __scale;
            return __hi_exp * sqrt(__h1 * __h1 + __l1 * __l1);
          }
        else
          {
            // slower path to support subnormals
            // if __hi is subnormal, avoid scaling by inf & final mul by 0
            // (which yields NaN) by using min()
            _V __scale = _V(1 / __norm_min_v<_Tp>);
            // invert exponent w/o error and w/o using the slow divider unit:
            // xor inverts the exponent but off by 1. Multiplication with .5
            // adjusts for the discrepancy.
            where(__hi >= __norm_min_v<_Tp>, __scale)
              = ((__hi & __inf) ^ __inf) * _Tp(.5);
            // adjust final exponent for subnormal inputs
            _V __hi_exp = __norm_min_v<_Tp>;
            where(__hi >= __norm_min_v<_Tp>, __hi_exp)
              = __hi & __inf;           // no error
            _V __h1 = __hi * __scale;   // no error
            _V __l1 = __lo * __scale;   // no error

            // sqrt(x²+y²) = e*sqrt((x/e)²+(y/e)²):
            // this ensures no overflow in the argument to sqrt
            _V __r = __hi_exp * sqrt(__h1 * __h1 + __l1 * __l1);

#ifdef __STDC_IEC_559__
            // fixup for Annex F requirements
            // the naive fixup goes like this:
            //
            // where(__l1 == 0, __r)                      = __hi;
            // where(isunordered(__x, __y), __r)          = __quiet_NaN_v<_Tp>;
            // where(isinf(__absx) || isinf(__absy), __r) = __inf;
            //
            // The fixup can be prepared in parallel with the sqrt, requiring
            // a single blend step after hi_exp * sqrt, reducing latency and
            // throughput:
            _V __fixup = __hi; // __lo == 0
            where(isunordered(__x, __y), __fixup) = __quiet_NaN_v<_Tp>;
            where(isinf(__absx) || isinf(__absy), __fixup) = __inf;
            where(!(__lo == 0 || isunordered(__x, __y)
                      || (isinf(__absx) || isinf(__absy))),
                  __fixup)
              = __r;
            __r = __fixup;
#endif
            return __r;
          }
      }
  }
template <typename _Tp, typename _Abi>
  _GLIBCXX_SIMD_INTRINSIC simd<_Tp, _Abi>
  hypot(const simd<_Tp, _Abi>& __x, const simd<_Tp, _Abi>& __y)
  {
    return __hypot<conditional_t<__is_fixed_size_abi_v<_Abi>,
                                 const simd<_Tp, _Abi>&, simd<_Tp, _Abi>>>(
      __x, __y);
  }

_GLIBCXX_SIMD_CVTING2(hypot)
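// The _GLIBCXX_SIMD_CVTING2 overloads let one argument be anything implicitly
// convertible to the simd type, e.g. a scalar (sketch):
//
//   stdx::native_simd<float> __v = 4.f;
//   auto __h = hypot(__v, 3.f); // 3.f broadcasts; __h[i] == 5.f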
template <typename _VV>
  __remove_cvref_t<_VV>
  __hypot(_VV __x, _VV __y, _VV __z)
  {
    using _V = __remove_cvref_t<_VV>;
    using _Abi = typename _V::abi_type;
    using _Tp = typename _V::value_type;
    /* FIXME: enable after PR77776 is resolved
    if constexpr (_V::size() == 1)
      return std::hypot(_Tp(__x[0]), _Tp(__y[0]), _Tp(__z[0]));
    else
    */
    if constexpr (__is_fixed_size_abi_v<_Abi> && _V::size() > 1)
      {
        return __fixed_size_apply<simd<_Tp, _Abi>>(
          [](auto __a, auto __b, auto __c) { return hypot(__a, __b, __c); },
          __x, __y, __z);
      }
    else
      {
        using namespace __float_bitwise_operators;
        const _V __absx = abs(__x);                 // no error
        const _V __absy = abs(__y);                 // no error
        const _V __absz = abs(__z);                 // no error
        _V __hi = max(max(__absx, __absy), __absz); // no error
        _V __l0 = min(__absz, max(__absx, __absy)); // no error
        _V __l1 = min(__absy, __absx);              // no error
        if constexpr (__digits_v<_Tp> == 64 && __max_exponent_v<_Tp> == 0x4000
                        && __min_exponent_v<_Tp> == -0x3FFD && _V::size() == 1)
          { // Seems like x87 fp80, where bit 63 is always 1 unless subnormal
            // or NaN. In this case the bit-tricks don't work; they require
            // IEC559 binary32 or binary64 format.
#ifdef __STDC_IEC_559__
            // fixup for Annex F requirements
            if (isinf(__absx[0]) || isinf(__absy[0]) || isinf(__absz[0]))
              return __infinity_v<_Tp>;
            else if (isunordered(__absx[0], __absy[0] + __absz[0]))
              return __quiet_NaN_v<_Tp>;
            else if (__l0[0] == 0 && __l1[0] == 0)
              return __hi;
#endif
            _V __hi_exp = __hi;
            const _ULLong __tmp = 0x8000'0000'0000'0000ull;
            __builtin_memcpy(&__data(__hi_exp), &__tmp, 8);
            const _V __scale = 1 / __hi_exp;
            __hi *= __scale;
            __l0 *= __scale;
            __l1 *= __scale;
            return __hi_exp * sqrt((__l0 * __l0 + __l1 * __l1) + __hi * __hi);
          }
        else
          {
            // round __hi down to the next power-of-2:
            _GLIBCXX_SIMD_USE_CONSTEXPR_API _V __inf(__infinity_v<_Tp>);

#ifndef __FAST_MATH__
            if constexpr (_V::size() > 1 && __have_neon && !__have_neon_a32)
              { // With ARMv7 NEON, we have no subnormals and must use a
                // slightly different strategy
                const _V __hi_exp = __hi & __inf;
                _V __scale_back = __hi_exp;
                // For large exponents (max & max/2) the inversion comes too
                // close to subnormals. Subtract 3 from the exponent:
                where(__hi_exp > 1, __scale_back) = __hi_exp * _Tp(0.125);
                // Invert and adjust for the off-by-one error of inversion
                // via xor:
                const _V __scale = (__scale_back ^ __inf) * _Tp(.5);
                const _V __h1 = __hi * __scale;
                __l0 *= __scale;
                __l1 *= __scale;
                _V __lo = __l0 * __l0
                            + __l1 * __l1; // add the two smaller values first
                asm("" : "+m"(__lo));
                _V __r = __scale_back * sqrt(__h1 * __h1 + __lo);
                // Fix up hypot(0, 0, 0) to not be NaN:
                where(__hi == 0, __r) = 0;
                return __r;
              }
#endif

#ifdef __FAST_MATH__
            // With fast-math, ignore precision of subnormals and inputs
            // from __finite_max_v/2 to __finite_max_v. This removes all
            // branching/masking.
            if constexpr (true)
#else
            if (_GLIBCXX_SIMD_IS_LIKELY(all_of(isnormal(__x))
                                          && all_of(isnormal(__y))
                                          && all_of(isnormal(__z))))
#endif
              {
                const _V __hi_exp = __hi & __inf;
                // ((__hi + __hi) & __inf) ^ __inf almost works for computing
                // __scale, except when (__hi + __hi) & __inf == __inf, in
                // which case __scale becomes 0 (should be min/2 instead) and
                // thus loses the information from __lo.
#ifdef __FAST_MATH__
                using _Ip = __int_for_sizeof_t<_Tp>;
                using _IV = rebind_simd_t<_Ip, _V>;
                const auto __as_int = __bit_cast<_IV>(__hi_exp);
                const _V __scale
                  = __bit_cast<_V>(2 * __bit_cast<_Ip>(_Tp(1)) - __as_int);
#else
                const _V __scale = (__hi_exp ^ __inf) * _Tp(.5);
#endif
                constexpr _Tp __mant_mask
                  = __norm_min_v<_Tp> - __denorm_min_v<_Tp>;
                const _V __h1 = (__hi & _V(__mant_mask)) | _V(1);
                __l0 *= __scale;
                __l1 *= __scale;
                const _V __lo
                  = __l0 * __l0
                      + __l1 * __l1; // add the two smaller values first
                return __hi_exp * sqrt(__lo + __h1 * __h1);
              }
            else
              {
                // slower path to support subnormals
                // if __hi is subnormal, avoid scaling by inf & final mul by
                // 0 (which yields NaN) by using min()
                _V __scale = _V(1 / __norm_min_v<_Tp>);
                // invert exponent w/o error and w/o using the slow divider
                // unit: xor inverts the exponent but off by 1. Multiplication
                // with .5 adjusts for the discrepancy.
                where(__hi >= __norm_min_v<_Tp>, __scale)
                  = ((__hi & __inf) ^ __inf) * _Tp(.5);
                // adjust final exponent for subnormal inputs
                _V __hi_exp = __norm_min_v<_Tp>;
                where(__hi >= __norm_min_v<_Tp>, __hi_exp)
                  = __hi & __inf;         // no error
                _V __h1 = __hi * __scale; // no error
                __l0 *= __scale;          // no error
                __l1 *= __scale;          // no error
                _V __lo = __l0 * __l0
                            + __l1 * __l1; // add the two smaller values first
                _V __r = __hi_exp * sqrt(__lo + __h1 * __h1);
#ifdef __STDC_IEC_559__
                // fixup for Annex F requirements
                _V __fixup = __hi; // __lo == 0
                // where(__lo == 0, __fixup) = __hi;
                where(isunordered(__x, __y + __z), __fixup)
                  = __quiet_NaN_v<_Tp>;
                where(isinf(__absx) || isinf(__absy) || isinf(__absz),
                      __fixup)
                  = __inf;
                // Instead of __lo == 0, the following could depend on
                // __h1² == __h1² + __lo (i.e. __hi is so much larger than
                // the other two inputs that the result is exactly __hi).
                // While this may improve precision, it is likely to reduce
                // efficiency if the ISA has FMAs (because __h1² + __lo is an
                // FMA, but the intermediate __h1² must be kept).
                where(!(__lo == 0 || isunordered(__x, __y + __z)
                          || isinf(__absx) || isinf(__absy) || isinf(__absz)),
                      __fixup)
                  = __r;
                __r = __fixup;
#endif
                return __r;
              }
          }
      }
  }

template <typename _Tp, typename _Abi>
  _GLIBCXX_SIMD_INTRINSIC simd<_Tp, _Abi>
  hypot(const simd<_Tp, _Abi>& __x, const simd<_Tp, _Abi>& __y,
        const simd<_Tp, _Abi>& __z)
  {
    return __hypot<conditional_t<__is_fixed_size_abi_v<_Abi>,
                                 const simd<_Tp, _Abi>&, simd<_Tp, _Abi>>>(
      __x, __y, __z);
  }

_GLIBCXX_SIMD_CVTING3(hypot)
_GLIBCXX_SIMD_MATH_CALL2_(pow, _Tp)

_GLIBCXX_SIMD_MATH_CALL_(sqrt)
_GLIBCXX_SIMD_MATH_CALL_(erf)
_GLIBCXX_SIMD_MATH_CALL_(erfc)
_GLIBCXX_SIMD_MATH_CALL_(lgamma)
_GLIBCXX_SIMD_MATH_CALL_(tgamma)
_GLIBCXX_SIMD_MATH_CALL_(ceil)
_GLIBCXX_SIMD_MATH_CALL_(floor)
_GLIBCXX_SIMD_MATH_CALL_(nearbyint)
_GLIBCXX_SIMD_MATH_CALL_(rint)
_GLIBCXX_SIMD_MATH_CALL_(lrint)
_GLIBCXX_SIMD_MATH_CALL_(llrint)
_GLIBCXX_SIMD_MATH_CALL_(round)
_GLIBCXX_SIMD_MATH_CALL_(lround)
_GLIBCXX_SIMD_MATH_CALL_(llround)
_GLIBCXX_SIMD_MATH_CALL_(trunc)

_GLIBCXX_SIMD_MATH_CALL2_(fmod, _Tp)
_GLIBCXX_SIMD_MATH_CALL2_(remainder, _Tp)
_GLIBCXX_SIMD_MATH_CALL3_(remquo, _Tp, int*)
template <typename _Tp, typename _Abi>
  enable_if_t<is_floating_point_v<_Tp>, simd<_Tp, _Abi>>
  copysign(const simd<_Tp, _Abi>& __x, const simd<_Tp, _Abi>& __y)
  {
    if constexpr (simd_size_v<_Tp, _Abi> == 1)
      return std::copysign(__x[0], __y[0]);
    else if constexpr (is_same_v<_Tp, long double> && sizeof(_Tp) == 12)
      // Remove this case once __bit_cast is implemented via
      // __builtin_bit_cast. It is necessary because __signmask below cannot
      // be computed at compile time.
      return simd<_Tp, _Abi>(
               [&](auto __i) { return std::copysign(__x[__i], __y[__i]); });
    else
      {
        using _V = simd<_Tp, _Abi>;
        using namespace std::experimental::__float_bitwise_operators;
        _GLIBCXX_SIMD_USE_CONSTEXPR_API auto __signmask = _V(1) ^ _V(-1);
        return (__x & (__x ^ __signmask)) | (__y & __signmask);
      }
  }
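// How the bitwise branch above works (exposition only): _V(1) ^ _V(-1)
// differs only in the sign bit, so __signmask selects exactly that bit.
// (__x ^ __signmask) flips the sign of __x; AND-ing with __x therefore
// clears the sign while keeping the magnitude bits, and OR-ing in
// (__y & __signmask) transplants the sign of __y. E.g.
// copysign(_V(-2), _V(3)) yields a vector of 2.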
_GLIBCXX_SIMD_MATH_CALL2_(nextafter, _Tp)
// not covered in [parallel.simd.math]:
// _GLIBCXX_SIMD_MATH_CALL2_(nexttoward, long double)
_GLIBCXX_SIMD_MATH_CALL2_(fdim, _Tp)
_GLIBCXX_SIMD_MATH_CALL2_(fmax, _Tp)
_GLIBCXX_SIMD_MATH_CALL2_(fmin, _Tp)
_GLIBCXX_SIMD_MATH_CALL3_(fma, _Tp, _Tp)
_GLIBCXX_SIMD_MATH_CALL_(fpclassify)
_GLIBCXX_SIMD_MATH_CALL_(isfinite)

// isnan and isinf require special treatment because old glibc may declare
// `int isinf(double)`.
template <typename _Tp, typename _Abi, typename...,
          typename _R = _Math_return_type_t<bool, _Tp, _Abi>>
  enable_if_t<is_floating_point_v<_Tp>, _R>
  isinf(simd<_Tp, _Abi> __x)
  { return {__private_init, _Abi::_SimdImpl::_S_isinf(__data(__x))}; }

template <typename _Tp, typename _Abi, typename...,
          typename _R = _Math_return_type_t<bool, _Tp, _Abi>>
  enable_if_t<is_floating_point_v<_Tp>, _R>
  isnan(simd<_Tp, _Abi> __x)
  { return {__private_init, _Abi::_SimdImpl::_S_isnan(__data(__x))}; }
_GLIBCXX_SIMD_MATH_CALL_(isnormal)

template <typename..., typename _Tp, typename _Abi>
  simd_mask<_Tp, _Abi>
  signbit(simd<_Tp, _Abi> __x)
  {
    if constexpr (is_integral_v<_Tp>)
      {
        if constexpr (is_unsigned_v<_Tp>)
          return simd_mask<_Tp, _Abi>{}; // all false: unsigned is never negative
        else
          return __x < 0;
      }
    else
      return {__private_init, _Abi::_SimdImpl::_S_signbit(__data(__x))};
  }
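// Note (exposition only): unlike __x < 0, signbit distinguishes negative
// zero: signbit(simd<float>(-0.f)) is an all-true mask although
// -0.f < 0.f is false. This mirrors the scalar std::signbit semantics.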
_GLIBCXX_SIMD_MATH_CALL2_(isgreater, _Tp)
_GLIBCXX_SIMD_MATH_CALL2_(isgreaterequal, _Tp)
_GLIBCXX_SIMD_MATH_CALL2_(isless, _Tp)
_GLIBCXX_SIMD_MATH_CALL2_(islessequal, _Tp)
_GLIBCXX_SIMD_MATH_CALL2_(islessgreater, _Tp)
_GLIBCXX_SIMD_MATH_CALL2_(isunordered, _Tp)

/* not covered in [parallel.simd.math]
template <typename _Abi> __doublev<_Abi> nan(const char* tagp);
template <typename _Abi> __floatv<_Abi> nanf(const char* tagp);
template <typename _Abi> __ldoublev<_Abi> nanl(const char* tagp);

template <typename _V> struct simd_div_t { _V quot, rem; };

template <typename _Abi>
simd_div_t<_SCharv<_Abi>> div(_SCharv<_Abi> numer, _SCharv<_Abi> denom);
template <typename _Abi>
simd_div_t<__shortv<_Abi>> div(__shortv<_Abi> numer, __shortv<_Abi> denom);
template <typename _Abi>
simd_div_t<__intv<_Abi>> div(__intv<_Abi> numer, __intv<_Abi> denom);
template <typename _Abi>
simd_div_t<__longv<_Abi>> div(__longv<_Abi> numer, __longv<_Abi> denom);
template <typename _Abi>
simd_div_t<__llongv<_Abi>> div(__llongv<_Abi> numer, __llongv<_Abi> denom);
*/
// special math {{{
template <typename _Tp, typename _Abi>
  enable_if_t<is_floating_point_v<_Tp>, simd<_Tp, _Abi>>
  assoc_laguerre(const fixed_size_simd<unsigned, simd_size_v<_Tp, _Abi>>& __n,
                 const fixed_size_simd<unsigned, simd_size_v<_Tp, _Abi>>& __m,
                 const simd<_Tp, _Abi>& __x)
  {
    return simd<_Tp, _Abi>([&](auto __i) {
             return std::assoc_laguerre(__n[__i], __m[__i], __x[__i]);
           });
  }
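// Implementation note (exposition only): the special math functions in this
// section are not vectorized. The simd generator constructor calls the
// scalar std:: function once per element, equivalent to:
//   simd<_Tp, _Abi> __r;
//   for (size_t __i = 0; __i < __r.size(); ++__i)
//     __r[__i] = std::assoc_laguerre(__n[__i], __m[__i], __x[__i]);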
template <typename _Tp, typename _Abi>
  enable_if_t<is_floating_point_v<_Tp>, simd<_Tp, _Abi>>
  assoc_legendre(const fixed_size_simd<unsigned, simd_size_v<_Tp, _Abi>>& __n,
                 const fixed_size_simd<unsigned, simd_size_v<_Tp, _Abi>>& __m,
                 const simd<_Tp, _Abi>& __x)
  {
    return simd<_Tp, _Abi>([&](auto __i) {
             return std::assoc_legendre(__n[__i], __m[__i], __x[__i]);
           });
  }

_GLIBCXX_SIMD_MATH_CALL2_(beta, _Tp)
_GLIBCXX_SIMD_MATH_CALL_(comp_ellint_1)
_GLIBCXX_SIMD_MATH_CALL_(comp_ellint_2)
_GLIBCXX_SIMD_MATH_CALL2_(comp_ellint_3, _Tp)
_GLIBCXX_SIMD_MATH_CALL2_(cyl_bessel_i, _Tp)
_GLIBCXX_SIMD_MATH_CALL2_(cyl_bessel_j, _Tp)
_GLIBCXX_SIMD_MATH_CALL2_(cyl_bessel_k, _Tp)
_GLIBCXX_SIMD_MATH_CALL2_(cyl_neumann, _Tp)
_GLIBCXX_SIMD_MATH_CALL2_(ellint_1, _Tp)
_GLIBCXX_SIMD_MATH_CALL2_(ellint_2, _Tp)
_GLIBCXX_SIMD_MATH_CALL3_(ellint_3, _Tp, _Tp)
_GLIBCXX_SIMD_MATH_CALL_(expint)

template <typename _Tp, typename _Abi>
  enable_if_t<is_floating_point_v<_Tp>, simd<_Tp, _Abi>>
  hermite(const fixed_size_simd<unsigned, simd_size_v<_Tp, _Abi>>& __n,
          const simd<_Tp, _Abi>& __x)
  {
    return simd<_Tp, _Abi>(
             [&](auto __i) { return std::hermite(__n[__i], __x[__i]); });
  }

template <typename _Tp, typename _Abi>
  enable_if_t<is_floating_point_v<_Tp>, simd<_Tp, _Abi>>
  laguerre(const fixed_size_simd<unsigned, simd_size_v<_Tp, _Abi>>& __n,
           const simd<_Tp, _Abi>& __x)
  {
    return simd<_Tp, _Abi>(
             [&](auto __i) { return std::laguerre(__n[__i], __x[__i]); });
  }

template <typename _Tp, typename _Abi>
  enable_if_t<is_floating_point_v<_Tp>, simd<_Tp, _Abi>>
  legendre(const fixed_size_simd<unsigned, simd_size_v<_Tp, _Abi>>& __n,
           const simd<_Tp, _Abi>& __x)
  {
    return simd<_Tp, _Abi>(
             [&](auto __i) { return std::legendre(__n[__i], __x[__i]); });
  }

_GLIBCXX_SIMD_MATH_CALL_(riemann_zeta)

template <typename _Tp, typename _Abi>
  enable_if_t<is_floating_point_v<_Tp>, simd<_Tp, _Abi>>
  sph_bessel(const fixed_size_simd<unsigned, simd_size_v<_Tp, _Abi>>& __n,
             const simd<_Tp, _Abi>& __x)
  {
    return simd<_Tp, _Abi>(
             [&](auto __i) { return std::sph_bessel(__n[__i], __x[__i]); });
  }
template <typename _Tp, typename _Abi>
  enable_if_t<is_floating_point_v<_Tp>, simd<_Tp, _Abi>>
  sph_legendre(const fixed_size_simd<unsigned, simd_size_v<_Tp, _Abi>>& __l,
               const fixed_size_simd<unsigned, simd_size_v<_Tp, _Abi>>& __m,
               const simd<_Tp, _Abi>& __theta)
  {
    return simd<_Tp, _Abi>([&](auto __i) {
             return std::sph_legendre(__l[__i], __m[__i], __theta[__i]);
           });
  }
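// Exposition only: std::sph_legendre already contains the spherical
// normalization and evaluates the associated Legendre polynomial at cos θ:
//   sph_legendre(l, m, θ) == sqrt((2l+1)/(4π) · (l-m)!/(l+m)!) · P_l^m(cos θ)
// so it is not interchangeable with a direct call to std::assoc_legendre.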
template <typename _Tp, typename _Abi>
  enable_if_t<is_floating_point_v<_Tp>, simd<_Tp, _Abi>>
  sph_neumann(const fixed_size_simd<unsigned, simd_size_v<_Tp, _Abi>>& __n,
              const simd<_Tp, _Abi>& __x)
  {
    return simd<_Tp, _Abi>(
             [&](auto __i) { return std::sph_neumann(__n[__i], __x[__i]); });
  }
// }}}

#undef _GLIBCXX_SIMD_MATH_CALL_
#undef _GLIBCXX_SIMD_MATH_CALL2_
#undef _GLIBCXX_SIMD_MATH_CALL3_

_GLIBCXX_SIMD_END_NAMESPACE

#endif // __cplusplus >= 201703L
#endif // _GLIBCXX_EXPERIMENTAL_SIMD_MATH_H_

// vim: foldmethod=marker sw=2 ts=8 noet sts=2