// Generic simd conversions -*- C++ -*-
  2. // Copyright (C) 2020-2021 Free Software Foundation, Inc.
  3. //
  4. // This file is part of the GNU ISO C++ Library. This library is free
  5. // software; you can redistribute it and/or modify it under the
  6. // terms of the GNU General Public License as published by the
  7. // Free Software Foundation; either version 3, or (at your option)
  8. // any later version.
  9. // This library is distributed in the hope that it will be useful,
  10. // but WITHOUT ANY WARRANTY; without even the implied warranty of
  11. // MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
  12. // GNU General Public License for more details.
  13. // Under Section 7 of GPL version 3, you are granted additional
  14. // permissions described in the GCC Runtime Library Exception, version
  15. // 3.1, as published by the Free Software Foundation.
  16. // You should have received a copy of the GNU General Public License and
  17. // a copy of the GCC Runtime Library Exception along with this program;
  18. // see the files COPYING3 and COPYING.RUNTIME respectively. If not, see
  19. // <http://www.gnu.org/licenses/>.
  20. #ifndef _GLIBCXX_EXPERIMENTAL_SIMD_CONVERTER_H_
  21. #define _GLIBCXX_EXPERIMENTAL_SIMD_CONVERTER_H_
  22. #if __cplusplus >= 201703L
  23. _GLIBCXX_SIMD_BEGIN_NAMESPACE
  24. // _SimdConverter scalar -> scalar {{{
  25. template <typename _From, typename _To>
  26. struct _SimdConverter<_From, simd_abi::scalar, _To, simd_abi::scalar,
  27. enable_if_t<!is_same_v<_From, _To>>>
  28. {
  29. _GLIBCXX_SIMD_INTRINSIC constexpr _To operator()(_From __a) const noexcept
  30. { return static_cast<_To>(__a); }
  31. };
  32. // }}}
  33. // _SimdConverter scalar -> "native" {{{
  34. template <typename _From, typename _To, typename _Abi>
  35. struct _SimdConverter<_From, simd_abi::scalar, _To, _Abi,
  36. enable_if_t<!is_same_v<_Abi, simd_abi::scalar>>>
  37. {
  38. using _Ret = typename _Abi::template __traits<_To>::_SimdMember;
  39. template <typename... _More>
  40. _GLIBCXX_SIMD_INTRINSIC constexpr _Ret
  41. operator()(_From __a, _More... __more) const noexcept
  42. {
  43. static_assert(sizeof...(_More) + 1 == _Abi::template _S_size<_To>);
  44. static_assert(conjunction_v<is_same<_From, _More>...>);
  45. return __make_vector<_To>(__a, __more...);
  46. }
  47. };
  48. // }}}
  49. // _SimdConverter "native 1" -> "native 2" {{{
  50. template <typename _From, typename _To, typename _AFrom, typename _ATo>
  51. struct _SimdConverter<
  52. _From, _AFrom, _To, _ATo,
  53. enable_if_t<!disjunction_v<
  54. __is_fixed_size_abi<_AFrom>, __is_fixed_size_abi<_ATo>,
  55. is_same<_AFrom, simd_abi::scalar>, is_same<_ATo, simd_abi::scalar>,
  56. conjunction<is_same<_From, _To>, is_same<_AFrom, _ATo>>>>>
  57. {
  58. using _Arg = typename _AFrom::template __traits<_From>::_SimdMember;
  59. using _Ret = typename _ATo::template __traits<_To>::_SimdMember;
  60. using _V = __vector_type_t<_To, simd_size_v<_To, _ATo>>;
  61. template <typename... _More>
  62. _GLIBCXX_SIMD_INTRINSIC constexpr _Ret
  63. operator()(_Arg __a, _More... __more) const noexcept
  64. { return __vector_convert<_V>(__a, __more...); }
  65. };
  66. // }}}
  67. // _SimdConverter scalar -> fixed_size<1> {{{1
  68. template <typename _From, typename _To>
  69. struct _SimdConverter<_From, simd_abi::scalar, _To, simd_abi::fixed_size<1>,
  70. void>
  71. {
  72. _GLIBCXX_SIMD_INTRINSIC constexpr _SimdTuple<_To, simd_abi::scalar>
  73. operator()(_From __x) const noexcept
  74. { return {static_cast<_To>(__x)}; }
  75. };
  76. // _SimdConverter fixed_size<1> -> scalar {{{1
  77. template <typename _From, typename _To>
  78. struct _SimdConverter<_From, simd_abi::fixed_size<1>, _To, simd_abi::scalar,
  79. void>
  80. {
  81. _GLIBCXX_SIMD_INTRINSIC constexpr _To
  82. operator()(_SimdTuple<_From, simd_abi::scalar> __x) const noexcept
  83. { return {static_cast<_To>(__x.first)}; }
  84. };
  85. // _SimdConverter fixed_size<_Np> -> fixed_size<_Np> {{{1
  86. template <typename _From, typename _To, int _Np>
  87. struct _SimdConverter<_From, simd_abi::fixed_size<_Np>, _To,
  88. simd_abi::fixed_size<_Np>,
  89. enable_if_t<!is_same_v<_From, _To>>>
  90. {
  91. using _Ret = __fixed_size_storage_t<_To, _Np>;
  92. using _Arg = __fixed_size_storage_t<_From, _Np>;
  93. _GLIBCXX_SIMD_INTRINSIC constexpr _Ret
  94. operator()(const _Arg& __x) const noexcept
  95. {
  96. if constexpr (is_same_v<_From, _To>)
  97. return __x;
  98. // special case (optimize) int signedness casts
  99. else if constexpr (sizeof(_From) == sizeof(_To)
  100. && is_integral_v<_From> && is_integral_v<_To>)
  101. return __bit_cast<_Ret>(__x);
  102. // special case if all ABI tags in _Ret are scalar
  103. else if constexpr (__is_scalar_abi<typename _Ret::_FirstAbi>())
  104. {
  105. return __call_with_subscripts(
  106. __x, make_index_sequence<_Np>(),
  107. [](auto... __values) constexpr->_Ret {
  108. return __make_simd_tuple<_To, decltype((void) __values,
  109. simd_abi::scalar())...>(
  110. static_cast<_To>(__values)...);
  111. });
  112. }
  113. // from one vector to one vector
  114. else if constexpr (_Arg::_S_first_size == _Ret::_S_first_size)
  115. {
  116. _SimdConverter<_From, typename _Arg::_FirstAbi, _To,
  117. typename _Ret::_FirstAbi>
  118. __native_cvt;
  119. if constexpr (_Arg::_S_tuple_size == 1)
  120. return {__native_cvt(__x.first)};
  121. else
  122. {
  123. constexpr size_t _NRemain = _Np - _Arg::_S_first_size;
  124. _SimdConverter<_From, simd_abi::fixed_size<_NRemain>, _To,
  125. simd_abi::fixed_size<_NRemain>>
  126. __remainder_cvt;
  127. return {__native_cvt(__x.first), __remainder_cvt(__x.second)};
  128. }
  129. }
  130. // from one vector to multiple vectors
  131. else if constexpr (_Arg::_S_first_size > _Ret::_S_first_size)
  132. {
  133. const auto __multiple_return_chunks
  134. = __convert_all<__vector_type_t<_To, _Ret::_S_first_size>>(
  135. __x.first);
  136. constexpr auto __converted = __multiple_return_chunks.size()
  137. * _Ret::_FirstAbi::template _S_size<_To>;
  138. constexpr auto __remaining = _Np - __converted;
  139. if constexpr (_Arg::_S_tuple_size == 1 && __remaining == 0)
  140. return __to_simd_tuple<_To, _Np>(__multiple_return_chunks);
  141. else if constexpr (_Arg::_S_tuple_size == 1)
  142. { // e.g. <int, 3> -> <double, 2, 1> or <short, 7> -> <double, 4, 2,
  143. // 1>
  144. using _RetRem
  145. = __remove_cvref_t<decltype(__simd_tuple_pop_front<__converted>(
  146. _Ret()))>;
  147. const auto __return_chunks2
  148. = __convert_all<__vector_type_t<_To, _RetRem::_S_first_size>, 0,
  149. __converted>(__x.first);
  150. constexpr auto __converted2
  151. = __converted
  152. + __return_chunks2.size() * _RetRem::_S_first_size;
  153. if constexpr (__converted2 == _Np)
  154. return __to_simd_tuple<_To, _Np>(__multiple_return_chunks,
  155. __return_chunks2);
  156. else
  157. {
  158. using _RetRem2 = __remove_cvref_t<
  159. decltype(__simd_tuple_pop_front<__return_chunks2.size()
  160. * _RetRem::_S_first_size>(
  161. _RetRem()))>;
  162. const auto __return_chunks3 = __convert_all<
  163. __vector_type_t<_To, _RetRem2::_S_first_size>, 0,
  164. __converted2>(__x.first);
  165. constexpr auto __converted3
  166. = __converted2
  167. + __return_chunks3.size() * _RetRem2::_S_first_size;
  168. if constexpr (__converted3 == _Np)
  169. return __to_simd_tuple<_To, _Np>(__multiple_return_chunks,
  170. __return_chunks2,
  171. __return_chunks3);
  172. else
  173. {
  174. using _RetRem3
  175. = __remove_cvref_t<decltype(__simd_tuple_pop_front<
  176. __return_chunks3.size()
  177. * _RetRem2::_S_first_size>(
  178. _RetRem2()))>;
  179. const auto __return_chunks4 = __convert_all<
  180. __vector_type_t<_To, _RetRem3::_S_first_size>, 0,
  181. __converted3>(__x.first);
  182. constexpr auto __converted4
  183. = __converted3
  184. + __return_chunks4.size() * _RetRem3::_S_first_size;
  185. if constexpr (__converted4 == _Np)
  186. return __to_simd_tuple<_To, _Np>(
  187. __multiple_return_chunks, __return_chunks2,
  188. __return_chunks3, __return_chunks4);
  189. else
  190. __assert_unreachable<_To>();
  191. }
  192. }
  193. }
  194. else
  195. {
  196. constexpr size_t _NRemain = _Np - _Arg::_S_first_size;
  197. _SimdConverter<_From, simd_abi::fixed_size<_NRemain>, _To,
  198. simd_abi::fixed_size<_NRemain>>
  199. __remainder_cvt;
  200. return __simd_tuple_concat(
  201. __to_simd_tuple<_To, _Arg::_S_first_size>(
  202. __multiple_return_chunks),
  203. __remainder_cvt(__x.second));
  204. }
  205. }
  206. // from multiple vectors to one vector
  207. // _Arg::_S_first_size < _Ret::_S_first_size
  208. // a) heterogeneous input at the end of the tuple (possible with partial
  209. // native registers in _Ret)
  210. else if constexpr (_Ret::_S_tuple_size == 1
  211. && _Np % _Arg::_S_first_size != 0)
  212. {
  213. static_assert(_Ret::_FirstAbi::template _S_is_partial<_To>);
  214. return _Ret{__generate_from_n_evaluations<
  215. _Np, typename _VectorTraits<typename _Ret::_FirstType>::type>(
  216. [&](auto __i) { return static_cast<_To>(__x[__i]); })};
  217. }
  218. else
  219. {
  220. static_assert(_Arg::_S_tuple_size > 1);
  221. constexpr auto __n
  222. = __div_roundup(_Ret::_S_first_size, _Arg::_S_first_size);
  223. return __call_with_n_evaluations<__n>(
  224. [&__x](auto... __uncvted) {
  225. // assuming _Arg Abi tags for all __i are _Arg::_FirstAbi
  226. _SimdConverter<_From, typename _Arg::_FirstAbi, _To,
  227. typename _Ret::_FirstAbi>
  228. __native_cvt;
  229. if constexpr (_Ret::_S_tuple_size == 1)
  230. return _Ret{__native_cvt(__uncvted...)};
  231. else
  232. return _Ret{
  233. __native_cvt(__uncvted...),
  234. _SimdConverter<
  235. _From, simd_abi::fixed_size<_Np - _Ret::_S_first_size>, _To,
  236. simd_abi::fixed_size<_Np - _Ret::_S_first_size>>()(
  237. __simd_tuple_pop_front<_Ret::_S_first_size>(__x))};
  238. },
  239. [&__x](auto __i) { return __get_tuple_at<__i>(__x); });
  240. }
  241. }
  242. };
  243. // _SimdConverter "native" -> fixed_size<_Np> {{{1
  244. // i.e. 1 register to ? registers
  245. template <typename _From, typename _Ap, typename _To, int _Np>
  246. struct _SimdConverter<_From, _Ap, _To, simd_abi::fixed_size<_Np>,
  247. enable_if_t<!__is_fixed_size_abi_v<_Ap>>>
  248. {
  249. static_assert(
  250. _Np == simd_size_v<_From, _Ap>,
  251. "_SimdConverter to fixed_size only works for equal element counts");
  252. using _Ret = __fixed_size_storage_t<_To, _Np>;
  253. _GLIBCXX_SIMD_INTRINSIC constexpr _Ret
  254. operator()(typename _SimdTraits<_From, _Ap>::_SimdMember __x) const noexcept
  255. {
  256. if constexpr (_Ret::_S_tuple_size == 1)
  257. return {__vector_convert<typename _Ret::_FirstType::_BuiltinType>(__x)};
  258. else
  259. {
  260. using _FixedNp = simd_abi::fixed_size<_Np>;
  261. _SimdConverter<_From, _FixedNp, _To, _FixedNp> __fixed_cvt;
  262. using _FromFixedStorage = __fixed_size_storage_t<_From, _Np>;
  263. if constexpr (_FromFixedStorage::_S_tuple_size == 1)
  264. return __fixed_cvt(_FromFixedStorage{__x});
  265. else if constexpr (_FromFixedStorage::_S_tuple_size == 2)
  266. {
  267. _FromFixedStorage __tmp;
  268. static_assert(sizeof(__tmp) <= sizeof(__x));
  269. __builtin_memcpy(&__tmp.first, &__x, sizeof(__tmp.first));
  270. __builtin_memcpy(&__tmp.second.first,
  271. reinterpret_cast<const char*>(&__x)
  272. + sizeof(__tmp.first),
  273. sizeof(__tmp.second.first));
  274. return __fixed_cvt(__tmp);
  275. }
  276. else
  277. __assert_unreachable<_From>();
  278. }
  279. }
  280. };
  281. // _SimdConverter fixed_size<_Np> -> "native" {{{1
  282. // i.e. ? register to 1 registers
  283. template <typename _From, int _Np, typename _To, typename _Ap>
  284. struct _SimdConverter<_From, simd_abi::fixed_size<_Np>, _To, _Ap,
  285. enable_if_t<!__is_fixed_size_abi_v<_Ap>>>
  286. {
  287. static_assert(
  288. _Np == simd_size_v<_To, _Ap>,
  289. "_SimdConverter to fixed_size only works for equal element counts");
  290. using _Arg = __fixed_size_storage_t<_From, _Np>;
  291. _GLIBCXX_SIMD_INTRINSIC constexpr
  292. typename _SimdTraits<_To, _Ap>::_SimdMember
  293. operator()(_Arg __x) const noexcept
  294. {
  295. if constexpr (_Arg::_S_tuple_size == 1)
  296. return __vector_convert<__vector_type_t<_To, _Np>>(__x.first);
  297. else if constexpr (_Arg::_S_is_homogeneous)
  298. return __call_with_n_evaluations<_Arg::_S_tuple_size>(
  299. [](auto... __members) {
  300. if constexpr ((is_convertible_v<decltype(__members), _To> && ...))
  301. return __vector_type_t<_To, _Np>{static_cast<_To>(__members)...};
  302. else
  303. return __vector_convert<__vector_type_t<_To, _Np>>(__members...);
  304. },
  305. [&](auto __i) { return __get_tuple_at<__i>(__x); });
  306. else if constexpr (__fixed_size_storage_t<_To, _Np>::_S_tuple_size == 1)
  307. {
  308. _SimdConverter<_From, simd_abi::fixed_size<_Np>, _To,
  309. simd_abi::fixed_size<_Np>>
  310. __fixed_cvt;
  311. return __fixed_cvt(__x).first;
  312. }
  313. else
  314. {
  315. const _SimdWrapper<_From, _Np> __xv
  316. = __generate_from_n_evaluations<_Np, __vector_type_t<_From, _Np>>(
  317. [&](auto __i) { return __x[__i]; });
  318. return __vector_convert<__vector_type_t<_To, _Np>>(__xv);
  319. }
  320. }
  321. };
  322. // }}}1
  323. _GLIBCXX_SIMD_END_NAMESPACE
  324. #endif // __cplusplus >= 201703L
  325. #endif // _GLIBCXX_EXPERIMENTAL_SIMD_CONVERTER_H_
  326. // vim: foldmethod=marker sw=2 noet ts=8 sts=2 tw=80